Merge branch 'master' of git://github.com/kovidgoyal/calibre
.gitignore (vendored)
@@ -14,7 +14,6 @@ build
 dist
 docs
 resources/localization
-resources/images.qrc
 resources/scripts.pickle
 resources/ebook-convert-complete.pickle
 resources/builtin_recipes.xml
@@ -42,3 +41,4 @@ calibre_plugins/
 recipes/*.mobi
 recipes/*.epub
 recipes/debug
+/.metadata/
Changelog.yaml (944 additions)
@@ -20,6 +20,950 @@
 # new recipes:
 # - title:

- version: 1.8.0
  date: 2013-10-25

  new features:
    - title: "DOCX Input: Support linked (as opposed to embedded) images, if the linked image is found on the local computer."
      tickets: [1243597]

    - title: 'FB2 Input: Add support for note and cite back references. Link pairs of type="note" and type="cite" now automatically generate the correct back reference.'
      tickets: [1243714]

    - title: "When automerging books during an add, include the author as well as the title in the report of merged books."

    - title: "OS X Mavericks (10.9) breaks connecting to iTunes and iBooks on iOS devices. For more details see: http://www.mobileread.com/forums/showthread.php?t=215624"

  bug fixes:
    - title: "OS X: Fix system tray notifications causing crashes on some OS X 10.9 (Mavericks) systems (those that had Growl installed at some point)."
      tickets: [1224491]

    - title: "OS X: Fix font size in completion popups being too small on Mavericks (I hope)"
      tickets: [1243761]

    - title: "PDF Output: Fix rendering of some semi-transparent images. All semi-transparent images are now rendered using soft masks."
      tickets: [1243829]

    - title: "MOBI Output: Fix text marked with white-space:pre-wrap causing the Kindle to break lines at arbitrary points inside words."
      tickets: [1240235]

    - title: "FB2 Input: Fix a regression that broke conversion of FB2 files with paragraphs having both a style and an id attribute."
      tickets: [1243709]

    - title: "TXT Input: Ensure that <title> in the generated HTML has a meaningful value."
      tickets: [1236923]

    - title: "Book details panel: Fix HTML in author names and identifiers not being escaped"
      tickets: [1243976]

    - title: "HTML 5 parsing: Fix handling of xml:lang attributes: xml:lang is now mapped to a plain lang attribute on all elements, not just <html>"

    - title: "Update the HTML 5 parser used in calibre (html5lib-python) to fix a few corner cases"

    - title: "When bulk deleting formats, use a single temporary directory for the deleted files. This makes restoring them from the recycle bin a little cleaner. Also might fix the reported issue with the Windows recycle bin choking on creating a large number of folders."

    - title: "DOCX Input: Add support for hyperlink fields that have only anchors and not URLs"

    - title: "DOCX Input: Fix handling of multiple block level bookmarks at the same location."
      tickets: [1241451]

    - title: "HTMLZ Output: Fix inline CSS from <body> not being applied."
      tickets: [1242261]

    - title: "Fix the restore database operation failing on Windows installs with long usernames (this would cause the path to the temporary folder used to restore the database to become too long)."

    - title: "ODT Input: Various workarounds for broken ODT files generated by mk4ht"

    - title: "Fix a bug with non-ascii text in the create catalog dialog"
      tickets: [1241515]

  improved recipes:
    - A List Apart

- version: 1.7.0
  date: 2013-10-18

  new features:
    - title: "Cover grid: Allow using images as the background for the cover grid. To choose an image, go to Preferences->Look & Feel->Cover Grid."
      tickets: [1239194]

    - title: "An option to mark newly added books with a temporary mark. Option is in Preferences->Adding books."
      tickets: [1238609]

    - title: "Edit metadata dialog: Allow turning off the cover size displayed in the bottom right corner of the cover by right clicking the cover and choosing 'Hide cover size'. It can be restored the same way."

  bug fixes:
    - title: "Conversion: If both the embed font family option and the filter css option to remove fonts are set, do not remove the font specified by the embed font family option."

    - title: "Fix a few remaining situations that could cause the formats column to show an error message about SHLock"

    - title: "Make deleting books to the recycle bin more robust. Ensure that the temporary directory created during the move to recycle bin process is not left behind in case of errors."

    - title: "Windows: Check if the books' files are in use before deleting"

    - title: "Fix the custom device driver swap main and card option not working. Also fix swapping not happening for a few devices on Linux"
      tickets: [1240504]

    - title: "Edit metadata dialog: The Edit metadata dialog currently limits its max size based on the geometry of the smallest attached screen. Change that to use the geometry of the screen on which it will be shown."
      tickets: [1239597]

    - title: "HTMLZ Output: Fix <style> tag placed inside <body> instead of <head>."
      tickets: [1239530]

    - title: "HTMLZ Output: Fix inline styles not escaping quotes properly."
      tickets: [1239527]

    - title: "HTMLZ Output: Fix incorrect handling of some self closing tags like <br>."
      tickets: [1239555]

    - title: "Content server: Fix single item categories not working with a reverse proxy setup."
      tickets: [1238987]

    - title: "Fix a bug that could cause calibre to crash when switching from a large library to a smaller library with marked books."
      tickets: [1239210]

    - title: "Get Books: Fix downloading of some books in formats that do not have metadata yielding nonsense titles"

    - title: "Allow the marked book button to be added to the main toolbar when a device is connected"
      tickets: [1239163]

    - title: "Fix error if a marked book is deleted/merged."
      tickets: [1239161]

    - title: "Template language: Fix the formatter function days_between to compute the right value when the answer is negative."

    - title: "Windows: Fix spurious file in use by other process errors if the book's folder contained multiple hard links pointing to the same file"
      tickets: [1240788, 1240194]

    - title: "Windows: Fix duplicate files being created in very special circumstances when changing title and/or author (the title or author had to be between 31 and 35 characters long and the book entry had to have been created by a pre 1.x version of calibre). You can check if your library has any such duplicates, and remove them, by using the Check Library tool (right click the calibre button on the toolbar and select Library Maintenance->Check Library)."

  improved recipes:
    - Wall Street Journal
    - Newsweek Polska
    - Wired Magazine
    - cracked.com
    - Television Without Pity
    - Carta
    - Diagonales

- version: 1.6.0
  date: 2013-10-11

  new features:
    - title: "Temporary marking of books in the library"
      description: "This allows you to select books from your calibre library manually and mark them. This 'mark' will remain until you restart calibre, or clear the marks. You can easily work with only the marked subset of books by right clicking the Mark Books button. To use this feature, go to Preferences->Toolbars and add the 'Mark Books' tool to the main toolbar."
      type: major

    - title: "Get Books: Add Wolne Lektury and Amazon (Canada) ebook stores"

    - title: "DOCX Input: Handle hyperlinks in footnotes and endnotes"
      tickets: [1232790]

    - title: "Driver for Sunstech reader"
      tickets: [1231590]

    - title: "Allow using both uri: and url: identifiers to create two different arbitrary links, instead of just one, in the Book details panel"

    - title: "E-book viewer: Make all keyboard shortcuts configurable"
      tickets: [1232019]

    - title: "Conversion: Add an option to not condense CSS rules for margin, padding, border, etc. The option is under the Look & Feel section of the conversion dialog."
      tickets: [1233220]

    - title: "calibredb: Allow setting of the title sort field"
      tickets: [1233711]
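      # A possible command line route to this field, sketch only (the
      # set_metadata field name `sort` and the argument order are assumed,
      # not taken from this entry):
      #   calibredb set_metadata 123 --field sort:"Title Sort, The"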

    - title: "ebook-meta: Add an --identifier option to set identifiers."
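      # Possible usage, sketch only (the name:value form of the new flag is
      # assumed, not taken from this entry):
      #   ebook-meta book.epub --identifier isbn:9780061122415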

  bug fixes:
    - title: "Fix a locking error when composite columns containing formats are used and formats are added/deleted."
      tickets: [1233330]

    - title: "EPUB Output: Do not strip <object> tags with type application/svg+xml in addition to those that use image/svg+xml."
      tickets: [1236845]

    - title: "Cover grid: Fix selecting all books with Ctrl+A causing subsequent deselects to not fully work."
      tickets: [1236348]

    - title: "HTMLZ Output: Fix long titles causing an error when converting on Windows."
      tickets: [1235815]

    - title: "Content server: Fix OPDS category links to composite columns"

    - title: "E-book viewer: Fix a regression that broke import/export of bookmarks"
      tickets: [1231980]

    - title: "E-book viewer: Use the default font size setting for the dictionary view as well."
      tickets: [1232025]

    - title: "DOCX Input: Avoid using the value attribute for simple numbered lists, to silence the asinine epubcheck"

    - title: "HTML Input: Images linked by the poster attribute of the <video> tag are now recognized and processed."

    - title: "DOCX Input: Fix error when converting DOCX files that have numbering defined with no associated character style."
      tickets: [1232100]

    - title: "EPUB metadata: Implement updating of identifiers other than ISBN in the EPUB file when polishing or exporting the EPUB from calibre"

    - title: "Amazon metadata download: Fix parsing of some dates on amazon.de"
      tickets: [1238125]

  improved recipes:
    - National Geographic Magazine
    - New York Review of Books
    - Focus (PL)
    - Carta Capital
    - AM 730
    - Ming Pao (HK)
    - Neu Osnabrucker Zeitung

  new recipes:
    - title: Various Uruguayan news sources
      author: Carlos Alves

- version: 1.5.0
  date: 2013-09-26

  new features:
    - title: "Driver for Woxter Scriba"
      tickets: [1228690]

    - title: "Bulk metadata edit: Allow setting the comments for all selected books and also allow cloning the covers. Cloning covers means that the cover of the first selected book will be set for all other selected books."
      tickets: [1230040]

  bug fixes:
    - title: "Windows: Improved device ejection code. Eject individual drives before trying to eject the device. This fixes incomplete ejection with the Nook devices."

    - title: "Catalogs: Fix exclude tags rules not working in non-English locales when creating catalogs in EPUB/MOBI formats."
      tickets: [1228949]

    - title: "Kobo driver: Fix reading status being cleared when connecting to a Kobo with older firmware and metadata management set to automatic."
      tickets: [1230018]

    - title: "Content server: Sort virtual libraries by name"
      tickets: [1229459]

    - title: "DOCX Input: Convert tabs in the docx file into non-breaking spaces in the output document. Note that custom tab stops are not supported."
      tickets: [1228893]

    - title: "Conversion: Handle the style attribute on the <html> tag"

    - title: "Handle databases with invalid ratings link tables"
      tickets: [1228517]

    - title: "DOCX Input: Handle DOCX files with missing referenced styles"
      tickets: [1228669]

    - title: "Update the amazon metadata download plugin for changes to the Amazon website"

  improved recipes:
    - Slate
    - El Universal (VE)
    - GoComics

  new recipes:
    - title:

- version: 1.4.0
  date: 2013-09-20

  new features:
    - title: "Column icons: Allow the use of multiple icons with column icon rules."
      description: "You can now have column icon rules display multiple icons in a single column, side by side. There are two ways to do this: either specify multiple icons when creating the rule, or create multiple rules that match the same book and specify the icon type to be 'composed' for every rule. See Preferences->Look & Feel->Column icons for details."

    - title: "Kobo driver: Add support for the new cover handling in the Kobo Aura with updated firmware"

    - title: "Driver for Icarus Essence"
      tickets: [1226304]

    - title: "Show a warning when attempting to copy books between libraries that do not have the same set of custom columns."
      tickets: [1225484]

    - title: "EPUB/AZW3 Output: Use shorthand forms for margin, padding and border CSS properties, where possible"

    - title: "Allow colons in identifier values, needed for using URIs as identifiers"
      tickets: [1224885]

    - title: "Comments editor: Allow treating arbitrary URLs as images"

    - title: "Show the full path of the library under the mouse in the status bar when switching/renaming/deleting libraries via the calibre library button."
      tickets: [1224925]

    - title: "DOCX Input: Add support for embedded EMF images that are just wrappers around an actual raster image."
      tickets: [1224849]

  bug fixes:
    - title: "Conversion: Fix font subsetting not working for large fonts with more than 4000 glyphs, such as CJK fonts"

    - title: "HTML Input: Fix a regression that broke processing of HTML files that contain meta tags with dc: namespaced attribute values."

    - title: "Fix switching to an empty virtual library not blanking the book details panel"

    - title: "Keep position when deleting tags in the tag editor"
      tickets: [1226093]

    - title: "Book details panel: Render custom comments fields the same as the builtin comments field. In particular this fixes problems caused by wide text and images in custom comments fields."
      tickets: [1226350]

    - title: "Metadata jackets: Do not error out when using a custom template with some custom fields that are not present."
      tickets: [1225357]

    - title: "AZW3 Output: Don't choke on invalid (undecodable) links in the input document"

    - title: "Cover grid: Respect the double click on library view tweak"

    - title: "Fix covers set by drag and drop, or pasting in the edit metadata dialog, showing compression artifacts due to aggressive jpeg compression"

    - title: "Conversion: Fix a bug that could cause incorrect border values to be used when cascading, shorthand border CSS is present."

    - title: "Fix a regression in 1.3 that caused the book list to not track the current book when using Next/Previous in the edit metadata dialog."

  improved recipes:
    - Liberation
    - Politika

  new recipes:
    - title: Sage News
      author: Brian Hahn

    - title: Il Cambiamento
      author: ghib9
- version: 1.3.0
  date: 2013-09-13

  new features:
    - title: "When doing searches or switching between virtual libraries in the main book list, preserve the current book. The currently selected book will remain visible if it is present in the results of the search or the selected virtual library."
      tickets: [1216713]

    - title: "Drivers for Oppo Find 5 and PocketBook Mini 515"
      tickets: [1223853]

  bug fixes:
    - title: "DOCX Input: Handle numbered paragraphs where the numbering is specified in the paragraph style, instead of on the paragraph directly. Also support the use of arbitrary, styled text for bullets."

    - title: "DOCX Input: Fix a single line break at the end of a paragraph not being rendered as a blank line."

    - title: "DOCX Input: Fix extra top/bottom margins around headings when the heading style in Word does not specify any top/bottom margins."

    - title: "DOCX Input: Handle images in footnotes and endnotes."
      tickets: [1221686]

    - title: "ODT Input: Only treat the first image as a cover if it is of suitable size, instead of any image in the document."
      tickets: [1224157]

    - title: "Book polishing: Do not leave behind the old comments when updating metadata if the comments have been deleted in calibre."

    - title: "PDF Output: Fix non-breaking space characters incorrectly rendered in the PDF outline."
      tickets: [1223862]

    - title: "Content server: Fix error in the OPDS feed after using virtual libraries in the main server."
      tickets: [1222108]

    - title: "Do not scroll the book list horizontally after editing metadata."
      tickets: [1221552]

    - title: "New database backend: Handle databases that contain multiple tags/series/publishers/etc. that differ only in case."
      tickets: [1221545]

  improved recipes:
    - Harvard Business Review
    - Jakarta Post
    - Jakarta Globe
    - Dilema Veche
    - Daily Express
    - Anandtech
    - High Country News

  new recipes:
    - title: Caravan Magazine
      author: Kovid Goyal

    - title: Phys Org
      author: Krittika Goyal

- version: 1.2.0
  date: 2013-09-06

  new features:
    - title: "Conversion: Add support for the CSS3 rem font size unit"

    - title: "MTP devices, such as Android tablets/phones: Allow ignoring any folder on the device, not just top level folders. For newly connected devices, also scan /Android/data/com.amazon.kindle for books by default (newer versions of the Kindle app place downloaded files there)."

    - title: "Speed up sorting when the book list is showing a restricted set of books, such as when the results of a search are displayed or a virtual library is used."
      tickets: [1217622]

    - title: "Edit metadata dialog: Add an undo option for the Trim cover button."
      tickets: [1219227]

  bug fixes:
    - title: "Amazon metadata download: Update to handle website changes at amazon.com"

    - title: "PDF Output: Workaround a bug in the library calibre uses to render HTML to PDF that caused text in some documents that used small-caps fonts to not render correctly."
      tickets: [1216354]

    - title: "Kobo driver: Fix the copied file being corrupt when a sideloaded kepub is added from a Kobo device to the calibre library as an EPUB."
      tickets: [1221035]

    - title: "Fix changing the user interface language in the welcome wizard causing some parts of the interface to remain in the old language until calibre is restarted."
      tickets: [1220767]

    - title: "Fix a regression in 1.0 that broke setting author names with the | character in them."
      tickets: [1220348]

    - title: "Content server: When running from inside the main calibre program, do not restrict the books shown based on the current virtual library in the main program. If you wish to restrict the books shown in the content server, use Preferences->Sharing over the net."

    - title: "Output dates in the local timezone instead of UTC when generating CSV catalogs"

    - title: "Library maintenance: When doing a check library, instead of dumping the database to SQL and restoring it, run a VACUUM. This works around various bugs in the dump and restore capabilities of apsw."
      tickets: [1217988]

    - title: "Edit metadata dialog: Fix adding an image to an empty comments block not working"

    - title: "Conversion: Fix font declarations with invalid font-family values causing conversion to abort when subsetting is enabled."

    - title: "MOBI Output: Fix conversion of some super/sub scripts failing if they are the first or last element in a paragraph."
      tickets: [1218858]

    - title: "New database backend: Various improvements to make the backend more robust against databases with invalid/corrupt data in them."
      tickets: [1218465, 1218783]

  improved recipes:
    - Countryfile

- version: 1.1.0
  date: 2013-08-30

  new features:
    - title: "Rewrite the HTML metadata parser to make it faster and more robust."
      tickets: [1217751]

    - title: "Book list: When sorting on a currently unsorted column, use the last applied sort for that column, instead of always sorting in ascending order."
      tickets: [1216714]

    - title: "PocketBook driver: Scan for book files on the entire device, not just in the 'books' folder"

  bug fixes:
    - title: "Fix a regression in 1.0 that could cause the dates in custom date-type columns to change in some timezones when using the edit metadata dialog to make unrelated changes."
      tickets: [1217096]

    - title: "When replacing formats in a book with a very long title+authors on Windows, calibre could leave behind the old format file, because the filename shortening algorithm has changed. Handle that case."

    - title: "Fix the content server giving an error if you return to the top level page after using the virtual libraries."
      tickets: [1216838]

    - title: "Fix calibredb not updating the running calibre instance properly in 1.0"
      tickets: [1218177]

    - title: "Fix a regression in 1.0 that broke splitting of multi-valued fields, like tags, into many items during a rename."
      tickets: [1216699]

    - title: "Fix a regression in 1.0 that caused an error when trying to set values for tags with the same item repeated, with different case."
      tickets: [1216398]

    - title: "Fix a regression that broke downloading news when the output format is set to PDF"

    - title: "Creating a catalog with an already existing catalog in the library would cause a temporary duplicate entry in the book list. Also fix the author sort for catalogs generated in the AZW3 format not being correct."

    - title: "EPUB metadata: When changing the title in an EPUB 2.0 file that has multiple titles, remove the extra titles."
      tickets: [1211949]

    - title: "Fix a regression in 1.0 that caused Search and Replace in the bulk metadata edit dialog to be much slower than before"

    - title: "Fix a regression in 1.0 that caused incorrect sorting and searching on some composite columns (columns built from other columns)."

    - title: "Fix a regression in 1.0 that prevented the moving of libraries inside calibre"
      tickets: [1216401]

    - title: "Virtual Library tabs: If the user activates a hidden tab via the Virtual Library button, change the name of the All Books tab to reflect the hidden virtual library."
      tickets: [1216174]

    - title: "Ignore text records in the database that are damaged, instead of erroring out. This lets the rest of the data be used."
      tickets: [1215981]

    - title: "Fix a regression that broke calibredb catalog when sorting on the id field."
      tickets: [1216090]

    - title: "HTML Input: Handle malformed OPF files when converting."
      tickets: [1215924]

    - title: "Ensure that the Formats custom column (if present) is updated when a new format is created as a result of a conversion."
      tickets: [1215885]

    - title: "Fix a bug in 1.0 that broke the Check Library function on computers with non-English locales."
      tickets: [1215819]

    - title: "Content server: Fix a blank username causing an error on startup."
      tickets: [1215893]

    - title: "Fix sorting of the book list by multi-valued fields, like tags, not being correct in the new backend."
      tickets: [1215820]

  improved recipes:
    - Daily Mirror

  new recipes:
    - title: VFR Magazine
      author: Krittika Goyal
- version: 1.0.0
  date: 2013-08-23

  new features:
    - title: "A new 'cover grid' view of the books in your calibre library"
      description: "Excellent for judging your books by their covers :) To use it, click the button with the icon of a grid in the bottom right corner of the main window. It can be configured via Preferences->Look & Feel->Cover Grid"
      type: major

    - title: "A new, faster database backend"
      description: "The database backend in calibre has been re-written from scratch. The new code is smaller, more robust and much faster than the old code. The exact speedup will depend on the number of books and the number and type of custom columns in your library. Users have reported calibre startup times decreasing by a factor of 2-3 times."
      type: major

    - title: "For a summary of the major changes in calibre between 0.9 and 1.0, see http://calibre-ebook.com/new-in/ten"
      type: major

    - title: "RTF Input: Add an option to ignore WMF images instead of replacing them with a placeholder."
      tickets: [1213599]

    - title: "Content server: Make virtual libraries available as searches from the start page. They work just like saved searches: clicking on a virtual library will show you all the books in that virtual library."

  bug fixes:
    - title: "Remove the extra, useless 'language' entry in the metadata download configuration dialog"

    - title: "Kobo driver: Display device collections even if the driver is configured to not manage shelves on the device."
      tickets: [1214233]

    - title: "Fix a typo in the calibre.desktop file on Linux"
      tickets: [1213664]

    - title: "Edit metadata dialog: Disable the OK button while results are being downloaded."
      tickets: [1213397]

    - title: "In OS X 10.8 Apple stopped redirecting stdout/stderr to Console.app for applications launched by launch services. Re-enable the redirection, useful for debugging."

    - title: "Fix virtual library tabs not being updated when using the VL button"

  improved recipes:
    - Consumerist
    - jeuxvideo
    - Metro UK
    - El Tribuno
    - High Country News
    - Daily Express
    - Providence Journal
    - mediapart

  new recipes:
    - title: News24 and Nuus24
      author: Nicki de Wet

- version: 0.9.44
  date: 2013-08-16

  new features:
    - title: "Add an option to display all virtual libraries as tabs above the book list."
      description: "Convenient to quickly switch between virtual libraries. To enable it, click the Virtual library button and select 'Show virtual libraries as tabs'. You can re-arrange the tabs by drag and drop, and close tabs you do not want. Right click on the tabs to restore closed tabs."

    - title: "An improved cover trimming algorithm to automatically detect and remove borders and extra space from the edge of cover images. To try it, use the 'Trim' button in the edit metadata dialog. This can sometimes remove too much, so if you don't like the result, just click Cancel. You can make the algorithm more or less aggressive via Preferences->Tweaks"

    - title: "Allow customizing the comic metadata reader plugin via Preferences->Plugins to read the series index from either the volume or the issue number of the comic."
      tickets: [1211433]

    - title: "Linux MTP driver: Add ids for some newer devices."
      tickets: [1212458]

    - title: "Add a trim cover option to the bulk metadata edit dialog"

    - title: "Make the book information dialog user resizable, with a splitter between the cover and the info panel. Also change the background of the cover panel for books that have been marked using the Temp marker plugin."
      tickets: [1209057]

    - title: "Driver for the Samsung Galaxy Young Android phone"
      tickets: [1212918]

  bug fixes:
    - title: "PDF Output: Do not abort conversion if the document being converted has an HTML cover (found in some broken EPUB files)."

    - title: "RTF Input: When converting RTF files with no codepage, use the input encoding setting as the codepage."
      tickets: [1163572]

  improved recipes:
    - The Independent
    - El Periodica de Aragon
    - El Correo

  new recipes:
    - title: Daily Express
      author: Dave Asbury

- version: 0.9.43
  date: 2013-08-09

  new features:
    - title: "TXT Input: Allow using various markdown extensions for more features when converting markdown formatted txt files. See http://pythonhosted.org/Markdown/extensions/index.html for details."
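      # Possible usage, sketch only (the option names --formatting-type and
      # --markdown-extensions are assumed, not taken from this entry):
      #   ebook-convert notes.txt notes.epub --formatting-type markdown --markdown-extensions footnotes,tables,toc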

    - title: "Sending by email: Allow sending by email to an arbitrary combination of email addresses. Access it via the 'Select recipients' menu entry in the Email To menu."
      tickets: [1207818]

    - title: "A new 'Sort By' action for the right click menu. This allows sorting on all columns in the library, not just the visible columns. To use it, go to Preferences->Toolbars and add it to 'The context menu for books in the calibre library'"

    - title: "Allow adding images into the comments field, by clicking on the insert link button in the comments editor in the edit metadata dialog."

    - title: "Allow skipping the confirm bulk reconvert dialog"

    - title: "EPUB Input: If the EPUB file identifies an actual cover image in addition to the titlepage html file, use the cover image instead of rendering the titlepage. This is faster and has the advantage that an EPUB to EPUB conversion preserves internal cover structure."

    - title: "Get Books: Improve searching by removing punctuation from title/authors before matching."

  bug fixes:
    - title: "Conversion: Fix empty inline tags that are the second child of a paragraph causing text to change location."
      tickets: [1207735]

    - title: "Fix the book count in the tooltip of the choose library button not updating"
      tickets: [1208217]

    - title: "Kobo driver: When deleting shelves that have been synced, the Activity entry for the shelf was not being deleted. This left a tile for the shelf on the home screen of the Glo and AuraHD."
      tickets: [1208159]

    - title: "Comments editor: The Insert Link button has no effect until the user clicks inside the comments box, therefore disable it until it is ready, to prevent confusion."
      tickets: [1208073]

    - title: "Get Books: Update various Polish store plugins"

  improved recipes:
    - The Sunday Times UK and The Times Online
    - Telegraph UK
    - "Le Monde: Edition abonnés"
    - The Scotsman

  new recipes:
    - title: Various French news sources
      author: Malah

    - title: La Capital de Rosario
      author: Darko Miletic

    - title: Jot Down
      author: desUbiKado

    - title: Private Eye
      author: Martyn Pritchard

- version: 0.9.42
  date: 2013-08-02

  new features:
    - title: "When downloading metadata from Amazon, convert the Amazon categories into tags. You can turn this off by going to Preferences->Metadata download and configuring the Amazon source."
      tickets: [1206763]

    - title: "Kobo driver: Add an option to modify the styling in books being sent to the device, based on a template on the device."
      tickets: [1207151]

    - title: "Get Books: Add support for two more Polish ebook stores: cdp.pl and ebooki.allegro.pl"

    - title: "calibredb: Add a new clone command to create clones of libraries with the same custom columns, virtual libraries, etc. as the current library."
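      # Possible usage, sketch only (the destination path is illustrative):
      #   calibredb clone /path/to/new-library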

  bug fixes:
    - title: "MOBI metadata: Do not fail to set metadata in MOBI files if they have EXTH fields with NULL pointers to a cover or thumbnail."
      tickets: [1205757]

    - title: "Fix editing of book metadata failing when its timestamp is out of range for the system."
      tickets: [1191599]

    - title: "Fix renaming a user category to the same name it already has erasing the user category."
      tickets: [1207131]

    - title: "Fix drag 'n drop of a cover onto the conversion dialog not working"

    - title: "Device drivers: Explicitly fsync() all files when writing to devices, to reduce the chances of file corruption if the device is disconnected while jobs are running"

    - title: "Fix calibre not appearing in Ubuntu's 'Open with..' menu"
      tickets: [1207518]

  improved recipes:
    - PC World

- version: 0.9.41
  date: 2013-07-27

  new features:
    - title: "Add a button to clear the current virtual library easily"

    - title: "Driver for Surftab Ventos"
      tickets: [1204885]

    - title: "E-book viewer: Allow re-ordering bookmarks in the bookmarks manager by drag and drop."

  bug fixes:
    - title: "DOCX Input: Fix conversion breaking for files that use heading style paragraphs to insert line rules"

    - title: "Content server: Fix the last search query not being fully sanitized in the results page"
      tickets: [1205385]

    - title: "Book polishing: Fix page margins being removed if an unused font was found during subsetting of embedded fonts."

    - title: "PDF Output: Do not error out when the input document uses a font that cannot be subset, such as the Symbol font. Instead print a warning and embed the full font."
      tickets: [1203449]

    - title: "Conversion: Fix a regression in the last release that broke conversion of a few files with comments just before a chapter start."
      tickets: [1188635]

  improved recipes:
    - Something Awful
    - Spektrum der Wissenschaft
    - mediapart.fr
    - Dilbert
    - Antyweb
    - Scientific American
    - taz.de (RSS)

  new recipes:
    - title: Blindbuch and No names, No jackets
      author: Armin Geller

    - title: El Tribuno Salta and Jujuy
      author: Darko Miletic

- version: 0.9.40
  date: 2013-07-19

  new features:
    - title: "EPUB Output: Add an option to insert an inline Table of Contents into the main text."
      tickets: [1201006]
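      # Possible usage, sketch only (the flag name --epub-inline-toc is
      # assumed, not taken from this entry):
      #   ebook-convert book.docx book.epub --epub-inline-toc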

    - title: "Driver for LG Android phone"
      tickets: [1202013]

    - title: "When matching books in the library against the device manually, pre-fill the search field with the book title"
      tickets: [1200826]

  bug fixes:
    - title: "PDF Input: Fix a regression that caused some images to be flipped when converting PDF files that use image rotation operators."
      tickets: [1201083]

    - title: "Fix a regression that caused an incorrect font size in dropcaps generated by the DOCX input plugin"

    - title: "Get Books: Fix searching for title and author returning some extra matches, if the title starts with an article like the, a or an."
      tickets: [1200012]

    - title: "PDF Output: Fix an extra blank page being inserted at the start of the chapter when converting some epub files from feedbooks"

    - title: "PDF Output: Workaround a bug in WebKit's getBoundingClientRect() method that could cause links to occasionally point to incorrect locations."
      tickets: [1202390]

    - title: "E-book viewer: Fix a bug that could cause the reported position to be incorrect immediately after opening a previously opened book. This also fixes the Back button not working if a link is clicked on the page immediately after opening the book."

    - title: "Fix the memory card not being detected for the Elonex 621 on Windows"

    - title: "Fix a regression in the last release that broke auto-conversion of ebooks when sending to device/sending by email."
      tickets: [1200864]

    - title: "Get Books: Update the amazon plugins for website changes"

    - title: "Allow using non-ascii chars in email passwords."
      tickets: [1202825]

  improved recipes:
    - Galaxy's Edge

  new recipes:
    - title: Il Foglio
      author: faber1971

    - title: Le Monde Diplomatique and Acrimed
      author: Gaetan Lehmann

- version: 0.9.39
  date: 2013-07-12

  new features:
    - title: "Bulk metadata edit: Add a checkbox to prevent the refreshing of the book list after the bulk edit. This means that the book list will not be resorted and any existing search/virtual library will not be refreshed. Useful if you have a large library, as the refresh can be slow."

    - title: "Allow manually marking a book in the calibre library as being on the device. To do so, click the device icon in calibre, then right click on the book you want marked and choose 'Match book to library'. Once you are done marking all the books, right click the device icon and choose 'Update cached metadata'"

    - title: "Driver for Coby Kyros MID1126"
      tickets: [1199410]

    - title: "When adding formats to an existing book, by right clicking the add books button, ask for confirmation if some formats will be overwritten."

    - title: "Add a tweak to restrict the list of output formats available in the conversion dialog. Go to Preferences->Tweaks to change it."

  bug fixes:
    - title: "Amazon metadata download: Update the plugin to deal with the new amazon.com website"

    - title: "Edelweiss metadata download plugin: Workaround for advanced search being broken at the Edelweiss website."

    - title: "Invalid data in the device database on Sony readers could cause errors when sorting device collections; ignore those errors."

    - title: "DOCX Input: Fix no page break being inserted before the last section."
      tickets: [1198414]

    - title: "Metadata download dialog: Have the OK button enabled in the results screen as well."
      tickets: [1198288]

    - title: "Get Books: Update the empik store plugin"

  improved recipes:
    - Houston Chronicle
    - cracked.com
    - mediapart.fr

  new recipes:
    - title: Glenn Brenwald and Ludwig von Mises Institute
      author: anywho

- version: 0.9.38
  date: 2013-07-05

  new features:
    - title: "Book polishing: Add an option to embed all referenced fonts when polishing books using the 'Polish Books' tool."
      tickets: [1196038]

    - title: "DOCX Input: Add support for clickable (hyperlinked) images"
      tickets: [1196728]

    - title: "DOCX Input: Insert page breaks at the start of every new section"
      tickets: [1196728]

    - title: "Drivers for Trekstor Pyrus Maxi and PocketBook Surfpad 2"
      tickets: [1196931, 1182850]

    - title: "DOCX Input: Add support for horizontal rules created by typing three hyphens and pressing enter."

  bug fixes:
    - title: "Fix detection of the SD Card in some PRS-T2N devices"
      tickets: [1197970]

    - title: "MOBI Input: Fix a regression that broke parsing of MOBI files with malformed markup that also used entities for apostrophes."
      tickets: [1197585]

    - title: "Get Books: Update the Woblink store plugin"

    - title: "Metadata download dialog: Prevent the buttons from being re-ordered when the Next button is clicked."

    - title: "PDF Output: Fix links that point to URLs with query parameters being mangled by the conversion process."
      tickets: [1197006]

    - title: "DOCX Input: Fix links pointing to locations in the same document that contain multiple, redundant bookmarks not working."

    - title: "EPUB/AZW3 Output: Fix splitting on page-break-after, with plain text immediately following the split point, causing the text to be added before rather than after the split point."
      tickets: [1196728]

    - title: "DOCX Input: Handle bookmarks defined at the paragraph level"
      tickets: [1196728]

    - title: "DOCX Input: Handle hyperlinks created as fields"
      tickets: [1196728]

  improved recipes:
    - iprofessional

  new recipes:
    - title: Democracy Now
      author: Antoine Beaupre

- version: 0.9.37
  date: 2013-06-28

  new features:
    - title: "Conversion: Add option to embed all referenced fonts"
      type: major
      description: "Add an option to embed all fonts that are referenced in the input document but are not already embedded. This will search your system for the referenced font, and if found, the font will be embedded. Only works if the output format supports font embedding (for example: EPUB or AZW3). The option is under the Look & Feel section of the conversion dialog."
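      # A possible command line equivalent, sketch only (the flag name
      # --embed-all-fonts is assumed, not taken from this entry):
      #   ebook-convert book.html book.epub --embed-all-fonts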

    - title: "ToC Editor: When generating a ToC from files, if the file has no text, do not skip it. Instead create an entry using the filename of the file."

    - title: "AZW3 Input: Add support for the page-progression-direction attribute that is used to indicate page turns should happen from right to left. The attribute is passed into the EPUB when converting."
      tickets: [1194766]

    - title: "ebook-convert: Add a --from-opf option to read metadata from OPF files directly, instead of having to run ebook-meta --from-opf after conversion"
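      # Possible usage, sketch only (file names are illustrative):
      #   ebook-convert book.docx book.epub --from-opf metadata.opf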

  bug fixes:
    - title: "PDF Output: Fix the Table of Contents being added to the end of the PDF even without the Add Table of Contents option being enabled."
      tickets: [1194836]

    - title: "When auto-merging books on add, also merge identifiers."

    - title: "Fix an error when using the Template Editor to create a template that uses custom columns."
      tickets: [1193763]

    - title: 'LRF Output: Fix &quot; entities in attribute values causing problems'

    - title: "News download: Apply the default page margin conversion settings. Also, when converting to PDF, apply the PDF conversion defaults."
      tickets: [1193912]

    - title: "Fix a regression that broke scanning for books on all devices that used the Aluratek Color driver."
      tickets: [1192940]

    - title: "fetch-ebook-metadata: Fix the --opf argument erroneously requiring a value"

    - title: "When waiting before sending email, log the wait."
      tickets: [1195173]

  improved recipes:
    - taz.de (RSS)
    - Miradas al sur
    - Frontline
    - La Nacion (Costa Rica)

- version: 0.9.36
  date: 2013-06-21

  new features:
    - title: "DOCX Input: Support for a Table of Contents created using the Word Table of Contents tool. calibre now first looks for such a Table of Contents, and only if one is not found does it generate a ToC from headings."

    - title: "DOCX Input: Add support for images used as bullets in lists"

    - title: "DOCX Input: If a large image that looks like a cover is present at the start of the document, remove it and use it as the cover of the output ebook. This can be turned off under the DOCX Input section of the conversion dialog."

    - title: "When dropping files onto the Book Details panel, ask for confirmation before adding the files to the book. The confirmation can be disabled."

    - title: "News download: Add the 'downloaded from' link at the bottom of every article when using a touchscreen output profile (like the Tablet profile)."

    - title: "E-book viewer: Change the bookmark button to always popup a menu when clicked; makes accessing existing bookmarks easier."

    - title: "After a bulk metadata download, focus the review button on the popup notification, instead of the OK button."
      tickets: [1190931]

  bug fixes:
    - title: "DOCX Input: Hide text that has been marked as not being visible in the web view in Word."

    - title: "DOCX Input: When converting docx files with large numbers of unnamed images, do not crash on Windows."
      tickets: [1191354]

    - title: "DOCX Input: Add support for the Word setting 'No space between paragraphs with the same style'."
      tickets: [119100]

    - title: "MOBI Output: Fix rendering of SVG images that embed large raster images in 64bit calibre installs."
      tickets: [1191020]

    - title: "HTMLZ Output: Fix handling of images with URL unsafe filenames."
      tickets: [1192687]

    - title: "Fix being unable to change the case of a previously used search because of the search history."

    - title: "When searching, allow the use of uppercase location names, such as AUTHOR instead of author, automatically lowercasing them."
      tickets: [1192785]

    - title: "DOCX metadata: When reading covers from DOCX files, use the first image as specified in the actual markup instead of just the first image in the container."

    - title: "Kobo driver: Fix a regression when deleting empty shelves on Kobo devices with older firmware."
      tickets: [1192441]

    - title: "Do not show builtin plugins in the get new plugins dialog. If a builtin plugin with the same name as a third party plugin existed, the builtin plugin was displayed in the get new plugins dialog as installed (happened with the new DOCX Input plugin)."

    - title: "Apple driver: When in synchronous mode (direct to iBooks), disable PDF transfers, as we can't update metadata in iTunes. Not sure when this started, but as of iTunes 11.0.4 it's broken."

    - title: "Get Books: Fix an error when using the internal browser on some systems"
      tickets: [1191199]

  improved recipes:
    - The Walrus Mag
    - Various Polish news sources

  new recipes:
    - title: Various Polish news sources
      author: fenuks

- version: 0.9.35
  date: 2013-06-14

@@ -24,3 +24,10 @@ Development
 A [tarball of the source code](http://status.calibre-ebook.com/dist/src) for the
 current calibre release.
+
+Bugs
+------
+
+Bug reports and feature requests should be made in the calibre bug tracker at [launchpad](https://bugs.launchpad.net/calibre).
+The GitHub bug tracker is only for people contributing code to calibre.
+

imgsrc/marked.svg (new file, 162 lines)
@@ -0,0 +1,162 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||||
|
<!-- Created with Inkscape (http://www.inkscape.org/) -->
|
||||||
|
|
||||||
|
<svg
|
||||||
|
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||||
|
xmlns:cc="http://creativecommons.org/ns#"
|
||||||
|
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||||
|
xmlns:svg="http://www.w3.org/2000/svg"
|
||||||
|
xmlns="http://www.w3.org/2000/svg"
|
||||||
|
xmlns:xlink="http://www.w3.org/1999/xlink"
|
||||||
|
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
|
||||||
|
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
|
||||||
|
width="128"
|
||||||
|
height="128"
|
||||||
|
id="svg2"
|
||||||
|
version="1.1"
|
||||||
|
inkscape:version="0.48.4 r9939"
|
||||||
|
sodipodi:docname="marked.svg"
|
||||||
|
inkscape:export-filename="/home/kovid/work/calibre/resources/images/marked.png"
|
||||||
|
inkscape:export-xdpi="90"
|
||||||
|
inkscape:export-ydpi="90">
|
||||||
|
<title
|
||||||
|
id="title3847">Pushpin Icon</title>
|
||||||
|
<defs
|
||||||
|
id="defs4">
|
||||||
|
<linearGradient
|
||||||
|
id="linearGradient3782">
|
||||||
|
<stop
|
||||||
|
style="stop-color:#000000;stop-opacity:1;"
|
||||||
|
offset="0"
|
||||||
|
id="stop3784" />
|
||||||
|
<stop
|
||||||
|
style="stop-color:#c3c3c0;stop-opacity:1;"
|
||||||
|
offset="1"
|
||||||
|
id="stop3786" />
|
||||||
|
</linearGradient>
|
||||||
|
<linearGradient
|
||||||
|
inkscape:collect="always"
|
||||||
|
xlink:href="#linearGradient3782"
|
||||||
|
id="linearGradient3813"
|
||||||
|
gradientUnits="userSpaceOnUse"
|
||||||
|
gradientTransform="matrix(0.70710678,-0.70710678,0.70710678,0.70710678,-18.805519,996.21376)"
|
||||||
|
x1="58"
|
||||||
|
y1="91"
|
||||||
|
x2="73"
|
||||||
|
y2="91" />
|
||||||
|
<filter
|
||||||
|
id="filter3014"
|
||||||
|
inkscape:label="Ridged border"
|
||||||
|
inkscape:menu="Bevels"
|
||||||
|
inkscape:menu-tooltip="Ridged border with inner bevel"
|
||||||
|
color-interpolation-filters="sRGB">
|
||||||
|
<feMorphology
|
||||||
|
id="feMorphology3016"
|
||||||
|
radius="4.3"
|
||||||
|
in="SourceAlpha"
|
||||||
|
result="result91" />
|
||||||
|
<feComposite
|
||||||
|
id="feComposite3018"
|
||||||
|
in2="result91"
|
||||||
|
operator="out"
|
||||||
|
in="SourceGraphic" />
|
||||||
|
<feGaussianBlur
|
||||||
|
id="feGaussianBlur3020"
|
||||||
|
result="result0"
|
||||||
|
stdDeviation="1.2" />
|
||||||
|
<feDiffuseLighting
|
||||||
|
id="feDiffuseLighting3022"
|
||||||
|
diffuseConstant="1"
|
||||||
|
result="result92">
|
||||||
|
<feDistantLight
|
||||||
|
id="feDistantLight3024"
|
||||||
|
elevation="66"
|
||||||
|
azimuth="225" />
|
||||||
|
</feDiffuseLighting>
|
||||||
|
<feBlend
|
||||||
|
id="feBlend3026"
|
||||||
|
in2="SourceGraphic"
|
||||||
|
mode="multiply"
|
||||||
|
result="result93" />
|
||||||
|
<feComposite
|
||||||
|
id="feComposite3028"
|
||||||
|
in2="SourceAlpha"
|
||||||
|
operator="in" />
|
||||||
|
</filter>
|
||||||
|
</defs>
|
||||||
|
<sodipodi:namedview
|
||||||
|
id="base"
|
||||||
|
pagecolor="#ffffff"
|
||||||
|
bordercolor="#666666"
|
||||||
|
borderopacity="1.0"
|
||||||
|
inkscape:pageopacity="0.0"
|
||||||
|
inkscape:pageshadow="2"
|
||||||
|
inkscape:zoom="5.6568542"
|
||||||
|
inkscape:cx="30.580486"
|
||||||
|
inkscape:cy="63.624717"
|
||||||
|
inkscape:document-units="px"
|
||||||
|
inkscape:current-layer="layer1"
|
||||||
|
showgrid="true"
|
||||||
|
inkscape:snap-smooth-nodes="false"
|
||||||
|
inkscape:window-width="1920"
|
||||||
|
inkscape:window-height="1058"
|
||||||
|
inkscape:window-x="0"
|
||||||
|
inkscape:window-y="22"
|
||||||
|
inkscape:window-maximized="0"
|
||||||
|
inkscape:snap-bbox="false"
|
||||||
|
inkscape:object-paths="true"
|
||||||
|
inkscape:snap-midpoints="false"
|
||||||
|
inkscape:snap-global="true">
|
||||||
|
<inkscape:grid
|
||||||
|
empspacing="5"
|
||||||
|
visible="true"
|
||||||
|
enabled="true"
|
||||||
|
snapvisiblegridlinesonly="true"
|
||||||
|
type="xygrid"
|
||||||
|
id="grid2985" />
|
||||||
|
</sodipodi:namedview>
|
||||||
|
<metadata
|
||||||
|
id="metadata7">
|
||||||
|
<rdf:RDF>
|
||||||
|
<cc:Work
|
||||||
|
rdf:about="">
|
||||||
|
<dc:format>image/svg+xml</dc:format>
|
||||||
|
<dc:type
|
||||||
|
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
|
||||||
|
<dc:title>Pushpin Icon</dc:title>
|
||||||
|
<dc:creator>
|
||||||
|
<cc:Agent>
|
||||||
|
<dc:title>Kovid Goyal</dc:title>
|
||||||
|
</cc:Agent>
|
||||||
|
</dc:creator>
|
||||||
|
<dc:rights>
|
||||||
|
<cc:Agent>
|
||||||
|
<dc:title>Public domain</dc:title>
|
||||||
|
</cc:Agent>
|
||||||
|
</dc:rights>
|
||||||
|
</cc:Work>
|
||||||
|
</rdf:RDF>
|
||||||
|
</metadata>
|
||||||
|
<g
|
||||||
|
inkscape:label="Layer 1"
|
||||||
|
inkscape:groupmode="layer"
|
||||||
|
id="layer1"
|
||||||
|
transform="translate(0,-924.36218)">
|
||||||
|
<path
|
||||||
|
style="fill:#f39509;fill-opacity:1;stroke:#7a6822;stroke-opacity:1;stroke-width:0;stroke-miterlimit:4;stroke-dasharray:none;filter:url(#filter3014)"
|
||||||
|
d="m 1.9128912,974.70018 49.4974748,-49.49747 -7.071068,21.2132 31.819805,17.67767 24.433067,-3.85121 -63.639613,63.63963 3.851207,-24.43308 -17.677669,-31.81981 z"
|
||||||
|
id="path3088"
|
||||||
|
inkscape:connector-curvature="0"
|
||||||
|
sodipodi:nodetypes="ccccccccc"
|
||||||
|
inkscape:export-xdpi="90"
|
||||||
|
inkscape:export-ydpi="90" />
|
||||||
|
<path
|
||||||
|
style="fill:url(#linearGradient3813);fill-opacity:1;stroke:none"
|
||||||
|
d="M 63.925974,996.92087 120,1042.5389 74.532576,986.31427"
|
||||||
|
id="path3097"
|
||||||
|
inkscape:connector-curvature="0"
|
||||||
|
sodipodi:nodetypes="ccc"
|
||||||
|
inkscape:export-xdpi="90"
|
||||||
|
inkscape:export-ydpi="90" />
|
||||||
|
</g>
|
||||||
|
</svg>
7412
imgsrc/tweak.svg
Normal file
@ -537,25 +537,38 @@ Set the :guilabel:`Level 1 TOC` setting to ``//h:h2``. Then, for chapter two, |a
How options are set/saved for Conversion
-------------------------------------------

There are two places where conversion options can be set in |app|. The first is
in Preferences->Conversion. These settings are the defaults for the conversion
options. Whenever you try to convert a new book, the settings set here will be
used by default.

You can also change settings in the conversion dialog for each book conversion.
When you convert a book, |app| remembers the settings you used for that book,
so that if you convert it again, the saved settings for the individual book
will take precedence over the defaults set in Preferences. You can restore the
individual settings to defaults by using the Restore to defaults button in the
individual book conversion dialog. You can remove the saved settings for a
group of books by selecting all the books and then clicking the edit metadata
button to bring up the bulk metadata edit dialog; near the bottom of the dialog
is an option to remove stored conversion settings.

When you Bulk Convert a set of books, settings are taken in the following order (last one wins):

* From the defaults set in Preferences->Conversion

* From the saved conversion settings for each book being converted (if
  any). This can be turned off by the option in the top left corner of the
  Bulk Conversion dialog.

* From the settings set in the Bulk conversion dialog

Note that the final settings for each book in a Bulk Conversion will be saved
and re-used if the book is converted again. Since the highest priority in Bulk
Conversion is given to the settings in the Bulk Conversion dialog, these will
override any book specific settings. So you should only bulk convert books
together that need similar settings. The exceptions are metadata and input
format specific settings. Since the Bulk Conversion dialog does not have
settings for these two categories, they will be taken from book specific
settings (if any) or the defaults.
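
As a quick illustration of that precedence, here is a toy sketch in Python (the option names and values are made up, not calibre's actual settings)::

    # Bulk Conversion precedence: later sources win
    defaults = {'base_font_size': 10, 'margin_top': 5}  # Preferences->Conversion
    per_book = {'base_font_size': 12}                   # saved settings for this book
    bulk_dialog = {'margin_top': 3}                     # Bulk Conversion dialog

    effective = {}
    for source in (defaults, per_book, bulk_dialog):
        effective.update(source)  # each update overrides earlier values
    print(effective)  # {'base_font_size': 12, 'margin_top': 3}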

.. note::
@ -772,9 +785,11 @@ size. By default, |app| uses a page size defined by the current
:guilabel:`Output profile`. So if your output profile is set to Kindle, |app|
will create a PDF with page size suitable for viewing on the small kindle
screen. However, if you view this PDF file on a computer screen, then it will
appear to have fonts that are too large. To create "normal" sized PDFs, use the
:guilabel:`Override page size` option under :guilabel:`PDF Output` in the conversion dialog.

Headers and Footers
^^^^^^^^^^^^^^^^^^^^

You can insert arbitrary headers and footers on each page of the PDF by
specifying header and footer templates. Templates are just snippets of HTML
code that get rendered in the header and footer locations. For example, to
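
A minimal sketch of such a footer template, assuming the ``_PAGENUM_`` placeholder that calibre substitutes when rendering (the styling is illustrative)::

    <p style="text-align:center">_PAGENUM_</p>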
@ -813,6 +828,9 @@ the page will be used.
bottom margins to large enough values, under the Page Setup section of the
conversion dialog.

Printable Table of Contents
^^^^^^^^^^^^^^^^^^^^^^^^^^^^

You can also insert a printable Table of Contents at the end of the PDF that
lists the page numbers for every section. This is very useful if you intend to
print out the PDF to paper. If you wish to use the PDF on an electronic device,
@ -92,6 +92,11 @@ The first thing to note is that this zip file has a lot more files in it, explai
**about.txt**
    A text file with information about the plugin

**translations**
    A folder containing .mo files with the translations of the user
    interface of your plugin into different languages. See below for
    details.

Now let's look at the code.

__init__.py
@ -175,6 +180,42 @@ You can see the ``prefs`` object being used in main.py:
.. literalinclude:: plugin_examples/interface_demo/main.py
    :pyobject: DemoDialog.config

Adding translations to your plugin
--------------------------------------

You can have all the user interface strings in your plugin translated and
displayed in whatever language is set for the main calibre user interface.

The first step is to go through your plugin's source code and mark all user
visible strings as translatable, by surrounding them in _(). For example::

    action_spec = (_('My plugin'), None, _('My plugin is cool'), None)

Then use some program to generate .po files from your plugin source code. There
should be one .po file for every language you want to translate into. For
example: de.po for German, fr.po for French and so on. You can use the
`poedit <http://www.poedit.net/>`_ program for this.

Send these .po files to your translators. Once you get them back, compile them
into .mo files. You can again use poedit for that, or just do::

    calibre-debug -c "from calibre.translations.msgfmt import main; main()" filename.po

Put the .mo files into the ``translations`` folder in your plugin.

The last step is to simply call the function `load_translations()` at the top
of your plugin's .py files. For performance reasons you should only call this
function in those .py files that actually have translatable strings. So in a
typical User Interface plugin you would call it at the top of ``ui.py`` but not
``__init__.py``.

You can test the translations of your plugins by changing the user interface
language in calibre under Preferences->Look & Feel or by running calibre like
this::

    CALIBRE_OVERRIDE_LANG=de calibre

Replace ``de`` with the language code of the language you want to test.
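
As a sketch, the top of a translated plugin's ``ui.py`` might look like this (the class and strings are illustrative; ``load_translations()`` and ``_()`` are assumed to be injected by calibre's plugin loader)::

    load_translations()  # must run before the _() calls below are evaluated

    from calibre.gui2.actions import InterfaceAction

    class DemoInterfacePlugin(InterfaceAction):
        name = 'Demo Plugin'
        # these strings end up in the .po files and are translated at runtime
        action_spec = (_('My plugin'), None, _('My plugin is cool'), None)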

The plugin API
--------------------------------
@ -83,7 +83,6 @@ def generate_calibredb_help(preamble, info):

    global_options = '\n'.join(render_options('calibredb', groups, False, False))

    lines, toc = [], []
    for cmd in COMMANDS:
        args = []
@ -99,7 +98,7 @@ def generate_calibredb_help(preamble, info):
        usage = [i for i in usage.replace('%prog', 'calibredb').splitlines()]
        cmdline = '    '+usage[0]
        usage = usage[1:]
        usage = [re.sub(r'(%s)([^a-zA-Z0-9])'%cmd, r':command:`\1`\2', i) for i in usage]
        lines += ['.. code-block:: none', '', cmdline, '']
        lines += usage
        groups = [(None, None, parser.option_list)]
@ -152,7 +151,6 @@ def generate_ebook_convert_help(preamble, info):
        prog = 'ebook-convert-'+(pl.name.lower().replace(' ', '-'))
        raw += '\n\n' + '\n'.join(render_options(prog, groups, False, True))

    update_cli_doc(os.path.join('cli', 'ebook-convert.rst'), raw, info)

def update_cli_doc(path, raw, info):
@ -200,7 +198,8 @@ def cli_docs(app):
    for script in entry_points['console_scripts'] + entry_points['gui_scripts']:
        module = script[script.index('=')+1:script.index(':')].strip()
        cmd = script[:script.index('=')].strip()
        if cmd in ('calibre-complete', 'calibre-parallel'):
            continue
        module = __import__(module, fromlist=[module.split('.')[-1]])
        if hasattr(module, 'option_parser'):
            documented_cmds.append((cmd, getattr(module, 'option_parser')()))
@ -260,3 +259,4 @@ def setup(app):
def finished(app, exception):
    pass
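
To see why the ``re.sub`` change above matters, here is a small self-contained illustration (the input line is made up)::

    import re

    cmd, line = 'add', 'calibredb add file1 (books are added to the library)'
    # old approach: also rewrites 'add' inside longer words such as 'added'
    print(line.replace(cmd, ':command:`%s`' % cmd))
    # new approach: the ([^a-zA-Z0-9]) group requires a non-word character
    # right after the command name, so 'added' is left alone
    print(re.sub(r'(%s)([^a-zA-Z0-9])' % cmd, r':command:`\1`\2', line))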
@ -30,10 +30,13 @@ Environment variables
* ``CALIBRE_OVERRIDE_DATABASE_PATH`` - allows you to specify the full path to metadata.db. Using this variable you can have metadata.db be in a location other than the library folder. Useful if your library folder is on a networked drive that does not support file locking.
* ``CALIBRE_DEVELOP_FROM`` - Used to run from a calibre development environment. See :ref:`develop`.
* ``CALIBRE_OVERRIDE_LANG`` - Used to force the language used by the interface (ISO 639 language code)
* ``CALIBRE_NO_NATIVE_FILEDIALOGS`` - Causes calibre to not use native file dialogs for selecting files/directories. Set it to 1 to enable.
* ``SYSFS_PATH`` - Use if sysfs is mounted somewhere other than /sys
* ``http_proxy`` - Used on linux to specify an HTTP proxy

See `How to set environment variables in windows <http://www.computerhope.com/issues/ch000549.htm>`_ or
`How to set environment variables in OS X <http://blog.dowdandassociates.com/content/howto-set-an-environment-variable-in-mac-os-x-home-slash-dot-macosx-slash-environment-dot-plist/>`_.
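
For example, on linux or OS X a variable can be set for a single run from a terminal (the path is illustrative)::

    CALIBRE_OVERRIDE_DATABASE_PATH=/mnt/nas/calibre/metadata.db calibre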
Tweaks
------------
@ -46,17 +49,31 @@ The default values for the tweaks are reproduced below
Overriding icons, templates, et cetera
----------------------------------------

|app| allows you to override the static resources, like icons, javascript and
templates for the metadata jacket, catalogs, etc. with customized versions that
you like. All static resources are stored in the resources sub-folder of the
calibre install location. On Windows, this is usually :file:`C:/Program Files/Calibre2/resources`.
On OS X, :file:`/Applications/calibre.app/Contents/Resources/resources/`. On linux, if
you are using the binary installer from the calibre website it will be
:file:`/opt/calibre/resources`. These paths can change depending on where you
choose to install |app|.

You should not change the files in this resources folder, as your changes will
get overwritten the next time you update |app|. Instead, go to
:guilabel:`Preferences->Advanced->Miscellaneous` and click
:guilabel:`Open calibre configuration directory`. In this configuration directory, create a
sub-folder called resources and place the files you want to override in it.
Place the files in the appropriate sub folders, for example place images in
:file:`resources/images`, etc. |app| will automatically use your custom file
in preference to the built-in one the next time it is started.

For example, if you wanted to change the icon for the :guilabel:`Remove books`
action, you would first look in the built-in resources folder and see that the
relevant file is :file:`resources/images/trash.png`. Assuming you have an
alternate icon in PNG format called :file:`mytrash.png` you would save it in
the configuration directory as :file:`resources/images/trash.png`. All the
icons used by the calibre user interface are in :file:`resources/images` and
its sub-folders.

Customizing |app| with plugins
--------------------------------
@ -49,7 +49,7 @@ All the |app| python code is in the ``calibre`` package. This package contains t
* Metadata reading, writing, and downloading is all in ``ebooks.metadata``
* Conversion happens in a pipeline, for the structure of the pipeline,
  see :ref:`conversion-introduction`. The pipeline consists of an input
  plugin, various transforms and an output plugin. The code that constructs
  and drives the pipeline is in :file:`plumber.py`. The pipeline works on a
  representation of an ebook that is like an unzipped epub, with
  manifest, spine, toc, guide, html content, etc. The
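
As a toy sketch of that pipeline shape (this is not calibre's actual API, just the flow described above)::

    def input_plugin(path):  # parses the input format
        return {'spine': [path], 'manifest': [path]}  # stand-in for the book

    def flatten_css(book):  # one of the transforms
        book['css_flattened'] = True
        return book

    def output_plugin(book, outpath):  # serializes the output format
        print('writing', outpath, 'from', book)

    book = input_plugin('book.epub')
    for transform in (flatten_css,):
        book = transform(book)
    output_plugin(book, 'book.mobi')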
102
manual/faq.rst
@ -499,11 +499,17 @@ that allows you to create collections on your Kindle from the |app| metadata. It
I am getting an error when I try to use |app| with my Kobo Touch/Glo/etc.?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The Kobo has very buggy firmware. Connecting to it has been known to fail at
random. Certain combinations of motherboard, USB ports/cables/hubs can
exacerbate this tendency to fail. If you are getting an error when connecting
to your touch with |app| try the following, each of which has solved the
problem for *some* |app| users.

* Connect the Kobo directly to your computer, not via USB Hub
* Try a different USB cable and a different USB port on your computer
* Try a different computer, in particular the Kobo does not work well with
  some Windows XP machines. If you are on Windows XP, try a computer with a
  newer version of Windows.
* Try upgrading the firmware on your Kobo Touch to the latest
* Try resetting the Kobo (sometimes this cures the problem for a little while, but then it re-appears, in which case you have to reset again and again)
* Try only putting one or two books onto the Kobo at a time and do not keep large collections on the Kobo
@ -622,13 +628,29 @@ should fix by hand.
The list of books in |app| is blank!
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

In order to understand why that happened, you have to understand what a |app|
library is. At the most basic level, a |app| library is just a folder. Whenever
you add a book to |app|, that book's files are copied into this folder
(arranged into sub folders by author and title). Inside the |app| library
folder, at the top level, you will see a file called metadata.db. This file is
where |app| stores the metadata like title/author/rating/tags etc. for *every*
book in your |app| library. The list of books that |app| displays is created by
reading the contents of this metadata.db file.

There can be two reasons why |app| is showing an empty list of books:

* Your |app| library folder changed its location. This can happen if it was
  on an external disk and the drive letter for that disk changed. Or if you
  accidentally moved the folder. In this case, |app| cannot find its library
  and so starts up with an empty library instead. To remedy this, do a
  right-click on the |app| icon in the |app| toolbar and select Switch/create
  library. Click the little blue icon to select the new location of your
  |app| library and click OK.

* Your metadata.db file was deleted/corrupted. In this case, you can ask
  |app| to rebuild the metadata.db from its backups. Right click the |app|
  icon in the |app| toolbar and select Library maintenance->Restore database.
  |app| will automatically rebuild metadata.db.

I am getting errors with my calibre library on a networked drive/NAS?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -701,7 +723,13 @@ Take your pick:

Why does |app| show only some of my fonts on OS X?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

|app| embeds fonts in ebook files it creates. Ebook files support embedding
only TrueType and OpenType (.ttf and .otf) fonts. Most fonts on OS X systems
are in .dfont format, thus they cannot be embedded. |app| shows only TrueType
and OpenType fonts found on your system. You can obtain many such fonts on the
web. Simply download the .ttf/.otf files and add them to the Library/Fonts
directory in your home directory.

|app| is not starting on Windows?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -763,6 +791,13 @@ There are several possible things I know of, that can cause this:
  that prevent 64-bit |app| from working properly. If you are using the 64-bit
  version of |app| try switching to the 32-bit version.

* If the crashes happen specifically when you are using a file open dialog,
  like clicking on the Add Books button or the Save to Disk button, then
  you may have an issue with the Windows file open dialogs on your
  computer. You can tell calibre to use its own file open dialogs by
  setting the environment variable ``CALIBRE_NO_NATIVE_FILEDIALOGS=1``.
  See `How to set environment variables in windows <http://www.computerhope.com/issues/ch000549.htm>`_.

If none of the above apply to you, then there is some other program on your
computer that is interfering with |app|. First reboot your computer in safe
mode, to have as few running programs as possible, and see if the crashes still
@ -776,6 +811,31 @@ The only way to find the culprit is to eliminate the programs one by one and
see which one is causing the issue. Basically, stop a program, run calibre,
check for crashes. If they still happen, stop another program and repeat.

Using the viewer or doing any conversions results in a permission denied error on windows
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Something on your computer is preventing calibre from accessing its own
temporary files. Most likely the permissions on your Temp folder are incorrect.
Go to the folder :file:`C:\\Users\\USERNAME\\AppData\\Local` in Windows
Explorer and then right click on the :file:`Temp` folder, select Properties and go to
the Security tab. Make sure that your user account has full control for this
folder.

Some users have reported that running the following command in an Administrator
Command Prompt fixed their permissions. To get an Administrator Command Prompt
search for cmd.exe in the start menu, then right click on the command prompt
entry and select Run as Administrator. At the command prompt type the following
command and press Enter::

    icacls "%appdata%\..\Local\Temp" /reset /T

Alternately, you can run calibre as Administrator, but doing so will cause
some functionality, such as drag and drop, to not work.

Finally, some users have reported that disabling UAC fixes the problem.

|app| is not starting on OS X?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -816,9 +876,10 @@ My antivirus program claims |app| is a virus/trojan?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The first thing to check is that you are downloading |app| from the official
website: `<http://calibre-ebook.com/download>`_. Make sure you are clicking the
download links on the left, not the advertisements on the right. |app| is a
very popular program and unscrupulous people try to set up websites offering it
for download to fool the unwary.

If you have the official download and your antivirus program is still claiming
|app| is a virus, then your antivirus program is wrong. Antivirus programs use
@ -880,10 +941,25 @@ Why doesn't |app| have an automatic update?

For many reasons:

* *There is no need to update every week*. If you are happy with how |app|
  works turn off the update notification and be on your merry way. Check back
  to see if you want to update once a year or so. There is a check box to
  turn off the update notification, on the update notification itself.

* |app| downloads currently use `about 100TB of bandwidth a month
  <http://status.calibre-ebook.com/downloads>`_. Implementing automatic
  updates would greatly increase that and end up costing thousands of dollars
  a month, which someone has to pay. And |app| is currently growing at `half
  a million new installs a month <https://status.calibre-ebook.com>`_.

* If I implement a dialog that downloads the update and launches it, instead
  of going to the website as it does now, that would save the most ardent
  |app| updater, *at most five clicks a week*. There are far higher priority
  things to do in |app| development.

* If you really, really hate downloading |app| every week but still want to
  be up to the latest, I encourage you to run from source, which makes
  updating trivial. Instructions are :ref:`available here <develop>`.

How is |app| licensed?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -62,7 +62,13 @@ Add books
The :guilabel:`Add books` action can read metadata from a wide variety of ebook formats. In addition, it tries to guess metadata from the filename.
See the :ref:`config_filename_metadata` section, to learn how to configure this.

To add an additional format for an existing book you can do any of three things:

1. Drag and drop the file onto the book details panel on the right side of the main window

2. Right click the Add books button and choose :guilabel:`Add files to selected books`.

3. Click the red add books button in the top right area of the :guilabel:`Edit Metadata` dialog, accessed by the :ref:`edit_meta_information` action.

.. _edit_meta_information:
@ -593,6 +599,8 @@ Calibre has several keyboard shortcuts to save you time and mouse movement. Thes
      - Toggle Book Details panel
    * - :kbd:`Alt+Shift+T`
      - Toggle Tag Browser
    * - :kbd:`Alt+Shift+G`
      - Toggle Cover Grid
    * - :kbd:`Alt+A`
      - Show books by the same author as the current book
    * - :kbd:`Alt+T`
@ -38,6 +38,8 @@ Sections
    glossary


.. REMOVE_IN_PDF

The main |app| user interface
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@ -1,4 +1,3 @@
.. include:: global.rst

.. _regexptutorial:
@ -26,7 +25,7 @@ There are a few places |app| uses regular expressions. There's the Search & Repl
What on earth *is* a regular expression?
------------------------------------------------

A regular expression is a way to describe sets of strings. A single regular expression can *match* a number of different strings. This is what makes regular expressions so powerful -- they are a concise way of describing a potentially large number of variations.
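
For instance, one expression can stand in for many concrete strings (an illustrative pattern, not one used later in this tutorial)::

    import re

    # one expression, many matching strings; re.IGNORECASE makes it case
    # insensitive, as discussed in the note below
    pattern = re.compile(r'chapter\s+\d+', re.IGNORECASE)
    for s in ('Chapter 1', 'chapter  42', 'CHAPTER 7'):
        print(bool(pattern.search(s)))  # True for all three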

.. note:: I'm using string here in the sense it is used in programming languages: a string of one or more characters, characters including actual characters, numbers, punctuation and so-called whitespace (linebreaks, tabulators etc.). Please note that generally, uppercase and lowercase characters are not considered the same, thus "a" being a different character from "A" and so forth. In |app|, regular expressions are case insensitive in the search bar, but not in the conversion options. There's a way to make every regular expression case insensitive, but we'll discuss that later. It gets complicated because regular expressions allow for variations in the strings they match, so one expression can match multiple strings, which is why people bother using them at all. More on that in a bit.
manual/resources/simple_donate_button.gif
Normal file
After Width: | Height: | Size: 2.1 KiB |
@ -104,7 +104,7 @@ Save this adapter as :file:`calibre-wsgi-adpater.py` somewhere your server will
|

Let's suppose that we want to use WSGI in Apache. First enable WSGI in Apache by adding the following to :file:`httpd.conf`::

    LoadModule wsgi_module modules/mod_wsgi.so

The exact technique for enabling the wsgi module will vary depending on your Apache installation. Once you have the proxy modules enabled, add the following rules to httpd.conf (or, if you are using virtual hosts, to the conf file for the virtual host in question)::
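
A minimal sketch of such a rule with mod_wsgi (the mount point and path are illustrative, not the manual's exact configuration)::

    WSGIScriptAlias /calibre /var/www/calibre-wsgi-adpater.py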
@ -16,16 +16,13 @@
<div class="body">
{% if not embedded %}
<div id="ad-container" style="text-align:center">
<script async="async" src="http://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js"></script>
<ins class="adsbygoogle"
     style="display:inline-block;width:728px;height:90px"
     data-ad-client="ca-pub-5939552585043235"
     data-ad-slot="7580893187"></ins>
<script>
(adsbygoogle = window.adsbygoogle || []).push({});
</script>
</div>
{% endif %}
@ -62,7 +59,7 @@
<form action="https://www.paypal.com/cgi-bin/webscr" method="post" title="Contribute to support calibre development">
<input type="hidden" name="cmd" value="_s-xclick" />
<input type="hidden" name="hosted_button_id" value="AF4H3B8QVDG6N" />
<input type="image" src="_static/simple_donate_button.gif" border="0" name="submit" alt="Contribute to support calibre development" style="border:0pt" />
<img alt="" border="0" src="https://www.paypalobjects.com/en_GB/i/scr/pixel.gif" width="1" height="1" />
</form>
<hr/>
@ -94,6 +94,13 @@ You can quickly use the current search as a temporary virtual library by
clicking the :guilabel:`Virtual Library` button and choosing the
:guilabel:`*current search` entry.

You can display all available virtual libraries as tabs above the book list.
This is particularly handy if you like switching between virtual libraries very
often. Click the :guilabel:`Virtual Library` button and select :guilabel:`Show
virtual libraries as tabs`. You can re-arrange the tabs by drag and drop and
close ones you do not want to see. Closed tabs can be restored by
right-clicking on the tab bar.

Using additional restrictions
-------------------------------
50
recipes/10minutos.recipe
Normal file
@ -0,0 +1,50 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__author__ = '2013, Carlos Alves <carlosalves90@gmail.com>'
'''
10minutos.com.uy
'''

from calibre.web.feeds.news import BasicNewsRecipe


class General(BasicNewsRecipe):
    title = '10minutos'
    __author__ = 'Carlos Alves'
    description = 'Noticias de Salto - Uruguay'
    tags = 'news, sports'
    language = 'es_UY'
    timefmt = '[%a, %d %b, %Y]'
    use_embedded_content = False
    recursion = 5
    encoding = 'utf8'
    remove_javascript = True
    no_stylesheets = True

    oldest_article = 2
    max_articles_per_feed = 100
    keep_only_tags = [dict(name='div', attrs={'class':'post-content'})]

    remove_tags = [
        dict(name='div', attrs={'class':['hr', 'titlebar', 'navigation']}),
        dict(name='p', attrs={'class':'post-meta'}),
        dict(name=['object','link'])
    ]

    extra_css = '''
        h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
        h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
        h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
        p {font-family:Arial,Helvetica,sans-serif;}
        '''
    feeds = [
        (u'Articulos', u'http://10minutos.com.uy/feed/')
    ]

    def get_cover_url(self):
        return 'http://10minutos.com.uy/a/img/logo.png'

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return soup
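
To try a recipe like this locally, it can be passed straight to calibre's conversion tool (output name illustrative; ``--test`` fetches only a couple of articles per feed)::

    ebook-convert 10minutos.recipe 10minutos.epub --test -vv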
30
recipes/acrimed.recipe
Normal file
@ -0,0 +1,30 @@
# vim:fileencoding=utf-8
from __future__ import unicode_literals

__license__ = 'GPL v3'
__copyright__ = '2012'
'''
acrimed.org
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe


class Acrimed(BasicNewsRecipe):
    title = u'Acrimed'
    __author__ = 'Gaëtan Lehmann'
    oldest_article = 30
    max_articles_per_feed = 100
    auto_cleanup = True
    auto_cleanup_keep = '//div[@class="crayon article-chapo-4112 chapo"]'
    language = 'fr'
    masthead_url = 'http://www.acrimed.org/IMG/siteon0.gif'
    feeds = [(u'Acrimed', u'http://www.acrimed.org/spip.php?page=backend')]

    preprocess_regexps = [
        (re.compile(r'<title>(.*) - Acrimed \| Action Critique M.*dias</title>'), lambda m: '<title>' + m.group(1) + '</title>'),
        (re.compile(r'<h2>(.*) - Acrimed \| Action Critique M.*dias</h2>'), lambda m: '<h2>' + m.group(1) + '</h2>')]

    extra_css = """
        .chapo{font-style:italic; margin: 1em 0 0.5em}
        """
@ -3,10 +3,10 @@ from __future__ import unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2013, Eddie Lau'
__Date__ = ''

'''
Change Log:
2013/09/28 -- update due to website redesign, add cover
2013/03/30 -- first version
'''

@ -15,7 +15,7 @@ from calibre.utils.date import now as nowf
import os, datetime, re
from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation
@ -32,18 +32,17 @@ class AppleDaily(BasicNewsRecipe):
    encoding = 'utf-8'
    auto_cleanup = False
    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    description = 'http://www.am730.com.hk'
    category = 'Chinese, News, Hong Kong'
    masthead_url = 'http://www.am730.com.hk/images/logo.jpg'
    extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} div[id=articleHeader] {font-size:200%; text-align:left; font-weight:bold;} photocaption {font-size:50%; margin-left:auto; margin-right:auto;}'
    keep_only_tags = [dict(name='div', attrs={'id':'articleHeader'}),
                      dict(name='div', attrs={'class':'thecontent wordsnap'}),
                      dict(name='a', attrs={'class':'lightboximg'})]
    remove_tags = [dict(name='img', attrs={'src':'/images/am730_article_logo.jpg'}),
                   dict(name='img', attrs={'src':'/images/am_endmark.gif'})]

    def get_dtlocal(self):
        dt_utc = datetime.datetime.utcnow()
@ -84,6 +83,16 @@ class AppleDaily(BasicNewsRecipe):
    def get_weekday(self):
        return self.get_dtlocal().weekday()

    def get_cover_url(self):
        soup = self.index_to_soup('http://www.am730.com.hk')
        cover = 'http://www.am730.com.hk/' + soup.find(attrs={'id':'mini_news_img'}).find('img').get('src', False)
        br = BasicNewsRecipe.get_browser(self)
        try:
            br.open(cover)
        except:
            cover = None
        return cover

    def populate_article_metadata(self, article, soup, first):
        if first and hasattr(self, 'add_toc_thumbnail'):
            picdiv = soup.find('img')
@ -93,48 +102,17 @@ class AppleDaily(BasicNewsRecipe):
    def parse_index(self):
        feeds = []
        soup = self.index_to_soup('http://www.am730.com.hk/')
        optgroups = soup.findAll('optgroup')
        for optgroup in optgroups:
            sectitle = optgroup.get('label')
            articles = []
            for option in optgroup.findAll('option'):
                articlelink = "http://www.am730.com.hk/" + option.get('value')
                title = option.string
                articles.append({'title': title, 'url': articlelink})
            feeds.append((sectitle, articles))
        return feeds

    def create_opf(self, feeds, dir=None):
        if dir is None:
            dir = self.output_dir
@ -288,3 +266,4 @@ class AppleDaily(BasicNewsRecipe):
        with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
            opf.render(opf_file, ncx_file)
@@ -12,26 +12,30 @@ class anan(BasicNewsRecipe):

     title = 'Anandtech'
     description = 'comprehensive Hardware Tests'
-    __author__ = 'Oliver Niesner' # 2012-09-20 AGE: update
+    __author__ = 'Oliver Niesner, Armin Geller' # 2013-09-07 AGE: update
     use_embedded_content = False
     language = 'en'
     timefmt = ' [%d %b %Y]'
-    oldest_article = 7 # 2012-09-20 AGE: update
+    oldest_article = 7
     max_articles_per_feed = 40
     no_stylesheets = True
     remove_javascript = True
     encoding = 'utf-8'

-    cover_url = 'http://www.anandtech.com/content/images/globals/header_logo.png' # 2012-09-20 AGE: new
-    masthead_url = 'http://www.anandtech.com/content/images/globals/printheader.png' # 2012-09-20 AGE: update
+    cover_url = 'http://www.anandtech.com/content/images/globals/header_logo.png'
+    masthead_url = 'http://www.anandtech.com/content/images/globals/printheader.png'

+    keep_only_tags = [
+        dict(name='section', attrs={'class':['main_cont']}),
+    ]
+    remove_tags=[ # 2013-09-07 AGE: update
+        dict(name='div', attrs={'class':['print', # logo
+                                         'breadcrumb_area noprint',
+                                         'fl-rt noprint',
+                                         'blog_top_right',]})
+    ]

-    remove_tags=[
-        dict(name='a', attrs={'class': 'bluebutton noprint'}),
-        dict(name='img', attrs={'alt': 'header'}),
-    ] # 2012-09-20 AGE: update
-
-    feeds = [ ('Anandtech', 'http://www.anandtech.com/rss/')]
+    feeds = [('Anandtech', 'http://www.anandtech.com/rss/')]

     def print_version(self,url):
-        return url.replace('0Cshow0C', '0Cprint0C') # 2012-09-20 AGE: update
+        return url.replace("0Cshow0C", "0Cprint0C") # 2013-09-07 AGE: update
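calibre calls `print_version` once per article URL before downloading, so the whole printable-page redirect above is a one-line substring swap. A standalone check of the rewrite (the sample URL shape is illustrative, not a real feed path):

# Standalone check of the print_version rewrite above; the sample URL
# shape is illustrative only.
def print_version(url):
    return url.replace("0Cshow0C", "0Cprint0C")

assert print_version("http://rss.example/0Cshow0C7384.htm") == "http://rss.example/0Cprint0C7384.htm"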
@@ -21,21 +21,9 @@ class AntywebRecipe(BasicNewsRecipe):
     simultaneous_downloads = 3

     keep_only_tags =[]
-    keep_only_tags.append(dict(name = 'h1', attrs = { 'class' : 'mm-article-title'}))
-    keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'mm-article-content'}))
+    keep_only_tags.append(dict(name = 'h1', attrs = { 'class' : 'entry-title '}))
+    keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'news-content'}))
+    extra_css = '''body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}'''

-    remove_tags =[]
-    remove_tags.append(dict(name = 'h2', attrs = {'class' : 'widgettitle'}))
-    remove_tags.append(dict(name = 'img', attrs = {'class' : 'alignleft'}))
-    remove_tags.append(dict(name = 'div', attrs = {'class' : 'float: right;margin-left:1em;margin-bottom: 0.5em;padding-bottom: 3px; width: 72px;'}))
-    remove_tags.append(dict(name = 'img', attrs = {'src' : 'http://antyweb.pl/wp-content/uploads/2011/09/HOSTERSI_testy_pasek600x30.gif'}))
-    remove_tags.append(dict(name = 'div', attrs = {'class' : 'podwpisowe'}))
-
-    extra_css = '''
-        body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
-    '''

     feeds = [
         (u'Artykuly', u'feed://feeds.feedburner.com/Antyweb?format=xml'),
63
recipes/blind_buch_de.recipe
Normal file
@@ -0,0 +1,63 @@
+#
+# Written:      July 2013
+# Last Edited:  2013-07-11
+# Version: 1.0
+# Last update: 2013-07-25
+#
+
+__license__ = 'GPL v3'
+__copyright__ = '2013, Armin Geller'
+
+'''
+Fetch blindenbuch.de
+'''
+
+from calibre.web.feeds.recipes import BasicNewsRecipe
+class AdvancedUserRecipe1303841067(BasicNewsRecipe):
+
+    title = u'Blindbuch - Bücher neu entdecken'
+    __author__ = 'Armin Geller' # AGe 2013-07-11
+    description = u'Bücher blind präsentiert'
+    publisher = 'blindbuch.de'
+    publication_type = 'ebook news'
+    tags = 'Bücher, Literatur, E-Books, Germany'
+    timefmt = ' [%a, %d %b %Y]'
+    publication_type = 'Feed'
+    language = 'de-DE'
+    encoding = 'utf-8'
+
+    oldest_article = 14
+    max_articles_per_feed = 100
+
+    no_stylesheets = True
+    use_embedded_content = False
+    remove_javascript = True
+
+    conversion_options = {'title' : title,
+                          'comments' : description,
+                          'tags' : tags,
+                          'language' : language,
+                          'publisher' : publisher,
+                          'authors' : publisher,
+                         }
+
+    cover_url = 'http://blindbuch.de/img/blindbuch_calibre.png'
+    masthead_url = 'http://www.blindbuch.de/img/Masterhead.JPG'
+
+    extra_css = '''
+        h1{font-weight:bold;font-size:large;}
+        .post-meta {font-size: 1em;text-align: left; font-style: italic}
+    '''
+
+    keep_only_tags = [
+        dict(name='article')
+    ]
+
+    remove_tags = [
+        dict(name='div', attrs={'class':['su-spoiler su-spoiler-style-1','post-comments comments',]}),
+        dict(name='span', attrs={'class':['post-comments comments',]}),
+        dict(name='div', attrs={'addthis':['title',]}),
+    ]
+
+    feeds = [(u'Blindbuch', u'http://www.blindbuch.de/feed/')]
92
recipes/caravan_magazine.recipe
Normal file
@@ -0,0 +1,92 @@
+import html5lib
+from lxml import etree
+from calibre.web.feeds.recipes import BasicNewsRecipe
+from calibre.utils.cleantext import clean_xml_chars
+
+def is_title(tag):
+    return tag.name == 'h2' and tag.parent.name == 'div' and tag.parent['class'] == 'left-corner'
+
+class CaravanMagazine(BasicNewsRecipe):
+
+    title = 'Caravan Magazine'
+    __author__ = 'Kovid Goyal'
+    description = 'An Indian Journal of politics and culture'
+    language = 'en_IN'
+    timefmt = ' [%b, %Y]'
+
+    no_stylesheets = True
+
+    keep_only_tags = [
+        dict(name=is_title),
+        dict(attrs={'class':['subhheading', 'authorndate', 'full-image-view', 'fullpage-body']}),
+    ]
+    remove_tags = [
+        dict(attrs={'class':['share-with']}),
+        dict(attrs={'class':lambda x: x and 'thumb-image-view' in x}),
+    ]
+
+    def preprocess_raw_html(self, raw_html, url):
+        root = html5lib.parse(
+            clean_xml_chars(raw_html), treebuilder='lxml',
+            namespaceHTMLElements=False)
+        for s in root.xpath('//script'):
+            s.getparent().remove(s)
+        return etree.tostring(root, encoding=unicode)
+
+    def preprocess_html(self, soup):
+        # Handle the image thumbnails
+        for div in soup.findAll('div', attrs={'class':lambda x: x and x.startswith('show-image')}):
+            if div['class'] == 'show-image':
+                div.extract()
+            else:
+                div['style'] = 'page-break-inside:avoid'
+
+        return soup
+
+    # To parse article toc
+    def parse_index(self):
+        raw = self.index_to_soup(
+            'http://caravanmagazine.in/current-issue', raw=True)
+        raw = raw.decode('utf-8')
+        raw = self.preprocess_raw_html(raw, None)
+        soup = self.index_to_soup(raw)
+
+        a = soup.find('a', rel=lambda x:x and '[field_c_issues_image]' in x)
+        if a is not None:
+            self.cover_url = a['href']
+
+        ci = soup.find(attrs={'class': 'current-issue-block'})
+        current_section = 'Section'
+        current_articles = []
+        feeds = []
+        for div in ci.findAll(
+                attrs={'class': ['view-header', 'view-content']}):
+            if div['class'] == 'view-header':
+                if current_articles:
+                    feeds.append((current_section, current_articles))
+                current_section = self.tag_to_string(div).replace('paging_filter', '')
+                current_articles = []
+                self.log('Section:', current_section)
+            else:
+                for art in div.findAll('div', attrs={'class': lambda x: x and 'views-row' in x.split()}):
+                    title = div.find(attrs={'class': 'views-field-title'})
+                    if title is not None:
+                        a = title.find('a', href=True)
+                        if a is not None:
+                            href = a['href']
+                            if href.startswith('/'):
+                                href = 'http://caravanmagazine.in' + href
+                            article = {
+                                'title': self.tag_to_string(title), 'url': href}
+                            title.extract()
+                            desc = self.tag_to_string(div).strip()
+                            if desc:
+                                article['description'] = desc
+                            current_articles.append(article)
+                            self.log('\t' + article['title'])
+                            self.log('\t\t' + article['url'])
+
+        if current_articles:
+            feeds.append((current_section, current_articles))
+
+        return feeds
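The `parse_index` above walks alternating `view-header`/`view-content` blocks to build the issue TOC. All calibre requires of the return value is a list of `(section_title, list_of_articles)` pairs, each article being a dict with at least `title` and `url`. A minimal sketch of that shape (the section name, URL and description below are placeholders, not real Caravan content):

# Minimal sketch of the structure parse_index must return; the section
# name, URL and description are placeholders.
def parse_index(self):
    return [
        ('Reportage', [
            {'title': 'Sample article',
             'url': 'http://caravanmagazine.in/sample-article',
             'description': 'Optional one-line teaser'},
        ]),
    ]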
@@ -12,7 +12,7 @@ class Carta(BasicNewsRecipe):

     title = u'Carta'
     description = 'News about electronic publishing'
-    __author__ = 'Oliver Niesner'
+    __author__ = 'Oliver Niesner' # AGe Update 2013-10-13
     use_embedded_content = False
     timefmt = ' [%a %d %b %Y]'
     oldest_article = 7
@@ -25,7 +25,7 @@ class Carta(BasicNewsRecipe):

-    remove_tags_after = [dict(name='p', attrs={'class':'tags-blog'})]
+    remove_tags_after = [dict(name='div', attrs={'id':'BlogContent'})] # AGe

     remove_tags = [dict(name='p', attrs={'class':'print'}),
                    dict(name='p', attrs={'class':'tags-blog'}),
@@ -1,23 +1,29 @@
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
 from calibre.web.feeds.news import BasicNewsRecipe

-class AdvancedUserRecipe1312361378(BasicNewsRecipe):
-    title = u'Carta capital'
-    __author__ = 'Pablo Aldama'
+class AdvancedUserRecipe1380852962(BasicNewsRecipe):
+    title = u'Carta Capital'
+    __author__ = 'Erico Lisboa'
     language = 'pt_BR'
-    oldest_article = 9
+    oldest_article = 15
     max_articles_per_feed = 100
+    auto_cleanup = True
+    use_embedded_content = False

-    feeds = [(u'Politica', u'http://www.cartacapital.com.br/category/politica/feed')
-            ,(u'Economia', u'http://www.cartacapital.com.br/category/economia/feed')
-            ,(u'Cultura', u'http://www.cartacapital.com.br/category/cultura/feed')
-            ,(u'Internacional', u'http://www.cartacapital.com.br/category/internacional/feed')
-            ,(u'Saude', u'http://www.cartacapital.com.br/category/saude/feed')
-            ,(u'Sociedade', u'http://www.cartacapital.com.br/category/sociedade/feed')
-            ,(u'Tecnologia', u'http://www.cartacapital.com.br/category/tecnologia/feed')
-            ,(u'Carta na escola', u'http://www.cartacapital.com.br/category/carta-na-escola/feed')
-            ,(u'Carta fundamental', u'http://www.cartacapital.com.br/category/carta-fundamental/feed')
-            ,(u'Carta verde', u'http://www.cartacapital.com.br/category/carta-verde/feed')
-            ]
-    def print_version(self, url):
-        return url + '/print'
+    feeds = [(u'Pol\xedtica',
+              u'http://www.cartacapital.com.br/politica/politica/rss'), (u'Economia',
+              u'http://www.cartacapital.com.br/economia/economia/atom.xml'),
+             (u'Sociedade',
+              u'http://www.cartacapital.com.br/sociedade/sociedade/atom.xml'),
+             (u'Internacional',
+              u'http://www.cartacapital.com.br/internacional/internacional/atom.xml'),
+             (u'Tecnologia',
+              u'http://www.cartacapital.com.br/tecnologia/tecnologia/atom.xml'),
+             (u'Cultura',
+              u'http://www.cartacapital.com.br/cultura/cultura/atom.xml'),
+             (u'Sa\xfade', u'http://www.cartacapital.com.br/saude/saude/atom.xml'),
+             (u'Educa\xe7\xe3o',
+              u'http://www.cartacapital.com.br/educacao/educacao/atom.xml')]
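The rewrite above drops the old `print_version` hook entirely and leans on `auto_cleanup = True`, which applies calibre's readability-style heuristics to extract the article body with no hand-written selectors. The minimal shape of such a recipe, with a placeholder feed URL:

# Minimal shape of an auto_cleanup recipe like the rewrite above; the
# title and feed URL are placeholders.
from calibre.web.feeds.news import BasicNewsRecipe

class MinimalAutoCleanup(BasicNewsRecipe):
    title = u'Example'
    language = 'pt_BR'
    oldest_article = 15
    auto_cleanup = True          # readability-style body extraction
    use_embedded_content = False
    feeds = [(u'Feed', u'http://example.com/rss')]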
@@ -1,3 +1,5 @@
+## Last Edit: 2013-08-23
+## From: Armin Geller
 __license__ = 'GPL v3'
 __copyright__ = '2010, NA'
 '''
@@ -18,33 +20,30 @@ class Consumerist(BasicNewsRecipe):
     encoding = 'utf-8'
     use_embedded_content = False
     language = 'en'
-    masthead_url = 'http://consumerist.com/css/images/footer_man.gif'
+    masthead_url = 'http://consumermediallc.files.wordpress.com/2013/02/consumerist.png' # AGe 2013-08-23

     extra_css = '''
        body{font-family: "Lucida Grande",Helvetica,Arial,sans-serif}
        img{margin-bottom: 1em}
        h1{font-family :Arial,Helvetica,sans-serif; font-size:x-large}
        h2{font-family :Arial,Helvetica,sans-serif; font-size:large}
     '''
     conversion_options = {
-        'comment'   : description
-        , 'tags'      : category
-        , 'publisher' : publisher
-        , 'language'  : language
+        'comment'   : description,
+        'tags'      : category,
+        'publisher' : publisher,
+        'language'  : language,
     }

     remove_attributes = ['width','height']
-    #keep_only_tags = [dict(attrs={'class':['', 'category-breadcrumb']}),]
-    remove_tags_before = dict(name='h2')
-
-    remove_tags = [
-        #dict(name='iframe'),
-        dict(name='div', attrs={'class':['e-comments', 'more-about', 'entry-tags']}),
-        #dict(name='div', attrs={'id':['IEContainer', 'clickIncludeBox']}),
-        #dict(name='ul', attrs={'class':'article-tools'}),
-        #dict(name='ul', attrs={'class':'articleTools'}),
-    ]
-
-    remove_tags_after = dict(attrs={'class':'e-body'})
+    keep_only_tags = dict(name='div', attrs={'class':['hfeed',]}) # AGe 2013-08-23
+
+    remove_tags = [dict(name='div', attrs={'class':['navigation', # AGe 2013-08-23
+                                                    'wpcom-related-posts widget widget_related_posts', # AGe 2013-08-23
+                                                    'sharedaddy sd-like-enabled sd-sharing-enabled',]}), # AGe 2013-08-23
+                   dict(name='div', attrs={'id':['comments',]}), # AGe 2013-08-23
+                   ]

     feeds = [(u'Articles', u'http://consumerist.com/index.xml')]
@@ -20,27 +20,22 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
     #article_already_exists = False
     #feed_hash = ''
     def get_cover_url(self):
         soup = self.index_to_soup('http://www.countryfile.com/magazine')
-        cov = soup.find(attrs={'class' : re.compile('imagecache imagecache-250px_wide')})#'width' : '160',
-        print '&&&&&&&& ',cov,' ***'
-        cov=str(cov)
-        #cov2 = re.findall('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', cov)
-        cov2 = re.findall('/(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', cov)
-
-        cov2 = str(cov2)
-        cov2= "http://www.countryfile.com"+cov2[2:len(cov2)-8]
-
-        print '******** ',cov2,' ***'
-        # try to get cover - if can't get known cover
-        br = browser()
-
-        br.set_handle_redirect(False)
-        try:
-            br.open_novisit(cov2)
-            cover_url = cov2
-        except:
-            cover_url = 'http://www.countryfile.com/sites/default/files/imagecache/160px_wide/cover/2_1.jpg'
-        return cover_url
+        cov = soup.find(attrs={'class' : re.compile('imagecache imagecache-250px')}) # 'width' : '160',
+        cov=str(cov)
+        cov=cov[10:]
+        cov=cov[:-135]
+        br = browser()
+        br.set_handle_redirect(False)
+        try:
+            br.open_novisit(cov)
+            cover_url = cov
+        except:
+            cover_url = 'http://www.countryfile.com/sites/default/files/imagecache/160px_wide/cover/2_1.jpg'
+        return cover_url
+    preprocess_regexps = [
+        (re.compile(r' \| Countryfile.com', re.IGNORECASE | re.DOTALL), lambda match: '')]

     remove_tags = [
         # dict(attrs={'class' : ['player']}),

@@ -48,6 +43,5 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
     feeds = [
         (u'Homepage', u'http://www.countryfile.com/rss/home'),
         (u'Country News', u'http://www.countryfile.com/rss/news'),
         (u'Countryside', u'http://www.countryfile.com/rss/countryside'),
     ]
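The new cover logic above still stringifies the matched tag and trims it with fixed offsets (`cov[10:]`, `cov[:-135]`), which breaks silently whenever the surrounding markup changes length. A hypothetical, sturdier variant, assuming the matched element is an `<img>` whose `src` attribute carries the cover URL (it reuses the recipe's existing `import re`):

# Hypothetical sturdier variant: read the src attribute instead of
# slicing str(cov) at fixed offsets. Assumes the match is an <img> tag;
# falls back to the known default cover.
def get_cover_url(self):
    soup = self.index_to_soup('http://www.countryfile.com/magazine')
    cov = soup.find('img', attrs={'class': re.compile('imagecache imagecache-250px')})
    if cov is not None and cov.get('src'):
        return cov['src']
    return 'http://www.countryfile.com/sites/default/files/imagecache/160px_wide/cover/2_1.jpg'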
@@ -1,63 +1,51 @@
 from calibre.web.feeds.news import BasicNewsRecipe

-class Cracked(BasicNewsRecipe):
-    title = u'Cracked.com'
-    __author__ = 'UnWeave'
-    language = 'en'
-    description = "America's Only HumorSite since 1958"
-    publisher = 'Cracked'
-    category = 'comedy, lists'
-    oldest_article = 3 #days
-    max_articles_per_feed = 100
-    no_stylesheets = True
-    encoding = 'ascii'
-    remove_javascript = True
-    use_embedded_content = False
-
-    feeds = [ (u'Articles', u'http://feeds.feedburner.com/CrackedRSS/') ]
+class Cracked(BasicNewsRecipe):
+    title = u'Cracked.com'
+    __author__ = 'UnWeave'
+    language = 'en'
+    description = "America's Only HumorSite since 1958"
+    publisher = 'Cracked'
+    category = 'comedy, lists'
+    oldest_article = 3 # days
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    encoding = 'ascii'
+    remove_javascript = True
+    use_embedded_content = False
+    # auto_cleanup = True
+
+    feeds = [(u'Articles', u'http://feeds.feedburner.com/CrackedRSS/')]

     conversion_options = {
-        'comment'   : description
-        , 'tags'      : category
-        , 'publisher' : publisher
-        , 'language'  : language
-    }
+        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
+    }

-    remove_tags_before = dict(id='PrimaryContent')
-
-    remove_tags_after = dict(name='div', attrs={'class':'shareBar'})
-
-    remove_tags = [ dict(name='div', attrs={'class':['social',
-                                                     'FacebookLike',
-                                                     'shareBar'
-                                                     ]}),
-
-                    dict(name='div', attrs={'id':['inline-share-buttons',
-                                                  ]}),
-
-                    dict(name='span', attrs={'class':['views',
-                                                      'KonaFilter'
-                                                      ]}),
-                    #dict(name='img'),
-                  ]
+    keep_only_tags = [dict(name='article', attrs={'class': 'module article dropShadowBottomCurved'}),
+                      dict(name='article', attrs={'class': 'module blog dropShadowBottomCurved'})]
+
+    remove_tags = [
+        dict(name='section', attrs={'class': ['socialTools', 'quickFixModule']})]

     def appendPage(self, soup, appendTag, position):
         # Check if article has multiple pages
-        pageNav = soup.find('nav', attrs={'class':'PaginationContent'})
+        pageNav = soup.find('nav', attrs={'class': 'PaginationContent'})
         if pageNav:
             # Check not at last page
-            nextPage = pageNav.find('a', attrs={'class':'next'})
+            nextPage = pageNav.find('a', attrs={'class': 'next'})
             if nextPage:
                 nextPageURL = nextPage['href']
                 nextPageSoup = self.index_to_soup(nextPageURL)
                 # 8th <section> tag contains article content
-                nextPageContent = nextPageSoup.findAll('section')[7]
+                nextPageContent = nextPageSoup.findAll('article')[0]
                 newPosition = len(nextPageContent.contents)
-                self.appendPage(nextPageSoup,nextPageContent,newPosition)
+                self.appendPage(nextPageSoup, nextPageContent, newPosition)
                 nextPageContent.extract()
                 pageNav.extract()
-                appendTag.insert(position,nextPageContent)
+                appendTag.insert(position, nextPageContent)

     def preprocess_html(self, soup):
         self.appendPage(soup, soup.body, 3)
         return soup
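The reworked `appendPage` follows "next" links recursively and splices each following page's first `<article>` into the current soup, so multi-page listicles download as one document; because it recurses before extracting, the deepest page is merged first and everything lands in reading order. A stripped-down sketch of the same stitching pattern, with the page fetch stubbed out as `fetch_soup` (a stand-in for `self.index_to_soup`):

# Stripped-down sketch of the recursive page-stitching pattern above.
# fetch_soup stands in for self.index_to_soup; tag/class names mirror
# the recipe.
def append_pages(fetch_soup, soup, append_tag, position):
    page_nav = soup.find('nav', attrs={'class': 'PaginationContent'})
    if not page_nav:
        return
    next_page = page_nav.find('a', attrs={'class': 'next'})
    if next_page:
        next_soup = fetch_soup(next_page['href'])
        content = next_soup.findAll('article')[0]
        # recurse first: later pages get appended inside `content`
        append_pages(fetch_soup, next_soup, content, len(content.contents))
        content.extract()
        page_nav.extract()
        append_tag.insert(position, content)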
88
recipes/daily_express.recipe
Normal file
@@ -0,0 +1,88 @@
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+class AdvancedUserRecipe1376229553(BasicNewsRecipe):
+    title = u'Daily Express'
+    __author__ = 'Dave Asbury'
+    # 9-9-13 added article author and now use (re.compile(r'>[\w].+? News<'
+    encoding = 'utf-8'
+    remove_empty_feeds = True
+    #remove_javascript = True
+    no_stylesheets = True
+    oldest_article = 1
+    max_articles_per_feed = 10
+    #auto_cleanup = True
+    compress_news_images = True
+    compress_news_images_max_size = 30
+    ignore_duplicate_articles = {'title', 'url'}
+    masthead_url = 'http://cdn.images.dailyexpress.co.uk/img/page/express_logo.png'
+
+    preprocess_regexps = [
+
+        (re.compile(r'widget', re.IGNORECASE | re.DOTALL), lambda match: ''),
+        (re.compile(r'Related articles', re.IGNORECASE | re.DOTALL), lambda match: ''),
+        (re.compile(r'Add Your Comment<', re.IGNORECASE | re.DOTALL), lambda match: '<'),
+        (re.compile(r'>More [\w].+?<', re.IGNORECASE), lambda match: '><'),
+        (re.compile(r'>[\w].+? News<', re.IGNORECASE), lambda match: '><'),
+        #(re.compile(r'Health News<', re.IGNORECASE | re.DOTALL), lambda match: '<'),
+        #(re.compile(r'Car News<', re.IGNORECASE | re.DOTALL), lambda match: '<'),
+        #(re.compile(r'TV & Radio News<', re.IGNORECASE | re.DOTALL), lambda match: '<'),
+        #(re.compile(r'Food & Recipe News<', re.IGNORECASE | re.DOTALL), lambda match: '<'),
+        #(re.compile(r'More City & Business<', re.IGNORECASE | re.DOTALL), lambda match: '<'),
+        #(re.compile(r'Travel News<', re.IGNORECASE | re.DOTALL), lambda match: '<'),
+        #(re.compile(r'Garden News<', re.IGNORECASE | re.DOTALL), lambda match: '<'),
+        #(re.compile(r'Fashion & Beauty News<', re.IGNORECASE | re.DOTALL), lambda match: '<'),
+        #(re.compile(r'More Personal Finance<', re.IGNORECASE | re.DOTALL), lambda match: '<'),
+        #(re.compile(r'<h3>More UK</h3>', re.IGNORECASE | re.DOTALL), lambda match: ''),
+
+    ]
+
+    remove_tags = [
+        dict(attrs={'class' : 'quote'}),
+        #dict(attrs={'class' : 'author'}),
+        dict(name='footer'),
+        dict(attrs={'id' : 'header_addons'}),
+        dict(attrs={'class' : 'hoverException'}),
+        dict(name='_li'),dict(name='li'),
+        dict(attrs={'class' : 'box related-articles clear'}),
+        dict(attrs={'class' : 'news-list'}),
+        dict(attrs={'class' : 'sponsored-section'}),
+        dict(attrs={'class' : 'pull-quote on-right'}),
+        dict(attrs={'class' : 'pull-quote on-left'}),
+
+    ]
+    keep_only_tags = [
+        dict(name='h1'),
+        dict(attrs={'class' : 'publish-info'}),
+        dict(name='h3', limit=2),
+        dict(attrs={'class' : 'clearfix hR new-style'}),
+    ]
+
+    feeds = [(u'UK News', u'http://www.express.co.uk/posts/rss/1/uk'),
+             (u'World News',u'http://www.express.co.uk/posts/rss/78/world'),
+             (u'Finance',u'http://www.express.co.uk/posts/rss/21/finance'),
+             (u'Sport',u'http://www.express.co.uk/posts/rss/65/sport'),
+             (u'Entertainment',u'http://www.express.co.uk/posts/rss/18/entertainment'),
+             (u'Lifestyle',u'http://www.express.co.uk/posts/rss/8/life&style'),
+             (u'Fun',u'http://www.express.co.uk/posts/rss/110/fun'),
+            ]
+
+    def get_cover_url(self):
+        soup = self.index_to_soup('http://www.express.co.uk/ourpaper/')
+        cov = soup.find(attrs={'src' : re.compile('http://images.dailyexpress.co.uk/img/covers/')})
+        cov=str(cov)
+        cov2 = re.findall('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', cov)
+
+        cov=str(cov2)
+        cov=cov[2:len(cov)-2]
+        cover_url=cov
+        return cover_url
+
+    extra_css = '''
+        h1{font-weight:bold;font-size:175%;}
+        h2{font-weight:normal;font-size:75%;}
+        #p{font-size:14px;}
+        #body{font-size:14px;}
+        .photo-caption {display: block;margin-left: auto;margin-right: auto;width:100%;font-size:40%;}
+        .publish-info {font-size:50%;}
+        .photo img {display: block;margin-left: auto;margin-right: auto;width:100%;}
+    '''
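`preprocess_regexps`, used heavily in the new recipe above, is a list of `(compiled_pattern, replacement_callable)` pairs that calibre runs over each article's raw HTML, in order, before parsing; that is why one-line lambdas suffice to blank the boilerplate headings. Roughly what calibre does with the list:

# Rough sketch of how preprocess_regexps is applied: each (pattern,
# func) pair is substituted over the raw HTML in list order.
import re

preprocess_regexps = [
    (re.compile(r'Related articles', re.IGNORECASE | re.DOTALL), lambda match: ''),
    (re.compile(r'Add Your Comment<', re.IGNORECASE | re.DOTALL), lambda match: '<'),
]

def apply_regexps(raw_html):
    for pattern, func in preprocess_regexps:
        raw_html = pattern.sub(func, raw_html)
    return raw_html

print apply_regexps('<h3>Related articles</h3><p>Add Your Comment</p>')  # <h3></h3><p></p>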
@@ -7,50 +7,50 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
     description = 'News as provided by The Daily Mirror -UK'

     __author__ = 'Dave Asbury'
-    # last updated 19/10/12
+    # last updated 27/8/13
     language = 'en_GB'
     #cover_url = 'http://yookeo.com/screens/m/i/mirror.co.uk.jpg'

     masthead_url = 'http://www.nmauk.co.uk/nma/images/daily_mirror.gif'
+    #recursions = 10
     compress_news_images = True
-    oldest_article = 1
-    max_articles_per_feed = 12
+    compress_news_images_max_size = 30
+    oldest_article = 1.5
+    max_articles_per_feed = 10
     remove_empty_feeds = True
     remove_javascript = True
     no_stylesheets = True
-    ignore_duplicate_articles = {'title'}
+    ignore_duplicate_articles = {'url'}

-    # auto_cleanup = True
+    #auto_cleanup = True
     #conversion_options = { 'linearize_tables' : True }

-    keep_only_tags = [ dict(name='h1'),
+    keep_only_tags = [dict(name='h1'),
                        dict(name='div',attrs={'class' : 'lead-text'}),
-                       dict(name='div',attrs={'class' : 'styleGroup clearfix'}),
+                       dict(attrs={'class' : 'tools clearfix'}),
                        dict(name='div',attrs={'class' : 'widget relatedContents pictures widget-editable viziwyg-section-245 inpage-widget-158123'}),
                        # dict(name='figure',attrs={'class' : 'clearfix'}),
                        dict(name='div',attrs={'class' :'body '}),
+                       dict(name='div',attrs={'class' :'thumb'}),
+                       dict(attrs={'img alt' : ['Perishers','Horace']}),
+                       #dict(attrs={'class' : 'tmRow span-15-5 col-1 article-page'}),
                        #dict(attrs={'class' : ['article-attr','byline append-1','published']}),
-                       #dict(name='p'),
+                       # dict(name='p'),
                      ]

     remove_tags = [
-        dict(attrs={'class' : ['article sa-teaser type-opinion','image-gallery','gallery-caption']}),
+        dict(attrs={'class' : ['article sa-teaser type-opinion','last','gallery-caption','gallery-data','ir btn-fullscreen','avatar']}), # ,'image-gallery'
         dict(attrs={'class' : 'comment'}),
         dict(name='title'),
         dict(name='ul',attrs={'class' : 'clearfix breadcrumbs '}),
         dict(name='ul',attrs={'id' : 'login-201109171215'}),
-        dict(name='div',attrs={'class' : ['inline-ad span-16 last','caption']}),#'widget navigation breadcrumb widget-editable viziwyg-section-198 inpage-widget-80721 span-17','image-credit'
+        #'widget navigation breadcrumb widget-editable viziwyg-section-198 inpage-widget-80721 span-17','image-credit'
+        dict(name='div',attrs={'class' : ['inline-ad span-16 last','caption']}),
     ]

     preprocess_regexps = [
         (re.compile(r'- mirror.co.uk', re.IGNORECASE | re.DOTALL), lambda match: '')]

     feeds = [
         (u'News',u'http://www.mirror.co.uk/news/rss.xml'),
         (u'Sports',u'http://www.mirror.co.uk/sport/rss.xml'),
@@ -63,26 +63,31 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
     # example of commented out feed not needed ,(u'Travel','http://www.mirror.co.uk/advice/travel/rss.xml')
     ]
     extra_css = '''
-        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
-        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
-        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
-        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
-    '''
+        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:170%;}
+        .article figure figcaption {display: block;margin-left: auto;margin-right: auto;
+        width:100%;font-family:Arial,Helvetica,sans-serif;font-size:40%;}
+        #h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;}
+        p{font-family:Arial,Helvetica,sans-serif;}
+        body{font-family:Helvetica,Arial,sans-serif;}
+        .article figure{display: block;margin-left: auto;margin-right: auto;width:100%;}
+        .lead-text p {font-size:150%}
+    '''

     def get_cover_url(self):
         soup = self.index_to_soup('http://www.politicshome.com/uk/latest_frontpage.html')
         # look for the block containing the mirror button and url
         cov = soup.find(attrs={'style' : 'background-image: url(http://www.politicshome.com/images/sources/source_frontpage_button_92.gif);'})
         cov2 = str(cov)
         cov2='http://www.politicshome.com'+cov2[9:-142]
-        #cov2 now contains url of the page containing pic
+        # cov2 now contains url of the page containing pic
         soup = self.index_to_soup(cov2)
         cov = soup.find(attrs={'id' : 'large'})
         cov=str(cov)
         cov2 = re.findall('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', cov)
         cov2 = str(cov2)
         cov2=cov2[2:len(cov2)-2]
-        #cov2 now is pic url, now go back to original function
+        # cov2 now is pic url, now go back to original function
         br = browser()
         br.set_handle_redirect(False)
         try:
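Both this Mirror cover code and the Daily Express recipe above locate the cover by stringifying the matched tag and pulling the first absolute URL out with the same character-class regex. Run standalone (the input markup below is illustrative only):

# Standalone run of the shared URL-extraction regex; the input markup
# is illustrative only.
import re

cov = str('<img id="large" src="http://example.com/covers/front_page.jpg"/>')
urls = re.findall('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', cov)
print urls[0]  # http://example.com/covers/front_page.jpg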
45
recipes/democracy_now.recipe
Normal file
@@ -0,0 +1,45 @@
+# vim:fileencoding=utf-8
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class DemocracyNowRecipe(BasicNewsRecipe):
+    title = u'Democracy now!'
+    __author__ = u'Antoine Beaupré'
+    description = 'A daily TV/radio news program, hosted by Amy Goodman and Juan Gonzalez, airing on over 1,100 stations, pioneering the largest community media collaboration in the United States.'  # noqa
+    language = 'en'
+    cover_url = 'http://www.democracynow.org/images/dn-logo-for-podcast.png'
+
+    oldest_article = 1
+    max_articles_per_feed = 10
+    publication_type = 'magazine'
+
+    auto_cleanup = False
+    use_embedded_content = False
+    no_stylesheets = True
+    remove_javascript = True
+
+    feeds = [
+        (u'Daily news', u'http://www.democracynow.org/democracynow.rss')]
+
+    keep_only_tags = [dict(name='div', attrs={'id': 'page'}), ]
+    remove_tags = [dict(name='div', attrs={'id': 'topics_list'}),
+                   dict(name='div', attrs={'id': 'header'}),
+                   dict(name='div', attrs={'id': 'footer'}),
+                   dict(name='div', attrs={'id': 'right'}),
+                   dict(name='div', attrs={'id': 'left-panel'}),
+                   dict(name='div', attrs={'id': 'top-video-content'}),
+                   dict(name='div', attrs={'id': 'google-news-date'}),
+                   dict(name='div', attrs={'id': 'story-donate'}),
+                   dict(
+                       name='div', attrs={'id': 'transcript-expand-collapse'}),
+                   dict(name='span', attrs={'class': 'show-links'}),
+                   dict(name='span', attrs={'class': 'storyNav'}),
+                   dict(name='div', attrs={'class': 'headline_share'}),
+                   dict(name='div', attrs={'class': 'mediaBar'}),
+                   dict(name='div', attrs={'class': 'shareAndPrinterBar'}),
+                   dict(name='div', attrs={'class': 'utility-navigation'}),
+                   dict(name='div', attrs={'class': 'bottomContentNav'}),
+                   dict(name='div', attrs={'class': 'recentShows'}),
+                   dict(
+                       name='div', attrs={'class': 'printer-and-transcript-links'}),
+                   ]
@@ -1,72 +1,50 @@
-#!/usr/bin/env python
-
 __license__ = 'GPL v3'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2009-2013, Darko Miletic <darko.miletic at gmail.com>'
 '''
-elargentino.com
+diagonales.infonews.com
 '''

 from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import Tag

 class Diagonales(BasicNewsRecipe):
     title = 'Diagonales'
     __author__ = 'Darko Miletic'
-    description = 'El nuevo diario de La Plata'
-    publisher = 'ElArgentino.com'
+    description = 'Para estar bien informado sobre los temas de actualidad. Conoce sobre pais, economia, deportes, mundo, espectaculos, sociedad, entrevistas y tecnologia.'
+    publisher = 'INFOFIN S.A.'
     category = 'news, politics, Argentina, La Plata'
     oldest_article = 2
     max_articles_per_feed = 100
     no_stylesheets = True
     use_embedded_content = False
     encoding = 'utf-8'
     language = 'es_AR'
-    lang = 'es-AR'
-    direction = 'ltr'
-    INDEX = 'http://www.elargentino.com/medios/122/Diagonales.html'
+    publication_type = 'newspaper'
+    delay = 1
+    remove_empty_feeds = True

     extra_css = ' .titulo{font-size: x-large; font-weight: bold} .volantaImp{font-size: small; font-weight: bold} '

-    html2lrf_options = [
-        '--comment' , description
-        , '--category' , category
-        , '--publisher', publisher
-    ]
-
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'
+    conversion_options = {
+        'comment'   : description
+        , 'tags'      : category
+        , 'publisher' : publisher
+        , 'language'  : language
+    }

     keep_only_tags = [dict(name='div', attrs={'class':'ContainerPop'})]
     remove_tags = [dict(name='link')]

-    feeds = [(u'Articulos', u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=122&Content-Type=text/xml&ChannelDesc=Diagonales')]
+    feeds = [
+        (u'Pais'        , u'http://diagonales.infonews.com/Highlights.aspx?ParentType=Section&ParentId=112&Content-Type=text/xml&ChannelDesc=Pa%C3%ADs')
+        ,(u'Deportes'    , u'http://diagonales.infonews.com/Highlights.aspx?ParentType=Section&ParentId=106&Content-Type=text/xml&ChannelDesc=Deportes')
+        ,(u'Economia'    , u'http://diagonales.infonews.com/Highlights.aspx?ParentType=Section&ParentId=107&Content-Type=text/xml&ChannelDesc=Econom%C3%ADa')
+        ,(u'Sociedad'    , u'http://diagonales.infonews.com/Highlights.aspx?ParentType=Section&ParentId=109&Content-Type=text/xml&ChannelDesc=Sociedad')
+        ,(u'Mundo'       , u'http://diagonales.infonews.com/Highlights.aspx?ParentType=Section&ParentId=113&Content-Type=text/xml&ChannelDesc=Mundo')
+        ,(u'Espectaculos', u'http://diagonales.infonews.com/Highlights.aspx?ParentType=Section&ParentId=114&Content-Type=text/xml&ChannelDesc=Espect%C3%A1culos')
+        ,(u'Entrevistas' , u'http://diagonales.infonews.com/Highlights.aspx?ParentType=Section&ParentId=115&Content-Type=text/xml&ChannelDesc=Entrevistas')
+        ,(u'Tecnologia'  , u'http://diagonales.infonews.com/Highlights.aspx?ParentType=Section&ParentId=118&Content-Type=text/xml&ChannelDesc=Tecnolog%C3%ADa')
+    ]

     def print_version(self, url):
         main, sep, article_part = url.partition('/nota-')
         article_id, rsep, rrest = article_part.partition('-')
-        return u'http://www.elargentino.com/Impresion.aspx?Id=' + article_id
-
-    def preprocess_html(self, soup):
-        for item in soup.findAll(style=True):
-            del item['style']
-        soup.html['lang'] = self.lang
-        soup.html['dir' ] = self.direction
-        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
-        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
-        soup.head.insert(0,mlang)
-        soup.head.insert(1,mcharset)
-        return soup
-
-    def get_cover_url(self):
-        cover_url = None
-        soup = self.index_to_soup(self.INDEX)
-        cover_item = soup.find('div',attrs={'class':'colder'})
-        if cover_item:
-            clean_url = self.image_url_processor(None,cover_item.div.img['src'])
-            cover_url = 'http://www.elargentino.com' + clean_url + '&height=600'
-        return cover_url
-
-    def image_url_processor(self, baseurl, url):
-        base, sep, rest = url.rpartition('?Id=')
-        img, sep2, rrest = rest.partition('&')
-        return base + sep + img
+        return u'http://diagonales.infonews.com/Impresion.aspx?Id=' + article_id
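The updated `print_version` keeps the old partition idiom: split the article URL around `/nota-`, peel the numeric id off the front of the remainder, and aim it at the new infonews print endpoint. A standalone check (the sample article path is illustrative, not a real Diagonales URL):

# Standalone check of the partition-based rewrite above; the sample
# article path is illustrative only.
def print_version(url):
    main, sep, article_part = url.partition('/nota-')
    article_id, rsep, rrest = article_part.partition('-')
    return u'http://diagonales.infonews.com/Impresion.aspx?Id=' + article_id

assert print_version(u'http://diagonales.infonews.com/nota-12345-un-titulo.html') == \
    u'http://diagonales.infonews.com/Impresion.aspx?Id=12345'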
51
recipes/diario_el_pueblo.recipe
Normal file
@@ -0,0 +1,51 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__author__ = '2013, Carlos Alves <carlosalves90@gmail.com>'
+'''
+diarioelpueblo.com.uy
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class General(BasicNewsRecipe):
+    title = 'Diario El Pueblo'
+    __author__ = 'Carlos Alves'
+    description = 'Noticias de Salto - Uruguay'
+    tags = 'news, sports'
+    language = 'es_UY'
+    timefmt = '[%a, %d %b, %Y]'
+    use_embedded_content = False
+    recursion = 5
+    encoding = 'utf8'
+    remove_javascript = True
+    no_stylesheets = True
+
+    oldest_article = 2
+    max_articles_per_feed = 100
+    keep_only_tags = [dict(name='div', attrs={'class':'post-alt blog'})]
+
+    remove_tags = [
+        dict(name='div', attrs={'class':['hr', 'titlebar', 'volver-arriba-right','navigation']}),
+        dict(name='div', attrs={'id':'comment','id':'suckerfish','id':'crp_related'}),
+        dict(name='h3', attrs={'class':['post_date']}),
+        dict(name=['object','link'])
+    ]
+
+    extra_css = '''
+        h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
+        h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
+        h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
+        p {font-family:Arial,Helvetica,sans-serif;}
+    '''
+    feeds = [
+        (u'Articulos', u'http://www.diarioelpueblo.com.uy/feed')
+    ]
+
+    def get_cover_url(self):
+        return 'http://www.diarioelpueblo.com.uy/wp-content/uploads/2013/06/Cabezal_Web1.jpg'
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        return soup
50
recipes/diario_salto.recipe
Normal file
@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__author__ = '2013, Carlos Alves <carlosalves90@gmail.com>'
+'''
+diariosalto.com.uy
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class General(BasicNewsRecipe):
+    title = 'Diario Salto'
+    __author__ = 'Carlos Alves'
+    description = 'Noticias de Salto - Uruguay'
+    tags = 'news, sports'
+    language = 'es_UY'
+    timefmt = '[%a, %d %b, %Y]'
+    use_embedded_content = False
+    recursion = 5
+    encoding = 'utf8'
+    remove_javascript = True
+    no_stylesheets = True
+
+    oldest_article = 2
+    max_articles_per_feed = 100
+    keep_only_tags = [dict(name='div', attrs={'class':'post'})]
+
+    remove_tags = [
+        dict(name='div', attrs={'class':['hr', 'titlebar', 'navigation']}),
+        dict(name='div', attrs={'id':'comment'}),
+        dict(name=['object','link'])
+    ]
+
+    extra_css = '''
+        h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
+        h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
+        h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
+        p {font-family:Arial,Helvetica,sans-serif;}
+    '''
+    feeds = [
+        (u'Articulos', u'http://www.diariosalto.com.uy/feed/atom')
+    ]
+
+    def get_cover_url(self):
+        return 'http://diariosalto.com.uy/demo/wp-content/uploads/2011/12/diario-salto_logo-final-b-b.png'
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        return soup
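Both Uruguayan recipes above end on the same `preprocess_html` idiom: delete every inline `style` attribute so the site's inline formatting cannot override the recipe's `extra_css`. The idiom run standalone, assuming BeautifulSoup 3 (the parser calibre hands to recipes):

# The inline-style-stripping idiom from the two recipes above, run
# standalone; assumes BeautifulSoup 3.
from BeautifulSoup import BeautifulSoup

soup = BeautifulSoup('<p style="color:red">texto</p>')
for item in soup.findAll(style=True):
    del item['style']
print soup  # <p>texto</p>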
@@ -6,6 +6,7 @@ DrMerry added cover Image 2011-11-12
 '''

 from calibre.web.feeds.recipes import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup
 import re

 class DilbertBig(BasicNewsRecipe):
@@ -16,7 +17,7 @@ class DilbertBig(BasicNewsRecipe):
     oldest_article = 15
     max_articles_per_feed = 100
     no_stylesheets = True
-    use_embedded_content = True
+    use_embedded_content = False
     encoding = 'utf-8'
     publisher = 'UNITED FEATURE SYNDICATE, INC.'
     category = 'comic'
@@ -30,25 +31,14 @@ class DilbertBig(BasicNewsRecipe):
         ,'publisher' : publisher
     }

-    feeds = [(u'Dilbert', u'http://feed.dilbert.com/dilbert/daily_strip' )]
+    feeds = [(u'Dilbert', u'http://feed.dilbert.com/dilbert/daily_strip')]

-    def get_article_url(self, article):
-        return article.get('feedburner_origlink', None)
-
     preprocess_regexps = [
         (re.compile('strip\..*\.gif', re.DOTALL|re.IGNORECASE), lambda match: 'strip.zoom.gif')
     ]

     def preprocess_html(self, soup):
-        for tag in soup.findAll(name='a'):
-            if tag['href'].find('http://feedads') >= 0:
-                tag.extract()
-        return soup
-
-    extra_css = '''
-        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
-        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
-        img {max-width:100%; min-width:100%;}
-        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
-        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
-    '''
+        for tag in soup.findAll(name='input'):
+            image = BeautifulSoup('<img src=' + tag['value'] + '></img>')
+            return image
@@ -6,46 +6,87 @@ __copyright__ = u'2011, Silviu Cotoar\u0103'
 '''
 dilemaveche.ro
 '''

 from calibre.web.feeds.news import BasicNewsRecipe

 class DilemaVeche(BasicNewsRecipe):
-    title = u'Dilema Veche'
-    __author__ = u'Silviu Cotoar\u0103'
-    description = 'Sint vechi, domnule! (I.L. Caragiale)'
-    publisher = u'Adev\u0103rul Holding'
-    oldest_article = 5
-    language = 'ro'
-    max_articles_per_feed = 100
-    no_stylesheets = True
-    use_embedded_content = False
-    category = 'Ziare'
-    encoding = 'utf-8'
-    cover_url = 'http://dilemaveche.ro/sites/all/themes/dilema/theme/dilema_two/layouter/dilema_two_homepage/logo.png'
-
-    conversion_options = {
-        'comments'  : description
-        ,'tags'     : category
-        ,'language' : language
-        ,'publisher': publisher
-    }
-
-    keep_only_tags = [
-        dict(name='div', attrs={'class':'c_left_column'})
-    ]
-
-    remove_tags = [
-        dict(name='div', attrs={'id':['adshop_widget_428x60']}) ,
-        dict(name='div', attrs={'id':['gallery']})
-    ]
-
-    remove_tags_after = [
-        dict(name='div', attrs={'id':['adshop_widget_428x60']})
-    ]
-
-    feeds = [
-        (u'Feeds', u'http://dilemaveche.ro/rss.xml')
-    ]
-
-    def preprocess_html(self, soup):
-        return self.adeify_images(soup)
+    # apare vinerea, mai pe dupa-masa,depinde de Luiza cred (care se semneaza ca fiind creatorul fiecarui articol in feed-ul RSS)
+    title = u'Dilema Veche'
+    __author__ = 'song2' # inspirat din scriptul pentru Le Monde. Inspired from the Le Monde script
+    description = '"Sint vechi, domnule!" (I.L. Caragiale)'
+    publisher = 'Adevarul Holding'
+    oldest_article = 7
+    max_articles_per_feed = 200
+    encoding = 'utf8'
+    language = 'ro'
+    masthead_url = 'http://www.dilemaveche.ro/sites/all/themes/dilema/theme/dilema_two/layouter/dilema_two_homepage/logo.png'
+    publication_type = 'magazine'
+    feeds = [
+        ('Editoriale si opinii - Situatiunea', 'http://www.dilemaveche.ro/taxonomy/term/37/0/feed'),
+        ('Editoriale si opinii - Pe ce lume traim', 'http://www.dilemaveche.ro/taxonomy/term/38/0/feed'),
+        ('Editoriale si opinii - Bordeie si obiceie', 'http://www.dilemaveche.ro/taxonomy/term/44/0/feed'),
+        ('Editoriale si opinii - Talc Show', 'http://www.dilemaveche.ro/taxonomy/term/44/0/feed'),
+        ('Tema saptamanii', 'http://www.dilemaveche.ro/taxonomy/term/19/0/feed'),
+        ('La zi in cultura - Dilema va recomanda', 'http://www.dilemaveche.ro/taxonomy/term/58/0/feed'),
+        ('La zi in cultura - Carte', 'http://www.dilemaveche.ro/taxonomy/term/14/0/feed'),
+        ('La zi in cultura - Film', 'http://www.dilemaveche.ro/taxonomy/term/13/0/feed'),
+        ('La zi in cultura - Muzica', 'http://www.dilemaveche.ro/taxonomy/term/1341/0/feed'),
+        ('La zi in cultura - Arte performative', 'http://www.dilemaveche.ro/taxonomy/term/1342/0/feed'),
+        ('La zi in cultura - Arte vizuale', 'http://www.dilemaveche.ro/taxonomy/term/1512/0/feed'),
+        ('Societate - Ieri cu vedere spre azi', 'http://www.dilemaveche.ro/taxonomy/term/15/0/feed'),
+        ('Societate - Din polul opus', 'http://www.dilemaveche.ro/taxonomy/term/41/0/feed'),
+        ('Societate - Mass comedia', 'http://www.dilemaveche.ro/taxonomy/term/43/0/feed'),
+        ('Societate - La singular si la plural', 'http://www.dilemaveche.ro/taxonomy/term/42/0/feed'),
+        ('Oameni si idei - Educatie', 'http://www.dilemaveche.ro/taxonomy/term/46/0/feed'),
+        ('Oameni si idei - Polemici si dezbateri', 'http://www.dilemaveche.ro/taxonomy/term/48/0/feed'),
+        ('Oameni si idei - Stiinta si tehnologie', 'http://www.dilemaveche.ro/taxonomy/term/46/0/feed'),
+        ('Dileme on-line', 'http://www.dilemaveche.ro/taxonomy/term/005/0/feed')
+    ]
+    remove_tags_before = dict(name='div',attrs={'class':'spacer_10'})
+    remove_tags = [
+        dict(name='div', attrs={'class':'art_related_left'}),
+        dict(name='div', attrs={'class':'controale'}),
+        dict(name='div', attrs={'class':'simple_overlay'}),
+    ]
+    remove_tags_after = [dict(id='facebookLike')]
+    remove_javascript = True
+    no_stylesheets = True
+    remove_empty_feeds = True
+    extra_css = """
+        body{font-family: Georgia,Times,serif }
+        img{margin-bottom: 0.4em; display:block}
+    """
+    needs_subscription = 'optional'
+    cover_margins = (10, 15, '#ffffff')
+
+    def get_browser(self):
+        br = BasicNewsRecipe.get_browser(self)
+        if self.username is not None and self.password is not None:
+            br.open('http://dilemaveche.ro/user/login')
+            br.select_form(nr=0)
+            br['username'] = self.username
+            br['password'] = self.password
+            br.submit()
+        return br
+
+    def get_cover_url(self):
+        cover_url = None
+        soup = self.index_to_soup('http://dilemaveche.ro')
+        link_item = soup.find('div',attrs={'class':'box_dr_pdf_picture'})
+        if link_item and link_item.a:
+            cover_url = link_item.a['href']
+        br = BasicNewsRecipe.get_browser()
+        try:
+            br.open(cover_url)
+        except: # daca nu gaseste pdf-ul
+            self.log("\nPDF indisponibil")
+            link_item = soup.find('div',attrs={'class':'box_dr_pdf_picture'})
+            if link_item and link_item.img:
+                cover_url = link_item.img['src']
+            br = BasicNewsRecipe.get_browser()
+            try:
+                br.open(cover_url)
+            except: # daca nu gaseste nici imaginea mica mica
+                print('Mama lor de nenorociti! nu este nici pdf nici imagine')
+                cover_url = 'http://www.dilemaveche.ro/sites/all/themes/dilema/theme/dilema_two/layouter/dilema_two_homepage/logo.png'
+        return cover_url
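With `needs_subscription = 'optional'` as above, calibre asks for credentials but still runs anonymously when none are given; the `get_browser` override then signs the shared browser in once, and every later fetch in the recipe reuses that session. The general shape of the pattern, as a recipe method, with a placeholder login URL; the form index (`nr=0`) and field names always depend on the site's login page:

# General shape of the optional-login pattern above; the login URL is a
# placeholder and nr=0 / the field names are site-specific assumptions.
def get_browser(self):
    br = BasicNewsRecipe.get_browser(self)
    if self.username is not None and self.password is not None:
        br.open('http://example.com/user/login')
        br.select_form(nr=0)  # first form on the login page
        br['username'] = self.username
        br['password'] = self.password
        br.submit()
    return br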
@@ -11,30 +11,31 @@ class dotnetMagazine (BasicNewsRecipe):
     no_stylesheets = True
     encoding = 'utf8'
     use_embedded_content = False
+    auto_cleanup = True
     # recursion = 1
     language = 'en'
     remove_empty_feeds = True
     extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} '
     cover_url = u'http://media.netmagazine.futurecdn.net/sites/all/themes/netmag/logo.png'

-    remove_tags_after = dict(name='footer', id=lambda x:not x)
-    remove_tags_before = dict(name='header', id=lambda x:not x)
-
-    remove_tags = [
-        dict(name='div', attrs={'class': 'item-list'}),
-        dict(name='h4', attrs={'class': 'std-hdr'}),
-        dict(name='div', attrs={'class': 'item-list share-links'}), # removes share links
-        dict(name=['script', 'noscript']),
-        dict(name='div', attrs={'id': 'comments-form'}), # comment these out if you want the comments to show
-        dict(name='div', attrs={'id': re.compile('advertorial_block_($|| )')}),
-        dict(name='div', attrs={'id': 'right-col'}),
-        dict(name='div', attrs={'id': 'comments'}), # comment these out if you want the comments to show
-        dict(name='div', attrs={'class': 'item-list related-content'}),
-    ]
+    #remove_tags_after = dict(name='footer', id=lambda x:not x)
+    #remove_tags_before = dict(name='header', id=lambda x:not x)
+
+    #remove_tags = [
+    #    dict(name='div', attrs={'class': 'item-list'}),
+    #    dict(name='h4', attrs={'class': 'std-hdr'}),
+    #    dict(name='div', attrs={'class': 'item-list share-links'}), # removes share links
+    #    dict(name=['script', 'noscript']),
+    #    dict(name='div', attrs={'id': 'comments-form'}), # comment these out if you want the comments to show
+    #    dict(name='div', attrs={'id': re.compile('advertorial_block_($|| )')}),
+    #    dict(name='div', attrs={'id': 'right-col'}),
+    #    dict(name='div', attrs={'id': 'comments'}), # comment these out if you want the comments to show
+    #    dict(name='div', attrs={'class': 'item-list related-content'}),
+    #]

     feeds = [
-        (u'net', u'http://feeds.feedburner.com/net/topstories?format=xml')
+        (u'net', u'http://feeds.feedburner.com/creativebloq/')
     ]

     def skip_ad_pages(self, soup):
@@ -3,10 +3,10 @@ __license__ = 'GPL v3'
 __copyright__ = '08 Januery 2011, desUBIKado'
 __author__ = 'desUBIKado'
 __description__ = 'Daily newspaper from Biscay'
-__version__ = 'v0.08'
-__date__ = '08, Januery 2011'
+__version__ = 'v0.10'
+__date__ = '07, August 2013'
 '''
-[url]http://www.elcorreo.com/[/url]
+http://www.elcorreo.com/
 '''
 
 import time
@@ -24,6 +24,7 @@ class heraldo(BasicNewsRecipe):
     max_articles_per_feed = 100
     no_stylesheets = True
     use_embedded_content = False
+    masthead_url = 'http://www.elcorreo.com/vizcaya/noticias/201002/02/Media/logo-elcorreo-nuevo.png'
     language = 'es'
     timefmt = '[%a, %d %b, %Y]'
     encoding = 'iso-8859-1'
@@ -33,15 +34,15 @@ class heraldo(BasicNewsRecipe):
     feeds = [
         (u'Portada', u'http://www.elcorreo.com/vizcaya/portada.xml'),
         (u'Local', u'http://www.elcorreo.com/vizcaya/rss/feeds/vizcaya.xml'),
         (u'Internacional', u'hhttp://www.elcorreo.com/vizcaya/rss/feeds/internacional.xml'),
         (u'Econom\xeda', u'http://www.elcorreo.com/vizcaya/rss/feeds/economia.xml'),
         (u'Pol\xedtica', u'http://www.elcorreo.com/vizcaya/rss/feeds/politica.xml'),
         (u'Opini\xf3n', u'http://www.elcorreo.com/vizcaya/rss/feeds/opinion.xml'),
         (u'Deportes', u'http://www.elcorreo.com/vizcaya/rss/feeds/deportes.xml'),
         (u'Sociedad', u'http://www.elcorreo.com/vizcaya/rss/feeds/sociedad.xml'),
         (u'Cultura', u'http://www.elcorreo.com/vizcaya/rss/feeds/cultura.xml'),
         (u'Televisi\xf3n', u'http://www.elcorreo.com/vizcaya/rss/feeds/television.xml'),
         (u'Gente', u'http://www.elcorreo.com/vizcaya/rss/feeds/gente.xml')
     ]
 
     keep_only_tags = [
@@ -54,14 +55,14 @@ class heraldo(BasicNewsRecipe):
         dict(name='div', attrs={'class':['mod_lomas','bloque_lomas','blm_header','link-app3','link-app4','botones_listado']}),
         dict(name='div', attrs={'class':['navegacion_galeria','modulocanalpromocion','separa','separacion','compartir','tags_relacionados']}),
         dict(name='div', attrs={'class':['moduloBuscadorDeportes','modulo-gente','moddestacadopeq','OpcArt','articulopiniones']}),
-        dict(name='div', attrs={'class':['modulo-especial','publiEspecial']}),
-        dict(name='div', attrs={'id':['articulopina']}),
+        dict(name='div', attrs={'class':['modulo-especial','publiEspecial','carruselNoticias','vj','modulocomun2']}),
+        dict(name='div', attrs={'id':['articulopina','webs_asociadas']}),
         dict(name='br', attrs={'class':'clear'}),
         dict(name='form', attrs={'name':'frm_conversor2'})
     ]
 
     remove_tags_before = dict(name='div' , attrs={'class':'articulo '})
-    remove_tags_after = dict(name='div' , attrs={'class':'comentarios'})
+    remove_tags_after = dict(name='div' , attrs={'class':'robapaginas'})
 
     def get_cover_url(self):
         cover = None
@@ -69,10 +70,8 @@ class heraldo(BasicNewsRecipe):
         year = str(st.tm_year)
         month = "%.2d" % st.tm_mon
         day = "%.2d" % st.tm_mday
-        #[url]http://img.kiosko.net/2011/01/02/es/elcorreo.750.jpg[/url]
-        #[url]http://info.elcorreo.com/pdf/06012011-viz.pdf[/url]
+        # http://info.elcorreo.com/pdf/07082013-viz.pdf
         cover='http://info.elcorreo.com/pdf/'+ day + month + year +'-viz.pdf'
 
         br = BasicNewsRecipe.get_browser(self)
         try:
             br.open(cover)
@@ -92,29 +91,27 @@ class heraldo(BasicNewsRecipe):
         img{margin-bottom: 0.4em}
     '''
 
     preprocess_regexps = [
-        # To present the image of the embedded video
+        # Para presentar la imagen de los video incrustados
         (re.compile(r'var RUTA_IMAGEN', re.DOTALL|re.IGNORECASE), lambda match: '</script><img src'),
         (re.compile(r'.jpg";', re.DOTALL|re.IGNORECASE), lambda match: '.jpg">'),
         (re.compile(r'var SITIO = "elcorreo";', re.DOTALL|re.IGNORECASE), lambda match: '<SCRIPT TYPE="text/JavaScript"'),
 
-        # To separate paragraphs with a blank line
+        # Para separar los parrafos con una linea en blanco
         (re.compile(r'<div class="p"', re.DOTALL|re.IGNORECASE), lambda match: '<p></p><div class="p"'),
 
-        # To put a blank line between the subtitle and the date and time of the news
+        # Para poner una linea en blanco entre el subttulo y la fecha y hora de la noticia
         (re.compile(r'<div class="date">', re.DOTALL|re.IGNORECASE), lambda match: '<br><div class="date">'),
 
-        # To put a blank line between the intro of the embedded videos and the previous text
+        # Para poner una linea en blanco entre la entradilla de los videos incrustados y el texto anterior
        (re.compile(r'<div class="video"', re.DOTALL|re.IGNORECASE), lambda match: '<br><div class="video"'),
 
-        # To view photos from the first when these are presented as a gallery
+        # Para sacar las fotos a partir de la primera cuando se presentan como una galeria
         (re.compile(r'src="/img/shim.gif"', re.DOTALL|re.IGNORECASE), lambda match: ''),
         (re.compile(r'rel=', re.DOTALL|re.IGNORECASE), lambda match: 'src='),
 
-        # To remove the link of the title
+        # Para quitar el enlace del titulo
         (re.compile(r'<h1 class="headline">\n<a href="', re.DOTALL|re.IGNORECASE), lambda match: '<h1 class="'),
         (re.compile(r'</a>\n</h1>', re.DOTALL|re.IGNORECASE), lambda match: '</h1>'),
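Assembled from the get_cover_url hunk above, the v0.10 cover logic builds the day's front-page PDF URL from the current date (ddmmyyyy) and probes it with the recipe's browser. The st assignment sits just above the visible context (presumably time.localtime(), since the recipe imports time), and the except branch is cut off, so the fallback below is an assumption:

    def get_cover_url(self):
        st = time.localtime()
        year = str(st.tm_year)
        month = "%.2d" % st.tm_mon
        day = "%.2d" % st.tm_mday
        # e.g. http://info.elcorreo.com/pdf/07082013-viz.pdf
        cover = 'http://info.elcorreo.com/pdf/' + day + month + year + '-viz.pdf'
        br = BasicNewsRecipe.get_browser(self)
        try:
            br.open(cover)
        except:
            # Hypothetical fallback if today's PDF is not up yet --
            # the real branch is not shown in the hunk.
            cover = None
        return cover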
@@ -1,18 +1,23 @@
 #!/usr/bin/env python
+##
+## Last Edited: 2013-09-29 Carlos Alves <carlosalves90@gmail.com>
+##
 
 __license__ = 'GPL v3'
 __author__ = '2010, Yuri Alvarez<me at yurialvarez.com>'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 
 '''
-observa.com.uy
+elobservador.com.uy
 '''
 
 from calibre.web.feeds.news import BasicNewsRecipe
 
-class ObservaDigital(BasicNewsRecipe):
-    title = 'Observa Digital'
+class Noticias(BasicNewsRecipe):
+    title = 'El Observador'
     __author__ = 'yrvn'
-    description = 'Noticias de Uruguay'
+    description = 'Noticias desde Uruguay'
+    tags = 'news, sports, entretainment'
     language = 'es_UY'
     timefmt = '[%a, %d %b, %Y]'
     use_embedded_content = False
@@ -23,13 +28,18 @@ class ObservaDigital(BasicNewsRecipe):
 
     oldest_article = 2
     max_articles_per_feed = 100
-    keep_only_tags = [dict(id=['contenido'])]
+    keep_only_tags = [
+        dict(name='div', attrs={'class':'story collapsed'})
+    ]
     remove_tags = [
-        dict(name='div', attrs={'id':'contenedorVinculadas'}),
-        dict(name='p', attrs={'id':'nota_firma'}),
+        dict(name='div', attrs={'class':['fecha', 'copyright', 'story_right']}),
+        dict(name='div', attrs={'class':['photo', 'social']}),
+        dict(name='div', attrs={'id':'widget'}),
         dict(name=['object','link'])
     ]
 
+    remove_attributes = ['width','height', 'style', 'font', 'color']
+
     extra_css = '''
         h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
         h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
@@ -37,19 +47,9 @@ class ObservaDigital(BasicNewsRecipe):
         p {font-family:Arial,Helvetica,sans-serif;}
     '''
     feeds = [
-        (u'Actualidad', u'http://www.observa.com.uy/RSS/actualidad.xml'),
-        (u'Deportes', u'http://www.observa.com.uy/RSS/deportes.xml'),
-        (u'Vida', u'http://www.observa.com.uy/RSS/vida.xml'),
-        (u'Ciencia y Tecnologia', u'http://www.observa.com.uy/RSS/ciencia.xml')
+        (u'Portada', u'http://elobservador.com.uy/rss/portada/'),
     ]
 
-    def get_cover_url(self):
-        index = 'http://www.observa.com.uy/'
-        soup = self.index_to_soup(index)
-        for image in soup.findAll('img',alt=True):
-            if image['alt'].startswith('Tapa El Observador'):
-                return image['src'].rstrip('b.jpg') + '.jpg'
-        return None
-
     def preprocess_html(self, soup):
         for item in soup.findAll(style=True):
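The preprocess_html hunk is truncated right after the findAll(style=True) loop header. The conventional completion -- the same two lines appear verbatim in the El Guardian and El Tribuno recipes elsewhere in this commit -- is:

    def preprocess_html(self, soup):
        # Drop inline style attributes so extra_css controls the rendering.
        for item in soup.findAll(style=True):
            del item['style']
        return soup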
@@ -5,8 +5,8 @@ __license__ = 'GPL v3'
 __copyright__ = '04 December 2010, desUBIKado'
 __author__ = 'desUBIKado'
 __description__ = 'Daily newspaper from Aragon'
-__version__ = 'v0.08'
-__date__ = '13, November 2011'
+__version__ = 'v0.09'
+__date__ = '07, August 2013'
 '''
 elperiodicodearagon.com
 '''
@@ -25,11 +25,11 @@ class elperiodicodearagon(BasicNewsRecipe):
     no_stylesheets = True
     use_embedded_content = False
     language = 'es'
+    masthead_url = 'http://pdf.elperiodicodearagon.com/img/logotipo.gif'
     encoding = 'iso-8859-1'
     remove_empty_feeds = True
     remove_javascript = True
 
     conversion_options = {
         'comments' : description
         ,'tags' : category
@@ -56,23 +56,21 @@ class elperiodicodearagon(BasicNewsRecipe):
         (u'Fiestas del Pilar', u'http://zetaestaticos.com/aragon/rss/107_es.xml')
     ]
 
     remove_attributes = ['height','width']
 
     keep_only_tags = [dict(name='div', attrs={'id':'Noticia'})]
 
     # Recuperamos la portada de papel (la imagen format=1 tiene mayor resolucion)
 
     def get_cover_url(self):
-        index = 'http://pdf.elperiodicodearagon.com/'
+        index = 'http://pdf.elperiodicodearagon.com/edicion.php'
         soup = self.index_to_soup(index)
         for image in soup.findAll('img',src=True):
-            if image['src'].startswith('http://pdf.elperiodicodearagon.com/funciones/portada-preview.php?eid='):
-                return image['src'].rstrip('format=2') + 'format=1'
+            if image['src'].startswith('/funciones/img-public.php?key='):
+                return 'http://pdf.elperiodicodearagon.com' + image['src']
         return None
 
     # Usamos la versión para móviles
 
     def print_version(self, url):
         return url.replace('http://www.elperiodicodearagon.com/', 'http://www.elperiodicodearagon.com/m/')
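Put together from the hunk above, the updated cover lookup scrapes edicion.php for the preview image and rebuilds an absolute URL, instead of string-trimming a format=2 query string:

    def get_cover_url(self):
        index = 'http://pdf.elperiodicodearagon.com/edicion.php'
        soup = self.index_to_soup(index)
        for image in soup.findAll('img', src=True):
            if image['src'].startswith('/funciones/img-public.php?key='):
                # The scraped src is site-relative, so prefix the host.
                return 'http://pdf.elperiodicodearagon.com' + image['src']
        return None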
@@ -1,93 +0,0 @@
-__license__ = 'GPL v3'
-__copyright__ = '2013, Darko Miletic <darko.miletic at gmail.com>'
-'''
-elguardian.com.ar
-'''
-
-import re
-from calibre.web.feeds.news import BasicNewsRecipe
-
-class ElGuardian(BasicNewsRecipe):
-    title = 'El Guardian'
-    __author__ = 'Darko Miletic'
-    description = "Semanario con todas las tendencias de un pais"
-    publisher = 'Editorial Apache SA'
-    category = 'news,politics,Argentina'
-    oldest_article = 8
-    max_articles_per_feed = 200
-    no_stylesheets = True
-    encoding = 'utf8'
-    use_embedded_content = False
-    language = 'es_AR'
-    remove_empty_feeds = True
-    publication_type = 'magazine'
-    issn = '1666-7476'
-    masthead_url = 'http://elguardian.com.ar/application/templates/frontend/images/home/logo.png'
-    extra_css = """
-        body{font-family: Arial,sans-serif}
-        img{margin-bottom: 0.4em; display:block}
-    """
-
-    conversion_options = {
-        'comment' : description
-        , 'tags' : category
-        , 'publisher' : publisher
-        , 'language' : language
-        , 'series' : title
-        , 'isbn' : issn
-    }
-
-    keep_only_tags = [dict(attrs={'class':['fotos', 'header_nota', 'nota']})]
-    remove_tags = [dict(name=['meta','link','iframe','embed','object'])]
-    remove_attributes = ['lang']
-
-    feeds = [
-        (u'El Pais' , u'http://elguardian.com.ar/RSS/el-pais.xml' )
-        ,(u'Columnistas' , u'http://elguardian.com.ar/RSS/columnistas.xml' )
-        ,(u'Personajes' , u'http://elguardian.com.ar/RSS/personajes.xml' )
-        ,(u'Tinta roja' , u'http://elguardian.com.ar/RSS/tinta-roja.xml' )
-        ,(u'Yo fui' , u'http://elguardian.com.ar/RSS/yo-fui.xml' )
-        ,(u'Ciencia' , u'http://elguardian.com.ar/RSS/ciencia.xml' )
-        ,(u'Cronicas' , u'http://elguardian.com.ar/RSS/cronicas.xml' )
-        ,(u'Culturas' , u'http://elguardian.com.ar/RSS/culturas.xml' )
-        ,(u'DxT' , u'http://elguardian.com.ar/RSS/dxt.xml' )
-        ,(u'Fierros' , u'http://elguardian.com.ar/RSS/fierros.xml' )
-        ,(u'Frente fashion', u'http://elguardian.com.ar/RSS/frente-fashion.xml')
-        ,(u'Pan y vino' , u'http://elguardian.com.ar/RSS/pan-y-vino.xml' )
-        ,(u'Turismo' , u'http://elguardian.com.ar/RSS/turismo.xml' )
-    ]
-
-    def get_cover_url(self):
-        soup = self.index_to_soup('http://elguardian.com.ar/')
-        udata = soup.find('div', attrs={'class':'datosNumero'})
-        if udata:
-            sdata = udata.find('div')
-            if sdata:
-                stra = re.findall(r'\d+', self.tag_to_string(sdata))
-                self.conversion_options.update({'series_index':int(stra[1])})
-        unumero = soup.find('div', attrs={'class':'ultimoNumero'})
-        if unumero:
-            img = unumero.find('img', src=True)
-            if img:
-                return img['src']
-        return None
-
-    def preprocess_html(self, soup):
-        for item in soup.findAll(style=True):
-            del item['style']
-        for item in soup.findAll('a'):
-            limg = item.find('img')
-            if item.string is not None:
-                str = item.string
-                item.replaceWith(str)
-            else:
-                if limg:
-                    item.name = 'div'
-                    item.attrs = []
-                else:
-                    str = self.tag_to_string(item)
-                    item.replaceWith(str)
-        for item in soup.findAll('img'):
-            if not item.has_key('alt'):
-                item['alt'] = 'image'
-        return soup
recipes/eltribuno_jujuy_impreso.recipe (new file, 126 lines)
@@ -0,0 +1,126 @@
+__license__ = 'GPL v3'
+__copyright__ = '2013, Darko Miletic <darko.miletic at gmail.com>'
+'''
+http://www.eltribuno.info/jujuy/edicion_impresa.aspx
+'''
+
+import urllib
+from calibre.ptempfile import PersistentTemporaryFile
+from calibre.web.feeds.news import BasicNewsRecipe
+from collections import OrderedDict
+
+class ElTribunoJujuyImpreso(BasicNewsRecipe):
+    title = 'El Tribuno Jujuy (Edición Impresa)'
+    __author__ = 'Darko Miletic'
+    description = "Diario principal de Jujuy"
+    publisher = 'Horizontes S.A.'
+    category = 'news, politics, Jujuy, Argentina, World'
+    oldest_article = 2
+    language = 'es_AR'
+    max_articles_per_feed = 250
+    no_stylesheets = True
+    use_embedded_content = False
+    encoding = 'utf8'
+    publication_type = 'newspaper'
+    delay = 1
+    articles_are_obfuscated = True
+    temp_files = []
+    PREFIX = 'http://www.eltribuno.info/jujuy/'
+    INDEX = PREFIX + 'edicion_impresa.aspx'
+    PRINTURL = PREFIX + 'nota_print.aspx?%s'
+
+    conversion_options = {
+        'comment' : description
+        , 'tags' : category
+        , 'publisher' : publisher
+        , 'language' : language
+        , 'linearize_tables' : True
+    }
+
+    keep_only_tags = [dict(name='div' , attrs={'class':['notaHead', 'notaContent']})]
+    remove_tags = [
+        dict(name=['meta','iframe','base','object','embed','link','img']),
+        dict(name='ul', attrs={'class':'Tabs'})
+    ]
+
+    extra_css = """
+        body{font-family: Arial,Helvetica,sans-serif}
+        .notaHead h4{text-transform: uppercase; color: gray}
+        img{margin-top: 0.8em; display: block}
+    """
+
+    def parse_index(self):
+        feeds = OrderedDict()
+        soup = None
+        count = 0
+        while (count < 5):
+            try:
+                soup = self.index_to_soup(self.INDEX)
+                count = 5
+            except:
+                print "Retrying download..."
+                count += 1
+        if not soup:
+            return []
+        alink = soup.find('a', href=True, attrs={'class':'ZoomTapa'})
+        if alink and 'href' in alink:
+            self.cover_url = alink['href']
+        sections = soup.findAll('div', attrs={'id':lambda x: x and x.startswith('Ediciones')})
+        for section in sections:
+            section_title = 'Sin titulo'
+            sectiont=section.find('h3', attrs={'class':'NombreSeccion'})
+            if sectiont:
+                section_title = self.tag_to_string(sectiont.span)
+
+            arts = section.findAll('div', attrs={'class':'Noticia NoticiaAB1'})
+            for article in arts:
+                articles = []
+                title=self.tag_to_string(article.div.h3.a)
+                url=article.div.h3.a['href']
+                description=self.tag_to_string(article.p)
+                articles.append({'title':title, 'url':url, 'description':description, 'date':''})
+
+                if articles:
+                    if section_title not in feeds:
+                        feeds[section_title] = []
+                    feeds[section_title] += articles
+
+        ans = [(key, val) for key, val in feeds.iteritems()]
+        return ans
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        for item in soup.findAll('a'):
+            if item.string is not None:
+                str = item.string
+                item.replaceWith(str)
+            else:
+                str = self.tag_to_string(item)
+                item.replaceWith(str)
+        return soup
+
+    def get_masthead_title(self):
+        return 'El Tribuno'
+
+    def get_obfuscated_article(self, url):
+        count = 0
+        while (count < 10):
+            try:
+                response = self.browser.open(url)
+                html = response.read()
+                count = 10
+            except:
+                print "Retrying download..."
+                count += 1
+        tfile = PersistentTemporaryFile('_fa.html')
+        tfile.write(html)
+        tfile.close()
+        self.temp_files.append(tfile)
+        return tfile.name
+
+    def print_version(self, url):
+        right = url.rpartition('/')[2]
+        artid = right.partition('-')[0]
+        params = {'Note':artid}
+        return (self.PRINTURL % urllib.urlencode(params))
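Both parse_index and get_obfuscated_article in this new recipe inline the same bounded retry loop, driven by a counter that is forced past the limit on success. A hypothetical refactoring (not part of the commit) that factors it into a single helper:

    def open_with_retries(browser, url, attempts=5):
        # Try the download a fixed number of times before giving up;
        # returns the response, or None if every attempt raised.
        for i in range(attempts):
            try:
                return browser.open(url)
            except Exception:
                print "Retrying download..."
        return None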
recipes/eltribuno_salta_impreso.recipe (new file, 126 lines)
@@ -0,0 +1,126 @@
+__license__ = 'GPL v3'
+__copyright__ = '2013, Darko Miletic <darko.miletic at gmail.com>'
+'''
+http://www.eltribuno.info/salta/edicion_impresa.aspx
+'''
+
+import urllib
+from calibre.ptempfile import PersistentTemporaryFile
+from calibre.web.feeds.news import BasicNewsRecipe
+from collections import OrderedDict
+
+class ElTribunoSaltaImpreso(BasicNewsRecipe):
+    title = 'El Tribuno Salta (Edición Impresa)'
+    __author__ = 'Darko Miletic'
+    description = "Diario principal de Salta"
+    publisher = 'Horizontes S.A.'
+    category = 'news, politics, Salta, Argentina, World'
+    oldest_article = 2
+    language = 'es_AR'
+    max_articles_per_feed = 250
+    no_stylesheets = True
+    use_embedded_content = False
+    encoding = 'utf8'
+    publication_type = 'newspaper'
+    delay = 1
+    articles_are_obfuscated = True
+    temp_files = []
+    PREFIX = 'http://www.eltribuno.info/salta/'
+    INDEX = PREFIX + 'edicion_impresa.aspx'
+    PRINTURL = PREFIX + 'nota_print.aspx?%s'
+
+    conversion_options = {
+        'comment' : description
+        , 'tags' : category
+        , 'publisher' : publisher
+        , 'language' : language
+        , 'linearize_tables' : True
+    }
+
+    keep_only_tags = [dict(name='div' , attrs={'class':['notaHead', 'notaContent']})]
+    remove_tags = [
+        dict(name=['meta','iframe','base','object','embed','link','img']),
+        dict(name='ul', attrs={'class':'Tabs'})
+    ]
+
+    extra_css = """
+        body{font-family: Arial,Helvetica,sans-serif}
+        .notaHead h4{text-transform: uppercase; color: gray}
+        img{margin-top: 0.8em; display: block}
+    """
+
+    def parse_index(self):
+        feeds = OrderedDict()
+        soup = None
+        count = 0
+        while (count < 5):
+            try:
+                soup = self.index_to_soup(self.INDEX)
+                count = 5
+            except:
+                print "Retrying download..."
+                count += 1
+        if not soup:
+            return []
+        alink = soup.find('a', href=True, attrs={'class':'ZoomTapa'})
+        if alink and 'href' in alink:
+            self.cover_url = alink['href']
+        sections = soup.findAll('div', attrs={'id':lambda x: x and x.startswith('Ediciones')})
+        for section in sections:
+            section_title = 'Sin titulo'
+            sectiont=section.find('h3', attrs={'class':'NombreSeccion'})
+            if sectiont:
+                section_title = self.tag_to_string(sectiont.span)
+
+            arts = section.findAll('div', attrs={'class':'Noticia NoticiaAB1'})
+            for article in arts:
+                articles = []
+                title=self.tag_to_string(article.div.h3.a)
+                url=article.div.h3.a['href']
+                description=self.tag_to_string(article.p)
+                articles.append({'title':title, 'url':url, 'description':description, 'date':''})
+
+                if articles:
+                    if section_title not in feeds:
+                        feeds[section_title] = []
+                    feeds[section_title] += articles
+
+        ans = [(key, val) for key, val in feeds.iteritems()]
+        return ans
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        for item in soup.findAll('a'):
+            if item.string is not None:
+                str = item.string
+                item.replaceWith(str)
+            else:
+                str = self.tag_to_string(item)
+                item.replaceWith(str)
+        return soup
+
+    def get_masthead_title(self):
+        return 'El Tribuno'
+
+    def get_obfuscated_article(self, url):
+        count = 0
+        while (count < 10):
+            try:
+                response = self.browser.open(url)
+                html = response.read()
+                count = 10
+            except:
+                print "Retrying download..."
+                count += 1
+        tfile = PersistentTemporaryFile('_fa.html')
+        tfile.write(html)
+        tfile.close()
+        self.temp_files.append(tfile)
+        return tfile.name
+
+    def print_version(self, url):
+        right = url.rpartition('/')[2]
+        artid = right.partition('-')[0]
+        params = {'Note':artid}
+        return (self.PRINTURL % urllib.urlencode(params))
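This Salta recipe is identical to the Jujuy one above except for title, description, category and PREFIX. A hypothetical shared base class (not in the commit) would shrink each edition to a handful of attributes:

    class ElTribunoImpreso(BasicNewsRecipe):
        # Common machinery (parse_index, get_obfuscated_article,
        # print_version, ...) would live here, parameterized on PREFIX.
        PREFIX = None

    class ElTribunoSaltaImpreso(ElTribunoImpreso):
        title = 'El Tribuno Salta (Edición Impresa)'
        description = "Diario principal de Salta"
        category = 'news, politics, Salta, Argentina, World'
        PREFIX = 'http://www.eltribuno.info/salta/'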
@@ -1,16 +1,15 @@
 __license__ = 'GPL v3'
-__copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2010-2013, Darko Miletic <darko.miletic at gmail.com>'
 '''
 www.eluniversal.com
 '''
 
-from calibre import strftime
 from calibre.web.feeds.recipes import BasicNewsRecipe
 
 class ElUniversal(BasicNewsRecipe):
     title = 'El Universal'
     __author__ = 'Darko Miletic'
-    description = 'Noticias de Venezuela'
+    description = 'Noticias de Venezuela y el mundo. Avances informativos de ultimo minuto. Incluye secciones de politica, deportes, economia y mas.'
     oldest_article = 2
     max_articles_per_feed = 100
     no_stylesheets = True
@@ -21,7 +20,9 @@ class ElUniversal(BasicNewsRecipe):
     category = 'news, Caracas, Venezuela, world'
     language = 'es_VE'
     publication_type = 'newspaper'
-    cover_url = strftime('http://static.eluniversal.com/%Y/%m/%d/portada.jpg')
+    masthead_url = 'http://cdn.eluniversal.com/images/eu4/back/logo-eluniversal.gif'
+    #cover_url = strftime('http://cdn.eluniversal.com/%Y/%m/%d/portada.jpg')
+    cover_url = 'http://images.eluniversal.com//pdf/primeraPlana.pdf'
     extra_css = """
         .txt60{font-family: Tahoma,Geneva,sans-serif; font-size: small}
         .txt29{font-family: Tahoma,Geneva,sans-serif; font-size: small; color: gray}
@@ -30,10 +31,10 @@ class ElUniversal(BasicNewsRecipe):
         body{font-family: Verdana,Arial,Helvetica,sans-serif}
     """
     conversion_options = {
         'comments' : description
         ,'tags' : category
         ,'language' : language
         ,'publisher' : publisher
     }
 
     remove_tags_before=dict(attrs={'class':'header-print MB10'})
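The change above swaps a date-templated JPEG cover, built with calibre's strftime helper (now unused, hence the dropped import), for a static front-page PDF URL. For reference, the old pattern, kept commented in the new code, expands the format directives to the current date at recipe run time:

    from calibre import strftime
    # %Y/%m/%d become today's year, month and day when the recipe runs:
    cover_url = strftime('http://static.eluniversal.com/%Y/%m/%d/portada.jpg')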
@@ -1,85 +1,51 @@
-#!/usr/bin/env python
-__license__ = 'GPL v3'
-
-import re
-from calibre.web.feeds.news import BasicNewsRecipe
-
-class FocusRecipe(BasicNewsRecipe):
-
-    __author__ = u'Artur Stachecki <artur.stachecki@gmail.com>'
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class NYTimes(BasicNewsRecipe):
+
+    title = 'Focus'
+    __author__ = 'Krittika Goyal'
     language = 'pl'
-    version = 1
-    title = u'Focus'
-    publisher = u'Gruner + Jahr Polska'
-    category = u'News'
-    description = u'Focus.pl - pierwszy w Polsce portal społecznościowy dla miłośników nauki. Tematyka: nauka, historia, cywilizacja, technika, przyroda, sport, gadżety'
-    category = 'magazine'
-    cover_url = ''
-    remove_empty_feeds = True
-    no_stylesheets = True
-    oldest_article = 7
-    max_articles_per_feed = 100000
-    recursions = 0
+    description = 'Polish scientific monthly magazine'
+    timefmt = ' [%d %b, %Y]'
+    needs_subscription = False
 
     no_stylesheets = True
-    remove_javascript = True
-    encoding = 'utf-8'
-    # Seems to work best, but YMMV
-    simultaneous_downloads = 5
-
-    r = re.compile('.*(?P<url>http:\/\/(www.focus.pl)|(rss.feedsportal.com\/c)\/.*\.html?).*')
-    keep_only_tags = []
-    keep_only_tags.append(dict(name='div', attrs={'id': 'cll'}))
-
-    remove_tags = []
-    remove_tags.append(dict(name='div', attrs={'class': 'ulm noprint'}))
-    remove_tags.append(dict(name='div', attrs={'class': 'txb'}))
-    remove_tags.append(dict(name='div', attrs={'class': 'h2'}))
-    remove_tags.append(dict(name='ul', attrs={'class': 'txu'}))
-    remove_tags.append(dict(name='div', attrs={'class': 'ulc'}))
-
-    extra_css = '''
-        body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
-        h1{text-align: left;}
-        h2{font-size: medium; font-weight: bold;}
-        p.lead {font-weight: bold; text-align: left;}
-        .authordate {font-size: small; color: #696969;}
-        .fot{font-size: x-small; color: #666666;}
-    '''
-
-    feeds = [
-        ('Nauka', 'http://www.focus.pl/nauka/rss/'),
-        ('Historia', 'http://www.focus.pl/historia/rss/'),
-        ('Cywilizacja', 'http://www.focus.pl/cywilizacja/rss/'),
-        ('Sport', 'http://www.focus.pl/sport/rss/'),
-        ('Technika', 'http://www.focus.pl/technika/rss/'),
-        ('Przyroda', 'http://www.focus.pl/przyroda/rss/'),
-        ('Technologie', 'http://www.focus.pl/gadzety/rss/')
+    keep_only_tags = dict(name='article', attrs={'class': 'content'})
+    remove_tags_after = dict(name='div', attrs={'class': 'inner_article'})
+    remove_tags = [
+        dict(name='div', attrs={'class': ['social_btns']}),
     ]
 
-    def skip_ad_pages(self, soup):
-        if ('advertisement' in soup.find('title').string.lower()):
-            href = soup.find('a').get('href')
-            return self.index_to_soup(href, raw=True)
-        else:
-            return None
-
-    def get_cover_url(self):
-        soup = self.index_to_soup('http://www.focus.pl/magazyn/')
-        tag = soup.find(name='div', attrs={'class': 'clr fl'})
-        if tag:
-            self.cover_url = 'http://www.focus.pl/' + tag.a['href']
-        return getattr(self, 'cover_url', self.cover_url)
-
-    def print_version(self, url):
-        if url.count('focus.pl.feedsportal.com'):
-            u = url.find('focus0Bpl')
-            u = 'http://www.focus.pl/' + url[u + 11:]
-            u = u.replace('0C', '/')
-            u = u.replace('A', '')
-            u = u.replace('0E', '-')
-            u = u.replace('/nc/1//story01.htm', '/do-druku/1')
-        else:
-            u = url.replace('/nc/1', '/do-druku/1')
-        return u
+    # TO GET ARTICLE TOC
+    def nejm_get_index(self):
+        return self.index_to_soup('http://www.focus.pl/')
+
+    # To parse artice toc
+    def parse_index(self):
+        soup = self.nejm_get_index()
+
+        toc = soup.find('div', id='wrapper')
+
+        articles = []
+        feeds = []
+        section_title = 'Focus Articles'
+        for x in toc.findAll(True):
+            if x.name == 'h1':
+                # Article found
+                a = x.find('a')
+                if a is None:
+                    continue
+                title = self.tag_to_string(a)
+                url = a.get('href', False)
+                if not url or not title:
+                    continue
+                # if url.startswith('story'):
+                url = 'http://www.focus.pl' + url
+                self.log('\t\tFound article:', title)
+                self.log('\t\t\t', url)
+                articles.append({'title': title, 'url': url,
+                    'description': '', 'date': ''})
+        feeds.append((section_title, articles))
+
+        return feeds
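For orientation, parse_index in a calibre recipe must return a list of (section title, articles) pairs, each article a dict with title, url, date and description keys. The rewritten Focus recipe therefore produces, schematically (headline and URL below are placeholders):

    [('Focus Articles', [
        {'title': 'Some headline',
         'url': 'http://www.focus.pl/...',
         'description': '', 'date': ''},
        # ... one dict per <h1> link found under div#wrapper
    ])]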
@@ -46,35 +46,34 @@ class Frontlineonnet(BasicNewsRecipe):
 
     keep_only_tags= [
         dict(name='div', attrs={'id':'content'})
-        #,dict(attrs={'class':'byline'})
     ]
-    #remove_attributes=['size','noshade','border']
-
-    #def preprocess_html(self, soup):
-        #for item in soup.findAll(style=True):
-            #del item['style']
-        #for item in soup.findAll('img'):
-            #if not item.has_key('alt'):
-                #item['alt'] = 'image'
-        #return soup
+    remove_attributes=['size','noshade','border']
 
     def parse_index(self):
         articles = []
+        current_section = None
+        feeds = []
         soup = self.index_to_soup(self.INDEX)
-        for feed_link in soup.findAll('div', id='headseccol'):
-            a = feed_link.find('a', href=True)
-            title = self.tag_to_string(a)
-            url = a['href']
-            articles.append({
-                'title' :title
-                ,'date' :''
-                ,'url' :url
-                ,'description':''
-            })
-        return [('Frontline', articles)]
-
-    #def print_version(self, url):
-        #return "http://www.hinduonnet.com/thehindu/thscrip/print.pl?prd=fline&file=" + url.rpartition('/')[2]
-
-    #def image_url_processor(self, baseurl, url):
-        #return url.replace('../images/', self.INDEX + 'images/').strip()
+        for h3 in soup.findAll('h3'):
+            if h3.get('class', None) == 'artListSec':
+                if articles:
+                    feeds.append((current_section, articles))
+                    articles = []
+                current_section = self.tag_to_string(h3).strip()
+                self.log(current_section)
+            elif h3.get('id', None) in {'headseccol', 'headsec'}:
+                a = h3.find('a', href=True)
+                if a is not None:
+                    title = self.tag_to_string(a)
+                    url = a['href']
+                    articles.append({
+                        'title' :title
+                        ,'date' :''
+                        ,'url' :url
+                        ,'description':''
+                    })
+                    self.log('\t', title, url)
+        if articles:
+            feeds.append((current_section, articles))
+        return feeds
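The new parse_index is a single-pass header-grouping scan: walk the h3 tags in document order, start a new section at each section marker, and flush the accumulated articles whenever a new section begins (and once more after the loop). The skeleton of the pattern, with the marker tests stripped to placeholders:

    feeds, articles, current_section = [], [], None
    for h3 in soup.findAll('h3'):
        if h3.get('class', None) == 'artListSec':           # section header
            if articles:
                feeds.append((current_section, articles))
                articles = []
            current_section = self.tag_to_string(h3).strip()
        elif h3.get('id', None) in {'headseccol', 'headsec'}:  # article heading
            a = h3.find('a', href=True)
            if a is not None:
                articles.append({'title': self.tag_to_string(a), 'url': a['href'],
                                 'date': '', 'description': ''})
    if articles:                                            # final flush
        feeds.append((current_section, articles))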
@@ -14,19 +14,12 @@ class GalaxyEdge(BasicNewsRecipe):
 
     auto_cleanup = True
 
-    #keep_only_tags = [dict(id='content')]
-    #remove_tags = [dict(attrs={'class':['article-links', 'breadcr']}),
-        #dict(id=['email-section', 'right-column', 'printfooter', 'topover',
-            #'slidebox', 'th_footer'])]
-
     extra_css = '.photo-caption { font-size: smaller }'
 
     def parse_index(self):
         soup = self.index_to_soup('http://www.galaxysedge.com/')
-        main = soup.find('table', attrs={'width':'911'})
-        toc = main.find('td', attrs={'width':'225'})
+        main = soup.find('table', attrs={'width':'944'})
+        toc = main.find('td', attrs={'width':'204'})
 
         current_section = None
         current_articles = []
@@ -68,41 +61,7 @@ class GalaxyEdge(BasicNewsRecipe):
                     current_articles.append({'title': title, 'url':url,
                         'description':'', 'date':''})
         if current_articles and current_section:
             feeds.append((current_section, current_articles))
 
         return feeds
-
-    #def preprocess_raw_html(self, raw, url):
-        #return raw.replace('<body><p>', '<p>').replace('</p></body>', '</p>')
-
-    #def postprocess_html(self, soup, first_fetch):
-        #for t in soup.findAll(['table', 'tr', 'td','center']):
-            #t.name = 'div'
-        #return soup
-
-    #def parse_index(self):
-        #today = time.strftime('%Y-%m-%d')
-        #soup = self.index_to_soup(
-            #'http://www.thehindu.com/todays-paper/tp-index/?date=' + today)
-        #div = soup.find(id='left-column')
-        #feeds = []
-        #current_section = None
-        #current_articles = []
-        #for x in div.findAll(['h3', 'div']):
-            #if current_section and x.get('class', '') == 'tpaper':
-                #a = x.find('a', href=True)
-                #if a is not None:
-                    #current_articles.append({'url':a['href']+'?css=print',
-                        #'title':self.tag_to_string(a), 'date': '',
-                        #'description':''})
-            #if x.name == 'h3':
-                #if current_section and current_articles:
-                    #feeds.append((current_section, current_articles))
-                #current_section = self.tag_to_string(x)
-                #current_articles = []
-        #return feeds
recipes/gamekult.recipe (new file, 36 lines)
@@ -0,0 +1,36 @@
+from __future__ import unicode_literals
+__license__ = 'GPL v3'
+__copyright__ = '2013, Malah <malah at neuf dot fr>'
+'''
+Gamekult.com
+'''
+
+__author__ = '2013, Malah <malah at neuf dot fr>'
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class GamekultCom(BasicNewsRecipe):
+    title = u'Gamekult.com'
+    __author__ = 'Malah'
+    description = u'Toute l`actualité du jeu vidéo PC, consoles, mobiles.'
+    oldest_article = 1.5
+    language = 'fr'
+    max_articles_per_feed = 100
+    remove_empty_feeds = True
+    use_embedded_content = False
+    no_stylesheets = True
+    ignore_duplicate_articles = {'title', 'url'}
+    keep_only_tags = [dict(id=['story-page','story-body'])]
+    remove_tags = [
+        dict(name='div', attrs={'class':'sharebar'}),
+        dict(name='object', attrs={'type':'application/x-shockwave-flash'}),
+        dict(name='span', attrs={'class':'share'}),
+        dict(name='div', attrs={'class':'story-pagination'}),
+        dict(name='div', attrs={'class':'pagination pagination-centered'}),
+    ]
+
+    masthead_url = u'https://upload.wikimedia.org/wikipedia/fr/9/9c/Logo_-_GAMEKULT.png'
+
+    feeds = [
+        ('Test', u'http://www.gamekult.com/feeds/test.html'),
+        ('Actu', u'http://www.gamekult.com/feeds/actu.html'),
+    ]
recipes/glenn_greenwald.recipe (new file, 10 lines)
@@ -0,0 +1,10 @@
+from calibre.web.feeds.news import AutomaticNewsRecipe
+class BasicUserRecipe1373130920(AutomaticNewsRecipe):
+    title = u'Glenn Greenwald | guardian.co.uk'
+    language = 'en_GB'
+    __author__ = 'anywho'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    auto_cleanup = True
+
+    feeds = [(u'Latest', u'http://www.guardian.co.uk/profile/glenn-greenwald/rss')]
@@ -4,44 +4,29 @@ __copyright__ = 'Copyright 2010 Starson17'
 www.gocomics.com
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
-import re
 
 class GoComics(BasicNewsRecipe):
     title = 'Go Comics'
     __author__ = 'Starson17'
     __version__ = '1.06'
     __date__ = '07 June 2011'
-    description = u'200+ Comics - Customize for more days/comics: Defaults to 7 days, 25 comics - 20 general, 5 editorial.'
+    description = u'200+ Comics - Customize for more days/comics: Defaults to 1 day, 25 comics - 20 general, 5 editorial.'
     category = 'news, comics'
     language = 'en'
-    use_embedded_content= False
     no_stylesheets = True
     remove_javascript = True
     remove_attributes = ['style']
 
-    ####### USER PREFERENCES - COMICS, IMAGE SIZE AND NUMBER OF COMICS TO RETRIEVE ########
+    # USER PREFERENCES - COMICS AND NUMBER OF COMICS TO RETRIEVE ########
     # num_comics_to_get - I've tried up to 99 on Calvin&Hobbes
     num_comics_to_get = 1
-    # comic_size 300 is small, 600 is medium, 900 is large, 1500 is extra-large
-    comic_size = 900
     # CHOOSE COMIC STRIPS BELOW - REMOVE COMMENT '# ' FROM IN FRONT OF DESIRED STRIPS
     # Please do not overload their servers by selecting all comics and 1000 strips from each!
 
-    conversion_options = {'linearize_tables' : True
-        , 'comment' : description
-        , 'tags' : category
-        , 'language' : language
-    }
-
-    keep_only_tags = [dict(name='div', attrs={'class':['feature','banner']}),
-    ]
-
-    remove_tags = [dict(name='a', attrs={'class':['beginning','prev','cal','next','newest']}),
-        dict(name='div', attrs={'class':['tag-wrapper']}),
-        dict(name='a', attrs={'href':re.compile(r'.*mutable_[0-9]+', re.IGNORECASE)}),
-        dict(name='img', attrs={'src':re.compile(r'.*mutable_[0-9]+', re.IGNORECASE)}),
-        dict(name='ul', attrs={'class':['share-nav','feature-nav']}),
-    ]
+    keep_only_tags = [
+        dict(name='h1'),
+        dict(name='div', id=lambda x: x and x.startswith('mutable_')),
+    ]
 
     def get_browser(self):
         br = BasicNewsRecipe.get_browser(self)
@@ -50,7 +35,7 @@ class GoComics(BasicNewsRecipe):
 
     def parse_index(self):
         feeds = []
-        for title, url in [
+        for i, (title, url) in enumerate([ # {{{
            #(u"2 Cows and a Chicken", u"http://www.gocomics.com/2cowsandachicken"),
            #(u"9 Chickweed Lane", u"http://www.gocomics.com/9chickweedlane"),
            #(u"Adam At Home", u"http://www.gocomics.com/adamathome"),
@@ -271,7 +256,7 @@ class GoComics(BasicNewsRecipe):
            (u"Strange Brew", u"http://www.gocomics.com/strangebrew"),
            (u"The Argyle Sweater", u"http://www.gocomics.com/theargylesweater"),
            #
-           ######## EDITORIAL CARTOONS #####################
+           # EDITORIAL CARTOONS #####################
            #(u"Adam Zyglis", u"http://www.gocomics.com/adamzyglis"),
            #(u"Andy Singer", u"http://www.gocomics.com/andysinger"),
            #(u"Ben Sargent",u"http://www.gocomics.com/bensargent"),
@@ -363,81 +348,65 @@ class GoComics(BasicNewsRecipe):
            #(u"Walt Handelsman",u"http://www.gocomics.com/walthandelsman"),
            #(u"Wayne Stayskal",u"http://www.gocomics.com/waynestayskal"),
            #(u"Wit of the World",u"http://www.gocomics.com/witoftheworld"),
-           ]:
-            print 'Working on: ', title
+           ]): # }}}
+            self.log('Working on: ', title, url)
             articles = self.make_links(url)
             if articles:
                 feeds.append((title, articles))
+            if self.test and i > 0:
+                break
         return feeds
 
     def make_links(self, url):
         title = 'Temp'
         current_articles = []
-        pages = range(1, self.num_comics_to_get+1)
-        for page in pages:
+        if self.test:
+            self.num_comics_to_get = 2
+        num = self.num_comics_to_get
+        while num > 0:
+            num -= 1
             page_soup = self.index_to_soup(url)
-            if page_soup:
-                try:
-                    strip_title = page_soup.find(name='div', attrs={'class':'top'}).h1.a.string
-                except:
-                    strip_title = 'Error - no Title found'
-                try:
-                    date_title = page_soup.find('ul', attrs={'class': 'feature-nav'}).li.string
-                    if not date_title:
-                        date_title = page_soup.find('ul', attrs={'class': 'feature-nav'}).li.string
-                except:
-                    date_title = 'Error - no Date found'
-                title = strip_title + ' - ' + date_title
-                for i in range(2):
-                    try:
-                        strip_url_date = page_soup.find(name='div', attrs={'class':'top'}).h1.a['href']
-                        break # success - this is normal exit
-                    except:
-                        strip_url_date = None
-                        continue # try to get strip_url_date again
-                for i in range(2):
-                    try:
-                        prev_strip_url_date = page_soup.find('a', attrs={'class': 'prev'})['href']
-                        break # success - this is normal exit
-                    except:
-                        prev_strip_url_date = None
-                        continue # try to get prev_strip_url_date again
-                if strip_url_date:
-                    page_url = 'http://www.gocomics.com' + strip_url_date
-                else:
-                    continue
-                if prev_strip_url_date:
-                    prev_page_url = 'http://www.gocomics.com' + prev_strip_url_date
-                else:
-                    continue
+            if not page_soup:
+                break
+            content = page_soup.find(id='content')
+            if content is None:
+                break
+            feature = content.find(name='div', attrs={'class':'feature'})
+            feature_nav = content.find(name='ul', attrs={'class':'feature-nav'})
+            if feature is None or feature_nav is None:
+                break
+            try:
+                a = feature.find('h1').find('a', href=True)
+            except:
+                self.log.exception('Failed to find current page link')
+                break
+            page_url = a['href']
+            if page_url.startswith('/'):
+                page_url = 'http://www.gocomics.com' + page_url
+            try:
+                strip_title = self.tag_to_string(feature.find('h1').find('a', href=True))
+            except:
+                strip_title = 'Error - no Title found'
+            try:
+                date_title = self.tag_to_string(feature_nav.find('li'))
+            except:
+                date_title = 'Error - no Date found'
+            title = strip_title + ' - ' + date_title
             current_articles.append({'title': title, 'url': page_url, 'description':'', 'date':''})
-            url = prev_page_url
+            a = feature_nav.find('a', href=True, attrs={'class':'prev'})
+            if a is None:
+                break
+            url = a['href']
+            if url.startswith('/'):
+                url = 'http://www.gocomics.com' + url
         current_articles.reverse()
         return current_articles
 
     def preprocess_html(self, soup):
-        if soup.title:
-            title_string = soup.title.string.strip()
-            _cd = title_string.split(',',1)[1]
-            comic_date = ' '.join(_cd.split(' ', 4)[0:-1])
-        if soup.h1.span:
-            artist = soup.h1.span.string
-            soup.h1.span.string.replaceWith(comic_date + artist)
-        feature_item = soup.find('p',attrs={'class':'feature_item'})
-        if feature_item.a:
-            a_tag = feature_item.a
-            a_href = a_tag["href"]
-            img_tag = a_tag.img
-            img_tag["src"] = a_href
-            img_tag["width"] = self.comic_size
-            img_tag["height"] = None
-        return self.adeify_images(soup)
-
-    extra_css = '''
-        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
-        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
-        img {max-width:100%; min-width:100%;}
-        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
-        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
-    '''
+        headings = soup.findAll('h1')
+        for h1 in headings[1:]:
+            h1.extract()
+        self.adeify_images(soup)
+        return soup
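The rewritten make_links walks backwards through the strip archive by following the 'prev' link num_comics_to_get times. A hypothetical standalone helper (not part of the commit) showing just that navigation skeleton:

    def walk_prev_links(self, start_url, count):
        # Collect one archive page per iteration, then follow the
        # 'prev' link until it runs out or count is exhausted.
        urls, url = [], start_url
        for _ in range(count):
            soup = self.index_to_soup(url)
            urls.append(url)
            a = soup.find('a', href=True, attrs={'class': 'prev'})
            if a is None:
                break  # reached the oldest strip
            url = a['href']
            if url.startswith('/'):
                url = 'http://www.gocomics.com' + url
        urls.reverse()  # oldest first, matching current_articles.reverse()
        return urls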
@@ -47,13 +47,7 @@ class GN(BasicNewsRecipe):
         return feeds
 
     def find_articles(self, main_block):
-        for a in main_block.findAll('div', attrs={'class':'prev_doc2'}):
-            art = a.find('a')
-            yield {
-                'title' : self.tag_to_string(art),
-                'url' : 'http://www.gosc.pl' + art['href']
-            }
-        for a in main_block.findAll('div', attrs={'class':'sr-document'}):
+        for a in main_block.findAll('div', attrs={'class':['prev_doc2', 'sr-document']}):
             art = a.find('a')
             yield {
                 'title' : self.tag_to_string(art),
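The simplification above relies on BeautifulSoup's attribute matching: passing a list as an attribute value matches an element whose attribute equals any of the listed values, so the two identical loops collapse into one. For example:

    # Matches both <div class="prev_doc2"> and <div class="sr-document">:
    main_block.findAll('div', attrs={'class': ['prev_doc2', 'sr-document']})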
@@ -39,10 +39,10 @@ class HBR(BasicNewsRecipe):
             br.visit('https://hbr.org/login?request_url=/', timeout=20)
         except Timeout:
             pass
-        br.click('#accordion div[tabindex="0"]', wait_for_load=False)
-        f = br.select_form('#signin-form')
-        f['signin-form:username'] = username
-        f['signin-form:password'] = password
+        br.click('#form-wrapper h3[tabindex="0"]', wait_for_load=False)
+        f = br.select_form('#login-form')
+        f['username'] = username
+        f['password'] = password
         br.submit(wait_for_load=False)
         br.run_for_a_time(30)
 
@@ -56,7 +56,8 @@ class HBR(BasicNewsRecipe):
         articles = []
         for x in soup.find(id='issueFeaturesContent').findAll(['li', 'h4']):
             if x.name == 'h4':
-                if x.get('class', None) == 'basic':continue
+                if x.get('class', None) == 'basic':
+                    continue
                 if current_section is not None and articles:
                     feeds.append((current_section, articles))
                 current_section = self.tag_to_string(x).capitalize()
@@ -64,7 +65,8 @@ class HBR(BasicNewsRecipe):
                 self.log('\tFound section:', current_section)
             else:
                 a = x.find('a', href=True)
-                if a is None: continue
+                if a is None:
+                    continue
                 title = self.tag_to_string(a)
                 url = a['href']
                 if '/ar/' not in url:
@@ -90,11 +92,11 @@ class HBR(BasicNewsRecipe):
     def parse_index(self):
         soup0 = self.index_to_soup('http://hbr.org/magazine')
         datencover = soup0.find('ul', attrs={'id':'magazineArchiveCarousel'}).findAll('li')[-1]
-        #find date & cover
+        # find date & cover
         self.cover_url=datencover.img['src']
         dates=self.tag_to_string(datencover.img['alt'])
         self.timefmt = u' [%s]'%dates
-        soup = self.index_to_soup(self.INDEX + soup0.find('div', attrs = {'class':'magazine_page'}).a['href'])
+        soup = self.index_to_soup(self.INDEX + soup0.find('div', attrs={'class':'magazine_page'}).a['href'])
         feeds = self.hbr_parse_toc(soup)
         return feeds
@@ -1,44 +0,0 @@
-# -*- coding: utf-8 -*-
-__license__ = 'GPL v3'
-__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>, Armin Geller'
-
-'''
-Fetch High Country News - Blogs
-'''
-from calibre.web.feeds.news import BasicNewsRecipe
-class HighCountryNewsBlogs(BasicNewsRecipe):
-
-    title = u'High Country News - Blogs'
-    description = u'High Country News - Blogs (RSS Version)'
-    __author__ = 'Armin Geller' # 2012-08-01
-    publisher = 'High Country News'
-    category = 'news, politics, Germany'
-    timefmt = ' [%a, %d %b %Y]'
-    language = 'en'
-    encoding = 'UTF-8'
-    publication_type = 'newspaper'
-    oldest_article = 7
-    max_articles_per_feed = 100
-    no_stylesheets = True
-    auto_cleanup = True
-    remove_javascript = True
-    use_embedded_content = False
-    masthead_url = 'http://www.hcn.org/logo.jpg'
-    cover_source = 'http://www.hcn.org'
-
-    def get_cover_url(self):
-        cover_source_soup = self.index_to_soup(self.cover_source)
-        preview_image_div = cover_source_soup.find(attrs={'class':' portaltype-Plone Site content--hcn template-homepage_view'})
-        return preview_image_div.div.img['src']
-
-    feeds = [
-        (u'From the Blogs', u'http://feeds.feedburner.com/hcn/FromTheBlogs?format=xml'),
-
-        (u'Heard around the West', u'http://feeds.feedburner.com/hcn/heard?format=xml'),
-        (u'The GOAT Blog', u'http://feeds.feedburner.com/hcn/goat?format=xml'),
-        (u'The Range', u'http://feeds.feedburner.com/hcn/range?format=xml'),
-    ]
-
-    def print_version(self, url):
-        return url
@@ -1,6 +1,12 @@
 # -*- coding: utf-8 -*-
+#
+# Written: 2012-01-28
+# Last Edited: 2013-09-06
+# Remark: Version 1.3
+# Update cleanup for new web article design
+#
 __license__ = 'GPL v3'
-__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>, Armin Geller'
+__copyright__ = '2013, Armin Geller'

 '''
 Fetch High Country News
@@ -9,35 +15,77 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class HighCountryNews(BasicNewsRecipe):

     title = u'High Country News'
-    description = u'News from the American West'
-    __author__ = 'Armin Geller' # 2012-01-31
+    description = u'High Country News (RSS Version)'
+    __author__ = 'Armin Geller'
     publisher = 'High Country News'
+    category = 'news, politics'
     timefmt = ' [%a, %d %b %Y]'
     language = 'en'
     encoding = 'UTF-8'
     publication_type = 'newspaper'
-    oldest_article = 7
+    oldest_article = 14
     max_articles_per_feed = 100
     no_stylesheets = True
-    auto_cleanup = True
+    auto_cleanup = False
     remove_javascript = True
+    remove_empty_feeds = True
     use_embedded_content = False
-    masthead_url = 'http://www.hcn.org/logo.jpg' # 2012-01-31 AGe add
-    cover_source = 'http://www.hcn.org' # 2012-01-31 AGe add
-
-    def get_cover_url(self): # 2012-01-31 AGe add
-        cover_source_soup = self.index_to_soup(self.cover_source)
-        preview_image_div = cover_source_soup.find(attrs={'class':' portaltype-Plone Site content--hcn template-homepage_view'})
-        return preview_image_div.div.img['src']
+    masthead_url = 'http://www.hcn.org/logo.jpg'
+    cover_source = 'http://www.hcn.org'
+
+    def get_cover_url(self):
+        cover_source_soup = self.index_to_soup(self.cover_source)
+        preview_image_div = cover_source_soup.find(attrs={'class':' portaltype-Plone Site content--hcn template-homepage_view'})
+        return preview_image_div.div.img['src']

     feeds = [
-        (u'Most recent', u'http://feeds.feedburner.com/hcn/most-recent'),
-        (u'Current Issue', u'http://feeds.feedburner.com/hcn/current-issue'),
+        (u'Most recent', u'http://feeds.feedburner.com/hcn/most-recent?format=xml'),
+        (u'Current Issue', u'http://feeds.feedburner.com/hcn/current-issue?format=xml'),

+        (u'From the Blogs', u'http://feeds.feedburner.com/hcn/FromTheBlogs?format=xml'),
+        (u'Heard around the West', u'http://feeds.feedburner.com/hcn/heard?format=xml'),
+        (u'The GOAT Blog', u'http://feeds.feedburner.com/hcn/goat?format=xml'),
+        (u'The Range', u'http://feeds.feedburner.com/hcn/range?format=xml'),

         (u'Writers on the Range', u'http://feeds.feedburner.com/hcn/wotr'),
         (u'High Country Views', u'http://feeds.feedburner.com/hcn/HighCountryViews'),
     ]

-    def print_version(self, url):
-        return url + '/print_view'
+    # 2013-07-23 AGe New coding w/o using print_version

+    keep_only_tags = [
+        dict(name='div', attrs={'id':['content']}),
+    ]

+    remove_tags = [
+        dict(name='div', attrs={'class':['documentActions supercedeDocumentActions editorialDocumentActions',
+            'documentActions supercedeDocumentActions editorialDocumentActions editorialFooterDocumentActions',
+            'article-sidebar',
+            'image-viewer-controls nojs',
+            'protectedArticleWrapper',
+            'visualClear',
+            'feed-icons', # 2013-09-06 AGe add
+            'PayWallEmail', # 2013-09-06 AGe add
+        ]}),
+        dict(name='div', attrs={'id':['offer-below-locked-article']}), # 2013-09-06 AGe add
+    ]

+    INDEX = ''
+    def append_page(self, soup, appendtag, position):
+        pager = soup.find('span',attrs={'class':'next'})
+        if pager:
+            nexturl = self.INDEX + pager.a['href']
+            soup2 = self.index_to_soup(nexturl)
+            texttag = soup2.find('div', attrs={'class':'article-text'})
+            newpos = len(texttag.contents)
+            self.append_page(soup2,texttag,newpos)
+            texttag.extract()
+            appendtag.insert(position,texttag)

+    def preprocess_html(self, soup):
+        self.append_page(soup, soup.body, 3)
+        pager = soup.find('div',attrs={'class':'listingBar listingBar-article'})
+        if pager:
+            pager.extract()
+        return self.adeify_images(soup)
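Note on the hunk above: the updated High Country News recipe drops print_view URLs in favor of in-place pagination. The pattern recurs in several calibre recipes: follow the pager's "next" link, recurse, and splice each page's body back into the first page. A minimal sketch of that pattern (the class name and base URL are illustrative, not part of this commit; the selector names are the ones this recipe uses):

    from calibre.web.feeds.news import BasicNewsRecipe

    class PaginatedExample(BasicNewsRecipe):
        # Hypothetical recipe, for illustration only.
        title = 'Paginated example'
        INDEX = 'http://example.com'  # placeholder base URL

        def append_page(self, soup, appendtag, position):
            # Follow the 'next' pager link, if any, and fetch the next page.
            pager = soup.find('span', attrs={'class': 'next'})
            if pager:
                soup2 = self.index_to_soup(self.INDEX + pager.a['href'])
                texttag = soup2.find('div', attrs={'class': 'article-text'})
                # Recurse first so deeper pages land in reading order.
                self.append_page(soup2, texttag, len(texttag.contents))
                texttag.extract()
                appendtag.insert(position, texttag)

        def preprocess_html(self, soup):
            self.append_page(soup, soup.body, 3)
            return soup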
@@ -1,41 +1,206 @@
 #!/usr/bin/env python
-# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+# -*- coding: utf-8 -*-
+__license__ = 'GPL v3'
+__copyright__ = '2013, Dale Furrow dkfurrow@gmail.com'
+'''
+chron.com
+'''
+import re, time
+from calibre.web.feeds.recipes import BasicNewsRecipe
+from calibre.utils.date import dt_factory, local_tz
+from datetime import datetime, timedelta, date
+from lxml import html
-
-from calibre.web.feeds.news import BasicNewsRecipe

 class HoustonChronicle(BasicNewsRecipe):

     title = u'The Houston Chronicle'
     description = 'News from Houston, Texas'
-    __author__ = 'Kovid Goyal'
+    __author__ = 'Dale Furrow'
     language = 'en'
-    timefmt = ' [%a, %d %b, %Y]'
     no_stylesheets = True
-    use_embedded_content = False
+    # use_embedded_content = False
     remove_attributes = ['style']
-    auto_cleanup = True
-    oldest_article = 3.0
-    #keep_only_tags = {'class':lambda x: x and ('hst-articletitle' in x or
-    #'hst-articletext' in x or 'hst-galleryitem' in x)}
+    remove_empty_feeds = True
+    timefmt = '[%a, %d %b %Y]'
+    timestampfmt = '%Y%m%d%H%M%S'
+    ignore_duplicate_articles = {'url'}
     remove_attributes = ['xmlns']

-    feeds = [
-        ('News', "http://www.chron.com/rss/feed/News-270.php"),
-        ('Sports',
-         'http://www.chron.com/sports/headlines/collectionRss/Sports-Headlines-Staff-Stories-10767.php'),
-        ('Neighborhood',
-         'http://www.chron.com/rss/feed/Neighborhood-305.php'),
-        ('Business', 'http://www.chron.com/rss/feed/Business-287.php'),
-        ('Entertainment',
-         'http://www.chron.com/rss/feed/Entertainment-293.php'),
-        ('Editorials',
-         'http://www.chron.com/opinion/editorials/collectionRss/Opinion-Editorials-Headline-List-10567.php'),
-        ('Life', 'http://www.chron.com/rss/feed/Life-297.php'),
-        ('Science & Tech',
-         'http://www.chron.com/rss/feed/AP-Technology-and-Science-266.php'),
-    ]
+    remove_tags = [dict(name='div', attrs={'class':'socialBar'}),
+                   dict(name='div', attrs={'class':re.compile('post-commentmeta')}),
+                   dict(name='div', attrs={'class':re.compile('slideshow_wrapper')}),
+                   dict(name='div', attrs={'class':'entry-summary'}),
+                   dict(name='a', attrs={'rel':'item-license'})]

+    baseUrl = 'http://www.chron.com'

+    oldest_web_article = 7.0

+    if oldest_web_article is None:
+        earliest_date = date.today()
+    else:
+        earliest_date = date.today() - timedelta(days=oldest_web_article)

+    pages = [('news' , '/news/houston-texas/'),
+             ('business' , '/business/'),
+             ('opinion', '/opinion/'),
+             ('sports', '/sports/')]

+    def getLinksFromSectionPage(self, sectionUrl):
+        pageDoc = html.parse(sectionUrl)
+        els = pageDoc.xpath("""//div[contains(@class, 'scp-item')
+            or @class='scp-feature' or contains(@class, 'simplelist')
+            or contains(@class, 'scp-blogpromo')]
+            //a[@href and not(@target) and not(child::img)]""")
+        elList = []
+        for el in els:
+            link = el.get('href')
+            title = el.text
+            if link[:4] != 'http':
+                link = self.baseUrl + link
+            if title is not None:
+                elList.append((link, el.text))
+        return elList

+    def getArticleDescriptionFromDoc(self, pageDoc):
+        descriptionCharsBreak = 140
+        descriptionMaxChars = 300
+        descXpath = """//div[contains(@class, 'article-body') or
+            contains(@class, 'resource-content') or contains(@class, 'post')]//p"""
+        sentenceRegex = re.compile("(\S.+?[.!?])(?=\s+|$)")

+        def stringify_children(node):
+            return ''.join([x for x in node.itertext()])
+        try:
+            els = pageDoc.xpath(descXpath)
+            outText = ""
+            ellipsis = ""
+            for el in els:
+                sentences = re.findall(sentenceRegex, stringify_children(el))
+                for sentence in sentences:
+                    if len(outText) < descriptionCharsBreak:
+                        outText += sentence + " "
+                    else:
+                        if len(outText) > descriptionMaxChars:
+                            ellipsis = "..."
+                        return outText[:descriptionMaxChars] + ellipsis
+            return outText
+        except:
+            self.log('Error on Article Description')
+            return ""

+    def getPublishedTimeFromDoc(self, pageDoc):
+        regexDateOnly = re.compile("""(?:January|February|March|April|
+            May|June|July|August|September|October|November|
+            December)\s[0-9]{1,2},\s20[01][0-9]""")
+        regextTimeOnly = re.compile("""[0-9]{1,2}:[0-9]{1,2} \w{2}""")
+        def getRegularTimestamp(dateString):
+            try:
+                outDate = datetime.strptime(dateString, "%Y-%m-%dT%H:%M:%SZ")
+                return outDate
+            except:
+                return None
+        def getDateFromString(inText):
+            match = re.findall(regexDateOnly, inText)
+            if match:
+                try:
+                    outDate = datetime.strptime(match[0], "%B %d, %Y")
+                    match = re.findall(regextTimeOnly, inText)
+                    if match:
+                        outTime = datetime.strptime(match[0], "%I:%M %p")
+                        return datetime.combine(outDate.date(), outTime.time())
+                    return outDate
+                except:
+                    return None
+            else:
+                return None
+        el = pageDoc.xpath("//*[@class='timestamp'][1]")
+        if len(el) == 1:
+            return getRegularTimestamp(el[0].get('title'))
+        else:
+            el = pageDoc.xpath("//*[@class='entry-date' or @class='post-date'][1]")
+            if len(el) == 1:
+                return getDateFromString(el[0].text_content())
+            else:
+                return None

+    def getAllFeedDataFromPage(self, page):
+        articles = []
+        linkList = self.getLinksFromSectionPage(self.baseUrl + page[1])
+        self.log('from section: ', page[0], " found ", len(linkList), " links")
+        for link in linkList:
+            try:
+                articleDoc = html.parse(link[0])
+                description = self.getArticleDescriptionFromDoc(articleDoc)
+                articleDate = self.getPublishedTimeFromDoc(articleDoc)
+                if articleDate is not None and description is not None and articleDate.date() > self.earliest_date:
+                    dateText = articleDate.strftime('%a, %d %b')
+                    author = articleDate.strftime(self.timestampfmt)
+                    articles.append({'title':link[1], 'url':link[0],
+                        'description':description, 'date':dateText, 'author':author})
+                    self.log(page[0] + ": " + link[1] + ', from ' + dateText +
+                        " description of " + str(len(description)) + ' characters at ' + link[0])
+                else:
+                    msg = ""
+                    if articleDate is None:
+                        msg = " No Timestamp Found"
+                    else:
+                        msg = " article older than " + str(self.oldest_web_article) + ' days...'
+                    self.log("Skipping article: ", link[0], msg)
+            except:
+                print 'error on fetching ' + link[0]
+                continue
+        return articles

+    def parse_index(self):

+        self.timefmt = ' [%a, %d %b, %Y]'
+        self.log('starting parse_index: ', time.strftime(self.timestampfmt))
+        feeds = []
+        for page in self.pages:
+            articles = []
+            articles = self.getAllFeedDataFromPage(page)
+            if articles:
+                feeds.append((page[0], articles))
+        self.log('finished parse_index: ', time.strftime(self.timestampfmt))
+        return feeds

+    def preprocess_html(self, thisSoup):
+        baseTags = []
+        baseTags.extend(thisSoup.findAll(name='div', attrs={'id':re.compile('post-\d+')}))
+        baseTags.extend(thisSoup.findAll(name='div', attrs={'class':'hnews hentry item'}))
+        allTags = []
+        allTags.extend(baseTags)
+        if len(baseTags) > 0:
+            for tag in baseTags:
+                allTags.extend(tag.findAll(True))
+        paragraphs = thisSoup.findAll(name='p')
+        for paragraph in paragraphs:
+            if paragraph not in allTags:
+                allTags.append(paragraph)
+        for tag in baseTags:
+            while tag.parent is not None:
+                allTags.append(tag)
+                tag = tag.parent
+        for tag in thisSoup.findAll(True):
+            if tag not in allTags:
+                tag.extract()
+        return thisSoup

+    def populate_article_metadata(self, article, soup, first):
+        if not first:
+            return
+        try:
+            article.date = time.strptime(article.author, self.timestampfmt)
+            article.utctime = dt_factory(article.date, assume_utc=False, as_utc=False)
+            article.localtime = article.utctime.astimezone(local_tz)
+        except Exception as inst: # remove after debug
+            self.log('Exception: ', article.title) # remove after debug
+            self.log(type(inst)) # remove after debug
+            self.log(inst) # remove after debug
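One detail of the Houston Chronicle rewrite above deserves a gloss: the article dictionaries built in parse_index carry no machine-readable date field of their own, so the recipe packs a formatted timestamp into each article's 'author' slot and unpacks it again in populate_article_metadata. A condensed sketch of just that hand-off (the helper names here are illustrative, not from the commit):

    import time
    from calibre.utils.date import dt_factory, local_tz

    timestampfmt = '%Y%m%d%H%M%S'  # same format string the recipe uses

    def pack_timestamp(article_dict, when):
        # Stash the datetime where the feed framework will carry it verbatim.
        article_dict['author'] = when.strftime(timestampfmt)

    def unpack_timestamp(article):
        # Recover the struct_time and rebuild calibre's timezone-aware dates.
        parsed = time.strptime(article.author, timestampfmt)
        article.utctime = dt_factory(parsed, assume_utc=False, as_utc=False)
        article.localtime = article.utctime.astimezone(local_tz)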
New binary files (recipe icons):

  recipes/icons/acrimed.png (709 B)
  recipes/icons/diagonales.png (4.8 KiB)
  recipes/icons/eltribuno_jujuy_impreso.png (592 B)
  recipes/icons/eltribuno_salta_impreso.png (592 B)
  recipes/icons/lacapital.png (1.1 KiB)
  recipes/icons/le_monde_diplomatique_fr.png (446 B)
  recipes/icons/le_monde_sub.png (510 B)
  recipes/icons/miradasalsur.png (1.1 KiB)
recipes/il_cambiamento.recipe (new file, 12 lines)
@@ -0,0 +1,12 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class IC(BasicNewsRecipe):
+    title = u'il Cambiamento'
+    oldest_article = 12
+    max_articles_per_feed = 50
+    language = 'it'
+    __author__ = 'ghib9'
+    auto_cleanup = True
+    use_embedded_content = False
+
+    feeds = [(u'il Cambiamento', u'http://www.ilcambiamento.it/rss.xml')]
recipes/il_foglio.recipe (new file, 16 lines)
@@ -0,0 +1,16 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1373969939(BasicNewsRecipe):
+    title = u'Il Foglio - Editoriali'
+    oldest_article = 1
+    max_articles_per_feed = 10
+    auto_cleanup = False
+    keep_only_tags = [
+        dict(name='div', attrs={'class':'sec_item'})
+    ]
+    feeds = [(u'Il Foglio - Editoriali', u'http://feed43.com/8814237344800115.xml')]
+    no_stylesheets = True
+    __author__ = 'faber1971'
+    description = 'Leading articles from an Italian newspaper - v1.00 (16 July, 2013)'
+    language = 'it'
+    masthead_url = 'http://www.ilfoglio.it/media/img/interface/logo_testata_small.gif'
@@ -1,504 +1,34 @@
 # adapted from old recipe by Darko Miletic <darko.miletic at gmail.com>

-import re
 from calibre.web.feeds.recipes import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import Tag, NavigableString


 class TheIndependentNew(BasicNewsRecipe):

-    # flag to enable/disable article graphics on business pages/some others
-    # eg http://www.independent.co.uk/news/world/europe/berlusconi-departure-fails-to-calm-the-markets-6259682.html
-    # -max dimensions can be altered using the .pictureContainer img selector in the css
-    _FETCH_ARTICLE_GRAPHICS = True
-
-    #Flag to enable/disable image fetching (not business)
-    _FETCH_IMAGES = True
-
-    #Set max gallery images here (respects _FETCH_IMAGES)
-    # -1 for infinite
-    _MAX_GALLERY_IMAGES = -1
-
-    #used for converting rating to stars
+    # used for converting rating to stars
     _STAR_URL = 'http://www.independent.co.uk/skins/ind/images/rating_star.png'
     _NO_STAR_URL = 'http://www.independent.co.uk/skins/ind/images/rating_star_grey.png'

     title = u'The Independent'
-    __author__ = 'Will'
+    __author__ = 'Krittika Goyal'
     description = 'The latest in UK News and World News from The \
         Independent. Wide range of international and local news, sports \
         news, commentary and opinion pieces.Independent News - Breaking news \
         that matters. Your daily comprehensive news source - The \
         Independent Newspaper'
     publisher = 'The Independent'
+    oldest_article = 2.0
+    ignore_duplicate_articles = {'title', 'url'}
+    remove_empty_feeds = True
     category = 'news, UK'
     no_stylesheets = True
     use_embedded_content = False
     remove_empty_feeds = True
+    auto_cleanup = True
     language = 'en_GB'
     publication_type = 'newspaper'
     masthead_url = 'http://www.independent.co.uk/independent.co.uk/editorial/logo/independent_Masthead.png'
     encoding = 'utf-8'
     compress_news_images = True
-    remove_tags =[
-        dict(attrs={'id' : ['RelatedArtTag','renderBiography']}),
-        dict(attrs={'class' : ['autoplay','openBiogPopup']}),
-        dict(name='img',attrs={'alt' : ['Get Adobe Flash player']}),
-        dict(name='img',attrs={'alt' : ['view gallery']}),
-        dict(attrs={'style' : re.compile('.*')}),
-        dict(attrs={'class':lambda x: x and 'voicesRelatedTopics' in x.split()}),
-        ]
-
-    keep_only_tags =[dict(attrs={'id':['main','top']})]
-    recursions = 0
-
-    # fixes non compliant html nesting and 'marks' article graphics links
-    preprocess_regexps = [
-        (re.compile('<span class="storyTop ">(?P<nested>.*?)</span>', re.DOTALL),
-         lambda match: '<div class="storyTop">' + match.group('nested') + '</div>'),
-        (re.compile('(<strong>.*?[Cc]lick.*?<a.*?((HERE)|([Hh]ere)).*?</strong>)', re.DOTALL),
-         lambda match: '<div class="article-graphic">' + match.group(0) + '</div>'),
-        ]
-
-    conversion_options = {
-        'comment' : description
-        , 'tags' : category
-        , 'publisher' : publisher
-        , 'language' : language
-        }
-
-    extra_css = """
-        h1{font-family: Georgia,serif ; font-size: x-large; }
-        body{font-family: Verdana,Arial,Helvetica,sans-serif}
-        img{margin-bottom: 0.4em; display:block}
-        .starRating img {float: left}
-        .starRating {margin-top:0.4em; display: block}
-        .image {clear:left; font-size: x-small; color:#888888;}
-        .articleByTimeLocation {font-size: x-small; color:#888888;
-            margin-bottom:0.2em ; margin-top:0.2em ; display:block}
-        .subtitle {clear:left ;}
-        .column-1 h1 { color: #191919}
-        .column-1 h2 { color: #333333}
-        .column-1 h3 { color: #444444}
-        .subtitle { color: #777777; font-size: medium;}
-        .column-1 a,h1,h2,h3 { margin: 0; }
-        .column-1 div{margin: 0;}
-        .articleContent {display: block; clear:left;}
-        .articleContent {color: #000000; font-size: medium;}
-        .ivDrip-section {color: #000000; font-size: medium;}
-        .datetime {color: #888888}
-        .title {font-weight:bold;}
-        .storyTop{}
-        .pictureContainer img { max-width: 400px; max-height: 400px;}
-        .image img { max-width: 400px; max-height: 400px;}
-        """
-
-    oldest_article = 1
-    max_articles_per_feed = 100
-
-    _processed_urls = []
-
-    def get_article_url(self, article):
-        url = super(self.__class__,self).get_article_url(article)
-
-        title = article.get('title', None)
-        if title and re.search("^Video:",title):
-            return None
-
-        #remove duplicates
-        if not (url in self._processed_urls):
-            self._processed_urls.append(url)
-        else:
-            url = None
-        return url
-
-    def populate_article_metadata(self, article, soup, first):
-        if first and hasattr(self, 'add_toc_thumbnail'):
-            picdiv = soup.find('img')
-            if picdiv is not None:
-                self.add_toc_thumbnail(article,picdiv['src'])
-
-    def preprocess_html(self, soup):
-
-        #remove 'advertorial articles'
-        strapline = soup.find('div',attrs={'class' : re.compile('.*strapLine.*')})
-        if strapline:
-            for para in strapline.findAll('p'):
-                if len(para.contents) and isinstance(para.contents[0],NavigableString) \
-                    and para.contents[0] == 'ADVERTORIAL FEATURE':
-                    return None
-
-        # remove Suggested Topics
-        items_to_extract = []
-
-        for item in soup.findAll('div',attrs={'class' : re.compile('.*RelatedArtTag.*')}):
-            items_to_extract.append(item)
-
-        for item in items_to_extract:
-            item.extract()
-
-        items_to_extract = []
-        slideshow_elements = []
-
-        for item in soup.findAll(attrs={'class' : re.compile("widget.*")}):
-            remove = True
-            pattern = re.compile('((articleContent)|(title))$')
-            if (pattern.search(item['class'])) is not None:
-                remove = False
-
-            # corrections
-            # story content always good
-            pattern = re.compile('storyContent')
-            if (pattern.search(item['class'])) is not None:
-                remove = False
-
-            #images
-            pattern = re.compile('slideshow')
-            if (pattern.search(item['class'])) is not None:
-                if self._FETCH_IMAGES:
-                    remove = False
-                    slideshow_elements.append(item)
-                else:
-                    remove = True
-
-            #social widgets always bad
-            pattern = re.compile('socialwidget')
-            if (pattern.search(item['class'])) is not None:
-                remove = True
-
-            if remove:
-                items_to_extract.append(item)
-
-        for item in items_to_extract:
-            item.extract()
-
-        items_to_extract = []
-
-        if self._FETCH_IMAGES:
-            for element in slideshow_elements:
-                for item in element.findAll('a',attrs={'href' : re.compile('.*')}):
-                    if item.img is not None:
-                        #use full size image
-                        images = []
-
-                        img = item.findNext('img')
-
-                        if not '?action=gallery' in item['href']:
-                            img['src'] = item['href']
-                            tag = Tag(soup,'h3')
-                            text = ''
-                            try:
-                                text = img['data-title']
-                            except:
-                                pass
-
-                            if img.get('title') and (len(img['title']) > 1):
-                                text = NavigableString(img['title'])
-                            tag.insert(0,text)
-                            images.append((img, tag))
-                        else:
-                            gallery_images, remove_link = self._get_gallery_images(item['href'])
-                            images = images + gallery_images
-                            if remove_link:
-                                gal_link = soup.find('a',attrs={'id' : 'view-gallery'})
-                                if gal_link:
-                                    gal_link.extract()
-                        img.extract()
-                        for (img, title) in images:
-                            #insert caption if available
-                            if title:
-                                #picture before text
-                                img.extract()
-                                item.insert(0,img)
-                                item.insert(1,title)
-
-                        # remove link
-                        item.name = "div"
-                        item["class"]='image'
-                        del item["href"]
-
-        #remove empty subtitles
-        """
-        currently the subtitle is located in first paragraph after
-        sibling <h3 class="subtitle"> tag. This may be 'fixed' at
-        some point.
-        """
-        subtitle = soup.find('h3',attrs={'class' : 'subtitle'})
-        if subtitle is not None:
-            subtitleText = subtitle.findNext('p')
-            if subtitleText is not None:
-                if len(subtitleText.contents[0]) <= 1 :
-                    subtitleText.extract()
-                    subtitle.extract()
-
-        #replace rating numbers with stars
-        for item in soup.findAll('div',attrs={ 'class' : 'starRating'}):
-            if item is not None:
-                soup2 = self._insertRatingStars(soup,item)
-                if soup2 is not None:
-                    soup = soup2
-
-        #remove empty paragraph tags in storyTop which can leave a space
-        #between first paragraph and rest of story
-        nested_content = False
-        storyTop = soup.find('div',attrs={ 'class' : ['storyTop']})
-        for item in storyTop.findAll('p'):
-            for nested in item:
-                if isinstance(nested, Tag):
-                    nested_content = True
-                    break
-            if not nested_content and item.contents is not None and len(item.contents[0]) <= 1 :
-                items_to_extract.append(item)
-
-        for item in items_to_extract:
-            item.extract()
-
-        items_to_extract = []
-
-        #remove line breaks immediately next to tags with default margins
-        #to prevent double line spacing and narrow columns of text
-        storyTop = soup.find('div',attrs={ 'class' : ['storyTop']})
-        self._remove_undesired_line_breaks_from_tag(storyTop,soup)
-
-        #replace article graphics link with the graphics themselves
-        if self._FETCH_ARTICLE_GRAPHICS:
-            items_to_insert = []
-            for item in soup.findAll('div', attrs={'class' : ['article-graphic']}):
-                strong = item.find('strong')
-                if not strong:
-                    continue
-                for child in strong:
-                    if isinstance(child,Tag):
-                        if str(child.name) == 'a':
-                            items_to_insert.extend(self._get_article_graphic(strong,child['href'],soup))
-
-            for item in items_to_insert:
-                item[0].replaceWith(item[1])
-
-        for item in items_to_extract:
-            item.extract()
-
-        return soup
-
-    def _get_article_graphic(self,old_item,url,soup):
-
-        items_to_insert = []
-
-        if re.search('\.jpg$',str(url)):
-            div = Tag(soup,'div')
-            div['class'] = 'pictureContainer'
-            img = Tag(soup,'img')
-            img['src'] = url
-            img['alt'] = 'article graphic'
-            div.insert(0,img)
-            items_to_insert.append((old_item,div,))
-            return items_to_insert
-
-        soup2 = self.index_to_soup(url)
-        for item in soup2.findAll('div',attrs={'class' : re.compile("widget picture article.*")}):
-            items_to_insert.append((old_item,item),)
-        return items_to_insert
-
-    def _insertRatingStars(self,soup,item):
-        if item.contents is None or len(item.contents) < 1:
-            return
-        rating = item.contents[0]
-
-        try:
-            rating = float(item.contents[0])
-        except:
-            print 'Could not convert decimal rating to star: malformatted float.'
-            return
-        for i in range(1,6):
-            star = Tag(soup,'img')
-            if i <= rating:
-                star['src'] = self._STAR_URL
-            else:
-                star['src'] = self._NO_STAR_URL
-            star['alt'] = 'star number ' + str(i)
-            item.insert(i,star)
-        #item.contents[0] = NavigableString('(' + str(rating) + ')')
-        item.contents[0] = ''
-
-    def postprocess_html(self,soup, first_fetch):
-
-        #mark subtitle parent as non-compliant nesting causes
-        # p's to be 'popped out' of the h3 tag they are nested in.
-        subtitle = soup.find('h3', attrs={'class' : 'subtitle'})
-        subtitle_div = None
-        if subtitle:
-            subtitle_div = subtitle.parent
-        if subtitle_div:
-            clazz = ''
-            if 'class' in subtitle_div:
-                clazz = subtitle_div['class'] + ' '
-            clazz = clazz + 'subtitle'
-            subtitle_div['class'] = clazz
-
-        #find broken images and remove captions
-        items_to_extract = []
-        for item in soup.findAll('div', attrs={'class' : 'image'}):
-            img = item.findNext('img')
-            if img and img.get('src'):
-                # broken images still point to remote url
-                pattern = re.compile('http://www.independent.co.uk.*')
-                if pattern.match(img["src"]) is not None:
-                    caption = img.findNextSibling('h3')
-                    if caption is not None:
-                        items_to_extract.append(caption)
-                    items_to_extract.append(img)
-
-        for item in items_to_extract:
-            item.extract()
-
-        # nickredding's fix for non-justified text
-        for ptag in soup.findAll('p',attrs={'align':'left'}):
-            del(ptag['align'])
-
-        return soup
-
-    def _get_gallery_images(self,url):
-        gallery_soup = self.index_to_soup(url)
-        images = []
-        remove_link = True
-        total = 1
-        try:
-            counter = gallery_soup.find('div',attrs={'id' : ['counter']})
-            total = counter.contents[0].split('/')
-            total = int(total[1].rstrip())
-        except:
-            total = 1
-
-        if self._MAX_GALLERY_IMAGES >= 0 and total > self._MAX_GALLERY_IMAGES:
-            total = self._MAX_GALLERY_IMAGES
-            remove_link = False
-
-        for i in range(1, total +1):
-            image, title = self._get_image_from_gallery(gallery_soup)
-            if image:
-                images.append((image,title))
-            next = url + '&ino=' + str(i + 1)
-            gallery_soup = self.index_to_soup(next)
-        images.reverse()
-        return images, remove_link
-
-    def _get_image_from_gallery(self,soup):
-        try:
-            container = soup.find('div',attrs={'id' : ['main-image']})
-            image = container.find('img')
-            if image:
-                title = soup.find('div',attrs={'id' : ['image-title']})
-                return image, title
-        except:
-            print 'error fetching gallery image'
-            return None
-
-    def _recurisvely_linearise_tag_tree(
-        self,
-        item,
-        linearised= None,
-        count=0,
-        limit = 100
-        ):
-        linearised = linearised or []
-        count = count + 1
-        if count > limit:
-            return linearised
-        if not (isinstance(item,Tag)):
-            return linearised
-        for nested in item:
-            linearised.append(nested)
-            linearised = self._recurisvely_linearise_tag_tree(nested,linearised, count)
-        return linearised
-
-    def _get_previous_tag(self,current_index, tag_tree):
-        if current_index == 0:
-            return None
-        else:
-            return tag_tree[current_index - 1]
-
-    def _get_next_tag(self,current_index, tag_tree):
-        if current_index < len(tag_tree) - 1:
-            return tag_tree[current_index + 1]
-        else:
-            return None
-
-    def _list_match(self,test_str, list_regex):
-        for regex in list_regex:
-            match = re.match(regex, test_str)
-            if match is not None:
-                return True
-        return False
-
-    def _remove_undesired_line_breaks_from_tag(self,parent,soup):
-
-        if parent is None:
-            return
-
-        tag_tree = self._recurisvely_linearise_tag_tree(parent)
-        items_to_remove = []
-
-        for item in tag_tree:
-            if item == u'\n':
-                items_to_remove.append(item)
-                continue;
-
-        for item in items_to_remove:
-            tag_tree.remove(item)
-
-        spaced_tags = [r'p', r'h\d', r'blockquote']
-        tags_to_extract = []
-        tags_to_replace = []
-        for (i, tag) in enumerate(tag_tree):
-            if isinstance(tag, Tag):
-                if str(tag) == '<br />':
-                    previous_tag = self._get_previous_tag(i, tag_tree)
-
-                    if isinstance(previous_tag, Tag):
-                        previous_tag_is_spaced = previous_tag is not None\
-                            and self._list_match(str(previous_tag.name),
-                            spaced_tags)
-                    else:
-                        previous_tag_is_spaced = False
-
-                    next_tag = self._get_next_tag(i, tag_tree)
-
-                    if isinstance(next_tag, Tag):
-                        next_tag_is_spaced = next_tag is not None\
-                            and self._list_match(str(next_tag.name), spaced_tags)
-                    else:
-                        next_tag_is_spaced = False
-
-                    if previous_tag_is_spaced or next_tag_is_spaced or i == 0\
-                        or i == len(tag_tree) - 1:
-                        tags_to_extract.append(tag)
-                    else:
-                        tags_to_replace.append((tag,NavigableString(' '),))
-
-        for pair in tags_to_replace:
-            pair[0].replaceWith(pair[1])
-        for tag in tags_to_extract:
-            tag.extract()

     feeds = [
         (u'News - UK',
@@ -610,3 +140,4 @@ class TheIndependentNew(BasicNewsRecipe):
          u'http://www.independent.co.uk/extras/indybest/?service=rss'),
     ]
+
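The Independent hunk above removes roughly 470 lines of hand-written soup surgery (gallery scraping, star-rating images, line-break pruning) in favor of calibre's built-in heuristics. The declarative style it moves to looks roughly like this sketch (the class name and feed URL are placeholders, not from this commit):

    from calibre.web.feeds.recipes import BasicNewsRecipe

    class MinimalCleanupExample(BasicNewsRecipe):
        # Hypothetical recipe illustrating the declarative style.
        title = 'Minimal cleanup example'
        oldest_article = 2.0
        ignore_duplicate_articles = {'title', 'url'}
        remove_empty_feeds = True
        # auto_cleanup runs readability-style content extraction, replacing
        # hand-maintained keep_only_tags/remove_tags and preprocess_html hooks.
        auto_cleanup = True
        feeds = [('Example section', 'http://example.com/rss')]  # placeholder feed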
@@ -33,8 +33,8 @@ class AdvancedUserRecipe1299694372(BasicNewsRecipe):
         (u'Instapaper Starred', u'http://www.instapaper.com/starred')
     ]

-    #Adds the title tag to the body of the recipe. Use this if your articles miss headings.
-    add_title_tag = False;
+    # Adds the title tag to the body of the recipe. Use this if your articles miss headings.
+    add_title_tag = False

     def get_browser(self):
         br = BasicNewsRecipe.get_browser(self)
@@ -43,7 +43,7 @@ class AdvancedUserRecipe1299694372(BasicNewsRecipe):
             br.select_form(nr=0)
             br['username'] = self.username
             if self.password is not None:
                 br['password'] = self.password
             br.submit()
         return br

@@ -55,7 +55,7 @@ class AdvancedUserRecipe1299694372(BasicNewsRecipe):
             self.report_progress(0, 'Fetching feed'+' %s...'%(feedtitle if feedtitle else feedurl))
             articles = []
             soup = self.index_to_soup(feedurl)
-            for item in soup.findAll('div', attrs={'class':'cornerControls'}):
+            for item in soup.findAll('div', attrs={'class':'title_row'}):
                 #description = self.tag_to_string(item.div)
                 atag = item.a
                 if atag and atag.has_key('href'):
@@ -73,10 +73,10 @@ class AdvancedUserRecipe1299694372(BasicNewsRecipe):
         article.title = soup.find('title').contents[0].strip()

     def postprocess_html(self, soup, first_fetch):
-        #adds the title to each story, as it is not always included
+        # adds the title to each story, as it is not always included
         if self.add_title_tag:
             for link_tag in soup.findAll(attrs={"id" : "story"}):
                 link_tag.insert(0,'<h1>'+soup.find('title').contents[0].strip()+'</h1>')

-        #print repr(soup)
+        # print repr(soup)
         return soup
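The get_browser hunk above follows calibre's standard authenticated-recipe idiom: start from the stock mechanize browser, open the login page, fill the first form, submit. A self-contained sketch of the idiom (the class name and login URL are assumed for illustration; the hunk itself does not show them):

    from calibre.web.feeds.news import BasicNewsRecipe

    class LoginExample(BasicNewsRecipe):
        # Hypothetical recipe, for illustration only.
        title = 'Login example'
        needs_subscription = True
        LOGIN = 'http://example.com/user/login'  # assumed login URL

        def get_browser(self):
            br = BasicNewsRecipe.get_browser(self)
            if self.username is not None and self.password is not None:
                br.open(self.LOGIN)
                br.select_form(nr=0)  # first form on the page
                br['username'] = self.username
                br['password'] = self.password
                br.submit()
            return br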
@@ -1,5 +1,4 @@
-__license__ = 'GPL v3'
-__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2011-2013, Darko Miletic <darko.miletic at gmail.com>'
 '''
 www.iprofesional.com
 '''
@@ -19,13 +18,15 @@ class iProfesional(BasicNewsRecipe):
     use_embedded_content = False
     language = 'es_AR'
     remove_empty_feeds = True
-    publication_type = 'nesportal'
-    masthead_url = 'http://www.iprofesional.com/img/logo-iprofesional.png'
+    publication_type = 'newsportal'
+    masthead_url = 'http://www.iprofesional.com/img/header/logoiprofesional.png'
     extra_css = """
-        body{font-family: Arial,Helvetica,sans-serif }
+        body{font-family: 'Droid Sans',Arial,sans-serif }
         img{margin-bottom: 0.4em; display:block}
-        .titulo-interior{font-family: Georgia,"Times New Roman",Times,serif}
-        .autor-nota{font-size: small; font-weight: bold; font-style: italic; color: gray}
+        .titulo{font-family: WhitneyBoldWhitneyBold,Arial,Helvetica,sans-serif; color: blue}
+        .fecha-archivo{font-weight: bold; color: rgb(205, 150, 24)}
+        .description{font-weight: bold; color: gray }
+        .firma{font-size: small}
     """

     conversion_options = {
@@ -35,27 +36,21 @@ class iProfesional(BasicNewsRecipe):
     , 'language' : language
     }

-    keep_only_tags = [dict(attrs={'class':['fecha','interior-nota']})]
-    remove_tags = [
-        dict(name=['meta','link','base','embed','object','iframe'])
-        ,dict(attrs={'class':['menu-imprimir','guardarNota','IN-widget','fin','permalink']})
-    ]
-    remove_attributes=['lang','xmlns:og','xmlns:fb']
+    keep_only_tags = [dict(attrs={'class':'desarrollo'})]
+    remove_tags = [dict(name=['meta','link','base','embed','object','iframe'])]

     feeds = [
         (u'Ultimas noticias' , u'http://feeds.feedburner.com/iprofesional-principales-noticias')
-        ,(u'Finanzas' , u'http://feeds.feedburner.com/iprofesional-finanzas' )
-        ,(u'Impuestos' , u'http://feeds.feedburner.com/iprofesional-impuestos' )
-        ,(u'Negocios' , u'http://feeds.feedburner.com/iprofesional-economia' )
-        ,(u'Comercio Exterior' , u'http://feeds.feedburner.com/iprofesional-comercio-exterior' )
-        ,(u'Tecnologia' , u'http://feeds.feedburner.com/iprofesional-tecnologia' )
-        ,(u'Management' , u'http://feeds.feedburner.com/iprofesional-managment' )
-        ,(u'Marketing' , u'http://feeds.feedburner.com/iprofesional-marketing' )
-        ,(u'Legales' , u'http://feeds.feedburner.com/iprofesional-legales' )
-        ,(u'Autos' , u'http://feeds.feedburner.com/iprofesional-autos' )
-        ,(u'Vinos' , u'http://feeds.feedburner.com/iprofesional-vinos-bodegas' )
+        ,(u'Finanzas' , u'http://feeds.feedburner.com/iprofesional-finanzas')
+        ,(u'Impuestos' , u'http://feeds.feedburner.com/iprofesional-impuestos')
+        ,(u'Negocios' , u'http://feeds.feedburner.com/iprofesional-economia')
+        ,(u'Comercio Exterior' , u'http://feeds.feedburner.com/iprofesional-comercio-exterior')
+        ,(u'Tecnologia' , u'http://feeds.feedburner.com/iprofesional-tecnologia')
+        ,(u'Management' , u'http://feeds.feedburner.com/iprofesional-managment')
+        ,(u'Marketing' , u'http://feeds.feedburner.com/iprofesional-marketing')
+        ,(u'Legales' , u'http://feeds.feedburner.com/iprofesional-legales')
+        ,(u'Autos' , u'http://feeds.feedburner.com/iprofesional-autos')
+        ,(u'Vinos' , u'http://feeds.feedburner.com/iprofesional-vinos-bodegas')
     ]

     def preprocess_html(self, soup):
@@ -64,16 +59,17 @@ class iProfesional(BasicNewsRecipe):
         for item in soup.findAll('a'):
             limg = item.find('img')
             if item.string is not None:
                 str = item.string
                 item.replaceWith(str)
             else:
                 if limg:
                     item.name = 'div'
                     item.attrs = []
                 else:
                     str = self.tag_to_string(item)
                     item.replaceWith(str)
         for item in soup.findAll('img'):
-            if not item.has_key('alt'):
+            if 'alt' not in item:
                 item['alt'] = 'image'
         return soup
@@ -6,29 +6,23 @@ class JakartaGlobe(BasicNewsRecipe):
     max_articles_per_feed = 100

     feeds = [
-        (u'News', u'http://www.thejakartaglobe.com/pages/getrss/getrss-news.php'),
-        (u'Business', u'http://www.thejakartaglobe.com/pages/getrss/getrss-business.php'),
-        (u'Technology', u'http://www.thejakartaglobe.com/pages/getrss/getrss-tech.php'),
-        (u'My Jakarta', u'http://www.thejakartaglobe.com/pages/getrss/getrss-myjakarta.php'),
-        (u'International', u'http://www.thejakartaglobe.com/pages/getrss/getrss-international.php'),
-        (u'Life and Times', u'http://www.thejakartaglobe.com/pages/getrss/getrss-lifeandtimes.php'),
+        (u'News', u'http://www.thejakartaglobe.com/news/feed/'),
+        (u'Business', u'http://www.thejakartaglobe.com/business/feed/'),
+        (u'Opinion', u'http://www.thejakartaglobe.com/opinion/feed/'),
+        (u'Count me in', u'http://www.thejakartaglobe.com/count-me-in/feed/'),
+        (u'International', u'http://www.thejakartaglobe.com/international/feed/'),
+        (u'Sports', u'http://www.thejakartaglobe.com/sports/feed/'),
     ]
     __author__ = 'rty'
     pubisher = 'JakartaGlobe.com'
     description = 'JakartaGlobe, Indonesia, Newspaper'
     category = 'News, Indonesia'

     remove_javascript = True
     use_embedded_content = False
     no_stylesheets = True
+    auto_cleanup = True
     language = 'en_ID'
     encoding = 'utf-8'
     conversion_options = {'linearize_tables':True}
     masthead_url = 'http://www.thejakartaglobe.com/pages/2010/images/jak-globe-logo.jpg'
-    keep_only_tags = [
-        dict(name='div', attrs={'class':'story'}),
-        dict(name='span', attrs={'class':'headline'}),
-        dict(name='div', attrs={'class':'story'}),
-        dict(name='p', attrs={'id':'bodytext'})
-    ]
@@ -27,12 +27,11 @@ class JakartaPost(BasicNewsRecipe):
     use_embedded_content = False
     no_javascript = True
     remove_empty_feeds = True
+    auto_cleanup = True

     timefmt = ' [%A, %d %B, %Y]'
     encoding = 'utf-8'

-    keep_only_tags = [dict(name='div', attrs ={'id':'news-main'})]
-
     extra_css = '''
         h1{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:large;}
         .cT-storyDetails{font-family:Arial,Helvetica,sans-serif; color:#666666;font-size:x-small;}
@@ -51,10 +50,6 @@ class JakartaPost(BasicNewsRecipe):
         body{font-family:Arial,Helvetica,sans-serif; font-size:x-small;}
     '''

-    remove_tags = [
-        dict(name='div', attrs ={'class':['text-size']}),
-    ]
-
     feeds = [
         (u'Breaking News', u'http://www.thejakartapost.com/breaking/feed'),
recipes/jeuxvideo.recipe (new file, 47 lines)
@@ -0,0 +1,47 @@
+from __future__ import unicode_literals
+__license__ = 'GPL v3'
+__copyright__ = '2013, Malah <malah at neuf dot fr>'
+'''
+JeuxVideo.com
+'''
+
+__author__ = '2013, Malah <malah at neuf dot fr>'
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class JeuxVideoCom(BasicNewsRecipe):
+    title = 'JeuxVideo.com'
+    __author__ = 'Malah'
+    description = 'La Référence des Jeux Vidéo sur PC et Consoles !'
+    oldest_article = 1.5
+    language = 'fr'
+    max_articles_per_feed = 100
+    remove_empty_feeds = True
+    use_embedded_content = False
+    no_stylesheets = True
+    ignore_duplicate_articles = {'title', 'url'}
+    keep_only_tags = [dict(id=['news_detail','test_txt','test_avis'])]
+    remove_tags = [
+        dict(name='div', attrs={'id':'player_video_article'}),
+        dict(name='div', attrs={'class':'liste-fiches'})
+    ]
+    masthead_url = u'https://upload.wikimedia.org/wikipedia/commons/3/39/Jeuxvideocom.png'
+    feeds = [
+        (u'Section PC',u'http://www.jeuxvideo.com/rss/rss-pc.xml'),
+        (u'Section Xbox One',u'http://www.jeuxvideo.com/rss/rss-xo.xml'),
+        (u'Section PlayStation 4',u'http://www.jeuxvideo.com/rss/rss-ps4.xml'),
+        (u'Section Xbox 360',u'http://www.jeuxvideo.com/rss/rss-360.xml'),
+        (u'Section PlayStation 3',u'http://www.jeuxvideo.com/rss/rss-ps3.xml'),
+        (u'Section Wii U',u'http://www.jeuxvideo.com/rss/rss-wiiu.xml'),
+        (u'Section Wii',u'http://www.jeuxvideo.com/rss/rss-wii.xml'),
+        (u'Section Nintendo 3DS',u'http://www.jeuxvideo.com/rss/rss-3ds.xml'),
+        (u'Section Nintendo DS',u'http://www.jeuxvideo.com/rss/rss-ds.xml'),
+        (u'Section PlayStation Vita',u'http://www.jeuxvideo.com/rss/rss-vita.xml'),
+        (u'Section PlayStation Protable',u'http://www.jeuxvideo.com/rss/rss-psp.xml'),
+        (u'Section Android',u'http://www.jeuxvideo.com/rss/rss-android.xml'),
+        (u'Section Iphone',u'http://www.jeuxvideo.com/rss/rss-iphone.xml'),
+        (u'Section Web',u'http://www.jeuxvideo.com/rss/rss-wb.xml'),
+        (u'Autres news', u'http://www.jeuxvideo.com/rss/rss-news.xml'),
+        (u'Autres vidéos', u'http://www.jeuxvideo.com/rss/rss-videos.xml'),
+        (u'Autres articles', u'http://www.jeuxvideo.com/rss/rss.xml'),
+    ]
recipes/jot_down.recipe (new file, 69 lines)
@@ -0,0 +1,69 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+from __future__ import unicode_literals
+__license__ = 'GPL v3'
+__copyright__ = '23 June 2013, desUBIKado'
+__author__ = 'desUBIKado'
+__description__ = 'Contemporary Culture Magazine'
+__version__ = 'v0.01'
+__date__ = '23, June 2013'
+'''
+http://www.jotdown.es/
+'''
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class jotdown(BasicNewsRecipe):
+    author = 'desUBIKado'
+    description = 'Revista digital con magníficos y extensos artículos'
+    title = u'Jot Down - Contemporary Culture Magazine'
+    publisher = 'Wabi Sabi Investments, S.C.'
+    category = 'Opinion, culture, science, movies, TV shows, music, blogs'
+    language = 'es'
+    timefmt = '[%a, %d %b, %Y]'
+    oldest_article = 7
+    delay = 1
+    max_articles_per_feed = 20
+    masthead_url = 'http://www.jotdown.es/wp-content/uploads/2011/04/logoJotDown.png'
+    use_embedded_content = False
+    remove_javascript = True
+    no_stylesheets = True
+
+    feeds = [
+        (u'Portada', u'http://www.jotdown.es/feed/')
+    ]
+
+    keep_only_tags = [dict(name='div', attrs={'class':['single']}),
+        dict(name='div', attrs={'id':['comments']}),
+    ]
+
+    remove_tags = [dict(name='a', attrs={'href':['http://alternativaseconomicas.coop/']}),
+        dict(name='div', attrs={'class':['reply','after-meta','comment-author vcard']}),
+        dict(name='div', attrs={'align':['center']}),
+        dict(name='span', attrs={'class':['fbreplace']}),
+        dict(name='div', attrs={'id':'respond'})
+    ]
+
+    remove_tags_after = dict(name='div' , attrs={'id':'respond'})
+
+    extra_css = '''
+        .comment-list {font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:12px;}
+    '''
+
+    preprocess_regexps = [
+        # To present the image of the embedded video
+        (re.compile(r'<object type="application/x-shockwave-flash" data="http://www.youtube.com/v',
+            re.DOTALL|re.IGNORECASE), lambda match: '<img src="http://img.youtube.com/vi'),
+        (re.compile(r'&rel=0&fs=1"', re.DOTALL|re.IGNORECASE), lambda match: '/0.jpg"><object'),
+        # To remove the link of the category
+        (re.compile(r'<div class="meta">', re.DOTALL|re.IGNORECASE), lambda match: '<div class="meta"><!-- '),
+        (re.compile(r'</a>, <a href="http://www.jotdown.es/category', re.DOTALL|re.IGNORECASE), lambda match: ', <!--'),
+        (re.compile(r'"category tag">', re.DOTALL|re.IGNORECASE), lambda match: '--> '),
+        (re.compile(r'</a> —', re.DOTALL|re.IGNORECASE), lambda match: ''),
+        # To remove the link of the title
+        (re.compile(r'<h1> <a href="', re.DOTALL|re.IGNORECASE), lambda match: '<h1> <div class="'),
+        (re.compile(r'</a> </h1>', re.DOTALL|re.IGNORECASE), lambda match: '</div> </h1>')
+    ]
@@ -20,7 +20,7 @@ class crnews(BasicNewsRecipe):
     no_stylesheets = True

-    feeds = [(u'Portada', u'http://www.nacion.com/Generales/RSS/EdicionRss.aspx?section=portada'), (u'Ultima Hora', u'http://www.nacion.com/Generales/RSS/UltimaHoraRss.aspx'), (u'Nacionales', u'http://www.nacion.com/Generales/RSS/EdicionRss.aspx?section=elpais'), (u'Entretenimiento', u'http://www.nacion.com/Generales/RSS/EdicionRss.aspx?section=entretenimiento'), (u'Sucesos', u'http://www.nacion.com/Generales/RSS/EdicionRss.aspx?section=sucesos'), (u'Deportes', u'http://www.nacion.com/Generales/RSS/EdicionRss.aspx?section=deportes'), (u'Internacionales', u'http://www.nacion.com/Generales/RSS/EdicionRss.aspx?section=mundo'), (u'Economia', u'http://www.nacion.com/Generales/RSS/EdicionRss.aspx?section=economia'), (u'Aldea Global', u'http://www.nacion.com/Generales/RSS/EdicionRss.aspx?section=aldeaglobal'), (u'Tecnologia', u'http://www.nacion.com/Generales/RSS/EdicionRss.aspx?section=tecnologia'), (u'Opinion', u'http://www.nacion.com/Generales/RSS/EdicionRss.aspx?section=opinion')]
+    feeds = [(u'Portada', u'http://www.nacion.com/rss/'), (u'Ultima Hora', u'http://www.nacion.com/rss/latest/'), (u'Nacionales', u'http://www.nacion.com/rss/nacional/'), (u'Entretenimiento', u'http://www.nacion.com/rss/ocio/'), (u'Sucesos', u'http://www.nacion.com/rss/sucesos/'), (u'Deportes', u'http://www.nacion.com/rss/deportes/'), (u'Internacionales', u'http://www.nacion.com/rss/mundo/'), (u'Economia', u'http://www.nacion.com/rss/economia/'), (u'Vivir', u'http://www.nacion.com/rss/vivir/'), (u'Tecnologia', u'http://www.nacion.com/rss/tecnologia/'), (u'Opinion', u'http://www.nacion.com/rss/opinion/')]

     def get_cover_url(self):
         index = 'http://kiosko.net/cr/np/cr_nacion.html'
76
recipes/lacapital.recipe
Normal file
@ -0,0 +1,76 @@
__license__ = 'GPL v3'
__copyright__ = '2013, Darko Miletic <darko.miletic at gmail.com>'
'''
www.lacapital.com.ar
'''

from calibre.web.feeds.news import BasicNewsRecipe

class LaCapital(BasicNewsRecipe):
    title = 'La Capital de Rosario'
    __author__ = 'Darko Miletic'
    description = 'Noticias, actualidad y toda la informacion de Rosario y la region'
    publisher = 'Diario La Capital S. A.'
    category = 'news, politics, Rosario, Santa Fe, Argentina'
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False
    language = 'es_AR'
    remove_empty_feeds = True
    publication_type = 'newspaper'
    masthead_url = 'http://www.lacapital.com.ar/system/modules/com.tfsla.diario.core/resources/images/logoLaCapital_noCom.png'
    extra_css = """
        body{font-family: Georgia,"Times New Roman",Times,serif }
        img{margin-bottom: 0.4em; display:block}
    """

    conversion_options = {
        'comment'   : description,
        'tags'      : category,
        'publisher' : publisher,
        'language'  : language
    }

    keep_only_tags = [dict(attrs={'class':'leer'})]
    remove_tags_after = dict(attrs={'class':'notaA'})
    remove_tags = [
        dict(name=['meta','link','iframe','object']),
        dict(name='div', attrs={'class':['herramientas','almargen','relacionadas']})
    ]

    # 'Economia' appeared twice in the feed list; the duplicate has been dropped
    feeds = [
        (u'Portada',             u'http://www.lacapital.com.ar/rss/home.xml'),
        (u'La Ciudad',           u'http://www.lacapital.com.ar/rss/laciudad.xml'),
        (u'Politica',            u'http://www.lacapital.com.ar/rss/politica.xml'),
        (u'Economia',            u'http://www.lacapital.com.ar/rss/economia.xml'),
        (u'La Region',           u'http://www.lacapital.com.ar/rss/laregion.xml'),
        (u'Informacion General', u'http://www.lacapital.com.ar/rss/informaciongral.xml'),
        (u'El Mundo',            u'http://www.lacapital.com.ar/rss/elmundo.xml'),
        (u'Opinion',             u'http://www.lacapital.com.ar/rss/opinion.xml'),
        (u'Cartas de lectores',  u'http://www.lacapital.com.ar/rss/cartasdelectores.xml'),
        (u'Escenario',           u'http://www.lacapital.com.ar/rss/escenario.xml'),
        (u'Policiales',          u'http://www.lacapital.com.ar/rss/policiales.xml'),
        (u'Ovacion',             u'http://www.lacapital.com.ar/rss/ovacion.xml'),
        (u'Turismo',             u'http://www.lacapital.com.ar/rss/turismo.xml'),
        (u'Señales',             u'http://www.lacapital.com.ar/rss/senales.xml'),
        (u'Educacion',           u'http://www.lacapital.com.ar/rss/educacion.xml'),
        (u'Estilo',              u'http://www.lacapital.com.ar/rss/estilo.xml'),
        (u'Salud',               u'http://www.lacapital.com.ar/rss/salud.xml'),
        (u'Tecnologia',          u'http://www.lacapital.com.ar/rss/tecnologia.xml')
    ]

    def get_cover_url(self):
        soup = self.index_to_soup('http://www.lacapital.com.ar/impresa/tapa.html')
        for image in soup.findAll('img', alt=True):
            if image['alt'].startswith('Tapa de papel'):
                return image['src']
        return None

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return soup
@ -13,6 +13,8 @@ class LamebookRecipe(BasicNewsRecipe):
     language = 'en'
     use_embedded_content = False
     publication_type = 'blog'
+    reverse_article_order = True
+    encoding = 'utf-8'

     keep_only_tags = [
         dict(name='div', attrs={'class':'entry'})
34
recipes/le_gorafi.recipe
Normal file
@ -0,0 +1,34 @@
from __future__ import unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2013, Malah <malah at neuf dot fr>'
'''
Le GORAFI.fr
'''

__author__ = '2013, Malah <malah at neuf dot fr>'
from calibre.web.feeds.news import BasicNewsRecipe

class legorafi(BasicNewsRecipe):
    title = u'Le GORAFI.fr'
    __author__ = 'Malah'
    description = u'Depuis 1826, toute l\'information de sources contradictoires'
    oldest_article = 7
    language = 'fr'
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True
    keep_only_tags = [
        dict(name='div', attrs={'class':'entry-content'}),
        dict(name='h3', attrs={'id':'comments-title'}),
    ]
    remove_tags = [
        dict(name='div', attrs={'id':'soshake-sharebox'}),
        dict(name='div', attrs={'class':'social-ring'}),
        dict(name='div', attrs={'class':'entry-utility'}),
        dict(name='div', attrs={'id':'respond'}),
    ]
    masthead_url = u'http://web.gweno.free.fr/img/logositeter.png'
    couverture_url = u'http://www.legorafi.fr/wp-content/uploads/2013/02/iconegorafi.png'
    feeds = [
        (u'Articles', u'http://www.legorafi.fr/feed/'),
    ]
111
recipes/le_monde_diplomatique_fr.recipe
Normal file
@ -0,0 +1,111 @@
# vim:fileencoding=utf-8
from __future__ import unicode_literals

__license__ = 'GPL v3'
__copyright__ = '2013'
'''
monde-diplomatique.fr
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.web.feeds import feeds_from_index

class LeMondeDiplomatiqueSiteWeb(BasicNewsRecipe):
    title = u'Le Monde diplomatique.fr'
    __author__ = 'Gaëtan Lehmann'
    description = "Le Monde diplomatique est un mensuel français d’information et d’opinion à la ligne éditoriale nettement engagée en faveur d'une gauche de rupture avec le capitalisme. Il aborde de nombreux sujets — géopolitique, relations internationales, économie, questions sociales, écologie, culture, médias, …"  # noqa
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True
    publisher = 'monde-diplomatique.fr'
    category = 'news, France, world'
    language = 'fr'
    masthead_url = 'http://www.monde-diplomatique.fr/squelettes/images/logotyfa.png'
    timefmt = ' [%d %b %Y]'
    no_stylesheets = True

    feeds = [(u'Blogs', u'http://blog.mondediplo.net/spip.php?page=backend'), (u'Archives', u'http://www.monde-diplomatique.fr/rss/')]

    preprocess_regexps = [
        (re.compile(r'<title>(.*) - Les blogs du Diplo</title>'), lambda m: '<title>' + m.group(1) + '</title>'),
        (re.compile(r'<h2>(.*) - Les blogs du Diplo</h2>'), lambda m: '<h2>' + m.group(1) + '</h2>'),
        (re.compile(r'<title>(.*) \(Le Monde diplomatique\)</title>'), lambda m: '<title>' + m.group(1) + '</title>'),
        (re.compile(r'<h2>(.*) \(Le Monde diplomatique\)</h2>'), lambda m: '<h2>' + m.group(1) + '</h2>'),
        (re.compile(r'<h3>Grand format</h3>'), lambda m: '')]

    remove_tags = [dict(name='div', attrs={'class':'voiraussi liste'}),
                   dict(name='ul', attrs={'class':'hermetique carto hombre_demi_inverse'}),
                   dict(name='a', attrs={'class':'tousles'}),
                   dict(name='h3', attrs={'class':'cat'}),
                   dict(name='div', attrs={'class':'logodiplo'}),
                   dict(name='img', attrs={'class':'spip_logos'}),
                   dict(name='p', attrs={'id':'hierarchie'}),
                   dict(name='div', attrs={'class':'espace'})]

    conversion_options = {
        'comments'  : description,
        'tags'      : category,
        'language'  : language,
        'publisher' : publisher,
        'linearize_tables': True
    }

    remove_empty_feeds = True

    filterDuplicates = True

    # Don't override parse_index: it must raise an exception when there is no
    # index, so that the RSS feeds are still used. Instead, the results of the
    # two index parsers below are mixed with the feed results in parse_feeds().
    def parse_index_valise(self):
        articles = []
        soup = self.index_to_soup('http://www.monde-diplomatique.fr/carnet/')
        cnt = soup.find('ul', attrs={'class':'hermetique liste'})
        for item in cnt.findAll('li'):
            description = ''
            feed_link = item.find('a')
            desc = item.find('div', attrs={'class':'intro'})
            date = item.find('div', attrs={'class':'dates_auteurs'})
            if desc:
                description = desc.string
            if feed_link and feed_link.has_key('href'):
                url = 'http://www.monde-diplomatique.fr' + feed_link['href']
                title = self.tag_to_string(feed_link)
                articles.append({
                    'title'      : title,
                    'date'       : date.string.strip(),
                    'url'        : url,
                    'description': description,
                })
        return [("La valise diplomatique", articles)]

    def parse_index_cartes(self):
        articles = []
        soup = self.index_to_soup('http://www.monde-diplomatique.fr/cartes/')
        cnt = soup.find('div', attrs={'class':'decale hermetique'})
        for item in cnt.findAll('div', attrs={'class':re.compile('grid_3 filet hombre_demi')}):
            feed_link = item.find('a', attrs={'class':'couve'})
            h3 = item.find('h3')
            authorAndDate = item.find('div', attrs={'class':'dates_auteurs'})
            author, date = authorAndDate.string.strip().split(', ')
            if feed_link and feed_link.has_key('href'):
                url = 'http://www.monde-diplomatique.fr' + feed_link['href']
                title = self.tag_to_string(h3)
                articles.append({
                    'title'      : title,
                    'date'       : date,
                    'url'        : url,
                    'description': author,
                })
        return [("Cartes", articles)]

    def parse_feeds(self):
        feeds = BasicNewsRecipe.parse_feeds(self)
        valise = feeds_from_index(self.parse_index_valise(), oldest_article=self.oldest_article,
                                  max_articles_per_feed=self.max_articles_per_feed,
                                  log=self.log)
        cartes = feeds_from_index(self.parse_index_cartes(), oldest_article=self.oldest_article,
                                  max_articles_per_feed=self.max_articles_per_feed,
                                  log=self.log)
        feeds = valise + feeds + cartes
        return feeds
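The two parse_index_* helpers above rely on calibre's feeds_from_index, which turns their (section title, list of article dicts) pairs into the same Feed objects that parse_feeds obtains from the RSS URLs, so the three sources can simply be concatenated, with "La valise diplomatique" first and "Cartes" last.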
@ -2,7 +2,7 @@
 __author__ = 'Sylvain Durand <sylvain.durand@ponts.org>'
 __license__ = 'GPL v3'

-import time
+import time, re

 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
@ -13,7 +13,7 @@ class LeMonde(BasicNewsRecipe):

     title = u'Le Monde: Édition abonnés'
     __author__ = 'Sylvain Durand'
-    description = u'Disponible du lundi au samedi à partir de 14 heures environ, avec tous ses cahiers.'
+    description = u'La version papier du quotidien Le Monde, disponible du lundi au samedi à partir de 14 heures environ, avec tous ses cahiers.'
     language = 'fr'
     encoding = 'utf8'
@ -65,26 +65,38 @@ class LeMonde(BasicNewsRecipe):
         url = time.strftime(self.journal_url,self.date)
         soup = self.index_to_soup(url).sommaire
         sections = []
-        for sec in soup.findAll("section"):
-            articles = []
-            if sec['cahier'] != "Le Monde":
-                for col in sec.findAll("fnts"):
-                    col.extract()
-                if sec['cahier']=="Le Monde Magazine":
-                    continue
-            for art in sec.findAll("art"):
-                if art.txt.string and art.ttr.string:
-                    if art.find(['url']):
-                        art.insert(6,'<div id="photo"><img src="'+art.find(['url']).string+'" /></div>')
-                    if art.find(['lgd']) and art.find(['lgd']).string:
-                        art.insert(7,'<div id="lgd">'+art.find(['lgd']).string+'</div>')
-                    article = "<html><head></head><body>"+unicode(art)+"</body></html>"
-                    article = article.replace('<![CDATA[','').replace(']]>','').replace(' oC ','°C ')
-                    article = article.replace('srttr>','h3>').replace('ssttr>','h2>').replace('ttr>','h1>')
-                    f = PersistentTemporaryFile()
-                    f.write(article)
-                    articles.append({'title':art.ttr.string,'url':"file:///"+f.name})
-            sections.append((sec['nom'], articles))
+        try:
+            for sec in soup.findAll("section"):
+                articles = []
+                if sec['cahier'] != "Le Monde":
+                    for col in sec.findAll("fnts"):
+                        col.extract()
+                    if sec['cahier']=="Le Monde Magazine":
+                        continue
+                for art in sec.findAll("art"):
+                    if art.txt.string and art.ttr.string:
+                        if art.find(['url']):
+                            art.insert(6,'<div id="photo"><img src="'+art.find(['url']).string+'" /></div>')
+                        if art.find(['lgd']) and art.find(['lgd']).string:
+                            art.insert(7,'<div id="lgd">'+art.find(['lgd']).string+'</div>')
+
+                        def guillemets(match):
+                            if match.group(1) == u"=":
+                                return match.group(0)
+                            return u'%s« %s »' % (match.group(1), match.group(2))
+
+                        article = "<html><head></head><body>"+unicode(art)+"</body></html>"
+                        article = article.replace('<![CDATA[','').replace(']]>','').replace(' oC ','°C ')
+                        article = article.replace('srttr>','h3>').replace('ssttr>','h2>').replace('ttr>','h1>')
+                        article = article.replace("'" , u'\u2019')
+                        article = re.sub('(.|^)"([^"]+)"', guillemets, article)
+
+                        f = PersistentTemporaryFile()
+                        f.write(article)
+                        articles.append({'title':art.ttr.string,'url':"file:///"+f.name})
+                sections.append((sec['nom'], articles))
+        except AttributeError:
+            self.log("Vos identifiants sont incorrects, ou votre abonnement LeMonde.fr ne vous permet pas de télécharger le journal.")
         return sections

     def preprocess_html(self, soup):
@ -92,3 +104,4 @@ class LeMonde(BasicNewsRecipe):
             lgd.contents[-1].extract()
         return soup
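A quick check of the guillemets helper added to the Le Monde recipe above: re.sub('(.|^)"([^"]+)"', guillemets, u'He said "hi"') yields u'He said « hi »', while an HTML attribute such as ="foo" is left untouched because the character captured before the opening quote is '='.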
49
recipes/le_nouvel_observateur.recipe
Normal file
@ -0,0 +1,49 @@
from __future__ import unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2013, Malah <malah at neuf dot fr>'
'''
Le Nouvel Observateur
'''

__author__ = '2013, Malah <malah at neuf dot fr>'

from calibre.web.feeds.news import BasicNewsRecipe

class LeNouvelObs(BasicNewsRecipe):
    title = u'Le Nouvel Observateur'
    __author__ = 'Malah'
    description = u'Actualités en temps réel, Info à la Une'
    oldest_article = 1
    language = 'fr'
    max_articles_per_feed = 25
    use_embedded_content = False
    ignore_duplicate_articles = ('title', 'url')
    remove_empty_feeds = True
    no_stylesheets = True
    masthead_url = u'https://upload.wikimedia.org/wikipedia/fr/f/f9/Le_Nouvel_observateur.png'
    feeds = [
        (u'Politique', u'http://tempsreel.nouvelobs.com/politique/rss.xml'),
        (u'Société', u'http://tempsreel.nouvelobs.com/societe/rss.xml'),
        (u'Monde', u'http://tempsreel.nouvelobs.com/monde/rss.xml'),
        (u'Economie', u'http://tempsreel.nouvelobs.com/economie/rss.xml'),
        (u'Culture', u'http://tempsreel.nouvelobs.com/culture/rss.xml'),
        (u'High Tech', u'http://obsession.nouvelobs.com/high-tech/rss.xml'),
        (u'Education', u'http://tempsreel.nouvelobs.com/education/rss.xml'),
        (u'Services', u'http://tempsreel.nouvelobs.com/services/rss.xml'),
        (u'Sport', u'http://tempsreel.nouvelobs.com/sport/rss.xml'),
        (u'CinéObs', u'http://cinema.nouvelobs.com/articles.rss'),
        (u'TéléObs', u'http://teleobs.nouvelobs.com/rss.xml'),
        (u'Autres Actualités', u'http://tempsreel.nouvelobs.com/rss.xml'),
    ]
    keep_only_tags = [
        dict(name='h1', attrs={'id':'obs-article-title'}),
        dict(name='div', attrs={'class':'obs-date'}),
        dict(name='div', attrs={'class':'art-auteur'}),
        dict(name='h2', attrs={'class':'obs-article-intro'}),
        dict(name='div', attrs={'id':'obs-article-keywords'}),
        dict(name='div', attrs={'id':'obs-article-mainpic'}),
        dict(name='div', attrs={'itemprop':'articleBody'}),
        dict(name='img', attrs={'id':'ObsImg'}),
        dict(name='p', attrs={'class':'date-media'}),
        dict(name='p', attrs={'id':'ObsDesc'}),
    ]
@ -21,42 +21,10 @@ class Liberation(BasicNewsRecipe):
     max_articles_per_feed = 15
     no_stylesheets = True
     remove_empty_feeds = True
-    filterDuplicates = True
+    needs_subscription = 'optional'

-    extra_css = '''
-        h1, h2, h3 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
-        p.subtitle {font-size:xx-small; font-family:Arial,Helvetica,sans-serif;}
-        h4, h5, h2.rubrique, {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
-        .ref, .date, .author, .legende {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
-        .mna-body, entry-body {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
-    '''
-
-    keep_only_tags = [
-        dict(name='div', attrs={'class':'article'})
-        ,dict(name='div', attrs={'class':'text-article m-bot-s1'})
-        ,dict(name='div', attrs={'class':'entry'})
-        ,dict(name='div', attrs={'class':'col_contenu'})
-    ]
-
-    remove_tags_after = [
-        dict(name='div',attrs={'class':['object-content text text-item', 'object-content', 'entry-content', 'col01', 'bloc_article_01']})
-        ,dict(name='p',attrs={'class':['chapo']})
-        ,dict(id='_twitter_facebook')
-    ]
-
-    remove_tags = [
-        dict(name='iframe')
-        ,dict(name='a', attrs={'class':'lnk-comments'})
-        ,dict(name='div', attrs={'class':'toolbox'})
-        ,dict(name='ul', attrs={'class':'share-box'})
-        ,dict(name='ul', attrs={'class':'tool-box'})
-        ,dict(name='ul', attrs={'class':'rub'})
-        ,dict(name='p',attrs={'class':['chapo']})
-        ,dict(name='p',attrs={'class':['tag']})
-        ,dict(name='div',attrs={'class':['blokLies']})
-        ,dict(name='div',attrs={'class':['alire']})
-        ,dict(id='_twitter_facebook')
-    ]
+    keep_only_tags = [dict(name='article')]
+    remove_tags = [dict(attrs={'class':['tool-bar']})]

     feeds = [
         (u'La une', u'http://rss.liberation.fr/rss/9/')
@ -69,6 +37,16 @@ class Liberation(BasicNewsRecipe):
         ,(u'Sports', u'http://www.liberation.fr/rss/12/')
     ]

+    def get_browser(self):
+        br = BasicNewsRecipe.get_browser(self)
+        if self.username is not None and self.password is not None:
+            br.open('http://token.liberation.fr/accounts/login/')
+            br.select_form(nr=0)
+            br['email'] = self.username
+            br['password'] = self.password
+            br.submit()
+        return br
+
     def get_masthead_url(self):
         masthead = 'http://s0.libe.com/libe/img/common/logo-liberation-150.png'
         br = BasicNewsRecipe.get_browser(self)
@ -78,3 +56,15 @@ class Liberation(BasicNewsRecipe):
             self.log("\nCover unavailable")
             masthead = None
         return masthead
+
+    def get_article_url(self, article):
+        url = BasicNewsRecipe.get_article_url(self, article)
+        url = url.split('/')[-2]
+        encoding = {'0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&',
+                    '0D': '?', '0E': '-', '0N': '.com', '0L': 'http://', '0S':
+                    'www.', '0I': '_'}
+        for k, v in encoding.iteritems():
+            url = url.replace(k, v)
+        return url.partition('?')[0]
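A sketch of what the new get_article_url undoes (the token below is invented for illustration; the substitution mapping is the recipe's own):

    token = '0L0Sliberation0Bfr0Cjour'
    for k, v in {'0L': 'http://', '0S': 'www.', '0B': '.', '0C': '/'}.items():
        token = token.replace(k, v)
    # token is now 'http://www.liberation.fr/jour'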
@ -1,103 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

__license__ = 'GPL v3'
__copyright__ = '2012, Rémi Vanicat <vanicat at debian.org>'
'''
liberation.fr
'''
# The cleaning is from the Liberation recipe, by Darko Miletic

from calibre.web.feeds.news import BasicNewsRecipe

class Liberation(BasicNewsRecipe):

    title = u'Libération: Édition abonnés'
    __author__ = 'Rémi Vanicat'
    description = u'Actualités'
    category = 'Actualités, France, Monde'
    language = 'fr'
    needs_subscription = True

    use_embedded_content = False
    no_stylesheets = True
    remove_empty_feeds = True

    extra_css = '''
        h1, h2, h3 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
        p.subtitle {font-size:xx-small; font-family:Arial,Helvetica,sans-serif;}
        h4, h5, h2.rubrique, {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
        .ref, .date, .author, .legende {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
        .mna-body, entry-body {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
    '''

    keep_only_tags = [
        dict(name='div', attrs={'class':'article'})
        ,dict(name='div', attrs={'class':'text-article m-bot-s1'})
        ,dict(name='div', attrs={'class':'entry'})
        ,dict(name='div', attrs={'class':'col_contenu'})
    ]

    remove_tags_after = [
        dict(name='div',attrs={'class':['object-content text text-item', 'object-content', 'entry-content', 'col01', 'bloc_article_01']})
        ,dict(name='p',attrs={'class':['chapo']})
        ,dict(id='_twitter_facebook')
    ]

    remove_tags = [
        dict(name='iframe')
        ,dict(name='a', attrs={'class':'lnk-comments'})
        ,dict(name='div', attrs={'class':'toolbox'})
        ,dict(name='ul', attrs={'class':'share-box'})
        ,dict(name='ul', attrs={'class':'tool-box'})
        ,dict(name='ul', attrs={'class':'rub'})
        ,dict(name='p',attrs={'class':['chapo']})
        ,dict(name='p',attrs={'class':['tag']})
        ,dict(name='div',attrs={'class':['blokLies']})
        ,dict(name='div',attrs={'class':['alire']})
        ,dict(id='_twitter_facebook')
    ]

    index = 'http://www.liberation.fr/abonnes/'

    def get_browser(self):
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None and self.password is not None:
            br.open('http://www.liberation.fr/jogger/login/')
            br.select_form(nr=0)
            br['email'] = self.username
            br['password'] = self.password
            br.submit()
        return br

    def parse_index(self):
        soup = self.index_to_soup(self.index)

        content = soup.find('div', { 'class':'block-content' })

        articles = []
        cat_articles = []

        for tag in content.findAll(recursive=False):
            if(tag['class']=='headrest headrest-basic-rounded'):
                cat_articles = []
                articles.append((tag.find('h5').contents[0],cat_articles))
            else:
                title = tag.find('h3').contents[0]
                url = tag.find('a')['href']
                print(url)
                descripion = tag.find('p',{ 'class':'subtitle' }).contents[0]
                article = {
                    'title': title,
                    'url': url,
                    'descripion': descripion,
                    'content': ''
                }
                cat_articles.append(article)
        return articles

# Local Variables:
# mode: python
# End:
@ -1,23 +1,30 @@
 # vim:fileencoding=UTF-8
 from __future__ import unicode_literals
 from calibre.web.feeds.news import BasicNewsRecipe
+import re

 class AListApart (BasicNewsRecipe):
     __author__ = 'Marc Busqué <marc@lamarciana.com>'
     __url__ = 'http://www.lamarciana.com'
-    __version__ = '2.0'
+    __version__ = '2.0.1'
     __license__ = 'GPL v3'
     __copyright__ = '2012, Marc Busqué <marc@lamarciana.com>'
     title = u'A List Apart'
     description = u'A List Apart Magazine (ISSN: 1534-0295) explores the design, development, and meaning of web content, with a special focus on web standards and best practices. This recipe retrieves articles and columns.'
     language = 'en'
     tags = 'web development, software'
     oldest_article = 120
     remove_empty_feeds = True
     encoding = 'utf8'
     cover_url = u'http://alistapart.com/pix/alalogo.gif'
     extra_css = u'img {max-width: 100%; display: block; margin: auto;}'

     feeds = [
         (u'A List Apart', u'http://feeds.feedburner.com/alistapart/abridged'),
     ]
+
+    def image_url_processor(self, baseurl, url):
+        if re.findall('alistapart\.com', url):
+            return 'http:'+url
+        else:
+            return url
@ -1,16 +0,0 @@
from calibre.web.feeds.news import CalibrePeriodical

class LivingDigital(CalibrePeriodical):

    title = 'Living Digital'
    calibre_periodicals_slug = 'living-digital'

    description = '''
    Catch the latest buzz in the digital world with Living Digital. Enjoy
    reviews, news, features and recommendations on a wide range of consumer
    technology products - from smartphones to flat panel TVs, netbooks to
    cameras, and many more consumer lifestyle gadgets. To subscribe, visit
    <a href="http://news.calibre-ebook.com/periodical/living-digital">calibre
    Periodicals</a>.
    '''
    language = 'en_IN'
14
recipes/ludwig_mises.recipe
Normal file
@ -0,0 +1,14 @@
from calibre.web.feeds.news import AutomaticNewsRecipe

class BasicUserRecipe1373130372(AutomaticNewsRecipe):
    title = u'Ludwig von Mises Institute'
    __author__ = 'anywho'
    language = 'en'
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True

    feeds = [(u'Daily Articles (Full text version)',
              u'http://feed.mises.org/MisesFullTextArticles'),
             (u'Mises Blog Posts',
              u'http://mises.org/blog/index.rdf')]
@ -1,3 +1,6 @@
+# -*- mode:python -*-
+from __future__ import unicode_literals
+
 __license__ = 'GPL v3'
 __copyright__ = '2009, Mathieu Godlewski <mathieu at godlewski.fr>; 2010-2012, Louis Gesbert <meta at antislash dot info>'
 '''
@ -6,57 +9,164 @@ Mediapart

 __author__ = '2009, Mathieu Godlewski <mathieu at godlewski.fr>; 2010-2012, Louis Gesbert <meta at antislash dot info>'

-from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
+import re
+from calibre.ebooks.BeautifulSoup import BeautifulSoup
 from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.web.feeds import feeds_from_index
+from datetime import date,timedelta

 class Mediapart(BasicNewsRecipe):
     title = 'Mediapart'
     __author__ = 'Mathieu Godlewski, Louis Gesbert'
     description = 'Global news in french from news site Mediapart'
-    oldest_article = 7
+    publication_type = 'newspaper'
     language = 'fr'
     needs_subscription = True
-    max_articles_per_feed = 50
+    oldest_article = 2

     use_embedded_content = False
     no_stylesheets = True

-    cover_url = 'http://static.mediapart.fr/files/pave_mediapart.jpg'
+    cover_url = 'https://static.mediapart.fr/files/M%20Philips/logo-mediapart.png'
+
+    # --
+    oldest_article_date = date.today() - timedelta(days=oldest_article)
+
+    # -- get the index (the feed at 'http://www.mediapart.fr/articles/feed' only has
+    # the 10 last elements :/)

     feeds = [
-        ('Les articles', 'http://www.mediapart.fr/articles/feed'),
+        ('La Une', 'http://www.mediapart.fr/articles/feed'),
     ]

+    def parse_feeds(self):
+        feeds = super(Mediapart, self).parse_feeds()
+        feeds += feeds_from_index(self.my_parse_index(feeds))
+        return feeds
+
+    def my_parse_index(self, la_une):
+        articles = []
+
+        breves = []
+        liens = []
+        confidentiels = []
+
+        soup = self.index_to_soup('https://www.mediapart.fr/journal/fil-dactualites')
+        page = soup.find('div', {'id':'pageFirstContent'})
+        fils = page.find('div', {'class':re.compile(r'\bcontent-journal\b')})
+
+        for article in fils.findAll('div'):
+            try:
+                title = article.find('h2',recursive=False)
+                if title is None or title['class'] == 'title-specific':
+                    continue
+
+                # print "found fil ",title
+                article_type = article.find('a', {'href': re.compile(r'.*\/type-darticles\/.*')}).renderContents()
+                # print "kind: ",article_type
+
+                for s in title('span'):
+                    s.replaceWith(s.renderContents() + "\n")
+                url = title.find('a', href=True)['href']
+
+                article_date = self.parse_french_date(article.find("span", "article-date").renderContents())
+
+                if article_date < self.oldest_article_date:
+                    # print "too old"
+                    continue
+
+                authors = article.findAll('a',{'class':re.compile(r'\bjournalist\b')})
+                authors = [self.tag_to_string(a) for a in authors]
+
+                description = article.find('div', {'class': lambda c: c != 'taxonomy-teaser'}, recursive=False).findAll('p')
+
+                # print "fil ",title," by ",authors," : ",description
+
+                summary = {
+                    'title': self.tag_to_string(title).strip(),
+                    'author': ', '.join(authors),
+                    'url': url,
+                    'date': u'' + article_date.strftime("%A %d %b %Y"),
+                    'description': '\n'.join([self.tag_to_string(d) for d in description]),
+                }
+                {
+                    "Brève": breves,
+                    "Lien": liens,
+                    "Confidentiel": confidentiels,
+                }.get(article_type).append(summary)
+            except:
+                pass
+
+        # print 'La Une: ', len(la_une), ' articles'
+        # for a in la_une: print a["title"]
+        # print 'Brèves: ', len(breves), ' articles'
+        # print 'Revue web: ', len(liens), ' articles'
+        # print 'Confidentiel: ', len(confidentiels), ' articles'
+
+        articles += [('Brèves', breves)] if breves else []
+        articles += [('Revue du Web', liens)] if liens else []
+        articles += [('Confidentiel', confidentiels)] if confidentiels else []
+        return articles
+
     # -- print-version

-    conversion_options = { 'smarten_punctuation' : True }
+    conversion_options = {'smarten_punctuation' : True}

-    remove_tags = [ dict(name='div', attrs={'class':'print-source_url'}) ]
+    remove_tags = [dict(name='div', attrs={'class':'print-source_url'})]
+
+    # non-locale specific date parse (strptime("%d %b %Y",s) would work with a french locale)
+    def parse_french_date(self, date_str):
+        date_arr = date_str.lower().split()
+        return date(day=int(date_arr[0]),
+                    year=int(date_arr[2]),
+                    month=[None, 'janvier', 'février', 'mars', 'avril', 'mai', 'juin', 'juillet',
+                           'août', 'septembre', 'octobre', 'novembre', 'décembre'].index(date_arr[1]))
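Because parse_french_date simply indexes the list of French month names, a call such as parse_french_date('25 octobre 2013') returns datetime.date(2013, 10, 25) without ever touching the process locale.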
     def print_version(self, url):
         raw = self.browser.open(url).read()
         soup = BeautifulSoup(raw.decode('utf8', 'replace'))
-        link = soup.find('a', {'title':'Imprimer'})
-        if link is None:
+
+        # Filter old articles
+        article_date = self.parse_french_date(self.tag_to_string(soup.find('span', 'article-date')))
+        if article_date < self.oldest_article_date:
             return None
-        return link['href']
+
+        tools = soup.find('div', {'class':'menu-tools'})
+        link = tools.find('a', {'href': re.compile(r'\/print\/.*')})
+        if link is None:
+            print 'Error: print link not found'
+            return None
+        return 'https://mediapart.fr/' + link['href']

     # -- Handle login
     def get_browser(self):
         br = BasicNewsRecipe.get_browser(self)
         if self.username is not None and self.password is not None:
-            br.open('http://www.mediapart.fr/')
-            br.select_form(nr=0)
+            br.open('https://www.mediapart.fr/user')
+            br.select_form(nr=1)
             br['name'] = self.username
             br['pass'] = self.password
             br.submit()
         return br

-    def preprocess_html(self, soup):
-        for title in soup.findAll('p', {'class':'titre_page'}):
-            title.name = 'h3'
-        for legend in soup.findAll('span', {'class':'legend'}):
-            legend.insert(0, Tag(soup, 'br', []))
-            legend.name = 'small'
-        return soup
+    # This is a workaround for articles with embedded scribd content, which
+    # include <body></body> tags _within_ the body
+    preprocess_regexps = [
+        (re.compile(r'(<body.*?>)(.*)</body>', re.IGNORECASE|re.DOTALL),
+         lambda match:
+            match.group(1)
+            + re.sub(re.compile(r'</?body>', re.IGNORECASE|re.DOTALL),'',
+                     match.group(2))
+            + '</body>')
+    ]
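On a toy input such as '<body><p>a</p><body>x</body><p>b</p></body>', the workaround regex keeps the opening tag as group(1), strips every stray <body>/</body> from the greedy group(2), and re-appends the closing tag, giving '<body><p>a</p>x<p>b</p></body>'.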
+    # def preprocess_html(self, soup):
+    #     for title in soup.findAll('p', {'class':'titre_page'}):
+    #         title.name = 'h3'
+    #     for legend in soup.findAll('span', {'class':'legend'}):
+    #         legend.insert(0, Tag(soup, 'br', []))
+    #         legend.name = 'em'
+    #     return soup
@ -7,71 +7,75 @@ import time
 class AdvancedUserRecipe1306097511(BasicNewsRecipe):
     title = u'Metro UK'
     description = 'News from The Metro, UK'
-    #timefmt = ''
-    __author__ = 'Dave Asbury'
-    #last update 4/4/13
-    #cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/276636_117118184990145_2132092232_n.jpg'

     cover_url = 'https://twimg0-a.akamaihd.net/profile_images/1638332595/METRO_LETTERS-01.jpg'
     remove_empty_feeds = True
     remove_javascript = True
+    no_stylesheets = True
     auto_cleanup = True
     max_articles_per_feed = 12
     ignore_duplicate_articles = {'title', 'url'}
-    encoding = 'UTF-8'
+    #encoding = 'UTF-8'

     language = 'en_GB'
     masthead_url = 'http://e-edition.metro.co.uk/images/metro_logo.gif'
     compress_news_images = True
+    compress_news_images_max_size = 30
+    remove_attributes = ['style', 'font']
+    preprocess_regexps = [
+        (re.compile(r'\| Metro News', re.IGNORECASE | re.DOTALL), lambda match: ''),
+    ]

     def parse_index(self):
         articles = {}
         key = None
         ans = []
-        feeds = [ ('UK', 'http://metro.co.uk/news/uk/'),
+        feeds = [('UK', 'http://metro.co.uk/news/uk/'),
                  ('World', 'http://metro.co.uk/news/world/'),
                  ('Weird', 'http://metro.co.uk/news/weird/'),
                  ('Money', 'http://metro.co.uk/news/money/'),
                  ('Sport', 'http://metro.co.uk/sport/'),
                  ('Guilty Pleasures', 'http://metro.co.uk/guilty-pleasures/')
                  ]
         for key, feed in feeds:
             soup = self.index_to_soup(feed)
             articles[key] = []
             ans.append(key)

             today = datetime.date.today()
             today = time.mktime(today.timetuple())-60*60*24

             for a in soup.findAll('a'):
                 for name, value in a.attrs:
                     if name == "class" and value=="post":
                         url = a['href']
                         title = a['title']
                         print title
                         description = ''
                         m = re.search('^.*uk/([^/]*)/([^/]*)/([^/]*)/', url)
                         skip = 1
                         if len(m.groups()) == 3:
                             g = m.groups()
                             dt = datetime.datetime.strptime(''+g[0]+'-'+g[1]+'-'+g[2], '%Y-%m-%d')
                             pubdate = time.strftime('%a, %d %b', dt.timetuple())

                             dt = time.mktime(dt.timetuple())
                             if dt >= today:
                                 print pubdate
                                 skip = 0
                         else:
                             pubdate = strftime('%a, %d %b')

                         summary = a.find(True, attrs={'class':'excerpt'})
                         if summary:
                             description = self.tag_to_string(summary, use_alt=False)

                         if skip == 0:
                             articles[key].append(
                                 dict(title=title, url=url, date=pubdate,
                                      description=description,
                                      content=''))
         #ans = self.sort_index_by(ans, {'The Front Page':-1, 'Dining In, Dining Out':1, 'Obituaries':2})
-        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
+        ans = [(key, articles[key]) for key in ans if key in articles]
         return ans
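Note the cutoff in parse_index above: time.mktime(today.timetuple()) - 60*60*24 is the Unix timestamp for the start of the previous day, so only articles whose URL encodes yesterday's or today's date clear the skip flag.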
@ -1,5 +1,5 @@
 __license__ = 'GPL v3'
-__copyright__ = '2010-2011, Eddie Lau'
+__copyright__ = '2010-2013, Eddie Lau'

 # Region - Hong Kong, Vancouver, Toronto
 __Region__ = 'Hong Kong'
@ -32,6 +32,7 @@ __Date__ = ''

 '''
 Change Log:
+2013/09/28: allow thumbnails even with hi-res images
 2012/04/24: improved parsing of news.mingpao.com content
 2011/12/18: update the overridden create_odf(.) routine with the one from Calibre version 0.8.31. Move __UseChineseTitle__ usage away
             from create_odf(.). Optional support of text_summary and thumbnail images in Kindle's article view. Start new day
@ -846,8 +847,7 @@ class MPRecipe(BasicNewsRecipe):
         return soup

     def populate_article_metadata(self, article, soup, first):
-        # thumbnails shouldn't be available if using hi-res images
-        if __IncludeThumbnails__ and __HiResImg__ == False and first and hasattr(self, 'add_toc_thumbnail'):
+        if __IncludeThumbnails__ and first and hasattr(self, 'add_toc_thumbnail'):
             img = soup.find('img')
             if img is not None:
                 self.add_toc_thumbnail(article, img['src'])
@ -1071,3 +1071,4 @@ class MPRecipe(BasicNewsRecipe):
@ -1,18 +1,15 @@
-#!/usr/bin/env python
-
-__license__ = 'GPL v3'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2009-2013, Darko Miletic <darko.miletic at gmail.com>'
 '''
-elargentino.com
+sur.infonews.com
 '''

+import datetime
 from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import Tag

 class MiradasAlSur(BasicNewsRecipe):
     title = 'Miradas al Sur'
     __author__ = 'Darko Miletic'
-    description = 'Revista Argentina'
+    description = 'Semanario Argentino'
     publisher = 'ElArgentino.com'
     category = 'news, politics, Argentina'
     oldest_article = 7
@ -20,53 +17,51 @@ class MiradasAlSur(BasicNewsRecipe):
     no_stylesheets = True
     use_embedded_content = False
     encoding = 'utf-8'
     language = 'es_AR'
+    remove_empty_feeds = True
+    masthead_url = 'http://sur.infonews.com/sites/default/files/www_miradas_al_sur_com_logo.gif'
+    extra_css = """
+        body{font-family: Arial,Helvetica,sans-serif}
+        h1{font-family: Georgia,Times,serif}
+        .field-field-story-author{color: gray; font-size: small}
+    """
+    conversion_options = {
+        'comment'   : description
+        , 'tags'      : category
+        , 'publisher' : publisher
+        , 'language'  : language
+        , 'series'    : title
+    }

-    lang = 'es-AR'
-    direction = 'ltr'
-    INDEX = 'http://www.elargentino.com/medios/123/Miradas-al-Sur.html'
-    extra_css = ' .titulo{font-size: x-large; font-weight: bold} .volantaImp{font-size: small; font-weight: bold} '
-
-    html2lrf_options = [
-        '--comment' , description
-        , '--category' , category
-        , '--publisher', publisher
-    ]
-
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'
-
-    keep_only_tags = [dict(name='div', attrs={'class':'ContainerPop'})]
-
-    remove_tags = [dict(name='link')]
-
-    feeds = [(u'Articulos', u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=123&Content-Type=text/xml&ChannelDesc=Miradas%20al%20Sur')]
-
-    def print_version(self, url):
-        main, sep, article_part = url.partition('/nota-')
-        article_id, rsep, rrest = article_part.partition('-')
-        return u'http://www.elargentino.com/Impresion.aspx?Id=' + article_id
-
-    def preprocess_html(self, soup):
-        for item in soup.findAll(style=True):
-            del item['style']
-        soup.html['lang'] = self.lang
-        soup.html['dir' ] = self.direction
-        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
-        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
-        soup.head.insert(0,mlang)
-        soup.head.insert(1,mcharset)
-        return soup
+    keep_only_tags = [dict(name='div', attrs={'id':['content-header', 'content-area']})]
+    remove_tags = [
+        dict(name=['link','meta','iframe','embed','object']),
+        dict(name='form', attrs={'class':'fivestar-widget'}),
+        dict(attrs={'class':lambda x: x and 'terms-inline' in x.split()})
+    ]
+
+    feeds = [
+        (u'Politica'           , u'http://sur.infonews.com/taxonomy/term/1/0/feed'),
+        (u'Internacional'      , u'http://sur.infonews.com/taxonomy/term/2/0/feed'),
+        (u'Informe Especial'   , u'http://sur.infonews.com/taxonomy/term/14/0/feed'),
+        (u'Delitos y pesquisas', u'http://sur.infonews.com/taxonomy/term/6/0/feed'),
+        (u'Lesa Humanidad'     , u'http://sur.infonews.com/taxonomy/term/7/0/feed'),
+        (u'Cultura'            , u'http://sur.infonews.com/taxonomy/term/8/0/feed'),
+        (u'Deportes'           , u'http://sur.infonews.com/taxonomy/term/9/0/feed'),
+        (u'Contratapa'         , u'http://sur.infonews.com/taxonomy/term/10/0/feed'),
+    ]

     def get_cover_url(self):
+        # determine the series number, unfortunately not gonna happen now
+        #self.conversion_options.update({'series_index':seriesnr})
         cover_url = None
-        soup = self.index_to_soup(self.INDEX)
-        cover_item = soup.find('div',attrs={'class':'colder'})
+        cdate = datetime.date.today()
+        todayweekday = cdate.isoweekday()
+        if (todayweekday != 7):
+            cdate -= datetime.timedelta(days=todayweekday)
+        cover_page_url = cdate.strftime('http://sur.infonews.com/ediciones/%Y-%m-%d/tapa')
+        soup = self.index_to_soup(cover_page_url)
+        cover_item = soup.find('img', attrs={'class':lambda x: x and 'imagecache-tapa_edicion_full' in x.split()})
         if cover_item:
-            clean_url = self.image_url_processor(None,cover_item.div.img['src'])
-            cover_url = 'http://www.elargentino.com' + clean_url + '&height=600'
+            cover_url = cover_item['src']
         return cover_url
-
-    def image_url_processor(self, baseurl, url):
-        base, sep, rest = url.rpartition('?Id=')
-        img, sep2, rrest = rest.partition('&')
-        return base + sep + img
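Miradas al Sur appears to be a Sunday weekly, so the rewritten get_cover_url rolls the date back to the most recent Sunday (isoweekday() is 7 on Sundays) before building the cover URL; on Wednesday 2013-10-23, for example, it would fetch http://sur.infonews.com/ediciones/2013-10-20/tapa.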
@ -1,46 +1,49 @@
 from calibre.web.feeds.recipes import BasicNewsRecipe

-class NatGeoMag(BasicNewsRecipe):
-    title = 'National Geographic Mag'
-    __author__ = 'Terminal Veracity'
-    description = 'The National Geographic Magazine'
-    publisher = 'National Geographic'
-    oldest_article = 31
-    max_articles_per_feed = 50
-    category = 'geography, magazine'
-    language = 'en'
-    publication_type = 'magazine'
-    cover_url = 'http://www.yourlogoresources.com/wp-content/uploads/2011/09/national-geographic-logo.jpg'
-    use_embedded_content = False
-    no_stylesheets = True
-    remove_javascript = True
-    recursions = 1
-    remove_empty_feeds = True
-    feeds = [('National Geographic Magazine', 'http://feeds.nationalgeographic.com/ng/NGM/NGM_Magazine')]
-    remove_tags = [dict(name='div', attrs={'class':['nextpage_continue', 'subscribe']})]
-    keep_only_tags = [dict(attrs={'class':'main_3narrow'})]
-    extra_css = """
-        h1 {font-size: large; font-weight: bold; margin: .5em 0; }
-        h2 {font-size: large; font-weight: bold; margin: .5em 0; }
-        h3 {font-size: medium; font-weight: bold; margin: 0 0; }
-        .article_credits_author {font-size: small; font-style: italic; }
-        .article_credits_photographer {font-size: small; font-style: italic; display: inline }
-        """
-
-    def parse_feeds(self):
-        feeds = BasicNewsRecipe.parse_feeds(self)
-        for feed in feeds:
-            for article in feed.articles[:]:
-                if 'Flashback' in article.title:
-                    feed.articles.remove(article)
-                elif 'Desktop Wallpaper' in article.title:
-                    feed.articles.remove(article)
-                elif 'Visions of Earth' in article.title:
-                    feed.articles.remove(article)
-                elif 'Your Shot' in article.title:
-                    feed.articles.remove(article)
-                elif 'MyShot' in article.title:
-                    feed.articles.remove(article)
-                elif 'Field Test' in article.title:
-                    feed.articles.remove(article)
-        return feeds
+class NGM(BasicNewsRecipe):
+    title = 'National Geographic Magazine'
+    __author__ = 'Krittika Goyal'
+    description = 'National Geographic Magazine'
+    timefmt = ' [%d %b, %Y]'
+
+    no_stylesheets = True
+    auto_cleanup = True
+    auto_cleanup_keep = '//div[@class="featurepic"]'
+
+    def nejm_get_index(self):
+        return self.index_to_soup('http://ngm.nationalgeographic.com/2013/10/table-of-contents')
+
+    # To parse the article TOC
+    def parse_index(self):
+        soup = self.nejm_get_index()
+        tocfull = soup.find('div', attrs={'class':'coltoc'})
+        toc = tocfull.find('div', attrs={'class':'more_section'})
+
+        articles = []
+        feeds = []
+        section_title = 'Features'
+        for x in toc.findAll(True):
+            if x.name == 'a':
+                # Article found
+                title = self.tag_to_string(x)
+                url = x.get('href', False)
+                if not url or not title:
+                    continue
+                url = 'http://ngm.nationalgeographic.com' + url
+                self.log('\t\tFound article:', title)
+                self.log('\t\t\t', url)
+                articles.append({'title': title, 'url':url,
+                                 'description':'', 'date':''})
+        feeds.append((section_title, articles))
+
+        art1 = tocfull.findAll('a')[1]
+        art1_title = self.tag_to_string(art1.find('div', attrs={'class': 'toched'}))
+        art1_url = art1.get('href', False)
+        art1_url = 'http://ngm.nationalgeographic.com' + art1_url
+        art1feed = {'title': art1_title, 'url':art1_url,
+                    'description':'', 'date':''}
+        feeds.append(('Cover Story', [art1feed]))
+
+        return feeds
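The rewritten NGM recipe drops the RSS-feed-plus-title-blacklist approach entirely: parse_index returns (section, article-list) pairs built from the issue's table-of-contents page, with every link in the 'more_section' div collected under 'Features' and the TOC's second anchor promoted to a one-article 'Cover Story' section.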
@@ -1,49 +1,108 @@
 # vim:fileencoding=utf-8
 from calibre.web.feeds.news import BasicNewsRecipe

-class AdvancedUserRecipe1344926684(BasicNewsRecipe):
+class AdvancedUserRecipe1380105782(BasicNewsRecipe):
     title = u'Neue Osnabrücker Zeitung'
-    __author__ = 'Krittika Goyal'
-    oldest_article = 7
-    max_articles_per_feed = 100
-    # auto_cleanup = True
-    no_stylesheets = True
-    use_embedded_content = False
-    language = 'de'
+    __author__ = 'vo_he'
+    description = 'Online auch ohne IPhone'
+    encoding = 'utf-8'
+    language = 'de'
     remove_javascript = True
+    no_stylesheets = True
+
+    oldest_article = 2
+    max_articles_per_feed = 100
+    cover_url = 'http://www.noz.de/bundles/nozplatform/images/logos/osnabruecker-zeitung.png'
+
+    remove_tags_before = dict(id='feedContent')
+    remove_tags_before = dict(id='headline')
+    remove_tags_after = dict(id='article-authorbox')
+    remove_tags_after = dict(id='footer-start')
-    remove_tags_after = dict(name='div', attrs={'class':'morelinks'})
-
-    keep_only_tags = [
-        dict(name='div', attrs={'class':'article'}),
-        dict(name='span', attrs={'id':'articletext'})
-    ]
     remove_tags = [
-        dict(name='div', attrs={'id':'retresco-title'}),
-        dict(name='div', attrs={'class':'retresco-item s1 relative'}),
-        dict(name='a', attrs={'class':'medium2 largeSpaceTop icon'}),
-        dict(name='div', attrs={'class':'articleFunctions inlineTeaserRight'}),
-        dict(name='div', attrs={'class':'imageContainer '}),
-        dict(name='div', attrs={'class':'imageContainer centerContainer'}),
-        dict(name='div', attrs={'class':'grid singleCol articleTeaser'}),
-        dict(name='h3', attrs={'class':'teaserRow'}),
-        dict(name='div', attrs={'class':'related-comments'}),
-        dict(name='a', attrs={'class':' icon'}),
-        dict(name='a', attrs={'class':'right small'}),
-        dict(name='span', attrs={'class':'small block spaceBottom rectangleAd'}),
+        dict(name='div', attrs={'id':'ui-datepicker-div'}),
+        dict(name='div', attrs={'class':'nav-second'}),
+        dict(name='div', attrs={'class':'nav-first'}),
+        dict(name='div', attrs={'class':'icon-print'}),
+        dict(name='div', attrs={'class':'social-button'}),
+        dict(name='div', attrs={'class':'social-media-bar'}),
+        dict(name='div', attrs={'class':'pull-right'}),
+        dict(name='div', attrs={'class':'btn btn-primary flat-button'}),
+        dict(name='div', attrs={'class':'carousel-wrapper'}),
+        dict(name='a', attrs={'class':'right-content merchandising hidden-tablet'}),
+        dict(name='div', attrs={'class':'border-circle pull-left'}),
+        dict(name='div', attrs={'class':'row show-grid general-infoimageContainer '}),
+        dict(name='div', attrs={'class':'location-list'}),
+        dict(name='div', attrs={'class':'block'}),
         dict(name='div', attrs={'class':'furtherGalleries largeSpaceTop'})
     ]

-    feeds = [(u'Lokales', u'http://www.noz.de/rss/Lokales'),
-             (u'Vermischtes', u'http://www.noz.de/rss/Vermischtes'),
-             (u'Politik', u'http://www.noz.de/rss/Politik'),
-             (u'Wirtschaft', u'http://www.noz.de/rss/Wirtschaft'),
-             (u'Kultur', u'http://www.noz.de/rss/Kultur'),
-             (u'Medien', u'http://www.noz.de/rss/Medien'),
-             (u'Wissenschaft', u'http://www.noz.de/rss/wissenschaft'),
-             (u'Sport', u'http://www.noz.de/rss/Sport'),
-             (u'Computer', u'http://www.noz.de/rss/Computer'),
-             (u'Musik', u'http://www.noz.de/rss/Musik'),
-             (u'Szene', u'http://www.noz.de/rss/Szene'),
-             (u'Niedersachsen', u'http://www.noz.de/rss/Niedersachsen'),
-             (u'Kino', u'http://www.noz.de/rss/Kino')]
+    feeds = [(u'Melle Mitte', u'http://www.noz.de/rss/ressort/Melle%20Mitte'),
+             (u'Melle Nord', u'http://www.noz.de/rss/ressort/Melle%20Nord'),
+             (u'Melle Sued', u'http://www.noz.de/rss/ressort/Melle%20S%C3%BCd'),
+             (u'Nordrhein Westfalen', u'http://www.noz.de/rss/ressort/Nordrhein-Westfalen'),
+             (u'Niedersachsen', u'http://www.noz.de/rss/ressort/Niedersachsen'),
+             (u'Vermischtes', u'http://www.noz.de/rss/ressort/Vermischtes'),
+             (u'GutzuWissen', u'http://www.noz.de/rss/ressort/Gut%20zu%20Wissen'),
+             (u'Sport', u'http://www.noz.de/rss/ressort/Sport'),
+             (u'Kultur', u'http://www.noz.de/rss/ressort/Kultur'),
+             (u'Medien', u'http://www.noz.de/rss/ressort/Medien'),
+             (u'Belm', u'http://www.noz.de/rss/ressort/Belm'),
+             (u'Bissendorf', u'http://www.noz.de/rss/ressort/Bissendorf'),
+             (u'Osnabrueck', u'http://www.noz.de/rss/ressort/Osnabr%C3%BCck'),
+             (u'Bad Essen', u'http://www.noz.de/rss/ressort/Bad%20Essen'),
+             (u'Politik', u'http://www.noz.de/rss/ressort/Politik'),
+             (u'Wirtschaft', u'http://www.noz.de/rss/ressort/Wirtschaft'),
+             #(u'Fussball', u'http://www.noz.de/rss/ressort/Fußball'),
+             #(u'VfL Osnabrueck', u'http://www.noz.de/rss/ressort/VfL%20Osnabr%C3%BCck'),
+             #(u'SF Lotte', u'http://www.noz.de/rss/ressort/SF%20Lotte'),
+             #(u'SV Meppen', u'http://www.noz.de/rss/ressort/SV%20Meppen'),
+             #(u'Artland Dragons', u'http://www.noz.de/rss/ressort/Artland%20Dragons'),
+             #(u'Panthers', u'http://www.noz.de/rss/ressort/Panthers'),
+             (u'OS-Sport', u'http://www.noz.de/rss/ressort/OS-Sport'),
+             #(u'Emsland Sport', u'http://www.noz.de/rss/ressort/EL-Sport'),
+             #(u'Lingen', u'http://www.noz.de/rss/ressort/Lingen'),
+             #(u'Lohne', u'http://www.noz.de/rss/ressort/Lohne'),
+             #(u'Emsbueren', u'http://www.noz.de/rss/ressort/Emsb%C3%BCren'),
+             #(u'Salzbergen', u'http://www.noz.de/rss/ressort/Salzbergen'),
+             #(u'Spelle', u'http://www.noz.de/rss/ressort/Spelle'),
+             #(u'Freren', u'http://www.noz.de/rss/ressort/Freren'),
+             #(u'Lengerich', u'http://www.noz.de/rss/ressort/Lengerich'),
+             #(u'Bad Iburg', u'http://www.noz.de/rss/ressort/Bad%20Iburg'),
+             #(u'Bad Laer', u'http://www.noz.de/rss/ressort/Bad%20Laer'),
+             #(u'Bad Rothenfelde', u'http://www.noz.de/rss/ressort/Bad%20Rothenfelde'),
+             #(u'GMHütte', u'http://www.noz.de/rss/ressort/Georgsmarienh%C3%BCtte'),
+             #(u'Glandorf', u'http://www.noz.de/rss/ressort/Glandorf'),
+             #(u'Hagen', u'http://www.noz.de/rss/ressort/Hagen'),
+             #(u'Hasbergen', u'http://www.noz.de/rss/ressort/Hasbergen'),
+             #(u'Hilter', u'http://www.noz.de/rss/ressort/Hilter'),
+             #(u'Lotte', u'http://www.noz.de/rss/ressort/Lotte'),
+             #(u'Wallenhorst', u'http://www.noz.de/rss/ressort/Wallenhorst'),
+             #(u'Westerkappeln', u'http://www.noz.de/rss/ressort/Westerkappeln'),
+             #(u'Artland', u'http://www.noz.de/rss/ressort/Artland'),
+             #(u'Bersenbrück', u'http://www.noz.de/rss/ressort/Bersenbr%C3%BCck'),
+             #(u'Fürstenau', u'http://www.noz.de/rss/ressort/F%C3%BCrstenau'),
+             #(u'Neuenkirchen', u'http://www.noz.de/rss/ressort/Neuenkirchen'),
+             #(u'Lokalsport', u'http://www.noz.de/rss/ressort/Lokalsport%20Nordkreis'),
+             #(u'Bramsche', u'http://www.noz.de/rss/ressort/Bramsche'),
+             #(u'Bramsche Ortsteile', u'http://www.noz.de/rss/ressort/Bramscher%20Ortsteile'),
+             #(u'Neuenkirchen Vörden', u'http://www.noz.de/rss/ressort/Neuenkirchen-V%C3%B6rden'),
+             #(u'Papenburg', u'http://www.noz.de/rss/ressort/Papenburg'),
+             #(u'Dörpen', u'http://www.noz.de/rss/ressort/D%C3%B6rpen'),
+             #(u'Rhede', u'http://www.noz.de/rss/ressort/Rhede'),
+             #(u'Lathen', u'http://www.noz.de/rss/ressort/Lathen'),
+             #(u'Sögel', u'http://www.noz.de/rss/ressort/S%C3%B6gel'),
+             #(u'Nordhümmling', u'http://www.noz.de/rss/ressort/Nordh%C3%BCmmling'),
+             #(u'Werlte', u'http://www.noz.de/rss/ressort/Werlte'),
+             #(u'Westoverledingen', u'http://www.noz.de/rss/ressort/Westoverledingen'),
+             #(u'Geeste', u'http://www.noz.de/rss/ressort/Geeste'),
+             #(u'Haren', u'http://www.noz.de/rss/ressort/Haren'),
+             #(u'Haselünne', u'http://www.noz.de/rss/ressort/Hasel%C3%BCnne'),
+             #(u'Herzlake', u'http://www.noz.de/rss/ressort/Herzlake'),
+             #(u'Meppen', u'http://www.noz.de/rss/ressort/Meppen'),
+             #(u'Twist', u'http://www.noz.de/rss/ressort/Twist'),
+             #(u'Bohmte', u'http://www.noz.de/rss/ressort/Bohmte'),
+             #(u'Ostercappeln', u'http://www.noz.de/rss/ressort/Ostercappeln')
+             ]
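The remove_tags entries above are BeautifulSoup-style matchers: each dict(name=..., attrs=...) describes tags to strip from the fetched page before conversion. A minimal standalone illustration of the same matching idea, written against the separately installable bs4 package (an assumption made for illustration; calibre bundles its own copy of BeautifulSoup and does the deletion itself):

    from bs4 import BeautifulSoup  # stand-in for calibre's bundled parser

    html = '<div class="nav-first">menu</div><div class="article">text</div>'
    soup = BeautifulSoup(html, 'html.parser')

    # Same spirit as remove_tags = [dict(name='div', attrs={'class':'nav-first'})]
    for tag in soup.find_all('div', attrs={'class': 'nav-first'}):
        tag.decompose()  # delete the tag and everything inside it

    print soup  # <div class="article">text</div>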
@@ -1,4 +1,3 @@
-
 #!/usr/bin/env python
 __license__ = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
@@ -11,6 +10,9 @@ import re
 from calibre.web.feeds.news import BasicNewsRecipe

+def find_header(tag):
+    return tag.name == 'header' and tag.parent['class'] == 'article'
+
 class NewYorkReviewOfBooks(BasicNewsRecipe):

     title = u'New York Review of Books'
@@ -23,65 +25,70 @@ class NewYorkReviewOfBooks(BasicNewsRecipe):
     no_javascript = True
     needs_subscription = True

-    keep_only_tags = [dict(id=['article-body','page-title'])]
-    remove_tags = [dict(attrs={'class':['article-tools', 'article-links',
-        'center advertisement']})]
+    keep_only_tags = [
+        dict(name='section', attrs={'class':'article_body'}),
+        dict(name=find_header),
+        dict(name='div', attrs={'class':'for-subscribers-only'}),
+    ]

     preprocess_regexps = [(re.compile(r'<head>.*?</head>', re.DOTALL), lambda
         m:'<head></head>')]

+    def print_version(self, url):
+        return url+'?pagination=false'
+
     def get_browser(self):
         br = BasicNewsRecipe.get_browser(self)
         br.open('http://www.nybooks.com/account/signin/')
-        br.select_form(nr = 1)
+        br.select_form(nr=2)
         br['username'] = self.username
         br['password'] = self.password
         br.submit()
         return br

-    def print_version(self, url):
-        return url+'?pagination=false'
+    def preprocess_html(self, soup):
+        header = soup.find('header')
+        body = soup.find('body')
+        body.insert(0, header)
+        header.find('div', attrs={'class':'details'}).extract()
+        for i in soup.findAll('input'):
+            i.extract()
+        return soup

     def parse_index(self):
         soup = self.index_to_soup('http://www.nybooks.com/current-issue')

         # Find cover
-        sidebar = soup.find(id='sidebar')
+        sidebar = soup.find('div', attrs={'class':'issue_cover'})
         if sidebar is not None:
-            a = sidebar.find('a', href=lambda x: x and 'view-photo' in x)
-            if a is not None:
-                psoup = self.index_to_soup('http://www.nybooks.com'+a['href'])
-                cover = psoup.find('img', src=True)
-                self.cover_url = cover['src']
-                self.log('Found cover at:', self.cover_url)
+            img = sidebar.find('img', src=True)
+            self.cover_url = 'http://www.nybooks.com' + img['src']
+            self.log('Found cover at:', self.cover_url)

         # Find date
-        div = soup.find(id='page-title')
+        div = soup.find('time', pubdate='pubdate')
         if div is not None:
-            h5 = div.find('h5')
-            if h5 is not None:
-                text = self.tag_to_string(h5)
-                date = text.partition(u'\u2022')[0].strip()
-                self.timefmt = u' [%s]'%date
-                self.log('Issue date:', date)
+            text = self.tag_to_string(div)
+            date = text.partition(u'\u2022')[0].strip()
+            self.timefmt = u' [%s]'%date
+            self.log('Issue date:', date)

         # Find TOC
-        tocs = soup.findAll('ul', attrs={'class':'issue-article-list'})
+        toc = soup.find('div', attrs={'class':'current_issue'}).find('div', attrs={'class':'articles_list'})
         articles = []
-        for toc in tocs:
-            for li in toc.findAll('li'):
-                h3 = li.find('h3')
-                title = self.tag_to_string(h3)
-                author = self.tag_to_string(li.find('h4'))
-                title = title + u' (%s)'%author
-                url = 'http://www.nybooks.com'+h3.find('a', href=True)['href']
-                desc = ''
-                for p in li.findAll('p'):
-                    desc += self.tag_to_string(p)
-                self.log('Found article:', title)
-                self.log('\t', url)
-                self.log('\t', desc)
-                articles.append({'title':title, 'url':url, 'date':'',
-                    'description':desc})
+        for div in toc.findAll('div', attrs={'class':'row'}):
+            h2 = div.find('h2')
+            title = self.tag_to_string(h2).strip()
+            author = self.tag_to_string(div.find('div', attrs={'class':'author'})).strip()
+            title = title + u' (%s)'%author
+            url = 'http://www.nybooks.com' + h2.find('a', href=True)['href']
+            desc = ''
+            for p in div.findAll('p', attrs={'class':lambda x: x and 'quiet' in x}):
+                desc += self.tag_to_string(p)
+            self.log('Found article:', title)
+            self.log('\t', url)
+            self.log('\t', desc)
+            articles.append({'title':title, 'url':url, 'date':'',
+                'description':desc})

         return [('Current Issue', articles)]
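keep_only_tags normally takes name/attribute dicts, but as find_header above shows, a callable can serve as the matcher too. A small self-contained sketch of the same idea using the standalone bs4 package (an assumption for illustration; calibre ships its own BeautifulSoup, where tag['class'] is a plain string rather than bs4's list of classes):

    from bs4 import BeautifulSoup

    html = '<div class="article"><header><h1>Heading</h1></header></div>'
    soup = BeautifulSoup(html, 'html.parser')

    def find_header(tag):
        # bs4 exposes class as a list of class names, hence the membership test
        return tag.name == 'header' and 'article' in (tag.parent.get('class') or [])

    print soup.find(find_header)  # <header><h1>Heading</h1></header>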
@@ -10,6 +10,9 @@ import re
 from calibre.web.feeds.news import BasicNewsRecipe

+def find_header(tag):
+    return tag.name == 'header' and tag.parent['class'] == 'article'
+
 class NewYorkReviewOfBooks(BasicNewsRecipe):

     title = u'New York Review of Books (no subscription)'
@@ -21,9 +24,11 @@ class NewYorkReviewOfBooks(BasicNewsRecipe):
     no_stylesheets = True
     no_javascript = True

-    keep_only_tags = [dict(id=['article-body', 'page-title'])]
-    remove_tags = [dict(attrs={'class':['article-tools', 'article-links',
-        'center advertisement']})]
+    keep_only_tags = [
+        dict(name='section', attrs={'class':'article_body'}),
+        dict(name=find_header),
+        dict(name='div', attrs={'class':'for-subscribers-only'}),
+    ]

     preprocess_regexps = [(re.compile(r'<head>.*?</head>', re.DOTALL), lambda
         m:'<head></head>')]
@@ -31,40 +36,44 @@ class NewYorkReviewOfBooks(BasicNewsRecipe):
     def print_version(self, url):
         return url+'?pagination=false'

+    def preprocess_html(self, soup):
+        header = soup.find('header')
+        body = soup.find('body')
+        body.insert(0, header)
+        header.find('div', attrs={'class':'details'}).extract()
+        for i in soup.findAll('input'):
+            i.extract()
+        return soup
+
     def parse_index(self):
         soup = self.index_to_soup('http://www.nybooks.com/current-issue')

         # Find cover
-        sidebar = soup.find(id='sidebar')
+        sidebar = soup.find('div', attrs={'class':'issue_cover'})
         if sidebar is not None:
-            a = sidebar.find('a', href=lambda x: x and 'view-photo' in x)
-            if a is not None:
-                psoup = self.index_to_soup('http://www.nybooks.com'+a['href'])
-                cover = psoup.find('img', src=True)
-                self.cover_url = cover['src']
-                self.log('Found cover at:', self.cover_url)
+            img = sidebar.find('img', src=True)
+            self.cover_url = 'http://www.nybooks.com' + img['src']
+            self.log('Found cover at:', self.cover_url)

         # Find date
-        div = soup.find(id='page-title')
+        div = soup.find('time', pubdate='pubdate')
         if div is not None:
-            h5 = div.find('h5')
-            if h5 is not None:
-                text = self.tag_to_string(h5)
-                date = text.partition(u'\u2022')[0].strip()
-                self.timefmt = u' [%s]'%date
-                self.log('Issue date:', date)
+            text = self.tag_to_string(div)
+            date = text.partition(u'\u2022')[0].strip()
+            self.timefmt = u' [%s]'%date
+            self.log('Issue date:', date)

         # Find TOC
-        toc = soup.find('ul', attrs={'class':'issue-article-list'})
+        toc = soup.find('div', attrs={'class':'current_issue'}).find('div', attrs={'class':'articles_list'})
         articles = []
-        for li in toc.findAll('li'):
-            h3 = li.find('h3')
-            title = self.tag_to_string(h3)
-            author = self.tag_to_string(li.find('h4'))
-            title = title + u' (%s)'%author
-            url = 'http://www.nybooks.com'+h3.find('a', href=True)['href']
-            desc = ''
-            for p in li.findAll('p'):
-                desc += self.tag_to_string(p)
-            self.log('Found article:', title)
-            self.log('\t', url)
+        for div in toc.findAll('div', attrs={'class':'row'}):
+            h2 = div.find('h2')
+            title = self.tag_to_string(h2).strip()
+            author = self.tag_to_string(div.find('div', attrs={'class':'author'})).strip()
+            title = title + u' (%s)'%author
+            url = 'http://www.nybooks.com' + h2.find('a', href=True)['href']
+            desc = ''
+            for p in div.findAll('p', attrs={'class':lambda x: x and 'quiet' in x}):
+                desc += self.tag_to_string(p)
+            self.log('Found article:', title)
+            self.log('\t', url)
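The issue date above is recovered by splitting the <time> element's text at the bullet character. A quick standalone check of that one-liner (the masthead string here is hypothetical):

    # Hypothetical masthead text of the form "<date> • <volume info>"
    text = u'November 7, 2013 \u2022 Volume 60, Number 17'
    date = text.partition(u'\u2022')[0].strip()
    print date  # November 7, 2013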
recipes/news24.recipe (new file, 53 lines)
@@ -0,0 +1,53 @@
from calibre.web.feeds.news import BasicNewsRecipe


class AdvancedUserRecipe1375900744(BasicNewsRecipe):
    title = u'News24'
    description = "News24."
    __author__ = 'Nicki de Wet'
    publisher = 'Media24'
    category = 'news, politics, South Africa'
    oldest_article = 3
    max_articles_per_feed = 20
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False
    auto_cleanup = False
    language = 'en_ZA'
    remove_empty_feeds = True
    publication_type = 'newsportal'
    masthead_url = 'http://www.24.com/images/widgethead_news.png'
    extra_css = """
        body{font-family: Arial,Helvetica,sans-serif }
        img{display: block}
    """

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language
    }

    remove_tags = [
        dict(name=['object', 'embed', 'iframe', 'table', 'meta', 'link']),
        dict(attrs={'class': ['TwitterfacebookLink', 'superSportArticleBlock',
                              'videoHighlights', 'facebookComments', 'share',
                              'item_block', 'kalahari_product left', 'block red',
                              'credit']}),
        dict(attrs={'id': ['comments_wrap', 'article_toolbox_bot',
                           'inside_news', 'sponsored-links', 'lnkGalleries',
                           'relatedlinks_box', 'lnkUserGalleries',
                           'lnkNewsGalleries', 'relatedlinks',
                           'divRelatedLinks']})]

    keep_only_tags = [
        dict(attrs={'class': ['left col633', 'article col626',
                              'columnWrapperLeft', 'articlecolumn',
                              'article_img', 'picture_caption', 'DiveTable']})]

    feeds = [
        (u'Top Stories', u'http://feeds.news24.com/articles/news24/TopStories/rss'),
        (u'South Africa', u'http://feeds.news24.com/articles/news24/SouthAfrica/rss'),
        (u'World', u'http://feeds.news24.com/articles/news24/World/rss'),
        (u'Sport', u'http://feeds.24.com/articles/sport/featured/topstories/rss')]
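A feed-based recipe like this can be smoke-tested without calibre by confirming the declared feeds parse as RSS. A minimal standalone sketch (assumes network access and that the feed is still live; Python 2, matching the era of these recipes):

    import urllib2
    import xml.etree.ElementTree as ET

    url = 'http://feeds.news24.com/articles/news24/TopStories/rss'
    root = ET.fromstring(urllib2.urlopen(url).read())
    for item in list(root.iter('item'))[:3]:
        print item.findtext('title')

Within calibre itself, running ebook-convert on the saved recipe with the --test flag fetches a couple of articles per feed and prints verbose logs.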
@@ -2,173 +2,263 @@
 #!/usr/bin/env python

 __license__ = 'GPL v3'
-__copyright__ = '2010, matek09, matek09@gmail.com; 2012, admroz, a.rozewicki@gmail.com'
+__copyright__ = '2010, matek09, matek09@gmail.com; 2012-2013, admroz, a.rozewicki@gmail.com'

+import re
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ptempfile import PersistentTemporaryFile
 from string import capwords
 import datetime
+from calibre.ebooks.BeautifulSoup import BeautifulSoup


 class Newsweek(BasicNewsRecipe):

     # how many issues to go back, 0 means get the most current one
-    BACK_ISSUES = 2
+    BACK_ISSUES = 1

     EDITION = '0'
     DATE = None
     YEAR = datetime.datetime.now().year

     title = u'Newsweek Polska'
     __author__ = 'matek09, admroz'
     description = 'Weekly magazine'
     encoding = 'utf-8'
     language = 'pl'
     remove_javascript = True

     temp_files = []
     articles_are_obfuscated = True


     #
-    # Parses each article
+    # Parses article contents from one page
     #
-    def get_obfuscated_article(self, url):
-        br = self.get_browser()
-        br.open(url)
-        source = br.response().read()
-        page = self.index_to_soup(source)
-
-        main_section = page.find(id='mainSection')
-
-        title = main_section.find('h1')
-        info = main_section.find('ul', attrs={'class' : 'articleInfo'})
-        authors = info.find('li').find('h4')
-        article = main_section.find('div', attrs={'id' : 'article'})
-
-        # remove related articles box
-        related = article.find('div', attrs={'class' : 'relatedBox'})
-        if related is not None:
-            related.extract()
-
-        # remove div with social networking links and links to
-        # other articles in web version
-        for div in article.findAll('div'):
-            if div.find('span', attrs={'class' : 'google-plus'}):
-                div.extract()
-
-            for p in div.findAll('p'):
-                if p.find('span', attrs={'style' : 'color: rgb(255, 0, 0);'}):
-                    p.extract()
-                    continue
-                for a in p.findAll('a'):
-                    if a.find('span', attrs={'style' : 'font-size: larger;'}):
-                        a.extract()
-
-        html = unicode(title) + unicode(authors) + unicode(article)
-        next = main_section.find('li', attrs={'class' : 'next'})
-
-        while next:
-            url = next.find('a')['href']
-            br.open(url)
-            source = br.response().read()
-            page = self.index_to_soup(source)
-            main_section = page.find(id='mainSection')
-            article = main_section.find('div', attrs={'id' : 'article'})
-            aside = article.find(id='articleAside')
-            if aside is not None:
-                aside.extract()
-            html = html + unicode(article)
-            next = main_section.find('li', attrs={'class' : 'next'})
-
-        self.temp_files.append(PersistentTemporaryFile('_temparse.html'))
-        self.temp_files[-1].write(html)
-        self.temp_files[-1].close()
-        return self.temp_files[-1].name
+    def get_article_divs(self, css, main_section):
+        strs = []
+
+        # get all divs with given css class
+        article_divs = main_section.findAll('div', attrs={'class' : css})
+        for article_div in article_divs:
+
+            # remove sections like 'read more...' etc.
+            for p in article_div.findAll('p'):
+
+                if p.find('span', attrs={'style' : 'color: #800000; font-size: medium;'}):
+                    p.extract()
+                    continue
+
+                if p.find('span', attrs={'style' : 'font-size: medium; color: #800000;'}):
+                    p.extract()
+                    continue
+
+                if p.find('span', attrs={'style' : 'font-size: medium;'}):
+                    p.extract()
+                    continue
+
+                if p.find('span', attrs={'style' : 'color: #800000;'}):
+                    p.extract()
+                    continue
+
+                obj = p.find('object')
+                if obj:
+                    obj.extract()
+                    continue
+
+                strong = p.find('strong')
+                if strong:
+                    newest = re.compile("Tekst pochodzi z najnowszego numeru Tygodnika Newsweek")
+                    if newest.search(str(strong)):
+                        strong.extract()
+                        continue
+
+                itunes = p.find('a')
+                if itunes:
+                    reurl = re.compile("itunes.apple.com")
+                    if reurl.search(str(itunes['href'])):
+                        p.extract()
+                        continue
+
+                imagedesc = p.find('div', attrs={'class' : 'image-desc'})
+                if imagedesc:
+                    redesc = re.compile("Okładka numeru")
+                    if (redesc.search(str(imagedesc))):
+                        p.extract()
+                        continue
+
+            # get actual contents
+            for content in article_div.contents:
+                strs.append("".join(str(content)))
+
+        # return contents as a string
+        return unicode("".join(strs))
+
+    #
+    # Articles can be divided into several pages; this method parses them recursively
+    #
+    def get_article_page(self, br, url, page):
+        br.open(url)
+        source = br.response().read()
+
+        html = ''
+
+        matches = re.search(r'<article>(.*)</article>', source, re.DOTALL)
+        if matches is None:
+            print "no article tag found, returning..."
+            return
+
+        main_section = BeautifulSoup(matches.group(0))
+
+        if page == 0:
+            title = main_section.find('h1')
+            html = html + unicode(title)
+
+            authors = ''
+            authorBox = main_section.find('div', attrs={'class' : 'AuthorBox'})
+            if authorBox is not None:
+                authorH4 = authorBox.find('h4')
+                if authorH4 is not None:
+                    authors = self.tag_to_string(authorH4)
+            html = html + unicode(authors)
+
+            info = main_section.find('p', attrs={'class' : 'lead'})
+            html = html + unicode(info)
+
+        html = html + self.get_article_divs('3917dc34e07c9c7180df2ea9ef103361845c8af42b71f51b960059226090a1ac articleStart', main_section)
+        html = html + self.get_article_divs('3917dc34e07c9c7180df2ea9ef103361845c8af42b71f51b960059226090a1ac', main_section)
+
+        nextPage = main_section.find('a', attrs={'class' : 'next'})
+        if nextPage:
+            html = html + self.get_article_page(br, nextPage['href'], page+1)
+
+        return html
+
+    #
+    # Parses each article
+    #
+    def get_obfuscated_article(self, url):
+        br = self.get_browser()
+        html = self.get_article_page(br, url, 0)
+        self.temp_files.append(PersistentTemporaryFile('_temparse.html'))
+        self.temp_files[-1].write(html)
+        self.temp_files[-1].close()
+        return self.temp_files[-1].name
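The recursion in get_article_page reduces to: fetch a page, keep its body, and recurse while a "next" link exists. A dependency-free sketch of just that control flow, with a made-up page table standing in for the browser and HTML parsing:

    # Hypothetical three-page article; each entry maps a URL to (body, next URL).
    PAGES = {
        'page1': ('part one ', 'page2'),
        'page2': ('part two ', 'page3'),
        'page3': ('part three', None),
    }

    def get_article_page(url):
        body, next_url = PAGES[url]   # stands in for br.open() + <article> extraction
        html = body
        if next_url is not None:      # stands in for the 'next' anchor lookup
            html = html + get_article_page(next_url)
        return html

    print get_article_page('page1')  # part one part two part three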
     #
     # Goes back given number of issues. It also knows how to go back
     # to the previous year if there are not enough issues in the current one
     #
     def find_last_issue(self, archive_url):
-        archive_soup = self.index_to_soup(archive_url)
-        select = archive_soup.find('select', attrs={'id' : 'paper_issue_select'})
-        options = select.findAll(lambda tag: tag.name == 'option' and tag.has_key('value'))
-
-        # check if need to go back to previous year
-        if len(options) > self.BACK_ISSUES:
-            option = options[self.BACK_ISSUES]
-            self.EDITION = option['value'].replace('http://www.newsweek.pl/wydania/','')
-            self.index_to_soup('http://www.newsweek.pl/wydania/' + self.EDITION)
-        else:
-            self.BACK_ISSUES = self.BACK_ISSUES - len(options)
-            self.YEAR = self.YEAR - 1
-            self.find_last_issue(archive_url + ',' + str(self.YEAR))
+        archive_soup = self.index_to_soup(archive_url, True)
+
+        # workaround because the html is so messed up that the find() method on the soup
+        # returns None, so we extract just the subhtml that we need
+        matches = re.search(r'<ul class="rightIssueList">(.*?)</ul>', archive_soup, re.DOTALL)
+        if matches is None:
+            return
+
+        subSoup = BeautifulSoup(matches.group(0))
+        issueLinks = subSoup.findAll('a')
+
+        # check if need to go back to previous year
+        if len(issueLinks) > self.BACK_ISSUES:
+            link = issueLinks[self.BACK_ISSUES]
+            self.EDITION = link['href'].replace('http://www.newsweek.pl/wydania/','')
+            self.index_to_soup('http://www.newsweek.pl/wydania/' + self.EDITION)
+        else:
+            self.BACK_ISSUES = self.BACK_ISSUES - len(issueLinks)
+            self.YEAR = self.YEAR - 1
+            self.find_last_issue(archive_url + '/' + str(self.YEAR))

     #
     # Looks for the last issue which we want to download. Then goes on each
     # section and article and stores them (assigning to sections)
     #
     def parse_index(self):
         archive_url = 'http://www.newsweek.pl/wydania/archiwum'
         self.find_last_issue(archive_url)
         soup = self.index_to_soup('http://www.newsweek.pl/wydania/' + self.EDITION)
-        self.DATE = self.tag_to_string(soup.find('span', attrs={'class' : 'data'}))
-        main_section = soup.find(id='mainSection')
-        img = main_section.find(lambda tag: tag.name == 'img' and tag.has_key('alt') and tag.has_key('title'))
-        self.cover_url = img['src']
-        feeds = []
-        articles = {}
-        sections = []
-
-        news_list = main_section.find('ul', attrs={'class' : 'newsList'})
-        section = 'Inne'
-
-        for li in news_list.findAll('li'):
-            h3 = li.find('h3')
-            if h3 is not None:
-                section = capwords(self.tag_to_string(h3))
-                continue
-            else:
-                h2 = li.find('h2')
-                if h2 is not None:
-                    article = self.create_article(h2)
-                    if article is None:
-                        continue
-
-                    if articles.has_key(section):
-                        articles[section].append(article)
-                    else:
-                        articles[section] = [article]
-                        sections.append(section)
-
-        for section in sections:
-            feeds.append((section, articles[section]))
-        return feeds
+
+        matches = re.search(r'<div class="Issue-Entry">(.*)ARTICLE_BOTTOM', soup.prettify(), re.DOTALL)
+        if matches is None:
+            return
+
+        main_section = BeautifulSoup(matches.group(0))
+
+        # date
+        matches = re.search(r'(\d{2}-\d{2}-\d{4})', self.tag_to_string(main_section.find('h2')))
+        if matches:
+            self.DATE = matches.group(0)
+
+        # cover
+        img = main_section.find(lambda tag: tag.name == 'img' and tag.has_key('alt') and tag.has_key('title'))
+        self.cover_url = img['src']
+        feeds = []
+        articles = {}
+        sections = []
+
+        # sections
+        for sectionUl in main_section.findAll('ul', attrs={'class' : 'whatsin'}):
+
+            # section header
+            header = sectionUl.find('li', attrs={'class' : 'header'})
+            if header is None:
+                continue
+
+            section = capwords(self.tag_to_string(header))
+
+            # articles in section
+            articleUl = sectionUl.find('ul')
+            if articleUl is None:
+                continue
+
+            for articleLi in articleUl.findAll('li'):
+                # skip articles that are closed (locked)
+                closed = articleLi.find('span', attrs={'class' : 'closeart'})
+                if closed is not None:
+                    continue
+
+                article = self.create_article(articleLi)
+                if article is None:
+                    continue
+
+                if articles.has_key(section):
+                    articles[section].append(article)
+                else:
+                    articles[section] = [article]
+                    sections.append(section)
+
+        for section in sections:
+            # print("%s -> %d" % (section, len(articles[section])))
+            #
+            # for article in articles[section]:
+            #     print(" - %s" % article)
+            feeds.append((section, articles[section]))
+
+        return feeds

     #
     # Creates each article metadata (skips locked ones). The content will
     # be extracted later by another method (get_obfuscated_article).
     #
-    def create_article(self, h2):
-        article = {}
-        a = h2.find('a')
-        if a is None:
-            return None
-
-        article['title'] = self.tag_to_string(a)
-        article['url'] = a['href']
-        article['date'] = self.DATE
-        desc = h2.findNext('p')
-
-        if desc is not None:
-            article['description'] = self.tag_to_string(desc)
-        else:
-            article['description'] = ''
-        return article
+    def create_article(self, articleLi):
+        article = {}
+
+        a = articleLi.find('a')
+        if a is None:
+            return None
+
+        article['title'] = self.tag_to_string(a)
+        article['url'] = a['href']
+        article['date'] = self.DATE
+        article['description'] = ''
+
+        return article
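The section bookkeeping in parse_index is worth seeing in isolation: articles are grouped in a dict keyed by section name, while a separate list preserves the order in which sections were first seen (has_key is the Python 2 spelling of the `in` test). A minimal sketch with made-up data:

    articles = {}
    sections = []
    for section, title in [('Polska', 'A'), ('Swiat', 'B'), ('Polska', 'C')]:
        article = {'title': title}
        if articles.has_key(section):
            articles[section].append(article)
        else:
            articles[section] = [article]
            sections.append(section)

    feeds = [(section, articles[section]) for section in sections]
    print feeds  # [('Polska', [{'title': 'A'}, {'title': 'C'}]), ('Swiat', [{'title': 'B'}])]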
recipes/no_names_no_jackets.recipe (new file, 57 lines)
@@ -0,0 +1,57 @@
#
# Written:      July 2013
# Last Edited:  2013-07-25
# Version:      1.0
# Last update:  2013-07-25
#

__license__ = 'GPL v3'
__copyright__ = '2013, Armin Geller'

'''
Fetch nonamesnojackets.com
'''

from calibre.web.feeds.recipes import BasicNewsRecipe

class AdvancedUserRecipe1303841067(BasicNewsRecipe):

    title = u'No Names, No Jackets'
    __author__ = 'Armin Geller'  # AGe 2013-07-25
    description = u'One chapter. Just the writing. Discover something new.'
    publisher = 'nonamesnojackets.com/'
    publication_type = 'ebook news'
    tags = 'Books, Literature, E-Books, US'
    timefmt = ' [%a, %d %b %Y]'
    publication_type = 'Feed'
    language = 'en'
    encoding = 'utf-8'

    oldest_article = 14
    max_articles_per_feed = 100

    no_stylesheets = True
    use_embedded_content = False
    remove_javascript = True

    conversion_options = {'title': title,
                          'comments': description,
                          'tags': tags,
                          'language': language,
                          'publisher': publisher,
                          'authors': publisher,
                          }

    # cover_url = ''
    # masthead_url = ''

    extra_css = '''
        h1,h2 {font-weight:bold;font-size:large;}
        .entry-meta {font-size: 1em;text-align: left; font-style: italic}
    '''

    keep_only_tags = [
        dict(name='article')
    ]

    feeds = [(u'No Names, No Jackets', u'http://www.nonamesnojackets.com/feed/')]
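The timefmt attribute above is an strftime format string that calibre appends to the periodical's title. A quick standalone check of what it renders (the output depends on the current date):

    import time
    print time.strftime(' [%a, %d %b %Y]')  # e.g. ' [Fri, 25 Oct 2013]'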
recipes/nuus24.recipe (new file, 57 lines)
@@ -0,0 +1,57 @@
import re
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe


class Nuus24(BasicNewsRecipe):

    title = 'Nuus24'
    __author__ = 'Nicki de Wet'
    encoding = 'utf-8'
    description = 'Daaglikse Afrikaanse Nuus via Nuus24'
    language = 'af'
    publisher = 'Media24'
    timefmt = ' [%a, %d %b, %Y]'
    masthead_url = 'http://afrikaans.news24.com/images/nuus.jpg'
    max_articles_per_feed = 25
    remove_tags_before = dict(id='TheFeed')
    remove_tags_after = dict(id='TheFeed')
    remove_tags = [dict(
        attrs={
            'class': [
                'personal-bar row-fluid', 'navbar main-menu-fixed',
                'breaking-news-wrapper', 'row-fluid comments-bg',
                'unstyled actions', 'modal-body', 'modal-header', 'desktop']}),
        dict(id=['weather-forecast', 'topics', 'side-widgets',
                 'footer-container', 'sb-container', 'myModal']),
        dict(name=['script', 'noscript', 'style'])]

    keep_only_tags = [dict(attrs={'class': ['span8 border-right']}),
                      dict(name=['article', 'section']),
                      dict(id=['img-wrapper'])]
    extra_css = """ div.carousel-inner{ overflow:hidden;display: block;height:300px;} img{display: block} """
    no_stylesheets = True

    def parse_index(self):
        soup = self.index_to_soup('http://afrikaans.news24.com/Index.aspx')

        def feed_title(div):
            return ''.join(div.findAll(text=True, recursive=False)).strip()

        articles = {}
        key = None
        key = 'Nuus in Afrikaans'
        articles[key] = []
        ans = []

        for anchor in soup.findAll(True, attrs={'id': ['lnkLink']}):
            url = re.sub(r'\?.*', '', anchor['href'])
            title = self.tag_to_string(anchor, use_alt=True).strip()
            print title
            description = ''
            pubdate = strftime('%a, %d %b')
            articles[key].append(
                dict(title=title, url=url, date=pubdate,
                     description=description, content=''))
        ans = [(key, articles[key])]
        return ans
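The anchor URLs above are normalised by dropping everything from the first '?' onward. Standalone, with a made-up sample URL:

    import re

    url = 'http://afrikaans.news24.com/storie.aspx?id=123&ref=rss'
    print re.sub(r'\?.*', '', url)  # http://afrikaans.news24.com/storie.aspx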
recipes/padreydecano.recipe (new file, 50 lines)
@@ -0,0 +1,50 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__author__ = '2013, Carlos Alves <carlosalves90@gmail.com>'
'''
padreydecano.com
'''

from calibre.web.feeds.news import BasicNewsRecipe


class General(BasicNewsRecipe):
    title = 'Padre y Decano'
    __author__ = 'Carlos Alves'
    description = 'El sitio del pueblo'
    tags = 'soccer, futbol, Peñarol'
    language = 'es_UY'
    timefmt = '[%a, %d %b, %Y]'
    use_embedded_content = False
    recursion = 5
    encoding = None
    remove_javascript = True
    no_stylesheets = True

    oldest_article = 2
    max_articles_per_feed = 100
    keep_only_tags = [
        dict(name='h1', attrs={'class':'entry-title'}),
        dict(name='div', attrs={'class':'entry-content clearfix'})
    ]

    remove_tags = [
        dict(name='div', attrs={'class':['br', 'hr', 'titlebar', 'navigation']}),
        dict(name='dl', attrs={'class':'gallery-item'}),
        dict(name=['object', 'link'])
    ]

    extra_css = '''
        h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
        h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
        h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
        p {font-family:Arial,Helvetica,sans-serif;}
    '''
    feeds = [
        (u'Padre y Decano | Club Atlético Peñarol', u'http://www.padreydecano.com/cms/feed/')
    ]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return soup
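preprocess_html above strips inline style attributes so the extra_css rules win. The same transformation shown standalone with the separately installable bs4 package (an assumption for illustration; calibre bundles its own BeautifulSoup):

    from bs4 import BeautifulSoup  # stand-in for calibre's bundled parser

    soup = BeautifulSoup('<p style="color:red">Hola</p>', 'html.parser')
    for item in soup.find_all(style=True):
        del item['style']
    print soup  # <p>Hola</p>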
@@ -1,16 +0,0 @@
-from calibre.web.feeds.news import CalibrePeriodical
-
-class PCQ(CalibrePeriodical):
-
-    title = 'PCQuest'
-    calibre_periodicals_slug = 'pc-quest-india'
-
-    description = '''
-    Buying a tech product? Seeking a tech solution? Consult PCQuest, India's
-    market-leading selection and implementation guide for the latest
-    technologies: servers, business apps, security, open source, gadgets and
-    more. To subscribe visit, <a
-    href="http://news.calibre-ebook.com/periodical/pc-quest-india">calibre
-    Periodicals</a>.
-    '''
-    language = 'en_IN'