diff --git a/.gitignore b/.gitignore index 090d11fd24..192b503429 100644 --- a/.gitignore +++ b/.gitignore @@ -14,7 +14,6 @@ build dist docs resources/localization -resources/images.qrc resources/scripts.pickle resources/ebook-convert-complete.pickle resources/builtin_recipes.xml @@ -42,3 +41,4 @@ calibre_plugins/ recipes/*.mobi recipes/*.epub recipes/debug +/.metadata/ diff --git a/Changelog.yaml b/Changelog.yaml index 8462264e38..da03033cb9 100644 --- a/Changelog.yaml +++ b/Changelog.yaml @@ -20,6 +20,950 @@ # new recipes: # - title: +- version: 1.8.0 + date: 2013-10-25 + + new features: + - title: "DOCX Input: Support linked (as opposed to embedded) images, if the linked image is found on the local computer." + tickets: [1243597] + + - title: 'FB2 Input: Add support for note and cite back references. Link pairs of type="note" and type="cite" now automatically generate the correct back reference.' + tickets: [1243714] + + - title: "When automerging books during an add, include the author as well as the title in the report of merged books." + + - title: "OS X Mavericks (10.9) breaks connecting to iTunes and iBooks on iOS devices. For more details see: http://www.mobileread.com/forums/showthread.php?t=215624" + + bug fixes: + - title: "OS X: Fix system tray notifications causing crashes on some OS X 10.9 (Mavericks) systems (those that had Growl installed at some point)." + tickets: [1224491] + + - title: "OS X: Fix font size in completion popups too small on Mavericks (I hope)" + tickets: [1243761] + + - title: "PDF Output: Fix rendering of some semi-transparent images. All semi-transparent images are now rendered using soft masks." + tickets: [1243829] + + - title: "MOBI Output: Fix text marked with white-space:pre-wrap causing the Kindle to break lines at arbitrary points inside words." + tickets: [1240235] + + - title: "FB2 Input: Fix a regression that broke conversion of FB2 files with paragraphs having both a style and an id attribute." 
+ tickets: [1243709] + + - title: "TXT Input: Ensure that <title> in the generated HTML has a meaningful value." + tickets: [1236923] + + - title: "Book details panel: Fix HTML in author names and identifiers not being escaped" + tickets: [1243976] + + - title: "HTML 5 parsing: Fix handling of xml:lang attributes on all elements. xml:lang is now mapped to a plain lang on all elements, not just <html>" + + - title: "Update HTML 5 parser used in calibre (html5lib-python) to fix a few corner cases" + + - title: "When bulk deleting formats, use a single temporary directory for the deleted files. This makes restoring them from the recycle bin a little cleaner. Also might fix the reported issue with the windows recycle bin choking on creating a large number of folders." + + - title: "DOCX Input: Add support for hyperlink fields that have only anchors and not URLs" + + - title: "DOCX Input: Fix handling of multiple block level bookmarks at the same location." + tickets: [1241451] + + - title: "HTMLZ Output: Fix Htmlz does not apply inline css from <body>." + tickets: [1242261] + + - title: "Fix the restore database operation failing on windows installs with long usernames (this would cause the path to the temporary folder used to restore the database to become too long)." + + - title: "ODT Input: Various workarounds for broken ODT files generated by mk4ht" + + - title: "Fix a bug with non-ascii text in the create catalog dialog" + tickets: [1241515] + + improved recipes: + - A List Apart + +- version: 1.7.0 + date: 2013-10-18 + + new features: + - title: "Cover grid: Allow using images as the background for the cover grid. To choose an image, go to Preferences->Look & Feel->Cover Grid." + tickets: [1239194] + + - title: "An option to mark newly added books with a temporary mark. Option is in Preferences->Adding books." 
+ tickets: [1238609] + + - title: "Edit metadata dialog: Allow turning off the cover size displayed in the bottom right corner of the cover by right clicking the cover and choosing 'Hide cover size'. It can be restored the same way." + + bug fixes: + - title: "Conversion: If both embed font family and the filter css option to remove fonts are set, do not remove the font specified by the embed font family option." + + - title: "Fix a few remaining situations that could cause formats column to show an error message about SHLock" + + - title: "Make deleting books to recycle bin more robust. Ensure that the temporary directory created during the move to recycle bin process is not left behind in case of errors." + + - title: "Windows: Check if the books' files are in use before deleting" + + - title: "Fix custom device driver swap main and card option not working. Also fix swapping not happening for a few devices on linux" + tickets: [1240504] + + - title: "Edit metadata dialog: The Edit metadata dialog currently limits its max size based on the geometry of the smallest attached screen. Change that to use the geometry of the screen on which it will be shown." + tickets: [1239597] + + - title: "HTMLZ Output: Fix <style> tag placed inside <body> instead of <head>." + tickets: [1239530] + + - title: "HTMLZ Output: Fix inline styles not escaping quotes properly." + tickets: [1239527] + + - title: "HTMLZ Output: Fix incorrect handling of some self closing tags like <br>." + tickets: [1239555] + + - title: "Content server: Fix single item categories not working with reverse proxy setup." + tickets: [1238987] + + - title: "Fix a bug that could cause calibre to crash when switching from a large library to a smaller library with marked books." 
+ tickets: [1239210] + + - title: "Get Books: Fix downloading of some books in formats that do not have metadata yielding nonsense titles" + + - title: "Allow marked book button to be added to main toolbar when device is connected" + tickets: [1239163] + + - title: "Fix error if a marked book is deleted/merged." + tickets: [1239161] + + - title: "Template language: Fix formatter function days_between to compute the right value when the answer is negative." + + - title: "Windows: Fix spurious file in use by other process error if the book's folder contained multiple hard links pointing to the same file" + tickets: [1240788, 1240194] + + - title: "Windows: Fix duplicate files being created in very special circumstances when changing title and/or author. (the title or author had to be between 31 and 35 characters long and the book entry had to have been created by a pre 1.x version of calibre). You can check if your library has any such duplicates and remove them, by using the Check Library tool (Right click the calibre button on the toolbar and select Library Maintenance->Check Library)." + + improved recipes: + - Wall Street Journal + - Newsweek Polska + - Wired Magazine + - cracked.com + - Television Without Pity + - Carta + - Diagonales + + +- version: 1.6.0 + date: 2013-10-11 + + new features: + - title: "Temporary marking of books in the library" + description: "This allows you to select books from your calibre library manually and mark them. This 'mark' will remain until you restart calibre, or clear the marks. You can easily work with only the marked subset of books by right clicking the Mark Books button. To use this feature, go to Preferences->Toolbars and add the 'Mark Books' tool to the main toolbar." 
+ type: major + + - title: "Get Books: Add Wolne Lektury and Amazon (Canada) ebook stores" + + - title: "DOCX Input: Handle hyperlinks in footnotes and endnotes" + tickets: [1232790] + + - title: "Driver for Sunstech reader" + tickets: [1231590] + + - title: "Allow using both uri: and url: identifiers to create two different arbitrary links instead of just one in the Book details panel" + + - title: "E-book viewer: Make all keyboard shortcuts configurable" + tickets: [1232019] + + - title: "Conversion: Add an option to not condense CSS rules for margin, padding, border, etc. Option is under the Look & Feel section of the conversion dialog." + tickets: [1233220] + + - title: "calibredb: Allow setting of title sort field" + tickets: [1233711] + + - title: "ebook-meta: Add an --identifier option to set identifiers." + + bug fixes: + - title: "Fix a locking error when composite columns containing formats are used and formats are added/deleted." + tickets: [1233330] + + - title: "EPUB Output: Do not strip <object> tags with type application/svg+xml in addition to those that use image/svg+xml." + tickets: [1236845] + + - title: "Cover grid: Fix selecting all books with Ctrl+A causing subsequent deselects to not fully work." + tickets: [1236348] + + - title: "HTMLZ Output: Fix long titles causing error when converting on windows." + tickets: [1235815] + + - title: "Content server: Fix OPDS category links to composite columns" + + - title: "E-book viewer: Fix regression that broke import/export of bookmarks" + tickets: [1231980] + + - title: "E-book viewer: Use the default font size setting for the dictionary view as well." + tickets: [1232025] + + - title: "DOCX Input: Avoid using the value attribute for simple numbered lists, to silence the asinine epubcheck" + + - title: "HTML Input: Images linked by the poster attribute of the <video> tag are now recognized and processed." 
+ + - title: "DOCX Input: Fix error when converting docx files that have numbering defined with no associated character style." + tickets: [1232100] + + - title: "EPUB Metadata: Implement updating of identifiers other than isbn in the epub file from calibre when polishing or exporting the epub" + + - title: "Amazon metadata download: Fix parsing of some dates on amazon.de" + tickets: [1238125] + + improved recipes: + - National Geographic Magazine + - New York Review of Books + - Focus (PL) + - Carta Capital + - AM 730 + - Ming Pao (HK) + - Neu Osnabrucker Zeitung + + new recipes: + - title: Various Uruguayan news sources + author: Carlos Alves + +- version: 1.5.0 + date: 2013-09-26 + + new features: + - title: "Driver for Woxter Scriba" + tickets: [1228690] + + - title: "Bulk metadata edit: Allow setting the comments for all selected books and also allow cloning the covers. Cloning covers means that the cover of the first selected book will be set for all other selected books." + tickets: [1230040] + + bug fixes: + - title: "Windows: Improved device ejection code. Eject individual drives before trying to eject the device. This fixes incomplete ejection with the Nook devices." + + - title: "Catalogs: fix exclude tags rules not working in non-English locales when creating catalogs in EPUB/MOBI formats." + tickets: [1228949] + + - title: "Kobo driver: Fix reading status being cleared when connecting to a Kobo with older firmware and metadata management set to automatic." + tickets: [1230018] + + - title: "Content server: Sort virtual libraries by name" + tickets: [1229459] + + - title: "DOCX Input: Convert tabs in the docx file into non-breaking spaces in the output document. Note that custom tab stops are not supported." 
+ tickets: [1228893] + + - title: "Conversion: Handle the style attribute on the <html> tag" + + - title: "Handle databases with invalid ratings link tables" + tickets: [1228517] + + - title: "DOCX Input: Handle DOCX files with missing referenced styles" + tickets: [1228669] + + - title: "Update amazon metadata download plugin for changes to the Amazon website" + + improved recipes: + - Slate + - El Universal (VE) + - GoComics + + new recipes: + - title: + +- version: 1.4.0 + date: 2013-09-20 + + new features: + - title: "Column icons: Allow the use of multiple icons with column icon rules." + description: "You can now have column icon rules display multiple icons in a single column, side by side. There are two ways to do this, either specify multiple icons when creating the rule, or create multiple rules that match the same book and specify the icon type to be 'composed' for every rule. See Preferences->Look & Feel->Column icons for details." + + - title: "Kobo driver: Add support for new cover handling in Kobo Aura with updated firmware" + + - title: "Driver for Icarus Essence" + tickets: [1226304] + + - title: "Show a warning when attempting to copy books between libraries that do not have the same set of custom columns." + tickets: [1225484] + + - title: "EPUB/AZW3 Output: Use shorthand forms for margin, padding and border CSS properties, where possible" + + - title: "Allow colons in identifier values, needed for using URIs as identifiers" + tickets: [1224885] + + - title: "Comments editor: Allow treating arbitrary URLs as images" + + - title: "Show full path of library under mouse in status bar when switching/renaming/deleting libraries via the calibre library button." + tickets: [1224925] + + - title: "DOCX Input: Add support for embedded EMF images that are just wrappers around an actual raster image." 
+ tickets: [1224849] + + bug fixes: + - title: "Conversion: Fix font subsetting not working for large fonts with more than 4000 glyphs, such as CJK fonts" + + - title: "HTML Input: Fix a regression that broke processing of HTML files that contain meta tags with dc: namespaced attribute values." + + - title: "Fix switching to an empty virtual library not blanking the book details panel" + + - title: "Keep position when deleting tags in the tag editor" + tickets: [1226093] + + - title: "Book details panel: Render custom comments fields the same as the builtin comments field. In particular this fixes problems caused by wide text and images in custom comments fields." + tickets: [1226350] + + - title: "Metadata jackets: Do not error out when using a custom template with some custom fields that are not present." + tickets: [1225357] + + - title: "AZW3 Output: Don't choke on invalid (undecodable) links in the input document" + + - title: "Cover grid: Respect the double click on library view tweak" + + - title: "Fix covers set by drag and drop or pasting in the edit metadata dialog showing compression artifacts due to aggressive jpeg compression" + + - title: "Conversion: Fix a bug that could cause incorrect border values to be used when cascading, shorthand border CSS is present." + + - title: "Fix regression in 1.3 that caused the book list to not track the current book when using Next/Previous in the edit metadata dialog." + + improved recipes: + - Liberation + - Politika + + new recipes: + - title: Sage News + author: Brian Hahn + + - title: Il Cambiamento + author: ghib9 + +- version: 1.3.0 + date: 2013-09-13 + + new features: + - title: "When doing searches or switching between virtual libraries in the main book list, preserve the current book. The currently selected book will remain visible if it is present in the results of the search or the selected virtual library." 
+ tickets: [1216713] + + - title: "Drivers for Oppo Find 5 and PocketBook Mini 515" + tickets: [1223853] + + bug fixes: + - title: "DOCX Input: Handle numbered paragraphs where the numbering is specified in the paragraph style, instead of on the paragraph directly. Also support the use of arbitrary, styled text for bullets." + + - title: "DOCX Input: Fix a single line break at the end of a paragraph not being rendered as a blank line." + + - title: "DOCX Input: Fix extra top/bottom margins around headings when the heading style in word does not specify any top/bottom margins." + + - title: "DOCX Input: Handle images in footnotes and endnotes." + tickets: [1221686] + + - title: "ODT Input: Only treat the first image as a cover if it is of suitable size, instead of any image in the document." + tickets: [1224157] + + - title: "Book polishing: Do not leave behind the old comments when updating metadata if the comments have been deleted in calibre." + + - title: "PDF Output: Fix non-breaking space characters incorrectly rendered in PDF outline." + tickets: [1223862] + + - title: "Content server: Fix error in opds feed after using virtual libraries in the main server." + tickets: [1222108] + + - title: "Do not scroll the book list horizontally after editing metadata." + tickets: [1221552] + + - title: "New database backend: Handle databases that contain multiple tags/series/publishers/etc. that differ only in case." + tickets: [1221545] + + improved recipes: + - Harvard Business Review + - Jakarta Post + - Jakarta Globe + - Dilema Veche + - Daily Express + - Anandtech + - High Country News + + new recipes: + - title: Caravan Magazine + author: Kovid Goyal + + - title: Phys Org + author: Krittika Goyal + +- version: 1.2.0 + date: 2013-09-06 + + new features: + - title: "Conversion: Add support for the CSS3 rem font size unit" + + - title: "MTP devices, such as Android tablets/phones: Allow ignoring any folder on the device, not just top level folders. 
For newly connected devices, also scan /Android/data/com.amazon.kindle for books by default (newer versions of the Kindle app place downloaded files there)." + + - title: "Speed up sorting when the book list is showing a restricted set of books, such as when the results of a search are displayed or a virtual library is used." + tickets: [1217622] + + - title: "Edit metadata dialog: Add an undo option for the Trim cover button." + tickets: [1219227] + + bug fixes: + - title: "Amazon metadata download: Update to handle website changes at amazon.com" + + - title: "PDF Output: Workaround a bug in the library calibre uses to render HTML to PDF that caused text in some documents that used small-caps fonts to not render correctly." + tickets: [1216354] + + - title: "Kobo driver: When a sideloaded kepub is added from a Kobo device to the calibre library, it is added as an epub, but the file copied is corrupt." + tickets: [1221035] + + - title: "Fix changing the user interface language in the welcome wizard causing some parts of the interface to remain in the old language until calibre is restarted." + tickets: [1220767] + + - title: "Fix regression in 1.0 that broke setting author names with the | character in them." + tickets: [1220348] + + - title: "Content server: When running from inside the main calibre program, do not restrict the books shown based on the current virtual library in the main program. If you wish to restrict the books shown in the content server, use Preferences->Sharing over the net." + + - title: "Output dates in the local timezone instead of UTC when generating CSV catalogs" + + - title: "Library maintenance: When doing a check library instead of dumping the database to SQL and restoring it, run a VACUUM. This works around various bugs in the dump and restore capabilities of apsw." 
+ tickets: [1217988] + + - title: "Edit metadata dialog: Fix adding an image to an empty comments block not working" + + - title: "Conversion: Fix font declarations with invalid font-family values causing conversion to abort when subsetting is enabled." + + - title: "MOBI Output: Fix conversion of some super/sub scripts failing if they are the first or last element in a paragraph." + tickets: [1218858] + + - title: "New database backend: Various improvements to make the backend more robust against databases with invalid/corrupt data in them." + tickets: [1218465, 1218783] + + improved recipes: + - Countryfile + +- version: 1.1.0 + date: 2013-08-30 + + new features: + - title: "Rewrite the HTML metadata parser to make it faster and more robust." + tickets: [1217751] + + - title: "Book list: When sorting on a currently unsorted column, use the last applied sort for that column, instead of always sorting in ascending order." + tickets: [1216714] + + - title: "PocketBook driver: Scan for book files in the entire device not just in the 'books' folder" + + bug fixes: + - title: "Fix a regression in 1.0 that could cause the dates in custom date-type columns to change in some timezones when using the edit metadata dialog to make unrelated changes." + tickets: [1217096] + + - title: "When replacing formats in a book with a very long title+authors on windows, calibre could leave behind the old format file, because the filename shortening algorithm has changed. Handle that case." + + - title: "Fix content server giving an error if you return to the top level page after using the virtual libraries." + tickets: [1216838] + + - title: "Fix calibredb not updating the running calibre instance properly in 1.0" + tickets: [1218177] + + - title: "Fix a regression in 1.0 that broke splitting of multiple valued fields like tags into many items during a rename." 
+ tickets: [1216699] + + - title: "Fix a regression in 1.0 that caused an error when trying to set values for tags with the same item repeated, with different case." + tickets: [1216398] + + - title: "Fix a regression that broke downloading news when output format is set to PDF" + + - title: "Creating a catalog with an already existing catalog in the library would cause a temporary duplicate entry in the book list. Also fix the author sort for catalogs generated in the AZW3 format not being correct." + + - title: "EPUB metadata: When changing the title in an EPUB 2.0 file that has multiple titles, remove the extra titles." + tickets: [1211949] + + - title: "Fix a regression in 1.0 that caused Search and Replace in the bulk metadata edit dialog to be much slower than before" + + - title: "Fix a regression in 1.0 that caused incorrect sorting and searching on some composite columns (columns built from other columns)." + + - title: "Fix a regression in 1.0 that prevented the moving of libraries inside calibre" + tickets: [1216401] + + - title: "Virtual Library tabs: If the user activates a hidden tab via the Virtual Library button, change the name of the All Books tab to reflect the hidden virtual library." + tickets: [1216174] + + - title: "Ignore text records in the database that are damaged, instead of erroring out. Lets the rest of the data be used." + tickets: [1215981] + + - title: "Fix regression that broke calibredb catalog when sorting on the id field." + tickets: [1216090] + + - title: "HTML Input: Handle malformed OPF files when converting. " + tickets: [1215924] + + - title: "Ensure that the Formats custom column (if present) is updated when a new format is created as a result of a conversion." + tickets: [1215885] + + - title: "Fix a bug in 1.0 that broke the Check Library function on computers with non-English locales." + tickets: [1215819] + + - title: "Content server: Fix blank username causing error on startup." 
+ tickets: [1215893] + + - title: "Fix sorting of book list by multi-valued fields like tags not correct in the new backend." + tickets: [1215820] + + improved recipes: + - Daily Mirror + + new recipes: + - title: VFR Magazine + author: Krittika Goyal + +- version: 1.0.0 + date: 2013-08-23 + + new features: + - title: "A new 'cover grid' view of the books in your calibre library" + description: "Excellent for judging your books by their covers :) To use click the button with the icon of a grid in the bottom right corner of the main window. It can be configured via Preferences->Look & Feel->Cover Grid" + type: major + + - title: "A new, faster database backend" + description: "The database backend in calibre has been re-written from scratch. The new code is smaller, more robust and much faster than the old code. The exact speedup will depend on the number of books and number and type of custom columns in your library. Users have reported calibre startup times decreasing by a factor of 2-3." + type: major + + - title: "For a summary of the major changes in calibre between 0.9 and 1.0, see http://calibre-ebook.com/new-in/ten" + type: major + + - title: "RTF Input: Add option to ignore WMF images instead of replacing them with a placeholder." + tickets: [1213599] + + - title: "Content server: Make virtual libraries available as searches from the start page. They work just like saved searches, clicking on a virtual library will show you all the books in that virtual library." + + bug fixes: + - title: "Remove extra, useless 'language' entry in metadata download configuration dialog" + + - title: "Kobo driver: Display device collections even if the driver is configured to not manage shelves on the device." + tickets: [1214233] + + - title: "Fix typo in calibre.desktop file on linux" + tickets: [1213664] + + - title: "Edit metadata dialog: Disable OK button while results are being downloaded." 
+ tickets: [1213397] + + - title: "In OS X 10.8 Apple stopped redirecting stdout/stderr to Console.app for applications launched by launch services. Re-enable the redirection, useful for debugging." + + - title: "Fix virtual library tabs not being updated when using VL button" + + improved recipes: + - Consumerist + - jeuxvideo + - Metro UK + - El Tribuno + - High Country News + - Daily Express + - Providence Journal + - mediapart + + new recipes: + - title: News24 and Nuus24 + author: Nicki de Wet + +- version: 0.9.44 + date: 2013-08-16 + + new features: + - title: "Add an option to display all virtual libraries as tabs above the book list." + description: "Convenient to quickly switch between virtual libraries. To enable, click the Virtual library button and select 'Show virtual libraries as tabs'. You can re-arrange the tabs by drag and drop and close tabs you do not want. Right click on the tabs to restore closed tabs." + + - title: "An improved cover trimming algorithm to automatically detect and remove borders and extra space from the edge of cover images. To try it use the 'Trim' button in the edit metadata dialog. This can sometimes remove too much so if you don't like the result, just click cancel. You can make the algorithm more or less aggressive via Preferences->Tweaks" + + - title: "Allow customizing the comic metadata reader plugin via Preferences->Plugins to read the series index from either the volume or the issue number of the comic." + tickets: [1211433] + + - title: "Linux MTP driver: Add ids for some newer devices." + tickets: [1212458] + + - title: "Add a trim cover option to the bulk metadata edit dialog" + + - title: "Make the book information dialog user resizable, with a splitter between the cover and the info panel. Also change the background of the cover panel for books that have been marked using the Temp marker plugin." 
+ tickets: [1209057] + + - title: "Driver for Samsung Galaxy Young Android phone" + tickets: [1212918] + + bug fixes: + - title: "PDF Output: Do not abort conversion if the document being converted has an HTML cover (found in some broken EPUB files)." + + - title: "RTF Input: When converting RTF files with no codepage, use the input encoding setting as the codepage." + tickets: [1163572] + + improved recipes: + - The Independent + - El Periodica de Aragon + - El Correo + + new recipes: + - title: Daily Express + author: Dave Asbury + +- version: 0.9.43 + date: 2013-08-09 + + new features: + - title: "TXT Input: Allow using various markdown extensions for more features when converting markdown formatted txt files. See http://pythonhosted.org/Markdown/extensions/index.html for details." + + - title: "Sending by email: Allow sending by email to an arbitrary combination of email addresses. Access it via the 'Select recipients' menu entry in the Email To menu." + tickets: [1207818] + + - title: "A new 'Sort By' action for the right click menu. This allows sorting on all columns in the library, not just the visible columns. To use it go to Preferences->Toolbars and add it to 'The context menu for books in the calibre library'" + + - title: "Allow adding images into the comments field, by clicking on the insert link button in the comments editor in the edit metadata dialog." + + - title: "Allow skipping the confirm bulk reconvert dialog" + + - title: "EPUB Input: If the EPUB file identifies an actual cover image in addition to the titlepage html file, use the cover image instead of rendering the titlepage. This is faster and has the advantage that an EPUB to EPUB conversion preserves internal cover structure." + + - title: "Get Books: Improve searching by removing punctuation from title/authors before matching." + + bug fixes: + - title: "Conversion: Fix empty inline tags that are the second child of a paragraph causing text to change location." 
+ tickets: [1207735] + + - title: "Fix book count in tooltip of choose library button not updating" + tickets: [1208217] + + - title: "Kobo driver: When deleting shelves that have been synced, the Activity entry for the shelf was not being deleted. This left a tile for the shelf on the home screen of the Glo and AuraHD." + tickets: [1208159] + + - title: "Comments editor: The Insert Link button has no effect until the user clicks inside the comments box, therefore disable it until it is ready, to prevent confusion." + tickets: [1208073] + + - title: "Get Books: Update various Polish store plugins" + + improved recipes: + - The Sunday Times UK and The Times Online + - Telegraph UK + - "Le Monde: Edition abonnés" + - The Scotsman + + new recipes: + - title: Various French news sources + author: Malah + + - title: La Capital de Rosario + author: Darko Miletic + + - title: Jot Down + author: desUbiKado + + - title: Private Eye + author: Martyn Pritchard + +- version: 0.9.42 + date: 2013-08-02 + + new features: + - title: "When downloading metadata from Amazon, convert the amazon categories into tags. You can turn this off by going to Preferences->Metadata download and configuring the Amazon source." + tickets: [1206763] + + - title: "Kobo driver: Add an option to modify the styling in books being sent to the device, based on a template on the device." + tickets: [1207151] + + - title: "Get Books: Add support for two more Polish ebook stores: cdp.pl and ebooki.allegro.pl" + + - title: "calibredb: Add a new clone command to create clones of libraries with the same custom columns, virtual libraries, etc. as the current library." + + bug fixes: + - title: "MOBI metadata: Do not fail to set metadata in MOBI files if they have EXTH fields with NULL pointers to a cover or thumbnail." + tickets: [1205757] + + - title: "Fix editing of book metadata failing when its timestamp is out of range for the system." 
+ tickets: [1191599] + + - title: "Fix renaming a user category to the same name it already has erases the user category." + tickets: [1207131] + + - title: "Fix drag 'n drop of cover onto conversion dialog not working" + + - title: "Device drivers: Explicitly fsync() all files when writing to devices, to reduce chances of file corruption if the device is disconnected while jobs are running" + + - title: "Fix calibre not appearing in Ubuntu's 'Open with..' menu" + tickets: [1207518] + + improved recipes: + - PC World + +- version: 0.9.41 + date: 2013-07-27 + + new features: + - title: "Add a button to clear the current virtual library easily" + + - title: "Driver for Surftab Ventos" + tickets: [1204885] + + - title: "Ebook-viewer: Allow re-ordering bookmarks in the bookmarks manager by drag and drop." + + bug fixes: + - title: "DOCX Input: Fix conversion breaking for files that use heading style paragraphs to insert line rules" + + - title: "Content server: Fix last search query not being fully sanitized in results page" + tickets: [1205385] + + - title: "Book polishing: Fix page margins being removed if an unused font was found during subsetting of embedded fonts." + + - title: "PDF Output: Do not error out when the input document uses a font that cannot be subset, such as the Symbol font. Instead print a warning and embed the full font." + tickets: [1203449] + + - title: "Conversion: Fix a regression in the last release that broke conversion of a few files with comments just before a chapter start." 
+ tickets: [1188635] + + improved recipes: + - Something Awful + - Spektrum der Wissenschaft + - mediapart.fr + - Dilbert + - Antyweb + - Scientific American + - taz.de (RSS) + + new recipes: + - title: Blindbuch and No names, No jackets + author: Armin Geller + + - title: El Tribuno Salta and Jujuy + author: Darko Miletic + +- version: 0.9.40 + date: 2013-07-19 + + new features: + - title: "EPUB Output: Add an option to insert an inline Table of Contents into the main text." + tickets: [1201006] + + - title: "Driver for LG Android phone" + tickets: [1202013] + + - title: "When matching books in the library against the device manually, pre-fill the search field with the book title" + tickets: [1200826] + + bug fixes: + - title: "PDF Input: Fix a regression that caused some images to be flipped when converting PDF files that use image rotation operators." + tickets: [1201083] + + - title: "Fix regression that caused incorrect font size in dropcaps generated by the DOCX input plugin" + + - title: "Get Books: Fix searching for title and author returning some extra matches, if the title starts with an article like the, a or an." + tickets: [1200012] + + - title: "PDF Output: Fix extra blank page being inserted at the start of the chapter when converting some epub files from feedbooks" + + - title: "PDF Output: Workaround bug in WebKit's getBoundingClientRect() method that could cause links to occasionally point to incorrect locations." + tickets: [1202390] + + - title: "E-book viewer: Fix a bug that could cause the reported position to be incorrect immediately after opening a previously opened book. This also fixes the Back button not working if a link is clicked on the page immediately after opening the book." + + - title: "Fix memory card not being detected for Elonex 621 on Windows" + + - title: "Fix regression in last release that broke auto-conversion of ebooks when sending to device/sending by email." 
+ tickets: [1200864] + + - title: "Get Books: Update amazon plugins for website changes" + + - title: "Allow using non-ascii chars in email passwords." + tickets: [1202825] + + improved recipes: + - Galaxy's Edge + + new recipes: + - title: Il Foglio + author: faber1971 + + - title: Le Monde Diplomatique and Acrimed + author: Gaetan Lehmann + +- version: 0.9.39 + date: 2013-07-12 + + new features: + - title: "Bulk metadata edit: Add a checkbox to prevent the refreshing of the book list after the bulk edit. This means that the book list will not be resorted and any existing search/virtual library will not be refreshed. Useful if you have a large library as the refresh can be slow." + + - title: "Allow manually marking a book in the calibre library as being on the device. To do so click the device icon in calibre, then right click on the book you want marked and choose 'Match book to library'. Once you are done marking all the books, right click the device icon and choose 'Update cached metadata'" + + - title: "Driver for Coby Kyros MID1126" + tickets: [1199410] + + - title: "When adding formats to an existing book, by right clicking the add books button, ask for confirmation if some formats will be overwritten." + + - title: "Add a tweak to restrict the list of output formats available in the conversion dialog. Go to Preferences->Tweaks to change it." + + bug fixes: + - title: "Amazon metadata download: Update plugin to deal with the new amazon.com website" + + - title: "Edelweiss metadata download plugin: Workaround for advanced search being broken at the Edelweiss website." + + - title: "Invalid data in the device database on sony readers could cause errors when sorting device collections, ignore those errors." + + - title: "DOCX Input: Fix no page break being inserted before the last section." + tickets: [1198414] + + - title: "Metadata download dialog: Have the OK button enabled in the results screen as well." 
+ tickets: [1198288] + + - title: "Get Books: Update empik store plugin" + + improved recipes: + - Houston Chronicle + - cracked.com + - mediapart.fr + + new recipes: + - title: Glenn Brenwald and Ludwig von Mises Institute + author: anywho + +- version: 0.9.38 + date: 2013-07-05 + + new features: + - title: "Book polishing: Add option to embed all referenced fonts when polishing books using the 'Polish Books' tool." + tickets: [1196038] + + - title: "DOCX Input: Add support for clickable (hyperlinked) images" + tickets: [1196728] + + - title: "DOCX Input: Insert page breaks at the start of every new section" + tickets: [1196728] + + - title: "Drivers for Trekstor Pyrus Maxi and PocketBook Surfpad 2" + tickets: [1196931, 1182850] + + - title: "DOCX Input: Add support for horizontal rules created by typing three hyphens and pressing enter." + + bug fixes: + - title: "Fix detection of SD Card in some PRS-T2N devices" + tickets: [1197970] + + - title: "MOBI Input: Fix a regression that broke parsing of MOBI files with malformed markup that also used entities for apostrophes." + ticket: [1197585] + + - title: "Get Books: Update Woblink store plugin" + + - title: "Metadata download dialog: Prevent the buttons from being re-ordered when the Next button is clicked." + + - title: "PDF Output: Fix links that point to URLs with query parameters being mangled by the conversion process." + tickets: [1197006] + + - title: "DOCX Input: Fix links pointing to locations in the same document that contain multiple, redundant bookmarks not working." + + - title: "EPUB/AZW3 Output: Fix splitting on page-break-after with plain text immediately following the split point causing the text to be added before rather than after the split point." 
+ tickets: [1196728] + + - title: "DOCX Input: handle bookmarks defined at the paragraph level" + tickets: [1196728] + + - title: "DOCX Input: Handle hyperlinks created as fields" + tickets: [1196728] + + improved recipes: + - iprofessional + + new recipes: + - title: Democracy Now + author: Antoine Beaupre + +- version: 0.9.37 + date: 2013-06-28 + + new features: + - title: "Conversion: Add option to embed all referenced fonts" + type: major + description: "Add an option to embed all fonts that are referenced in the input document but are not already embedded. This will search your system for the referenced font, and if found, the font will be embedded. Only works if the output format supports font embedding (for example: EPUB or AZW3). The option is under the Look & Feel section of the conversion dialog." + + - title: "ToC Editor: When generating a ToC from files, if the file has no text, do not skip it. Instead create an entry using the filename of the file." + + - title: "AZW3 Input: Add support for the page-progression-direction that is used to indicate page turns should happen from right to left. The attribute is passed into EPUB when converting." + tickets: [1194766] + + - title: "ebook-convert: Add a --from-opf option to read metadata from OPF files directly, instead of having to run ebook-meta --from-opf after conversion" + + bug fixes: + - title: "PDF Output: Fix Table of Contents being added to the end of the PDF even without the Add Table of Contents option being enabled." + tickets: [1194836] + + - title: "When auto-merging books on add, also merge identifiers." + + - title: "Fix an error when using the Template Editor to create a template that uses custom columns." + tickets: [1193763] + + - title: "LRF Output: Fix &quot; entities in attribute values causing problems" + + - title: "News download: Apply the default page margin conversion settings. Also, when converting to PDF, apply the pdf conversion defaults."
+ tickets: [1193912] + + - title: "Fix a regression that broke scanning for books on all devices that used the Aluratek Color driver." + tickets: [1192940] + + - title: "fetch-ebook-metadata: Fix --opf argument erroneously requiring a value" + + - title: "When waiting before sending email, log the wait." + tickets: [1195173] + + improved recipes: + - taz.de (RSS) + - Miradas al sur + - Frontline + - La Nacion (Costa Rica) + + +- version: 0.9.36 + date: 2013-06-21 + + new features: + - title: "DOCX Input: Support for Table of Contents created using the Word Table of Contents tool. calibre now first looks for such a Table of Contents and only if one is not found does it generate a ToC from headings." + + - title: "DOCX Input: Add support for images used as bullets in lists" + + - title: "DOCX Input: If a large image that looks like a cover is present at the start of the document, remove it and use it as the cover of the output ebook. This can be turned off under the DOCX Input section of the conversion dialog." + + - title: "When dropping files onto the Book Details panel, ask for confirmation before adding the files to the book. The confirmation can be disabled." + + - title: "News download: Add the 'downloaded from' link at the bottom of every article when using a touchscreen output profile (like the Tablet profile)." + + - title: "E-book viewer: Change the bookmark button to always popup a menu when clicked, makes accessing existing bookmarks easier." + + - title: "After a bulk metadata download, focus the review button on the popup notification, instead of the OK button." + tickets: [1190931] + + bug fixes: + - title: "DOCX Input: Hide text that has been marked as not being visible in the web view in Word." + + - title: "DOCX Input: When converting docx files with large numbers of unnamed images, do not crash on windows." + tickets: [1191354] + + - title: "DOCX Input: Add support for the Word setting 'No space between paragraphs with the same style'."
+ tickets: [119100] + + - title: "MOBI Output: Fix rendering of SVG images that embed large raster images in 64bit calibre installs." + tickets: [1191020] + + - title: "HTMLZ Output: Fix handling of images with URL unsafe filenames." + tickets: [1192687] + + - title: "Fix unable to change the case of a previously used search because of the search history." + + - title: "When searching allow use of uppercase location names, such as AUTHOR instead of author, automatically lowercasing them." + tickets: [1192785] + + - title: "DOCX metadata: When reading covers from DOCX files use the first image as specified in the actual markup instead of just the first image in the container." + + - title: "Kobo driver: Fix a regression when deleting empty shelves on Kobo devices with older firmware." + tickets: [1192441] + + - title: "Do not show builtin plugins in the get new plugins dialog. If a builtin plugin with the same name as a third party plugin exists, then the builtin plugin was displayed in the get new plugins dialog as installed (happened with the new DOCX Input plugin)." + + - title: "Apple driver: When in synchronous mode (direct to iBooks), disable PDF transfers, as we can't update metadata in iTunes. Not sure when this started, but as of iTunes 11.0.4 it's broken." + + - title: "Get Books: Fix error when using internal browser on some systems" + tickets: [1191199] + + improved recipes: + - The Walrus Mag + - Various Polish news sources + + new recipes: + - title: Various Polish news sources + author: fenuks + - version: 0.9.35 date: 2013-06-14 diff --git a/README.md b/README.md index 2e2130bb42..ad09fe71aa 100644 --- a/README.md +++ b/README.md @@ -24,3 +24,10 @@ Development A [tarball of the source code](http://status.calibre-ebook.com/dist/src) for the current calibre release. + +Bugs +------ + +Bug reports and feature requests should be made in the calibre bug tracker at [launchpad](https://bugs.launchpad.net/calibre).
+The GitHub bug tracker is only for people contributing code to calibre. + diff --git a/imgsrc/marked.svg b/imgsrc/marked.svg new file mode 100644 index 0000000000..c812038668 --- /dev/null +++ b/imgsrc/marked.svg @@ -0,0 +1,162 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<!-- Created with Inkscape (http://www.inkscape.org/) --> + +<svg + xmlns:dc="http://purl.org/dc/elements/1.1/" + xmlns:cc="http://creativecommons.org/ns#" + xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" + xmlns:svg="http://www.w3.org/2000/svg" + xmlns="http://www.w3.org/2000/svg" + xmlns:xlink="http://www.w3.org/1999/xlink" + xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd" + xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" + width="128" + height="128" + id="svg2" + version="1.1" + inkscape:version="0.48.4 r9939" + sodipodi:docname="marked.svg" + inkscape:export-filename="/home/kovid/work/calibre/resources/images/marked.png" + inkscape:export-xdpi="90" + inkscape:export-ydpi="90"> + <title + id="title3847">Pushpin Icon + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + Pushpin Icon + + + Kovid Goyal + + + + + Public domain + + + + + + + + + + diff --git a/imgsrc/tweak.svg b/imgsrc/tweak.svg new file mode 100644 index 0000000000..4dc8c97145 --- /dev/null +++ b/imgsrc/tweak.svgimage/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/manual/conversion.rst b/manual/conversion.rst index c693d0be15..46f8bcd3a0 100644 --- a/manual/conversion.rst +++ b/manual/conversion.rst @@ -537,25 +537,38 @@ Set the :guilabel:`Level 1 TOC` setting to ``//h:h2``. Then, for chapter two, |a How options are set/saved for Conversion ------------------------------------------- -There are two places where conversion options can be set in |app|. 
The first is in Preferences->Conversion. These -settings are the defaults for the conversion options. Whenever you try to convert a new book, the settings set here -will be used by default. +There are two places where conversion options can be set in |app|. The first is +in Preferences->Conversion. These settings are the defaults for the conversion +options. Whenever you try to convert a new book, the settings set here will be +used by default. -You can also change settings in the conversion dialog for each book conversion. When you convert a book, |app| remembers the -settings you used for that book, so that if you convert it again, the saved settings for the individual book will take -precedence over the defaults set in Preferences. You can restore the individual settings to defaults by using the Restore to defaults -button in the individual book conversion dialog. +You can also change settings in the conversion dialog for each book conversion. +When you convert a book, |app| remembers the settings you used for that book, +so that if you convert it again, the saved settings for the individual book +will take precedence over the defaults set in Preferences. You can restore the +individual settings to defaults by using the Restore to defaults button in the +individual book conversion dialog. You can remove the saved settings for a +group of books by selecting all the books and then clicking the edit metadata +button to bring up the bulk metadata edit dialog. Near the bottom of the dialog +is an option to remove stored conversion settings. -When you Bulk Convert a set of books, settings are taken in the following order: +When you Bulk Convert a set of books, settings are taken in the following order (last one wins): + + * From the defaults set in Preferences->Conversion + + * From the saved conversion settings for each book being converted (if + any). This can be turned off by the option in the top left corner of the + Bulk Conversion dialog.
- * From the defaults set in Preferences->Conversion - * From the saved conversion settings for each book being converted (if any). This can be turned off by the option in the top left corner of the Bulk Conversion dialog. * From the settings set in the Bulk conversion dialog -Note that the final settings for each book in a Bulk Conversion will be saved and re-used if the book is converted again. Since the -highest priority in Bulk Conversion is given to the settings in the Bulk Conversion dialog, these will override any book specific -settings. So you should only bulk convert books together that need similar settings. The exceptions are metadata and input format specific -settings. Since the Bulk Conversion dialog does not have settings for these two categories, they will be taken from book specific +Note that the final settings for each book in a Bulk Conversion will be saved +and re-used if the book is converted again. Since the highest priority in Bulk +Conversion is given to the settings in the Bulk Conversion dialog, these will +override any book specific settings. So you should only bulk convert books +together that need similar settings. The exceptions are metadata and input +format specific settings. Since the Bulk Conversion dialog does not have +settings for these two categories, they will be taken from book specific settings (if any) or the defaults. .. note:: @@ -772,9 +785,11 @@ size. By default, |app| uses a page size defined by the current :guilabel:`Output profile`. So if your output profile is set to Kindle, |app| will create a PDF with page size suitable for viewing on the small kindle screen. However, if you view this PDF file on a computer screen, then it will -appear to have too large fonts. To create "normal" sized PDFs, use the override -page size option under :guilabel:`PDF Output` in the conversion dialog. +appear to have too large fonts. 
To create "normal" sized PDFs, use the +:guilabel:`Override page size` option under :guilabel:`PDF Output` in the conversion dialog. +Headers and Footers +^^^^^^^^^^^^^^^^^^^^ You can insert arbitrary headers and footers on each page of the PDF by specifying header and footer templates. Templates are just snippets of HTML code that get rendered in the header and footer locations. For example, to @@ -813,6 +828,9 @@ the page will be used. bottom margins to large enough values, under the Page Setup section of the conversion dialog. +Printable Table of Contents +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + You can also insert a printable Table of Contents at the end of the PDF that lists the page numbers for every section. This is very useful if you intend to print out the PDF to paper. If you wish to use the PDF on an electronic device, diff --git a/manual/creating_plugins.rst b/manual/creating_plugins.rst index 9418f4a955..fe58b6e34e 100644 --- a/manual/creating_plugins.rst +++ b/manual/creating_plugins.rst @@ -92,6 +92,11 @@ The first thing to note is that this zip file has a lot more files in it, explai **about.txt** A text file with information about the plugin + **translations** + A folder containing .mo files with the translations of the user + interface of your plugin into different languages. See below for + details. + Now let's look at the code. __init__.py @@ -175,6 +180,42 @@ You can see the ``prefs`` object being used in main.py: .. literalinclude:: plugin_examples/interface_demo/main.py :pyobject: DemoDialog.config +Adding translations to your plugin +-------------------------------------- + +You can have all the user interface strings in your plugin translated and +displayed in whatever language is set for the main calibre user interface. + +The first step is to go through your plugin's source code and mark all user +visible strings as translatable, by surrounding them in _(). 
For example:: + + action_spec = (_('My plugin'), None, _('My plugin is cool'), None) + +Then use some program to generate .po files from your plugin source code. There +should be one .po file for every language you want to translate into. For +example: de.po for German, fr.po for French and so on. You can use the +`poedit `_ program for this. + +Send these .po files to your translators. Once you get them back, compile them +into .mo files. You can again use poedit for that, or just do:: + + calibre-debug -c "from calibre.translations.msgfmt import main; main()" filename.po + +Put the .mo files into the ``translations`` folder in your plugin. + +The last step is to simply call the function `load_translations()` at the top +of your plugin's .py files. For performance reasons you should only call this +function in those .py files that actually have translatable strings. So in a +typical User Interface plugin you would call it at the top of ``ui.py`` but not +``__init__.py``. + +You can test the translations of your plugins by changing the user interface +language in calibre under Preferences->Look & Feel or by running calibre like +this:: + + CALIBRE_OVERRIDE_LANG=de calibre + +Replace ``de`` with the language code of the language you want to test. The plugin API -------------------------------- diff --git a/manual/custom.py b/manual/custom.py index 30ca28ec96..bffe3d914b 100644 --- a/manual/custom.py +++ b/manual/custom.py @@ -83,7 +83,6 @@ def generate_calibredb_help(preamble, info): global_options = '\n'.join(render_options('calibredb', groups, False, False)) - lines, toc = [], [] for cmd in COMMANDS: args = [] @@ -99,7 +98,7 @@ def generate_calibredb_help(preamble, info): usage = [i for i in usage.replace('%prog', 'calibredb').splitlines()] cmdline = ' '+usage[0] usage = usage[1:] - usage = [i.replace(cmd, ':command:`%s`'%cmd) for i in usage] + usage = [re.sub(r'(%s)([^a-zA-Z0-9])'%cmd, r':command:`\1`\2', i) for i in usage] lines += ['.. 
code-block:: none', '', cmdline, ''] lines += usage groups = [(None, None, parser.option_list)] @@ -152,7 +151,6 @@ def generate_ebook_convert_help(preamble, info): prog = 'ebook-convert-'+(pl.name.lower().replace(' ', '-')) raw += '\n\n' + '\n'.join(render_options(prog, groups, False, True)) - update_cli_doc(os.path.join('cli', 'ebook-convert.rst'), raw, info) def update_cli_doc(path, raw, info): @@ -200,7 +198,8 @@ def cli_docs(app): for script in entry_points['console_scripts'] + entry_points['gui_scripts']: module = script[script.index('=')+1:script.index(':')].strip() cmd = script[:script.index('=')].strip() - if cmd in ('calibre-complete', 'calibre-parallel'): continue + if cmd in ('calibre-complete', 'calibre-parallel'): + continue module = __import__(module, fromlist=[module.split('.')[-1]]) if hasattr(module, 'option_parser'): documented_cmds.append((cmd, getattr(module, 'option_parser')())) @@ -260,3 +259,4 @@ def setup(app): def finished(app, exception): pass + diff --git a/manual/customize.rst b/manual/customize.rst index ceee4ece62..cfd63cea66 100644 --- a/manual/customize.rst +++ b/manual/customize.rst @@ -30,10 +30,13 @@ Environment variables * ``CALIBRE_OVERRIDE_DATABASE_PATH`` - allows you to specify the full path to metadata.db. Using this variable you can have metadata.db be in a location other than the library folder. Useful if your library folder is on a networked drive that does not support file locking. * ``CALIBRE_DEVELOP_FROM`` - Used to run from a calibre development environment. See :ref:`develop`. * ``CALIBRE_OVERRIDE_LANG`` - Used to force the language used by the interface (ISO 639 language code) - * ``CALIBRE_NO_NATIVE_FILEDIALOGS`` - Causes calibre to not use native file dialogs for selecting files/directories. + * ``CALIBRE_NO_NATIVE_FILEDIALOGS`` - Causes calibre to not use native file dialogs for selecting files/directories. Set it to 1 to enable. 
* ``SYSFS_PATH`` - Use if sysfs is mounted somewhere other than /sys * ``http_proxy`` - Used on linux to specify an HTTP proxy +See `How to set environment variables in windows `_ or +`How to set environment variables in OS X `_. + Tweaks ------------ @@ -46,17 +49,31 @@ The default values for the tweaks are reproduced below Overriding icons, templates, et cetera ---------------------------------------- -|app| allows you to override the static resources, like icons, templates, javascript, etc. with customized versions that you like. -All static resources are stored in the resources sub-folder of the calibre install location. On Windows, this is usually -:file:`C:/Program Files/Calibre2/resources`. On OS X, :file:`/Applications/calibre.app/Contents/Resources/resources/`. On linux, if you are using the binary installer -from the calibre website it will be :file:`/opt/calibre/resources`. These paths can change depending on where you choose to install |app|. +|app| allows you to override the static resources, like icons, javascript and +templates for the metadata jacket, catalogs, etc. with customized versions that +you like. All static resources are stored in the resources sub-folder of the +calibre install location. On Windows, this is usually :file:`C:/Program Files/Calibre2/resources`. +On OS X, :file:`/Applications/calibre.app/Contents/Resources/resources/`. On linux, if +you are using the binary installer from the calibre website it will be +:file:`/opt/calibre/resources`. These paths can change depending on where you +choose to install |app|. -You should not change the files in this resources folder, as your changes will get overwritten the next time you update |app|. Instead, go to -:guilabel:`Preferences->Advanced->Miscellaneous` and click :guilabel:`Open calibre configuration directory`. In this configuration directory, create a sub-folder called resources and place the files you want to override in it. 
Place the files in the appropriate sub folders, for example place images in :file:`resources/images`, etc. -|app| will automatically use your custom file in preference to the built-in one the next time it is started. +You should not change the files in this resources folder, as your changes will +get overwritten the next time you update |app|. Instead, go to +:guilabel:`Preferences->Advanced->Miscellaneous` and click +:guilabel:`Open calibre configuration directory`. In this configuration directory, create a +sub-folder called resources and place the files you want to override in it. +Place the files in the appropriate sub folders, for example place images in +:file:`resources/images`, etc. |app| will automatically use your custom file +in preference to the built-in one the next time it is started. -For example, if you wanted to change the icon for the :guilabel:`Remove books` action, you would first look in the built-in resources folder and see that the relevant file is -:file:`resources/images/trash.png`. Assuming you have an alternate icon in PNG format called :file:`mytrash.png` you would save it in the configuration directory as :file:`resources/images/trash.png`. All the icons used by the calibre user interface are in :file:`resources/images` and its sub-folders. +For example, if you wanted to change the icon for the :guilabel:`Remove books` +action, you would first look in the built-in resources folder and see that the +relevant file is :file:`resources/images/trash.png`. Assuming you have an +alternate icon in PNG format called :file:`mytrash.png` you would save it in +the configuration directory as :file:`resources/images/trash.png`. All the +icons used by the calibre user interface are in :file:`resources/images` and +its sub-folders. 
Customizing |app| with plugins -------------------------------- diff --git a/manual/develop.rst b/manual/develop.rst index a939a442b4..9e5b47e8e7 100644 --- a/manual/develop.rst +++ b/manual/develop.rst @@ -49,7 +49,7 @@ All the |app| python code is in the ``calibre`` package. This package contains t * Metadata reading, writing, and downloading is all in ``ebooks.metadata`` * Conversion happens in a pipeline, for the structure of the pipeline, see :ref:`conversion-introduction`. The pipeline consists of an input - plugin, various transforms and an output plugin. The that code constructs + plugin, various transforms and an output plugin. The code that constructs and drives the pipeline is in :file:`plumber.py`. The pipeline works on a representation of an ebook that is like an unzipped epub, with manifest, spine, toc, guide, html content, etc. The diff --git a/manual/faq.rst b/manual/faq.rst index bdac21a622..b718ce6b75 100644 --- a/manual/faq.rst +++ b/manual/faq.rst @@ -499,11 +499,17 @@ that allows you to create collections on your Kindle from the |app| metadata. It I am getting an error when I try to use |app| with my Kobo Touch/Glo/etc.? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The Kobo has very buggy firmware. Connecting to it has been known to fail at random. Certain combinations of motherboard, USB ports/cables/hubs can exacerbate this tendency to fail. If you are getting an error when connecting to your touch with |app| try the following, each of which has solved the problem for *some* |app| users. +The Kobo has very buggy firmware. Connecting to it has been known to fail at +random. Certain combinations of motherboard, USB ports/cables/hubs can +exacerbate this tendency to fail. If you are getting an error when connecting +to your touch with |app| try the following, each of which has solved the +problem for *some* |app| users. 
* Connect the Kobo directly to your computer, not via USB Hub * Try a different USB cable and a different USB port on your computer - * Try a different computer (preferably an older model) + * Try a different computer, in particular the Kobo does not work well with + some Windows XP machines. If you are on Windows XP, try a computer with a + newer version of windows. * Try upgrading the firmware on your Kobo Touch to the latest * Try resetting the Kobo (sometimes this cures the problem for a little while, but then it re-appears, in which case you have to reset again and again) * Try only putting one or two books onto the Kobo at a time and do not keep large collections on the Kobo @@ -622,13 +628,29 @@ should fix by hand. The list of books in |app| is blank! ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -In order to understand why that happened, you have to understand what a |app| library is. At the most basic level, a |app| library is just a folder. Whenever you add a book to |app|, that book's files are copied into this folder (arranged into sub folders by author and title). Inside the |app| library folder, at the top level, you will see a file called metadata.db. This file is where |app| stores the metadata like title/author/rating/tags etc. for *every* book in your |app| library. The list of books that |app| displays is created by reading the contents of this metadata.db file. +In order to understand why that happened, you have to understand what a |app| +library is. At the most basic level, a |app| library is just a folder. Whenever +you add a book to |app|, that book's files are copied into this folder +(arranged into sub folders by author and title). Inside the |app| library +folder, at the top level, you will see a file called metadata.db. This file is +where |app| stores the metadata like title/author/rating/tags etc. for *every* +book in your |app| library. The list of books that |app| displays is created by +reading the contents of this metadata.db file. 
There can be two reasons why |app| is showing an empty list of books: - * Your |app| library folder changed its location. This can happen if it was on an external disk and the drive letter for that disk changed. Or if you accidentally moved the folder. In this case, |app| cannot find its library and so starts up with an empty library instead. To remedy this, do a right-click on the |app| icon in the |app| toolbar (it will say 0 books underneath it) and select Switch/create library. Click the little blue icon to select the new location of your |app| library and click OK. + * Your |app| library folder changed its location. This can happen if it was + on an external disk and the drive letter for that disk changed. Or if you + accidentally moved the folder. In this case, |app| cannot find its library + and so starts up with an empty library instead. To remedy this, do a + right-click on the |app| icon in the |app| toolbar and select Switch/create + library. Click the little blue icon to select the new location of your + |app| library and click OK. - * Your metadata.db file was deleted/corrupted. In this case, you can ask |app| to rebuild the metadata.db from its backups. Right click the |app| icon in the |app| toolbar (it will say 0 books underneath it) and select Library maintenance->Restore database. |app| will automatically rebuild metadata.db. + * Your metadata.db file was deleted/corrupted. In this case, you can ask + |app| to rebuild the metadata.db from its backups. Right click the |app| + icon in the |app| toolbar and select Library maintenance->Restore database. + |app| will automatically rebuild metadata.db. I am getting errors with my calibre library on a networked drive/NAS? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -700,8 +722,14 @@ Take your pick: |app| is pronounced as cal-i-ber *not* ca-li-bre. If you're wondering, |app| is the British/commonwealth spelling for caliber. Being Indian, that's the natural spelling for me.
Why does |app| show only some of my fonts on OS X? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -|app| embeds fonts in ebook files it creates. Ebook files support embedding only TrueType (.ttf) fonts. Most fonts on OS X systems are in .dfont format, thus they cannot be embedded. |app| shows only TrueType fonts found on your system. You can obtain many TrueType fonts on the web. Simply download the .ttf files and add them to the Library/Fonts directory in your home directory. +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +|app| embeds fonts in ebook files it creates. Ebook files support embedding +only TrueType and OpenType (.ttf and .otf) fonts. Most fonts on OS X systems +are in .dfont format, thus they cannot be embedded. |app| shows only TrueType +and OpenType fonts found on your system. You can obtain many such fonts on the +web. Simply download the .ttf/.otf files and add them to the Library/Fonts +directory in your home directory. |app| is not starting on Windows? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -763,6 +791,13 @@ There are several possible things I know of, that can cause this: that prevent 64-bit |app| from working properly. If you are using the 64-bit version of |app| try switching to the 32-bit version. + * If the crashes happen specifically when you are using a file open dialog, + like clicking on the Add Books button or the Save to Disk button, then + you may have an issue with the windows file open dialogs on your + computer. You can tell calibre to use its own file open dialogs by + setting the environment variable ``CALIBRE_NO_NATIVE_FILEDIALOGS=1``. + See `How to set environment variables in windows `_. + If none of the above apply to you, then there is some other program on your computer that is interfering with |app|. 
First reboot your computer in safe mode, to have as few running programs as possible, and see if the crashes still @@ -776,6 +811,31 @@ The only way to find the culprit is to eliminate the programs one by one and see which one is causing the issue. Basically, stop a program, run calibre, check for crashes. If they still happen, stop another program and repeat. + +Using the viewer or doing any conversions results in a permission denied error on windows +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Something on your computer is preventing calibre from accessing its own +temporary files. Most likely the permissions on your Temp folder are incorrect. +Go to the folder :file:`C:\\Users\\USERNAME\\AppData\\Local` in Windows +Explorer and then right click on the :file:`Temp` folder, select Properties and go to +the Security tab. Make sure that your user account has full control for this +folder. + +Some users have reported that running the following command in an Administrator +Command Prompt fixed their permissions. To get an Administrator Command Prompt +search for cmd.exe in the start menu, then right click on the command prompt +entry and select Run as Administrator. At the command prompt type the following +command and press Enter:: + + icacls "%appdata%\..\Local\Temp" /reset /T + +Alternately, you can run calibre as Administrator, but doing so will cause +some functionality, such as drag and drop to not work. + +Finally, some users have reported that disabling UAC fixes the problem. + + |app| is not starting on OS X? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -816,9 +876,10 @@ My antivirus program claims |app| is a virus/trojan? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The first thing to check is that you are downloading |app| from the official -website: ``_. |app| is a very popular program -and unscrupulous people try to setup websites offering it for download to fool -the unwary. +website: ``_.
Make sure you are clicking the +download links on the left, not the advertisements on the right. |app| is a +very popular program and unscrupulous people try to setup websites offering it +for download to fool the unwary. If you have the official download and your antivirus program is still claiming |app| is a virus, then, your antivirus program is wrong. Antivirus programs use @@ -880,10 +941,25 @@ Why doesn't |app| have an automatic update? For many reasons: - * *There is no need to update every week*. If you are happy with how |app| works turn off the update notification and be on your merry way. Check back to see if you want to update once a year or so. - * Pre downloading the updates for all users in the background would require about 80TB of bandwidth *every week*. That costs thousands of dollars a month. And |app| is currently growing at 300,000 new users every month. - * If I implement a dialog that downloads the update and launches it, instead of going to the website as it does now, that would save the most ardent |app| updater, *at most five clicks a week*. There are far higher priority things to do in |app| development. - * If you really, really hate downloading |app| every week but still want to be up to the latest, I encourage you to run from source, which makes updating trivial. Instructions are :ref:`available here `. + * *There is no need to update every week*. If you are happy with how |app| + works turn off the update notification and be on your merry way. Check back + to see if you want to update once a year or so. There is a check box to + turn off the update notification, on the update notification itself. + + * |app| downloads currently use `about 100TB of bandwidth a month + `_. Implementing automatic + updates would greatly increase that and end up costing thousands of dollars + a month, which someone has to pay. And |app| is currently growing at `half + a million new installs a month `_. 
+ * If I implement a dialog that downloads the update and launches it, instead + of going to the website as it does now, that would save the most ardent + |app| updater, *at most five clicks a week*. There are far higher priority + things to do in |app| development. + + * If you really, really hate downloading |app| every week but still want to + be up to the latest, I encourage you to run from source, which makes + updating trivial. Instructions are :ref:`available here `. How is |app| licensed? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/manual/gui.rst b/manual/gui.rst index 127d0062c0..03e012f31e 100755 --- a/manual/gui.rst +++ b/manual/gui.rst @@ -62,7 +62,13 @@ Add books The :guilabel:`Add books` action can read metadata from a wide variety of ebook formats. In addition, it tries to guess metadata from the filename. See the :ref:`config_filename_metadata` section, to learn how to configure this. -To add an additional format for an existing book use the :ref:`edit_meta_information` action. +To add an additional format for an existing book you can do any of three things: + + 1. Drag and drop the file onto the book details panel on the right side of the main window + + 2. Right click the Add books button and choose :guilabel:`Add files to selected books`. + + 3. Click the red add books button in the top right area of the :guilabel:`Edit Metadata` dialog, accessed by the :ref:`edit_meta_information` action. .. _edit_meta_information: @@ -593,6 +599,8 @@ Calibre has several keyboard shortcuts to save you time and mouse movement. Thes - Toggle Book Details panel * - :kbd:`Alt+Shift+T` - Toggle Tag Browser + * - :kbd:`Alt+Shift+G` + - Toggle Cover Grid * - :kbd:`Alt+A` - Show books by the same author as the current book * - :kbd:`Alt+T` diff --git a/manual/index.rst b/manual/index.rst index b8f98a5561..3d057ec740 100755 --- a/manual/index.rst +++ b/manual/index.rst @@ -38,6 +38,8 @@ Sections glossary +..
REMOVE_IN_PDF + The main |app| user interface ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/manual/regexp.rst b/manual/regexp.rst index 7c879fa2e2..be2faf25b3 100644 --- a/manual/regexp.rst +++ b/manual/regexp.rst @@ -1,4 +1,3 @@ - .. include:: global.rst .. _regexptutorial: @@ -26,7 +25,7 @@ There are a few places |app| uses regular expressions. There's the Search & Repl What on earth *is* a regular expression? ------------------------------------------------ -A regular expression is a way to describe sets of strings. A single regular expression cat *match* a number of different strings. This is what makes regular expression so powerful -- they are a concise way of describing a potentially large number of variations. +A regular expression is a way to describe sets of strings. A single regular expression can *match* a number of different strings. This is what makes regular expression so powerful -- they are a concise way of describing a potentially large number of variations. .. note:: I'm using string here in the sense it is used in programming languages: a string of one or more characters, characters including actual characters, numbers, punctuation and so-called whitespace (linebreaks, tabulators etc.). Please note that generally, uppercase and lowercase characters are not considered the same, thus "a" being a different character from "A" and so forth. In |app|, regular expressions are case insensitive in the search bar, but not in the conversion options. There's a way to make every regular expression case insensitive, but we'll discuss that later. It gets complicated because regular expressions allow for variations in the strings it matches, so one expression can match multiple strings, which is why people bother using them at all. More on that in a bit. 
diff --git a/manual/resources/simple_donate_button.gif b/manual/resources/simple_donate_button.gif new file mode 100644 index 0000000000..42dd2c3c88 Binary files /dev/null and b/manual/resources/simple_donate_button.gif differ diff --git a/manual/server.rst b/manual/server.rst index 20d4434a28..b5aa203ea8 100644 --- a/manual/server.rst +++ b/manual/server.rst @@ -104,7 +104,7 @@ Save this adapter as :file:`calibre-wsgi-adpater.py` somewhere your server will Let's suppose that we want to use WSGI in Apache. First enable WSGI in Apache by adding the following to :file:`httpd.conf`:: - LoadModule proxy_module modules/mod_wsgi.so + LoadModule wsgi_module modules/mod_wsgi.so The exact technique for enabling the wsgi module will vary depending on your Apache installation. Once you have the proxy modules enabled, add the following rules to httpd.conf (or if you are using virtual hosts to the conf file for the virtual host in question:: diff --git a/manual/templates/layout.html b/manual/templates/layout.html index b8389b0ac9..ff2e7b0113 100644 --- a/manual/templates/layout.html +++ b/manual/templates/layout.html @@ -16,16 +16,13 @@
{% if not embedded %}
- - + +
{% endif %} @@ -62,7 +59,7 @@
- +

diff --git a/manual/virtual_libraries.rst b/manual/virtual_libraries.rst index f695bb0e45..9432bb04ef 100644 --- a/manual/virtual_libraries.rst +++ b/manual/virtual_libraries.rst @@ -94,6 +94,13 @@ You can quickly use the current search as a temporary virtual library by clicking the :guilabel:`Virtual Library` button and choosing the :guilabel:`*current search` entry. +You can display all available virtual libraries as tabs above the book list. +This is particularly handy if you like switching between virtual libraries very +often. Click the :guilabel:`Virtual Library` button and select :guilabel:`Show +virtual libraries as tabs`. You can re-arrange the tabs by drag and drop and +close ones you do not want to see. Closed tabs can be restored by +right-clicking on the tab bar. + Using additional restrictions ------------------------------- diff --git a/recipes/10minutos.recipe b/recipes/10minutos.recipe new file mode 100644 index 0000000000..4c2f8a7ec7 --- /dev/null +++ b/recipes/10minutos.recipe @@ -0,0 +1,50 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__author__ = '2013, Carlos Alves ' +''' +10minutos.com.uy +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class General(BasicNewsRecipe): + title = '10minutos' + __author__ = 'Carlos Alves' + description = 'Noticias de Salto - Uruguay' + tags = 'news, sports' + language = 'es_UY' + timefmt = '[%a, %d %b, %Y]' + use_embedded_content = False + recursion = 5 + encoding = 'utf8' + remove_javascript = True + no_stylesheets = True + + oldest_article = 2 + max_articles_per_feed = 100 + keep_only_tags = [dict(name='div', attrs={'class':'post-content'})] + + remove_tags = [ + dict(name='div', attrs={'class':['hr', 'titlebar', 'navigation']}), + dict(name='p', attrs={'class':'post-meta'}), + dict(name=['object','link']) + ] + + extra_css = ''' + h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;} + h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, 
sans-serif;font-weight: bold;} + h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;} + p {font-family:Arial,Helvetica,sans-serif;} + ''' + feeds = [ + (u'Articulos', u'http://10minutos.com.uy/feed/') + ] + + def get_cover_url(self): + return 'http://10minutos.com.uy/a/img/logo.png' + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + return soup diff --git a/recipes/acrimed.recipe b/recipes/acrimed.recipe new file mode 100644 index 0000000000..acd98a063a --- /dev/null +++ b/recipes/acrimed.recipe @@ -0,0 +1,30 @@ +# vim:fileencoding=utf-8 +from __future__ import unicode_literals + +__license__ = 'GPL v3' +__copyright__ = '2012' +''' +acrimed.org +''' + +import re +from calibre.web.feeds.news import BasicNewsRecipe + +class Acrimed(BasicNewsRecipe): + title = u'Acrimed' + __author__ = 'Gaëtan Lehmann' + oldest_article = 30 + max_articles_per_feed = 100 + auto_cleanup = True + auto_cleanup_keep = '//div[@class="crayon article-chapo-4112 chapo"]' + language = 'fr' + masthead_url = 'http://www.acrimed.org/IMG/siteon0.gif' + feeds = [(u'Acrimed', u'http://www.acrimed.org/spip.php?page=backend')] + + preprocess_regexps = [ + (re.compile(r'(.*) - Acrimed \| Action Critique M.*dias'), lambda m: '' + m.group(1) + ''), + (re.compile(r'

(.*) - Acrimed \| Action Critique M.*dias

'), lambda m: '

' + m.group(1) + '

')] + + extra_css = """ + .chapo{font-style:italic; margin: 1em 0 0.5em} + """ diff --git a/recipes/am730.recipe b/recipes/am730.recipe index 0fac4bea51..d491c49ef1 100644 --- a/recipes/am730.recipe +++ b/recipes/am730.recipe @@ -3,10 +3,10 @@ from __future__ import unicode_literals __license__ = 'GPL v3' __copyright__ = '2013, Eddie Lau' __Date__ = '' -__HiResImg__ = True ''' Change Log: +2013/09/28 -- update due to website redesign, add cover 2013/03/30 -- first version ''' @@ -15,7 +15,7 @@ from calibre.utils.date import now as nowf import os, datetime, re from calibre.web.feeds.recipes import BasicNewsRecipe from contextlib import nested -from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag +from calibre.ebooks.BeautifulSoup import BeautifulSoup from calibre.ebooks.metadata.opf2 import OPFCreator from calibre.ebooks.metadata.toc import TOC from calibre.ebooks.metadata import MetaInformation @@ -32,18 +32,17 @@ class AppleDaily(BasicNewsRecipe): encoding = 'utf-8' auto_cleanup = False remove_javascript = True - use_embedded_content = False + use_embedded_content = False no_stylesheets = True description = 'http://www.am730.com.hk' category = 'Chinese, News, Hong Kong' masthead_url = 'http://www.am730.com.hk/images/logo.jpg' - - extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} div[id=articleHeader] {font-size:200%; text-align:left; font-weight:bold;} photocaption {font-size:50%; margin-left:auto; margin-right:auto;}' - keep_only_tags = [dict(name='div', attrs={'id':'articleHeader'}), - dict(name='div', attrs={'class':'thecontent wordsnap'}), - dict(name='a', attrs={'class':'lightboximg'})] - remove_tags = [dict(name='img', attrs={'src':'/images/am730_article_logo.jpg'}), - dict(name='img', attrs={'src':'/images/am_endmark.gif'})] + extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 20px; margin-bottom: 20px; max-height:70%;} 
div[id=articleHeader] {font-size:200%; text-align:left; font-weight:bold;} li {font-size:50%; margin-left:auto; margin-right:auto;}' + keep_only_tags = [dict(name='h2', attrs={'class':'printTopic'}), + dict(name='div', attrs={'id':'article_content'}), + dict(name='div', attrs={'id':'slider'})] + remove_tags = [dict(name='img', attrs={'src':'images/am730_article_logo.jpg'}), + dict(name='img', attrs={'src':'images/am_endmark.gif'})] def get_dtlocal(self): dt_utc = datetime.datetime.utcnow() @@ -84,6 +83,16 @@ class AppleDaily(BasicNewsRecipe): def get_weekday(self): return self.get_dtlocal().weekday() + def get_cover_url(self): + soup = self.index_to_soup('http://www.am730.com.hk') + cover = 'http://www.am730.com.hk/' + soup.find(attrs={'id':'mini_news_img'}).find('img').get('src', False) + br = BasicNewsRecipe.get_browser(self) + try: + br.open(cover) + except: + cover = None + return cover + def populate_article_metadata(self, article, soup, first): if first and hasattr(self, 'add_toc_thumbnail'): picdiv = soup.find('img') @@ -93,48 +102,17 @@ class AppleDaily(BasicNewsRecipe): def parse_index(self): feeds = [] soup = self.index_to_soup('http://www.am730.com.hk/') - ul = soup.find(attrs={'class':'nav-section'}) - sectionList = [] - for li in ul.findAll('li'): - a = 'http://www.am730.com.hk/' + li.find('a', href=True).get('href', False) - title = li.find('a').get('title', False).strip() - sectionList.append((title, a)) - for title, url in sectionList: - articles = self.parse_section(url) - if articles: - feeds.append((title, articles)) + optgroups = soup.findAll('optgroup') + for optgroup in optgroups: + sectitle = optgroup.get('label') + articles = [] + for option in optgroup.findAll('option'): + articlelink = "http://www.am730.com.hk/" + option.get('value') + title = option.string + articles.append({'title': title, 'url': articlelink}) + feeds.append((sectitle, articles)) return feeds - def parse_section(self, url): - soup = self.index_to_soup(url) - items = 
soup.findAll(attrs={'style':'padding-bottom: 15px;'}) - current_articles = [] - for item in items: - a = item.find(attrs={'class':'t6 f14'}).find('a', href=True) - articlelink = 'http://www.am730.com.hk/' + a.get('href', True) - title = self.tag_to_string(a) - description = self.tag_to_string(item.find(attrs={'class':'t3 f14'})) - current_articles.append({'title': title, 'url': articlelink, 'description': description}) - return current_articles - - def preprocess_html(self, soup): - multia = soup.findAll('a') - for a in multia: - if not (a == None): - image = a.find('img') - if not (image == None): - if __HiResImg__: - image['src'] = image.get('src').replace('/thumbs/', '/') - caption = image.get('alt') - tag = Tag(soup, "photo", []) - tag2 = Tag(soup, "photocaption", []) - tag.insert(0, image) - if not caption == None: - tag2.insert(0, caption) - tag.insert(1, tag2) - a.replaceWith(tag) - return soup - def create_opf(self, feeds, dir=None): if dir is None: dir = self.output_dir @@ -288,3 +266,4 @@ class AppleDaily(BasicNewsRecipe): with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file): opf.render(opf_file, ncx_file) + diff --git a/recipes/anandtech.recipe b/recipes/anandtech.recipe index b9aa4520db..6d5d6b4716 100644 --- a/recipes/anandtech.recipe +++ b/recipes/anandtech.recipe @@ -12,26 +12,30 @@ class anan(BasicNewsRecipe): title = 'Anandtech' description = 'comprehensive Hardware Tests' - __author__ = 'Oliver Niesner' # 2012-09-20 AGE: update + __author__ = 'Oliver Niesner, Armin Geller' # 2013-09-07 AGE: update use_embedded_content = False language = 'en' timefmt = ' [%d %b %Y]' - oldest_article = 7 # 2012-09-20 AGE: update + oldest_article = 7 max_articles_per_feed = 40 no_stylesheets = True remove_javascript = True encoding = 'utf-8' - cover_url = 'http://www.anandtech.com/content/images/globals/header_logo.png' # 2012-09-20 AGE: new - masthead_url = 'http://www.anandtech.com/content/images/globals/printheader.png' # 2012-09-20 AGE: 
update + cover_url = 'http://www.anandtech.com/content/images/globals/header_logo.png' + masthead_url = 'http://www.anandtech.com/content/images/globals/printheader.png' + keep_only_tags = [ + dict(name='section', attrs={'class':['main_cont']}), + ] + remove_tags=[ # 2013-09-07 AGE: update + dict(name='div', attrs={'class':['print', # logo + 'breadcrumb_area noprint', + 'fl-rt noprint', + 'blog_top_right',]}) + ] - remove_tags=[ - dict(name='a', attrs={'class': 'bluebutton noprint'}), - dict(name='img', attrs={'alt': 'header'}), - ] # 2012-09-20 AGE: update - - feeds = [ ('Anandtech', 'http://www.anandtech.com/rss/')] + feeds = [('Anandtech', 'http://www.anandtech.com/rss/')] def print_version(self,url): - return url.replace('0Cshow0C', '0Cprint0C') # 2012-09-20 AGE: update \ No newline at end of file + return url.replace("0Cshow0C", "0Cprint0C") # 2013-09-07 AGE: update diff --git a/recipes/antyweb.recipe b/recipes/antyweb.recipe index b7d3d2583c..d85ed4adcc 100644 --- a/recipes/antyweb.recipe +++ b/recipes/antyweb.recipe @@ -21,21 +21,9 @@ class AntywebRecipe(BasicNewsRecipe): simultaneous_downloads = 3 keep_only_tags =[] - keep_only_tags.append(dict(name = 'h1', attrs = { 'class' : 'mm-article-title'})) - keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'mm-article-content'})) - - - remove_tags =[] - remove_tags.append(dict(name = 'h2', attrs = {'class' : 'widgettitle'})) - remove_tags.append(dict(name = 'img', attrs = {'class' : 'alignleft'})) - remove_tags.append(dict(name = 'div', attrs = {'class' : 'float: right;margin-left:1em;margin-bottom: 0.5em;padding-bottom: 3px; width: 72px;'})) - remove_tags.append(dict(name = 'img', attrs = {'src' : 'http://antyweb.pl/wp-content/uploads/2011/09/HOSTERSI_testy_pasek600x30.gif'})) - remove_tags.append(dict(name = 'div', attrs = {'class' : 'podwpisowe'})) - - - extra_css = ''' - body {font-family: verdana, arial, helvetica, geneva, sans-serif ;} - ''' + keep_only_tags.append(dict(name = 'h1', attrs = { 
'class' : 'entry-title '})) + keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'news-content'})) + extra_css = '''body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}''' feeds = [ (u'Artykuly', u'feed://feeds.feedburner.com/Antyweb?format=xml'), diff --git a/recipes/blind_buch_de.recipe b/recipes/blind_buch_de.recipe new file mode 100644 index 0000000000..9ec24fb5d2 --- /dev/null +++ b/recipes/blind_buch_de.recipe @@ -0,0 +1,63 @@ +# +# Written: July 2013 +# Last Edited: 2013-07-11 +# Version: 1.0 +# Last update: 2013-07-25 +# + +__license__ = 'GPL v3' +__copyright__ = '2013, Armin Geller' + +''' +Fetch blindenbuch.de +''' + +from calibre.web.feeds.recipes import BasicNewsRecipe +class AdvancedUserRecipe1303841067(BasicNewsRecipe): + + title = u'Blindbuch - Bücher neu entdecken' + __author__ = 'Armin Geller' # AGe 2013-07-11 + description = u'Bücher blind präsentiert' + publisher = 'blindbuch.de' + publication_type = 'ebook news' + tags = 'Bücher, Literatur, E-Books, Germany' + timefmt = ' [%a, %d %b %Y]' + publication_type = 'Feed' + language = 'de-DE' + encoding = 'utf-8' + + oldest_article = 14 + max_articles_per_feed = 100 + + no_stylesheets = True + use_embedded_content = False + remove_javascript = True + + conversion_options = {'title' : title, + 'comments' : description, + 'tags' : tags, + 'language' : language, + 'publisher' : publisher, + 'authors' : publisher, + } + + cover_url = 'http://blindbuch.de/img/blindbuch_calibre.png' + masthead_url = 'http://www.blindbuch.de/img/Masterhead.JPG' + + extra_css = ''' + h1{font-weight:bold;font-size:large;} + .post-meta {font-size: 1em;text-align: left; font-style: italic} + ''' + + keep_only_tags = [ + dict(name='article') + ] + + remove_tags = [ + dict(name='div', attrs={'class':['su-spoiler su-spoiler-style-1','post-comments comments',]}), + dict(name='span', attrs={'class':['post-comments comments',]}), + dict(name='div', attrs={'addthis':['title',]}), + ] + + feeds = [(u'Blindbuch', 
u'http://www.blindbuch.de/feed/')] + diff --git a/recipes/caravan_magazine.recipe b/recipes/caravan_magazine.recipe new file mode 100644 index 0000000000..e6ed872b8b --- /dev/null +++ b/recipes/caravan_magazine.recipe @@ -0,0 +1,92 @@ +import html5lib +from lxml import etree +from calibre.web.feeds.recipes import BasicNewsRecipe +from calibre.utils.cleantext import clean_xml_chars + +def is_title(tag): + return tag.name == 'h2' and tag.parent.name == 'div' and tag.parent['class'] == 'left-corner' + +class CaravanMagazine(BasicNewsRecipe): + + title = 'Caravan Magazine' + __author__ = 'Kovid Goyal' + description = 'An Indian Journal of politics and culture' + language = 'en_IN' + timefmt = ' [%b, %Y]' + + no_stylesheets = True + + keep_only_tags = [ + dict(name=is_title), + dict(attrs={'class':['subhheading', 'authorndate', 'full-image-view', 'fullpage-body']}), + ] + remove_tags = [ + dict(attrs={'class':['share-with']}), + dict(attrs={'class':lambda x: x and 'thumb-image-view' in x}), + ] + + def preprocess_raw_html(self, raw_html, url): + root = html5lib.parse( + clean_xml_chars(raw_html), treebuilder='lxml', + namespaceHTMLElements=False) + for s in root.xpath('//script'): + s.getparent().remove(s) + return etree.tostring(root, encoding=unicode) + + def preprocess_html(self, soup): + # Handle the image thumbnails + for div in soup.findAll('div', attrs={'class':lambda x: x and x.startswith('show-image')}): + if div['class'] == 'show-image': + div.extract() + else: + div['style'] = 'page-break-inside:avoid' + + return soup + + # To parse artice toc + def parse_index(self): + raw = self.index_to_soup( + 'http://caravanmagazine.in/current-issue', raw=True) + raw = raw.decode('utf-8') + raw = self.preprocess_raw_html(raw, None) + soup = self.index_to_soup(raw) + + a = soup.find('a', rel=lambda x:x and '[field_c_issues_image]' in x) + if a is not None: + self.cover_url = a['href'] + + ci = soup.find(attrs={'class': 'current-issue-block'}) + current_section = 'Section' 
+ current_articles = [] + feeds = [] + for div in ci.findAll( + attrs={'class': ['view-header', 'view-content']}): + if div['class'] == 'view-header': + if current_articles: + feeds.append((current_section, current_articles)) + current_section = self.tag_to_string(div).replace('paging_filter', '') + current_articles = [] + self.log('Section:', current_section) + else: + for art in div.findAll('div', attrs={'class': lambda x: x and 'views-row' in x.split()}): + title = div.find(attrs={'class': 'views-field-title'}) + if title is not None: + a = title.find('a', href=True) + if a is not None: + href = a['href'] + if href.startswith('/'): + href = 'http://caravanmagazine.in' + href + article = { + 'title': self.tag_to_string(title), 'url': href} + title.extract() + desc = self.tag_to_string(div).strip() + if desc: + article['description'] = desc + current_articles.append(article) + self.log('\t' + article['title']) + self.log('\t\t' + article['url']) + + if current_articles: + feeds.append((current_section, current_articles)) + + return feeds diff --git a/recipes/carta.recipe b/recipes/carta.recipe index b63ab0aa6d..7f2a9e1e19 100644 --- a/recipes/carta.recipe +++ b/recipes/carta.recipe @@ -12,7 +12,7 @@ class Carta(BasicNewsRecipe): title = u'Carta' description = 'News about electronic publishing' - __author__ = 'Oliver Niesner' + __author__ = 'Oliver Niesner' # AGe Update 2013-10-13 use_embedded_content = False timefmt = ' [%a %d %b %Y]' oldest_article = 7 @@ -25,7 +25,7 @@ class Carta(BasicNewsRecipe): - remove_tags_after = [dict(name='p', attrs={'class':'tags-blog'})] + remove_tags_after = [dict(name='div', attrs={'id':'BlogContent'})] # AGe remove_tags = [dict(name='p', attrs={'class':'print'}), dict(name='p', attrs={'class':'tags-blog'}), diff --git a/recipes/carta_capital.recipe b/recipes/carta_capital.recipe index 8bd21046b1..ba13856e16 100644 --- a/recipes/carta_capital.recipe +++ b/recipes/carta_capital.recipe @@ -1,23 +1,29 @@ +# 
vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import (unicode_literals, division, absolute_import, + print_function) + from calibre.web.feeds.news import BasicNewsRecipe -class AdvancedUserRecipe1312361378(BasicNewsRecipe): - title = u'Carta capital' - __author__ = 'Pablo Aldama' +class AdvancedUserRecipe1380852962(BasicNewsRecipe): + title = u'Carta Capital' + __author__ = 'Erico Lisboa' language = 'pt_BR' - oldest_article = 9 + oldest_article = 15 max_articles_per_feed = 100 + auto_cleanup = True + use_embedded_content = False - feeds = [(u'Politica', u'http://www.cartacapital.com.br/category/politica/feed') - ,(u'Economia', u'http://www.cartacapital.com.br/category/economia/feed') - ,(u'Cultura', u'http://www.cartacapital.com.br/category/cultura/feed') - ,(u'Internacional', u'http://www.cartacapital.com.br/category/internacional/feed') - ,(u'Saude', u'http://www.cartacapital.com.br/category/saude/feed') - ,(u'Sociedade', u'http://www.cartacapital.com.br/category/sociedade/feed') - ,(u'Tecnologia', u'http://www.cartacapital.com.br/category/tecnologia/feed') - ,(u'Carta na escola', u'http://www.cartacapital.com.br/category/carta-na-escola/feed') - ,(u'Carta fundamental', u'http://www.cartacapital.com.br/category/carta-fundamental/feed') - ,(u'Carta verde', u'http://www.cartacapital.com.br/category/carta-verde/feed') - -] - def print_version(self, url): - return url + '/print' + feeds = [(u'Pol\xedtica', +u'http://www.cartacapital.com.br/politica/politica/rss'), (u'Economia', +u'http://www.cartacapital.com.br/economia/economia/atom.xml'), +(u'Sociedade', +u'http://www.cartacapital.com.br/sociedade/sociedade/atom.xml'), +(u'Internacional', +u'http://www.cartacapital.com.br/internacional/internacional/atom.xml'), +(u'Tecnologia', +u'http://www.cartacapital.com.br/tecnologia/tecnologia/atom.xml'), +(u'Cultura', +u'http://www.cartacapital.com.br/cultura/cultura/atom.xml'), +(u'Sa\xfade', u'http://www.cartacapital.com.br/saude/saude/atom.xml'), 
+(u'Educa\xe7\xe3o', +u'http://www.cartacapital.com.br/educacao/educacao/atom.xml')] diff --git a/recipes/consumerist.recipe b/recipes/consumerist.recipe index 3d8056e49d..2050185180 100644 --- a/recipes/consumerist.recipe +++ b/recipes/consumerist.recipe @@ -1,3 +1,5 @@ +## Last Edit: 2013-08-23 +## From: Armin Geller __license__ = 'GPL v3' __copyright__ = '2010, NA' ''' @@ -18,33 +20,30 @@ class Consumerist(BasicNewsRecipe): encoding = 'utf-8' use_embedded_content = False language = 'en' - masthead_url = 'http://consumerist.com/css/images/footer_man.gif' + masthead_url = 'http://consumermediallc.files.wordpress.com/2013/02/consumerist.png'# AGe 2013-08-23 + extra_css = ''' - body{font-family: "Lucida Grande",Helvetica,Arial,sans-serif} - img{margin-bottom: 1em} - h1{font-family :Arial,Helvetica,sans-serif; font-size:x-large} - h2{font-family :Arial,Helvetica,sans-serif; font-size:large} - ''' + body{font-family: "Lucida Grande",Helvetica,Arial,sans-serif} + img{margin-bottom: 1em} + h1{font-family :Arial,Helvetica,sans-serif; font-size:x-large} + h2{font-family :Arial,Helvetica,sans-serif; font-size:large} + ''' conversion_options = { - 'comment' : description - , 'tags' : category - , 'publisher' : publisher - , 'language' : language - } + 'comment' : description, + 'tags' : category, + 'publisher' : publisher, + 'language' : language, + } remove_attributes = ['width','height'] - #keep_only_tags = [dict(attrs={'class':['', 'category-breadcrumb']}),] - remove_tags_before = dict(name='h2') - remove_tags = [ - #dict(name='iframe'), - dict(name='div', attrs={'class':['e-comments', 'more-about', 'entry-tags']}), - #dict(name='div', attrs={'id':['IEContainer', 'clickIncludeBox']}), - #dict(name='ul', attrs={'class':'article-tools'}), - #dict(name='ul', attrs={'class':'articleTools'}), - ] + keep_only_tags = dict(name='div', attrs={'class':['hfeed',]}) # AGe 2013-08-23 - remove_tags_after = dict(attrs={'class':'e-body'}) + remove_tags = [dict(name='div', 
attrs={'class':['navigation', # AGe 2013-08-23 + 'wpcom-related-posts widget widget_related_posts', # AGe 2013-08-23 + 'sharedaddy sd-like-enabled sd-sharing-enabled',]}), # AGe 2013-08-23 + dict(name='div', attrs={'id':['comments',]}), # AGe 2013-08-23 + ] feeds = [(u'Articles', u'http://consumerist.com/index.xml')] diff --git a/recipes/countryfile.recipe b/recipes/countryfile.recipe index 717f81afce..f0680b5434 100644 --- a/recipes/countryfile.recipe +++ b/recipes/countryfile.recipe @@ -20,27 +20,22 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe): #article_already_exists = False #feed_hash = '' def get_cover_url(self): - soup = self.index_to_soup('http://www.countryfile.com/magazine') - cov = soup.find(attrs={'class' : re.compile('imagecache imagecache-250px_wide')})#'width' : '160', - print '&&&&&&&& ',cov,' ***' - cov=str(cov) - #cov2 = re.findall('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', cov) - cov2 = re.findall('/(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', cov) + soup = self.index_to_soup('http://www.countryfile.com/magazine') + cov = soup.find(attrs={'class' : re.compile('imagecache imagecache-250px')}) # 'width' : '160', - cov2 = str(cov2) - cov2= "http://www.countryfile.com"+cov2[2:len(cov2)-8] - - print '******** ',cov2,' ***' - # try to get cover - if can't get known cover - br = browser() - - br.set_handle_redirect(False) - try: - br.open_novisit(cov2) - cover_url = cov2 - except: - cover_url = 'http://www.countryfile.com/sites/default/files/imagecache/160px_wide/cover/2_1.jpg' - return cover_url + cov=str(cov) + cov=cov[10:] + cov=cov[:-135] + br = browser() + br.set_handle_redirect(False) + try: + br.open_novisit(cov) + cover_url = cov + except: + cover_url = 'http://www.countryfile.com/sites/default/files/imagecache/160px_wide/cover/2_1.jpg' + return cover_url + preprocess_regexps = [ + (re.compile(r' \| Countryfile.com', re.IGNORECASE | re.DOTALL), lambda match: '')] 
remove_tags = [ # dict(attrs={'class' : ['player']}), @@ -48,6 +43,5 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe): feeds = [ (u'Homepage', u'http://www.countryfile.com/rss/home'), (u'Country News', u'http://www.countryfile.com/rss/news'), - (u'Countryside', u'http://www.countryfile.com/rss/countryside'), + (u'Countryside', u'http://www.countryfile.com/rss/countryside'), ] - diff --git a/recipes/cracked_com.recipe b/recipes/cracked_com.recipe index 829299ae17..5a024f7b23 100644 --- a/recipes/cracked_com.recipe +++ b/recipes/cracked_com.recipe @@ -1,63 +1,51 @@ from calibre.web.feeds.news import BasicNewsRecipe -class Cracked(BasicNewsRecipe): - title = u'Cracked.com' - __author__ = 'UnWeave' - language = 'en' - description = "America's Only HumorSite since 1958" - publisher = 'Cracked' - category = 'comedy, lists' - oldest_article = 3 #days - max_articles_per_feed = 100 - no_stylesheets = True - encoding = 'ascii' - remove_javascript = True - use_embedded_content = False - feeds = [ (u'Articles', u'http://feeds.feedburner.com/CrackedRSS/') ] +class Cracked(BasicNewsRecipe): + title = u'Cracked.com' + __author__ = 'UnWeave' + language = 'en' + description = "America's Only HumorSite since 1958" + publisher = 'Cracked' + category = 'comedy, lists' + oldest_article = 3 # days + max_articles_per_feed = 100 + no_stylesheets = True + encoding = 'ascii' + remove_javascript = True + use_embedded_content = False + # auto_cleanup = True + + feeds = [(u'Articles', u'http://feeds.feedburner.com/CrackedRSS/')] conversion_options = { - 'comment' : description - , 'tags' : category - , 'publisher' : publisher - , 'language' : language - } + 'comment': description, 'tags': category, 'publisher': publisher, 'language': language + } - remove_tags_before = dict(id='PrimaryContent') + keep_only_tags = [dict(name='article', attrs={'class': 'module article dropShadowBottomCurved'}), + dict(name='article', attrs={'class': 'module blog dropShadowBottomCurved'})] - 
remove_tags_after = dict(name='div', attrs={'class':'shareBar'}) - - remove_tags = [ dict(name='div', attrs={'class':['social', - 'FacebookLike', - 'shareBar' - ]}), - - dict(name='div', attrs={'id':['inline-share-buttons', - ]}), - - dict(name='span', attrs={'class':['views', - 'KonaFilter' - ]}), - #dict(name='img'), - ] + remove_tags = [ + dict(name='section', attrs={'class': ['socialTools', 'quickFixModule']})] def appendPage(self, soup, appendTag, position): # Check if article has multiple pages - pageNav = soup.find('nav', attrs={'class':'PaginationContent'}) + pageNav = soup.find('nav', attrs={'class': 'PaginationContent'}) if pageNav: # Check not at last page - nextPage = pageNav.find('a', attrs={'class':'next'}) + nextPage = pageNav.find('a', attrs={'class': 'next'}) if nextPage: nextPageURL = nextPage['href'] nextPageSoup = self.index_to_soup(nextPageURL) # 8th
tag contains article content - nextPageContent = nextPageSoup.findAll('section')[7] + nextPageContent = nextPageSoup.findAll('article')[0] newPosition = len(nextPageContent.contents) - self.appendPage(nextPageSoup,nextPageContent,newPosition) + self.appendPage(nextPageSoup, nextPageContent, newPosition) nextPageContent.extract() pageNav.extract() - appendTag.insert(position,nextPageContent) + appendTag.insert(position, nextPageContent) def preprocess_html(self, soup): self.appendPage(soup, soup.body, 3) return soup + diff --git a/recipes/daily_express.recipe b/recipes/daily_express.recipe new file mode 100644 index 0000000000..dc55d3f0b8 --- /dev/null +++ b/recipes/daily_express.recipe @@ -0,0 +1,88 @@ +import re +from calibre.web.feeds.news import BasicNewsRecipe +class AdvancedUserRecipe1376229553(BasicNewsRecipe): + title = u'Daily Express' + __author__ = 'Dave Asbury' + # 9-9-13 added article author and now use (re.compile(r'>[\w].+? News<' + encoding = 'utf-8' + remove_empty_feeds = True + #remove_javascript = True + no_stylesheets = True + oldest_article = 1 + max_articles_per_feed = 10 + #auto_cleanup = True + compress_news_images = True + compress_news_images_max_size = 30 + ignore_duplicate_articles = {'title', 'url'} + masthead_url = 'http://cdn.images.dailyexpress.co.uk/img/page/express_logo.png' + + preprocess_regexps = [ + + (re.compile(r'widget', re.IGNORECASE | re.DOTALL), lambda match: ''), + (re.compile(r'Related articles', re.IGNORECASE | re.DOTALL), lambda match: ''), + (re.compile(r'Add Your Comment<', re.IGNORECASE | re.DOTALL), lambda match: '<'), + (re.compile(r'>More [\w].+?<', re.IGNORECASE), lambda match: '><'), + (re.compile(r'>[\w].+? 
News<', re.IGNORECASE), lambda match: '><'), + #(re.compile(r'Health News<', re.IGNORECASE | re.DOTALL), lambda match: '<'), + #(re.compile(r'Car News<', re.IGNORECASE | re.DOTALL), lambda match: '<'), + #(re.compile(r'TV & Radio News<', re.IGNORECASE | re.DOTALL), lambda match: '<'), + #(re.compile(r'Food & Recipe News<', re.IGNORECASE | re.DOTALL), lambda match: '<'), + #(re.compile(r'More City & Business<', re.IGNORECASE | re.DOTALL), lambda match: '<'), + #(re.compile(r'Travel News<', re.IGNORECASE | re.DOTALL), lambda match: '<'), + #(re.compile(r'Garden News<', re.IGNORECASE | re.DOTALL), lambda match: '<'), + #(re.compile(r'Fashion & Beauty News<', re.IGNORECASE | re.DOTALL), lambda match: '<'), + #(re.compile(r'More Personal Finance<', re.IGNORECASE | re.DOTALL), lambda match: '<'), + #(re.compile(r'

More UK

', re.IGNORECASE | re.DOTALL), lambda match: ''), + + ] + + remove_tags = [ + dict(attrs={'class' : 'quote'}), + #dict(attrs={'class' : 'author'}), + dict(name='footer'), + dict(attrs={'id' : 'header_addons'}), + dict(attrs={'class' : 'hoverException'}), + dict(name='_li'),dict(name='li'), + dict(attrs={'class' : 'box related-articles clear'}), + dict(attrs={'class' : 'news-list'}), + dict(attrs={'class' : 'sponsored-section'}), + dict(attrs={'class' : 'pull-quote on-right'}), + dict(attrs={'class' : 'pull-quote on-left'}), + + ] + keep_only_tags = [ + dict(name='h1'), + dict(attrs={'class' : 'publish-info'}), + dict(name='h3', limit=2), + dict(attrs={'class' : 'clearfix hR new-style'}), + ] + + feeds = [(u'UK News', u'http://www.express.co.uk/posts/rss/1/uk'), + (u'World News',u'http://www.express.co.uk/posts/rss/78/world'), + (u'Finance',u'http://www.express.co.uk/posts/rss/21/finance'), + (u'Sport',u'http://www.express.co.uk/posts/rss/65/sport'), + (u'Entertainment',u'http://www.express.co.uk/posts/rss/18/entertainment'), + (u'Lifestyle',u'http://www.express.co.uk/posts/rss/8/life&style'), + (u'Fun',u'http://www.express.co.uk/posts/rss/110/fun'), + ] + + def get_cover_url(self): + soup = self.index_to_soup('http://www.express.co.uk/ourpaper/') + cov = soup.find(attrs={'src' : re.compile('http://images.dailyexpress.co.uk/img/covers/')}) + cov=str(cov) + cov2 = re.findall('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', cov) + + cov=str(cov2) + cov=cov[2:len(cov)-2] + cover_url=cov + return cover_url + + extra_css = ''' + h1{font-weight:bold;font-size:175%;} + h2{font-weight:normal;font-size:75%;} + #p{font-size:14px;} + #body{font-size:14px;} + .photo-caption {display: block;margin-left: auto;margin-right: auto;width:100%;font-size:40%;} + .publish-info {font-size:50%;} + .photo img {display: block;margin-left: auto;margin-right: auto;width:100%;} + ''' diff --git a/recipes/daily_mirror.recipe b/recipes/daily_mirror.recipe index 
91a274ab8c..d8cb6e024e 100644 --- a/recipes/daily_mirror.recipe +++ b/recipes/daily_mirror.recipe @@ -7,50 +7,50 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe): description = 'News as provided by The Daily Mirror -UK' __author__ = 'Dave Asbury' - # last updated 19/10/12 + # last updated 27/8/13 language = 'en_GB' #cover_url = 'http://yookeo.com/screens/m/i/mirror.co.uk.jpg' masthead_url = 'http://www.nmauk.co.uk/nma/images/daily_mirror.gif' - + #recursions = 10 compress_news_images = True - oldest_article = 1 - max_articles_per_feed = 12 + compress_news_images_max_size = 30 + oldest_article = 1.5 + max_articles_per_feed = 10 remove_empty_feeds = True remove_javascript = True no_stylesheets = True - ignore_duplicate_articles = {'title'} + ignore_duplicate_articles = {'url'} - # auto_cleanup = True + #auto_cleanup = True #conversion_options = { 'linearize_tables' : True } - - - keep_only_tags = [ dict(name='h1'), + keep_only_tags = [dict(name='h1'), dict(name='div',attrs={'class' : 'lead-text'}), - dict(name='div',attrs={'class' : 'styleGroup clearfix'}), + dict(attrs={'class' : 'tools clearfix'}), dict(name='div',attrs={'class' : 'widget relatedContents pictures widget-editable viziwyg-section-245 inpage-widget-158123'}), # dict(name='figure',attrs={'class' : 'clearfix'}), dict(name='div',attrs={'class' :'body '}), - + dict(name='div',attrs={'class' :'thumb'}), + dict(attrs={'img alt' : ['Perishers','Horace']}), + #dict(attrs={'class' : 'tmRow span-15-5 col-1 article-page'}), #dict(attrs={'class' : ['article-attr','byline append-1','published']}), - #dict(name='p'), + # dict(name='p'), ] - remove_tags = [ - dict(attrs={'class' : ['article sa-teaser type-opinion','image-gallery','gallery-caption']}), + dict(attrs={'class' : ['article sa-teaser type-opinion','last','gallery-caption','gallery-data','ir btn-fullscreen','avatar']}), # ,'image-gallery' dict(attrs={'class' : 'comment'}), dict(name='title'), dict(name='ul',attrs={'class' : 'clearfix breadcrumbs '}), 
dict(name='ul',attrs={'id' : 'login-201109171215'}), - dict(name='div',attrs={'class' : ['inline-ad span-16 last','caption']}),#'widget navigation breadcrumb widget-editable viziwyg-section-198 inpage-widget-80721 span-17','image-credit' + #'widget navigation breadcrumb widget-editable viziwyg-section-198 inpage-widget-80721 span-17','image-credit' + dict(name='div',attrs={'class' : ['inline-ad span-16 last','caption']}), ] preprocess_regexps = [ (re.compile(r'- mirror.co.uk', re.IGNORECASE | re.DOTALL), lambda match: '')] - feeds = [ (u'News',u'http://www.mirror.co.uk/news/rss.xml'), (u'Sports',u'http://www.mirror.co.uk/sport/rss.xml'), @@ -63,26 +63,31 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe): # example of commented out feed not needed ,(u'Travel','http://www.mirror.co.uk/advice/travel/rss.xml') ] extra_css = ''' - h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;} - h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;} - p{font-family:Arial,Helvetica,sans-serif;font-size:small;} - body{font-family:Helvetica,Arial,sans-serif;font-size:small;} - ''' + h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:170%;} + .article figure figcaption {display: block;margin-left: auto;margin-right: auto; + width:100%;font-family:Arial,Helvetica,sans-serif;font-size:40%;} + + #h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;} + p{font-family:Arial,Helvetica,sans-serif;} + body{font-family:Helvetica,Arial,sans-serif;} + .article figure{display: block;margin-left: auto;margin-right: auto;width:100%;} + .lead-text p {font-size:150%} + ''' def get_cover_url(self): soup = self.index_to_soup('http://www.politicshome.com/uk/latest_frontpage.html') -# look for the block containing the mirror button and url + # look for the block containing the mirror button and url cov = soup.find(attrs={'style' : 'background-image: 
url(http://www.politicshome.com/images/sources/source_frontpage_button_92.gif);'}) cov2 = str(cov) cov2='http://www.politicshome.com'+cov2[9:-142] -#cov2 now contains url of the page containing pic + # cov2 now contains url of the page containing pic soup = self.index_to_soup(cov2) cov = soup.find(attrs={'id' : 'large'}) cov=str(cov) cov2 = re.findall('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', cov) cov2 = str(cov2) cov2=cov2[2:len(cov2)-2] - #cov2 now is pic url, now go back to original function + # cov2 now is pic url, now go back to original function br = browser() br.set_handle_redirect(False) try: diff --git a/recipes/democracy_now.recipe b/recipes/democracy_now.recipe new file mode 100644 index 0000000000..f7868c19dd --- /dev/null +++ b/recipes/democracy_now.recipe @@ -0,0 +1,45 @@ +# vim:fileencoding=utf-8 +from calibre.web.feeds.news import BasicNewsRecipe + +class DemocracyNowRecipe(BasicNewsRecipe): + title = u'Democracy now!' + __author__ = u'Antoine Beaupré' + description = 'A daily TV/radio news program, hosted by Amy Goodman and Juan Gonzalez, airing on over 1,100 stations, pioneering the largest community media collaboration in the United States.' 
# noqa + language = 'en' + cover_url = 'http://www.democracynow.org/images/dn-logo-for-podcast.png' + + oldest_article = 1 + max_articles_per_feed = 10 + publication_type = 'magazine' + + auto_cleanup = False + use_embedded_content = False + no_stylesheets = True + remove_javascript = True + + feeds = [ + (u'Daily news', u'http://www.democracynow.org/democracynow.rss')] + + keep_only_tags = [dict(name='div', attrs={'id': 'page'}), ] + remove_tags = [dict(name='div', attrs={'id': 'topics_list'}), + dict(name='div', attrs={'id': 'header'}), + dict(name='div', attrs={'id': 'footer'}), + dict(name='div', attrs={'id': 'right'}), + dict(name='div', attrs={'id': 'left-panel'}), + dict(name='div', attrs={'id': 'top-video-content'}), + dict(name='div', attrs={'id': 'google-news-date'}), + dict(name='div', attrs={'id': 'story-donate'}), + dict( + name='div', attrs={'id': 'transcript-expand-collapse'}), + dict(name='span', attrs={'class': 'show-links'}), + dict(name='span', attrs={'class': 'storyNav'}), + dict(name='div', attrs={'class': 'headline_share'}), + dict(name='div', attrs={'class': 'mediaBar'}), + dict(name='div', attrs={'class': 'shareAndPrinterBar'}), + dict(name='div', attrs={'class': 'utility-navigation'}), + dict(name='div', attrs={'class': 'bottomContentNav'}), + dict(name='div', attrs={'class': 'recentShows'}), + dict( + name='div', attrs={'class': 'printer-and-transcript-links'}), + ] + diff --git a/recipes/diagonales.recipe b/recipes/diagonales.recipe index eff06efc1d..65542697b5 100644 --- a/recipes/diagonales.recipe +++ b/recipes/diagonales.recipe @@ -1,72 +1,50 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic ' +__copyright__ = '2009-2013, Darko Miletic ' ''' -elargentino.com +diagonales.infonews.com ''' from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import Tag class Diagonales(BasicNewsRecipe): title = 'Diagonales' __author__ = 'Darko Miletic' - description = 'El nuevo diario 
de La Plata' - publisher = 'ElArgentino.com' + description = 'Para estar bien informado sobre los temas de actualidad. Conoce sobre pais, economia, deportes, mundo, espectaculos, sociedad, entrevistas y tecnologia.' + publisher = 'INFOFIN S.A.' category = 'news, politics, Argentina, La Plata' oldest_article = 2 max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False encoding = 'utf-8' - language = 'es_AR' - - lang = 'es-AR' - direction = 'ltr' - INDEX = 'http://www.elargentino.com/medios/122/Diagonales.html' + language = 'es_AR' + publication_type = 'newspaper' + delay = 1 + remove_empty_feeds = True extra_css = ' .titulo{font-size: x-large; font-weight: bold} .volantaImp{font-size: small; font-weight: bold} ' - html2lrf_options = [ - '--comment' , description - , '--category' , category - , '--publisher', publisher - ] - - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "' + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language + } keep_only_tags = [dict(name='div', attrs={'class':'ContainerPop'})] + remove_tags = [dict(name='link')] - remove_tags = [dict(name='link')] - - feeds = [(u'Articulos', u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=122&Content-Type=text/xml&ChannelDesc=Diagonales')] + feeds = [ + (u'Pais' , u'http://diagonales.infonews.com/Highlights.aspx?ParentType=Section&ParentId=112&Content-Type=text/xml&ChannelDesc=Pa%C3%ADs') + ,(u'Deportes' , u'http://diagonales.infonews.com/Highlights.aspx?ParentType=Section&ParentId=106&Content-Type=text/xml&ChannelDesc=Deportes') + ,(u'Economia' , u'http://diagonales.infonews.com/Highlights.aspx?ParentType=Section&ParentId=107&Content-Type=text/xml&ChannelDesc=Econom%C3%ADa') + ,(u'Sociedad' , 
u'http://diagonales.infonews.com/Highlights.aspx?ParentType=Section&ParentId=109&Content-Type=text/xml&ChannelDesc=Sociedad') + ,(u'Mundo' , u'http://diagonales.infonews.com/Highlights.aspx?ParentType=Section&ParentId=113&Content-Type=text/xml&ChannelDesc=Mundo') + ,(u'Espectaculos', u'http://diagonales.infonews.com/Highlights.aspx?ParentType=Section&ParentId=114&Content-Type=text/xml&ChannelDesc=Espect%C3%A1culos') + ,(u'Entrevistas' , u'http://diagonales.infonews.com/Highlights.aspx?ParentType=Section&ParentId=115&Content-Type=text/xml&ChannelDesc=Entrevistas') + ,(u'Tecnologia' , u'http://diagonales.infonews.com/Highlights.aspx?ParentType=Section&ParentId=118&Content-Type=text/xml&ChannelDesc=Tecnolog%C3%ADa') + ] def print_version(self, url): main, sep, article_part = url.partition('/nota-') article_id, rsep, rrest = article_part.partition('-') - return u'http://www.elargentino.com/Impresion.aspx?Id=' + article_id - - def preprocess_html(self, soup): - for item in soup.findAll(style=True): - del item['style'] - soup.html['lang'] = self.lang - soup.html['dir' ] = self.direction - mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)]) - mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")]) - soup.head.insert(0,mlang) - soup.head.insert(1,mcharset) - return soup - - def get_cover_url(self): - cover_url = None - soup = self.index_to_soup(self.INDEX) - cover_item = soup.find('div',attrs={'class':'colder'}) - if cover_item: - clean_url = self.image_url_processor(None,cover_item.div.img['src']) - cover_url = 'http://www.elargentino.com' + clean_url + '&height=600' - return cover_url - - def image_url_processor(self, baseurl, url): - base, sep, rest = url.rpartition('?Id=') - img, sep2, rrest = rest.partition('&') - return base + sep + img + return u'http://diagonales.infonews.com/Impresion.aspx?Id=' + article_id diff --git a/recipes/diario_el_pueblo.recipe b/recipes/diario_el_pueblo.recipe new 
file mode 100644 index 0000000000..4cfab9eb32 --- /dev/null +++ b/recipes/diario_el_pueblo.recipe @@ -0,0 +1,51 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__author__ = '2013, Carlos Alves ' +''' +diarioelpueblo.com.uy +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class General(BasicNewsRecipe): + title = 'Diario El Pueblo' + __author__ = 'Carlos Alves' + description = 'Noticias de Salto - Uruguay' + tags = 'news, sports' + language = 'es_UY' + timefmt = '[%a, %d %b, %Y]' + use_embedded_content = False + recursion = 5 + encoding = 'utf8' + remove_javascript = True + no_stylesheets = True + + oldest_article = 2 + max_articles_per_feed = 100 + keep_only_tags = [dict(name='div', attrs={'class':'post-alt blog'})] + + remove_tags = [ + dict(name='div', attrs={'class':['hr', 'titlebar', 'volver-arriba-right','navigation']}), + dict(name='div', attrs={'id':'comment','id':'suckerfish','id':'crp_related'}), + dict(name='h3', attrs={'class':['post_date']}), + dict(name=['object','link']) + ] + + extra_css = ''' + h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;} + h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;} + h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;} + p {font-family:Arial,Helvetica,sans-serif;} + ''' + feeds = [ + (u'Articulos', u'http://www.diarioelpueblo.com.uy/feed') + ] + + def get_cover_url(self): + return 'http://www.diarioelpueblo.com.uy/wp-content/uploads/2013/06/Cabezal_Web1.jpg' + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + return soup diff --git a/recipes/diario_salto.recipe b/recipes/diario_salto.recipe new file mode 100644 index 0000000000..799233db4d --- /dev/null +++ b/recipes/diario_salto.recipe @@ -0,0 +1,50 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__author__ = '2013, Carlos Alves ' +''' +diarisalto.com.uy +''' + +from calibre.web.feeds.news import 
BasicNewsRecipe + +class General(BasicNewsRecipe): + title = 'Diario Salto' + __author__ = 'Carlos Alves' + description = 'Noticias de Salto - Uruguay' + tags = 'news, sports' + language = 'es_UY' + timefmt = '[%a, %d %b, %Y]' + use_embedded_content = False + recursion = 5 + encoding = 'utf8' + remove_javascript = True + no_stylesheets = True + + oldest_article = 2 + max_articles_per_feed = 100 + keep_only_tags = [dict(name='div', attrs={'class':'post'})] + + remove_tags = [ + dict(name='div', attrs={'class':['hr', 'titlebar', 'navigation']}), + dict(name='div', attrs={'id':'comment'}), + dict(name=['object','link']) + ] + + extra_css = ''' + h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;} + h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;} + h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;} + p {font-family:Arial,Helvetica,sans-serif;} + ''' + feeds = [ + (u'Articulos', u'http://www.diariosalto.com.uy/feed/atom') + ] + + def get_cover_url(self): + return 'http://diariosalto.com.uy/demo/wp-content/uploads/2011/12/diario-salto_logo-final-b-b.png' + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + return soup diff --git a/recipes/dilbert.recipe b/recipes/dilbert.recipe index ed2771debf..e98c9212a9 100644 --- a/recipes/dilbert.recipe +++ b/recipes/dilbert.recipe @@ -6,6 +6,7 @@ DrMerry added cover Image 2011-11-12 ''' from calibre.web.feeds.recipes import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import BeautifulSoup import re class DilbertBig(BasicNewsRecipe): @@ -16,7 +17,7 @@ class DilbertBig(BasicNewsRecipe): oldest_article = 15 max_articles_per_feed = 100 no_stylesheets = True - use_embedded_content = True + use_embedded_content = False encoding = 'utf-8' publisher = 'UNITED FEATURE SYNDICATE, INC.' 
category = 'comic' @@ -30,25 +31,14 @@ class DilbertBig(BasicNewsRecipe): ,'publisher' : publisher } - feeds = [(u'Dilbert', u'http://feed.dilbert.com/dilbert/daily_strip' )] - - def get_article_url(self, article): - return article.get('feedburner_origlink', None) + feeds = [(u'Dilbert', u'http://feed.dilbert.com/dilbert/daily_strip')] preprocess_regexps = [ (re.compile('strip\..*\.gif', re.DOTALL|re.IGNORECASE), lambda match: 'strip.zoom.gif') ] def preprocess_html(self, soup): - for tag in soup.findAll(name='a'): - if tag['href'].find('http://feedads') >= 0: - tag.extract() - return soup + for tag in soup.findAll(name='input'): + image = BeautifulSoup('') + return image - extra_css = ''' - h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;} - h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;} - img {max-width:100%; min-width:100%;} - p{font-family:Arial,Helvetica,sans-serif;font-size:small;} - body{font-family:Helvetica,Arial,sans-serif;font-size:small;} - ''' diff --git a/recipes/dilemaveche.recipe b/recipes/dilemaveche.recipe index 72920600f7..33e9a263b0 100644 --- a/recipes/dilemaveche.recipe +++ b/recipes/dilemaveche.recipe @@ -6,46 +6,87 @@ __copyright__ = u'2011, Silviu Cotoar\u0103' ''' dilemaveche.ro ''' - from calibre.web.feeds.news import BasicNewsRecipe class DilemaVeche(BasicNewsRecipe): - title = u'Dilema Veche' - __author__ = u'Silviu Cotoar\u0103' - description = 'Sint vechi, domnule! (I.L. 
Caragiale)' - publisher = u'Adev\u0103rul Holding' - oldest_article = 5 - language = 'ro' - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - category = 'Ziare' - encoding = 'utf-8' - cover_url = 'http://dilemaveche.ro/sites/all/themes/dilema/theme/dilema_two/layouter/dilema_two_homepage/logo.png' - - conversion_options = { - 'comments' : description - ,'tags' : category - ,'language' : language - ,'publisher' : publisher - } - - keep_only_tags = [ - dict(name='div', attrs={'class':'c_left_column'}) - ] - - remove_tags = [ - dict(name='div', attrs={'id':['adshop_widget_428x60']}) , - dict(name='div', attrs={'id':['gallery']}) - ] - - remove_tags_after = [ - dict(name='div', attrs={'id':['adshop_widget_428x60']}) - ] - - feeds = [ - (u'Feeds', u'http://dilemaveche.ro/rss.xml') + # apare vinerea, mai pe dupa-masa,depinde de Luiza cred (care se semneaza ca fiind creatorul fiecarui articol in feed-ul RSS) + title = u'Dilema Veche' + __author__ = 'song2' # inspirat din scriptul pentru Le Monde. Inspired from the Le Monde script + description = '"Sint vechi, domnule!" (I.L. 
Caragiale)' + publisher = 'Adevarul Holding' + oldest_article = 7 + max_articles_per_feed = 200 + encoding = 'utf8' + language = 'ro' + masthead_url = 'http://www.dilemaveche.ro/sites/all/themes/dilema/theme/dilema_two/layouter/dilema_two_homepage/logo.png' + publication_type = 'magazine' + feeds = [ + ('Editoriale si opinii - Situatiunea', 'http://www.dilemaveche.ro/taxonomy/term/37/0/feed'), + ('Editoriale si opinii - Pe ce lume traim', 'http://www.dilemaveche.ro/taxonomy/term/38/0/feed'), + ('Editoriale si opinii - Bordeie si obiceie', 'http://www.dilemaveche.ro/taxonomy/term/44/0/feed'), + ('Editoriale si opinii - Talc Show', 'http://www.dilemaveche.ro/taxonomy/term/44/0/feed'), + ('Tema saptamanii', 'http://www.dilemaveche.ro/taxonomy/term/19/0/feed'), + ('La zi in cultura - Dilema va recomanda', 'http://www.dilemaveche.ro/taxonomy/term/58/0/feed'), + ('La zi in cultura - Carte', 'http://www.dilemaveche.ro/taxonomy/term/14/0/feed'), + ('La zi in cultura - Film', 'http://www.dilemaveche.ro/taxonomy/term/13/0/feed'), + ('La zi in cultura - Muzica', 'http://www.dilemaveche.ro/taxonomy/term/1341/0/feed'), + ('La zi in cultura - Arte performative', 'http://www.dilemaveche.ro/taxonomy/term/1342/0/feed'), + ('La zi in cultura - Arte vizuale', 'http://www.dilemaveche.ro/taxonomy/term/1512/0/feed'), + ('Societate - Ieri cu vedere spre azi', 'http://www.dilemaveche.ro/taxonomy/term/15/0/feed'), + ('Societate - Din polul opus', 'http://www.dilemaveche.ro/taxonomy/term/41/0/feed'), + ('Societate - Mass comedia', 'http://www.dilemaveche.ro/taxonomy/term/43/0/feed'), + ('Societate - La singular si la plural', 'http://www.dilemaveche.ro/taxonomy/term/42/0/feed'), + ('Oameni si idei - Educatie', 'http://www.dilemaveche.ro/taxonomy/term/46/0/feed'), + ('Oameni si idei - Polemici si dezbateri', 'http://www.dilemaveche.ro/taxonomy/term/48/0/feed'), + ('Oameni si idei - Stiinta si tehnologie', 'http://www.dilemaveche.ro/taxonomy/term/46/0/feed'), + ('Dileme on-line', 
'http://www.dilemaveche.ro/taxonomy/term/005/0/feed') ] + remove_tags_before = dict(name='div',attrs={'class':'spacer_10'}) + remove_tags = [ + dict(name='div', attrs={'class':'art_related_left'}), + dict(name='div', attrs={'class':'controale'}), + dict(name='div', attrs={'class':'simple_overlay'}), + ] + remove_tags_after = [dict(id='facebookLike')] + remove_javascript = True + no_stylesheets = True + remove_empty_feeds = True + extra_css = """ + body{font-family: Georgia,Times,serif } + img{margin-bottom: 0.4em; display:block} + """ + needs_subscription = 'optional' + cover_margins = (10, 15, '#ffffff') + + def get_browser(self): + br = BasicNewsRecipe.get_browser(self) + if self.username is not None and self.password is not None: + br.open('http://dilemaveche.ro/user/login') + br.select_form(nr=0) + br['username'] = self.username + br['password'] = self.password + br.submit() + return br + + def get_cover_url(self): + cover_url = None + soup = self.index_to_soup('http://dilemaveche.ro') + link_item = soup.find('div',attrs={'class':'box_dr_pdf_picture'}) + if link_item and link_item.a: + cover_url = link_item.a['href'] + br = BasicNewsRecipe.get_browser() + try: + br.open(cover_url) + except: # daca nu gaseste pdf-ul + self.log("\nPDF indisponibil") + link_item = soup.find('div',attrs={'class':'box_dr_pdf_picture'}) + if link_item and link_item.img: + cover_url = link_item.img['src'] + br = BasicNewsRecipe.get_browser() + try: + br.open(cover_url) + except: # daca nu gaseste nici imaginea mica mica + print('Mama lor de nenorociti! 
nu este nici pdf nici imagine') + cover_url ='http://www.dilemaveche.ro/sites/all/themes/dilema/theme/dilema_two/layouter/dilema_two_homepage/logo.png' + return cover_url - def preprocess_html(self, soup): - return self.adeify_images(soup) diff --git a/recipes/dot_net.recipe b/recipes/dot_net.recipe index 6b39b8c97e..33c53d4757 100644 --- a/recipes/dot_net.recipe +++ b/recipes/dot_net.recipe @@ -11,30 +11,31 @@ class dotnetMagazine (BasicNewsRecipe): no_stylesheets = True encoding = 'utf8' use_embedded_content = False + auto_cleanup = True # recursion = 1 language = 'en' remove_empty_feeds = True extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} ' cover_url = u'http://media.netmagazine.futurecdn.net/sites/all/themes/netmag/logo.png' - remove_tags_after = dict(name='footer', id=lambda x:not x) - remove_tags_before = dict(name='header', id=lambda x:not x) + #remove_tags_after = dict(name='footer', id=lambda x:not x) + #remove_tags_before = dict(name='header', id=lambda x:not x) - remove_tags = [ - dict(name='div', attrs={'class': 'item-list'}), - dict(name='h4', attrs={'class': 'std-hdr'}), - dict(name='div', attrs={'class': 'item-list share-links'}), # removes share links - dict(name=['script', 'noscript']), - dict(name='div', attrs={'id': 'comments-form'}), # comment these out if you want the comments to show - dict(name='div', attrs={'id': re.compile('advertorial_block_($|| )')}), - dict(name='div', attrs={'id': 'right-col'}), - dict(name='div', attrs={'id': 'comments'}), # comment these out if you want the comments to show - dict(name='div', attrs={'class': 'item-list related-content'}), + #remove_tags = [ + #dict(name='div', attrs={'class': 'item-list'}), + #dict(name='h4', attrs={'class': 'std-hdr'}), + #dict(name='div', attrs={'class': 'item-list share-links'}), # removes share links + #dict(name=['script', 'noscript']), + #dict(name='div', attrs={'id': 'comments-form'}), # comment these out if you want the comments to show 
+ #dict(name='div', attrs={'id': re.compile('advertorial_block_($|| )')}), + #dict(name='div', attrs={'id': 'right-col'}), + #dict(name='div', attrs={'id': 'comments'}), # comment these out if you want the comments to show + #dict(name='div', attrs={'class': 'item-list related-content'}), - ] + #] feeds = [ - (u'net', u'http://feeds.feedburner.com/net/topstories?format=xml') + (u'net', u'http://feeds.feedburner.com/creativebloq/') ] def skip_ad_pages(self, soup): diff --git a/recipes/el_correo.recipe b/recipes/el_correo.recipe index 110c19d7ba..235d5e0fc7 100644 --- a/recipes/el_correo.recipe +++ b/recipes/el_correo.recipe @@ -3,10 +3,10 @@ __license__ = 'GPL v3' __copyright__ = '08 Januery 2011, desUBIKado' __author__ = 'desUBIKado' __description__ = 'Daily newspaper from Biscay' -__version__ = 'v0.08' -__date__ = '08, Januery 2011' +__version__ = 'v0.10' +__date__ = '07, August 2013' ''' -[url]http://www.elcorreo.com/[/url] +http://www.elcorreo.com/ ''' import time @@ -24,6 +24,7 @@ class heraldo(BasicNewsRecipe): max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False + masthead_url = 'http://www.elcorreo.com/vizcaya/noticias/201002/02/Media/logo-elcorreo-nuevo.png' language = 'es' timefmt = '[%a, %d %b, %Y]' encoding = 'iso-8859-1' @@ -33,15 +34,15 @@ class heraldo(BasicNewsRecipe): feeds = [ (u'Portada', u'http://www.elcorreo.com/vizcaya/portada.xml'), (u'Local', u'http://www.elcorreo.com/vizcaya/rss/feeds/vizcaya.xml'), - (u'Internacional', u'hhttp://www.elcorreo.com/vizcaya/rss/feeds/internacional.xml'), - (u'Econom\xeda', u'http://www.elcorreo.com/vizcaya/rss/feeds/economia.xml'), + (u'Internacional', u'hhttp://www.elcorreo.com/vizcaya/rss/feeds/internacional.xml'), + (u'Econom\xeda', u'http://www.elcorreo.com/vizcaya/rss/feeds/economia.xml'), (u'Pol\xedtica', u'http://www.elcorreo.com/vizcaya/rss/feeds/politica.xml'), - (u'Opini\xf3n', u'http://www.elcorreo.com/vizcaya/rss/feeds/opinion.xml'), - (u'Deportes', 
u'http://www.elcorreo.com/vizcaya/rss/feeds/deportes.xml'), + (u'Opini\xf3n', u'http://www.elcorreo.com/vizcaya/rss/feeds/opinion.xml'), + (u'Deportes', u'http://www.elcorreo.com/vizcaya/rss/feeds/deportes.xml'), (u'Sociedad', u'http://www.elcorreo.com/vizcaya/rss/feeds/sociedad.xml'), - (u'Cultura', u'http://www.elcorreo.com/vizcaya/rss/feeds/cultura.xml'), - (u'Televisi\xf3n', u'http://www.elcorreo.com/vizcaya/rss/feeds/television.xml'), - (u'Gente', u'http://www.elcorreo.com/vizcaya/rss/feeds/gente.xml') + (u'Cultura', u'http://www.elcorreo.com/vizcaya/rss/feeds/cultura.xml'), + (u'Televisi\xf3n', u'http://www.elcorreo.com/vizcaya/rss/feeds/television.xml'), + (u'Gente', u'http://www.elcorreo.com/vizcaya/rss/feeds/gente.xml') ] keep_only_tags = [ @@ -54,14 +55,14 @@ class heraldo(BasicNewsRecipe): dict(name='div', attrs={'class':['mod_lomas','bloque_lomas','blm_header','link-app3','link-app4','botones_listado']}), dict(name='div', attrs={'class':['navegacion_galeria','modulocanalpromocion','separa','separacion','compartir','tags_relacionados']}), dict(name='div', attrs={'class':['moduloBuscadorDeportes','modulo-gente','moddestacadopeq','OpcArt','articulopiniones']}), - dict(name='div', attrs={'class':['modulo-especial','publiEspecial']}), - dict(name='div', attrs={'id':['articulopina']}), + dict(name='div', attrs={'class':['modulo-especial','publiEspecial','carruselNoticias','vj','modulocomun2']}), + dict(name='div', attrs={'id':['articulopina','webs_asociadas']}), dict(name='br', attrs={'class':'clear'}), dict(name='form', attrs={'name':'frm_conversor2'}) ] remove_tags_before = dict(name='div' , attrs={'class':'articulo '}) - remove_tags_after = dict(name='div' , attrs={'class':'comentarios'}) + remove_tags_after = dict(name='div' , attrs={'class':'robapaginas'}) def get_cover_url(self): cover = None @@ -69,10 +70,8 @@ class heraldo(BasicNewsRecipe): year = str(st.tm_year) month = "%.2d" % st.tm_mon day = "%.2d" % st.tm_mday - 
#[url]http://img.kiosko.net/2011/01/02/es/elcorreo.750.jpg[/url] - #[url]http://info.elcorreo.com/pdf/06012011-viz.pdf[/url] - cover='http://info.elcorreo.com/pdf/'+ day + month + year +'-viz.pdf' - + # http://info.elcorreo.com/pdf/07082013-viz.pdf + cover='http://info.elcorreo.com/pdf/'+ day + month + year +'-viz.pdf' br = BasicNewsRecipe.get_browser(self) try: br.open(cover) @@ -92,29 +91,27 @@ class heraldo(BasicNewsRecipe): img{margin-bottom: 0.4em} ''' - - preprocess_regexps = [ - # To present the image of the embedded video + # Para presentar la imagen de los video incrustados (re.compile(r'var RUTA_IMAGEN', re.DOTALL|re.IGNORECASE), lambda match: ''), (re.compile(r'var SITIO = "elcorreo";', re.DOTALL|re.IGNORECASE), lambda match: '