diff --git a/.bzrignore b/.bzrignore index 88fc9188fc..aaacc9f58a 100644 --- a/.bzrignore +++ b/.bzrignore @@ -3,9 +3,9 @@ src/calibre/plugins resources/images.qrc src/calibre/ebooks/oeb/display/test/*.js -src/calibre/manual/.build/ -src/calibre/manual/cli/ -src/calibre/manual/template_ref.rst +manual/.build/ +manual/cli/ +manual/template_ref.rst build dist docs @@ -16,7 +16,6 @@ resources/ebook-convert-complete.pickle resources/builtin_recipes.xml resources/builtin_recipes.zip resources/template-functions.json -resources/display/*.js setup/installer/windows/calibre/build.log src/calibre/translations/.errors src/cssutils/.svn/ diff --git a/COPYRIGHT b/COPYRIGHT index 129b0b0536..eb4433f96d 100644 --- a/COPYRIGHT +++ b/COPYRIGHT @@ -4,11 +4,6 @@ License: GPL-3 The full text of the GPL is distributed as in /usr/share/common-licenses/GPL-3 on Debian systems. -Files: src/calibre/ebooks/pdf/*.h,*.cpp -License: GPL-2 or later - The full text of the GPL is distributed as in - /usr/share/common-licenses/GPL-2 on Debian systems. - Files: setup/iso_639/* Copyright: Various License: LGPL 2.1 @@ -21,6 +16,12 @@ License: BSD The full text of the BSD license is distributed as in /usr/share/common-licenses/BSD on Debian systems. +Files: src/qtcurve/* +Copyright: Craig Drummond, 2007 - 2010 craig.p.drummond@gmail.com +License: GPL-2 + The full text of the GPL is distributed as in + /usr/share/common-licenses/GPL-2 on Debian systems. 
+ Files: src/calibre/ebooks/chardet/* Copyright: Copyright (C) 1998-2001 Netscape Communications Corporation License: LGPL-2.1+ diff --git a/Changelog.yaml b/Changelog.yaml index 452744ba94..39bfd0ef10 100644 --- a/Changelog.yaml +++ b/Changelog.yaml @@ -19,6 +19,559 @@ # new recipes: # - title: +- version: 0.8.58 + date: 2012-06-29 + + new features: + - title: "Add some texture to calibre generated covers" + + - title: "Drivers for Sogo SS-4370, HTC G2 and Lenovo ThinkPad Tablet" + tickets: [1019050, 1017010] + + - title: "Add search to the Manage tags/series/etc. dialogs" + + - title: "News download: Add support for images embedded in the HTML" + + - title: "calibre -s now waits for calibre to shutdown" + + bug fixes: + - title: "Workaround for iTunes breaking scripting with version 10.6.3 on OS X." + tickets: [1012243] + + - title: "EPUB Input: When there are multiple elements of the same type in the OPF guide, use the first rather than the last element." + + - title: "Windows: Disable the new UI style if the color depth of the desktop is less than 32 bits per pixel" + + - title: "ISBNDB metadata plugin: Return results even though they have no comments" + + - title: "More robust handling of EINTR during IPC" + + - title: "Metadata download: Support for amazon's new results page markup" + + - title: "EPUB Output: Fix a bug that could cause corrupted output when doing an EPUB/OEB to EPUB conversion if the input EPUB had multiple files with the same name" + + - title: "KF8 Output: Fix a couple of bugs that could lead to generation of invalid KF8 files." + tickets: [1016672] + + improved recipes: + - ABC Digital + - O Globo + + new recipes: + - title: Sign of the Times and New Statesman + author: TerminalVeracity + + - title: CT24 + author: zoidozoido + + - title: SmileZilla + author: Will + + - title: Marketing Sensoriale + author: NotTaken + +- version: 0.8.57 + date: 2012-06-22 + + new features: + - title: "PDF Output: Full pagination support. 
No more cutoff bottom line." + type: major + description: "Fixes a long standing bug in calibre's PDF Output that caused the bottom line of some pages to be partially cut off and prevented top and bottom margins from working." + + - title: "calibredb add now prints out the ids of added books" + tickets: [1014303] + + - title: "Kobo Vox driver: Add support for new Google Play firmware" + tickets: [1014129] + + - title: "Driver for Prestigio PMP5097PRO" + tickets: [1013864] + + - title: "Add option to disable tooltips in the book list under Preferences->Look & Feel" + + - title: "When customizing builtin recipes download the latest version of the recipe to customize instead of using the possibly out of date bundled version" + + bug fixes: + - title: "PDF Output: Use the cover from the input document when no cover is specified during a conversion" + + - title: "E-book Viewer: Printing now has proper pagination with top and bottom margins no lines partially cut-off at the bottom and full style retention" + + - title: "KF8 Input: Handle files with incorrectly encoded guide type entries." + tickets: [1015020] + + - title: "E-book viewer: Disable hyphenation on windows xp as Qt WebKit barfs on soft hyphens on windows XP" + + - title: "Handle OS X systems with invalid palette colors." + tickets: [1014900] + + - title: "Tag Browser: Fix regression that broke partitioning of hierarchical categories." 
+ tickets: [1014065] + + - title: "LRF Output: Handle negative page margins" + tickets: [1014103] + + - title: "Template language: Fix arithmetic functions to tolerate the value 'None' as returned by raw_field()" + + - title: "Fix custom title sort set in the edit metadata dialog getting reset by the conversion dialog" + + improved recipes: + - The Economist + - Akter + - 24 Sata sr + - Novi List + - Metro Montreal + - Mode Durable + - CanardPC + - The Economic Collapse + - Our Daily Bread + + new recipes: + - title: Akter Daily + author: Darko MIletic + + - title: BBC Brasil + author: Claviola + + - title: Homopedia.pl + author: rainbowwarrior + + - title: National Geographic Magazine + author: Terminal Veracity + + - title: Something Awful + author: atordo + + - title: Huffington Post UK + author: Krittika Goyal + +- version: 0.8.56 + date: 2012-06-15 + + new features: + - title: "Make the new calibre style default on Windows and OS X." + type: major + description: "This change gives a more 'modern' feel to the calibre user interface with focus highlighting, gradients, rounded corners, etc. In case you prefer the old look, you can restore under Preferences->Look & Feel->User interface style" + + - title: "Get Books: Add the new SONY Reader store" + + - title: "Read metadata from .docx (Microsoft Word) files" + + - title: "Allow customizing the behavior of the searching for similar books by right clicking the book. You can now tell calibre to search different columns than the traditional author/series/publisher/tags/etc. 
in Preferences->Searching" + + - title: "Add option to restore alternating row colors to the Tag Browser under Preferences->Look & Feel->Tag Browser" + + - title: "Update to Qt 4.8.2 on windows compiled with link time code generation for a small performance boost" + + bug fixes: + - title: "Get Books: Update plugins to handle website changes at ebooks.com, project gutenberg, and virtualo" + + - title: "AZW3 Output: Fix TOC at start option not working" + + - title: "AZW3 Output: Close self closing script/style/title/head tags explicitly as they cause problems in webkit based renderers like the Kindle Fire and calibre's viewers." + + - title: "Fix the current_library_name() template function not updating after a library switch" + + - title: "AZW3 Output: Handle the case of a link pointing to the last line of text in the document." + tickets: [1011330] + + - title: "Fix regression in 0.8.55 that broke highlighting of items matching a search in the Tag Browser" + tickets: [1011030] + + - title: "News download: Handle query only relative URLs" + + improved recipes: + - Christian Science Monitor + - Neue Zurcher Zeitung + - Birmignham Post + - Metro UK + - New Musical Express + - The Independent + - The Daily Mirror + - Vreme + - Smithsonian Magazine + + new recipes: + - title: NZZ Webpaper + author: Bernd Leinfelder + + +- version: 0.8.55 + date: 2012-06-08 + + new features: + - title: "Add a new 'Calibre style' interface look that is more modern than the default look. You can select it via Preferences->Look & Feel->User interface style." + + - title: "New, subtler look for the Tag Browser" + + - title: "Driver for Trekstor Pyrus and Pantech Android Tablet" + tickets: [1008946, 1007929] + + - title: "Conversion pipeline: Handle guide elements with incorrectly cased hrefs. Also handle guide elements of type coverimagestandard and thumbimagestandard." + + - title: "Allow user to customize trekstor plugin to send books into sub directories." 
+ tickets: [1007646] + + - title: "EPUB Input: Add support for EPUB files that use the IDPF font obfuscation algorithm. Apparently, people have started producing these now." + tickets: [1008810] + + - title: "Save single format to disk: Only show the format available in the selected books." + tickets: [1007287] + + bug fixes: + - title: "MOBI Output: When using the insert metadata at start of book option, do not use a table to layout the metadata, as the Kindle Fire crashes when rendering the table." + tickets: [1002119] + + - title: "Device detection: Fix a bug that could cause device detection to fail completely if devices with certain vendor/product ids are connected." + tickets: [1009718] + + - title: "MOBI Output: When rasterizing svgs only compute style information when an actual svg image is present. Small speedup when converting large svg-free documents to MOBI." + + - title: "SONY T1 driver: Fix support for collections of books placed on the SD card" + tickets: [986044] + + - title: "Fix partitioning problems in tag browser with fields that have no name, such as identifiers and formats" + + - title: "Welcome wizard: Preferentially use the kindle email address set as default when more than one such address exists." + tickets: [1007932 ] + + - title: "Fix regression in 0.8.54 that broke the use of the shortcut Alt+A to select books by the same author" + + improved recipes: + - Various Polish recipes + - Vice Magazine + - EL Mundo Today + - Haaretz + - Good Housekeeping + - El Pais + - Christian Science Monitor + - Marketing Magazine + - Instapaper + + new recipes: + - title: Various Philippine news sources + author: jde + + - title: Natemat.pl and wirtualnemedia.pl + author: fenuks + + - title: Rabble.ca + author: timtoo + +- version: 0.8.54 + date: 2012-05-31 + + new features: + - title: "E-book viewer: The Table of contents panel now tracks the current position in the book. As you scroll through the book, the entry you are currently on is highlighted." 
+ type: major + description: "To see this feature in action, open the Table of Contents panel in the viewer by clicking the button with three blue lines on it. As you page through the book, the chapter you are reading currently is highlighted in the Table of Contents Panel. Obviously, this will only work if the book you are reading has a Table of Contents. You can also use the Ctrl+PgUp and Ctrl+PgDn keys to quickly skip between chapters." + + - title: "calibredb: Allow setting metadata for individual fields with the set_metadata command" + + - title: "Make it a little harder to accidentally change the sorting of items in the Tag Browser. Also frees up more vertical space for the Tag Browser itself." + + - title: "The calibre user manual is now available in AZW3 format as well as EPUB" + + bug fixes: + - title: "Automatic titlecasing: No longer try to capitalize scottish names, as there are too many special cases." + tickets: [775825] + + - title: "Never crash when reading metadata from PDF files (reading now always happens in a worker process)" + tickets: [1006452] + + - title: "EPUB Input: Do not skip the valid children of an NCX node that has no text/href" + + - title: "Archos driver: Detect SD card" + tickets: [1005650] + + - title: "When bulk downloading metadata and the user deletes one of the books for which metadata is being downloaded, just ignore it, instead of erroring out" + + - title: "When deleting books from the bottom of the booklist, ensure that the bottom book after deleting is selected" + + - title: "Fix regression in 0.8.53 that broke sending APNX files to older Kindle devices" + + - title: "Use correct text color for selected rows in the list of matches when downloading metadata and showing results in get books." 
+ tickets: [1004568] + + improved recipes: + - The Independent + - Welt der Physik + - China Daily + - The Grid + - Prospect Magazine + + new recipes: + - title: La gazetta del Mezzogiorno + author: faber1971 + +- version: 0.8.53 + date: 2012-05-25 + + new features: + - title: "Kindle Touch/4 driver: Upload cover thumbnails when sending books to device by USB to workaround Amazon bug of not displaying covers for sync-enabled books" + + - title: "Support for updating metadata in FB2 files" + + - title: "Set a different background color when choosing formats to not delete as opposed to choosing format to delete." + tickets: [ 1001741 ] + + - title: "E-book viewer: Add an option to prevent the up and down arrow keys from scrolling past page breaks" + + - title: "Get Books: Remove ebookshoppe.com at the website's request" + + bug fixes: + - title: "PDF Input: Support image rotation commands in PDF files. Fixes the long standing problem of some images being flipped when converting from PDF in calibre." + + - title: "Fix a regression in 0.8.51 that caused conversion to HTMLZ to not have any CSS" + + - title: "Get Books: Fix website change at kobo.com causing prices not to be found" + + - title: "Edit the time in the 24 hour clock when calibre's interface language is set to German." + tickets: [ 1001809 ] + + - title: "MOBI Output: When generating joint KF8/MOBI6 .mobi files set the text length field in the MOBI 6 header correctly. " + tickets: [ 1003489 ] + + - title: "ODT Input: More workarounds for LibreOffice 3.5's habit of inserting pointless margin:100% directives everywhere." + tickets: [ 1002702 ] + + - title: "Fix regression that broke smarten punctuation when quotes were next to html tags." 
+ tickets: [ 998900 ] + + - title: "Fix published date from ozon.ru wrong in some timezones" + tickets: [ 975338 ] + + - title: "Catalogs: Handle the use of custom columns with non-ascii names correctly" + tickets: [1001437] + + - title: "Conversion pipeline: Remove the attempt to detect and autocorrect if text will go off the left edge of the page, as it was a rather crude heuristic. Also do not remove fake margins if the book uses negative text indents on the margined elements." + + - title: "KF8 Output: Set offsets to tags in the skeleton the same way kindlegen does. Also linearize non linear ToCs to ensure section to section jumping works." + + - title: "Conversion pipeline: Use correct default value of 'inherit' for font-family and font-size when normalizing the shorthand font property." + + - title: "When running python scripts via calibre-debug ensure that user plugins are loaded" + + improved recipes: + - Business Week Magazine + - Metro Nieuws NL + + new recipes: + - title: Attac.es + author: Marc Busque + + - title: Drytooling.com + author: Damian Granowski + + - title: Shortlist.com + author: Dave ASbury + + - title: National Geographic (es) + author: vakya + +- version: 0.8.52 + date: 2012-05-18 + + new features: + - title: "EPUB Input: When setting the cover for a book that identifies its cover image, but not the html wrapper around the cover, try to detect and remove that wrapper automatically." + tickets: [ 999959 ] + + - title: "When deleting books of a specific format, show the number of books with each format available" + + - title: "Linux install: No longer create MAN pages as all utilities have more comprehensive command line --help anyway" + + - title: "Add a tweak Preferences->Tweaks to control the default choice of format for the Tweak Book feature" + + - title: "Conversion: Allow setting negative page margins. 
A negative page margin means that calibre will not specify any page margin in the output document (for formats that support this)" + + bug fixes: + - title: "Tweak book: Fix handling of covers when tweaking KF8 books" + + - title: "KF8 Output: Handle input documents with out of sequence ToC entries. Note that currently section jumping in the KF8 output produced by calibre for such files does not work." + tickets: [1000493] + + - title: "Edit metadata dialog: Fix the edit values button for custom tag-like columns showing an unneeded warning about changed values" + + - title: "EPUB Output: Be a little more conservative when removing <form>
tags. Only remove them if they have actual forms inside. " + tickets: [ 1000384 ] + + - title: "EPUB Input: Correctly update the Cover entry in the ToC even when the entry has a fragment reference. " + tickets: [ 999973 ] + + - title: "Update ImageMagick DLLs in all calibre binary builds to fix security vulnerabilities in ImageMagick" + tickets: [ 999496 ] + + - title: "Advanced search dialog: Fix equals and regex matching not being applied for custom column searches." + tickets: [ 980221 ] + + - title: "RTF Input: Handle old RTF files that have commands without braces." + tickets: [ 994133 ] + + - title: "Get Books: Diesel, fix results not showing when only a single match is found" + + - title: "Get Books: Fix DRM status indicators for Kobo and Diesel stores. Fix smashwords not returning results." + tickets: [ 993755 ] + + - title: "Fix regression in 0.8.51 that broke viewing of LIT and some EPUB files" + tickets: [998248, 998216] + + improved recipes: + - Clarin + - Spiegel + - Spiegel International + - Montreal Gazette + - Gosc Niedzelny + - Ars Technica + + new recipes: + - title: "Army/Navy/Air force/Marine Times and News busters" + author: jde + + - title: "Ads of the World, Heavy Meta (Italian) and Juve La Stampa" + author: faber1971 + + - title: "Revista Summa" + author: Vakya + + - title: "Strategic culture" + author: Darko Miletic + + - title: Stars and Stripes + author: adoucette + + - title: Nackdenkseiten + author: jrda + + +- version: 0.8.51 + date: 2012-05-11 + + new features: + - title: "When switching libraries preserve the position and selected books if you switch back to a previously opened library." 
+ tickets: [994514] + + - title: "Conversion pipeline: Filter out the useless font-face rules inserted by Microsoft Word for every font on the system" + + - title: "Driver for Motorola XT875 and Pandigital SuperNova" + tickets: [996890] + + - title: "Add a colour swatch to the dialog for creating column coloring rules, to ease selection of colors" + tickets: [994811] + + - title: "EPUB Output: Consolidate internal CSS generated by calibre into external stylesheets for ease of editing the EPUB" + + - title: "List EPUB and MOBI at the top of the dropdown list of formats to convert to, as they are the most common choices" + tickets: [994838] + + bug fixes: + - title: "E-book viewer: Improve performance when switching between normal and fullscreen views." + tickets: [996102] + + - title: "Edit metadata dialog: When running download metadata do not insert duplicate tags into the list of tags" + + - title: "KF8 Input: Do not error out if the file has a few invalidly encoded bytes." + tickets: [997034] + + - title: "Fix download of news in AZW3 format not working" + tickets: [996439] + + - title: "Pocketbook driver: Update for new PB 611 firmware." + tickets: [903079] + + - title: "ebook-convert: Error out if the user provides extra command line args instead of silently ignoring them" + tickets: [994939] + + - title: "EPUB Output: Do not self close any container tags to prevent artifacts when EPUBs are viewed using buggy browser based viewers." 
+ tickets: [994861] + + - title: "Fix regression in 0.8.50 that broke the conversion of HTML files that contained non-ascii font-face declarations, typically produced by Microsoft Word" + + improved recipes: + - Mainichi news + - derStandard + - Endgadget Japan + + new recipes: + - title: Mainichi English + author: Hiroshi Miura + + - title: The Grid TO + author: Yusuf W + + - title: National Geographic (Italy) + author: faber1971 + + - title: Rebelion + author: Marc Busque + +- version: 0.8.50 + date: 2012-05-04 + + new features: + - title: "Tweak Book: Allow tweaking of KF8 MOBI files. Useful to fine-tune the result of a conversion. Right click on the book and select Tweak Book to use the feature. Note that tweaking a MOBI file that contains both KF8 and older MOBI6 will cause the MOBI6 version to be discarded." + + - title: "AZW3 output plugin. This output plugin generates pure KF8 mobi files. These only work on the Kindle Fire and Kindle Touch with latest firmware." + + - title: "Conversion: Allow easy re-ordering of the search and replace expressions in the conversion dialog. Also apply the expressions in the same order that they were entered when doing the conversion." + + - title: "Automatically add the Tag 'Sample Book' when an Amazon sample is added to calibre" + + - title: "FB2 Input: Better handling of inline images." + tickets: [989869] + + bug fixes: + - title: "KF8 Output: Fix section to section jumps not working for documents with multi-level ToCs" + + - title: "EPUB Input: Handle the case of the metadata ToC containing a reference to the cover HTML file." + tickets: [993812] + + - title: "CHM Input: Handle files with deeply nested markup and non html files listed at the start of the manifest." 
+ tickets: [993607] + + - title: "KF8 Output: Workaround Kindle Touch bug that causes the book to be rendered as black pages when a height is specified for <body>" + + - title: "Fix regression in 0.8.49 that broke italics detection in heuristic processing on 32-bit systems." + tickets: [991380] + + - title: "KF8 Output: Fix joint MOBI6/KF8 books not being recognized as MOBI files by older Kindles" + + - title: "KF8 Output: Fix errors when processing documents with HTML comments and/or XML processing instructions" + + - title: "Get Books: Amazon fix prices not being found. B&N fix details link. ebooks.com: fix cover image. Website changes to various EU stores" + + - title: "FB2 Input: More robust base64 decoding to handle embedded images that are incorrectly encoded." + tickets: [990929] + + - title: "Fix scrolling with the cover browser updating only the selection in the book list, not the current book." + tickets: [990881] + + - title: "Save to Disk: Do not run out of memory when saving very large files on systems with low RAM." 
+ tickets: [990741] + + - title: "FB2 Output: Use 2 letter language codes in preference to 3-letter ones to not break poorly implemented FB2 readers" + tickets: [990026] + + - title: "EPUB Input: Auto set the media-type for OPF manifest entries with an empty media-type" + + improved recipes: + - National Post + - Daily Mirror + - Sun + - Newsweek Polska + - Max-Planck + - derStandard + - tweakers.net + + new recipes: + - title: George Monbiot + author: Darko Miletic + + - title: El Mundo + author: atordo + + - title: AraInfo and Diagonal + author: Ruben Pollan + + - version: 0.8.49 date: 2012-04-27 diff --git a/src/calibre/manual/Makefile b/manual/Makefile similarity index 100% rename from src/calibre/manual/Makefile rename to manual/Makefile diff --git a/src/calibre/manual/conf.py b/manual/conf.py similarity index 97% rename from src/calibre/manual/conf.py rename to manual/conf.py index b0f2cd4365..7b24f2f50a 100644 --- a/src/calibre/manual/conf.py +++ b/manual/conf.py @@ -14,7 +14,7 @@ import sys, os # If your extensions are in another directory, add it here. -sys.path.append(os.path.abspath('../../../')) +sys.path.append(os.path.abspath('../src')) sys.path.append(os.path.abspath('.')) __appname__ = os.environ.get('__appname__', 'calibre') __version__ = os.environ.get('__version__', '0.0.0') @@ -98,7 +98,7 @@ html_favicon = 'favicon.ico' # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the built-in static files, # so a file named "default.css" will overwrite the built-in "default.css". -html_static_path = ['resources', '../../../icons/favicon.ico'] +html_static_path = ['resources', '../icons/favicon.ico'] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. 
diff --git a/src/calibre/manual/conversion.rst b/manual/conversion.rst similarity index 99% rename from src/calibre/manual/conversion.rst rename to manual/conversion.rst index f6fe04dd90..5eaca5a469 100644 --- a/src/calibre/manual/conversion.rst +++ b/manual/conversion.rst @@ -669,7 +669,6 @@ Some limitations of PDF input are: * Complex, multi-column, and image based documents are not supported. * Extraction of vector images and tables from within the document is also not supported. * Some PDFs use special glyphs to represent ll or ff or fi, etc. Conversion of these may or may not work depending on just how they are represented internally in the PDF. - * Some PDFs store their images upside down with a rotation instruction, |app| currently doesn't support that instruction, so the images will be rotated in the output as well. * Links and Tables of Contents are not supported * PDFs that use embedded non-unicode fonts to represent non-English characters will result in garbled output for those characters * Some PDFs are made up of photographs of the page with OCRed text behind them. In such cases |app| uses the OCRed text, which can be very different from what you see when you view the PDF file diff --git a/src/calibre/manual/creating_plugins.rst b/manual/creating_plugins.rst similarity index 98% rename from src/calibre/manual/creating_plugins.rst rename to manual/creating_plugins.rst index d38abfd341..c3f1202365 100644 --- a/src/calibre/manual/creating_plugins.rst +++ b/manual/creating_plugins.rst @@ -172,7 +172,7 @@ You can see the ``prefs`` object being used in main.py: :pyobject: DemoDialog.config -The different types of plugins +The plugin API -------------------------------- As you may have noticed above, a plugin in |app| is a class. There are different classes for the different types of plugins in |app|. @@ -195,7 +195,7 @@ It can get tiresome to keep re-adding a plugin to calibre to test small changes. 
Once you've located the zip file of your plugin you can then directly update it with your changes instead of re-adding it each time. To do so from the command line, in the directory that contains your plugin source code, use:: - calibre -s; sleep 4s; zip -R /path/to/plugin/zip/file.zip *; calibre + calibre -s; zip -r /path/to/plugin/zip/file.zip *; calibre This will shutdown a running calibre. Wait for the shutdown to complete, then update your plugin files and relaunch calibre. It relies on the freely available zip command line tool. diff --git a/src/calibre/manual/custom.py b/manual/custom.py similarity index 79% rename from src/calibre/manual/custom.py rename to manual/custom.py index 390b5aa931..fdfb5711bb 100644 --- a/src/calibre/manual/custom.py +++ b/manual/custom.py @@ -5,9 +5,9 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' import sys, os, re, textwrap -sys.path.insert(0, os.path.abspath('../../')) -sys.extensions_location = '../plugins' -sys.resources_location = '../../../resources' +sys.path.insert(0, os.path.abspath('../src')) +sys.extensions_location = '../src/calibre/plugins' +sys.resources_location = '../resources' from sphinx.util.console import bold @@ -116,44 +116,42 @@ def generate_ebook_convert_help(preamble, info): from calibre.utils.logging import default_log preamble = re.sub(r'http.*\.html', ':ref:`conversion`', preamble) raw = preamble + textwrap.dedent(''' - Since the options supported by ebook-convert vary depending on both the - input and the output formats, the various combinations are listed below: + The options and default values for the options change depending on both the + input and output formats, so you should always check with:: + + ebook-convert myfile.input_format myfile.output_format -h + + Below are the options that are common to all conversion, followed by the + options specific to every input and output format ''') - toc = {} - sec_templ = textwrap.dedent('''\ - .. 
include:: ../global.rst + parser, plumber = create_option_parser(['ebook-convert', + 'dummyi.mobi', 'dummyo.epub', '-h'], default_log) + groups = [(None, None, parser.option_list)] + for grp in parser.option_groups: + if grp.title not in {'INPUT OPTIONS', 'OUTPUT OPTIONS'}: + groups.append((grp.title.title(), grp.description, grp.option_list)) + options = '\n'.join(render_options('ebook-convert', groups, False)) - {0} - ================================================================ + raw += '\n\n.. contents::\n :local:' - .. contents:: Contents - :depth: 1 - :local: + raw += '\n\n' + options + for pl in sorted(input_format_plugins(), key=lambda x:x.name): + parser, plumber = create_option_parser(['ebook-convert', + 'dummyi.'+list(pl.file_types)[0], 'dummyo.epub', '-h'], default_log) + groups = [(pl.name+ ' Options', '', g.option_list) for g in + parser.option_groups if g.title == "INPUT OPTIONS"] + prog = 'ebook-convert-'+(pl.name.lower().replace(' ', '-')) + raw += '\n\n' + '\n'.join(render_options(prog, groups, False, True)) + for pl in sorted(output_format_plugins(), key=lambda x: x.name): + parser, plumber = create_option_parser(['ebook-convert', 'd.epub', + 'dummyi.'+pl.file_type, '-h'], default_log) + groups = [(pl.name+ ' Options', '', g.option_list) for g in + parser.option_groups if g.title == "OUTPUT OPTIONS"] + prog = 'ebook-convert-'+(pl.name.lower().replace(' ', '-')) + raw += '\n\n' + '\n'.join(render_options(prog, groups, False, True)) - ''') - for i, ip in enumerate(input_format_plugins()): - sraw = sec_templ.format(ip.name) - toc[ip.name] = 'ebook-convert-%d'%i - for op in output_format_plugins(): - title = ip.name + ' to ' + op.name - parser, plumber = create_option_parser(['ebook-convert', - 'dummyi.'+list(ip.file_types)[0], - 'dummyo.'+op.file_type, '-h'], default_log) - cmd = 'ebook-convert '+list(ip.file_types)[0]+' '+op.file_type - groups = [(None, None, parser.option_list)] - for grp in parser.option_groups: - groups.append((grp.title, 
grp.description, grp.option_list)) - options = '\n'.join(render_options(cmd, groups, False)) - sraw += title+'\n------------------------------------------------------\n\n' - sraw += options + '\n\n' - update_cli_doc(os.path.join('cli', toc[ip.name]+'.rst'), sraw, info) - toct = '\n\n.. toctree::\n :maxdepth: 2\n\n' - for ip in sorted(toc): - toct += ' ' + toc[ip]+'\n' - - raw += toct+'\n\n' update_cli_doc(os.path.join('cli', 'ebook-convert.rst'), raw, info) def update_cli_doc(path, raw, info): diff --git a/src/calibre/manual/customize.rst b/manual/customize.rst similarity index 97% rename from src/calibre/manual/customize.rst rename to manual/customize.rst index e436c73aa9..e2e2825de6 100644 --- a/src/calibre/manual/customize.rst +++ b/manual/customize.rst @@ -39,10 +39,10 @@ Tweaks Tweaks are small changes that you can specify to control various aspects of |app|'s behavior. You can change them by going to Preferences->Advanced->Tweaks. The default values for the tweaks are reproduced below -.. literalinclude:: ../../../resources/default_tweaks.py +.. literalinclude:: ../resources/default_tweaks.py -Overriding icons, templates, etcetera +Overriding icons, templates, et cetera ---------------------------------------- |app| allows you to override the static resources, like icons, templates, javascript, etc. with customized versions that you like. diff --git a/src/calibre/manual/develop.rst b/manual/develop.rst similarity index 95% rename from src/calibre/manual/develop.rst rename to manual/develop.rst index 8257eac3b0..12bbcefe57 100755 --- a/src/calibre/manual/develop.rst +++ b/manual/develop.rst @@ -45,6 +45,16 @@ All the |app| python code is in the ``calibre`` package. This package contains t The format independent code is all in ebooks.oeb and the format dependent code is in ebooks.format_name. 
* Metadata reading, writing, and downloading is all in ebooks.metadata + * Conversion happens in a pipeline, for the structure of the pipeline, + see :ref:`conversion-introduction`. The pipeline consists of an input + plugin, various transforms and an output plugin. The code constructs + and drives the pipeline is in plumber.py. The pipeline works on a + representation of an ebook that is like an unzipped epub, with + manifest, spine, toc, guide, html content, etc. The + class that manages this representation is OEBBook in oeb/base.py. The + various transformations that are applied to the book during + conversions live in `oeb/transforms/*.py`. And the input and output + plugins live in `conversion/plugins/*.py`. * library - The database back-end and the content server. See library.database2 for the interface to the |app| library. library.server is the |app| Content Server. * gui2 - The Graphical User Interface. GUI initialization happens in gui2.main and gui2.ui. The ebook-viewer is in gui2.viewer. diff --git a/src/calibre/manual/epub.py b/manual/epub.py similarity index 100% rename from src/calibre/manual/epub.py rename to manual/epub.py diff --git a/src/calibre/manual/faq.rst b/manual/faq.rst similarity index 98% rename from src/calibre/manual/faq.rst rename to manual/faq.rst index f0d9aa8bd3..0c891767df 100644 --- a/src/calibre/manual/faq.rst +++ b/manual/faq.rst @@ -22,7 +22,7 @@ It can convert every input format in the following list, to every output format. *Input Formats:* CBZ, CBR, CBC, CHM, DJVU, EPUB, FB2, HTML, HTMLZ, LIT, LRF, MOBI, ODT, PDF, PRC, PDB, PML, RB, RTF, SNB, TCR, TXT, TXTZ -*Output Formats:* EPUB, FB2, OEB, LIT, LRF, MOBI, HTMLZ, PDB, PML, RB, PDF, RTF, SNB, TCR, TXT, TXTZ +*Output Formats:* AZW3, EPUB, FB2, OEB, LIT, LRF, MOBI, HTMLZ, PDB, PML, RB, PDF, RTF, SNB, TCR, TXT, TXTZ .. note :: @@ -35,7 +35,7 @@ It can convert every input format in the following list, to every output format. What are the best source formats to convert? 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -In order of decreasing preference: LIT, MOBI, EPUB, FB2, HTML, PRC, RTF, PDB, TXT, PDF +In order of decreasing preference: LIT, MOBI, AZW, EPUB, AZW3, FB2, HTML, PRC, RTF, PDB, TXT, PDF I converted a PDF file, but the result has various problems? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -69,6 +69,22 @@ If you have a hand edited TOC in the input document, you can use the TOC detecti Finally, I encourage you to ditch the content TOC and only have a metadata TOC in your ebooks. Metadata TOCs will give the people reading your ebooks a much superior navigation experience (except on the Kindle, where they are essentially the same as a content TOC). +The covers for my MOBI files have stopped showing up in Kindle for PC/Kindle for Android/etc. +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is caused by a bug in the Amazon software. You can work around it by going +to Preferences->Output Options->MOBI output and setting the "Enable sharing +of book content" option. If you are reconverting a previously converted book, +you will also have to enable the option in the conversion dialog for that +individual book (as per-book conversion settings are saved and take +precedence). + +Note that doing this will mean that the generated MOBI will show up under +personal documents instead of Books on the Kindle Fire and Amazon whispersync +will not work, but the covers will. It's your choice which functionality is +more important to you. I encourage you to contact Amazon and ask them to fix +this bug. + How do I convert a collection of HTML files in a specific order? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In order to convert a collection of HTML files in a specific oder, you have to create a table of contents file. That is, another HTML file that contains links to all the other files in the desired order. 
Such a file looks like:: diff --git a/src/calibre/manual/global.rst b/manual/global.rst similarity index 100% rename from src/calibre/manual/global.rst rename to manual/global.rst diff --git a/src/calibre/manual/glossary.rst b/manual/glossary.rst similarity index 100% rename from src/calibre/manual/glossary.rst rename to manual/glossary.rst diff --git a/src/calibre/manual/gui.rst b/manual/gui.rst similarity index 100% rename from src/calibre/manual/gui.rst rename to manual/gui.rst diff --git a/src/calibre/manual/images/actions.png b/manual/images/actions.png similarity index 100% rename from src/calibre/manual/images/actions.png rename to manual/images/actions.png diff --git a/src/calibre/manual/images/add_books.png b/manual/images/add_books.png similarity index 100% rename from src/calibre/manual/images/add_books.png rename to manual/images/add_books.png diff --git a/manual/images/added_books.png b/manual/images/added_books.png new file mode 100644 index 0000000000..1a2b20420e Binary files /dev/null and b/manual/images/added_books.png differ diff --git a/src/calibre/manual/images/auto_author_sort.png b/manual/images/auto_author_sort.png similarity index 100% rename from src/calibre/manual/images/auto_author_sort.png rename to manual/images/auto_author_sort.png diff --git a/src/calibre/manual/images/bbc_advanced.png b/manual/images/bbc_advanced.png similarity index 100% rename from src/calibre/manual/images/bbc_advanced.png rename to manual/images/bbc_advanced.png diff --git a/src/calibre/manual/images/bbc_altered.png b/manual/images/bbc_altered.png similarity index 100% rename from src/calibre/manual/images/bbc_altered.png rename to manual/images/bbc_altered.png diff --git a/src/calibre/manual/images/bbc_altered1.png b/manual/images/bbc_altered1.png similarity index 100% rename from src/calibre/manual/images/bbc_altered1.png rename to manual/images/bbc_altered1.png diff --git a/src/calibre/manual/images/book_details.png b/manual/images/book_details.png similarity 
index 100% rename from src/calibre/manual/images/book_details.png rename to manual/images/book_details.png diff --git a/src/calibre/manual/images/bookmark.png b/manual/images/bookmark.png similarity index 100% rename from src/calibre/manual/images/bookmark.png rename to manual/images/bookmark.png diff --git a/src/calibre/manual/images/catalogs.png b/manual/images/catalogs.png similarity index 100% rename from src/calibre/manual/images/catalogs.png rename to manual/images/catalogs.png diff --git a/src/calibre/manual/images/cli.png b/manual/images/cli.png similarity index 100% rename from src/calibre/manual/images/cli.png rename to manual/images/cli.png diff --git a/src/calibre/manual/images/configuration.png b/manual/images/configuration.png similarity index 100% rename from src/calibre/manual/images/configuration.png rename to manual/images/configuration.png diff --git a/src/calibre/manual/images/connect_share.png b/manual/images/connect_share.png similarity index 100% rename from src/calibre/manual/images/connect_share.png rename to manual/images/connect_share.png diff --git a/src/calibre/manual/images/conv_dialog.png b/manual/images/conv_dialog.png similarity index 100% rename from src/calibre/manual/images/conv_dialog.png rename to manual/images/conv_dialog.png diff --git a/src/calibre/manual/images/convert_ebooks.png b/manual/images/convert_ebooks.png similarity index 100% rename from src/calibre/manual/images/convert_ebooks.png rename to manual/images/convert_ebooks.png diff --git a/src/calibre/manual/images/custom_news.png b/manual/images/custom_news.png similarity index 100% rename from src/calibre/manual/images/custom_news.png rename to manual/images/custom_news.png diff --git a/src/calibre/manual/images/debug.png b/manual/images/debug.png similarity index 100% rename from src/calibre/manual/images/debug.png rename to manual/images/debug.png diff --git a/src/calibre/manual/images/device.png b/manual/images/device.png similarity index 100% rename from 
src/calibre/manual/images/device.png rename to manual/images/device.png diff --git a/src/calibre/manual/images/edit_meta_information.png b/manual/images/edit_meta_information.png similarity index 100% rename from src/calibre/manual/images/edit_meta_information.png rename to manual/images/edit_meta_information.png diff --git a/src/calibre/manual/images/fetch_news.png b/manual/images/fetch_news.png similarity index 100% rename from src/calibre/manual/images/fetch_news.png rename to manual/images/fetch_news.png diff --git a/src/calibre/manual/images/folder_device.png b/manual/images/folder_device.png similarity index 100% rename from src/calibre/manual/images/folder_device.png rename to manual/images/folder_device.png diff --git a/src/calibre/manual/images/font_size.png b/manual/images/font_size.png similarity index 100% rename from src/calibre/manual/images/font_size.png rename to manual/images/font_size.png diff --git a/src/calibre/manual/images/full_screen.png b/manual/images/full_screen.png similarity index 100% rename from src/calibre/manual/images/full_screen.png rename to manual/images/full_screen.png diff --git a/src/calibre/manual/images/jobs.png b/manual/images/jobs.png similarity index 100% rename from src/calibre/manual/images/jobs.png rename to manual/images/jobs.png diff --git a/src/calibre/manual/images/library.png b/manual/images/library.png similarity index 100% rename from src/calibre/manual/images/library.png rename to manual/images/library.png diff --git a/src/calibre/manual/images/nav_pos.png b/manual/images/nav_pos.png similarity index 100% rename from src/calibre/manual/images/nav_pos.png rename to manual/images/nav_pos.png diff --git a/src/calibre/manual/images/news.png b/manual/images/news.png similarity index 100% rename from src/calibre/manual/images/news.png rename to manual/images/news.png diff --git a/src/calibre/manual/images/pipeline.pgf b/manual/images/pipeline.pgf similarity index 100% rename from 
src/calibre/manual/images/pipeline.pgf rename to manual/images/pipeline.pgf diff --git a/src/calibre/manual/images/pipeline.png b/manual/images/pipeline.png similarity index 100% rename from src/calibre/manual/images/pipeline.png rename to manual/images/pipeline.png diff --git a/src/calibre/manual/images/pref_button.png b/manual/images/pref_button.png similarity index 100% rename from src/calibre/manual/images/pref_button.png rename to manual/images/pref_button.png diff --git a/src/calibre/manual/images/preferences.png b/manual/images/preferences.png similarity index 100% rename from src/calibre/manual/images/preferences.png rename to manual/images/preferences.png diff --git a/src/calibre/manual/images/prev_next.png b/manual/images/prev_next.png similarity index 100% rename from src/calibre/manual/images/prev_next.png rename to manual/images/prev_next.png diff --git a/src/calibre/manual/images/ref_mode.png b/manual/images/ref_mode.png similarity index 100% rename from src/calibre/manual/images/ref_mode.png rename to manual/images/ref_mode.png diff --git a/src/calibre/manual/images/ref_mode_button.png b/manual/images/ref_mode_button.png similarity index 100% rename from src/calibre/manual/images/ref_mode_button.png rename to manual/images/ref_mode_button.png diff --git a/src/calibre/manual/images/remove_books.png b/manual/images/remove_books.png similarity index 100% rename from src/calibre/manual/images/remove_books.png rename to manual/images/remove_books.png diff --git a/src/calibre/manual/images/save_to_disk.png b/manual/images/save_to_disk.png similarity index 100% rename from src/calibre/manual/images/save_to_disk.png rename to manual/images/save_to_disk.png diff --git a/src/calibre/manual/images/search.png b/manual/images/search.png similarity index 100% rename from src/calibre/manual/images/search.png rename to manual/images/search.png diff --git a/src/calibre/manual/images/search_button.png b/manual/images/search_button.png similarity index 100% rename from 
src/calibre/manual/images/search_button.png rename to manual/images/search_button.png diff --git a/src/calibre/manual/images/search_sort.png b/manual/images/search_sort.png similarity index 100% rename from src/calibre/manual/images/search_sort.png rename to manual/images/search_sort.png diff --git a/src/calibre/manual/images/send_to_device.png b/manual/images/send_to_device.png similarity index 100% rename from src/calibre/manual/images/send_to_device.png rename to manual/images/send_to_device.png diff --git a/src/calibre/manual/images/sg_cc.jpg b/manual/images/sg_cc.jpg similarity index 100% rename from src/calibre/manual/images/sg_cc.jpg rename to manual/images/sg_cc.jpg diff --git a/src/calibre/manual/images/sg_genre.jpg b/manual/images/sg_genre.jpg similarity index 100% rename from src/calibre/manual/images/sg_genre.jpg rename to manual/images/sg_genre.jpg diff --git a/src/calibre/manual/images/sg_pref.jpg b/manual/images/sg_pref.jpg similarity index 100% rename from src/calibre/manual/images/sg_pref.jpg rename to manual/images/sg_pref.jpg diff --git a/src/calibre/manual/images/sg_restrict.jpg b/manual/images/sg_restrict.jpg similarity index 100% rename from src/calibre/manual/images/sg_restrict.jpg rename to manual/images/sg_restrict.jpg diff --git a/src/calibre/manual/images/sg_restrict2.jpg b/manual/images/sg_restrict2.jpg similarity index 100% rename from src/calibre/manual/images/sg_restrict2.jpg rename to manual/images/sg_restrict2.jpg diff --git a/src/calibre/manual/images/sg_search.jpg b/manual/images/sg_search.jpg similarity index 100% rename from src/calibre/manual/images/sg_search.jpg rename to manual/images/sg_search.jpg diff --git a/src/calibre/manual/images/sg_tb.jpg b/manual/images/sg_tb.jpg similarity index 100% rename from src/calibre/manual/images/sg_tb.jpg rename to manual/images/sg_tb.jpg diff --git a/src/calibre/manual/images/sg_tree.jpg b/manual/images/sg_tree.jpg similarity index 100% rename from src/calibre/manual/images/sg_tree.jpg 
rename to manual/images/sg_tree.jpg diff --git a/src/calibre/manual/images/show_tag_editor.png b/manual/images/show_tag_editor.png similarity index 100% rename from src/calibre/manual/images/show_tag_editor.png rename to manual/images/show_tag_editor.png diff --git a/src/calibre/manual/images/swap_title_author.png b/manual/images/swap_title_author.png similarity index 100% rename from src/calibre/manual/images/swap_title_author.png rename to manual/images/swap_title_author.png diff --git a/src/calibre/manual/images/tag_browser.png b/manual/images/tag_browser.png similarity index 100% rename from src/calibre/manual/images/tag_browser.png rename to manual/images/tag_browser.png diff --git a/src/calibre/manual/images/toc.png b/manual/images/toc.png similarity index 100% rename from src/calibre/manual/images/toc.png rename to manual/images/toc.png diff --git a/src/calibre/manual/images/valid.png b/manual/images/valid.png similarity index 100% rename from src/calibre/manual/images/valid.png rename to manual/images/valid.png diff --git a/src/calibre/manual/images/view.png b/manual/images/view.png similarity index 100% rename from src/calibre/manual/images/view.png rename to manual/images/view.png diff --git a/src/calibre/manual/index.rst b/manual/index.rst similarity index 97% rename from src/calibre/manual/index.rst rename to manual/index.rst index d0d6bfb9b5..fa89dba95f 100755 --- a/src/calibre/manual/index.rst +++ b/manual/index.rst @@ -17,7 +17,7 @@ To get started with more advanced usage, you should read about the :ref:`Graphic .. only:: online - **An ebook version of this user manual is available in** `EPUB format `_. + **An ebook version of this user manual is available in** `EPUB format `_ and `AZW3 (Kindle Fire) format `_. 
Sections ------------ diff --git a/src/calibre/manual/metadata.rst b/manual/metadata.rst similarity index 100% rename from src/calibre/manual/metadata.rst rename to manual/metadata.rst diff --git a/src/calibre/manual/news.rst b/manual/news.rst similarity index 100% rename from src/calibre/manual/news.rst rename to manual/news.rst diff --git a/src/calibre/manual/news_recipe.rst b/manual/news_recipe.rst similarity index 100% rename from src/calibre/manual/news_recipe.rst rename to manual/news_recipe.rst diff --git a/src/calibre/manual/plugin_examples/helloworld/__init__.py b/manual/plugin_examples/helloworld/__init__.py similarity index 100% rename from src/calibre/manual/plugin_examples/helloworld/__init__.py rename to manual/plugin_examples/helloworld/__init__.py diff --git a/src/calibre/manual/plugin_examples/interface_demo/__init__.py b/manual/plugin_examples/interface_demo/__init__.py similarity index 100% rename from src/calibre/manual/plugin_examples/interface_demo/__init__.py rename to manual/plugin_examples/interface_demo/__init__.py diff --git a/src/calibre/manual/plugin_examples/interface_demo/about.txt b/manual/plugin_examples/interface_demo/about.txt similarity index 100% rename from src/calibre/manual/plugin_examples/interface_demo/about.txt rename to manual/plugin_examples/interface_demo/about.txt diff --git a/src/calibre/manual/plugin_examples/interface_demo/config.py b/manual/plugin_examples/interface_demo/config.py similarity index 100% rename from src/calibre/manual/plugin_examples/interface_demo/config.py rename to manual/plugin_examples/interface_demo/config.py diff --git a/src/calibre/manual/plugin_examples/interface_demo/images/icon.png b/manual/plugin_examples/interface_demo/images/icon.png similarity index 100% rename from src/calibre/manual/plugin_examples/interface_demo/images/icon.png rename to manual/plugin_examples/interface_demo/images/icon.png diff --git a/src/calibre/manual/plugin_examples/interface_demo/main.py 
b/manual/plugin_examples/interface_demo/main.py similarity index 100% rename from src/calibre/manual/plugin_examples/interface_demo/main.py rename to manual/plugin_examples/interface_demo/main.py diff --git a/src/calibre/manual/plugin_examples/interface_demo/plugin-import-name-interface_demo.txt b/manual/plugin_examples/interface_demo/plugin-import-name-interface_demo.txt similarity index 100% rename from src/calibre/manual/plugin_examples/interface_demo/plugin-import-name-interface_demo.txt rename to manual/plugin_examples/interface_demo/plugin-import-name-interface_demo.txt diff --git a/src/calibre/manual/plugin_examples/interface_demo/ui.py b/manual/plugin_examples/interface_demo/ui.py similarity index 100% rename from src/calibre/manual/plugin_examples/interface_demo/ui.py rename to manual/plugin_examples/interface_demo/ui.py diff --git a/src/calibre/manual/plugins.rst b/manual/plugins.rst similarity index 100% rename from src/calibre/manual/plugins.rst rename to manual/plugins.rst diff --git a/src/calibre/manual/qthelp.py b/manual/qthelp.py similarity index 100% rename from src/calibre/manual/qthelp.py rename to manual/qthelp.py diff --git a/src/calibre/manual/regexp.rst b/manual/regexp.rst similarity index 100% rename from src/calibre/manual/regexp.rst rename to manual/regexp.rst diff --git a/src/calibre/manual/resources/epub_cover.jpg b/manual/resources/epub_cover.jpg similarity index 100% rename from src/calibre/manual/resources/epub_cover.jpg rename to manual/resources/epub_cover.jpg diff --git a/src/calibre/manual/resources/logo.png b/manual/resources/logo.png similarity index 100% rename from src/calibre/manual/resources/logo.png rename to manual/resources/logo.png diff --git a/src/calibre/manual/server.rst b/manual/server.rst similarity index 100% rename from src/calibre/manual/server.rst rename to manual/server.rst diff --git a/src/calibre/manual/sub_groups.rst b/manual/sub_groups.rst similarity index 100% rename from src/calibre/manual/sub_groups.rst 
rename to manual/sub_groups.rst diff --git a/src/calibre/manual/template_lang.rst b/manual/template_lang.rst similarity index 98% rename from src/calibre/manual/template_lang.rst rename to manual/template_lang.rst index 782673ce16..3730ab7054 100644 --- a/src/calibre/manual/template_lang.rst +++ b/manual/template_lang.rst @@ -245,7 +245,7 @@ The following functions are available in addition to those described in single-f * ``current_library_name() -- `` return the last name on the path to the current calibre library. This function can be called in template program mode using the template ``{:'current_library_name()'}``. * ``days_between(date1, date2)`` -- return the number of days between ``date1`` and ``date2``. The number is positive if ``date1`` is greater than ``date2``, otherwise negative. If either ``date1`` or ``date2`` are not dates, the function returns the empty string. * ``divide(x, y)`` -- returns x / y. Throws an exception if either x or y are not numbers. - * ``eval(string)`` -- evaluates the string as a program, passing the local variables (those ``assign`` ed to). This permits using the template processor to construct complex results from local variables. + * ``eval(string)`` -- evaluates the string as a program, passing the local variables (those ``assign`` ed to). This permits using the template processor to construct complex results from local variables. Because the `{` and `}` characters are special, you must use `[[` for the `{` character and `]]` for the '}' character; they are converted automatically. Note also that prefixes and suffixes (the `|prefix|suffix` syntax) cannot be used in the argument to this function when using template program mode. * ``field(name)`` -- returns the metadata field named by ``name``. * ``first_non_empty(value, value, ...)`` -- returns the first value that is not empty. If all values are empty, then the empty value is returned. You can have as many values as you want. 
* ``format_date(x, date_format)`` -- format_date(val, format_string) -- format the value, which must be a date field, using the format_string, returning a string. The formatting codes are:: @@ -306,7 +306,7 @@ The following functions are available in addition to those described in single-f * ``substr(str, start, end)`` -- returns the ``start``'th through the ``end``'th characters of ``str``. The first character in ``str`` is the zero'th character. If end is negative, then it indicates that many characters counting from the right. If end is zero, then it indicates the last character. For example, ``substr('12345', 1, 0)`` returns ``'2345'``, and ``substr('12345', 1, -1)`` returns ``'234'``. * ``subtract(x, y)`` -- returns x - y. Throws an exception if either x or y are not numbers. * ``today()`` -- return a date string for today. This value is designed for use in format_date or days_between, but can be manipulated like any other string. The date is in ISO format. - * ``template(x)`` -- evaluates x as a template. The evaluation is done in its own context, meaning that variables are not shared between the caller and the template evaluation. Because the `{` and `}` characters are special, you must use `[[` for the `{` character and `]]` for the '}' character; they are converted automatically. For example, ``template('[[title_sort]]') will evaluate the template ``{title_sort}`` and return its value. + * ``template(x)`` -- evaluates x as a template. The evaluation is done in its own context, meaning that variables are not shared between the caller and the template evaluation. Because the `{` and `}` characters are special, you must use `[[` for the `{` character and `]]` for the '}' character; they are converted automatically. For example, ``template('[[title_sort]]')`` will evaluate the template ``{title_sort}`` and return its value. 
Note also that prefixes and suffixes (the `|prefix|suffix` syntax) cannot be used in the argument to this function when using template program mode. .. _template_functions_reference: diff --git a/src/calibre/manual/template_ref_generate.py b/manual/template_ref_generate.py similarity index 97% rename from src/calibre/manual/template_ref_generate.py rename to manual/template_ref_generate.py index 24f9bba9dc..b331345572 100644 --- a/src/calibre/manual/template_ref_generate.py +++ b/manual/template_ref_generate.py @@ -55,7 +55,7 @@ The python implementation of the template functions is passed in a Metadata obje The set of standard metadata fields. -.. literalinclude:: ../ebooks/metadata/book/__init__.py +.. literalinclude:: ../src/calibre/ebooks/metadata/book/__init__.py :lines: 7- ''' diff --git a/src/calibre/manual/templates/layout.html b/manual/templates/layout.html similarity index 100% rename from src/calibre/manual/templates/layout.html rename to manual/templates/layout.html diff --git a/src/calibre/manual/templates/search.html b/manual/templates/search.html similarity index 100% rename from src/calibre/manual/templates/search.html rename to manual/templates/search.html diff --git a/src/calibre/manual/tutorials.rst b/manual/tutorials.rst similarity index 100% rename from src/calibre/manual/tutorials.rst rename to manual/tutorials.rst diff --git a/src/calibre/manual/viewer.rst b/manual/viewer.rst similarity index 100% rename from src/calibre/manual/viewer.rst rename to manual/viewer.rst diff --git a/src/calibre/manual/xpath.rst b/manual/xpath.rst similarity index 100% rename from src/calibre/manual/xpath.rst rename to manual/xpath.rst diff --git a/src/calibre/manual/xpath.xhtml b/manual/xpath.xhtml similarity index 100% rename from src/calibre/manual/xpath.xhtml rename to manual/xpath.xhtml diff --git a/recipes/24sata_rs.recipe b/recipes/24sata_rs.recipe index 0f879036ea..a51323f21f 100644 --- a/recipes/24sata_rs.recipe +++ b/recipes/24sata_rs.recipe @@ -1,6 +1,7 
@@ +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai __license__ = 'GPL v3' -__copyright__ = '2009-2010, Darko Miletic ' +__copyright__ = '2009-2012, Darko Miletic ' ''' 24sata.rs @@ -21,26 +22,29 @@ class Ser24Sata(BasicNewsRecipe): encoding = 'utf-8' use_embedded_content = False language = 'sr' - publication_type = 'newspaper' - extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}' + publication_type = 'newsportal' + extra_css = """ + @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} + body{font-family: serif1, serif} + """ conversion_options = { - 'comment' : description - , 'tags' : category - , 'publisher' : publisher - , 'language' : language - , 'linearize_tables' : True + 'comment' : description + , 'tags' : category + , 'publisher': publisher + , 'language' : language } preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] - feeds = [(u'Vesti Dana', u'http://www.24sata.rs/rss.php')] - - def preprocess_html(self, soup): - return self.adeify_images(soup) + feeds = [ + (u'Vesti' , u'http://www.24sata.rs/rss/vesti.xml' ), + (u'Sport' , u'http://www.24sata.rs/rss/sport.xml' ), + (u'Šou' , u'http://www.24sata.rs/rss/sou.xml' ), + (u'Specijal', u'http://www.24sata.rs/rss/specijal.xml'), + (u'Novi Sad', u'http://www.24sata.rs/rss/ns.xml' ) + ] def print_version(self, url): - article = url.partition('#')[0] - article_id = article.partition('id=')[2] - return 'http://www.24sata.rs/_print.php?id=' + article_id - + dpart, spart, apart = url.rpartition('/') + return dpart + '/print/' + apart diff --git a/recipes/abc_py.recipe b/recipes/abc_py.recipe index 297129d269..41005c6844 100644 --- a/recipes/abc_py.recipe +++ b/recipes/abc_py.recipe @@ -1,5 +1,5 @@ __license__ = 'GPL v3' -__copyright__ = '2010, Darko Miletic ' +__copyright__ = '2010-2012, Darko Miletic ' ''' abc.com.py ''' @@ -7,7 +7,7 
@@ abc.com.py from calibre.web.feeds.news import BasicNewsRecipe class ABC_py(BasicNewsRecipe): - title = 'ABC digital' + title = 'ABC Color' __author__ = 'Darko Miletic' description = 'Noticias de Paraguay y el resto del mundo' publisher = 'ABC' @@ -15,12 +15,16 @@ class ABC_py(BasicNewsRecipe): oldest_article = 2 max_articles_per_feed = 200 no_stylesheets = True - encoding = 'cp1252' + encoding = 'utf8' use_embedded_content = False language = 'es_PY' remove_empty_feeds = True + masthead_url = 'http://www.abc.com.py/plantillas/img/abc-logo.png' publication_type = 'newspaper' - extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} ' + extra_css = """ + body{font-family: UnitSlabProMedium,"Times New Roman",serif } + img{margin-bottom: 0.4em; display: block;} + """ conversion_options = { 'comment' : description @@ -29,21 +33,19 @@ class ABC_py(BasicNewsRecipe): , 'language' : language } - remove_tags = [dict(name=['form','iframe','embed','object','link','base','table']),dict(attrs={'class':'toolbox'})] - remove_tags_after = dict(attrs={'class':'date'}) - keep_only_tags = [dict(attrs={'class':'zcontent'})] + remove_tags = [ + dict(name=['form','iframe','embed','object','link','base','table']), + dict(attrs={'class':['es-carousel-wrapper']}), + dict(attrs={'id':['tools','article-banner-1']}) + ] + keep_only_tags = [dict(attrs={'id':'article'})] feeds = [ - (u'Ultimo momento' , u'http://www.abc.com.py/ultimo-momento.xml' ) - ,(u'Nacionales' , u'http://www.abc.com.py/nacionales.xml' ) - ,(u'Internacionales' , u'http://www.abc.com.py/internacionales.xml' ) - ,(u'Deportes' , u'http://www.abc.com.py/deportes.xml' ) - ,(u'Espectaculos' , u'http://www.abc.com.py/espectaculos.xml' ) - ,(u'Ciencia y Tecnologia', u'http://www.abc.com.py/ciencia-y-tecnologia.xml') + (u'Ultimo momento', u'http://www.abc.com.py/rss.xml' ) + ,(u'Nacionales' , u'http://www.abc.com.py/nacionales/rss.xml' ) + ,(u'Mundo' , 
u'http://www.abc.com.py/internacionales/rss.xml') + ,(u'Deportes' , u'http://www.abc.com.py/deportes/rss.xml' ) + ,(u'Espectaculos' , u'http://www.abc.com.py/espectaculos/rss.xml' ) + ,(u'TecnoCiencia' , u'http://www.abc.com.py/ciencia/rss.xml' ) ] - - def preprocess_html(self, soup): - for item in soup.findAll(style=True): - del item['style'] - return soup diff --git a/recipes/ads_of_the_world.recipe b/recipes/ads_of_the_world.recipe new file mode 100644 index 0000000000..11224f2382 --- /dev/null +++ b/recipes/ads_of_the_world.recipe @@ -0,0 +1,26 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class AdvancedUserRecipe1336986047(BasicNewsRecipe): + title = u'Ads of the World' + oldest_article = 7 + max_articles_per_feed = 100 + auto_cleanup = False + description = 'The best international advertising campaigns' + language = 'en' + __author__ = 'faber1971' + + no_stylesheets = True + keep_only_tags = [ + dict(name='div', attrs={'id':'primary'}) + ] + + remove_tags = [ + dict(name='ul', attrs={'class':'links inline'}) + ,dict(name='div', attrs={'class':'form-item'}) + ,dict(name='div', attrs={'id':['options', 'comments']}) + ,dict(name='ul', attrs={'id':'nodePager'}) + ] + + reverse_article_order = True + masthead_url = 'http://bigcatgroup.co.uk/files/2011/01/05-ads-of-the-world.png' + feeds = [(u'Ads of the world', u'http://feeds.feedburner.com/adsoftheworld-latest')] diff --git a/recipes/air_force_times.recipe b/recipes/air_force_times.recipe new file mode 100644 index 0000000000..e4f223bf4b --- /dev/null +++ b/recipes/air_force_times.recipe @@ -0,0 +1,43 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class AirForceTimes(BasicNewsRecipe): + title = 'Air Force Times' + __author__ = 'jde' + __date__ = '16 May 2012' + __version__ = '1.0' + description = 'News of the U.S. Air Force' + language = 'en' + publisher = 'AirForceTimes.com' + category = 'news, U.S. Air Force' + tags = 'news, U.S. 
Air Force' + cover_url = 'http://www.airforcetimes.com/images/logo_airforcetimes_alert.jpg' + masthead_url = 'http://www.airforcetimes.com/images/logo_airforcetimes_alert.jpg' + oldest_article = 7 #days + max_articles_per_feed = 25 + publication_type = 'newspaper' + no_stylesheets = True + use_embedded_content = False + encoding = None + recursions = 0 + needs_subscription = False + remove_javascript = True + remove_empty_feeds = True + auto_cleanup = True + + + + feeds = [ + + ('News', 'http://www.airforcetimes.com/rss_news.php'), + ('Benefits', 'http://www.airforcetimes.com/rss_benefits.php'), + ('Money', 'http://www.airforcetimes.com/rss_money.php'), + ('Careers & Education', 'http://www.airforcetimes.com/rss_careers.php'), + ('Community', 'http://www.airforcetimes.com/rss_community.php'), + ('Off Duty', 'http://www.airforcetimes.com/rss_off_duty.php'), + ('Entertainment', 'http://www.airforcetimes.com/rss_entertainment.php'), + ('Guard & Reserve', 'http://www.airforcetimes.com/rss_guard.php'), + ] + + + + diff --git a/recipes/akter.recipe b/recipes/akter.recipe index 0f2fb05640..83625c240b 100644 --- a/recipes/akter.recipe +++ b/recipes/akter.recipe @@ -1,5 +1,5 @@ __license__ = 'GPL v3' -__copyright__ = '2010, Darko Miletic ' +__copyright__ = '2010-2012, Darko Miletic ' ''' akter.co.rs ''' @@ -8,7 +8,7 @@ import re from calibre.web.feeds.news import BasicNewsRecipe class Akter(BasicNewsRecipe): - title = 'AKTER' + title = 'AKTER - Nedeljnik' __author__ = 'Darko Miletic' description = 'AKTER - nedeljni politicki magazin savremene Srbije' publisher = 'Akter Media Group d.o.o.' 
@@ -18,61 +18,37 @@ class Akter(BasicNewsRecipe): no_stylesheets = True use_embedded_content = False encoding = 'utf-8' - masthead_url = 'http://www.akter.co.rs/templates/gk_thenews2/images/style2/logo.png' + masthead_url = 'http://www.akter.co.rs/gfx/logoneover.png' language = 'sr' publication_type = 'magazine' remove_empty_feeds = True - PREFIX = 'http://www.akter.co.rs' extra_css = """ @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} - .article_description,body{font-family: Arial,Helvetica,sans1,sans-serif} - .color-2{display:block; margin-bottom: 10px; padding: 5px, 10px; - border-left: 1px solid #D00000; color: #D00000} - img{margin-bottom: 0.8em} """ + body{font-family: Tahoma,Geneva,sans1,sans-serif} + img{margin-bottom: 0.8em; display: block;} + """ conversion_options = { - 'comment' : description - , 'tags' : category - , 'publisher' : publisher - , 'language' : language - , 'linearize_tables' : True + 'comment' : description + , 'tags' : category + , 'publisher': publisher + , 'language' : language } preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] - - feeds = [ - (u'Politika' , u'http://www.akter.co.rs/index.php/politikaprint.html' ) - ,(u'Ekonomija' , u'http://www.akter.co.rs/index.php/ekonomijaprint.html') - ,(u'Life&Style' , u'http://www.akter.co.rs/index.php/lsprint.html' ) - ,(u'Sport' , u'http://www.akter.co.rs/index.php/sportprint.html' ) - ] - - def preprocess_html(self, soup): - for item in soup.findAll(style=True): - del item['style'] - return self.adeify_images(soup) + keep_only_tags = [dict(name='div', attrs={'id':'section_to_print'})] + feeds = [(u'Nedeljnik', u'http://akter.co.rs/rss/nedeljnik')] def print_version(self, url): - return url + '?tmpl=component&print=1&page=' - - def parse_index(self): - totalfeeds = [] - lfeeds = self.get_feeds() - for feedobj in lfeeds: - feedtitle, feedurl = feedobj - self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else 
feedurl)) - articles = [] - soup = self.index_to_soup(feedurl) - for item in soup.findAll(attrs={'class':['sectiontableentry1','sectiontableentry2']}): - link = item.find('a') - url = self.PREFIX + link['href'] - title = self.tag_to_string(link) - articles.append({ - 'title' :title - ,'date' :'' - ,'url' :url - ,'description':'' - }) - totalfeeds.append((feedtitle, articles)) - return totalfeeds + dpart, spart, apart = url.rpartition('/') + return dpart + '/print-' + apart + def get_cover_url(self): + soup = self.index_to_soup('http://www.akter.co.rs/weekly.html') + divt = soup.find('div', attrs={'class':'lastissue'}) + if divt: + imgt = divt.find('img') + if imgt: + return 'http://www.akter.co.rs' + imgt['src'] + return None + diff --git a/recipes/akter_dnevnik.recipe b/recipes/akter_dnevnik.recipe new file mode 100644 index 0000000000..7322baf4ec --- /dev/null +++ b/recipes/akter_dnevnik.recipe @@ -0,0 +1,44 @@ +__license__ = 'GPL v3' +__copyright__ = '2012, Darko Miletic ' +''' +akter.co.rs +''' + +import re +from calibre.web.feeds.news import BasicNewsRecipe + +class Akter(BasicNewsRecipe): + title = 'AKTER - Dnevnik' + __author__ = 'Darko Miletic' + description = 'AKTER - Najnovije vesti iz Srbije' + publisher = 'Akter Media Group d.o.o.' 
+ category = 'vesti, online vesti, najnovije vesti, politika, sport, ekonomija, biznis, finansije, berza, kultura, zivot, putovanja, auto, automobili, tehnologija, politicki magazin, dogadjaji, desavanja, lifestyle, zdravlje, zdravstvo, vest, novine, nedeljnik, srbija, novi sad, vojvodina, svet, drustvo, zabava, republika srpska, beograd, intervju, komentar, reportaza, arhiva vesti, news, serbia, politics' + oldest_article = 8 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + encoding = 'utf-8' + masthead_url = 'http://www.akter.co.rs/gfx/logodnover.png' + language = 'sr' + publication_type = 'magazine' + remove_empty_feeds = True + extra_css = """ + @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} + body{font-family: Tahoma,Geneva,sans1,sans-serif} + img{margin-bottom: 0.8em; display: block;} + """ + + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher': publisher + , 'language' : language + } + + preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] + keep_only_tags = [dict(name='div', attrs={'id':'section_to_print'})] + feeds = [(u'Vesti', u'http://akter.co.rs/rss/dnevni')] + + def print_version(self, url): + dpart, spart, apart = url.rpartition('/') + return dpart + '/print-' + apart diff --git a/recipes/army_times.recipe b/recipes/army_times.recipe new file mode 100644 index 0000000000..2cb5164106 --- /dev/null +++ b/recipes/army_times.recipe @@ -0,0 +1,42 @@ +from calibre.web.feeds.news import BasicNewsRecipe +class ArmyTimes(BasicNewsRecipe): + title = 'Army Times' + __author__ = 'jde' + __date__ = '16 May 2012' + __version__ = '1.0' + description = 'News of the U.S. Army' + language = 'en' + publisher = 'ArmyTimes.com' + category = 'news, U.S. Army' + tags = 'news, U.S. 
Army' + cover_url = 'http://www.armytimes.com/images/logo_armytimes_alert.jpg' + masthead_url = 'http://www.armytimes.com/images/logo_armytimes_alert.jpg' + oldest_article = 7 #days + max_articles_per_feed = 25 + publication_type = 'newspaper' + no_stylesheets = True + use_embedded_content = False + encoding = None + recursions = 0 + needs_subscription = False + remove_javascript = True + remove_empty_feeds = True + auto_cleanup = True + + + + feeds = [ + +('News', 'http://www.armytimes.com/rss_news.php'), +('Benefits', 'http://www.armytimes.com/rss_benefits.php'), +('Money', 'http://www.armytimes.com/rss_money.php'), +('Careers & Education', 'http://www.armytimes.com/rss_careers.php'), +('Community', 'http://www.armytimes.com/rss_community.php'), +('Off Duty', 'http://www.armytimes.com/rss_off_duty.php'), +('Entertainment', 'http://www.armytimes.com/rss_entertainment.php'), +('Guard & Reserve', 'http://www.armytimes.com/rss_guard.php'), + + ] + + + diff --git a/recipes/ars_technica.recipe b/recipes/ars_technica.recipe index 3a955d5e15..cef96915e6 100644 --- a/recipes/ars_technica.recipe +++ b/recipes/ars_technica.recipe @@ -1,33 +1,34 @@ __license__ = 'GPL v3' -__copyright__ = '2008-2011, Darko Miletic ' +__copyright__ = '2008-2012, Darko Miletic ' ''' arstechnica.com ''' -import re from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag +from calibre.ebooks.BeautifulSoup import BeautifulSoup class ArsTechnica(BasicNewsRecipe): title = u'Ars Technica' language = 'en' __author__ = 'Darko Miletic, Sujata Raman, Alexis Rohou' - description = 'The art of technology' - publisher = 'Ars Technica' + description = 'Ars Technica: Serving the technologist for 1.2 decades' + publisher = 'Conde Nast Publications' category = 'news, IT, technology' oldest_article = 5 max_articles_per_feed = 100 no_stylesheets = True encoding = 'utf-8' use_embedded_content = False - extra_css = ''' - body {font-family: 
Arial,Helvetica,sans-serif} - .title{text-align: left} - .byline{font-weight: bold; line-height: 1em; font-size: 0.625em; text-decoration: none} - .news-item-figure-caption-text{font-size:small; font-style:italic} - .news-item-figure-caption-byline{font-size:small; font-style:italic; font-weight:bold} - ''' - ignoreEtcArticles = True # Etc feed items can be ignored, as they're not real stories + remove_empty_feeds = True + publication_type = 'newsportal' + extra_css = ''' + body {font-family: Arial,sans-serif} + .heading{font-family: "Times New Roman",serif} + .byline{font-weight: bold; line-height: 1em; font-size: 0.625em; text-decoration: none} + img{display: block} + .caption-text{font-size:small; font-style:italic} + .caption-byline{font-size:small; font-style:italic; font-weight:bold} + ''' conversion_options = { 'comments' : description @@ -36,93 +37,64 @@ class ArsTechnica(BasicNewsRecipe): ,'publisher' : publisher } - - #preprocess_regexps = [ - # (re.compile(r'
.*?', re.DOTALL|re.IGNORECASE),lambda match: '') - # ] - - keep_only_tags = [dict(name='div', attrs={'id':['story','etc-story']})] + keep_only_tags = [ + dict(attrs={'class':'standalone'}) + ,dict(attrs={'id':'article-guts'}) + ] remove_tags = [ - dict(name=['object','link','embed']) - ,dict(name='div', attrs={'class':'read-more-link'}) + dict(name=['object','link','embed','iframe','meta']) + ,dict(attrs={'class':'corner-info'}) ] - #remove_attributes=['width','height'] + remove_attributes = ['lang'] + feeds = [ (u'Infinite Loop (Apple content)' , u'http://feeds.arstechnica.com/arstechnica/apple/' ) ,(u'Opposable Thumbs (Gaming content)' , u'http://feeds.arstechnica.com/arstechnica/gaming/' ) ,(u'Gear and Gadgets' , u'http://feeds.arstechnica.com/arstechnica/gadgets/' ) - ,(u'Chipster (Hardware content)' , u'http://feeds.arstechnica.com/arstechnica/hardware/' ) ,(u'Uptime (IT content)' , u'http://feeds.arstechnica.com/arstechnica/business/' ) ,(u'Open Ended (Open Source content)' , u'http://feeds.arstechnica.com/arstechnica/open-source/') ,(u'One Microsoft Way' , u'http://feeds.arstechnica.com/arstechnica/microsoft/' ) - ,(u'Nobel Intent (Science content)' , u'http://feeds.arstechnica.com/arstechnica/science/' ) + ,(u'Scientific method (Science content)' , u'http://feeds.arstechnica.com/arstechnica/science/' ) ,(u'Law & Disorder (Tech policy content)' , u'http://feeds.arstechnica.com/arstechnica/tech-policy/') ] - # This deals with multi-page stories def append_page(self, soup, appendtag, position): - pager = soup.find('div',attrs={'class':'pager'}) + pager = soup.find(attrs={'class':'numbers'}) if pager: - for atag in pager.findAll('a',href=True): - str = self.tag_to_string(atag) - if str.startswith('Next'): - nurl = 'http://arstechnica.com' + atag['href'] - rawc = self.index_to_soup(nurl,True) - soup2 = BeautifulSoup(rawc, fromEncoding=self.encoding) - - readmoretag = soup2.find('div', attrs={'class':'read-more-link'}) - if readmoretag: - readmoretag.extract() - 
texttag = soup2.find('div', attrs={'class':'body'}) - for it in texttag.findAll(style=True): - del it['style'] - - newpos = len(texttag.contents) - self.append_page(soup2,texttag,newpos) - texttag.extract() - pager.extract() - appendtag.insert(position,texttag) + nexttag = pager.find(attrs={'class':'next'}) + if nexttag: + nurl = nexttag.parent['href'] + rawc = self.index_to_soup(nurl,True) + soup2 = BeautifulSoup(rawc, fromEncoding=self.encoding) + texttag = soup2.find(attrs={'id':'article-guts'}) + newpos = len(texttag.contents) + self.append_page(soup2,texttag,newpos) + texttag.extract() + pager.extract() + appendtag.insert(position,texttag) def preprocess_html(self, soup): - # Adds line breaks near the byline (not sure why this is needed) - ftag = soup.find('div', attrs={'class':'byline'}) - if ftag: - brtag = Tag(soup,'br') - brtag2 = Tag(soup,'br') - ftag.insert(4,brtag) - ftag.insert(5,brtag2) - - # Remove style items - for item in soup.findAll(style=True): - del item['style'] - - # Remove id - for item in soup.findAll(id=True): - del item['id'] - - # For some reason, links to authors don't have the domainname - a_author = soup.find('a',{'href':re.compile("^/author")}) - if a_author: - a_author['href'] = 'http://arstechnica.com'+a_author['href'] - - # within div class news-item-figure, we need to grab images - - # Deal with multi-page stories self.append_page(soup, soup.body, 3) - + for item in soup.findAll('a'): + limg = item.find('img') + if item.string is not None: + str = item.string + item.replaceWith(str) + else: + if limg: + item.name = 'div' + item.attrs = [] + else: + str = self.tag_to_string(item) + item.replaceWith(str) + for item in soup.findAll('img'): + if not item.has_key('alt'): + item['alt'] = 'image' return soup - def get_article_url(self, article): - # If the article title starts with Etc:, don't return it - if self.ignoreEtcArticles: - article_title = article.get('title',None) - if re.match('Etc: ',article_title) is not None: - return 
None - - # The actual article is in a guid tag - return article.get('guid', None).rpartition('?')[0] + def preprocess_raw_html(self, raw, url): + return ''+raw[raw.find(''):] diff --git a/recipes/attac_es.recipe b/recipes/attac_es.recipe new file mode 100644 index 0000000000..627750d9bf --- /dev/null +++ b/recipes/attac_es.recipe @@ -0,0 +1,21 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class AttacEspanaRecipe (BasicNewsRecipe): + __author__ = u'Marc Busqué' + __url__ = 'http://www.lamarciana.com' + __version__ = '1.0' + __license__ = 'GPL v3' + __copyright__ = u'2012, Marc Busqué ' + title = u'attac.es' + description = u'La Asociación por la Tasación de las Transacciones Financieras y por la Ayuda a los Ciudadanos (ATTAC) es un movimiento internacional altermundialista que promueve el control democrático de los mercados financieros y las instituciones encargadas de su control mediante la reflexión política y la movilización social.' + url = 'http://www.attac.es' + language = 'es' + tags = 'contrainformación, información alternativa' + oldest_article = 7 + remove_empty_feeds = True + no_stylesheets = True + cover_url = u'http://www.attac.es/wp-content/themes/attacweb/images/attaces.jpg' + + feeds = [ + (u'Attac', u'http://www.attac.es/feed'), + ] diff --git a/recipes/banat_news.recipe b/recipes/banat_news.recipe new file mode 100644 index 0000000000..4c183693a3 --- /dev/null +++ b/recipes/banat_news.recipe @@ -0,0 +1,68 @@ + +''' +www.philstar.com +''' + +import time +from calibre.web.feeds.recipes import BasicNewsRecipe + +class BanatNews(BasicNewsRecipe): + title = 'Banat News' + custom_title = "Banat News - " + time.strftime('%d %b %Y %I:%M %p') + __author__ = 'jde' + __date__ = '31 May 2012' + __version__ = '1.0' + description = 'Banat News is a daily Cebuano-language newspaper based in Cebu, Philippines - philstar.com is a Philippine news and entertainment portal for the Filipino global community. 
It is the online presence of the STAR Group of Publications, a leading publisher of newspapers and magazines in the Philippines.' + language = 'ceb' + publisher = 'The Philippine STAR' + category = 'news, Philippines' + tags = 'news, Philippines' + cover_url = 'http://www.philstar.com/images/logo_Banat.jpg' + masthead_url = 'http://www.philstar.com/images/logo_Banat.jpg' + oldest_article = 1.5 #days + max_articles_per_feed = 25 + simultaneous_downloads = 10 + publication_type = 'newspaper' + timefmt = ' [%a, %d %b %Y %I:%M %p]' + no_stylesheets = True + use_embedded_content = False + encoding = None + recursions = 0 + needs_subscription = False + remove_javascript = True + remove_empty_feeds = True + auto_cleanup = False + + remove_tags = [dict(name='img', attrs={'id':'Image1'}) #Logo + ,dict(name='span', attrs={'id':'ControlArticle1_LabelHeader'}) #Section (Headlines, Nation, Metro, ...) + ,dict(name='a', attrs={'id':'ControlArticle1_FormView1_hlComments'}) #Comments + ,dict(name='img', attrs={'src':'images/post-comments.jpg'}) #View Comments + ,dict(name='a', attrs={'id':'ControlArticle1_FormView1_ControlPhotoAndCaption1_hlImageCaption'}) #Zoom + ] + conversion_options = { 'title' : custom_title, + 'comments' : description, + 'tags' : tags, + 'language' : language, + 'publisher' : publisher, + 'authors' : publisher, + 'smarten_punctuation' : True + } + + feeds = [ + ('Balita' , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=101' ) + ,('Opinyon' , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=102' ) + ,('Kalingawan' , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=104' ) + ,('Showbiz' , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=62' ) + ,('Palaro' , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=103' ) + ,('Imong Kapalaran' , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=105' ) + ] + +# process the printer friendly version of article + def print_version(self, url): + return 
url.replace('/Article', '/ArticlePrinterFriendly') + +# obtain title from printer friendly version of article; avoiding add_toc_thumbnail changing title when article has image + def populate_article_metadata(self, article, soup, first): + article.title = soup.find('span', {'id': 'ControlArticle1_FormView1_ArticleHeaderLabel'}).contents[0].strip() + + diff --git a/recipes/bbc_brasil.recipe b/recipes/bbc_brasil.recipe new file mode 100644 index 0000000000..a2d83944d1 --- /dev/null +++ b/recipes/bbc_brasil.recipe @@ -0,0 +1,594 @@ +## +## Title: BBC News, Sport, and Blog Calibre Recipe +## Contact: mattst - jmstanfield@gmail.com +## +## License: GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html +## Copyright: mattst - jmstanfield@gmail.com +## +## Written: November 2011 +## Last Edited: 2011-11-19 +## + +__license__ = 'GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html' +__copyright__ = 'mattst - jmstanfield@gmail.com' + + +''' +BBC News, Sport, and Blog Calibre Recipe +''' + +# Import the regular expressions module. +import re + +# Import the BasicNewsRecipe class which this class extends. +from calibre.web.feeds.recipes import BasicNewsRecipe + +class BBCBrasilRecipe(BasicNewsRecipe): + + # + # **** IMPORTANT USERS READ ME **** + # + # First select the feeds you want then scroll down below the feeds list + # and select the values you want for the other user preferences, like + # oldest_article and such like. + # + # + # Select the BBC rss feeds which you want in your ebook. + # Selected feed have NO '#' at their start, de-selected feeds begin with a '#'. + # + # Eg. ("News Home", "http://feeds.bbci.co.uk/... - include feed. + # Eg. #("News Home", "http://feeds.bbci.co.uk/... - do not include feed. + # + # There are 68 feeds below which constitute the bulk of the available rss + # feeds on the BBC web site. These include 5 blogs by editors and + # correspondants, 16 sports feeds, 15 'sub' regional feeds (Eg. 
North West + # Wales, Scotland Business), and 7 Welsh language feeds. + # + # Some of the feeds are low volume (Eg. blogs), or very low volume (Eg. Click) + # so if "oldest_article = 1.5" (only articles published in the last 36 hours) + # you may get some 'empty feeds' which will not then be included in the ebook. + # + # The 15 feeds currently selected below are simply my default ones. + # + # Note: With all 68 feeds selected, oldest_article set to 2, + # max_articles_per_feed set to 100, and simultaneous_downloads set to 10, + # the ebook creation took 29 minutes on my speedy 100 mbps net connection, + # fairly high-end desktop PC running Linux (Ubuntu Lucid-Lynx). + # More realistically with 15 feeds selected, oldest_article set to 1.5, + # max_articles_per_feed set to 100, and simultaneous_downloads set to 20, + # it took 6 minutes. If that's too slow increase 'simultaneous_downloads'. + # + # Select / de-select the feeds you want in your ebook. + # + feeds = [ + (u'Primeira P\xe1gina', u'http://www.bbc.co.uk/portuguese/index.xml'), + (u'\xdaltimas Not\xedcias', u'http://www.bbc.co.uk/portuguese/ultimas_noticias/index.xml'), + (u'Internacional', u'http://www.bbc.co.uk/portuguese/topicos/internacional/index.xml'), + (u'Brasil', u'http://www.bbc.co.uk/portuguese/topicos/brasil/index.xml'), + (u'Am\xe9rica Latina', u'http://www.bbc.co.uk/portuguese/topicos/america_latina/index.xml'), + (u'Economia', u'http://www.bbc.co.uk/portuguese/topicos/economia/index.xml'), + (u'Sa\xfade', u'http://www.bbc.co.uk/portuguese/topicos/saude/index.xml'), + (u'Ci\xeancia e Tecnologia', u'http://www.bbc.co.uk/portuguese/topicos/ciencia_e_tecnologia/index.xml'), + (u'Cultura', u'http://www.bbc.co.uk/portuguese/topicos/cultura/index.xml'), + (u'V\xeddeos e Fotos', u'http://www.bbc.co.uk/portuguese/videos_e_fotos/index.xml'), + (u'Especiais', u'http://www.bbc.co.uk/portuguese/especiais/index.xml') + ] + + + # **** SELECT YOUR USER PREFERENCES **** + + # Title to use for the ebook. 
+ # + title = 'BBC Brasil' + + # A brief description for the ebook. + # + description = u'Not\xedcias do Brasil e do mundo pela British Broadcasting Corporation' + + # The max number of articles which may be downloaded from each feed. + # I've never seen more than about 70 articles in a single feed in the + # BBC feeds. + # + max_articles_per_feed = 100 + + # The max age of articles which may be downloaded from each feed. This is + # specified in days - note fractions of days are allowed, Eg. 2.5 (2 and a + # half days). My default of 1.5 days is the last 36 hours, the point at + # which I've decided 'news' becomes 'old news', but be warned this is not + # so good for the blogs, technology, magazine, etc., and sports feeds. + # You may wish to extend this to 2-5 but watch out ebook creation time will + # increase as well. Setting this to 30 will get everything (AFAICT) as long + # as max_articles_per_feed remains set high (except for 'Click' which is + # v. low volume and its currently oldest article is 4th Feb 2011). + # + oldest_article = 1.5 + + # Number of simultaneous downloads. 20 is consistently working fine on the + # BBC News feeds with no problems. Speeds things up from the default of 5. + # If you have a lot of feeds and/or have increased oldest_article above 2 + # then you may wish to try increasing simultaneous_downloads to 25-30, + # Or, of course, if you are in a hurry. [I've not tried beyond 20.] + # + simultaneous_downloads = 20 + + # Timeout for fetching files from the server in seconds. The default of + # 120 seconds, seems somewhat excessive. + # + timeout = 30 + + # The format string for the date shown on the ebook's first page. + # List of all values: http://docs.python.org/library/time.html + # Default in news.py has a leading space so that's mirrored here. + # As with 'feeds' select/de-select by adding/removing the initial '#', + # only one timefmt should be selected, here's a few to choose from.
+ # + timefmt = ' [%a, %d %b %Y]' # [Fri, 14 Nov 2011] (Calibre default) + #timefmt = ' [%a, %d %b %Y %H:%M]' # [Fri, 14 Nov 2011 18:30] + #timefmt = ' [%a, %d %b %Y %I:%M %p]' # [Fri, 14 Nov 2011 06:30 PM] + #timefmt = ' [%d %b %Y]' # [14 Nov 2011] + #timefmt = ' [%d %b %Y %H:%M]' # [14 Nov 2011 18:30] + #timefmt = ' [%Y-%m-%d]' # [2011-11-14] + #timefmt = ' [%Y-%m-%d-%H-%M]' # [2011-11-14-18-30] + + + + # + # **** IMPORTANT **** + # + # DO NOT EDIT BELOW HERE UNLESS YOU KNOW WHAT YOU ARE DOING. + # + # DO NOT EDIT BELOW HERE UNLESS YOU KNOW WHAT YOU ARE DOING. + # + # I MEAN IT, YES I DO, ABSOLUTELY, AT YOUR OWN RISK. :) + # + # **** IMPORTANT **** + # + + + + # Author of this recipe. + __author__ = 'Carlos Laviola' + + language = 'pt_BR' + + # Set tags. + tags = 'news, sport, blog' + + # Set publisher and publication type. + publisher = 'BBC' + publication_type = 'newspaper' + + # Disable stylesheets from site. + no_stylesheets = True + + # Specifies an override encoding for sites that have an incorrect charset + # specified. Default of 'None' says to auto-detect. Some other BBC recipes + # use 'utf8', which works fine (so use that if necessary) but auto-detecting + # with None is working fine, so stick with that for robustness. + encoding = None + + # Sets whether a feed has full articles embedded in it. The BBC feeds do not. + use_embedded_content = False + + # Removes empty feeds - why keep them!? + remove_empty_feeds = True + + # Create a custom title which fits nicely in the Kindle title list. + # Requires "import time" above class declaration, and replacing + # title with custom_title in conversion_options (right column only). + # Example of string below: "BBC News - 14 Nov 2011" + # + # custom_title = "BBC News - " + time.strftime('%d %b %Y') + + ''' + # Conversion options for advanced users, but don't forget to comment out the + # current conversion_options below.
Avoid setting 'linearize_tables' as that + # plays havoc with the 'old style' table based pages. + # + conversion_options = { 'title' : title, + 'comments' : description, + 'tags' : tags, + 'language' : language, + 'publisher' : publisher, + 'authors' : publisher, + 'smarten_punctuation' : True + } + ''' + + conversion_options = { 'smarten_punctuation' : True } + + # Specify extra CSS - overrides ALL other CSS (IE. Added last). + extra_css = 'body { font-family: verdana, helvetica, sans-serif; } \ + .introduction, .first { font-weight: bold; } \ + .cross-head { font-weight: bold; font-size: 125%; } \ + .cap, .caption { display: block; font-size: 80%; font-style: italic; } \ + .cap, .caption, .caption img, .caption span { display: block; text-align: center; margin: 5px auto; } \ + .byl, .byd, .byline img, .byline-name, .byline-title, .author-name, .author-position, \ + .correspondent-portrait img, .byline-lead-in, .name, .role, .bbc-role { display: block; \ + text-align: center; font-size: 80%; font-style: italic; margin: 1px auto; } \ + .story-date, .published, .datestamp { font-size: 80%; } \ + table { width: 100%; } \ + td img { display: block; margin: 5px auto; } \ + ul { padding-top: 10px; } \ + ol { padding-top: 10px; } \ + li { padding-top: 5px; padding-bottom: 5px; } \ + h1 { text-align: center; font-size: 175%; font-weight: bold; } \ + h2 { text-align: center; font-size: 150%; font-weight: bold; } \ + h3 { text-align: center; font-size: 125%; font-weight: bold; } \ + h4, h5, h6 { text-align: center; font-size: 100%; font-weight: bold; }' + + # Remove various tag attributes to improve the look of the ebook pages. + remove_attributes = [ 'border', 'cellspacing', 'align', 'cellpadding', 'colspan', + 'valign', 'vspace', 'hspace', 'alt', 'width', 'height' ] + + # Remove the (admittedly rarely used) line breaks, "
", which sometimes + # cause a section of the ebook to start in an unsightly fashion or, more + # frequently, a "
" will muck up the formatting of a correspondant's byline. + # "
" and "
" are far more frequently used on the table formatted + # style of pages, and really spoil the look of the ebook pages. + preprocess_regexps = [(re.compile(r'', re.IGNORECASE), lambda m: ''), + (re.compile(r'', re.IGNORECASE), lambda m: '')] + + + # Create regular expressions for tag keeping and removal to make the matches more + # robust against minor changes and errors in the HTML, Eg. double spaces, leading + # and trailing spaces, missing hyphens, and such like. + # Python regular expression ('re' class) page: http://docs.python.org/library/re.html + + # *************************************** + # Regular expressions for keep_only_tags: + # *************************************** + + # The BBC News HTML pages use variants of 'storybody' to denote the section of a HTML + # page which contains the main text of the article. Match storybody variants: 'storybody', + # 'story-body', 'story body','storybody ', etc. + storybody_reg_exp = '^.*story[_ -]*body.*$' + + # The BBC sport and 'newsbeat' (features) HTML pages use 'blq_content' to hold the title + # and published date. This is one level above the usual news pages which have the title + # and date within 'story-body'. This is annoying since 'blq_content' must also be kept, + # resulting in a lot of extra things to be removed by remove_tags. + blq_content_reg_exp = '^.*blq[_ -]*content.*$' + + # The BBC has an alternative page design structure, which I suspect is an out-of-date + # design but which is still used in some articles, Eg. 'Click' (technology), 'FastTrack' + # (travel), and in some sport pages. These alternative pages are table based (which is + # why I think they are an out-of-date design) and account for -I'm guesstimaking- less + # than 1% of all articles. They use a table class 'storycontent' to hold the article + # and like blq_content (above) have required lots of extra removal by remove_tags. 
+ story_content_reg_exp = '^.*story[_ -]*content.*$' + + # Keep the sections of the HTML which match the list below. The HTML page created by + # Calibre will fill with those sections which are matched. Note that the + # blq_content_reg_exp must be listed before storybody_reg_exp in keep_only_tags due to + # it being the parent of storybody_reg_exp, that is to say the div class/id 'story-body' + # will be inside div class/id 'blq_content' in the HTML (if 'blq_content' is there at + # all). If they are the other way around in keep_only_tags then blq_content_reg_exp + # will end up being discarded. + keep_only_tags = [ dict(name='table', attrs={'class':re.compile(story_content_reg_exp, re.IGNORECASE)}), + dict(name='div', attrs={'class':re.compile(blq_content_reg_exp, re.IGNORECASE)}), + dict(name='div', attrs={'id':re.compile(blq_content_reg_exp, re.IGNORECASE)}), + dict(name='div', attrs={'class':re.compile(storybody_reg_exp, re.IGNORECASE)}), + dict(name='div', attrs={'id':re.compile(storybody_reg_exp, re.IGNORECASE)}) ] + + # ************************************ + # Regular expressions for remove_tags: + # ************************************ + + # Regular expression to remove share-help and variant tags. The share-help class + # is used by the site for a variety of 'sharing' type links, Eg. Facebook, delicious, + # twitter, email. Removed to avoid page clutter. + share_help_reg_exp = '^.*share[_ -]*help.*$' + + # Regular expression to remove embedded-hyper and variant tags. This class is used to + # display links to other BBC News articles on the same/similar subject. + embedded_hyper_reg_exp = '^.*embed*ed[_ -]*hyper.*$' + + # Regular expression to remove hypertabs and variant tags. This class is used to + # display a tab bar at the top of an article which allows the user to switch to + # an article (viewed on the same page) providing further info., 'in depth' analysis, + # an editorial, a correspondant's blog entry, and such like. 
The ability to handle + # a tab bar of this nature is currently beyond the scope of this recipe and + # possibly of Calibre itself (not sure about that - TO DO - check!). + hypertabs_reg_exp = '^.*hyper[_ -]*tabs.*$' + + # Regular expression to remove story-feature and variant tags. Eg. 'story-feature', + # 'story-feature related narrow', 'story-feature wide', 'story-feature narrow'. + # This class is used to add additional info. boxes, or small lists, outside of + # the main story. TO DO: Work out a way to incorporate these neatly. + story_feature_reg_exp = '^.*story[_ -]*feature.*$' + + # Regular expression to remove video and variant tags, Eg. 'videoInStoryB', + # 'videoInStoryC'. This class is used to embed video. + video_reg_exp = '^.*video.*$' + + # Regular expression to remove audio and variant tags, Eg. 'audioInStoryD'. + # This class is used to embed audio. + audio_reg_exp = '^.*audio.*$' + + # Regular expression to remove pictureGallery and variant tags, Eg. 'pictureGallery'. + # This class is used to embed a photo slideshow. See also 'slideshow' below. + picture_gallery_reg_exp = '^.*picture.*$' + + # Regular expression to remove slideshow and variant tags, Eg. 'dslideshow-enclosure'. + # This class is used to embed a slideshow (not necessarily photo) but both + # 'slideshow' and 'pictureGallery' are used for slideshows. + slideshow_reg_exp = '^.*slide[_ -]*show.*$' + + # Regular expression to remove social-links and variant tags. This class is used to + # display links to a BBC bloggers main page, used in various columnist's blogs + # (Eg. Nick Robinson, Robert Preston). + social_links_reg_exp = '^.*social[_ -]*links.*$' + + # Regular expression to remove quote and (multi) variant tags, Eg. 'quote', + # 'endquote', 'quote-credit', 'quote-credit-title', etc. These are usually + # removed by 'story-feature' removal (as they are usually within them), but + # not always. 
The quotation removed is always (AFAICT) in the article text + # as well but a 2nd copy is placed in a quote tag to draw attention to it. + # The quote class tags may or may not appear in div's. + quote_reg_exp = '^.*quote.*$' + + # Regular expression to remove hidden and variant tags, Eg. 'hidden'. + # The purpose of these is unclear, they seem to be an internal link to a + # section within the article, but the text of the link (Eg. 'Continue reading + # the main story') never seems to be displayed anyway. Removed to avoid clutter. + # The hidden class tags may or may not appear in div's. + hidden_reg_exp = '^.*hidden.*$' + + # Regular expression to remove comment and variant tags, Eg. 'comment-introduction'. + # Used on the site to display text about registered users entering comments. + comment_reg_exp = '^.*comment.*$' + + # Regular expression to remove form and variant tags, Eg. 'comment-form'. + # Used on the site to allow registered BBC users to fill in forms, typically + # for entering comments about an article. + form_reg_exp = '^.*form.*$' + + # Extra things to remove due to the addition of 'blq_content' in keep_only_tags. + + #
Used on sports pages for 'email' and 'print'. + story_actions_reg_exp = '^.*story[_ -]*actions.*$' + + #
Used on sports pages instead of 'share-help' (for + # social networking links). + bookmark_list_reg_exp = '^.*bookmark[_ -]*list.*$' + + #
+ # NOTE: Don't remove class="content-group" that is needed. + # Used on sports pages to link to 'similar stories'. + secondary_content_reg_exp = '^.*secondary[_ -]*content.*$' + + #