Sync to trunk.
@ -2,11 +2,10 @@
|
|||||||
.check-cache.pickle
|
.check-cache.pickle
|
||||||
src/calibre/plugins
|
src/calibre/plugins
|
||||||
resources/images.qrc
|
resources/images.qrc
|
||||||
resources/compiled_coffeescript.zip
|
|
||||||
src/calibre/ebooks/oeb/display/test/*.js
|
src/calibre/ebooks/oeb/display/test/*.js
|
||||||
src/calibre/manual/.build/
|
manual/.build/
|
||||||
src/calibre/manual/cli/
|
manual/cli/
|
||||||
src/calibre/manual/template_ref.rst
|
manual/template_ref.rst
|
||||||
build
|
build
|
||||||
dist
|
dist
|
||||||
docs
|
docs
|
||||||
|
11
COPYRIGHT
@ -4,11 +4,6 @@ License: GPL-3
|
|||||||
The full text of the GPL is distributed as in
|
The full text of the GPL is distributed as in
|
||||||
/usr/share/common-licenses/GPL-3 on Debian systems.
|
/usr/share/common-licenses/GPL-3 on Debian systems.
|
||||||
|
|
||||||
Files: src/calibre/ebooks/pdf/*.h,*.cpp
|
|
||||||
License: GPL-2 or later
|
|
||||||
The full text of the GPL is distributed as in
|
|
||||||
/usr/share/common-licenses/GPL-2 on Debian systems.
|
|
||||||
|
|
||||||
Files: setup/iso_639/*
|
Files: setup/iso_639/*
|
||||||
Copyright: Various
|
Copyright: Various
|
||||||
License: LGPL 2.1
|
License: LGPL 2.1
|
||||||
@ -21,6 +16,12 @@ License: BSD
|
|||||||
The full text of the BSD license is distributed as in
|
The full text of the BSD license is distributed as in
|
||||||
/usr/share/common-licenses/BSD on Debian systems.
|
/usr/share/common-licenses/BSD on Debian systems.
|
||||||
|
|
||||||
|
Files: src/qtcurve/*
|
||||||
|
Copyright: Craig Drummond, 2007 - 2010 craig.p.drummond@gmail.com
|
||||||
|
License: GPL-2
|
||||||
|
The full text of the GPL is distributed as in
|
||||||
|
/usr/share/common-licenses/GPL-2 on Debian systems.
|
||||||
|
|
||||||
Files: src/calibre/ebooks/chardet/*
|
Files: src/calibre/ebooks/chardet/*
|
||||||
Copyright: Copyright (C) 1998-2001 Netscape Communications Corporation
|
Copyright: Copyright (C) 1998-2001 Netscape Communications Corporation
|
||||||
License: LGPL-2.1+
|
License: LGPL-2.1+
|
||||||
|
248
Changelog.yaml
@ -19,6 +19,254 @@
|
|||||||
# new recipes:
|
# new recipes:
|
||||||
# - title:
|
# - title:
|
||||||
|
|
||||||
|
|
||||||
|
- version: 0.8.55
|
||||||
|
date: 2012-06-08
|
||||||
|
|
||||||
|
new features:
|
||||||
|
- title: "Add a new 'Calibre style' interface look that is more modern than the default look. You can select it via Preferences->Look & Feel->User interface style."
|
||||||
|
|
||||||
|
- title: "New, subtler look for the Tag Browser"
|
||||||
|
|
||||||
|
- title: "Driver for Trekstor Pyrus and Pantech Android Tablet"
|
||||||
|
tickets: [1008946, 1007929]
|
||||||
|
|
||||||
|
- title: "Conversion pipeline: Handle guide elements with incorrectly cased hrefs. Also handle guide elements of type coverimagestandard and thumbimagestandard."
|
||||||
|
|
||||||
|
- title: "Allow user to customize trekstor plugin to send books into sub directories."
|
||||||
|
tickets: [1007646]
|
||||||
|
|
||||||
|
- title: "EPUB Input: Add support for EPUB files that use the IDPF font obfuscation algorithm. Apparently, people have started producing these now."
|
||||||
|
tickets: [1008810]
|
||||||
|
|
||||||
|
- title: "Save single format to disk: Only show the format available in the selected books."
|
||||||
|
tickets: [1007287]
|
||||||
|
|
||||||
|
bug fixes:
|
||||||
|
- title: "MOBI Output: When using the insert metadata at start of book option, do not use a table to layout the metadata, as the Kindle Fire crashes when rendering the table."
|
||||||
|
tickets: [1002119]
|
||||||
|
|
||||||
|
- title: "Device detection: Fix a bug that could cause device detection to fail completely if devices with certain vendor/product ids are connected."
|
||||||
|
tickets: [1009718]
|
||||||
|
|
||||||
|
- title: "MOBI Output: When rasterizing svgs only compute style information when an actual svg image is present. Small speedup when converting large svg-free documents to MOBI."
|
||||||
|
|
||||||
|
- title: "SONY T1 driver: Fix support for collections of books placed on the SD card"
|
||||||
|
tickets: [986044]
|
||||||
|
|
||||||
|
- title: "Fix partitioning problems in tag browser with fields that have no name, such as identifiers and formats"
|
||||||
|
|
||||||
|
- title: "Welcome wizard: Preferentially use the kindle email address set as default when more than one such address exists."
|
||||||
|
tickets: [1007932 ]
|
||||||
|
|
||||||
|
- title: "Fix regression in 0.8.54 that broke the use of the shortcut Alt+A to select books by the same author"
|
||||||
|
|
||||||
|
improved recipes:
|
||||||
|
- Various Polish recipes
|
||||||
|
- Vice Magazine
|
||||||
|
- EL Mundo Today
|
||||||
|
- Haaretz
|
||||||
|
- Good Housekeeping
|
||||||
|
- El Pais
|
||||||
|
- Christian Science Monitor
|
||||||
|
- Marketing Magazine
|
||||||
|
- Instapaper
|
||||||
|
|
||||||
|
new recipes:
|
||||||
|
- title: Various Philippine news sources
|
||||||
|
author: jde
|
||||||
|
|
||||||
|
- title: Natemat.pl and wirtualnemedia.pl
|
||||||
|
author: fenuks
|
||||||
|
|
||||||
|
- title: Rabble.ca
|
||||||
|
author: timtoo
|
||||||
|
|
||||||
|
- version: 0.8.54
|
||||||
|
date: 2012-05-31
|
||||||
|
|
||||||
|
new features:
|
||||||
|
- title: "E-book viewer: The Table of contents panel now tracks the current position in the book. As you scroll through the book, the entry you are currently on is highlighted."
|
||||||
|
type: major
|
||||||
|
description: "To see this feature in action, open the Table of Contents panel in the viewer by clicking the button with three blue lines on it. As you page through the book, the chapter you are reading currently is highlighted in the Table of Contents Panel. Obviously, this will only work if the book you are reading has a Table of Contents. You can also use the Ctrl+PgUp and Ctrl+PgDn keys to quickly skip between chapters."
|
||||||
|
|
||||||
|
- title: "calibredb: Allow setting metadata for individual fields with the set_metadata command"
|
||||||
|
|
||||||
|
- title: "Make it a little harder to accidentally change the sorting of items in the Tag Browser. Also frees up more vertical space for the Tag Browser itself."
|
||||||
|
|
||||||
|
- title: "The calibre user manual is now available in AZW3 format as well as EPUB"
|
||||||
|
|
||||||
|
bug fixes:
|
||||||
|
- title: "Automatic titlecasing: No longer try to capitalize scottish names, as there are too many special cases."
|
||||||
|
tickets: [775825]
|
||||||
|
|
||||||
|
- title: "Never crash when reading metadata from PDF files (reading now always happens in a worker process)"
|
||||||
|
tickets: [1006452]
|
||||||
|
|
||||||
|
- title: "EPUB Input: Do not skip the valid children of an NCX node that has no text/href"
|
||||||
|
|
||||||
|
- title: "Archos driver: Detect SD card"
|
||||||
|
tickets: [1005650]
|
||||||
|
|
||||||
|
- title: "When bulk downloading metadata and the user deletes one of the books for which metadata is being downloaded, just ignore it, instead of erroring out"
|
||||||
|
|
||||||
|
- title: "When deleting books from the bottom of the booklist, ensure that the bottom book after deleting is selected"
|
||||||
|
|
||||||
|
- title: "Fix regression in 0.8.53 that broke sending APNX files to older Kindle devices"
|
||||||
|
|
||||||
|
- title: "Use correct text color for selected rows in the list of matches when downloading metadata and showing results in get books."
|
||||||
|
tickets: [1004568]
|
||||||
|
|
||||||
|
improved recipes:
|
||||||
|
- The Independent
|
||||||
|
- Welt der Physik
|
||||||
|
- China Daily
|
||||||
|
- The Grid
|
||||||
|
- Prospect Magazine
|
||||||
|
|
||||||
|
new recipes:
|
||||||
|
- title: La gazetta del Mezzogiorno
|
||||||
|
author: faber1971
|
||||||
|
|
||||||
|
- version: 0.8.53
|
||||||
|
date: 2012-05-25
|
||||||
|
|
||||||
|
new features:
|
||||||
|
- title: "Kindle Touch/4 driver: Upload cover thumbnails when sending books to device by USB to workaround Amazon bug of not displaying covers for sync-enabled books"
|
||||||
|
|
||||||
|
- title: "Support for updating metadata in FB2 files"
|
||||||
|
|
||||||
|
- title: "Set a different background color when choosing formats to not delete as opposed to choosing format to delete."
|
||||||
|
tickets: [ 1001741 ]
|
||||||
|
|
||||||
|
- title: "E-book viewer: Add an option to prevent the up and down arrow keys from scrolling past page breaks"
|
||||||
|
|
||||||
|
- title: "Get Books: Remove ebookshoppe.com at the website's request"
|
||||||
|
|
||||||
|
bug fixes:
|
||||||
|
- title: "PDF Input: Support image rotation commands in PDF files. Fixes the long standing problem of some images being flipped when converting from PDF in calibre."
|
||||||
|
|
||||||
|
- title: "Fix a regression in 0.8.51 that caused conversion to HTMLZ to not have any CSS"
|
||||||
|
|
||||||
|
- title: "Get Books: Fix website change at kobo.com causing prices not to be found"
|
||||||
|
|
||||||
|
- title: "Edit the time in the 24 hour clock when calibre's interface language is set to German."
|
||||||
|
tickets: [ 1001809 ]
|
||||||
|
|
||||||
|
- title: "MOBI Output: When generating joint KF8/MOBI6 .mobi files set the text length field in the MOBI 6 header correctly. "
|
||||||
|
tickets: [ 1003489 ]
|
||||||
|
|
||||||
|
- title: "ODT Input: More workarounds for LibreOffice 3.5's habit of inserting pointless margin:100% directives everywhere."
|
||||||
|
tickets: [ 1002702 ]
|
||||||
|
|
||||||
|
- title: "Fix regression that broke smarten punctuation when quotes were next to html tags."
|
||||||
|
tickets: [ 998900 ]
|
||||||
|
|
||||||
|
- title: "Fix published date from ozon.ru wrong in some timezones"
|
||||||
|
tickets: [ 975338 ]
|
||||||
|
|
||||||
|
- title: "Catalogs: Handle the use of custom columns with non-ascii names correctly"
|
||||||
|
tickets: [1001437]
|
||||||
|
|
||||||
|
- title: "Conversion pipeline: Remove the attempt to detect and autocorrect if text will go off the left edge of the page, as it was a rather crude heuristic. Also do not remove fake margins if the book uses negative text indents on the margined elements."
|
||||||
|
|
||||||
|
- title: "KF8 Output: Set offsets to tags in the skeleton the same way kindlegen does. Also linearize non linear ToCs to ensure section to section jumping works."
|
||||||
|
|
||||||
|
- title: "Conversion pipeline: Use correct default value of 'inherit' for font-family and font-size when normalizing the shorthand font property."
|
||||||
|
|
||||||
|
- title: "When running python scripts via calibre-debug ensure that user plugins are loaded"
|
||||||
|
|
||||||
|
improved recipes:
|
||||||
|
- Business Week Magazine
|
||||||
|
- Metro Nieuws NL
|
||||||
|
|
||||||
|
new recipes:
|
||||||
|
- title: Attac.es
|
||||||
|
author: Marc Busque
|
||||||
|
|
||||||
|
- title: Drytooling.com
|
||||||
|
author: Damian Granowski
|
||||||
|
|
||||||
|
- title: Shortlist.com
|
||||||
|
author: Dave ASbury
|
||||||
|
|
||||||
|
- title: National Geographic (es)
|
||||||
|
author: vakya
|
||||||
|
|
||||||
|
- version: 0.8.52
|
||||||
|
date: 2012-05-18
|
||||||
|
|
||||||
|
new features:
|
||||||
|
- title: "EPUB Input: When setting the cover for a book that identifies its cover image, but not the html wrapper around the cover, try to detect and remove that wrapper automatically."
|
||||||
|
tickets: [ 999959 ]
|
||||||
|
|
||||||
|
- title: "When deleting books of a specific format, show the number of books with each format available"
|
||||||
|
|
||||||
|
- title: "Linux install: No longer create MAN pages as all utilities have more comprehensive command line --help anyway"
|
||||||
|
|
||||||
|
- title: "Add a tweak Preferences->Tweaks to control the default choice of format for the Tweak Book feature"
|
||||||
|
|
||||||
|
- title: "Conversion: Allow setting negative page margins. A negative page margin means that calibre will not specify any page margin in the output document (for formats that support this)"
|
||||||
|
|
||||||
|
bug fixes:
|
||||||
|
- title: "Tweak book: Fix handling of covers when tweaking KF8 books"
|
||||||
|
|
||||||
|
- title: "KF8 Output: Handle input documents with out of sequence ToC entries. Note that currently section jumping in the KF8 output produced by calibre for such files does not work."
|
||||||
|
tickets: [1000493]
|
||||||
|
|
||||||
|
- title: "Edit metadata dialog: Fix the edit values button for custom tag-like columns showing an unneeded warning about changed values"
|
||||||
|
|
||||||
|
- title: "EPUB Output: Be a little more conservative when removing <form> tags. Only remove them if they have actual forms inside. "
|
||||||
|
tickets: [ 1000384 ]
|
||||||
|
|
||||||
|
- title: "EPUB Input: Correctly update the Cover entry in the ToC even when the entry has a fragment reference. "
|
||||||
|
tickets: [ 999973 ]
|
||||||
|
|
||||||
|
- title: "Update ImageMagick DLLs in all calibre binary builds to fix security vulnerabilities in ImageMagick"
|
||||||
|
tickets: [ 999496 ]
|
||||||
|
|
||||||
|
- title: "Advanced search dialog: Fix equals and regex matching not being applied for custom column searches."
|
||||||
|
tickets: [ 980221 ]
|
||||||
|
|
||||||
|
- title: "RTF Input: Handle old RTF files that have commands without braces."
|
||||||
|
tickets: [ 994133 ]
|
||||||
|
|
||||||
|
- title: "Get Books: Diesel, fix results not showing when only a single match is found"
|
||||||
|
|
||||||
|
- title: "Get Books: Fix DRM status indicators for Kobo and Diesel stores. Fix smashwords not returning results."
|
||||||
|
tickets: [ 993755 ]
|
||||||
|
|
||||||
|
- title: "Fix regression in 0.8.51 that broke viewing of LIT and some EPUB files"
|
||||||
|
tickets: [998248, 998216]
|
||||||
|
|
||||||
|
improved recipes:
|
||||||
|
- Clarin
|
||||||
|
- Spiegel
|
||||||
|
- Spiegel International
|
||||||
|
- Montreal Gazette
|
||||||
|
- Gosc Niedzelny
|
||||||
|
- Ars Technica
|
||||||
|
|
||||||
|
new recipes:
|
||||||
|
- title: "Army/Navy/Air force/Marine Times and News busters"
|
||||||
|
author: jde
|
||||||
|
|
||||||
|
- title: "Ads of the World, Heavy Meta (Italian) and Juve La Stampa"
|
||||||
|
author: faber1971
|
||||||
|
|
||||||
|
- title: "Revista Summa"
|
||||||
|
author: Vakya
|
||||||
|
|
||||||
|
- title: "Strategic culture"
|
||||||
|
author: Darko Miletic
|
||||||
|
|
||||||
|
- title: Stars and Stripes
|
||||||
|
author: adoucette
|
||||||
|
|
||||||
|
- title: Nackdenkseiten
|
||||||
|
author: jrda
|
||||||
|
|
||||||
|
|
||||||
- version: 0.8.51
|
- version: 0.8.51
|
||||||
date: 2012-05-11
|
date: 2012-05-11
|
||||||
|
|
||||||
|
@ -14,7 +14,7 @@
|
|||||||
import sys, os
|
import sys, os
|
||||||
|
|
||||||
# If your extensions are in another directory, add it here.
|
# If your extensions are in another directory, add it here.
|
||||||
sys.path.append(os.path.abspath('../../../'))
|
sys.path.append(os.path.abspath('../src'))
|
||||||
sys.path.append(os.path.abspath('.'))
|
sys.path.append(os.path.abspath('.'))
|
||||||
__appname__ = os.environ.get('__appname__', 'calibre')
|
__appname__ = os.environ.get('__appname__', 'calibre')
|
||||||
__version__ = os.environ.get('__version__', '0.0.0')
|
__version__ = os.environ.get('__version__', '0.0.0')
|
||||||
@ -98,7 +98,7 @@ html_favicon = 'favicon.ico'
|
|||||||
# Add any paths that contain custom static files (such as style sheets) here,
|
# Add any paths that contain custom static files (such as style sheets) here,
|
||||||
# relative to this directory. They are copied after the built-in static files,
|
# relative to this directory. They are copied after the built-in static files,
|
||||||
# so a file named "default.css" will overwrite the built-in "default.css".
|
# so a file named "default.css" will overwrite the built-in "default.css".
|
||||||
html_static_path = ['resources', '../../../icons/favicon.ico']
|
html_static_path = ['resources', '../icons/favicon.ico']
|
||||||
|
|
||||||
# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
|
# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
|
||||||
# using the given strftime format.
|
# using the given strftime format.
|
@ -669,7 +669,6 @@ Some limitations of PDF input are:
|
|||||||
* Complex, multi-column, and image based documents are not supported.
|
* Complex, multi-column, and image based documents are not supported.
|
||||||
* Extraction of vector images and tables from within the document is also not supported.
|
* Extraction of vector images and tables from within the document is also not supported.
|
||||||
* Some PDFs use special glyphs to represent ll or ff or fi, etc. Conversion of these may or may not work depending on just how they are represented internally in the PDF.
|
* Some PDFs use special glyphs to represent ll or ff or fi, etc. Conversion of these may or may not work depending on just how they are represented internally in the PDF.
|
||||||
* Some PDFs store their images upside down with a rotation instruction, |app| currently doesn't support that instruction, so the images will be rotated in the output as well.
|
|
||||||
* Links and Tables of Contents are not supported
|
* Links and Tables of Contents are not supported
|
||||||
* PDFs that use embedded non-unicode fonts to represent non-English characters will result in garbled output for those characters
|
* PDFs that use embedded non-unicode fonts to represent non-English characters will result in garbled output for those characters
|
||||||
* Some PDFs are made up of photographs of the page with OCRed text behind them. In such cases |app| uses the OCRed text, which can be very different from what you see when you view the PDF file
|
* Some PDFs are made up of photographs of the page with OCRed text behind them. In such cases |app| uses the OCRed text, which can be very different from what you see when you view the PDF file
|
@ -5,9 +5,9 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
import sys, os, re, textwrap
|
import sys, os, re, textwrap
|
||||||
|
|
||||||
sys.path.insert(0, os.path.abspath('../../'))
|
sys.path.insert(0, os.path.abspath('../src'))
|
||||||
sys.extensions_location = '../plugins'
|
sys.extensions_location = '../src/calibre/plugins'
|
||||||
sys.resources_location = '../../../resources'
|
sys.resources_location = '../resources'
|
||||||
|
|
||||||
from sphinx.util.console import bold
|
from sphinx.util.console import bold
|
||||||
|
|
||||||
@ -116,44 +116,42 @@ def generate_ebook_convert_help(preamble, info):
|
|||||||
from calibre.utils.logging import default_log
|
from calibre.utils.logging import default_log
|
||||||
preamble = re.sub(r'http.*\.html', ':ref:`conversion`', preamble)
|
preamble = re.sub(r'http.*\.html', ':ref:`conversion`', preamble)
|
||||||
raw = preamble + textwrap.dedent('''
|
raw = preamble + textwrap.dedent('''
|
||||||
Since the options supported by ebook-convert vary depending on both the
|
The options and default values for the options change depending on both the
|
||||||
input and the output formats, the various combinations are listed below:
|
input and output formats, so you should always check with::
|
||||||
|
|
||||||
|
ebook-convert myfile.input_format myfile.output_format -h
|
||||||
|
|
||||||
|
Below are the options that are common to all conversion, followed by the
|
||||||
|
options specific to every input and output format
|
||||||
|
|
||||||
''')
|
''')
|
||||||
toc = {}
|
|
||||||
sec_templ = textwrap.dedent('''\
|
|
||||||
.. include:: ../global.rst
|
|
||||||
|
|
||||||
{0}
|
|
||||||
================================================================
|
|
||||||
|
|
||||||
.. contents:: Contents
|
|
||||||
:depth: 1
|
|
||||||
:local:
|
|
||||||
|
|
||||||
''')
|
|
||||||
for i, ip in enumerate(input_format_plugins()):
|
|
||||||
sraw = sec_templ.format(ip.name)
|
|
||||||
toc[ip.name] = 'ebook-convert-%d'%i
|
|
||||||
for op in output_format_plugins():
|
|
||||||
title = ip.name + ' to ' + op.name
|
|
||||||
parser, plumber = create_option_parser(['ebook-convert',
|
parser, plumber = create_option_parser(['ebook-convert',
|
||||||
'dummyi.'+list(ip.file_types)[0],
|
'dummyi.mobi', 'dummyo.epub', '-h'], default_log)
|
||||||
'dummyo.'+op.file_type, '-h'], default_log)
|
|
||||||
cmd = 'ebook-convert '+list(ip.file_types)[0]+' '+op.file_type
|
|
||||||
groups = [(None, None, parser.option_list)]
|
groups = [(None, None, parser.option_list)]
|
||||||
for grp in parser.option_groups:
|
for grp in parser.option_groups:
|
||||||
groups.append((grp.title, grp.description, grp.option_list))
|
if grp.title not in {'INPUT OPTIONS', 'OUTPUT OPTIONS'}:
|
||||||
options = '\n'.join(render_options(cmd, groups, False))
|
groups.append((grp.title.title(), grp.description, grp.option_list))
|
||||||
sraw += title+'\n------------------------------------------------------\n\n'
|
options = '\n'.join(render_options('ebook-convert', groups, False))
|
||||||
sraw += options + '\n\n'
|
|
||||||
update_cli_doc(os.path.join('cli', toc[ip.name]+'.rst'), sraw, info)
|
raw += '\n\n.. contents::\n :local:'
|
||||||
|
|
||||||
|
raw += '\n\n' + options
|
||||||
|
for pl in sorted(input_format_plugins(), key=lambda x:x.name):
|
||||||
|
parser, plumber = create_option_parser(['ebook-convert',
|
||||||
|
'dummyi.'+list(pl.file_types)[0], 'dummyo.epub', '-h'], default_log)
|
||||||
|
groups = [(pl.name+ ' Options', '', g.option_list) for g in
|
||||||
|
parser.option_groups if g.title == "INPUT OPTIONS"]
|
||||||
|
prog = 'ebook-convert-'+(pl.name.lower().replace(' ', '-'))
|
||||||
|
raw += '\n\n' + '\n'.join(render_options(prog, groups, False, True))
|
||||||
|
for pl in sorted(output_format_plugins(), key=lambda x: x.name):
|
||||||
|
parser, plumber = create_option_parser(['ebook-convert', 'd.epub',
|
||||||
|
'dummyi.'+pl.file_type, '-h'], default_log)
|
||||||
|
groups = [(pl.name+ ' Options', '', g.option_list) for g in
|
||||||
|
parser.option_groups if g.title == "OUTPUT OPTIONS"]
|
||||||
|
prog = 'ebook-convert-'+(pl.name.lower().replace(' ', '-'))
|
||||||
|
raw += '\n\n' + '\n'.join(render_options(prog, groups, False, True))
|
||||||
|
|
||||||
toct = '\n\n.. toctree::\n :maxdepth: 2\n\n'
|
|
||||||
for ip in sorted(toc):
|
|
||||||
toct += ' ' + toc[ip]+'\n'
|
|
||||||
|
|
||||||
raw += toct+'\n\n'
|
|
||||||
update_cli_doc(os.path.join('cli', 'ebook-convert.rst'), raw, info)
|
update_cli_doc(os.path.join('cli', 'ebook-convert.rst'), raw, info)
|
||||||
|
|
||||||
def update_cli_doc(path, raw, info):
|
def update_cli_doc(path, raw, info):
|
@ -39,7 +39,7 @@ Tweaks
|
|||||||
Tweaks are small changes that you can specify to control various aspects of |app|'s behavior. You can change them by going to Preferences->Advanced->Tweaks.
|
Tweaks are small changes that you can specify to control various aspects of |app|'s behavior. You can change them by going to Preferences->Advanced->Tweaks.
|
||||||
The default values for the tweaks are reproduced below
|
The default values for the tweaks are reproduced below
|
||||||
|
|
||||||
.. literalinclude:: ../../../resources/default_tweaks.py
|
.. literalinclude:: ../resources/default_tweaks.py
|
||||||
|
|
||||||
|
|
||||||
Overriding icons, templates, et cetera
|
Overriding icons, templates, et cetera
|
@ -45,6 +45,16 @@ All the |app| python code is in the ``calibre`` package. This package contains t
|
|||||||
The format independent code is all in ebooks.oeb and the format dependent code is in ebooks.format_name.
|
The format independent code is all in ebooks.oeb and the format dependent code is in ebooks.format_name.
|
||||||
|
|
||||||
* Metadata reading, writing, and downloading is all in ebooks.metadata
|
* Metadata reading, writing, and downloading is all in ebooks.metadata
|
||||||
|
* Conversion happens in a pipeline, for the structure of the pipeline,
|
||||||
|
see :ref:`conversion-introduction`. The pipeline consists of an input
|
||||||
|
plugin, various transforms and an output plugin. The code that constructs
|
||||||
|
and drives the pipeline is in plumber.py. The pipeline works on a
|
||||||
|
representation of an ebook that is like an unzipped epub, with
|
||||||
|
manifest, spine, toc, guide, html content, etc. The
|
||||||
|
class that manages this representation is OEBBook in oeb/base.py. The
|
||||||
|
various transformations that are applied to the book during
|
||||||
|
conversions live in `oeb/transforms/*.py`. And the input and output
|
||||||
|
plugins live in `conversion/plugins/*.py`.
|
||||||
|
|
||||||
* library - The database back-end and the content server. See library.database2 for the interface to the |app| library. library.server is the |app| Content Server.
|
* library - The database back-end and the content server. See library.database2 for the interface to the |app| library. library.server is the |app| Content Server.
|
||||||
* gui2 - The Graphical User Interface. GUI initialization happens in gui2.main and gui2.ui. The ebook-viewer is in gui2.viewer.
|
* gui2 - The Graphical User Interface. GUI initialization happens in gui2.main and gui2.ui. The ebook-viewer is in gui2.viewer.
|
@ -69,6 +69,22 @@ If you have a hand edited TOC in the input document, you can use the TOC detecti
|
|||||||
|
|
||||||
Finally, I encourage you to ditch the content TOC and only have a metadata TOC in your ebooks. Metadata TOCs will give the people reading your ebooks a much superior navigation experience (except on the Kindle, where they are essentially the same as a content TOC).
|
Finally, I encourage you to ditch the content TOC and only have a metadata TOC in your ebooks. Metadata TOCs will give the people reading your ebooks a much superior navigation experience (except on the Kindle, where they are essentially the same as a content TOC).
|
||||||
|
|
||||||
|
The covers for my MOBI files have stopped showing up in Kindle for PC/Kindle for Android/etc.
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
This is caused by a bug in the Amazon software. You can work around it by going
|
||||||
|
to Preferences->Output Options->MOBI output and setting the "Enable sharing
|
||||||
|
of book content" option. If you are reconverting a previously converted book,
|
||||||
|
you will also have to enable the option in the conversion dialog for that
|
||||||
|
individual book (as per-book conversion settings are saved and take
|
||||||
|
precedence).
|
||||||
|
|
||||||
|
Note that doing this will mean that the generated MOBI will show up under
|
||||||
|
personal documents instead of Books on the Kindle Fire and Amazon whispersync
|
||||||
|
will not work, but the covers will. It's your choice which functionality is
|
||||||
|
more important to you. I encourage you to contact Amazon and ask them to fix
|
||||||
|
this bug.
|
||||||
|
|
||||||
How do I convert a collection of HTML files in a specific order?
|
How do I convert a collection of HTML files in a specific order?
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
In order to convert a collection of HTML files in a specific order, you have to create a table of contents file. That is, another HTML file that contains links to all the other files in the desired order. Such a file looks like::
|
In order to convert a collection of HTML files in a specific order, you have to create a table of contents file. That is, another HTML file that contains links to all the other files in the desired order. Such a file looks like::
|
Before Width: | Height: | Size: 36 KiB After Width: | Height: | Size: 36 KiB |
Before Width: | Height: | Size: 2.6 KiB After Width: | Height: | Size: 2.6 KiB |
BIN
manual/images/added_books.png
Normal file
After Width: | Height: | Size: 14 KiB |
Before Width: | Height: | Size: 1.3 KiB After Width: | Height: | Size: 1.3 KiB |
Before Width: | Height: | Size: 16 KiB After Width: | Height: | Size: 16 KiB |
Before Width: | Height: | Size: 22 KiB After Width: | Height: | Size: 22 KiB |
Before Width: | Height: | Size: 23 KiB After Width: | Height: | Size: 23 KiB |
Before Width: | Height: | Size: 21 KiB After Width: | Height: | Size: 21 KiB |
Before Width: | Height: | Size: 1.5 KiB After Width: | Height: | Size: 1.5 KiB |
Before Width: | Height: | Size: 13 KiB After Width: | Height: | Size: 13 KiB |
Before Width: | Height: | Size: 6.1 KiB After Width: | Height: | Size: 6.1 KiB |
Before Width: | Height: | Size: 662 B After Width: | Height: | Size: 662 B |
Before Width: | Height: | Size: 4.8 KiB After Width: | Height: | Size: 4.8 KiB |
Before Width: | Height: | Size: 228 KiB After Width: | Height: | Size: 228 KiB |
Before Width: | Height: | Size: 3.7 KiB After Width: | Height: | Size: 3.7 KiB |
Before Width: | Height: | Size: 44 KiB After Width: | Height: | Size: 44 KiB |
Before Width: | Height: | Size: 5.0 KiB After Width: | Height: | Size: 5.0 KiB |
Before Width: | Height: | Size: 3.1 KiB After Width: | Height: | Size: 3.1 KiB |
Before Width: | Height: | Size: 3.5 KiB After Width: | Height: | Size: 3.5 KiB |
Before Width: | Height: | Size: 3.0 KiB After Width: | Height: | Size: 3.0 KiB |
Before Width: | Height: | Size: 3.6 KiB After Width: | Height: | Size: 3.6 KiB |
Before Width: | Height: | Size: 1.7 KiB After Width: | Height: | Size: 1.7 KiB |
Before Width: | Height: | Size: 1.2 KiB After Width: | Height: | Size: 1.2 KiB |
Before Width: | Height: | Size: 1.7 KiB After Width: | Height: | Size: 1.7 KiB |
Before Width: | Height: | Size: 5.6 KiB After Width: | Height: | Size: 5.6 KiB |
Before Width: | Height: | Size: 1.6 KiB After Width: | Height: | Size: 1.6 KiB |
Before Width: | Height: | Size: 3.1 KiB After Width: | Height: | Size: 3.1 KiB |
Before Width: | Height: | Size: 22 KiB After Width: | Height: | Size: 22 KiB |
Before Width: | Height: | Size: 1.7 KiB After Width: | Height: | Size: 1.7 KiB |
Before Width: | Height: | Size: 4.4 KiB After Width: | Height: | Size: 4.4 KiB |
Before Width: | Height: | Size: 2.5 KiB After Width: | Height: | Size: 2.5 KiB |
Before Width: | Height: | Size: 48 KiB After Width: | Height: | Size: 48 KiB |
Before Width: | Height: | Size: 2.3 KiB After Width: | Height: | Size: 2.3 KiB |
Before Width: | Height: | Size: 4.9 KiB After Width: | Height: | Size: 4.9 KiB |
Before Width: | Height: | Size: 3.8 KiB After Width: | Height: | Size: 3.8 KiB |
Before Width: | Height: | Size: 30 KiB After Width: | Height: | Size: 30 KiB |
Before Width: | Height: | Size: 724 B After Width: | Height: | Size: 724 B |
Before Width: | Height: | Size: 76 KiB After Width: | Height: | Size: 76 KiB |
Before Width: | Height: | Size: 2.8 KiB After Width: | Height: | Size: 2.8 KiB |
Before Width: | Height: | Size: 27 KiB After Width: | Height: | Size: 27 KiB |
Before Width: | Height: | Size: 23 KiB After Width: | Height: | Size: 23 KiB |
Before Width: | Height: | Size: 85 KiB After Width: | Height: | Size: 85 KiB |
Before Width: | Height: | Size: 26 KiB After Width: | Height: | Size: 26 KiB |
Before Width: | Height: | Size: 18 KiB After Width: | Height: | Size: 18 KiB |
Before Width: | Height: | Size: 11 KiB After Width: | Height: | Size: 11 KiB |
Before Width: | Height: | Size: 14 KiB After Width: | Height: | Size: 14 KiB |
Before Width: | Height: | Size: 29 KiB After Width: | Height: | Size: 29 KiB |
Before Width: | Height: | Size: 1.1 KiB After Width: | Height: | Size: 1.1 KiB |
Before Width: | Height: | Size: 1.3 KiB After Width: | Height: | Size: 1.3 KiB |
Before Width: | Height: | Size: 41 KiB After Width: | Height: | Size: 41 KiB |
Before Width: | Height: | Size: 628 B After Width: | Height: | Size: 628 B |
Before Width: | Height: | Size: 2.0 KiB After Width: | Height: | Size: 2.0 KiB |
Before Width: | Height: | Size: 3.0 KiB After Width: | Height: | Size: 3.0 KiB |
@ -17,7 +17,7 @@ To get started with more advanced usage, you should read about the :ref:`Graphic
|
|||||||
|
|
||||||
.. only:: online
|
.. only:: online
|
||||||
|
|
||||||
**An ebook version of this user manual is available in** `EPUB format <calibre.epub>`_.
|
**An ebook version of this user manual is available in** `EPUB format <calibre.epub>`_ and `AZW3 (Kindle Fire) format <calibre.azw3>`_.
|
||||||
|
|
||||||
Sections
|
Sections
|
||||||
------------
|
------------
|
Before Width: | Height: | Size: 5.6 KiB After Width: | Height: | Size: 5.6 KiB |
Before Width: | Height: | Size: 144 KiB After Width: | Height: | Size: 144 KiB |
Before Width: | Height: | Size: 10 KiB After Width: | Height: | Size: 10 KiB |
@ -245,7 +245,7 @@ The following functions are available in addition to those described in single-f
|
|||||||
* ``current_library_name()`` -- return the last name on the path to the current calibre library. This function can be called in template program mode using the template ``{:'current_library_name()'}``.
|
* ``current_library_name()`` -- return the last name on the path to the current calibre library. This function can be called in template program mode using the template ``{:'current_library_name()'}``.
|
||||||
* ``days_between(date1, date2)`` -- return the number of days between ``date1`` and ``date2``. The number is positive if ``date1`` is greater than ``date2``, otherwise negative. If either ``date1`` or ``date2`` are not dates, the function returns the empty string.
|
* ``days_between(date1, date2)`` -- return the number of days between ``date1`` and ``date2``. The number is positive if ``date1`` is greater than ``date2``, otherwise negative. If either ``date1`` or ``date2`` are not dates, the function returns the empty string.
|
||||||
* ``divide(x, y)`` -- returns x / y. Throws an exception if either x or y are not numbers.
|
* ``divide(x, y)`` -- returns x / y. Throws an exception if either x or y are not numbers.
|
||||||
* ``eval(string)`` -- evaluates the string as a program, passing the local variables (those ``assign`` ed to). This permits using the template processor to construct complex results from local variables. Because the `{` and `}` characters are special, you must use `[[` for the `{` character and `]]` for the '}' character; they are converted automatically. Note also that prefixes and suffixes (the "|prefix|suffix" syntax) cannot be used in the argument to this function when using template program mode.
|
* ``eval(string)`` -- evaluates the string as a program, passing the local variables (those ``assign`` ed to). This permits using the template processor to construct complex results from local variables. Because the `{` and `}` characters are special, you must use `[[` for the `{` character and `]]` for the `}` character; they are converted automatically. Note also that prefixes and suffixes (the `|prefix|suffix` syntax) cannot be used in the argument to this function when using template program mode.
|
||||||
* ``field(name)`` -- returns the metadata field named by ``name``.
|
* ``field(name)`` -- returns the metadata field named by ``name``.
|
||||||
* ``first_non_empty(value, value, ...)`` -- returns the first value that is not empty. If all values are empty, then the empty value is returned. You can have as many values as you want.
|
* ``first_non_empty(value, value, ...)`` -- returns the first value that is not empty. If all values are empty, then the empty value is returned. You can have as many values as you want.
|
||||||
* ``format_date(val, format_string)`` -- format the value, which must be a date field, using the format_string, returning a string. The formatting codes are::
|
* ``format_date(val, format_string)`` -- format the value, which must be a date field, using the format_string, returning a string. The formatting codes are::
|
||||||
@ -306,7 +306,7 @@ The following functions are available in addition to those described in single-f
|
|||||||
* ``substr(str, start, end)`` -- returns the ``start``'th through the ``end``'th characters of ``str``. The first character in ``str`` is the zero'th character. If end is negative, then it indicates that many characters counting from the right. If end is zero, then it indicates the last character. For example, ``substr('12345', 1, 0)`` returns ``'2345'``, and ``substr('12345', 1, -1)`` returns ``'234'``.
|
* ``substr(str, start, end)`` -- returns the ``start``'th through the ``end``'th characters of ``str``. The first character in ``str`` is the zero'th character. If end is negative, then it indicates that many characters counting from the right. If end is zero, then it indicates the last character. For example, ``substr('12345', 1, 0)`` returns ``'2345'``, and ``substr('12345', 1, -1)`` returns ``'234'``.
|
||||||
* ``subtract(x, y)`` -- returns x - y. Throws an exception if either x or y are not numbers.
|
* ``subtract(x, y)`` -- returns x - y. Throws an exception if either x or y are not numbers.
|
||||||
* ``today()`` -- return a date string for today. This value is designed for use in format_date or days_between, but can be manipulated like any other string. The date is in ISO format.
|
* ``today()`` -- return a date string for today. This value is designed for use in format_date or days_between, but can be manipulated like any other string. The date is in ISO format.
|
||||||
* ``template(x)`` -- evaluates x as a template. The evaluation is done in its own context, meaning that variables are not shared between the caller and the template evaluation. Because the `{` and `}` characters are special, you must use `[[` for the `{` character and `]]` for the '}' character; they are converted automatically. For example, ``template('[[title_sort]]') will evaluate the template ``{title_sort}`` and return its value. Note also that prefixes and suffixes (the "|prefix|suffix" syntax) cannot be used in the argument to this function when using template program mode.
|
* ``template(x)`` -- evaluates x as a template. The evaluation is done in its own context, meaning that variables are not shared between the caller and the template evaluation. Because the `{` and `}` characters are special, you must use `[[` for the `{` character and `]]` for the `}` character; they are converted automatically. For example, ``template('[[title_sort]]')`` will evaluate the template ``{title_sort}`` and return its value. Note also that prefixes and suffixes (the `|prefix|suffix` syntax) cannot be used in the argument to this function when using template program mode.
|
||||||
|
|
||||||
.. _template_functions_reference:
|
.. _template_functions_reference:
|
||||||
|
|
@ -55,7 +55,7 @@ The python implementation of the template functions is passed in a Metadata obje
|
|||||||
|
|
||||||
The set of standard metadata fields.
|
The set of standard metadata fields.
|
||||||
|
|
||||||
.. literalinclude:: ../ebooks/metadata/book/__init__.py
|
.. literalinclude:: ../src/calibre/ebooks/metadata/book/__init__.py
|
||||||
:lines: 7-
|
:lines: 7-
|
||||||
'''
|
'''
|
||||||
|
|
26
recipes/ads_of_the_world.recipe
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class AdvancedUserRecipe1336986047(BasicNewsRecipe):
|
||||||
|
title = u'Ads of the World'
|
||||||
|
oldest_article = 7
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
auto_cleanup = False
|
||||||
|
description = 'The best international advertising campaigns'
|
||||||
|
language = 'en'
|
||||||
|
__author__ = 'faber1971'
|
||||||
|
|
||||||
|
no_stylesheets = True
|
||||||
|
keep_only_tags = [
|
||||||
|
dict(name='div', attrs={'id':'primary'})
|
||||||
|
]
|
||||||
|
|
||||||
|
remove_tags = [
|
||||||
|
dict(name='ul', attrs={'class':'links inline'})
|
||||||
|
,dict(name='div', attrs={'class':'form-item'})
|
||||||
|
,dict(name='div', attrs={'id':['options', 'comments']})
|
||||||
|
,dict(name='ul', attrs={'id':'nodePager'})
|
||||||
|
]
|
||||||
|
|
||||||
|
reverse_article_order = True
|
||||||
|
masthead_url = 'http://bigcatgroup.co.uk/files/2011/01/05-ads-of-the-world.png'
|
||||||
|
feeds = [(u'Ads of the world', u'http://feeds.feedburner.com/adsoftheworld-latest')]
|
43
recipes/air_force_times.recipe
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class AirForceTimes(BasicNewsRecipe):
|
||||||
|
title = 'Air Force Times'
|
||||||
|
__author__ = 'jde'
|
||||||
|
__date__ = '16 May 2012'
|
||||||
|
__version__ = '1.0'
|
||||||
|
description = 'News of the U.S. Air Force'
|
||||||
|
language = 'en'
|
||||||
|
publisher = 'AirForceTimes.com'
|
||||||
|
category = 'news, U.S. Air Force'
|
||||||
|
tags = 'news, U.S. Air Force'
|
||||||
|
cover_url = 'http://www.airforcetimes.com/images/logo_airforcetimes_alert.jpg'
|
||||||
|
masthead_url = 'http://www.airforcetimes.com/images/logo_airforcetimes_alert.jpg'
|
||||||
|
oldest_article = 7 #days
|
||||||
|
max_articles_per_feed = 25
|
||||||
|
publication_type = 'newspaper'
|
||||||
|
no_stylesheets = True
|
||||||
|
use_embedded_content = False
|
||||||
|
encoding = None
|
||||||
|
recursions = 0
|
||||||
|
needs_subscription = False
|
||||||
|
remove_javascript = True
|
||||||
|
remove_empty_feeds = True
|
||||||
|
auto_cleanup = True
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
feeds = [
|
||||||
|
|
||||||
|
('News', 'http://www.airforcetimes.com/rss_news.php'),
|
||||||
|
('Benefits', 'http://www.airforcetimes.com/rss_benefits.php'),
|
||||||
|
('Money', 'http://www.airforcetimes.com/rss_money.php'),
|
||||||
|
('Careers & Education', 'http://www.airforcetimes.com/rss_careers.php'),
|
||||||
|
('Community', 'http://www.airforcetimes.com/rss_community.php'),
|
||||||
|
('Off Duty', 'http://www.airforcetimes.com/rss_off_duty.php'),
|
||||||
|
('Entertainment', 'http://www.airforcetimes.com/rss_entertainment.php'),
|
||||||
|
('Guard & Reserve', 'http://www.airforcetimes.com/rss_guard.php'),
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
42
recipes/army_times.recipe
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
class ArmyTimes(BasicNewsRecipe):
|
||||||
|
title = 'Army Times'
|
||||||
|
__author__ = 'jde'
|
||||||
|
__date__ = '16 May 2012'
|
||||||
|
__version__ = '1.0'
|
||||||
|
description = 'News of the U.S. Army'
|
||||||
|
language = 'en'
|
||||||
|
publisher = 'ArmyTimes.com'
|
||||||
|
category = 'news, U.S. Army'
|
||||||
|
tags = 'news, U.S. Army'
|
||||||
|
cover_url = 'http://www.armytimes.com/images/logo_armytimes_alert.jpg'
|
||||||
|
masthead_url = 'http://www.armytimes.com/images/logo_armytimes_alert.jpg'
|
||||||
|
oldest_article = 7 #days
|
||||||
|
max_articles_per_feed = 25
|
||||||
|
publication_type = 'newspaper'
|
||||||
|
no_stylesheets = True
|
||||||
|
use_embedded_content = False
|
||||||
|
encoding = None
|
||||||
|
recursions = 0
|
||||||
|
needs_subscription = False
|
||||||
|
remove_javascript = True
|
||||||
|
remove_empty_feeds = True
|
||||||
|
auto_cleanup = True
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
feeds = [
|
||||||
|
|
||||||
|
('News', 'http://www.armytimes.com/rss_news.php'),
|
||||||
|
('Benefits', 'http://www.armytimes.com/rss_benefits.php'),
|
||||||
|
('Money', 'http://www.armytimes.com/rss_money.php'),
|
||||||
|
('Careers & Education', 'http://www.armytimes.com/rss_careers.php'),
|
||||||
|
('Community', 'http://www.armytimes.com/rss_community.php'),
|
||||||
|
('Off Duty', 'http://www.armytimes.com/rss_off_duty.php'),
|
||||||
|
('Entertainment', 'http://www.armytimes.com/rss_entertainment.php'),
|
||||||
|
('Guard & Reserve', 'http://www.armytimes.com/rss_guard.php'),
|
||||||
|
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1,33 +1,34 @@
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008-2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
arstechnica.com
|
arstechnica.com
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import re
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||||
|
|
||||||
class ArsTechnica(BasicNewsRecipe):
|
class ArsTechnica(BasicNewsRecipe):
|
||||||
title = u'Ars Technica'
|
title = u'Ars Technica'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
__author__ = 'Darko Miletic, Sujata Raman, Alexis Rohou'
|
__author__ = 'Darko Miletic, Sujata Raman, Alexis Rohou'
|
||||||
description = 'The art of technology'
|
description = 'Ars Technica: Serving the technologist for 1.2 decades'
|
||||||
publisher = 'Ars Technica'
|
publisher = 'Conde Nast Publications'
|
||||||
category = 'news, IT, technology'
|
category = 'news, IT, technology'
|
||||||
oldest_article = 5
|
oldest_article = 5
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
|
remove_empty_feeds = True
|
||||||
|
publication_type = 'newsportal'
|
||||||
extra_css = '''
|
extra_css = '''
|
||||||
body {font-family: Arial,Helvetica,sans-serif}
|
body {font-family: Arial,sans-serif}
|
||||||
.title{text-align: left}
|
.heading{font-family: "Times New Roman",serif}
|
||||||
.byline{font-weight: bold; line-height: 1em; font-size: 0.625em; text-decoration: none}
|
.byline{font-weight: bold; line-height: 1em; font-size: 0.625em; text-decoration: none}
|
||||||
.news-item-figure-caption-text{font-size:small; font-style:italic}
|
img{display: block}
|
||||||
.news-item-figure-caption-byline{font-size:small; font-style:italic; font-weight:bold}
|
.caption-text{font-size:small; font-style:italic}
|
||||||
|
.caption-byline{font-size:small; font-style:italic; font-weight:bold}
|
||||||
'''
|
'''
|
||||||
ignoreEtcArticles = True # Etc feed items can be ignored, as they're not real stories
|
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comments' : description
|
'comments' : description
|
||||||
@ -36,50 +37,38 @@ class ArsTechnica(BasicNewsRecipe):
|
|||||||
,'publisher' : publisher
|
,'publisher' : publisher
|
||||||
}
|
}
|
||||||
|
|
||||||
|
keep_only_tags = [
|
||||||
#preprocess_regexps = [
|
dict(attrs={'class':'standalone'})
|
||||||
# (re.compile(r'<div class="news-item-figure', re.DOTALL|re.IGNORECASE),lambda match: '<div class="news-item-figure"')
|
,dict(attrs={'id':'article-guts'})
|
||||||
# ,(re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE),lambda match: '</title></head>')
|
]
|
||||||
# ]
|
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':['story','etc-story']})]
|
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name=['object','link','embed'])
|
dict(name=['object','link','embed','iframe','meta'])
|
||||||
,dict(name='div', attrs={'class':'read-more-link'})
|
,dict(attrs={'class':'corner-info'})
|
||||||
]
|
]
|
||||||
#remove_attributes=['width','height']
|
remove_attributes = ['lang']
|
||||||
|
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Infinite Loop (Apple content)' , u'http://feeds.arstechnica.com/arstechnica/apple/' )
|
(u'Infinite Loop (Apple content)' , u'http://feeds.arstechnica.com/arstechnica/apple/' )
|
||||||
,(u'Opposable Thumbs (Gaming content)' , u'http://feeds.arstechnica.com/arstechnica/gaming/' )
|
,(u'Opposable Thumbs (Gaming content)' , u'http://feeds.arstechnica.com/arstechnica/gaming/' )
|
||||||
,(u'Gear and Gadgets' , u'http://feeds.arstechnica.com/arstechnica/gadgets/' )
|
,(u'Gear and Gadgets' , u'http://feeds.arstechnica.com/arstechnica/gadgets/' )
|
||||||
,(u'Chipster (Hardware content)' , u'http://feeds.arstechnica.com/arstechnica/hardware/' )
|
|
||||||
,(u'Uptime (IT content)' , u'http://feeds.arstechnica.com/arstechnica/business/' )
|
,(u'Uptime (IT content)' , u'http://feeds.arstechnica.com/arstechnica/business/' )
|
||||||
,(u'Open Ended (Open Source content)' , u'http://feeds.arstechnica.com/arstechnica/open-source/')
|
,(u'Open Ended (Open Source content)' , u'http://feeds.arstechnica.com/arstechnica/open-source/')
|
||||||
,(u'One Microsoft Way' , u'http://feeds.arstechnica.com/arstechnica/microsoft/' )
|
,(u'One Microsoft Way' , u'http://feeds.arstechnica.com/arstechnica/microsoft/' )
|
||||||
,(u'Nobel Intent (Science content)' , u'http://feeds.arstechnica.com/arstechnica/science/' )
|
,(u'Scientific method (Science content)' , u'http://feeds.arstechnica.com/arstechnica/science/' )
|
||||||
,(u'Law & Disorder (Tech policy content)' , u'http://feeds.arstechnica.com/arstechnica/tech-policy/')
|
,(u'Law & Disorder (Tech policy content)' , u'http://feeds.arstechnica.com/arstechnica/tech-policy/')
|
||||||
]
|
]
|
||||||
|
|
||||||
# This deals with multi-page stories
|
|
||||||
def append_page(self, soup, appendtag, position):
|
def append_page(self, soup, appendtag, position):
|
||||||
pager = soup.find('div',attrs={'class':'pager'})
|
pager = soup.find(attrs={'class':'numbers'})
|
||||||
if pager:
|
if pager:
|
||||||
for atag in pager.findAll('a',href=True):
|
nexttag = pager.find(attrs={'class':'next'})
|
||||||
str = self.tag_to_string(atag)
|
if nexttag:
|
||||||
if str.startswith('Next'):
|
nurl = nexttag.parent['href']
|
||||||
nurl = 'http://arstechnica.com' + atag['href']
|
|
||||||
rawc = self.index_to_soup(nurl,True)
|
rawc = self.index_to_soup(nurl,True)
|
||||||
soup2 = BeautifulSoup(rawc, fromEncoding=self.encoding)
|
soup2 = BeautifulSoup(rawc, fromEncoding=self.encoding)
|
||||||
|
texttag = soup2.find(attrs={'id':'article-guts'})
|
||||||
readmoretag = soup2.find('div', attrs={'class':'read-more-link'})
|
|
||||||
if readmoretag:
|
|
||||||
readmoretag.extract()
|
|
||||||
texttag = soup2.find('div', attrs={'class':'body'})
|
|
||||||
for it in texttag.findAll(style=True):
|
|
||||||
del it['style']
|
|
||||||
|
|
||||||
newpos = len(texttag.contents)
|
newpos = len(texttag.contents)
|
||||||
self.append_page(soup2,texttag,newpos)
|
self.append_page(soup2,texttag,newpos)
|
||||||
texttag.extract()
|
texttag.extract()
|
||||||
@ -88,41 +77,24 @@ class ArsTechnica(BasicNewsRecipe):
|
|||||||
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
# Adds line breaks near the byline (not sure why this is needed)
|
|
||||||
ftag = soup.find('div', attrs={'class':'byline'})
|
|
||||||
if ftag:
|
|
||||||
brtag = Tag(soup,'br')
|
|
||||||
brtag2 = Tag(soup,'br')
|
|
||||||
ftag.insert(4,brtag)
|
|
||||||
ftag.insert(5,brtag2)
|
|
||||||
|
|
||||||
# Remove style items
|
|
||||||
for item in soup.findAll(style=True):
|
|
||||||
del item['style']
|
|
||||||
|
|
||||||
# Remove id
|
|
||||||
for item in soup.findAll(id=True):
|
|
||||||
del item['id']
|
|
||||||
|
|
||||||
# For some reason, links to authors don't have the domainname
|
|
||||||
a_author = soup.find('a',{'href':re.compile("^/author")})
|
|
||||||
if a_author:
|
|
||||||
a_author['href'] = 'http://arstechnica.com'+a_author['href']
|
|
||||||
|
|
||||||
# within div class news-item-figure, we need to grab images
|
|
||||||
|
|
||||||
# Deal with multi-page stories
|
|
||||||
self.append_page(soup, soup.body, 3)
|
self.append_page(soup, soup.body, 3)
|
||||||
|
for item in soup.findAll('a'):
|
||||||
|
limg = item.find('img')
|
||||||
|
if item.string is not None:
|
||||||
|
str = item.string
|
||||||
|
item.replaceWith(str)
|
||||||
|
else:
|
||||||
|
if limg:
|
||||||
|
item.name = 'div'
|
||||||
|
item.attrs = []
|
||||||
|
else:
|
||||||
|
str = self.tag_to_string(item)
|
||||||
|
item.replaceWith(str)
|
||||||
|
for item in soup.findAll('img'):
|
||||||
|
if not item.has_key('alt'):
|
||||||
|
item['alt'] = 'image'
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
def get_article_url(self, article):
|
def preprocess_raw_html(self, raw, url):
|
||||||
# If the article title starts with Etc:, don't return it
|
return '<html><head>'+raw[raw.find('</head>'):]
|
||||||
if self.ignoreEtcArticles:
|
|
||||||
article_title = article.get('title',None)
|
|
||||||
if re.match('Etc: ',article_title) is not None:
|
|
||||||
return None
|
|
||||||
|
|
||||||
# The actual article is in a guid tag
|
|
||||||
return article.get('guid', None).rpartition('?')[0]
|
|
||||||
|
|
||||||
|