updated from main branch
244
Changelog.yaml
@ -4,6 +4,250 @@
|
|||||||
# for important features/bug fixes.
|
# for important features/bug fixes.
|
||||||
# Also, each release can have new and improved recipes.
|
# Also, each release can have new and improved recipes.
|
||||||
|
|
||||||
|
- version: 0.6.40
|
||||||
|
date: 2010-02-12
|
||||||
|
|
||||||
|
new features:
|
||||||
|
- title: "Ability to perform exact match and regular expression based searches."
|
||||||
|
type: major
|
||||||
|
tickets: [4830]
|
||||||
|
description: >
|
||||||
|
"You can now perform exact match searches by prefixing your search term with an =.
|
||||||
|
So for example, tag:=fiction will match all tags named fiction, but not tags named
|
||||||
|
non-fiction. Similarly, you can use regular expression based searches by prefixing
|
||||||
|
the search term by ~."
|
||||||
|
|
||||||
|
- title: "Autodetect if a zip/rar file is actually a comic and if so, import it as CBZ/CBR"
|
||||||
|
tickets: [4753]
|
||||||
|
|
||||||
|
- title: "Add plugin to automatically extract an ebook during import if it is in a zip/rar archive"
|
||||||
|
|
||||||
|
- title: "Linux source install: Install a calibre environment module to ease the integration of calibre into other python projects"
|
||||||
|
|
||||||
|
bug fixes:
|
||||||
|
- title: "Fix regression in 0.6.39 that broke the LRF viewer"
|
||||||
|
|
||||||
|
- title: "ZIP/EPUB files: Try to detect file name encoding instead of assuming the name is encoded in UTF-8. Also correctly
|
||||||
|
encode the extracted file name in the local filesystem encoding."
|
||||||
|
|
||||||
|
- title: "HTML Input: Handle HTML fragments more gracefully"
|
||||||
|
tickets: [4854]
|
||||||
|
|
||||||
|
- title: "Zip files: Workaround invalid zip files that contain end-of-file comments but set comment size to zero"
|
||||||
|
|
||||||
|
- title: "Restore the recipe for the Wired daily feed."
|
||||||
|
tickets: [4871]
|
||||||
|
|
||||||
|
- title: "MOBI metadata: Preserve original EXTH records when not overwritten by calibre metadata."
|
||||||
|
|
||||||
|
- title: "Catalog generation: Improved series sorting. All books not in a series are now grouped together"
|
||||||
|
|
||||||
|
- title: "Fix occasional threading related crash when using the ChooseFormatDialog"
|
||||||
|
|
||||||
|
- title: "Catalog generation: Various fixes for handling invalid data"
|
||||||
|
|
||||||
|
new recipes:
|
||||||
|
- title: Sueddeutsche Zeitung
|
||||||
|
author: Darko Miletic
|
||||||
|
|
||||||
|
improved recipes:
|
||||||
|
- Pagina 12
|
||||||
|
- Variety
|
||||||
|
- Toronto Sun
|
||||||
|
- Telegraph UK
|
||||||
|
- Danas
|
||||||
|
- Dilbert
|
||||||
|
|
||||||
|
- version: 0.6.39
|
||||||
|
date: 2010-02-09
|
||||||
|
|
||||||
|
new features:
|
||||||
|
- title: "Add ability to control how author sort strings are automatically generated from author strings, via the config file tweaks.py"
|
||||||
|
|
||||||
|
- title: "Handle broken EPUB files from Project Gutenberg that have invalid OCF containers"
|
||||||
|
tickets: [4832]
|
||||||
|
|
||||||
|
bug fixes:
|
||||||
|
- title: "Fix regression in 0.6.38 that broke setting bookmarks in the viewer"
|
||||||
|
|
||||||
|
- title: "HTML Input: Ignore filenames that are encoded incorrectly."
|
||||||
|
|
||||||
|
new recipes:
|
||||||
|
|
||||||
|
- title: Radikal
|
||||||
|
author: Darko Miletic
|
||||||
|
|
||||||
|
|
||||||
|
- version: 0.6.38
|
||||||
|
date: 2010-02-09
|
||||||
|
|
||||||
|
new features:
|
||||||
|
- title: "Driver for the Irex DR 800"
|
||||||
|
|
||||||
|
- title: "Driver for the Booq e-book reader"
|
||||||
|
|
||||||
|
- title: "Allow automatic series increment algorithm to be tweaked by editing the config file tweaks.py"
|
||||||
|
|
||||||
|
- title: "Various improvements to the catalog generation. Larger thumbnails in EPUB output and better series sorting. Better handling of html markup in the comments."
|
||||||
|
|
||||||
|
- title: "MOBI Output: Make font used for generated masthead images user customizable."
|
||||||
|
|
||||||
|
bug fixes:
|
||||||
|
- title: "E-book viewer: Make bookmarking (and remembering last open position) more robust. For linux source installs, you must have Qt 4.6"
|
||||||
|
tickets: [4812]
|
||||||
|
|
||||||
|
- title: "Fix conversion/import of HTML files with very long href links on windows"
|
||||||
|
tickets: [4783]
|
||||||
|
|
||||||
|
- title: "Don't read metadata from filenames for download news, even if the user has the read metadata from filename option set"
|
||||||
|
tickets: [4758]
|
||||||
|
|
||||||
|
- title: "Don't allow leading or trailing space in tags and series. Also normalize all internal spaces to a single space"
|
||||||
|
tickets: [4809]
|
||||||
|
|
||||||
|
- title: "E-book viewer: Toolbars remember their position"
|
||||||
|
tickets: [4811]
|
||||||
|
|
||||||
|
- title: "Fix year being repeated when editing date in main library screen on windows"
|
||||||
|
tickets: [4829]
|
||||||
|
|
||||||
|
- title: "News download: Fix downloading of images from URLs with an ampersand in them"
|
||||||
|
|
||||||
|
- title: "Linux source install: unbundle cssutils, it is now an external dependency"
|
||||||
|
|
||||||
|
- title: "MOBI metadata: Fix regression that broke setting of titles in some MOBI files"
|
||||||
|
|
||||||
|
- title: "EPUB metadata: Extract the cover image from the html it is embedded in if possible, instead of rendering the html. Removes the white margins on covers and speeds up cover extraction"
|
||||||
|
|
||||||
|
- title: "Fix regression in PDB output"
|
||||||
|
|
||||||
|
- title: "News download: Remove <base> tags automatically"
|
||||||
|
|
||||||
|
- title: "Searching on device: Ignore unicode errors"
|
||||||
|
|
||||||
|
|
||||||
|
new recipes:
|
||||||
|
- title: Courier Press
|
||||||
|
author: Krittika Goyal
|
||||||
|
|
||||||
|
- title: zive.sk and iliterature.cz
|
||||||
|
author: Abelturd
|
||||||
|
|
||||||
|
- title: El Comercio, Digital Spy UK, Gizmodo, News Straits Times, Read It Later, TidBits
|
||||||
|
author: Darko Miletic
|
||||||
|
|
||||||
|
improved recipes:
|
||||||
|
- Jerusalem Post
|
||||||
|
- Clarin
|
||||||
|
- La Nacion
|
||||||
|
- Harvard Business Review
|
||||||
|
- People US Mashup
|
||||||
|
- The New Republic
|
||||||
|
- "Pagina 12"
|
||||||
|
- Discover Magazine
|
||||||
|
- Metro Montreal
|
||||||
|
|
||||||
|
- version: 0.6.37
|
||||||
|
date: 2010-02-01
|
||||||
|
|
||||||
|
new features:
|
||||||
|
- title: "E-book viewer: Add support for viewing SVG images"
|
||||||
|
type: major
|
||||||
|
|
||||||
|
- title: "Add category of Recently added books when generating catalog in e-book format"
|
||||||
|
|
||||||
|
- title: "OS X: Allow adding of books to calibre via drag and drop on the calibre dock icon"
|
||||||
|
|
||||||
|
- title: "Add support for masthead images when downloading news for the Kindle"
|
||||||
|
|
||||||
|
- title: "MOBI metadata: Allow setting of metadata in old PRC files without EXTH headers as well"
|
||||||
|
|
||||||
|
bug fixes:
|
||||||
|
- title: Changing the date in Dutch
|
||||||
|
tickets: [4732]
|
||||||
|
|
||||||
|
- title: "Fix regression that broke sending files to unupdated PRS 500s"
|
||||||
|
|
||||||
|
- title: "MOBI Input: Ignore width and height percentage measures for <img> tags."
|
||||||
|
tickets: [4726]
|
||||||
|
|
||||||
|
- title: "EPUB Output: Remove <img> tags that point to the internet for their images as this causes the ever delicate ADE to crash."
|
||||||
|
tickets: [4692]
|
||||||
|
|
||||||
|
- title: "Comic Input: Handle UTF-8 BOM when converting a cbc file"
|
||||||
|
tickets: [4683]
|
||||||
|
|
||||||
|
- title: "Allow rating to be cleared via the Bulk metadata edit dialog"
|
||||||
|
tickets: [4693]
|
||||||
|
|
||||||
|
- title: "Add workaround for broken linux systems with multiply encoded file names"
|
||||||
|
tickets: [4721]
|
||||||
|
|
||||||
|
- title: Fix bug preventing the use of indices when setting save to disk templates
|
||||||
|
tickets: [4710]
|
||||||
|
|
||||||
|
- title: "Linux device mounting. Use filetype of auto to allow non vfat filesystems to be mounted"
|
||||||
|
tickets: [4707]
|
||||||
|
|
||||||
|
- title: "Catalog generation: Make sorting of numbers in title as text optional"
|
||||||
|
|
||||||
|
- title: "Fix error while sending book with non-ascii character in title/author to device on linux"
|
||||||
|
tickets: [4690]
|
||||||
|
|
||||||
|
- title: "Fix reset cover in edit meta information dialog does not actually remove cover"
|
||||||
|
tickets: [4731]
|
||||||
|
|
||||||
|
new recipes:
|
||||||
|
- title: Kamera Bild
|
||||||
|
author: Darko Miletic
|
||||||
|
|
||||||
|
- title: The Online Photographer
|
||||||
|
author: Darko Miletic
|
||||||
|
|
||||||
|
- title: The Luminous Landscape
|
||||||
|
author: Darko Miletic
|
||||||
|
|
||||||
|
- title: Slovo
|
||||||
|
author: Abelturd
|
||||||
|
|
||||||
|
- title: Various Danish newspapers
|
||||||
|
author: Darko Miletic
|
||||||
|
|
||||||
|
- title: Heraldo de Aragon
|
||||||
|
author: Lorenzo Vigentini
|
||||||
|
|
||||||
|
- title: Orange County Register
|
||||||
|
author: Lorenzo Vigentini
|
||||||
|
|
||||||
|
- title: Open Left
|
||||||
|
author: Xanthan Gum
|
||||||
|
|
||||||
|
- title: Michelle Malkin
|
||||||
|
author: Walt Anthony
|
||||||
|
|
||||||
|
- title: The Metro Montreal
|
||||||
|
author: Jerry Clapperton
|
||||||
|
|
||||||
|
- title: The Gazette
|
||||||
|
author: Jerry Clapperton
|
||||||
|
|
||||||
|
- title: Macleans Magazine
|
||||||
|
author: Nick Redding
|
||||||
|
|
||||||
|
- title: NY Times Sunday Book Review
|
||||||
|
author: Krittika Goyal
|
||||||
|
|
||||||
|
- title: Various Italian newspapers
|
||||||
|
author: Lorenzo Vigentini
|
||||||
|
|
||||||
|
|
||||||
|
improved recipes:
|
||||||
|
- The Irish Times
|
||||||
|
- Washington Post
|
||||||
|
- NIN
|
||||||
|
- The Discover Magazine
|
||||||
|
- Pagina 12
|
||||||
|
|
||||||
- version: 0.6.36
|
- version: 0.6.36
|
||||||
date: 2010-01-25
|
date: 2010-01-25
|
||||||
|
|
||||||
|
@ -27,7 +27,7 @@ p.tags {
|
|||||||
|
|
||||||
p.description {
|
p.description {
|
||||||
text-align:left;
|
text-align:left;
|
||||||
font-style:italic;
|
font-style:normal;
|
||||||
margin-top: 0em;
|
margin-top: 0em;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -55,6 +55,14 @@ p.author_index {
|
|||||||
text-indent: 0em;
|
text-indent: 0em;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
p.series {
|
||||||
|
text-align: left;
|
||||||
|
margin-top:0px;
|
||||||
|
margin-bottom:0px;
|
||||||
|
margin-left:2em;
|
||||||
|
text-indent:-2em;
|
||||||
|
}
|
||||||
|
|
||||||
p.read_book {
|
p.read_book {
|
||||||
text-align:left;
|
text-align:left;
|
||||||
margin-top:0px;
|
margin-top:0px;
|
||||||
@ -71,3 +79,9 @@ p.unread_book {
|
|||||||
text-indent:-2em;
|
text-indent:-2em;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
hr.series_divider {
|
||||||
|
width:50%;
|
||||||
|
margin-left:1em;
|
||||||
|
margin-top:0em;
|
||||||
|
margin-bottom:0em;
|
||||||
|
}
|
||||||
|
27
resources/default_tweaks.py
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
'''
|
||||||
|
Contains various tweaks that affect calibre behavior. Only edit this file if
|
||||||
|
you know what you are doing. If you delete this file, it will be recreated from
|
||||||
|
defaults.
|
||||||
|
'''
|
||||||
|
|
||||||
|
|
||||||
|
# The algorithm used to assign a new book in an existing series a series number.
|
||||||
|
# Possible values are:
|
||||||
|
# next - Next available number
|
||||||
|
# const - Assign the number 1 always
|
||||||
|
series_index_auto_increment = 'next'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# The algorithm used to copy author to author_sort
|
||||||
|
# Possible values are:
|
||||||
|
# invert: use "fn ln" -> "ln, fn" (the original algorithm)
|
||||||
|
# copy : copy author to author_sort without modification
|
||||||
|
# comma : use 'copy' if there is a ',' in the name, otherwise use 'invert'
|
||||||
|
author_sort_copy_method = 'invert'
|
157
resources/images/catalog.svg
Normal file
@ -0,0 +1,157 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<!-- Generator: Adobe Illustrator 14.0.0, SVG Export Plug-In . SVG Version: 6.00 Build 43363) -->
|
||||||
|
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
|
||||||
|
<svg version="1.1" id="svg2" xmlns:svg="http://www.w3.org/2000/svg" xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
|
||||||
|
xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px" width="128px" height="128px"
|
||||||
|
viewBox="0 0 128 128" enable-background="new 0 0 128 128" xml:space="preserve">
|
||||||
|
<filter id="filter5365">
|
||||||
|
<feGaussianBlur stdDeviation="1.3829225" inkscape:collect="always" id="feGaussianBlur5367"></feGaussianBlur>
|
||||||
|
</filter>
|
||||||
|
<g id="layer1">
|
||||||
|
</g>
|
||||||
|
<g id="layer2">
|
||||||
|
<polygon id="rect3200" opacity="0.5722" fill="#0000A4" enable-background="new " points="167.5,297.005 171.429,297.005
|
||||||
|
171.429,297.005 "/>
|
||||||
|
<g id="path5265" filter="url(#filter5365)">
|
||||||
|
<polygon fill="#362D2D" points="21.951,79.904 70.397,63.09 119.953,80.636 70.397,97.084 "/>
|
||||||
|
<polygon fill="none" stroke="#362D2D" stroke-width="1.2507" stroke-linejoin="bevel" points="21.951,79.904 70.397,63.09
|
||||||
|
119.953,80.636 70.397,97.084 "/>
|
||||||
|
</g>
|
||||||
|
<g id="path5267" filter="url(#filter5365)">
|
||||||
|
<path fill="#362D2D" d="M118.639,100.902v1.724l-46.437,15.432c-3.723-9.284-1.901-16.34,0.089-20.69l46.883-15.518l-6.34,2.068
|
||||||
|
l2.322,16.553L118.639,100.902z"/>
|
||||||
|
<path fill="none" stroke="#362D2D" stroke-width="1.9" d="M118.639,100.902v1.724l-46.437,15.432
|
||||||
|
c-3.723-9.284-1.901-16.34,0.089-20.69l46.883-15.518l-6.34,2.068l2.322,16.553L118.639,100.902z"/>
|
||||||
|
</g>
|
||||||
|
<g id="path5269" filter="url(#filter5365)">
|
||||||
|
<path fill="#362D2D" d="M70.711,98.81l47.581-15.743l0.29,18.582l-47.56,15.986c0,0-1.515-3.455-1.942-9.812
|
||||||
|
C68.936,101.726,70.711,98.81,70.711,98.81z"/>
|
||||||
|
<path fill="none" stroke="#362D2D" stroke-width="2.1" d="M70.711,98.81l47.581-15.743l0.29,18.582l-47.56,15.986
|
||||||
|
c0,0-1.515-3.455-1.942-9.812C68.936,101.726,70.711,98.81,70.711,98.81z"/>
|
||||||
|
</g>
|
||||||
|
<g id="path5271" filter="url(#filter5365)">
|
||||||
|
<path fill="#362D2D" d="M21.479,79.607l49.115,17.501c-3.287,7.816-2.385,15.202,0.982,23.019l-50.008-16.208
|
||||||
|
C17.974,94.288,17.113,87.874,21.479,79.607z"/>
|
||||||
|
<path fill="none" stroke="#362D2D" stroke-width="1.6" d="M21.479,79.607l49.115,17.501c-3.287,7.816-2.385,15.202,0.982,23.019
|
||||||
|
l-50.008-16.208C17.974,94.288,17.113,87.874,21.479,79.607z"/>
|
||||||
|
</g>
|
||||||
|
<g id="path5273" filter="url(#filter5365)">
|
||||||
|
<path fill="#362D2D" d="M120.871,99.092v4.827l-50.008,16.897l-49.651-15.863c-4.763-11.162-1.987-18.682,0.714-25.346
|
||||||
|
l49.651-16.724l48.579,17.242v3.449l-2.143,1.033l0.357,14.139L120.871,99.092z"/>
|
||||||
|
<path fill="none" stroke="#362D2D" stroke-width="2.7" stroke-linejoin="bevel" d="M120.871,99.092v4.827l-50.008,16.897
|
||||||
|
l-49.651-15.863c-4.763-11.162-1.987-18.682,0.714-25.346l49.651-16.724l48.579,17.242v3.449l-2.143,1.033l0.357,14.139
|
||||||
|
L120.871,99.092z"/>
|
||||||
|
</g>
|
||||||
|
<path id="path5385" fill="#78CE4F" d="M19.316,78.05l48.438-17.414l49.548,18.171L67.754,95.842L19.316,78.05z"/>
|
||||||
|
<path id="path5387" fill="none" stroke="#0F973B" stroke-width="1.9" d="M115.988,99.796v1.786l-46.43,15.982
|
||||||
|
c-3.722-9.616-1.901-16.924,0.09-21.43l46.875-16.07l-6.34,2.143l2.322,17.143L115.988,99.796z"/>
|
||||||
|
|
||||||
|
<radialGradient id="path5389_1_" cx="498.3457" cy="267.1621" r="27.1927" gradientTransform="matrix(-0.064 0.175 1.8694 0.6835 -425.1342 -169.6643)" gradientUnits="userSpaceOnUse">
|
||||||
|
<stop offset="0" style="stop-color:#B5FFA6"/>
|
||||||
|
<stop offset="1" style="stop-color:#76E976"/>
|
||||||
|
</radialGradient>
|
||||||
|
<path id="path5389" fill="url(#path5389_1_)" stroke="#003131" stroke-width="1.6" stroke-opacity="0.9608" d="M18.845,77.742
|
||||||
|
l49.107,18.125c-3.287,8.096-2.385,15.744,0.981,23.84l-50-16.786C15.339,92.946,14.479,86.304,18.845,77.742z"/>
|
||||||
|
<path id="path5391" fill="none" stroke="#003131" stroke-width="2.7" stroke-linejoin="bevel" stroke-opacity="0.9608" d="
|
||||||
|
M118.22,97.921v5l-50,17.5l-49.643-16.429c-4.762-11.561-1.987-19.348,0.714-26.25l49.642-17.321l48.572,17.857v3.571l-2.143,1.071
|
||||||
|
l0.356,14.644L118.22,97.921z"/>
|
||||||
|
<path id="path5393" fill="#FFFFFF" d="M68.068,97.629l47.572-16.305l0.29,19.245l-47.194,16.423c0,0-1.424-2.819-2.12-10.029
|
||||||
|
C66.471,100.649,68.068,97.629,68.068,97.629z"/>
|
||||||
|
<g id="path5419" filter="url(#filter5365)">
|
||||||
|
<polygon fill="#362D2D" points="8.737,52.047 57.183,35.233 106.738,52.778 57.183,69.227 "/>
|
||||||
|
<polygon fill="none" stroke="#362D2D" stroke-width="1.2507" stroke-linejoin="bevel" points="8.737,52.047 57.183,35.233
|
||||||
|
106.738,52.778 57.183,69.227 "/>
|
||||||
|
</g>
|
||||||
|
<g id="path5421" filter="url(#filter5365)">
|
||||||
|
<path fill="#362D2D" d="M105.424,73.045v1.724L58.988,90.2c-3.723-9.284-1.902-16.34,0.089-20.69l46.882-15.518l-6.341,2.069
|
||||||
|
l2.322,16.552L105.424,73.045z"/>
|
||||||
|
<path fill="none" stroke="#362D2D" stroke-width="1.9" d="M105.424,73.045v1.724L58.988,90.2
|
||||||
|
c-3.723-9.284-1.902-16.34,0.089-20.69l46.882-15.518l-6.341,2.069l2.322,16.552L105.424,73.045z"/>
|
||||||
|
</g>
|
||||||
|
<g id="path5423" filter="url(#filter5365)">
|
||||||
|
<path fill="#362D2D" d="M57.497,70.953l47.581-15.744l0.289,18.582L57.809,89.777c0,0-1.515-3.455-1.942-9.812
|
||||||
|
C55.721,73.869,57.497,70.953,57.497,70.953z"/>
|
||||||
|
<path fill="none" stroke="#362D2D" stroke-width="2.1" d="M57.497,70.953l47.581-15.744l0.289,18.582L57.809,89.777
|
||||||
|
c0,0-1.515-3.455-1.942-9.812C55.721,73.869,57.497,70.953,57.497,70.953z"/>
|
||||||
|
</g>
|
||||||
|
<g id="path5425" filter="url(#filter5365)">
|
||||||
|
<path fill="#362D2D" d="M8.265,51.751l49.116,17.501c-3.288,7.816-2.385,15.201,0.982,23.018L8.354,76.062
|
||||||
|
C4.759,66.431,3.899,60.017,8.265,51.751z"/>
|
||||||
|
<path fill="none" stroke="#362D2D" stroke-width="1.6" d="M8.265,51.751l49.116,17.501c-3.288,7.816-2.385,15.201,0.982,23.018
|
||||||
|
L8.354,76.062C4.759,66.431,3.899,60.017,8.265,51.751z"/>
|
||||||
|
</g>
|
||||||
|
<g id="path5427" filter="url(#filter5365)">
|
||||||
|
<path fill="#362D2D" d="M107.656,71.234v4.828L57.648,92.959L7.998,77.097C3.234,65.934,6.011,58.415,8.712,51.751l49.651-16.725
|
||||||
|
l48.58,17.242v3.448l-2.144,1.035l0.357,14.139L107.656,71.234z"/>
|
||||||
|
<path fill="none" stroke="#362D2D" stroke-width="2.7" stroke-linejoin="bevel" d="M107.656,71.234v4.828L57.648,92.959
|
||||||
|
L7.998,77.097C3.234,65.934,6.011,58.415,8.712,51.751l49.651-16.725l48.58,17.242v3.448l-2.144,1.035l0.357,14.139
|
||||||
|
L107.656,71.234z"/>
|
||||||
|
</g>
|
||||||
|
<path id="path5431" fill="#60BAFF" stroke="#003244" stroke-width="1.2507" stroke-linejoin="bevel" d="M6.102,50.193L54.54,32.779
|
||||||
|
l49.548,18.171L54.54,67.985L6.102,50.193z"/>
|
||||||
|
<path id="path5433" fill="none" stroke="#0056D5" stroke-width="2.8104" d="M102.768,71.76v1.803L56.35,89.701
|
||||||
|
c-3.721-9.71-1.901-17.089,0.089-21.639l46.865-16.229l-6.338,2.164l2.321,17.312L102.768,71.76z"/>
|
||||||
|
|
||||||
|
<radialGradient id="path5435_1_" cx="316.8916" cy="261.2949" r="27.1937" gradientTransform="matrix(-0.0902 0.2793 1.9257 0.6218 -445.576 -180.1955)" gradientUnits="userSpaceOnUse">
|
||||||
|
<stop offset="0" style="stop-color:#789DED"/>
|
||||||
|
<stop offset="1" style="stop-color:#2381E8"/>
|
||||||
|
</radialGradient>
|
||||||
|
<path id="path5435" fill="url(#path5435_1_)" stroke="#003244" stroke-width="1.6" d="M5.63,49.885L54.738,68.01
|
||||||
|
c-3.287,8.096-2.385,15.744,0.982,23.84l-50-16.785C2.125,65.09,1.265,58.447,5.63,49.885z"/>
|
||||||
|
<path id="path5437" fill="none" stroke="#003244" stroke-width="2.7" stroke-linejoin="bevel" d="M105.006,70.064v5l-50,17.5
|
||||||
|
L5.363,76.135c-4.762-11.561-1.987-19.348,0.714-26.25L55.72,32.564l48.571,17.857v3.572l-2.143,1.071l0.357,14.643L105.006,70.064
|
||||||
|
z"/>
|
||||||
|
<path id="path5439" fill="#FFFFFF" d="M54.854,69.772l47.573-16.306l0.29,19.245L55.522,89.135c0,0-1.425-2.819-2.121-10.028
|
||||||
|
C53.256,72.793,54.854,69.772,54.854,69.772z"/>
|
||||||
|
<g id="path5447" filter="url(#filter5365)">
|
||||||
|
<polygon fill="#362D2D" points="25.88,28.119 74.326,11.305 123.882,28.85 74.326,45.299 "/>
|
||||||
|
<polygon fill="none" stroke="#362D2D" stroke-width="1.2507" stroke-linejoin="bevel" points="25.88,28.119 74.326,11.305
|
||||||
|
123.882,28.85 74.326,45.299 "/>
|
||||||
|
</g>
|
||||||
|
<g id="path5449" filter="url(#filter5365)">
|
||||||
|
<path fill="#362D2D" d="M122.567,49.116v1.724L76.131,66.271c-3.723-9.284-1.902-16.34,0.09-20.69l46.883-15.518l-6.341,2.069
|
||||||
|
l2.321,16.552L122.567,49.116z"/>
|
||||||
|
<path fill="none" stroke="#362D2D" stroke-width="1.9" d="M122.567,49.116v1.724L76.131,66.271
|
||||||
|
c-3.723-9.284-1.902-16.34,0.09-20.69l46.883-15.518l-6.341,2.069l2.321,16.552L122.567,49.116z"/>
|
||||||
|
</g>
|
||||||
|
<g id="path5451" filter="url(#filter5365)">
|
||||||
|
<path fill="#362D2D" d="M74.641,47.024l47.58-15.744l0.289,18.582L74.951,65.849c0,0-1.514-3.455-1.941-9.812
|
||||||
|
C72.863,49.94,74.641,47.024,74.641,47.024z"/>
|
||||||
|
<path fill="none" stroke="#362D2D" stroke-width="2.1" d="M74.641,47.024l47.58-15.744l0.289,18.582L74.951,65.849
|
||||||
|
c0,0-1.514-3.455-1.941-9.812C72.863,49.94,74.641,47.024,74.641,47.024z"/>
|
||||||
|
</g>
|
||||||
|
<g id="path5453" filter="url(#filter5365)">
|
||||||
|
<path fill="#362D2D" d="M25.408,27.822l49.115,17.5c-3.287,7.816-2.385,15.202,0.982,23.018L25.498,52.133
|
||||||
|
C21.902,42.502,21.042,36.088,25.408,27.822z"/>
|
||||||
|
<path fill="none" stroke="#362D2D" stroke-width="1.6" d="M25.408,27.822l49.115,17.5c-3.287,7.816-2.385,15.202,0.982,23.018
|
||||||
|
L25.498,52.133C21.902,42.502,21.042,36.088,25.408,27.822z"/>
|
||||||
|
</g>
|
||||||
|
<g id="path5455" filter="url(#filter5365)">
|
||||||
|
<path fill="#362D2D" d="M124.8,47.306v4.828L74.791,69.03L25.14,53.168c-4.763-11.163-1.987-18.682,0.714-25.346l49.651-16.725
|
||||||
|
l48.58,17.242v3.449l-2.145,1.034l0.357,14.139L124.8,47.306z"/>
|
||||||
|
<path fill="none" stroke="#362D2D" stroke-width="2.7" stroke-linejoin="bevel" d="M124.8,47.306v4.828L74.791,69.03L25.14,53.168
|
||||||
|
c-4.763-11.163-1.987-18.682,0.714-25.346l49.651-16.725l48.58,17.242v3.449l-2.145,1.034l0.357,14.139L124.8,47.306z"/>
|
||||||
|
</g>
|
||||||
|
<path id="path5459" fill="#FF7272" d="M23.245,26.264L71.684,8.85l49.547,18.171L71.684,44.057L23.245,26.264z"/>
|
||||||
|
<path id="path5461" fill="none" stroke="#CF0505" stroke-width="1.9" d="M119.916,48.01v1.786L73.488,65.778
|
||||||
|
c-3.723-9.616-1.902-16.923,0.089-21.429l46.875-16.071l-6.339,2.143l2.32,17.143L119.916,48.01z"/>
|
||||||
|
|
||||||
|
<radialGradient id="path5463_1_" cx="14.938" cy="-466.4766" r="27.3207" gradientTransform="matrix(2.5834 0.998 0.0835 -0.2162 46.7076 -68.8071)" gradientUnits="userSpaceOnUse">
|
||||||
|
<stop offset="0" style="stop-color:#FD8A8A"/>
|
||||||
|
<stop offset="1" style="stop-color:#FF7878"/>
|
||||||
|
</radialGradient>
|
||||||
|
<path id="path5463" fill="url(#path5463_1_)" stroke="#600101" stroke-width="1.6" d="M22.773,25.957l49.107,18.125
|
||||||
|
c-3.287,8.095-2.385,15.744,0.982,23.839l-50-18.806C19.268,39.14,18.408,34.518,22.773,25.957z"/>
|
||||||
|
|
||||||
|
<linearGradient id="path3311_1_" gradientUnits="userSpaceOnUse" x1="-1.3145" y1="103.2168" x2="67.4683" y2="103.2168" gradientTransform="matrix(1 0 0 -1 5.4287 129.1426)">
|
||||||
|
<stop offset="0" style="stop-color:#FFFFFF"/>
|
||||||
|
<stop offset="1" style="stop-color:#FFFFFF;stop-opacity:0.2471"/>
|
||||||
|
</linearGradient>
|
||||||
|
<path id="path3311" fill="url(#path3311_1_)" d="M23.904,25.736L72.342,8.322l49.548,18.171L72.342,43.529L23.904,25.736z"/>
|
||||||
|
<path id="path5465" fill="none" stroke="#600101" stroke-width="2.7" stroke-linejoin="bevel" d="M122.148,46.135v5l-50,17.5
|
||||||
|
l-49.39-18.701c-4.762-11.562-2.239-17.076,0.461-23.977L72.863,8.635l48.57,17.857v3.571l-2.143,1.071l0.357,14.643
|
||||||
|
L122.148,46.135z"/>
|
||||||
|
<path id="path5467" fill="#FFFFFF" d="M71.997,45.844l47.573-16.306l0.289,19.246L72.666,65.206c0,0-1.426-2.819-2.121-10.028
|
||||||
|
C70.399,48.864,71.997,45.844,71.997,45.844z"/>
|
||||||
|
</g>
|
||||||
|
</svg>
|
After Width: | Height: | Size: 11 KiB |
BIN
resources/images/news/digitalspy_uk.png
Normal file
After Width: | Height: | Size: 1.3 KiB |
BIN
resources/images/news/elcomercio.png
Normal file
After Width: | Height: | Size: 764 B |
BIN
resources/images/news/gizmodo.png
Normal file
After Width: | Height: | Size: 640 B |
BIN
resources/images/news/kamerabild.png
Normal file
After Width: | Height: | Size: 838 B |
BIN
resources/images/news/newsstraitstimes.png
Normal file
After Width: | Height: | Size: 816 B |
BIN
resources/images/news/radikal_tr.png
Normal file
After Width: | Height: | Size: 1.9 KiB |
BIN
resources/images/news/readitlater.png
Normal file
After Width: | Height: | Size: 810 B |
BIN
resources/images/news/sueddeutschezeitung.png
Normal file
After Width: | Height: | Size: 492 B |
BIN
resources/images/news/theluminouslandscape.png
Normal file
After Width: | Height: | Size: 769 B |
BIN
resources/images/news/tidbits.png
Normal file
After Width: | Height: | Size: 783 B |
37
resources/kathemerini.recipe
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Kathimerini(BasicNewsRecipe):
    """News download recipe for the Greek newspaper Kathimerini.

    Articles are discovered through the RSS feeds listed in ``feeds`` and
    each article URL is rewritten by ``print_version`` before download.
    """

    title = 'Kathimerini'
    __author__ = 'Pan'
    description = 'News from Greece'
    max_articles_per_feed = 100
    oldest_article = 100
    publisher = 'Kathimerini'
    category = 'news, GR'
    language = 'el'
    no_stylesheets = True

    # Both trimming rules anchor on the same <td class="news"> element.
    remove_tags_before = dict(name='td', attrs={'class': 'news'})
    remove_tags_after = dict(name='td', attrs={'class': 'news'})
    remove_attributes = ['width', 'src', 'header', 'footer']

    # (feed title, feed URL) pairs. The URLs must not carry surrounding
    # whitespace: a leading space makes the string an invalid URL.
    feeds = [
        # "Politics"
        (u'\u03a0\u03bf\u03bb\u03b9\u03c4\u03b9\u03ba\u03ae',
         'http://wk.kathimerini.gr/xml_files/politics.xml'),
        # "Greece"
        (u'\u0395\u03bb\u03bb\u03ac\u03b4\u03b1',
         'http://wk.kathimerini.gr/xml_files/ell.xml'),
        # "World"
        (u'\u039a\u03cc\u03c3\u03bc\u03bf\u03c2',
         'http://wk.kathimerini.gr/xml_files/world.xml'),
        # "Economy"
        (u'\u039f\u03b9\u03ba\u03bf\u03bd\u03bf\u03bc\u03af\u03b1',
         'http://wk.kathimerini.gr/xml_files/economy_1.xml'),
        # "Businesses"
        (u'\u0395\u03c0\u03b9\u03c7\u03b5\u03b9\u03c1\u03ae\u03c3\u03b5\u03b9\u03c2',
         'http://wk.kathimerini.gr/xml_files/economy_2.xml'),
        # "International Economy"
        (u'\u0394\u03b9\u03b5\u03b8\u03bd\u03ae\u03c2 \u039f\u03b9\u03ba\u03bf\u03bd\u03bf\u03bc\u03af\u03b1',
         'http://wk.kathimerini.gr/xml_files/economy_3.xml'),
        # "Culture"
        (u'\u03a0\u03bf\u03bb\u03b9\u03c4\u03b9\u03c3\u03bc\u03cc\u03c2',
         'http://wk.kathimerini.gr/xml_files/civ.xml'),
        # "Regular Columns"
        (u'\u039c\u03cc\u03bd\u03b9\u03bc\u03b5\u03c2 \u03a3\u03c4\u03ae\u03bb\u03b5\u03c2',
         'http://wk.kathimerini.gr/xml_files/st.xml'),
    ]

    def print_version(self, url):
        """Return the URL to download for the article at *url*.

        The ``4dcgi/`` path segment of news.kathimerini.gr article links is
        doubled (presumably this addresses the printer-friendly page --
        confirm against the site). URLs that do not start with that prefix
        are returned unchanged.
        """
        return url.replace('http://news.kathimerini.gr/4dcgi/',
                           'http://news.kathimerini.gr/4dcgi/4dcgi/')
|
||||||
|
|
||||||
|
|
45
resources/recipes/ZIVE.sk.recipe
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class ZiveRecipe(BasicNewsRecipe):
    """News download recipe for the Slovak IT news site ZIVE.sk."""

    # --- recipe metadata -------------------------------------------------
    __license__ = 'GPL v3'
    __author__ = 'Abelturd'
    version = 1
    language = 'sk'

    title = u'ZIVE.sk'
    publisher = u''
    category = u'News, Newspaper'
    description = u'Naj\u010d\xedtanej\u0161\xed denn\xedk opo\u010d\xedta\u010doch, IT a internete. '
    encoding = 'UTF-8'

    # --- download limits and feed handling -------------------------------
    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
    remove_empty_feeds = True

    # --- page clean-up switches ------------------------------------------
    no_stylesheets = True
    remove_javascript = True
    cover_url = 'http://www.zive.sk/Client.Images/Logos/logo-zive-sk.gif'

    # Single feed; its title reads "All articles" in Slovak.
    feeds = [
        (u'V\u0161etky \u010dl\xe1nky', u'http://www.zive.sk/rss/sc-47/default.aspx'),
    ]

    # Delete the "<p><p><strong>Pokra...ie</strong></p>" fragment from the
    # raw HTML by substituting an empty string for every match.
    preprocess_regexps = [
        (
            re.compile(r'<p><p><strong>Pokra.*ie</strong></p>', re.DOTALL|re.IGNORECASE),
            lambda match: '',
        ),
    ]

    remove_tags = []

    # Only the headline, the author line and the article text are retained.
    keep_only_tags = [
        {'name': 'h1'},
        {'name': 'span', 'attrs': {'class': 'arlist-data-info-author'}},
        {'name': 'div', 'attrs': {'class': 'bbtext font-resizer-area'}},
    ]

    extra_css = '''
            h1 {font-size:140%;font-family:georgia,serif; font-weight:bold}
            h3 {font-size:115%;font-family:georgia,serif; font-weight:bold}
            '''
|
||||||
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
|||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
clarin.com
|
clarin.com
|
||||||
'''
|
'''
|
||||||
@ -21,7 +21,8 @@ class Clarin(BasicNewsRecipe):
|
|||||||
cover_url = strftime('http://www.clarin.com/diario/%Y/%m/%d/portada.jpg')
|
cover_url = strftime('http://www.clarin.com/diario/%Y/%m/%d/portada.jpg')
|
||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
language = 'es'
|
language = 'es'
|
||||||
extra_css = ' .Txt{ font-family: sans-serif } .Volan{ font-family: sans-serif; font-size: x-small} .Pie{ font-family: sans-serif; font-size: x-small} .Copete{font-family: sans-serif; font-size: large} .Hora{font-family: sans-serif; font-size: large} .Autor{font-family: sans-serif; font-size: small} '
|
masthead_url = 'http://www.clarin.com/shared/v10/img/Hd/lg_Clarin.gif'
|
||||||
|
extra_css = ' body{font-family: Arial,Helvetica,sans-serif} h2{font-family: Georgia,"Times New Roman",Times,serif; font-size: xx-large} .Volan,.Pie,.Autor{ font-size: x-small} .Copete,.Hora{font-size: large} '
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment' : description
|
||||||
|
26
resources/recipes/courrier.recipe
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||||
|
|
||||||
|
class CourierPress(BasicNewsRecipe):
    """News download recipe for the Courier Press headline feed.

    Each downloaded page is reduced to its ``<div id="article_body">``
    element by ``preprocess_html``.
    """

    title = u'Courier Press'
    language = 'en'
    __author__ = 'Krittika Goyal'
    oldest_article = 1  # days
    max_articles_per_feed = 25

    # `no_stylesheets` is the BasicNewsRecipe option for dropping the site's
    # stylesheets; an attribute named `remove_stylesheets` is not consulted.
    no_stylesheets = True
    remove_tags = [
        dict(name='iframe'),
    ]

    feeds = [
        ('Courier Press',
         'http://www.courierpress.com/rss/headlines/news/'),
    ]

    def preprocess_html(self, soup):
        """Return a minimal document containing only the article body.

        :param soup: parsed HTML of the downloaded article page.
        :return: a new soup whose ``<body>`` holds the page's
            ``<div id="article_body">`` element, or *soup* unchanged when
            the page has no such element.
        """
        story = soup.find(name='div', attrs={'id': 'article_body'})
        if story is None:
            # Nothing to extract (unexpected page layout); inserting None
            # into the new body would fail, so keep the page as downloaded.
            return soup
        page = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
        body = page.find(name='body')
        body.insert(0, story)
        return page
|
@ -1,64 +1,63 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
danas.rs
|
danas.rs
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import re
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import Tag
|
|
||||||
|
|
||||||
class Danas(BasicNewsRecipe):
|
class Danas(BasicNewsRecipe):
|
||||||
title = 'Danas'
|
title = 'Danas'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Vesti'
|
description = 'Dnevne novine sa vestima iz sveta, politike, ekonomije, kulture, sporta, Beograda, Novog Sada i cele Srbije.'
|
||||||
publisher = 'Danas d.o.o.'
|
publisher = 'Danas d.o.o.'
|
||||||
category = 'news, politics, Serbia'
|
category = 'news, politics, Serbia'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = False
|
no_stylesheets = False
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
|
encoding = 'utf-8'
|
||||||
|
masthead_url = 'http://www.danas.rs/images/basic/danas.gif'
|
||||||
language = 'sr'
|
language = 'sr'
|
||||||
lang = 'sr-Latn-RS'
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .article_description,body,.lokacija{font-family: Tahoma,Arial,Helvetica,sans1,sans-serif} .nadNaslov,h1,.preamble{font-family: Georgia,"Times New Roman",Times,serif1,serif} .antrfileText{border-left: 2px solid #999999; color:#666666; margin-left: 0.8em; padding-left: 1.2em; margin-bottom: 0; margin-top: 0} h2,.datum,.lokacija,.autor{font-size: small} .antrfileNaslov{border-left: 2px solid #999999; color:#666666; margin-left: 0.8em; padding-left: 1.2em; font-weight:bold; margin-bottom: 0; margin-top: 0} img{margin-bottom: 0.8em} '
|
||||||
direction = 'ltr'
|
|
||||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment' : description
|
||||||
, 'tags' : category
|
, 'tags' : category
|
||||||
, 'publisher' : publisher
|
, 'publisher' : publisher
|
||||||
, 'language' : language
|
, 'language' : language
|
||||||
, 'pretty_print' : True
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':'left'})]
|
keep_only_tags = [dict(name='div', attrs={'id':'left'})]
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='div', attrs={'class':['width_1_4','metaClanka','baner']})
|
dict(name='div', attrs={'class':['width_1_4','metaClanka','baner']})
|
||||||
,dict(name='div', attrs={'id':'comments'})
|
,dict(name='div', attrs={'id':'comments'})
|
||||||
,dict(name=['object','link'])
|
,dict(name=['object','link','iframe'])
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Vesti' , u'http://www.danas.rs/rss/rss.asp' )
|
(u'Politika' , u'http://www.danas.rs/rss/rss.asp?column_id=27')
|
||||||
,(u'Periskop', u'http://www.danas.rs/rss/rss.asp?column_id=4')
|
,(u'Hronika' , u'http://www.danas.rs/rss/rss.asp?column_id=2' )
|
||||||
|
,(u'Drustvo' , u'http://www.danas.rs/rss/rss.asp?column_id=24')
|
||||||
|
,(u'Dijalog' , u'http://www.danas.rs/rss/rss.asp?column_id=1' )
|
||||||
|
,(u'Ekonomija', u'http://www.danas.rs/rss/rss.asp?column_id=6' )
|
||||||
|
,(u'Svet' , u'http://www.danas.rs/rss/rss.asp?column_id=25')
|
||||||
|
,(u'Srbija' , u'http://www.danas.rs/rss/rss.asp?column_id=28')
|
||||||
|
,(u'Kultura' , u'http://www.danas.rs/rss/rss.asp?column_id=5' )
|
||||||
|
,(u'Sport' , u'http://www.danas.rs/rss/rss.asp?column_id=13')
|
||||||
|
,(u'Scena' , u'http://www.danas.rs/rss/rss.asp?column_id=42')
|
||||||
|
,(u'Feljton' , u'http://www.danas.rs/rss/rss.asp?column_id=19')
|
||||||
|
,(u'Periskop' , u'http://www.danas.rs/rss/rss.asp?column_id=4' )
|
||||||
]
|
]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
for item in soup.findAll(style=True):
|
||||||
soup.head.insert(0,mlang)
|
del item['style']
|
||||||
attribs = [ 'style','font','valign'
|
|
||||||
,'colspan','width','height'
|
|
||||||
,'rowspan','summary','align'
|
|
||||||
,'cellspacing','cellpadding'
|
|
||||||
,'frames','rules','border'
|
|
||||||
]
|
|
||||||
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
|
|
||||||
item.name = 'div'
|
|
||||||
for attrib in attribs:
|
|
||||||
if item.has_key(attrib):
|
|
||||||
del item[attrib]
|
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
|
def print_version(self, url):
|
||||||
|
return url + '&action=print'
|
||||||
|
|
||||||
|
43
resources/recipes/digitalspy_uk.recipe
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
www.digitalspy.co.uk
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class DigitalSpyUK(BasicNewsRecipe):
    """Recipe for the UK edition of Digital Spy (www.digitalspy.co.uk)."""

    # --- metadata ---------------------------------------------------------
    title = 'Digital Spy - UK Edition'
    __author__ = 'Darko Miletic'
    description = 'Entertainment news about the biggest TV shows, films and celebrities, updated around the clock.'
    publisher = 'Digital Spy Limited.'
    category = 'news, showbiz, big brother, x factor, torchwood, doctor who, tv, media, sky, freeview, cable'
    language = 'en_GB'

    # --- download settings ------------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 100
    encoding = 'cp1252'
    use_embedded_content = False
    remove_empty_feeds = True
    no_stylesheets = True

    # --- presentation -----------------------------------------------------
    extra_css = ' body{font-family: Verdana,Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .info{font-size: small} '

    # Metadata handed on to the conversion pipeline.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # --- clean-up rules ---------------------------------------------------
    keep_only_tags = [{'name': 'div', 'attrs': {'id': 'article'}}]
    remove_tags = [{'name': ['link']}]
    remove_attributes = ['height', 'width']

    # --- sources ----------------------------------------------------------
    feeds = [
        (u'News', u'http://www.digitalspy.co.uk/rss/zones/gb/all.xml'),
        (u'Big Brother', u'http://www.digitalspy.co.uk/rss/zones/gb/bigbrother.xml'),
        (u'Entertainment', u'http://www.digitalspy.co.uk/rss/zones/gb/entertainment.xml'),
        (u'General', u'http://www.digitalspy.co.uk/rss/zones/gb/general.xml'),
        (u'Media', u'http://www.digitalspy.co.uk/rss/zones/gb/media.xml'),
    ]
|
||||||
|
|
@ -3,6 +3,7 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
|||||||
'''
|
'''
|
||||||
http://www.dilbert.com
|
http://www.dilbert.com
|
||||||
'''
|
'''
|
||||||
|
import re
|
||||||
|
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
|
||||||
@ -28,6 +29,12 @@ class DosisDiarias(BasicNewsRecipe):
|
|||||||
|
|
||||||
feeds = [(u'Dilbert', u'http://feeds.dilbert.com/DilbertDailyStrip' )]
|
feeds = [(u'Dilbert', u'http://feeds.dilbert.com/DilbertDailyStrip' )]
|
||||||
|
|
||||||
|
preprocess_regexps = [
|
||||||
|
(re.compile('strip\..*\.gif', re.DOTALL|re.IGNORECASE),
|
||||||
|
lambda match: 'strip.zoom.gif')
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
def get_article_url(self, article):
|
def get_article_url(self, article):
|
||||||
return article.get('feedburner_origlink', None)
|
return article.get('feedburner_origlink', None)
|
||||||
|
|
||||||
|
@ -4,7 +4,7 @@ __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
|||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
'''
|
'''
|
||||||
doscovermagazine.com
|
discovermagazine.com
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
@ -12,42 +12,36 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
class DiscoverMagazine(BasicNewsRecipe):
|
class DiscoverMagazine(BasicNewsRecipe):
|
||||||
|
|
||||||
title = u'Discover Magazine'
|
title = u'Discover Magazine'
|
||||||
description = u'Science, Technology and the Future'
|
description = u'Science, Technology and the Future'
|
||||||
__author__ = 'Mike Diaz'
|
__author__ = 'Mike Diaz'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
|
|
||||||
oldest_article = 33
|
oldest_article = 33
|
||||||
max_articles_per_feed = 20
|
max_articles_per_feed = 20
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
|
|
||||||
extra_css = '.headline {font-size: x-large;} \n .fact {padding-top: 10pt}'
|
extra_css = '.headline {font-size: x-large;} \n .fact {padding-top: 10pt}'
|
||||||
|
|
||||||
|
remove_tags = [dict(name='div', attrs={'id':['searchModule', 'mainMenu', 'tool-box']}),
|
||||||
|
dict(name='img', attrs={'src':'http://discovermagazine.com/onebyone.gif'})]
|
||||||
|
|
||||||
remove_tags_before = dict(id='articlePage')
|
remove_tags_after = [dict(name='div', attrs={'class':'articlebody'})]
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':'articlePage'})]
|
|
||||||
|
|
||||||
remove_tags = [dict(attrs={'id':['buttons', 'tool-box', 'teaser', 'already-subscriber', 'teaser-suite', 'related-articles', 'relatedItem', 'box-popular', 'box-blogs', 'box-news', 'footer']}),
|
|
||||||
dict(attrs={'class':'popularNewsBox'}),
|
|
||||||
dict(name=['img', 'style', 'head'])]
|
|
||||||
|
|
||||||
remove_tags_after = dict(id='articlePage')
|
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Technology', u'http://discovermagazine.com/topics/technology/rss.xml'),
|
(u'Technology', u'http://discovermagazine.com/topics/technology/rss.xml'),
|
||||||
(u'Health - Medicine', u'http://discovermagazine.com/topics/health-medicine/rss.xml'),
|
(u'Health - Medicine', u'http://discovermagazine.com/topics/health-medicine/rss.xml'),
|
||||||
(u'Mind Brain', u'http://discovermagazine.com/topics/mind-brain/rss.xml'),
|
(u'Mind Brain', u'http://discovermagazine.com/topics/mind-brain/rss.xml'),
|
||||||
(u'Space', u'http://discovermagazine.com/topics/space/rss.xml'),
|
(u'Space', u'http://discovermagazine.com/topics/space/rss.xml'),
|
||||||
(u'Human Origins', u'http://discovermagazine.com/topics/human-origins/rss.xml'),
|
(u'Human Origins', u'http://discovermagazine.com/topics/human-origins/rss.xml'),
|
||||||
(u'Living World', u'http://discovermagazine.com/topics/living-world/rss.xml'),
|
(u'Living World', u'http://discovermagazine.com/topics/living-world/rss.xml'),
|
||||||
(u'Environment', u'http://discovermagazine.com/topics/environment/rss.xml'),
|
(u'Environment', u'http://discovermagazine.com/topics/environment/rss.xml'),
|
||||||
(u'Physics & Math', u'http://discovermagazine.com/topics/physics-math/rss.xml'),
|
(u'Physics & Math', u'http://discovermagazine.com/topics/physics-math/rss.xml'),
|
||||||
(u'Vital Signs', u'http://discovermagazine.com/columns/vital-signs/rss.xml'),
|
(u'Vital Signs', u'http://discovermagazine.com/columns/vital-signs/rss.xml'),
|
||||||
(u"20 Things you didn't know about...", u'http://discovermagazine.com/columns/20-things-you-didnt-know/rss.xml'),
|
(u"20 Things you didn't know about...", u'http://discovermagazine.com/columns/20-things-you-didnt-know/rss.xml'),
|
||||||
(u'Fuzzy Math', u'http://discovermagazine.com/columns/fuzzy-math/rss.xml'),
|
(u'Fuzzy Math', u'http://discovermagazine.com/columns/fuzzy-math/rss.xml'),
|
||||||
(u'The Brain', u'http://discovermagazine.com/columns/the-brain/rss.xml'),
|
(u'The Brain', u'http://discovermagazine.com/columns/the-brain/rss.xml'),
|
||||||
(u'Stupid Science Word of the Month', u'http://discovermagazine.com/columns/stupid-science-word-of-the-month/rss.xml'),
|
(u'Stupid Science Word of the Month', u'http://discovermagazine.com/columns/stupid-science-word-of-the-month/rss.xml'),
|
||||||
(u'Science Not Fiction', u'http://blogs.discovermagazine.com/sciencenotfiction/wp-rss.php')
|
(u'Science Not Fiction', u'http://blogs.discovermagazine.com/sciencenotfiction/wp-rss.php')
|
||||||
]
|
]
|
26
resources/recipes/eksiazki.recipe
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v2'
|
||||||
|
__copyright__ = u'2010, Tomasz Dlugosz <tomek3d@gmail.com>'
|
||||||
|
'''
|
||||||
|
eksiazki.org
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class eksiazki(BasicNewsRecipe):
    """Recipe for the eksiazki.org blog feed."""

    title = u'eksiazki.org'
    # Was misspelled `desciption`, so the recipe effectively had no
    # description attribute.
    description = u'Twoje centrum wiedzy o epapierze i ebookach'
    language = 'pl'
    __author__ = u'Tomasz D\u0142ugosz'
    no_stylesheets = True
    remove_javascript = True

    feeds = [(u'wpisy', u'http://www.eksiazki.org/feed/')]

    # Keep only the post body, then drop the comment counter, the tab
    # container and the previous/next navigation inside it.
    keep_only_tags = [dict(name='div', attrs={'id': 'content-body'})]
    remove_tags = [
        dict(name='span', attrs={'class': 'nr_comm'}),
        dict(name='div', attrs={'id': 'tabsContainer'}),
        dict(name='div', attrs={'class': 'next_previous_links'}),
    ]
|
38
resources/recipes/elcomercio.recipe
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
elcomercio.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class ElComercio(BasicNewsRecipe):
    """Recipe for the elcomercio.com headline feed (embedded content)."""

    title = 'El Comercio '
    __author__ = 'Darko Miletic'
    # The previous value was the Gizmodo blurb, copied over from another
    # recipe; it also ended up in the book's comment metadata below.
    description = 'Noticias de Ecuador y el resto del mundo'
    publisher = 'GRUPO EL COMERCIO C.A.'
    category = 'news, Ecuador, politics'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = True
    language = 'es'
    masthead_url = 'http://ww1.elcomercio.com/nv_images/headers/EC/logo_new_08.gif'
    extra_css = ' body{font-family: Arial,Verdana,sans-serif} img{margin-bottom: 1em} '

    # Metadata handed on to the conversion pipeline.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    remove_attributes = ['width', 'height']

    feeds = [(u'Articles', u'http://ww1.elcomercio.com/rss/titulares1.xml')]

    def preprocess_html(self, soup):
        """Return *soup* after passing it through ``adeify_images``."""
        return self.adeify_images(soup)
|
||||||
|
|
40
resources/recipes/gizmodo.recipe
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
gizmodo.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Gizmodo(BasicNewsRecipe):
    """Recipe for the gizmodo.com full-text feed (embedded content)."""

    # --- metadata ---------------------------------------------------------
    title = 'Gizmodo'
    __author__ = 'Darko Miletic'
    description = "Gizmodo, the gadget guide. So much in love with shiny new toys, it's unnatural."
    publisher = 'gizmodo.com'
    category = 'news, IT, Internet, gadgets'
    language = 'en'

    # --- download settings ------------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 100
    encoding = 'utf-8'
    use_embedded_content = True
    no_stylesheets = True

    # --- presentation -----------------------------------------------------
    masthead_url = 'http://cache.gawkerassets.com/assets/gizmodo.com/img/logo.png'
    extra_css = ' body{font-family: "Lucida Grande",Helvetica,Arial,sans-serif} img{margin-bottom: 1em} '

    # Metadata handed on to the conversion pipeline.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # --- clean-up rules ---------------------------------------------------
    # Everything after the "feedflare" div is cut, and the div itself is
    # removed as well.
    remove_tags_after = {'name': 'div', 'attrs': {'class': 'feedflare'}}
    remove_tags = [{'name': 'div', 'attrs': {'class': 'feedflare'}}]
    remove_attributes = ['width', 'height']

    # --- sources ----------------------------------------------------------
    feeds = [(u'Articles', u'http://feeds.gawker.com/gizmodo/full')]

    def preprocess_html(self, soup):
        """Hand the parsed page to ``adeify_images`` and return its result."""
        processed = self.adeify_images(soup)
        return processed
|
||||||
|
|
@ -18,7 +18,8 @@ class HBR(BasicNewsRecipe):
|
|||||||
remove_tags = [dict(id=['mastheadContainer', 'magazineHeadline',
|
remove_tags = [dict(id=['mastheadContainer', 'magazineHeadline',
|
||||||
'articleToolbarTopRD', 'pageRightSubColumn', 'pageRightColumn',
|
'articleToolbarTopRD', 'pageRightSubColumn', 'pageRightColumn',
|
||||||
'todayOnHBRListWidget', 'mostWidget', 'keepUpWithHBR',
|
'todayOnHBRListWidget', 'mostWidget', 'keepUpWithHBR',
|
||||||
'mailingListTout', 'partnerCenter', 'pageFooter']),
|
'mailingListTout', 'partnerCenter', 'pageFooter',
|
||||||
|
'articleToolbarTop', 'articleToolbarBottom', 'articleToolbarRD']),
|
||||||
dict(name='iframe')]
|
dict(name='iframe')]
|
||||||
extra_css = '''
|
extra_css = '''
|
||||||
a {font-family:Georgia,"Times New Roman",Times,serif; font-style:italic; color:#000000; }
|
a {font-family:Georgia,"Times New Roman",Times,serif; font-style:italic; color:#000000; }
|
||||||
|
47
resources/recipes/iliteratura_cz.recipe
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
import re
|
||||||
|
|
||||||
|
class SmeRecipe(BasicNewsRecipe):
    """Recipe for iLiteratura.cz, fetching the print view of each article."""

    __license__ = 'GPL v3'
    __author__ = 'Abelturd'
    # 'cs' is the ISO 639-1 code for Czech; the previous 'cz' is a country
    # abbreviation, not a language code.
    language = 'cs'
    version = 1

    title = u'iLiteratura.cz'
    publisher = u''
    category = u'News, Newspaper'
    description = u'O LITERATU\u0158E V CEL\xc9M SV\u011aT\u011a A DOMA'
    cover_url = 'http://www.iliteratura.cz/1_vzhled/1/iliteratura.gif'

    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
    remove_empty_feeds = True

    no_stylesheets = True
    remove_javascript = True

    feeds = []
    feeds.append((u'\u010cl\xe1nky', u'http://www.iliteratura.cz/rss.asp'))

    keep_only_tags = []

    remove_tags = [dict(name='table'), dict(name='h3')]

    # Cut everything from the "Souvisej..." heading to the end of the body.
    preprocess_regexps = [
        (re.compile(r'<h3>Souvisej.*</body>', re.DOTALL|re.IGNORECASE),
         lambda match: ''),
    ]

    def print_version(self, url):
        """Return the print-view URL for the article at *url*.

        The numeric article id is taken from the digits following ``ID=``
        in *url*.  When no such id is present the original *url* is
        returned unchanged, rather than raising ``AttributeError`` on a
        failed match or building a link with an empty id.
        """
        m = re.search(r'(?<=ID=)[0-9]+', url)
        if m is None:
            return url
        return u'http://www.iliteratura.cz/clanek.asp?polozkaID=' + str(m.group(0)) + '&c=tisk'

    extra_css = '''
        h1 {font-size:140%;font-family:georgia,serif; font-weight:bold}
        h3 {font-size:115%;font-family:georgia,serif; font-weight:bold}
        '''
|
67
resources/recipes/ilsole24ore.recipe
Normal file
@ -0,0 +1,67 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__author__ = 'Lorenzo Vigentini & Edwin van Maastrigt'
|
||||||
|
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com> and Edwin van Maastrigt <evanmaastrigt at gmail.com>'
|
||||||
|
__description__ = 'Financial news daily paper - v1.02 (30, January 2010)'
|
||||||
|
|
||||||
|
'''
|
||||||
|
http://www.ilsole24ore.com/
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
|
class ilsole(BasicNewsRecipe):
    """Recipe for il Sole 24 Ore, fetching the ``_PRN`` print pages."""

    author = 'Lorenzo Vigentini & Edwin van Maastrigt'
    # Also publish the credit under the `__author__` name used by the other
    # recipes; `author` is kept for backward compatibility.
    __author__ = author
    description = 'Financial news daily paper'

    cover_url = 'http://www.ilsole24ore.com/img2009/header/t_logosole.gif'
    title = u'il Sole 24 Ore '
    publisher = 'italiaNews'
    category = 'News, finance, economy, politics'

    language = 'it'
    timefmt = '[%a, %d %b, %Y]'

    oldest_article = 2
    max_articles_per_feed = 50
    use_embedded_content = False

    remove_javascript = True
    no_stylesheets = True

    def get_article_url(self, article):
        """Return the article's ``id``, falling back to ``guid``, else None."""
        return article.get('id', article.get('guid', None))

    def print_version(self, url):
        """Return the print-page URL for *url*.

        Everything from the last ``?`` onwards is dropped and ``.shtml`` is
        replaced by ``_PRN.shtml``.  A URL without any ``?`` is used as is:
        ``rpartition`` puts the whole string in its last slot in that case,
        which previously made this method return an empty string.
        """
        link, sep, _params = url.rpartition('?')
        if not sep:
            # No query string to strip.
            link = url
        return link.replace('.shtml', '_PRN.shtml')

    keep_only_tags = [
        dict(name='div', attrs={'class': 'txt'})
    ]
    remove_tags = [dict(name='br')]

    feeds = [
        (u'Prima pagina', u'http://www.ilsole24ore.com/rss/primapagina.xml'),
        (u'Norme e tributi', u'http://www.ilsole24ore.com/rss/norme-tributi.xml'),
        (u'Finanza e mercati', u'http://www.ilsole24ore.com/rss/finanza-mercati.xml'),
        (u'Economia e lavoro', u'http://www.ilsole24ore.com/rss/economia-lavoro.xml'),
        (u'Italia', u'http://www.ilsole24ore.com/rss/italia.xml'),
        (u'Mondo', u'http://www.ilsole24ore.com/rss/mondo.xml'),
        (u'Tecnologia e business', u'http://www.ilsole24ore.com/rss/tecnologia-business.xml'),
        (u'Cultura e tempo libero', u'http://www.ilsole24ore.com/rss/tempolibero-cultura.xml'),
        (u'Sport', u'http://www.ilsole24ore.com/rss/sport.xml'),
        (u'Professionisti 24', u'http://www.ilsole24ore.com/rss/prof_home.xml')
    ]

    extra_css = '''
        html, body, table, tr, td, h1, h2, h3, h4, h5, h6, p, a, span, br, img {margin:0;padding:0;border:0;font-size:12px;font-family:Arial;}
        .linkHighlight {color:#0292c6;}
        .txt {border-bottom:1px solid #7c7c7c;padding-bottom:20px;text-align:justify;}
        .txt p {line-height:18px;}
        .txt span {line-height:22px;}
        .title h3 {color:#7b7b7b;}
        .title h4 {color:#08526e;font-size:26px;font-family:"Times New Roman";font-weight:normal;}
        '''
|
||||||
|
|
@ -10,22 +10,19 @@ class JerusalemPost(BasicNewsRecipe):
|
|||||||
__author__ = 'Kovid Goyal'
|
__author__ = 'Kovid Goyal'
|
||||||
max_articles_per_feed = 10
|
max_articles_per_feed = 10
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_tags_before = {'class':'byline'}
|
remove_tags_before = {'class':'jp-grid-content'}
|
||||||
remove_tags = [
|
remove_tags_after = {'id':'body_val'}
|
||||||
{'class':['artAdBlock clearboth', 'tbartop', 'divdot_vrttbox',
|
|
||||||
'slideshow']},
|
|
||||||
dict(id=['artFontButtons', 'artRelatedBlock']),
|
|
||||||
]
|
|
||||||
remove_tags_after = {'id':'artTxtBlock'}
|
|
||||||
|
|
||||||
feeds = [ ('Front Page', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333346'),
|
feeds = [ ('Front Page', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333346'),
|
||||||
('Israel News', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1178443463156'),
|
('Israel News', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1178443463156'),
|
||||||
('Middle East News', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333498'),
|
('Middle East News', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333498'),
|
||||||
('International News', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1178443463144'),
|
('International News', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1178443463144'),
|
||||||
('Editorials', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333211'),
|
('Editorials', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333211'),
|
||||||
]
|
]
|
||||||
|
|
||||||
def postprocess_html(self, soup, first):
|
def preprocess_html(self, soup):
|
||||||
for tag in soup.findAll(name=['table', 'tr', 'td']):
|
for x in soup.findAll(name=['form', 'input']):
|
||||||
tag.name = 'div'
|
x.name = 'div'
|
||||||
return soup
|
for x in soup.findAll('body', style=True):
|
||||||
|
del x['style']
|
||||||
|
return soup
|
||||||
|
46
resources/recipes/kamerabild.recipe
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
www.kamerabild.se
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Kamerabild(BasicNewsRecipe):
    """Recipe for the Kamera & Bild news feed (www.kamerabild.se)."""

    title = 'Kamera & Bild'
    __author__ = 'Darko Miletic'
    description = 'Photo News from Sweden'
    # Previously 'politiken.dk', apparently left over from a recipe for a
    # different (Danish) site; it ended up in the book's publisher metadata.
    publisher = 'Kamera & Bild'
    category = 'news, photograph, Sweden'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_empty_feeds = True
    use_embedded_content = False
    encoding = 'utf8'
    language = 'sv'

    extra_css = ' body{font-family: Verdana,Arial,Helvetica,sans-serif } .title{font-weight: bold} .pricerunnerAdContainer{border-bottom: 1px solid; border-top: 1px solid; margin-top: 0.5em; margin-bottom: 0.5em} .elementTeaserKicker{font-weight: bold; color: #AE0A10} '

    # Metadata handed on to the conversion pipeline.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    feeds = [(u'Articles', u'http://www.kamerabild.se/cmlink/Nyheter-fran-KAMERA-BILD-1.43315.xml')]
    keep_only_tags = [dict(name='div', attrs={'class': 'container'})]
    remove_tags_after = dict(name='div', attrs={'class': 'editor'})
    remove_tags = [
        dict(name=['object', 'link', 'iframe']),
        dict(name='div', attrs={'class': ['pricerunner_head', 'sideBar', 'img']}),
    ]

    def preprocess_html(self, soup):
        """Strip inline ``style`` attributes, then apply ``adeify_images``."""
        for item in soup.findAll(style=True):
            del item['style']
        return self.adeify_images(soup)
|
||||||
|
|
||||||
|
|
@ -1,7 +1,5 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
lanacion.com.ar
|
lanacion.com.ar
|
||||||
'''
|
'''
|
||||||
@ -12,28 +10,34 @@ class Lanacion(BasicNewsRecipe):
|
|||||||
title = 'La Nacion'
|
title = 'La Nacion'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Noticias de Argentina y el resto del mundo'
|
description = 'Noticias de Argentina y el resto del mundo'
|
||||||
publisher = 'La Nacion'
|
publisher = 'La Nacion S.A.'
|
||||||
category = 'news, politics, Argentina'
|
category = 'news, politics, Argentina'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
remove_javascript = True
|
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
language = 'es'
|
||||||
|
encoding = 'cp1252'
|
||||||
|
masthead_url = 'http://www.lanacion.com.ar/imgs/layout/logos/ln341x47.gif'
|
||||||
|
extra_css = ' h1{font-family: Georgia,serif} body{font-family: Arial,sans-serif} img{margin-top: 0.5em; margin-bottom: 0.2em} .notaEpigrafe{font-size: x-small} '
|
||||||
|
|
||||||
html2lrf_options = [
|
|
||||||
'--comment', description
|
|
||||||
, '--category', category
|
|
||||||
, '--publisher', publisher
|
|
||||||
]
|
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
conversion_options = {
|
||||||
|
'comment' : description
|
||||||
|
, 'tags' : category
|
||||||
|
, 'publisher': publisher
|
||||||
|
, 'language' : language
|
||||||
|
}
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'class':'nota floatFix'})]
|
keep_only_tags = [dict(name='div', attrs={'class':'nota floatFix'})]
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='div' , attrs={'class':'notaComentario floatFix noprint' })
|
dict(name='div' , attrs={'class':'notaComentario floatFix noprint' })
|
||||||
,dict(name='ul' , attrs={'class':'cajaHerramientas cajaTop noprint'})
|
,dict(name='ul' , attrs={'class':'cajaHerramientas cajaTop noprint'})
|
||||||
,dict(name='div' , attrs={'class':'cajaHerramientas noprint' })
|
,dict(name='div' , attrs={'class':'cajaHerramientas noprint' })
|
||||||
|
,dict(attrs={'class':['titulosMultimedia','derecha','techo color']})
|
||||||
|
,dict(name=['iframe','embed','object'])
|
||||||
]
|
]
|
||||||
|
remove_attributes = ['height','width']
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Ultimas noticias' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?origen=2' )
|
(u'Ultimas noticias' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?origen=2' )
|
||||||
@ -51,10 +55,4 @@ class Lanacion(BasicNewsRecipe):
|
|||||||
]
|
]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
|
return self.adeify_images(soup)
|
||||||
soup.head.insert(0,mtag)
|
|
||||||
for item in soup.findAll(style=True):
|
|
||||||
del item['style']
|
|
||||||
return soup
|
|
||||||
|
|
||||||
language = 'es'
|
|
||||||
|
89
resources/recipes/lescienze.recipe
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__author__ = 'Lorenzo Vigentini'
|
||||||
|
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
|
||||||
|
__version__ = 'v1.01'
|
||||||
|
__date__ = '10, January 2010'
|
||||||
|
__description__ = 'Monthly Italian edition of Scientific American'
|
||||||
|
|
||||||
|
'''
|
||||||
|
http://lescienze.espresso.repubblica.it/
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class leScienze(BasicNewsRecipe):
    """Recipe for "le Scienze" (http://lescienze.espresso.repubblica.it/).

    Downloads the per-topic RSS feeds published under
    http://data.kataweb.it/rss/scienze/ and keeps only the main article
    box (``div.bigbox``) of every page, dropping index links, the comment
    box and the RSS/"blocco" containers.
    """

    author      = 'Lorenzo Vigentini'
    # Recipes declare their author through ``__author__``; keep the plain
    # ``author`` name as well so anything that referenced it still works.
    __author__  = author
    description = 'Monthly Italian edition of Scientific American'

    cover_url   = 'http://lescienze.espresso.repubblica.it/images/logo_lescienze.gif'
    title       = 'le Scienze'
    publisher   = 'Gruppo editoriale lEspresso'
    category    = 'Science, general interest'

    language    = 'it'
    encoding    = 'cp1252'
    timefmt     = '[%a, %d %b, %Y]'

    oldest_article        = 31
    max_articles_per_feed = 20
    use_embedded_content  = False
    # NOTE(review): the link-following depth option used by other recipes is
    # spelled ``recursions``; confirm whether this attribute has any effect
    # before renaming it, since following links 10 levels deep would change
    # what gets downloaded.
    recursion             = 10

    remove_javascript     = True
    no_stylesheets        = True

    # Only the main article container is kept.
    keep_only_tags = [
        dict(name='div', attrs={'class':'bigbox'})
    ]

    # Navigation, comments and RSS widgets inside the article container.
    remove_tags = [
        dict(name='span', attrs={'class':'linkindice'}),
        dict(name='div', attrs={'class':'box-commenti'}),
        dict(name='div', attrs={'id':['rssdiv','blocco']})
    ]
    remove_tags_after = [dict(name='div', attrs={'class':'box-commenti'})]

    # One feed per topic: (section title, feed URL).
    feeds = [
        (u'Antropologia', u'http://data.kataweb.it/rss/scienze/antropologia'),
        (u'Archeologia', u'http://data.kataweb.it/rss/scienze/archeologia'),
        (u'Arte e Musica', u'http://data.kataweb.it/rss/scienze/arte_e_musica'),
        (u'Astrofisica', u'http://data.kataweb.it/rss/scienze/astrofisica'),
        (u'Astronautica', u'http://data.kataweb.it/rss/scienze/astronautica'),
        (u'Astronomia', u'http://data.kataweb.it/rss/scienze/astronomia_e_cosmologia'),
        (u'Biologia', u'http://data.kataweb.it/rss/scienze/biologia'),
        (u'Chimica', u'http://data.kataweb.it/rss/scienze/chimica'),
        (u'Ecologia & ambiente', u'http://data.kataweb.it/rss/scienze/ecologia_e_ambiente'),
        (u'Economia', u'http://data.kataweb.it/rss/scienze/Economia'),
        (u'Fisica', u'http://data.kataweb.it/rss/scienze/Fisica'),
        (u'Informatica', u'http://data.kataweb.it/rss/scienze/informatica_e_telecomunicazioni'),
        (u'Ingegneria', u'http://data.kataweb.it/rss/scienze/ingegneria_e_tecnologia'),
        (u'Matematica', u'http://data.kataweb.it/rss/scienze/Matematica'),
        (u'Medicina', u'http://data.kataweb.it/rss/scienze/Medicina'),
        (u'Paleontologia', u'http://data.kataweb.it/rss/scienze/Paleontologia'),
        (u'Recensioni', u'http://data.kataweb.it/rss/scienze/Recensioni'),
        (u'Psicologia', u'http://data.kataweb.it/rss/scienze/psicologie_e_scienze_cognitive'),
        (u'Scienze della Terra', u'http://data.kataweb.it/rss/scienze/scienze_della_terra'),
        (u'Scienze dello spazio', u'http://data.kataweb.it/rss/scienze/scienze_dello_spazio'),
        (u'Scienze naturali', u'http://data.kataweb.it/rss/scienze/scienze_naturali'),
        (u'Scienze sociali', u'http://data.kataweb.it/rss/scienze/scienze_sociali'),
        (u'Statistica', u'http://data.kataweb.it/rss/scienze/statistica'),
        (u'Storia della scienza', u'http://data.kataweb.it/rss/scienze/storia_della_scienza')
    ]

    # Typography for headings and the site's own article classes.
    extra_css = '''
                h1 {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:18px;}
                h2 {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:18px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:16px; }
                h3 {color:#333333;font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px;}
                h4 {color:#333333; font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; }
                h5 {color:#333333; font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:12px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; text-transform:uppercase;}
                .occhiello {color:#666666;display:block;font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:13px;font-size-adjust:none;font-stretch:normal;font-style:normal;font-variant:normal;font-weight:bold;line-height:15px;}
                .titolo {font-weight:bold;}
                .label {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:12px;font-size-adjust:none;font-stretch:normal;font-style:normal;font-variant:normal;font-weight:bold;height:15px;line-height:15px;text-transform:uppercase;}
                .firma {color:#333333;font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:12px; font-size-adjust:none; font-stretch:normal; font-style:italic; font-variant:normal; font-weight:bold; line-height:15px; text-decoration:none;}
                .testo {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:10px;}
                '''
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -4,21 +4,26 @@ class Metro_Montreal(BasicNewsRecipe):
|
|||||||
|
|
||||||
title = u'M\xe9tro Montr\xe9al'
|
title = u'M\xe9tro Montr\xe9al'
|
||||||
__author__ = 'Jerry Clapperton'
|
__author__ = 'Jerry Clapperton'
|
||||||
description = u'Le quotidien le plus branch\xe9 sur le monde'
|
description = 'Le quotidien le plus branch\xe9 sur le monde'
|
||||||
language = 'fr'
|
language = 'fr'
|
||||||
|
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 20
|
max_articles_per_feed = 20
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
|
extra_css = '.headline {font-size: x-large;} \n .fact {padding-top: 10pt}'
|
||||||
|
|
||||||
extra_css = '.headline {font-size: x-large;} \n .fact {padding-top: 10pt}'
|
remove_tags = [dict(attrs={'id':'buttons'})]
|
||||||
|
|
||||||
remove_tags = [dict(attrs={'id':'buttons'}), dict(name=['img', 'style'])]
|
feeds = [
|
||||||
|
(u"L'info", u'http://journalmetro.com/linfo/rss'),
|
||||||
feeds = [(u"L'info", u'http://journalmetro.com/linfo/rss'), (u'Monde', u'http://journalmetro.com/monde/rss'), (u'Culture', u'http://journalmetro.com/culture/rss'), (u'Sports', u'http://journalmetro.com/sports/rss'), (u'Paroles', u'http://journalmetro.com/paroles/rss')]
|
(u'Monde', u'http://journalmetro.com/monde/rss'),
|
||||||
|
(u'Culture', u'http://journalmetro.com/culture/rss'),
|
||||||
|
(u'Sports', u'http://journalmetro.com/sports/rss'),
|
||||||
|
(u'Paroles', u'http://journalmetro.com/paroles/rss')
|
||||||
|
]
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url.replace('article', 'ArticlePrint') + '?language=fr'
|
return url.replace('article', 'ArticlePrint') + '?language=fr'
|
||||||
|
35
resources/recipes/newsstraitstimes.recipe
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
www.nst.com.my
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Newstraitstimes(BasicNewsRecipe):
    """Recipe for the New Straits Times (www.nst.com.my).

    Reads the single "all sections" RSS feed and keeps only the article
    container (``div#haidah``) of each page.
    """

    title                 = 'New Straits Times from Malaysia'
    __author__            = 'Darko Miletic'
    description           = 'Learning Curve, Sunday People, New Straits Times from Malaysia'
    publisher             = 'nst.com.my'
    category              = 'news, politics, Malaysia'
    oldest_article        = 2
    max_articles_per_feed = 100
    no_stylesheets        = True
    encoding              = 'cp1252'
    use_embedded_content  = False
    language              = 'en'
    masthead_url          = 'http://www.nst.com.my/Current_News/NST/Images/new-nstonline.jpg'

    # Metadata handed to the conversion pipeline, built from the fields above.
    conversion_options = {
        'comment'   : description,
        'tags'      : category,
        'publisher' : publisher,
        'language'  : language,
    }

    remove_tags    = [dict(name=['link','table'])]
    # A list of tag specifications, like every other recipe's keep_only_tags
    # (the original assigned a bare dict here).
    keep_only_tags = [dict(name='div', attrs={'id':'haidah'})]

    feeds = [(u'Articles', u'http://www.nst.com.my/rss/allSec')]
|
||||||
|
|
@ -72,9 +72,8 @@ class Nin(BasicNewsRecipe):
|
|||||||
section = self.tag_to_string(item)
|
section = self.tag_to_string(item)
|
||||||
feedlink = self.PREFIX + item['href']
|
feedlink = self.PREFIX + item['href']
|
||||||
feedpage = self.index_to_soup(feedlink)
|
feedpage = self.index_to_soup(feedlink)
|
||||||
self.report_progress(0, _('Fetching feed')+' %s...'%(section))
|
self.report_progress(0, _('Fetching feed')+' %s...'%(section))
|
||||||
inarts = []
|
inarts = []
|
||||||
count2 = 0
|
|
||||||
for art in feedpage.findAll('span',attrs={'class':'artTitle'}):
|
for art in feedpage.findAll('span',attrs={'class':'artTitle'}):
|
||||||
alink = art.parent
|
alink = art.parent
|
||||||
url = self.PREFIX + alink['href']
|
url = self.PREFIX + alink['href']
|
||||||
|
@ -5,7 +5,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
|||||||
'''
|
'''
|
||||||
nytimes.com
|
nytimes.com
|
||||||
'''
|
'''
|
||||||
import re
|
import re, time
|
||||||
from calibre import entity_to_unicode
|
from calibre import entity_to_unicode
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString, Comment
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString, Comment
|
||||||
|
@ -37,7 +37,7 @@ class NYTimes(BasicNewsRecipe):
|
|||||||
dict(name=['script', 'noscript', 'style'])]
|
dict(name=['script', 'noscript', 'style'])]
|
||||||
encoding = decode
|
encoding = decode
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
extra_css = 'h1 {font: sans-serif large;}\n.byline {font:monospace;}'
|
extra_css = 'h1 {font-face:sans-serif; font-size:2em; font-weight:bold;}\n.byline {font:monospace;}\n.bold {font-weight:bold;}'
|
||||||
|
|
||||||
def get_browser(self):
|
def get_browser(self):
|
||||||
br = BasicNewsRecipe.get_browser()
|
br = BasicNewsRecipe.get_browser()
|
||||||
|
56
resources/recipes/nytimesbook.recipe
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||||
|
|
||||||
|
class NewYorkTimesBookReview(BasicNewsRecipe):
    """Recipe for the New York Times Sunday Book Review feed.

    Follows "pagewanted=N" continuation links so multi-page reviews are
    downloaded in full, then reduces every page to its ``div#article``.
    """

    title                 = u'New York Times Book Review'
    language              = 'en'
    __author__            = 'Krittika Goyal'
    oldest_article        = 8  # days
    max_articles_per_feed = 1000
    recursions            = 2

    # The original set ``remove_stylesheets``, a name nothing else in these
    # recipes uses; the flag the other recipes set is ``no_stylesheets``.
    no_stylesheets = True

    remove_tags_after = dict(name='div', attrs={'id':'authorId'})
    remove_tags = [
        dict(name='iframe'),
        dict(name=['div', 'a'], attrs={'class':['enlargeThis', 'jumpLink']}),
        dict(name='div', attrs={'id':['sidebarArticles', 'toolsRight']}),
    ]

    # Only follow links to page 2 and later of the same article.
    match_regexps = [
        r'http://www.nytimes.com/.+pagewanted=[2-9]+'
    ]

    feeds = [
        ('New York Times Sunday Book Review',
         'http://feeds.nytimes.com/nyt/rss/SundayBookReview'),
    ]

    def preprocess_html(self, soup):
        """Return a minimal document that contains only the article div.

        If the page has no ``div#article`` the soup is returned unchanged
        instead of inserting ``None`` into the new document.
        """
        story = soup.find(name='div', attrs={'id':'article'})
        if story is None:
            return soup
        page = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
        page.find(name='body').insert(0, story)
        return page

    def postprocess_html(self, soup, first):
        """Strip page navigation and, on continuation pages, repeated headers.

        ``first`` is true for the first page of an article; on later pages
        the first ``h1`` and the first ``timestamp`` element are removed so
        they are not repeated in the merged article.
        """
        for div in soup.findAll(id='pageLinks'):
            div.extract()
        if not first:
            h1 = soup.find('h1')
            if h1 is not None:
                h1.extract()
            t = soup.find(attrs={'class':'timestamp'})
            if t is not None:
                t.extract()
        return soup
|
@ -1,13 +1,12 @@
|
|||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
pagina12.com.ar
|
pagina12.com.ar
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import time
|
import re
|
||||||
from calibre import strftime
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||||
|
|
||||||
class Pagina12(BasicNewsRecipe):
|
class Pagina12(BasicNewsRecipe):
|
||||||
title = 'Pagina - 12'
|
title = 'Pagina - 12'
|
||||||
@ -16,13 +15,14 @@ class Pagina12(BasicNewsRecipe):
|
|||||||
publisher = 'La Pagina S.A.'
|
publisher = 'La Pagina S.A.'
|
||||||
category = 'news, politics, Argentina'
|
category = 'news, politics, Argentina'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 200
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = 'es'
|
language = 'es'
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
extra_css = ' body{font-family: sans-serif} '
|
masthead_url = 'http://www.pagina12.com.ar/commons/imgs/logo-home.gif'
|
||||||
|
extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} #autor{font-weight: bold} #fecha,#epigrafe{font-size: 0.9em; margin: 5px} #imagen{border: 1px solid black; margin: 0 0 1.25em 1.25em; width: 232px } '
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment' : description
|
||||||
@ -45,14 +45,24 @@ class Pagina12(BasicNewsRecipe):
|
|||||||
,(u'NO' , u'http://www.pagina12.com.ar/diario/rss/no.xml' )
|
,(u'NO' , u'http://www.pagina12.com.ar/diario/rss/no.xml' )
|
||||||
,(u'Las/12' , u'http://www.pagina12.com.ar/diario/rss/las12.xml' )
|
,(u'Las/12' , u'http://www.pagina12.com.ar/diario/rss/las12.xml' )
|
||||||
,(u'Soy' , u'http://www.pagina12.com.ar/diario/rss/soy.xml' )
|
,(u'Soy' , u'http://www.pagina12.com.ar/diario/rss/soy.xml' )
|
||||||
,(u'M2' , u'http://www.pagina12.com.ar/diario/rss/futuro.xml' )
|
,(u'Futuro' , u'http://www.pagina12.com.ar/diario/rss/futuro.xml' )
|
||||||
|
,(u'M2' , u'http://www.pagina12.com.ar/diario/rss/m2.xml' )
|
||||||
|
,(u'Rosario/12' , u'http://www.pagina12.com.ar/diario/rss/rosario.xml' )
|
||||||
]
|
]
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url.replace('http://www.pagina12.com.ar/','http://www.pagina12.com.ar/imprimir/')
|
return url.replace('http://www.pagina12.com.ar/','http://www.pagina12.com.ar/imprimir/')
|
||||||
|
|
||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
imgnames = ['tapan.jpg','tapagn.jpg','tapan_gr.jpg','tapagn.jpg','tapagn.jpg','tapan.jpg','tapagn.jpg']
|
rawc = self.index_to_soup('http://www.pagina12.com.ar/diario/principal/diario/index.html',True)
|
||||||
weekday = time.localtime().tm_wday
|
rawc2 = re.sub(r'PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN','PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"',rawc)
|
||||||
return strftime('http://www.pagina12.com.ar/fotos/%Y%m%d/diario/') + imgnames[weekday]
|
soup = BeautifulSoup(rawc2,fromEncoding=self.encoding,smartQuotesTo=None)
|
||||||
|
for image in soup.findAll('img',alt=True):
|
||||||
|
if image['alt'].startswith('Tapa de la fecha'):
|
||||||
|
return image['src']
|
||||||
|
return None
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
return soup
|
@ -31,7 +31,7 @@ class PeopleMag(BasicNewsRecipe):
|
|||||||
|
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='div', attrs={'class': 'panel_news_article_main'}),
|
dict(name='div', attrs={'class': 'panel_news_article_main'}),
|
||||||
dict(name='div', attrs={'class':'article_content'}),
|
dict(name='div', attrs={'class':'article_content'}),
|
||||||
dict(name='div', attrs={'class': 'headline'}),
|
dict(name='div', attrs={'class': 'headline'}),
|
||||||
dict(name='div', attrs={'class': 'post'}),
|
dict(name='div', attrs={'class': 'post'}),
|
||||||
@ -51,6 +51,7 @@ class PeopleMag(BasicNewsRecipe):
|
|||||||
dict(name='div', attrs={'class':'sharelinkcont'}),
|
dict(name='div', attrs={'class':'sharelinkcont'}),
|
||||||
dict(name='div', attrs={'class':'categories'}),
|
dict(name='div', attrs={'class':'categories'}),
|
||||||
dict(name='ul', attrs={'class':'categories'}),
|
dict(name='ul', attrs={'class':'categories'}),
|
||||||
|
dict(name='div', attrs={'class':'related_content'}),
|
||||||
dict(name='div', attrs={'id':'promo'}),
|
dict(name='div', attrs={'id':'promo'}),
|
||||||
dict(name='div', attrs={'class':'linksWrapper'}),
|
dict(name='div', attrs={'class':'linksWrapper'}),
|
||||||
dict(name='p', attrs={'class':'tag tvnews'}),
|
dict(name='p', attrs={'class':'tag tvnews'}),
|
||||||
|
45
resources/recipes/radikal_tr.recipe
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
radikal.com.tr
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Radikal_tr(BasicNewsRecipe):
    """Recipe for Radikal (radikal.com.tr), fetched through its print pages."""

    title       = 'Radikal - Turkey'
    __author__  = 'Darko Miletic'
    description = 'News from Turkey'
    publisher   = 'radikal'
    category    = 'news, politics, Turkey'
    language    = 'tr'

    oldest_article        = 2
    max_articles_per_feed = 150
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'cp1254'

    masthead_url = 'http://www.radikal.com.tr/D/i/1/V2/radikal_logo.jpg'
    extra_css    = ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .article_description,body{font-family: Arial,Verdana,Helvetica,sans1,sans-serif } '

    # Metadata handed to the conversion pipeline, built from the fields above.
    conversion_options = {
        'comment'   : description,
        'tags'      : category,
        'publisher' : publisher,
        'language'  : language,
    }

    # Keep everything from the first <h1> up to the element with
    # id "haberDetayYazi", minus embedded media and document-level links.
    remove_tags        = [dict(name=['embed','iframe','object','link','base'])]
    remove_tags_before = dict(name='h1')
    remove_tags_after  = dict(attrs={'id':'haberDetayYazi'})

    feeds = [(u'Yazarlar', u'http://www.radikal.com.tr/d/rss/RssYazarlar.xml')]

    def print_version(self, url):
        """Build the print-page URL from the text after the last 'ArticleID='.

        When the marker is absent the whole URL is used as the id.
        """
        article_id = url.rsplit('ArticleID=', 1)[-1]
        printable = 'http://www.radikal.com.tr/Default.aspx?aType=HaberYazdir&ArticleID='
        return printable + article_id

    def preprocess_html(self, soup):
        """Pass the page through ``adeify_images`` and return the result."""
        return self.adeify_images(soup)
|
||||||
|
|
64
resources/recipes/readitlater.recipe
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
readitlaterlist.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre import strftime
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Readitlater(BasicNewsRecipe):
    """Recipe that downloads a user's unread list from readitlaterlist.com.

    Requires an account user name (the password is optional); the article
    index is scraped from the HTML "unread" page rather than from RSS.
    """

    title                 = 'Read It Later'
    __author__            = 'Darko Miletic'
    description           = '''Personalized news feeds. Go to readitlaterlist.com to
                               setup up your news. Fill in your account
                               username, and optionally you can add password.'''
    publisher             = 'readitlater.com'
    category              = 'news, custom'
    oldest_article        = 7
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    needs_subscription    = True
    INDEX                 = u'http://readitlaterlist.com'
    LOGIN                 = INDEX + u'/l'

    feeds = [(u'Unread articles', INDEX + u'/unread')]

    def get_browser(self):
        """Return a browser that is logged in when a user name is configured."""
        br = BasicNewsRecipe.get_browser()
        if self.username is not None:
            br.open(self.LOGIN)
            br.select_form(nr=0)
            br['feed_id'] = self.username
            # The password field is only filled in when one was supplied.
            if self.password is not None:
                br['password'] = self.password
            br.submit()
        return br

    def parse_index(self):
        """Build ``[(feed title, [article dict, ...]), ...]`` from the HTML lists.

        Each article dict has the keys 'title', 'date', 'url' and
        'description'.  A page without the ``ul#list`` element yields an
        empty article list instead of raising AttributeError.
        """
        totalfeeds = []
        for feedtitle, feedurl in self.get_feeds():
            self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
            articles = []
            soup = self.index_to_soup(feedurl)
            ritem = soup.find('ul',attrs={'id':'list'})
            # The list is missing e.g. when the page layout differs from
            # what this recipe expects; treat that as "no articles".
            entries = ritem.findAll('li') if ritem is not None else []
            for item in entries:
                atag = item.find('a',attrs={'class':'text'})
                if atag and atag.has_key('href'):
                    articles.append({
                        'title'      : self.tag_to_string(item.div),
                        'date'       : strftime(self.timefmt),
                        'url'        : self.INDEX + atag['href'],
                        'description': '',
                    })
            totalfeeds.append((feedtitle, articles))
        return totalfeeds
|
||||||
|
|
107
resources/recipes/sueddeutschezeitung.recipe
Normal file
@ -0,0 +1,107 @@
|
|||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
www.sueddeutsche.de/sz/
|
||||||
|
'''
|
||||||
|
|
||||||
|
import urllib
|
||||||
|
from calibre import strftime
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class SueddeutcheZeitung(BasicNewsRecipe):
    """Recipe for the paid daily edition of the Sueddeutsche Zeitung.

    The section indexes live under a dated URL (``/sz/YYYY-MM-DD/``) that is
    computed once, when the class body is executed.  Articles are scraped
    from the section tables and fetched through their ``print.html`` pages.
    """

    title                 = 'Sueddeutche Zeitung'
    __author__            = 'Darko Miletic'
    description           = 'News from Germany. Access to paid content.'
    publisher             = 'Sueddeutche Zeitung'
    category              = 'news, politics, Germany'
    no_stylesheets        = True
    oldest_article        = 2
    encoding              = 'cp1252'
    needs_subscription    = True
    remove_empty_feeds    = True
    PREFIX                = 'http://www.sueddeutsche.de'
    INDEX                 = PREFIX + strftime('/sz/%Y-%m-%d/')
    LOGIN                 = PREFIX + '/app/lbox/index.html'
    use_embedded_content  = False
    masthead_url          = 'http://pix.sueddeutsche.de/img/g_.gif'
    language              = 'de'
    extra_css             = ' body{font-family: Arial,Helvetica,sans-serif} '

    # Metadata and options handed to the conversion pipeline.
    conversion_options = {
        'comment'          : description,
        'tags'             : category,
        'publisher'        : publisher,
        'language'         : language,
        'linearize_tables' : True,
    }

    remove_attributes = ['height','width']

    def get_browser(self):
        """Open the day's index and, if credentials are set, post the login form."""
        br = BasicNewsRecipe.get_browser()
        br.open(self.INDEX)
        if self.username is not None and self.password is not None:
            data = urllib.urlencode({
                'login_name'     : self.username,
                'login_passwort' : self.password,
                'lboxaction'     : 'doLogin',
                'passtxt'        : 'Passwort',
                'referer'        : self.INDEX,
                'x'              : '22',
                'y'              : '7',
            })
            br.open(self.LOGIN,data)
        return br

    remove_tags = [
        dict(attrs={'class':'hidePrint'}),
        dict(name=['link','object','embed','base','iframe']),
    ]
    remove_tags_before = dict(name='h2')
    remove_tags_after  = dict(attrs={'class':'author'})

    # (section title, section index URL) for the current day's edition.
    feeds = [
        (u'Politik'      , INDEX + 'politik/'      ),
        (u'Seite drei'   , INDEX + 'seitedrei/'    ),
        (u'Meinungsseite', INDEX + 'meinungsseite/'),
        (u'Wissen'       , INDEX + 'wissen/'       ),
        (u'Panorama'     , INDEX + 'panorama/'     ),
        (u'Feuilleton'   , INDEX + 'feuilleton/'   ),
        (u'Medien'       , INDEX + 'medien/'       ),
        (u'Wirtschaft'   , INDEX + 'wirtschaft/'   ),
        (u'Sport'        , INDEX + 'sport/'        ),
        (u'Bayern'       , INDEX + 'bayern/'       ),
        (u'Muenchen'     , INDEX + 'muenchen/'     ),
        (u'jetzt.de'     , INDEX + 'jetzt.de/'     ),
    ]

    def parse_index(self):
        """Build ``[(section title, [article dict, ...]), ...]`` from the section pages.

        Each article dict has the keys 'title', 'date', 'url' and
        'description'.  Sections without the ``szprintd`` table produce an
        empty list, cells without a title link are skipped, and a missing
        teaser paragraph gives an empty description, instead of each of
        these raising AttributeError.
        """
        totalfeeds = []
        for feedtitle, feedurl in self.get_feeds():
            self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
            articles = []
            soup = self.index_to_soup(feedurl)
            tbl = soup.find(attrs={'class':'szprintd'})
            if tbl is not None:
                cells = tbl.findAll(name='td',attrs={'class':'topthema'})
            else:
                cells = []
            for item in cells:
                heading = item.find(attrs={'class':'Titel'})
                atag = heading.a if heading is not None else None
                if atag is None:
                    # Nothing to link to; this cell cannot become an article.
                    continue
                description = ''
                ptag = item.find('p')
                if ptag is not None:
                    # Drop inline scripts so their source does not end up in
                    # the teaser text.
                    stag = ptag.find('script')
                    if stag:
                        stag.extract()
                    description = self.tag_to_string(ptag)
                articles.append({
                    'title'      : self.tag_to_string(atag),
                    'date'       : strftime(self.timefmt),
                    'url'        : self.PREFIX + atag['href'],
                    'description': description,
                })
            totalfeeds.append((feedtitle, articles))
        return totalfeeds

    def print_version(self, url):
        """Return the printer-friendly page, which lives at ``<url>print.html``."""
        return url + 'print.html'
|
||||||
|
|
@ -9,8 +9,8 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
|
|
||||||
class TelegraphUK(BasicNewsRecipe):
|
class TelegraphUK(BasicNewsRecipe):
|
||||||
title = u'Telegraph.co.uk'
|
title = u'Telegraph.co.uk'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic and Sujata Raman'
|
||||||
description = 'News from United Kingdom'
|
description = 'News from United Kingdom'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
@ -18,23 +18,26 @@ class TelegraphUK(BasicNewsRecipe):
|
|||||||
|
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
|
|
||||||
extra_css = '''
|
extra_css = '''
|
||||||
h1{font-family :Arial,Helvetica,sans-serif; font-size:large; }
|
h1{font-family :Arial,Helvetica,sans-serif; font-size:large; }
|
||||||
h2{font-family :Arial,Helvetica,sans-serif; font-size:x-small; color:#444444}
|
h2{font-family :Arial,Helvetica,sans-serif; font-size:x-small; color:#444444;}
|
||||||
.story{font-family :Arial,Helvetica,sans-serif; font-size: x-small;}
|
.story{font-family :Arial,Helvetica,sans-serif; font-size: x-small;}
|
||||||
.byline{color:#666666; font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
|
.byline{color:#666666; font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
|
||||||
a{color:#234B7B; }
|
a{color:#234B7B; }
|
||||||
.imageExtras{color:#666666; font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
|
.imageExtras{color:#666666; font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
|
||||||
'''
|
'''
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='div', attrs={'class':'storyHead'})
|
dict(name='div', attrs={'class':'storyHead'})
|
||||||
,dict(name='div', attrs={'class':'story' })
|
,dict(name='div', attrs={'class':'story' })
|
||||||
#,dict(name='div', attrs={'class':['slideshowHD gutterUnder',"twoThirds gutter","caption" ] })
|
#,dict(name='div', attrs={'class':['slideshowHD gutterUnder',"twoThirds gutter","caption" ] })
|
||||||
]
|
]
|
||||||
remove_tags = [dict(name='div', attrs={'class':['related_links_inline',"imgindex","next","prev","gutterUnder"]})]
|
remove_tags = [dict(name='div', attrs={'class':['related_links_inline',"imgindex","next","prev","gutterUnder",'ssImgHide','imageExtras','ssImg hide']})
|
||||||
|
#,dict(name='div', attrs={'class':['toolshideoneQuarter']})
|
||||||
feeds = [
|
,dict(name='span', attrs={'class':['num','placeComment']})
|
||||||
|
]
|
||||||
|
|
||||||
|
feeds = [
|
||||||
(u'UK News' , u'http://www.telegraph.co.uk/news/uknews/rss' )
|
(u'UK News' , u'http://www.telegraph.co.uk/news/uknews/rss' )
|
||||||
,(u'World News' , u'http://www.telegraph.co.uk/news/worldnews/rss' )
|
,(u'World News' , u'http://www.telegraph.co.uk/news/worldnews/rss' )
|
||||||
,(u'Politics' , u'http://www.telegraph.co.uk/news/newstopics/politics/rss' )
|
,(u'Politics' , u'http://www.telegraph.co.uk/news/newstopics/politics/rss' )
|
||||||
@ -45,15 +48,27 @@ class TelegraphUK(BasicNewsRecipe):
|
|||||||
,(u'Earth News' , u'http://www.telegraph.co.uk/earth/earthnews/rss' )
|
,(u'Earth News' , u'http://www.telegraph.co.uk/earth/earthnews/rss' )
|
||||||
,(u'Comment' , u'http://www.telegraph.co.uk/comment/rss' )
|
,(u'Comment' , u'http://www.telegraph.co.uk/comment/rss' )
|
||||||
,(u'How about that?', u'http://www.telegraph.co.uk/news/newstopics/howaboutthat/rss' )
|
,(u'How about that?', u'http://www.telegraph.co.uk/news/newstopics/howaboutthat/rss' )
|
||||||
]
|
]
|
||||||
|
|
||||||
def get_article_url(self, article):
|
def get_article_url(self, article):
|
||||||
|
|
||||||
url = article.get('guid', None)
|
url = article.get('guid', None)
|
||||||
|
|
||||||
if 'picture-galleries' in url or 'pictures' in url or 'picturegalleries' in url :
|
if 'picture-galleries' in url or 'pictures' in url or 'picturegalleries' in url :
|
||||||
url = None
|
url = None
|
||||||
|
|
||||||
return url
|
return url
|
||||||
|
|
||||||
|
|
||||||
|
def postprocess_html(self,soup,first):
|
||||||
|
|
||||||
|
for bylineTag in soup.findAll(name='div', attrs={'class':'byline'}):
|
||||||
|
for pTag in bylineTag.findAll(name='p'):
|
||||||
|
if getattr(pTag.contents[0],"Comments",True):
|
||||||
|
pTag.extract()
|
||||||
|
return soup
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,22 +0,0 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
class The_Gazette(BasicNewsRecipe):
    """Recipe for The Gazette (Montreal news in English) via canada.com feeds.

    The original ``cover_url`` pointed at a ``file:///D:/...`` path that only
    exists on the author's machine, so it has been dropped here.
    """

    title       = u'The Gazette'
    __author__  = 'Jerry Clapperton'
    description = 'Montreal news in English'
    language    = 'en_CA'

    oldest_article        = 7
    max_articles_per_feed = 20
    use_embedded_content  = False
    remove_javascript     = True
    no_stylesheets        = True
    encoding              = 'utf-8'

    # Only the story header and the first page container are kept.
    keep_only_tags = [dict(name='div', attrs={'id':['storyheader','page1']})]

    extra_css = '.headline {font-size: x-large;} \n .fact {padding-top: 10pt}'

    feeds = [
        (u'News', u'http://feeds.canada.com/canwest/F297'),
        (u'Opinion', u'http://feeds.canada.com/canwest/F7383'),
        (u'Arts', u'http://feeds.canada.com/canwest/F7366'),
        (u'Life', u'http://rss.canada.com/get/?F6934'),
        (u'Business', u'http://feeds.canada.com/canwest/F6939'),
        (u'Travel', u'http://rss.canada.com/get/?F6938'),
        (u'Health', u'http://feeds.canada.com/canwest/F7397'),
        (u'Technology', u'http://feeds.canada.com/canwest/F7411'),
    ]
|
|
@ -9,6 +9,7 @@ class The_New_Republic(BasicNewsRecipe):
|
|||||||
|
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
|
no_stylesheets = True
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='div', attrs={'class':['print-logo', 'print-site_name', 'img-left', 'print-source_url']}),
|
dict(name='div', attrs={'class':['print-logo', 'print-site_name', 'img-left', 'print-source_url']}),
|
||||||
@ -21,14 +22,15 @@ class The_New_Republic(BasicNewsRecipe):
|
|||||||
('Economy', 'http://www.tnr.com/rss/articles/Economy'),
|
('Economy', 'http://www.tnr.com/rss/articles/Economy'),
|
||||||
('Environment and Energy', 'http://www.tnr.com/rss/articles/Environment-%2526-Energy'),
|
('Environment and Energy', 'http://www.tnr.com/rss/articles/Environment-%2526-Energy'),
|
||||||
('Health Care', 'http://www.tnr.com/rss/articles/Health-Care'),
|
('Health Care', 'http://www.tnr.com/rss/articles/Health-Care'),
|
||||||
('Urban Policy', 'http://www.tnr.com/rss/articles/Urban-Policy'),
|
('Metro Policy', 'http://www.tnr.com/rss/articles/Metro-Policy'),
|
||||||
('World', 'http://www.tnr.com/rss/articles/World'),
|
('World', 'http://www.tnr.com/rss/articles/World'),
|
||||||
('Film', 'http://www.tnr.com/rss/articles/Film'),
|
('Film', 'http://www.tnr.com/rss/articles/Film'),
|
||||||
('Books', 'http://www.tnr.com/rss/articles/books'),
|
('Books', 'http://www.tnr.com/rss/articles/books'),
|
||||||
|
('The Book', 'http://www.tnr.com/rss/book'),
|
||||||
|
('Jonathan Chait', 'http://www.tnr.com/rss/blogs/Jonathan-Chait'),
|
||||||
('The Plank', 'http://www.tnr.com/rss/blogs/The-Plank'),
|
('The Plank', 'http://www.tnr.com/rss/blogs/The-Plank'),
|
||||||
('The Treatment', 'http://www.tnr.com/rss/blogs/The-Treatment'),
|
('The Treatment', 'http://www.tnr.com/rss/blogs/The-Treatment'),
|
||||||
('The Spine', 'http://www.tnr.com/rss/blogs/The-Spine'),
|
('The Spine', 'http://www.tnr.com/rss/blogs/The-Spine'),
|
||||||
('The Stash', 'http://www.tnr.com/rss/blogs/The-Stash'),
|
|
||||||
('The Vine', 'http://www.tnr.com/rss/blogs/The-Vine'),
|
('The Vine', 'http://www.tnr.com/rss/blogs/The-Vine'),
|
||||||
('The Avenue', 'http://www.tnr.com/rss/blogs/The-Avenue'),
|
('The Avenue', 'http://www.tnr.com/rss/blogs/The-Avenue'),
|
||||||
('William Galston', 'http://www.tnr.com/rss/blogs/William-Galston'),
|
('William Galston', 'http://www.tnr.com/rss/blogs/William-Galston'),
|
||||||
@ -40,3 +42,4 @@ class The_New_Republic(BasicNewsRecipe):
|
|||||||
|
|
||||||
def print_version(self, url):
    """Return ``url`` with the tnr.com site root swapped for its /print/ root."""
    site_root = 'http://www.tnr.com/'
    print_root = 'http://www.tnr.com/print/'
    return url.replace(site_root, print_root)
|
||||||
|
|
||||||
|
37
resources/recipes/theluminouslandscape.recipe
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
luminous-landscape.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class theluminouslandscape(BasicNewsRecipe):
    """News recipe for luminous-landscape.com.

    Uses the content embedded in the site's "What's new" RSS feed instead of
    downloading the linked pages.
    """

    title = 'The Luminous Landscape'
    __author__ = 'Darko Miletic'
    description = 'A photography news and information website in the form of a weblog with multiple authors who write on a variety of photography and art-photography related issues.'
    # The stray trailing space in the publisher name has been dropped.
    publisher = 'The Luminous Landscape'
    category = 'news, blog, photograph, international'
    oldest_article = 15
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_empty_feeds = True
    use_embedded_content = True
    encoding = 'cp1252'
    language = 'en'

    # The key is 'comments', the same spelling the Variety recipe uses for
    # its description entry; this recipe previously spelled it 'comment'.
    conversion_options = {
        'comments': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    feeds = [(u"What's new", u'http://www.luminous-landscape.com/whatsnew/rssfeed.php')]
    remove_tags = [dict(name=['object','link','iframe'])]

    def preprocess_html(self, soup):
        """Pass the parsed page through ``self.adeify_images`` and return the result.

        ``adeify_images`` is defined outside this class body (presumably
        inherited from BasicNewsRecipe).
        """
        return self.adeify_images(soup)
|
||||||
|
|
||||||
|
|
41
resources/recipes/theonlinephotographer.recipe
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
theonlinephotographer.typepad.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class theonlinephotographer(BasicNewsRecipe):
    """News recipe for theonlinephotographer.typepad.com.

    Downloads the pages linked from the FeedBurner feed and keeps the part of
    each page between the entry header and the entry footer.
    """

    title = 'The Online Photographer'
    __author__ = 'Darko Miletic'
    description = 'A photography news and information website in the form of a weblog with multiple authors who write on a variety of photography and art-photography related issues.'
    publisher = 'The Online Photographer'
    category = 'news, blog, photograph, international'
    oldest_article = 15
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_empty_feeds = True
    use_embedded_content = False
    encoding = 'utf8'
    language = 'en'

    extra_css = ' body{font-family: Georgia,"Times New Roman",serif } '

    # The key is 'comments', the same spelling the Variety recipe uses for
    # its description entry; this recipe previously spelled it 'comment'.
    conversion_options = {
        'comments': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    feeds = [(u'Articles', u'http://feeds.feedburner.com/typepad/ZSjz')]
    remove_tags_before = dict(name='h3',attrs={'class':'entry-header'})
    remove_tags_after = dict(name='div',attrs={'class':'entry-footer'})
    remove_tags = [dict(name=['object','link','iframe'])]

    def preprocess_html(self, soup):
        """Pass the parsed page through ``self.adeify_images`` and return the result.

        ``adeify_images`` is defined outside this class body (presumably
        inherited from BasicNewsRecipe).
        """
        return self.adeify_images(soup)
|
||||||
|
|
||||||
|
|
53
resources/recipes/tidbits.recipe
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
db.tidbits.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class TidBITS(BasicNewsRecipe):
    """News recipe for db.tidbits.com.

    Uses the content embedded in the per-topic RSS feeds listed in ``feeds``.
    """

    title = 'TidBITS: Mac News for the Rest of Us'
    __author__ = 'Darko Miletic'
    description = 'Insightful news, reviews, and analysis of the Macintosh and Internet worlds'
    publisher = 'TidBITS Publishing Inc.'
    category = 'news, Apple, Macintosh, IT, Internet'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = True
    language = 'en'
    remove_empty_feeds = True
    masthead_url = 'http://db.tidbits.com/images/tblogo9.gif'
    extra_css = ' body{font-family: Georgia,"Times New Roman",Times,serif} '

    # The key is 'comments', the same spelling the Variety recipe uses for
    # its description entry; this recipe previously spelled it 'comment'.
    conversion_options = {
        'comments': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    remove_attributes = ['width','height']
    remove_tags = [dict(name='small')]
    remove_tags_after = dict(name='small')

    feeds = [
        (u'Business Apps'              , u'http://db.tidbits.com/feeds/business.rss'     ),
        (u'Entertainment'              , u'http://db.tidbits.com/feeds/entertainment.rss'),
        (u'External Links'             , u'http://db.tidbits.com/feeds/links.rss'        ),
        (u'Home Mac'                   , u'http://db.tidbits.com/feeds/home.rss'         ),
        (u'Inside TidBITS'             , u'http://db.tidbits.com/feeds/inside.rss'       ),
        (u'iPod & iPhone'              , u'http://db.tidbits.com/feeds/ipod-iphone.rss'  ),
        (u'Just for Fun'               , u'http://db.tidbits.com/feeds/fun.rss'          ),
        (u'Macs & Mac OS X'            , u'http://db.tidbits.com/feeds/macs.rss'         ),
        (u'Media Creation'             , u'http://db.tidbits.com/feeds/creative.rss'     ),
        (u'Networking & Communications', u'http://db.tidbits.com/feeds/net.rss'          ),
        (u'Opinion & Editorial'        , u'http://db.tidbits.com/feeds/opinion.rss'      ),
        (u'Support & Problem Solving'  , u'http://db.tidbits.com/feeds/support.rss'      ),
        (u'Safe Computing'             , u'http://db.tidbits.com/feeds/security.rss'     ),
        (u'Tech News'                  , u'http://db.tidbits.com/feeds/tech.rss'         ),
        (u'Software Watchlist'         , u'http://db.tidbits.com/feeds/watchlist.rss'    ),
    ]
|
@ -10,7 +10,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
|
|
||||||
class TorontoSun(BasicNewsRecipe):
|
class TorontoSun(BasicNewsRecipe):
|
||||||
title = 'Toronto SUN'
|
title = 'Toronto SUN'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic and Sujata Raman'
|
||||||
description = 'News from Canada'
|
description = 'News from Canada'
|
||||||
publisher = 'Toronto Sun'
|
publisher = 'Toronto Sun'
|
||||||
category = 'news, politics, Canada'
|
category = 'news, politics, Canada'
|
||||||
@ -21,25 +21,50 @@ class TorontoSun(BasicNewsRecipe):
|
|||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
language = 'en_CA'
|
language = 'en_CA'
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment' : description
|
||||||
, 'tags' : category
|
, 'tags' : category
|
||||||
, 'publisher' : publisher
|
, 'publisher' : publisher
|
||||||
, 'language' : language
|
, 'language' : language
|
||||||
}
|
}
|
||||||
|
|
||||||
keep_only_tags =[
|
keep_only_tags = [
|
||||||
dict(name='div', attrs={'class':'articleHead'})
|
dict(name='div', attrs={'class':['articleHead','leftBox']})
|
||||||
,dict(name='div', attrs={'id':'channelContent'})
|
,dict(name='div', attrs={'id':'channelContent'})
|
||||||
]
|
,dict(name='div', attrs={'id':'rotateBox'})
|
||||||
remove_tags = [
|
,dict(name='img')
|
||||||
dict(name='div',attrs={'class':['leftBox','bottomBox clear','bottomBox','breadCrumb']})
|
]
|
||||||
,dict(name=['link','iframe','object'])
|
remove_tags = [
|
||||||
,dict(name='a',attrs={'rel':'swap'})
|
dict(name='div',attrs={'class':['bottomBox clear','bottomBox','breadCrumb','articleControls thin','articleControls thin short','extraVideoList']})
|
||||||
,dict(name='ul',attrs={'class':'tabs dl contentSwap'})
|
,dict(name='h2',attrs={'class':'microhead'})
|
||||||
]
|
,dict(name='div',attrs={'id':'commentsBottom'})
|
||||||
|
,dict(name=['link','iframe','object'])
|
||||||
|
,dict(name='a',attrs={'rel':'swap'})
|
||||||
|
,dict(name='a',attrs={'href':'/news/haiti/'})
|
||||||
|
,dict(name='ul',attrs={'class':['tabs dl contentSwap','micrositeNav clearIt hList','galleryNav rotateNav']})
|
||||||
|
]
|
||||||
|
|
||||||
|
remove_tags_after = [
|
||||||
|
dict(name='div',attrs={'class':'bottomBox clear'})
|
||||||
|
,dict(name='div',attrs={'class':'rotateBox'})
|
||||||
|
,dict(name='div',attrs={'id':'contentSwap'})
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
extra_css = '''
|
||||||
|
h1{font-family :Arial,Helvetica,sans-serif; font-size:large;}
|
||||||
|
h2{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#666666;}
|
||||||
|
h3{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000;}
|
||||||
|
p{font-family :Arial,Helvetica,sans-serif; font-size:x-small;}
|
||||||
|
.bold{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;color:#444444;margin-left: 0px;}
|
||||||
|
.subheading{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000; font-weight: bold;}
|
||||||
|
.byline{color:#666666; font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
|
||||||
|
.byline span{color:#666666; font-family :Arial,Helvetica,sans-serif; font-size: xx-small; text-transform: uppercase;}
|
||||||
|
.updated{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
|
||||||
|
.galleryCaption{font-family :Arial,Helvetica,sans-serif; font-size: x-small;}
|
||||||
|
.galleryUpdated{font-family :Arial,Helvetica,sans-serif; font-size: x-small;}
|
||||||
|
'''
|
||||||
|
|
||||||
remove_tags_after = dict(name='div',attrs={'class':'bottomBox clear'})
|
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'News' , u'http://www.torontosun.com/news/rss.xml' )
|
(u'News' , u'http://www.torontosun.com/news/rss.xml' )
|
||||||
@ -48,3 +73,19 @@ class TorontoSun(BasicNewsRecipe):
|
|||||||
,(u'World' , u'http://www.torontosun.com/news/world/rss.xml' )
|
,(u'World' , u'http://www.torontosun.com/news/world/rss.xml' )
|
||||||
,(u'Money' , u'http://www.torontosun.com/money/rss.xml' )
|
,(u'Money' , u'http://www.torontosun.com/money/rss.xml' )
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
    """Point each <img> at the image named in its src query string.

    For every ``img`` that has a ``src``, the text after the last ``?`` and
    then after the first ``=`` is treated as ``<image-url>&<name>=<W>x<H>``.
    The tag's ``src`` becomes the image URL, and ``width`` / ``height`` are
    set from the ``WxH`` value of the last ``&``-separated parameter.

    Images whose ``src`` yields no such text are left untouched.  When no
    ``WxH`` value can be parsed, only ``src`` is rewritten; previously that
    case raised IndexError.

    :param soup: parsed document exposing ``findAll``.
    :return: the same ``soup`` object, modified in place.
    """
    # Fetch images from the source named in the query string.
    for img in soup.findAll('img', src=True):
        url = img.get('src').split('?')[-1].partition('=')[-1]
        if not url:
            continue
        params = url.split('&')
        img['src'] = params[0].partition('=')[0]
        dimensions = params[-1].partition('=')[-1].split('x')
        # Only set the size when both a width and a height were found.
        if len(dimensions) >= 2:
            img['width'] = dimensions[0]
            img['height'] = dimensions[1]
    return soup
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
66
resources/recipes/tuttosport.recipe
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__author__ = 'Lorenzo Vigentini'
|
||||||
|
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
|
||||||
|
__version__ = 'v1.01'
|
||||||
|
__date__ = '30, January 2010'
|
||||||
|
__description__ = 'Sport daily news from Italy'
|
||||||
|
|
||||||
|
'''www.tuttosport.com'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class tuttosport(BasicNewsRecipe):
    """News recipe for the Italian sports daily Tuttosport (www.tuttosport.com).

    Articles come from the RSS feeds listed in ``feeds`` and are downloaded
    through the ``?print`` variant of each article URL.
    """

    author = 'Lorenzo Vigentini'
    description = 'Sport daily news from Italy'

    cover_url = 'http://www.tuttosport.com/res/imgs/logo_TuttoSport.png'
    title = 'Tuttosport'
    publisher = 'Nuova Editoriale Sportiva S.r.l'
    category = 'Sport News'

    language = 'it'
    timefmt = '[%a, %d %b, %Y]'

    oldest_article = 2
    max_articles_per_feed = 20
    use_embedded_content = False
    # NOTE(review): spelled ``recursion``; the base-class option is presumably
    # ``recursions``, so this may have no effect. Left unchanged -- confirm.
    recursion = 10

    remove_javascript = True
    no_stylesheets = True

    def print_version(self,url):
        """Return the print URL for an article.

        Keeps at most the first ten ``/``-separated segments of ``url`` and
        appends ``?print``.
        """
        segments = url.split('/')
        printURL = '/'.join(segments[0:10]) + '?print'
        return printURL

    keep_only_tags = [
        dict(name='h2', attrs={'class':'tit_Article'}),
        dict(name='div', attrs={'class':['box_Img img_L ','txt_ArticleAbstract','txt_Article txtBox_cms']})
    ]

    # The Formula 1 and Moto URLs previously began with 'hhttp://', and the
    # 'Cronaca' title was misspelled 'Cronanca'.
    feeds = [
        (u'Primo piano',u'http://www.tuttosport.com/rss/primo_piano.xml'),
        (u'Cronaca',u'http://www.tuttosport.com/rss/Cronaca-205.xml'),
        (u'Lettere al direttore',u'http://blog.tuttosport.com/direttore/feed'),
        (u'Calcio',u'http://www.tuttosport.com/rss/Calcio-3.xml'),
        (u'Speciale Derby',u'http://www.tuttosport.com/rss/Speciale-derby-310.xml'),
        (u'Formula 1',u'http://www.tuttosport.com/rss/Formula-1-7.xml'),
        (u'Moto',u'http://www.tuttosport.com/rss/Moto-8.xml'),
        (u'Basket',u'http://www.tuttosport.com/rss/Basket-9.xml'),
        (u'Altri Sport',u'http://www.tuttosport.com/rss/Altri-Sport-2.xml'),
        (u'Tuttosport League',u'http://www.tuttosport.com/rss/Tuttosport-League-245.xml'),
        (u'Scommesse',u'http://www.tuttosport.com/rss/Scommesse-286.xml')
    ]

    extra_css = '''
                body {font-family: Arial, Verdana, sans-serif; margin-bottom: 3em;}
                h1 {color:#9C3A0B;font-family:Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:20px;}
                h3 {color:#9C3A0B;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:15px;}
                h2.tit_Article {color:#9C3A0B;margin: 15px 8px 0; margin-bottom: 1px; border-bottom: 3px solid;}
                .txt_ArticleAbstract {color:#4080AE;clear: both; margin: 3px 8px;}
                .txt_Article {clear: both; margin: 8px 8px 12px;}
                .txt_Author {float: right;}
                .txt_ArticleAuthor {clear: both; margin: 8px;}
                '''
|
@ -1,7 +1,5 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
www.variety.com
|
www.variety.com
|
||||||
'''
|
'''
|
||||||
@ -20,8 +18,10 @@ class Variety(BasicNewsRecipe):
|
|||||||
publisher = 'Red Business Information'
|
publisher = 'Red Business Information'
|
||||||
category = 'Entertainment Industry News, Daily Variety, Movie Reviews, TV, Awards, Oscars, Cannes, Box Office, Hollywood'
|
category = 'Entertainment Industry News, Daily Variety, Movie Reviews, TV, Awards, Oscars, Cannes, Box Office, Hollywood'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
|
masthead_url = 'http://a330.g.akamai.net/7/330/23382/20090528190853/www.variety.com/graphics/variety/Variety_logo_green_tm.gif'
|
||||||
|
extra_css = ' body{font-family: Georgia,"Times New Roman",Times,Courier,serif } img{margin-bottom: 1em} '
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comments' : description
|
'comments' : description
|
||||||
,'tags' : category
|
,'tags' : category
|
||||||
,'language' : language
|
,'language' : language
|
||||||
@ -31,7 +31,7 @@ class Variety(BasicNewsRecipe):
|
|||||||
remove_tags = [dict(name=['object','link','map'])]
|
remove_tags = [dict(name=['object','link','map'])]
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':'article'})]
|
keep_only_tags = [dict(name='div', attrs={'id':'article'})]
|
||||||
|
|
||||||
feeds = [(u'News & Articles', u'http://feeds.feedburner.com/variety/headlines' )]
|
feeds = [(u'News & Articles', u'http://feeds.feedburner.com/variety/headlines' )]
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
@ -41,6 +41,6 @@ class Variety(BasicNewsRecipe):
|
|||||||
catid = catidr.partition('&')[0]
|
catid = catidr.partition('&')[0]
|
||||||
return 'http://www.variety.com/index.asp?layout=print_story&articleid=' + artid + '&categoryid=' + catid
|
return 'http://www.variety.com/index.asp?layout=print_story&articleid=' + artid + '&categoryid=' + catid
|
||||||
|
|
||||||
def get_article_url(self, article):
|
|
||||||
return article.get('feedburner_origlink', None)
|
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
    """Hand the parsed page to ``self.adeify_images`` and return what it gives back."""
    adeified = self.adeify_images(soup)
    return adeified
|
||||||
|
95
resources/recipes/winter_olympics.recipe
Normal file
@ -0,0 +1,95 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Starson17'
|
||||||
|
'''
|
||||||
|
www.nbcolympics.com
|
||||||
|
'''
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Olympics_2010(BasicNewsRecipe):
    """News recipe for NBC's 2010 Winter Olympics coverage (www.nbcolympics.com).

    Only the "Latest News" feed of each sport is enabled, alongside the
    general news and Team USA feeds; the alternatives are kept commented out
    so they can be switched on by hand.
    """

    title = u'NBC Olympics 2010'
    __author__ = 'Starson17'
    description = 'Olympics 2010'
    cover_url = 'http://www.digitaljournal.com/img/1/1/2/1/i/4/7/6/o/WinterOlympics2010-logo.jpg'
    publisher = 'Olympics 2010'
    tags = 'Olympics news'
    language = 'en'
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True
    # recursions = 3
    oldest_article = 7
    max_articles_per_feed = 10

    keep_only_tags = [dict(name='div', attrs={'class':['Article ','ArticleGallery']}),
                      ]

    remove_tags = [dict(name='div', attrs={'id':['RelatedTagsBox','ShareBox']}),
                   dict(name='div', attrs={'class':['DateUtilities','PhotoGallery BoxRight','Frame','ToolBox']}),
                   ]

    # RSS feeds are at: http://www.nbcolympics.com/rss/index.html
    feeds = [
        ('NBCOlympics.com - News', 'http://www.nbcolympics.com/rss/newscenter/mostpopular.xml'),
        ('NBCOlympics.com - News - Top Stories', 'http://www.nbcolympics.com/rss/newscenter/topstories.xml'),
        ('NBCOlympics.com - News - Latest Headlines', 'http://www.nbcolympics.com/rss/newscenter/latestnews.xml'),
        # ('NBCOlympics.com - Photos', 'http://www.nbcolympics.com/rss/photos/mostpopular.xml'),
        # ('NBCOlympics.com - Photos - Editorial Picks', 'http://www.nbcolympics.com/rss/photos/editorialpicks.xml'),
        # ('NBCOlympics.com - Photos - Latest Slideshows', 'http://www.nbcolympics.com/rss/photos/latestslideshows.xml'),
        ('NBCOlympics.com - Team USA - Latest news', 'http://www.nbcolympics.com/rss/countries/team-usa/index.xml'),
        # ('NBCOlympics.com - Team USA - Latest Slideshows', 'http://www.nbcolympics.com/rss/countries/team-usa/photos/index.xml'),
        # ('NBCOlympics.com - Team USA - Video', 'http://www.nbcolympics.com/rss/countries/team-usa/video/index.xml'),
        # ('NBCOlympics.com - Alpine Skiing - Most Popular News', 'http://www.nbcolympics.com/rss/sport=AS/mostpopular.xml'),
        # ('NBCOlympics.com - Alpine Skiing - Top News', 'http://www.nbcolympics.com/rss/sport=AS/topnews.xml'),
        ('NBCOlympics.com - Alpine Skiing - Latest News', 'http://www.nbcolympics.com/rss/sport=AS/latestnews.xml'),
        # ('NBCOlympics.com - Biathlon - Most Popular News', 'http://www.nbcolympics.com/rss/sport=BT/mostpopular.xml'),
        # ('NBCOlympics.com - Biathlon - Top News', 'http://www.nbcolympics.com/rss/sport=BT/topnews.xml'),
        ('NBCOlympics.com - Biathlon - Latest News', 'http://www.nbcolympics.com/rss/sport=BT/latestnews.xml'),
        # ('NBCOlympics.com - Bobsled - Most Popular News', 'http://www.nbcolympics.com/rss/sport=BS/mostpopular.xml'),
        # ('NBCOlympics.com - Bobsled - Top News', 'http://www.nbcolympics.com/rss/sport=BS/topnews.xml'),
        ('NBCOlympics.com - Bobsled - Latest News', 'http://www.nbcolympics.com/rss/sport=BS/latestnews.xml'),
        # ('NBCOlympics.com - Cross-Country - Most Popular News', 'http://www.nbcolympics.com/rss/sport=CC/mostpopular.xml'),
        # ('NBCOlympics.com - Cross-Country - Top News', 'http://www.nbcolympics.com/rss/sport=CC/topnews.xml'),
        ('NBCOlympics.com - Cross-Country - Latest News', 'http://www.nbcolympics.com/rss/sport=CC/latestnews.xml'),
        # ('NBCOlympics.com - Curling - Most Popular News', 'http://www.nbcolympics.com/rss/sport=CU/mostpopular.xml'),
        # ('NBCOlympics.com - Curling - Top News', 'http://www.nbcolympics.com/rss/sport=CU/topnews.xml'),
        ('NBCOlympics.com - Curling - Latest News', 'http://www.nbcolympics.com/rss/sport=CU/latestnews.xml'),
        # ('NBCOlympics.com - Figure Skating - Most Popular News', 'http://www.nbcolympics.com/rss/sport=FS/mostpopular.xml'),
        # ('NBCOlympics.com - Figure Skating - Top News', 'http://www.nbcolympics.com/rss/sport=FS/topnews.xml'),
        ('NBCOlympics.com - Figure Skating - Latest News', 'http://www.nbcolympics.com/rss/sport=FS/latestnews.xml'),
        # ('NBCOlympics.com - Freestyle Skiing - Most Popular News', 'http://www.nbcolympics.com/rss/sport=FR/mostpopular.xml'),
        # ('NBCOlympics.com - Freestyle Skiing - Top News', 'http://www.nbcolympics.com/rss/sport=FR/topnews.xml'),
        ('NBCOlympics.com - Freestyle Skiing - Latest News', 'http://www.nbcolympics.com/rss/sport=FR/latestnews.xml'),
        # ('NBCOlympics.com - Hockey - Most Popular News', 'http://www.nbcolympics.com/rss/sport=IH/mostpopular.xml'),
        # ('NBCOlympics.com - Hockey - Top News', 'http://www.nbcolympics.com/rss/sport=IH/topnews.xml'),
        ('NBCOlympics.com - Hockey - Latest News', 'http://www.nbcolympics.com/rss/sport=IH/latestnews.xml'),
        # ('NBCOlympics.com - Luge - Most Popular News', 'http://www.nbcolympics.com/rss/sport=LG/mostpopular.xml'),
        # ('NBCOlympics.com - Luge - Top News', 'http://www.nbcolympics.com/rss/sport=LG/topnews.xml'),
        ('NBCOlympics.com - Luge - Latest News', 'http://www.nbcolympics.com/rss/sport=LG/latestnews.xml'),
        # ('NBCOlympics.com - Nordic Combined - Most Popular News', 'http://www.nbcolympics.com/rss/sport=NC/mostpopular.xml'),
        # ('NBCOlympics.com - Nordic Combined - Top News', 'http://www.nbcolympics.com/rss/sport=NC/topnews.xml'),
        ('NBCOlympics.com - Nordic Combined - Latest News', 'http://www.nbcolympics.com/rss/sport=NC/latestnews.xml'),
        # ('NBCOlympics.com - Short Track - Most Popular News', 'http://www.nbcolympics.com/rss/sport=ST/mostpopular.xml'),
        # ('NBCOlympics.com - Short Track - Top News', 'http://www.nbcolympics.com/rss/sport=ST/topnews.xml'),
        ('NBCOlympics.com - Short Track - Latest News', 'http://www.nbcolympics.com/rss/sport=ST/latestnews.xml'),
        # ('NBCOlympics.com - Skeleton - Most Popular News', 'http://www.nbcolympics.com/rss/sport=SN/mostpopular.xml'),
        # ('NBCOlympics.com - Skeleton - Top News', 'http://www.nbcolympics.com/rss/sport=SN/topnews.xml'),
        ('NBCOlympics.com - Skeleton - Latest News', 'http://www.nbcolympics.com/rss/sport=SN/latestnews.xml'),
        # ('NBCOlympics.com - Ski Jumping - Most Popular News', 'http://www.nbcolympics.com/rss/sport=SJ/mostpopular.xml'),
        # ('NBCOlympics.com - Ski Jumping - Top News', 'http://www.nbcolympics.com/rss/sport=SJ/topnews.xml'),
        ('NBCOlympics.com - Ski Jumping - Latest News', 'http://www.nbcolympics.com/rss/sport=SJ/latestnews.xml'),
        # ('NBCOlympics.com - Snowboarding - Most Popular News', 'http://www.nbcolympics.com/rss/sport=SB/mostpopular.xml'),
        # ('NBCOlympics.com - Snowboarding - Top News', 'http://www.nbcolympics.com/rss/sport=SB/topnews.xml'),
        ('NBCOlympics.com - Snowboarding - Latest News', 'http://www.nbcolympics.com/rss/sport=SB/latestnews.xml'),
        # NOTE(review): the Speed Skating entries previously reused the Alpine
        # Skiing code (sport=AS), duplicating that section. 'SS' follows the
        # two-letter pattern of the other sports -- confirm against
        # http://www.nbcolympics.com/rss/index.html
        # ('NBCOlympics.com - Speed Skating - Most Popular News', 'http://www.nbcolympics.com/rss/sport=SS/mostpopular.xml'),
        # ('NBCOlympics.com - Speed Skating - Top News', 'http://www.nbcolympics.com/rss/sport=SS/topnews.xml'),
        ('NBCOlympics.com - Speed Skating - Latest News', 'http://www.nbcolympics.com/rss/sport=SS/latestnews.xml'),
    ]

    extra_css = '''
                h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
                h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
                p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
                body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
                '''
|
@ -20,6 +20,7 @@ class Wired(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
|
masthead_url = 'http://www.wired.com/images/home/wired_logo.gif'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
extra_css = ' body{font-family: sans-serif} .entryDescription li {display: inline; list-style-type: none} '
|
extra_css = ' body{font-family: sans-serif} .entryDescription li {display: inline; list-style-type: none} '
|
||||||
index = 'http://www.wired.com/magazine/'
|
index = 'http://www.wired.com/magazine/'
|
||||||
@ -38,14 +39,34 @@ class Wired(BasicNewsRecipe):
|
|||||||
dict(name=['object','embed','iframe','link'])
|
dict(name=['object','embed','iframe','link'])
|
||||||
,dict(name='div', attrs={'class':['podcast_storyboard','tweetmeme_button']})
|
,dict(name='div', attrs={'class':['podcast_storyboard','tweetmeme_button']})
|
||||||
]
|
]
|
||||||
|
remove_attributes = ['height','width']
|
||||||
|
|
||||||
|
|
||||||
#feeds = [(u'Articles' , u'http://www.wired.com/magazine/feed/' )]
|
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
totalfeeds = []
|
totalfeeds = []
|
||||||
|
|
||||||
soup = self.index_to_soup(self.index)
|
soup = self.index_to_soup(self.index)
|
||||||
|
majorf = soup.find('div',attrs={'class':'index'})
|
||||||
|
if majorf:
|
||||||
|
pfarticles = []
|
||||||
|
firsta = majorf.find(attrs={'class':'spread-header'})
|
||||||
|
if firsta:
|
||||||
|
pfarticles.append({
|
||||||
|
'title' :self.tag_to_string(firsta.a)
|
||||||
|
,'date' :strftime(self.timefmt)
|
||||||
|
,'url' :'http://www.wired.com' + firsta.a['href']
|
||||||
|
,'description':''
|
||||||
|
})
|
||||||
|
for itt in majorf.findAll('li'):
|
||||||
|
itema = itt.find('a',href=True)
|
||||||
|
if itema:
|
||||||
|
pfarticles.append({
|
||||||
|
'title' :self.tag_to_string(itema)
|
||||||
|
,'date' :strftime(self.timefmt)
|
||||||
|
,'url' :'http://www.wired.com' + itema['href']
|
||||||
|
,'description':''
|
||||||
|
})
|
||||||
|
totalfeeds.append(('Cover', pfarticles))
|
||||||
features = soup.find('div',attrs={'id':'my-glider'})
|
features = soup.find('div',attrs={'id':'my-glider'})
|
||||||
if features:
|
if features:
|
||||||
farticles = []
|
farticles = []
|
||||||
|
44
resources/recipes/wired_daily.recipe
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Wired_Daily(BasicNewsRecipe):
    """News recipe for the daily Wired feeds.

    Articles are downloaded through the site's /print/ URLs; page furniture
    is stripped using the tag filters below.
    """

    title = 'Wired Daily Edition'
    __author__ = 'Kovid Goyal'
    description = 'Technology news'
    timefmt = ' [%Y%b%d %H%M]'
    language = 'en'

    no_stylesheets = True

    # Everything ahead of the main content container is discarded.
    remove_tags_before = {'name': 'div', 'id': 'content'}
    remove_tags = [
        {'id': [
            'social_tools',
            'outerWrapper',
            'sidebar',
            'footer',
            'advertisement',
            'blog_subscription_unit',
            'brightcove_component',
        ]},
        {'class': 'entryActions'},
        {'name': ['noscript', 'script']},
    ]

    feeds = [
        ('Top News', 'http://feeds.wired.com/wired/index'),
        ('Culture', 'http://feeds.wired.com/wired/culture'),
        ('Software', 'http://feeds.wired.com/wired/software'),
        ('Mac', 'http://feeds.feedburner.com/cultofmac/bFow'),
        ('Gadgets', 'http://feeds.wired.com/wired/gadgets'),
        ('Cars', 'http://feeds.wired.com/wired/cars'),
        ('Entertainment', 'http://feeds.wired.com/wired/entertainment'),
        ('Gaming', 'http://feeds.wired.com/wired/gaming'),
        ('Science', 'http://feeds.wired.com/wired/science'),
        ('Med Tech', 'http://feeds.wired.com/wired/medtech'),
        ('Politics', 'http://feeds.wired.com/wired/politics'),
        ('Tech Biz', 'http://feeds.wired.com/wired/techbiz'),
        ('Commentary', 'http://feeds.wired.com/wired/commentary'),
    ]

    def print_version(self, url):
        """Return ``url`` with the wired.com site root swapped for its /print/ root."""
        site_root = 'http://www.wired.com/'
        print_root = 'http://www.wired.com/print/'
        return url.replace(site_root, print_root)
|
||||||
|
|
||||||
|
|
@ -215,7 +215,7 @@ class WSJ(BasicNewsRecipe):
|
|||||||
# first, check if there is an h3 tag which provides a section name
|
# first, check if there is an h3 tag which provides a section name
|
||||||
stag = divtag.find('h3')
|
stag = divtag.find('h3')
|
||||||
if stag:
|
if stag:
|
||||||
if stag.parent['class'] == 'dynamic':
|
if stag.parent.get('class', '') == 'dynamic':
|
||||||
# a carousel of articles is too complex to extract a section name
|
# a carousel of articles is too complex to extract a section name
|
||||||
# for each article, so we'll just call the section "Carousel"
|
# for each article, so we'll just call the section "Carousel"
|
||||||
section_name = 'Carousel'
|
section_name = 'Carousel'
|
||||||
|
30
resources/tanea.recipe
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
|
||||||
|
class TaNea(BasicNewsRecipe):

    # Recipe for Ta Nea (www.tanea.gr)
    title = u'Ta Nea'
    __author__ = 'Pan'
    oldest_article = 1
    max_articles_per_feed = 100
    no_stylesheets = True

    remove_tags_before = dict(name='div',attrs={'id':'print-body'})
    remove_tags_after = dict(name='div',attrs={'id':'text'})

    # The Greek section titles are spelled with \uXXXX escapes. Writing the
    # UTF-8 bytes into a unicode literal (u'\xce\x95...') makes every byte a
    # separate Latin-1 character, so the titles would show up as mojibake.
    feeds = [
        # Ellada
        (u'\u0395\u03bb\u03bb\u03ac\u03b4\u03b1',
         u'http://www.tanea.gr/default.asp?pid=66&la=1'),
        # Kosmos
        (u'\u039a\u03cc\u03c3\u03bc\u03bf\u03c2',
         u'http://www.tanea.gr/default.asp?pid=67&la=1'),
        # Oikonomia
        (u'\u039f\u03b9\u03ba\u03bf\u03bd\u03bf\u03bc\u03af\u03b1',
         u'http://www.tanea.gr/default.asp?pid=68&la=1'),
        # Politismos
        (u'\u03a0\u03bf\u03bb\u03b9\u03c4\u03b9\u03c3\u03bc\u03cc\u03c2',
         u'http://www.tanea.gr/default.asp?pid=69&la=1'),
        # Gnomes
        (u'\u0393\u03bd\u03ce\u03bc\u03b5\u03c2',
         u'http://www.tanea.gr/default.asp?pid=79&la=1'),
        # Ripes
        (u'\u03a1\u03b9\u03c0\u03ad\u03c2',
         u'http://www.tanea.gr/default.asp?pid=80&la=1'),
        # Aichmes
        (u'\u0391\u03b9\u03c7\u03bc\u03ad\u03c2',
         u'http://www.tanea.gr/default.asp?pid=81&la=1'),
    ]

    def print_version(self, url):
        '''
        Return `url` with the ``pid=2`` page prefix swapped for ``pid=96``.
        URLs that do not contain the ``pid=2`` prefix are returned unchanged.
        '''
        return url.replace('http://www.tanea.gr/default.asp?pid=2', 'http://www.tanea.gr/default.asp?pid=96')
|
@ -20,37 +20,8 @@ function selector(elem) {
|
|||||||
return sel;
|
return sel;
|
||||||
}
|
}
|
||||||
|
|
||||||
function find_closest_enclosing_block(top) {
|
function calculate_bookmark(y, node) {
|
||||||
var START = top-1000;
|
var elem = $(node);
|
||||||
var STOP = top;
|
|
||||||
var matches = [];
|
|
||||||
var elem, temp;
|
|
||||||
var width = 1000;
|
|
||||||
|
|
||||||
for (y = START; y < STOP; y += 20) {
|
|
||||||
for ( x = 0; x < width; x += 20) {
|
|
||||||
elem = document.elementFromPoint(x, y);
|
|
||||||
try {
|
|
||||||
elem = $(elem);
|
|
||||||
temp = elem.offset().top
|
|
||||||
matches.push(elem);
|
|
||||||
if (Math.abs(temp - START) < 25) { y = STOP; break}
|
|
||||||
} catch(error) {}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
var miny = Math.abs(matches[0].offset().top - START), min_elem = matches[0];
|
|
||||||
|
|
||||||
for (i = 1; i < matches.length; i++) {
|
|
||||||
elem = matches[i];
|
|
||||||
temp = Math.abs(elem.offset().top - START);
|
|
||||||
if ( temp < miny ) { miny = temp; min_elem = elem; }
|
|
||||||
}
|
|
||||||
return min_elem;
|
|
||||||
}
|
|
||||||
|
|
||||||
function calculate_bookmark(y) {
|
|
||||||
var elem = find_closest_enclosing_block(y);
|
|
||||||
var sel = selector(elem);
|
var sel = selector(elem);
|
||||||
var ratio = (y - elem.offset().top)/elem.height();
|
var ratio = (y - elem.offset().top)/elem.height();
|
||||||
if (ratio > 1) { ratio = 1; }
|
if (ratio > 1) { ratio = 1; }
|
||||||
|
@ -399,7 +399,7 @@ class BuildPDF2XML(Command):
|
|||||||
objects.append(obj)
|
objects.append(obj)
|
||||||
|
|
||||||
if self.newer(dest, objects):
|
if self.newer(dest, objects):
|
||||||
cmd = ['g++', '-g', '-o', dest]+objects+['-lpoppler', '-lMagickWand',
|
cmd = ['g++', '-ggdb', '-o', dest]+objects+['-lpoppler', '-lMagickWand',
|
||||||
'-lpng', '-lpthread']
|
'-lpng', '-lpthread']
|
||||||
if iswindows:
|
if iswindows:
|
||||||
cmd = [msvc.linker] + '/INCREMENTAL:NO /DEBUG /NODEFAULTLIB:libcmt.lib'.split()
|
cmd = [msvc.linker] + '/INCREMENTAL:NO /DEBUG /NODEFAULTLIB:libcmt.lib'.split()
|
||||||
|
@ -137,8 +137,20 @@ class Develop(Command):
|
|||||||
self.setup_mount_helper()
|
self.setup_mount_helper()
|
||||||
self.install_files()
|
self.install_files()
|
||||||
self.run_postinstall()
|
self.run_postinstall()
|
||||||
|
self.install_env_module()
|
||||||
self.success()
|
self.success()
|
||||||
|
|
||||||
|
def install_env_module(self):
    '''
    Write ``init_calibre.py`` into the Python library directory located
    under ``self.opts.staging_root``.

    The file content is HEADER filled in with :meth:`template_args`. If
    the library directory does not exist, nothing is written and a
    warning is emitted instead.
    '''
    from distutils import sysconfig
    site_dir = sysconfig.get_python_lib(prefix=self.opts.staging_root)
    if not os.path.exists(site_dir):
        self.warn('Cannot install calibre environment module to: '+site_dir)
        return
    target = os.path.join(site_dir, 'init_calibre.py')
    self.info('Installing calibre environment module: '+target)
    with open(target, 'wb') as out:
        out.write(HEADER.format(**self.template_args()))
|
||||||
|
|
||||||
def setup_mount_helper(self):
|
def setup_mount_helper(self):
|
||||||
def warn():
|
def warn():
|
||||||
self.warn('Failed to compile mount helper. Auto mounting of',
|
self.warn('Failed to compile mount helper. Auto mounting of',
|
||||||
@ -180,13 +192,20 @@ class Develop(Command):
|
|||||||
functions[typ]):
|
functions[typ]):
|
||||||
self.write_template(name, mod, func)
|
self.write_template(name, mod, func)
|
||||||
|
|
||||||
|
def template_args(self):
    '''
    Return a new dict with the keys ``path``, ``resources``,
    ``executables`` and ``extensions``, taken from ``self.libdir``,
    ``self.sharedir``, ``self.bindir`` and the ``calibre/plugins``
    directory joined onto ``self.libdir``.
    '''
    plugins_dir = self.j(self.libdir, 'calibre', 'plugins')
    return dict(
        path=self.libdir,
        resources=self.sharedir,
        executables=self.bindir,
        extensions=plugins_dir,
    )
|
||||||
|
|
||||||
def write_template(self, name, mod, func):
|
def write_template(self, name, mod, func):
|
||||||
template = COMPLETE_TEMPLATE if name == 'calibre-complete' else TEMPLATE
|
template = COMPLETE_TEMPLATE if name == 'calibre-complete' else TEMPLATE
|
||||||
script = template.format(
|
args = self.template_args()
|
||||||
module=mod, func=func,
|
args['module'] = mod
|
||||||
path=self.libdir, resources=self.sharedir,
|
args['func'] = func
|
||||||
executables=self.bindir,
|
script = template.format(**args)
|
||||||
extensions=self.j(self.libdir, 'calibre', 'plugins'))
|
|
||||||
path = self.j(self.staging_bindir, name)
|
path = self.j(self.staging_bindir, name)
|
||||||
if not os.path.exists(self.staging_bindir):
|
if not os.path.exists(self.staging_bindir):
|
||||||
os.makedirs(self.staging_bindir)
|
os.makedirs(self.staging_bindir)
|
||||||
|
@ -15,7 +15,7 @@ class Rsync(Command):
|
|||||||
|
|
||||||
description = 'Sync source tree from development machine'
|
description = 'Sync source tree from development machine'
|
||||||
|
|
||||||
SYNC_CMD = ('rsync -avz --exclude src/calibre/plugins '
|
SYNC_CMD = ('rsync -avz --delete --exclude src/calibre/plugins '
|
||||||
'--exclude src/calibre/manual --exclude src/calibre/trac '
|
'--exclude src/calibre/manual --exclude src/calibre/trac '
|
||||||
'--exclude .bzr --exclude .build --exclude .svn --exclude build --exclude dist '
|
'--exclude .bzr --exclude .build --exclude .svn --exclude build --exclude dist '
|
||||||
'--exclude "*.pyc" --exclude "*.pyo" --exclude "*.swp" --exclude "*.swo" '
|
'--exclude "*.pyc" --exclude "*.pyo" --exclude "*.swp" --exclude "*.swo" '
|
||||||
|
@ -48,7 +48,9 @@ class Resources(Command):
|
|||||||
dest = self.j(self.RESOURCES, 'builtin_recipes.xml')
|
dest = self.j(self.RESOURCES, 'builtin_recipes.xml')
|
||||||
if self.newer(dest, files):
|
if self.newer(dest, files):
|
||||||
self.info('\tCreating builtin_recipes.xml')
|
self.info('\tCreating builtin_recipes.xml')
|
||||||
open(dest, 'wb').write(serialize_builtin_recipes())
|
xml = serialize_builtin_recipes()
|
||||||
|
with open(dest, 'wb') as f:
|
||||||
|
f.write(xml)
|
||||||
|
|
||||||
dest = self.j(self.RESOURCES, 'ebook-convert-complete.pickle')
|
dest = self.j(self.RESOURCES, 'ebook-convert-complete.pickle')
|
||||||
files = []
|
files = []
|
||||||
|
@ -378,10 +378,11 @@ def strftime(fmt, t=None):
|
|||||||
t = time.localtime()
|
t = time.localtime()
|
||||||
early_year = t[0] < 1900
|
early_year = t[0] < 1900
|
||||||
if early_year:
|
if early_year:
|
||||||
|
replacement = 1900 if t[0]%4 == 0 else 1901
|
||||||
fmt = fmt.replace('%Y', '_early year hack##')
|
fmt = fmt.replace('%Y', '_early year hack##')
|
||||||
t = list(t)
|
t = list(t)
|
||||||
orig_year = t[0]
|
orig_year = t[0]
|
||||||
t[0] = 1900
|
t[0] = replacement
|
||||||
ans = None
|
ans = None
|
||||||
if iswindows:
|
if iswindows:
|
||||||
if isinstance(fmt, unicode):
|
if isinstance(fmt, unicode):
|
||||||
|
@ -2,7 +2,7 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
__appname__ = 'calibre'
|
__appname__ = 'calibre'
|
||||||
__version__ = '0.6.36'
|
__version__ = '0.6.40'
|
||||||
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
|
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
@ -7,6 +7,7 @@ import os
|
|||||||
import glob
|
import glob
|
||||||
from calibre.customize import FileTypePlugin, MetadataReaderPlugin, MetadataWriterPlugin
|
from calibre.customize import FileTypePlugin, MetadataReaderPlugin, MetadataWriterPlugin
|
||||||
from calibre.constants import numeric_version
|
from calibre.constants import numeric_version
|
||||||
|
from calibre.ebooks.metadata.archive import ArchiveExtract
|
||||||
|
|
||||||
class HTML2ZIP(FileTypePlugin):
|
class HTML2ZIP(FileTypePlugin):
|
||||||
name = 'HTML to ZIP'
|
name = 'HTML to ZIP'
|
||||||
@ -416,9 +417,10 @@ from calibre.devices.hanlin.driver import HANLINV3, HANLINV5, BOOX
|
|||||||
from calibre.devices.blackberry.driver import BLACKBERRY
|
from calibre.devices.blackberry.driver import BLACKBERRY
|
||||||
from calibre.devices.cybook.driver import CYBOOK
|
from calibre.devices.cybook.driver import CYBOOK
|
||||||
from calibre.devices.eb600.driver import EB600, COOL_ER, SHINEBOOK, \
|
from calibre.devices.eb600.driver import EB600, COOL_ER, SHINEBOOK, \
|
||||||
POCKETBOOK360, GER2, ITALICA, ECLICTO, DBOOK, INVESBOOK
|
POCKETBOOK360, GER2, ITALICA, ECLICTO, DBOOK, INVESBOOK, \
|
||||||
|
BOOQ
|
||||||
from calibre.devices.iliad.driver import ILIAD
|
from calibre.devices.iliad.driver import ILIAD
|
||||||
from calibre.devices.irexdr.driver import IREXDR1000
|
from calibre.devices.irexdr.driver import IREXDR1000, IREXDR800
|
||||||
from calibre.devices.jetbook.driver import JETBOOK
|
from calibre.devices.jetbook.driver import JETBOOK
|
||||||
from calibre.devices.kindle.driver import KINDLE, KINDLE2, KINDLE_DX
|
from calibre.devices.kindle.driver import KINDLE, KINDLE2, KINDLE_DX
|
||||||
from calibre.devices.nook.driver import NOOK
|
from calibre.devices.nook.driver import NOOK
|
||||||
@ -430,11 +432,11 @@ from calibre.devices.eslick.driver import ESLICK
|
|||||||
from calibre.devices.nuut2.driver import NUUT2
|
from calibre.devices.nuut2.driver import NUUT2
|
||||||
from calibre.devices.iriver.driver import IRIVER_STORY
|
from calibre.devices.iriver.driver import IRIVER_STORY
|
||||||
from calibre.devices.binatone.driver import README
|
from calibre.devices.binatone.driver import README
|
||||||
from calibre.devices.hanvon.driver import N516
|
from calibre.devices.hanvon.driver import N516, EB511
|
||||||
|
|
||||||
from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon
|
from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon
|
||||||
from calibre.library.catalog import CSV_XML, EPUB_MOBI
|
from calibre.library.catalog import CSV_XML, EPUB_MOBI
|
||||||
plugins = [HTML2ZIP, PML2PMLZ, GoogleBooks, ISBNDB, Amazon, CSV_XML, EPUB_MOBI]
|
plugins = [HTML2ZIP, PML2PMLZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon, CSV_XML, EPUB_MOBI]
|
||||||
plugins += [
|
plugins += [
|
||||||
ComicInput,
|
ComicInput,
|
||||||
EPUBInput,
|
EPUBInput,
|
||||||
@ -477,6 +479,7 @@ plugins += [
|
|||||||
CYBOOK,
|
CYBOOK,
|
||||||
ILIAD,
|
ILIAD,
|
||||||
IREXDR1000,
|
IREXDR1000,
|
||||||
|
IREXDR800,
|
||||||
JETBOOK,
|
JETBOOK,
|
||||||
SHINEBOOK,
|
SHINEBOOK,
|
||||||
POCKETBOOK360,
|
POCKETBOOK360,
|
||||||
@ -500,9 +503,11 @@ plugins += [
|
|||||||
DBOOK,
|
DBOOK,
|
||||||
INVESBOOK,
|
INVESBOOK,
|
||||||
BOOX,
|
BOOX,
|
||||||
|
BOOQ,
|
||||||
EB600,
|
EB600,
|
||||||
README,
|
README,
|
||||||
N516,
|
N516,
|
||||||
|
EB511,
|
||||||
]
|
]
|
||||||
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
||||||
x.__name__.endswith('MetadataReader')]
|
x.__name__.endswith('MetadataReader')]
|
||||||
|
@ -20,7 +20,7 @@ class ANDROID(USBMS):
|
|||||||
VENDOR_ID = {
|
VENDOR_ID = {
|
||||||
0x0bb4 : { 0x0c02 : [0x100], 0x0c01 : [0x100]},
|
0x0bb4 : { 0x0c02 : [0x100], 0x0c01 : [0x100]},
|
||||||
0x22b8 : { 0x41d9 : [0x216]},
|
0x22b8 : { 0x41d9 : [0x216]},
|
||||||
0x18d1 : { 0x4e11 : [0x0100]},
|
0x18d1 : { 0x4e11 : [0x0100], 0x4e12: [0x0100]},
|
||||||
}
|
}
|
||||||
EBOOK_DIR_MAIN = ['wordplayer/calibretransfer', 'eBooks/import', 'Books']
|
EBOOK_DIR_MAIN = ['wordplayer/calibretransfer', 'eBooks/import', 'Books']
|
||||||
EXTRA_CUSTOMIZATION_MESSAGE = _('Comma separated list of directories to '
|
EXTRA_CUSTOMIZATION_MESSAGE = _('Comma separated list of directories to '
|
||||||
|
@ -184,3 +184,14 @@ class INVESBOOK(EB600):
|
|||||||
VENDOR_NAME = 'INVES_E6'
|
VENDOR_NAME = 'INVES_E6'
|
||||||
WINDOWS_MAIN_MEM = '00INVES_E600'
|
WINDOWS_MAIN_MEM = '00INVES_E600'
|
||||||
WINDOWS_CARD_A_MEM = '00INVES_E600'
|
WINDOWS_CARD_A_MEM = '00INVES_E600'
|
||||||
|
|
||||||
|
class BOOQ(EB600):

    # Driver for the Booq; only the values below differ from EB600.
    name     = 'Booq Device Interface'
    gui_name = 'Booq'

    FORMATS = ['epub', 'mobi', 'prc', 'fb2', 'pdf', 'doc', 'rtf', 'txt',
               'html']

    VENDOR_NAME = 'NETRONIX'
    # Main memory and card A use the same identifier string
    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'EB600'
|
||||||
|
|
||||||
|
@ -126,3 +126,15 @@ class BOOX(HANLINV3):
|
|||||||
|
|
||||||
EBOOK_DIR_MAIN = 'MyBooks'
|
EBOOK_DIR_MAIN = 'MyBooks'
|
||||||
EBOOK_DIR_CARD_A = 'MyBooks'
|
EBOOK_DIR_CARD_A = 'MyBooks'
|
||||||
|
|
||||||
|
|
||||||
|
def windows_sort_drives(self, drives):
    '''
    Swap the ``main`` and ``carda`` entries of `drives` when both are set
    and the ``carda`` value compares lower than the ``main`` value.

    `drives` is modified in place and also returned.
    '''
    main_drive, card_drive = drives.get('main'), drives.get('carda')
    if card_drive and main_drive and card_drive < main_drive:
        drives['main'], drives['carda'] = card_drive, main_drive
    return drives
|
||||||
|
|
||||||
|
|
||||||
|
@ -7,6 +7,7 @@ __docformat__ = 'restructuredtext en'
|
|||||||
'''
|
'''
|
||||||
Device driver for Hanvon devices
|
Device driver for Hanvon devices
|
||||||
'''
|
'''
|
||||||
|
import re
|
||||||
|
|
||||||
from calibre.devices.usbms.driver import USBMS
|
from calibre.devices.usbms.driver import USBMS
|
||||||
|
|
||||||
@ -32,3 +33,25 @@ class N516(USBMS):
|
|||||||
|
|
||||||
EBOOK_DIR_MAIN = 'e_book'
|
EBOOK_DIR_MAIN = 'e_book'
|
||||||
SUPPORTS_SUB_DIRS = True
|
SUPPORTS_SUB_DIRS = True
|
||||||
|
|
||||||
|
class EB511(USBMS):

    # Plugin identification
    name                = 'Elonex EB 511 driver'
    gui_name            = 'EB 511'
    description         = _('Communicate with the Elonex EB 511 eBook reader.')
    author              = 'Kovid Goyal'
    supported_platforms = ['windows', 'osx', 'linux']

    FORMATS = ['epub', 'html', 'pdf', 'txt']

    # USB identifiers
    VENDOR_ID  = [0x45e]
    PRODUCT_ID = [0xffff]
    BCD        = [0x0]

    MAIN_MEMORY_VOLUME_LABEL = 'EB 511 Internal Memory'

    # Same book directory settings as the N516 driver in this module
    EBOOK_DIR_MAIN    = 'e_book'
    SUPPORTS_SUB_DIRS = True

    OSX_MAIN_MEM_VOL_PAT = re.compile(r'/eReader')
|
||||||
|
|
||||||
|
|
||||||
|
@ -36,3 +36,14 @@ class IREXDR1000(USBMS):
|
|||||||
EBOOK_DIR_MAIN = 'ebooks'
|
EBOOK_DIR_MAIN = 'ebooks'
|
||||||
DELETE_EXTS = ['.mbp']
|
DELETE_EXTS = ['.mbp']
|
||||||
SUPPORTS_SUB_DIRS = True
|
SUPPORTS_SUB_DIRS = True
|
||||||
|
|
||||||
|
class IREXDR800(IREXDR1000):

    # Plugin identification
    name        = 'IRex Digital Reader 800 Device Interface'
    description = _('Communicate with the IRex Digital Reader 800')

    # Values that differ from the IREXDR1000 driver this class extends
    PRODUCT_ID       = [0x002]
    WINDOWS_MAIN_MEM = 'DR800'
    FORMATS          = ['epub', 'html', 'pdf', 'txt']

    EBOOK_DIR_MAIN = 'Books'
    DELETE_EXTS    = []
|
||||||
|
|
||||||
|
@ -192,17 +192,15 @@ class PRS505(CLI, Device):
|
|||||||
fix_ids(*booklists)
|
fix_ids(*booklists)
|
||||||
if not os.path.exists(self._main_prefix):
|
if not os.path.exists(self._main_prefix):
|
||||||
os.makedirs(self._main_prefix)
|
os.makedirs(self._main_prefix)
|
||||||
f = open(self._main_prefix + self.__class__.MEDIA_XML, 'wb')
|
with open(self._main_prefix + self.__class__.MEDIA_XML, 'wb') as f:
|
||||||
booklists[0].write(f)
|
booklists[0].write(f)
|
||||||
f.close()
|
|
||||||
|
|
||||||
def write_card_prefix(prefix, listid):
|
def write_card_prefix(prefix, listid):
|
||||||
if prefix is not None and hasattr(booklists[listid], 'write'):
|
if prefix is not None and hasattr(booklists[listid], 'write'):
|
||||||
if not os.path.exists(prefix):
|
if not os.path.exists(prefix):
|
||||||
os.makedirs(prefix)
|
os.makedirs(prefix)
|
||||||
f = open(prefix + self.__class__.CACHE_XML, 'wb')
|
with open(prefix + self.__class__.CACHE_XML, 'wb') as f:
|
||||||
booklists[listid].write(f)
|
booklists[listid].write(f)
|
||||||
f.close()
|
|
||||||
write_card_prefix(self._card_a_prefix, 1)
|
write_card_prefix(self._card_a_prefix, 1)
|
||||||
write_card_prefix(self._card_b_prefix, 2)
|
write_card_prefix(self._card_b_prefix, 2)
|
||||||
|
|
||||||
|
@ -70,6 +70,19 @@ def extract_cover_from_embedded_svg(html, base, log):
|
|||||||
if href and os.access(path, os.R_OK):
|
if href and os.access(path, os.R_OK):
|
||||||
return open(path, 'rb').read()
|
return open(path, 'rb').read()
|
||||||
|
|
||||||
|
def extract_calibre_cover(raw, base, log):
    '''
    Return the bytes of the image referenced by a cover-only HTML page.

    The page qualifies when it contains none of the tags h1-h6, p, span,
    font or br, and exactly one <img> whose ``alt`` attribute is
    ``cover``.

    :param raw: The HTML markup to inspect.
    :param base: Directory against which the image's ``src`` (split on
                 ``/``) is resolved.
    :param log: Not used by this function.
    :return: The contents of the image file, or None if the page does not
             qualify, the image has no ``src``, or the file does not exist.
    '''
    from calibre.ebooks.BeautifulSoup import BeautifulSoup
    soup = BeautifulSoup(raw)
    matches = soup.find(name=['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'span',
        'font', 'br'])
    images = soup.findAll('img')
    if matches is not None or len(images) != 1:
        return None
    img = images[0]
    if img.get('alt', '') != 'cover':
        return None
    # An <img> without src cannot point at a cover; report "no cover"
    # instead of failing with a KeyError.
    src = img.get('src', None)
    if not src:
        return None
    path = os.path.join(base, *src.split('/'))
    if os.path.exists(path):
        # Close the file deterministically rather than relying on garbage
        # collection of the file object.
        with open(path, 'rb') as f:
            return f.read()
    return None
|
||||||
|
|
||||||
def render_html_svg_workaround(path_to_html, log, width=590, height=750):
|
def render_html_svg_workaround(path_to_html, log, width=590, height=750):
|
||||||
from calibre.ebooks.oeb.base import SVG_NS
|
from calibre.ebooks.oeb.base import SVG_NS
|
||||||
raw = open(path_to_html, 'rb').read()
|
raw = open(path_to_html, 'rb').read()
|
||||||
@ -80,6 +93,11 @@ def render_html_svg_workaround(path_to_html, log, width=590, height=750):
|
|||||||
os.path.dirname(path_to_html), log)
|
os.path.dirname(path_to_html), log)
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
if data is None:
|
||||||
|
try:
|
||||||
|
data = extract_calibre_cover(raw, os.path.dirname(path_to_html), log)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
if data is None:
|
if data is None:
|
||||||
renderer = render_html(path_to_html, width, height)
|
renderer = render_html(path_to_html, width, height)
|
||||||
data = getattr(renderer, 'data', None)
|
data = getattr(renderer, 'data', None)
|
||||||
|
@ -12,6 +12,7 @@ from calibre.customize.ui import input_profiles, output_profiles, \
|
|||||||
run_plugins_on_preprocess, run_plugins_on_postprocess
|
run_plugins_on_preprocess, run_plugins_on_postprocess
|
||||||
from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
|
from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
|
||||||
from calibre.ptempfile import PersistentTemporaryDirectory
|
from calibre.ptempfile import PersistentTemporaryDirectory
|
||||||
|
from calibre.utils.date import parse_date
|
||||||
from calibre import extract, walk
|
from calibre import extract, walk
|
||||||
|
|
||||||
DEBUG_README=u'''
|
DEBUG_README=u'''
|
||||||
@ -65,7 +66,7 @@ class Plumber(object):
|
|||||||
metadata_option_names = [
|
metadata_option_names = [
|
||||||
'title', 'authors', 'title_sort', 'author_sort', 'cover', 'comments',
|
'title', 'authors', 'title_sort', 'author_sort', 'cover', 'comments',
|
||||||
'publisher', 'series', 'series_index', 'rating', 'isbn',
|
'publisher', 'series', 'series_index', 'rating', 'isbn',
|
||||||
'tags', 'book_producer', 'language'
|
'tags', 'book_producer', 'language', 'pubdate', 'timestamp'
|
||||||
]
|
]
|
||||||
|
|
||||||
def __init__(self, input, output, log, report_progress=DummyReporter(),
|
def __init__(self, input, output, log, report_progress=DummyReporter(),
|
||||||
@ -461,6 +462,14 @@ OptionRecommendation(name='language',
|
|||||||
recommended_value=None, level=OptionRecommendation.LOW,
|
recommended_value=None, level=OptionRecommendation.LOW,
|
||||||
help=_('Set the language.')),
|
help=_('Set the language.')),
|
||||||
|
|
||||||
|
OptionRecommendation(name='pubdate',
|
||||||
|
recommended_value=None, level=OptionRecommendation.LOW,
|
||||||
|
help=_('Set the publication date.')),
|
||||||
|
|
||||||
|
OptionRecommendation(name='timestamp',
|
||||||
|
recommended_value=None, level=OptionRecommendation.LOW,
|
||||||
|
help=_('Set the book timestamp (used by the date column in calibre).')),
|
||||||
|
|
||||||
]
|
]
|
||||||
|
|
||||||
input_fmt = os.path.splitext(self.input)[1]
|
input_fmt = os.path.splitext(self.input)[1]
|
||||||
@ -619,6 +628,14 @@ OptionRecommendation(name='language',
|
|||||||
except ValueError:
|
except ValueError:
|
||||||
self.log.warn(_('Values of series index and rating must'
|
self.log.warn(_('Values of series index and rating must'
|
||||||
' be numbers. Ignoring'), val)
|
' be numbers. Ignoring'), val)
|
||||||
|
continue
|
||||||
|
elif x in ('timestamp', 'pubdate'):
|
||||||
|
try:
|
||||||
|
val = parse_date(val, assume_utc=x=='pubdate')
|
||||||
|
except:
|
||||||
|
self.log.exception(_('Failed to parse date/time') + ' ' +
|
||||||
|
unicode(val))
|
||||||
|
continue
|
||||||
setattr(mi, x, val)
|
setattr(mi, x, val)
|
||||||
|
|
||||||
|
|
||||||
|
@ -132,6 +132,8 @@ class EPUBInput(InputFormatPlugin):
|
|||||||
|
|
||||||
self.rationalize_cover(opf, log)
|
self.rationalize_cover(opf, log)
|
||||||
|
|
||||||
|
self.optimize_opf_parsing = opf
|
||||||
|
|
||||||
with open('content.opf', 'wb') as nopf:
|
with open('content.opf', 'wb') as nopf:
|
||||||
nopf.write(opf.render())
|
nopf.write(opf.render())
|
||||||
|
|
||||||
|
@ -256,7 +256,20 @@ class EPUBOutput(OutputFormatPlugin):
|
|||||||
Perform various markup transforms to get the output to render correctly
|
Perform various markup transforms to get the output to render correctly
|
||||||
in the quirky ADE.
|
in the quirky ADE.
|
||||||
'''
|
'''
|
||||||
from calibre.ebooks.oeb.base import XPath, XHTML, OEB_STYLES, barename
|
from calibre.ebooks.oeb.base import XPath, XHTML, OEB_STYLES, barename, urlunquote
|
||||||
|
|
||||||
|
# ADE cries big wet tears when it encounters an invalid fragment
|
||||||
|
# identifier in the NCX toc.
|
||||||
|
frag_pat = re.compile(r'[-A-Za-z0-9_:.]+$')
|
||||||
|
for node in self.oeb.toc.iter():
|
||||||
|
href = getattr(node, 'href', None)
|
||||||
|
if hasattr(href, 'partition'):
|
||||||
|
base, _, frag = href.partition('#')
|
||||||
|
frag = urlunquote(frag)
|
||||||
|
if frag and frag_pat.match(frag) is None:
|
||||||
|
self.log.warn(
|
||||||
|
'Removing invalid fragment identifier %r from TOC'%frag)
|
||||||
|
node.href = base
|
||||||
|
|
||||||
for x in self.oeb.spine:
|
for x in self.oeb.spine:
|
||||||
root = x.data
|
root = x.data
|
||||||
|
@ -111,7 +111,7 @@ class HTMLFile(object):
|
|||||||
raise IOError(msg)
|
raise IOError(msg)
|
||||||
raise IgnoreFile(msg, err.errno)
|
raise IgnoreFile(msg, err.errno)
|
||||||
|
|
||||||
self.is_binary = not bool(self.HTML_PAT.search(src[:4096]))
|
self.is_binary = level > 0 and not bool(self.HTML_PAT.search(src[:4096]))
|
||||||
if not self.is_binary:
|
if not self.is_binary:
|
||||||
if encoding is None:
|
if encoding is None:
|
||||||
encoding = xml_to_unicode(src[:4096], verbose=verbose)[-1]
|
encoding = xml_to_unicode(src[:4096], verbose=verbose)[-1]
|
||||||
@ -408,7 +408,10 @@ class HTMLInput(InputFormatPlugin):
|
|||||||
return link_
|
return link_
|
||||||
if base and not os.path.isabs(link):
|
if base and not os.path.isabs(link):
|
||||||
link = os.path.join(base, link)
|
link = os.path.join(base, link)
|
||||||
link = os.path.abspath(link)
|
try:
|
||||||
|
link = os.path.abspath(link)
|
||||||
|
except:
|
||||||
|
return link_
|
||||||
if not os.access(link, os.R_OK):
|
if not os.access(link, os.R_OK):
|
||||||
return link_
|
return link_
|
||||||
if os.path.isdir(link):
|
if os.path.isdir(link):
|
||||||
|
@ -50,6 +50,7 @@ from pylrf import (LrfWriter, LrfObject, LrfTag, LrfToc,
|
|||||||
STREAM_COMPRESSED, LrfTagStream, LrfStreamBase, IMAGE_TYPE_ENCODING,
|
STREAM_COMPRESSED, LrfTagStream, LrfStreamBase, IMAGE_TYPE_ENCODING,
|
||||||
BINDING_DIRECTION_ENCODING, LINE_TYPE_ENCODING, LrfFileStream,
|
BINDING_DIRECTION_ENCODING, LINE_TYPE_ENCODING, LrfFileStream,
|
||||||
STREAM_FORCE_COMPRESSED)
|
STREAM_FORCE_COMPRESSED)
|
||||||
|
from calibre.utils.date import isoformat
|
||||||
|
|
||||||
DEFAULT_SOURCE_ENCODING = "cp1252" # default is us-windows character set
|
DEFAULT_SOURCE_ENCODING = "cp1252" # default is us-windows character set
|
||||||
DEFAULT_GENREADING = "fs" # default is yes to both lrf and lrs
|
DEFAULT_GENREADING = "fs" # default is yes to both lrf and lrs
|
||||||
@ -852,7 +853,7 @@ class DocInfo(object):
|
|||||||
self.thumbnail = None
|
self.thumbnail = None
|
||||||
self.language = "en"
|
self.language = "en"
|
||||||
self.creator = None
|
self.creator = None
|
||||||
self.creationdate = date.today().isoformat()
|
self.creationdate = str(isoformat(date.today()))
|
||||||
self.producer = "%s v%s"%(__appname__, __version__)
|
self.producer = "%s v%s"%(__appname__, __version__)
|
||||||
self.numberofpages = "0"
|
self.numberofpages = "0"
|
||||||
|
|
||||||
|
@ -10,9 +10,11 @@ import os, mimetypes, sys, re
|
|||||||
from urllib import unquote, quote
|
from urllib import unquote, quote
|
||||||
from urlparse import urlparse
|
from urlparse import urlparse
|
||||||
|
|
||||||
|
|
||||||
from calibre import relpath
|
from calibre import relpath
|
||||||
|
|
||||||
|
from calibre.utils.config import tweaks
|
||||||
|
from calibre.utils.date import isoformat
|
||||||
|
|
||||||
_author_pat = re.compile(',?\s+(and|with)\s+', re.IGNORECASE)
|
_author_pat = re.compile(',?\s+(and|with)\s+', re.IGNORECASE)
|
||||||
def string_to_authors(raw):
|
def string_to_authors(raw):
|
||||||
raw = raw.replace('&&', u'\uffff')
|
raw = raw.replace('&&', u'\uffff')
|
||||||
@ -27,6 +29,9 @@ def authors_to_string(authors):
|
|||||||
return ''
|
return ''
|
||||||
|
|
||||||
def author_to_author_sort(author):
|
def author_to_author_sort(author):
|
||||||
|
method = tweaks['author_sort_copy_method']
|
||||||
|
if method == 'copy' or (method == 'comma' and author.count(',') > 0):
|
||||||
|
return author
|
||||||
tokens = author.split()
|
tokens = author.split()
|
||||||
tokens = tokens[-1:] + tokens[:-1]
|
tokens = tokens[-1:] + tokens[:-1]
|
||||||
if len(tokens) > 1:
|
if len(tokens) > 1:
|
||||||
@ -340,9 +345,9 @@ class MetaInformation(object):
|
|||||||
if self.rating is not None:
|
if self.rating is not None:
|
||||||
fmt('Rating', self.rating)
|
fmt('Rating', self.rating)
|
||||||
if self.timestamp is not None:
|
if self.timestamp is not None:
|
||||||
fmt('Timestamp', self.timestamp.isoformat(' '))
|
fmt('Timestamp', isoformat(self.timestamp))
|
||||||
if self.pubdate is not None:
|
if self.pubdate is not None:
|
||||||
fmt('Published', self.pubdate.isoformat(' '))
|
fmt('Published', isoformat(self.pubdate))
|
||||||
if self.rights is not None:
|
if self.rights is not None:
|
||||||
fmt('Rights', unicode(self.rights))
|
fmt('Rights', unicode(self.rights))
|
||||||
if self.lccn:
|
if self.lccn:
|
||||||
|
@ -7,12 +7,11 @@ __docformat__ = 'restructuredtext en'
|
|||||||
Fetch metadata using Amazon AWS
|
Fetch metadata using Amazon AWS
|
||||||
'''
|
'''
|
||||||
import sys, re
|
import sys, re
|
||||||
from datetime import datetime
|
|
||||||
|
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
from dateutil import parser
|
|
||||||
|
|
||||||
from calibre import browser
|
from calibre import browser
|
||||||
|
from calibre.utils.date import parse_date, utcnow
|
||||||
from calibre.ebooks.metadata import MetaInformation, string_to_authors
|
from calibre.ebooks.metadata import MetaInformation, string_to_authors
|
||||||
|
|
||||||
AWS_NS = 'http://webservices.amazon.com/AWSECommerceService/2005-10-05'
|
AWS_NS = 'http://webservices.amazon.com/AWSECommerceService/2005-10-05'
|
||||||
@ -44,9 +43,8 @@ def get_social_metadata(title, authors, publisher, isbn):
|
|||||||
try:
|
try:
|
||||||
d = root.findtext('.//'+AWS('PublicationDate'))
|
d = root.findtext('.//'+AWS('PublicationDate'))
|
||||||
if d:
|
if d:
|
||||||
default = datetime.utcnow()
|
default = utcnow().replace(day=15)
|
||||||
default = datetime(default.year, default.month, 15)
|
d = parse_date(d[0].text, assume_utc=True, default=default)
|
||||||
d = parser.parse(d[0].text, default=default)
|
|
||||||
mi.pubdate = d
|
mi.pubdate = d
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
65
src/calibre/ebooks/metadata/archive.py
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
|
from __future__ import with_statement
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import os
|
||||||
|
from contextlib import closing
|
||||||
|
|
||||||
|
from calibre.customize import FileTypePlugin
|
||||||
|
|
||||||
|
def is_comic(list_of_names):
    '''
    Return True if all names in `list_of_names` share a single extension
    and that extension is jpg, jpeg or png.

    The extension is the text after the last ``.`` in a name (the whole
    name when it contains no ``.``), compared case-insensitively. An empty
    list, or names with more than one distinct extension, give False.
    '''
    extensions = set([x.rpartition('.')[-1].lower() for x in list_of_names])
    # Use the next() builtin; the iterator .next() method exists only in
    # Python 2.
    return len(extensions) == 1 and next(iter(extensions)) in ('jpg', 'jpeg', 'png')
|
||||||
|
|
||||||
|
class ArchiveExtract(FileTypePlugin):

    name = 'Archive Extract'
    author = 'Kovid Goyal'
    description = _('Extract common e-book formats from archives '
        '(zip/rar) files. Also try to autodetect if they are actually '
        'cbz/cbr files.')
    file_types = set(['zip', 'rar'])
    supported_platforms = ['windows', 'osx', 'linux']
    on_import = True

    def run(self, archive):
        '''
        Decide what should be imported for the zip/rar file at `archive`.

        * If :func:`is_comic` accepts the member names (only names that
          contain a ``.`` are considered), the archive is copied to a
          temporary ``.cbz``/``.cbr`` file and that path is returned.
        * If exactly one such member exists and its extension is one of
          the supported e-book formats, the member is extracted to a
          temporary file and that path is returned.
        * Otherwise `archive` is returned unchanged.

        :param archive: Path to the archive; a name ending in ``.rar``
                        (case-insensitive) is treated as RAR, anything
                        else as ZIP.
        :return: Path of the file to import.
        '''
        import shutil

        is_rar = archive.lower().endswith('.rar')
        zf = None
        if is_rar:
            from calibre.libunrar import extract_member, names
        else:
            from calibre.utils.zipfile import ZipFile
            zf = ZipFile(archive, 'r')

        try:
            if is_rar:
                fnames = names(archive)
            else:
                fnames = zf.namelist()

            fnames = [x for x in fnames if '.' in x]
            if is_comic(fnames):
                ext = '.cbr' if is_rar else '.cbz'
                of = self.temporary_file('_archive_extract'+ext)
                # closing() releases the temporary file even if the copy
                # fails; copyfileobj streams the data in chunks instead of
                # loading the whole archive into memory.
                with closing(of):
                    with open(archive, 'rb') as f:
                        shutil.copyfileobj(f, of)
                return of.name
            if len(fnames) > 1 or not fnames:
                return archive
            fname = fnames[0]
            ext = os.path.splitext(fname)[1][1:]
            if ext.lower() not in ('lit', 'epub', 'mobi', 'prc', 'rtf', 'pdf',
                    'mp3', 'pdb', 'azw', 'azw1'):
                return archive

            of = self.temporary_file('_archive_extract.'+ext)
            with closing(of):
                if is_rar:
                    data = extract_member(archive, match=None, name=fname)[1]
                    of.write(data)
                else:
                    of.write(zf.read(fname))
            return of.name
        finally:
            # The zip handle was previously never closed on any code path
            if zf is not None:
                zf.close()
|
||||||
|
|
@ -15,6 +15,7 @@ from calibre.ebooks.metadata import string_to_authors, authors_to_sort_string, \
|
|||||||
title_sort, MetaInformation
|
title_sort, MetaInformation
|
||||||
from calibre.ebooks.lrf.meta import LRFMetaFile
|
from calibre.ebooks.lrf.meta import LRFMetaFile
|
||||||
from calibre import prints
|
from calibre import prints
|
||||||
|
from calibre.utils.date import parse_date
|
||||||
|
|
||||||
USAGE='%%prog ebook_file [' + _('options') + ']\n' + \
|
USAGE='%%prog ebook_file [' + _('options') + ']\n' + \
|
||||||
_('''
|
_('''
|
||||||
@ -69,6 +70,8 @@ def config():
|
|||||||
help=_('Set the book producer.'))
|
help=_('Set the book producer.'))
|
||||||
c.add_opt('language', ['-l', '--language'],
|
c.add_opt('language', ['-l', '--language'],
|
||||||
help=_('Set the language.'))
|
help=_('Set the language.'))
|
||||||
|
c.add_opt('pubdate', ['-d', '--date'],
|
||||||
|
help=_('Set the published date.'))
|
||||||
|
|
||||||
c.add_opt('get_cover', ['--get-cover'],
|
c.add_opt('get_cover', ['--get-cover'],
|
||||||
help=_('Get the cover from the ebook and save it at as the '
|
help=_('Get the cover from the ebook and save it at as the '
|
||||||
@ -132,6 +135,8 @@ def do_set_metadata(opts, mi, stream, stream_type):
|
|||||||
mi.series = opts.series.strip()
|
mi.series = opts.series.strip()
|
||||||
if getattr(opts, 'series_index', None) is not None:
|
if getattr(opts, 'series_index', None) is not None:
|
||||||
mi.series_index = float(opts.series_index.strip())
|
mi.series_index = float(opts.series_index.strip())
|
||||||
|
if getattr(opts, 'pubdate', None) is not None:
|
||||||
|
mi.pubdate = parse_date(opts.pubdate, assume_utc=False, as_utc=False)
|
||||||
|
|
||||||
if getattr(opts, 'cover', None) is not None:
|
if getattr(opts, 'cover', None) is not None:
|
||||||
ext = os.path.splitext(opts.cover)[1].replace('.', '').upper()
|
ext = os.path.splitext(opts.cover)[1].replace('.', '').upper()
|
||||||
|
@ -5,7 +5,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
|||||||
|
|
||||||
'''Read meta information from epub files'''
|
'''Read meta information from epub files'''
|
||||||
|
|
||||||
import os
|
import os, re
|
||||||
from cStringIO import StringIO
|
from cStringIO import StringIO
|
||||||
from contextlib import closing
|
from contextlib import closing
|
||||||
|
|
||||||
@ -29,15 +29,15 @@ class Container(dict):
|
|||||||
def __init__(self, stream=None):
|
def __init__(self, stream=None):
|
||||||
if not stream: return
|
if not stream: return
|
||||||
soup = BeautifulStoneSoup(stream.read())
|
soup = BeautifulStoneSoup(stream.read())
|
||||||
container = soup.find('container')
|
container = soup.find(name=re.compile(r'container$', re.I))
|
||||||
if not container:
|
if not container:
|
||||||
raise OCFException("<container/> element missing")
|
raise OCFException("<container> element missing")
|
||||||
if container.get('version', None) != '1.0':
|
if container.get('version', None) != '1.0':
|
||||||
raise EPubException("unsupported version of OCF")
|
raise EPubException("unsupported version of OCF")
|
||||||
rootfiles = container.find('rootfiles')
|
rootfiles = container.find(re.compile(r'rootfiles$', re.I))
|
||||||
if not rootfiles:
|
if not rootfiles:
|
||||||
raise EPubException("<rootfiles/> element missing")
|
raise EPubException("<rootfiles/> element missing")
|
||||||
for rootfile in rootfiles.findAll('rootfile'):
|
for rootfile in rootfiles.findAll(re.compile(r'rootfile$', re.I)):
|
||||||
try:
|
try:
|
||||||
self[rootfile['media-type']] = rootfile['full-path']
|
self[rootfile['media-type']] = rootfile['full-path']
|
||||||
except KeyError:
|
except KeyError:
|
||||||
@ -69,7 +69,7 @@ class OCFReader(OCF):
|
|||||||
self.opf_path = self.container[OPF.MIMETYPE]
|
self.opf_path = self.container[OPF.MIMETYPE]
|
||||||
try:
|
try:
|
||||||
with closing(self.open(self.opf_path)) as f:
|
with closing(self.open(self.opf_path)) as f:
|
||||||
self.opf = OPF(f, self.root)
|
self.opf = OPF(f, self.root, populate_spine=False)
|
||||||
except KeyError:
|
except KeyError:
|
||||||
raise EPubException("missing OPF package file")
|
raise EPubException("missing OPF package file")
|
||||||
|
|
||||||
@ -101,10 +101,9 @@ class OCFDirReader(OCFReader):
|
|||||||
def get_cover(opf, opf_path, stream):
|
def get_cover(opf, opf_path, stream):
|
||||||
from calibre.ebooks import render_html_svg_workaround
|
from calibre.ebooks import render_html_svg_workaround
|
||||||
from calibre.utils.logging import default_log
|
from calibre.utils.logging import default_log
|
||||||
spine = list(opf.spine_items())
|
cpage = opf.first_spine_item()
|
||||||
if not spine:
|
if not cpage:
|
||||||
return
|
return
|
||||||
cpage = spine[0]
|
|
||||||
with TemporaryDirectory('_epub_meta') as tdir:
|
with TemporaryDirectory('_epub_meta') as tdir:
|
||||||
with CurrentDir(tdir):
|
with CurrentDir(tdir):
|
||||||
stream.seek(0)
|
stream.seek(0)
|
||||||
|
@ -6,14 +6,13 @@ __docformat__ = 'restructuredtext en'
|
|||||||
import sys, textwrap
|
import sys, textwrap
|
||||||
from urllib import urlencode
|
from urllib import urlencode
|
||||||
from functools import partial
|
from functools import partial
|
||||||
from datetime import datetime
|
|
||||||
|
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
from dateutil import parser
|
|
||||||
|
|
||||||
from calibre import browser, preferred_encoding
|
from calibre import browser, preferred_encoding
|
||||||
from calibre.ebooks.metadata import MetaInformation
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
from calibre.utils.config import OptionParser
|
from calibre.utils.config import OptionParser
|
||||||
|
from calibre.utils.date import parse_date, utcnow
|
||||||
|
|
||||||
NAMESPACES = {
|
NAMESPACES = {
|
||||||
'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/',
|
'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/',
|
||||||
@ -156,9 +155,8 @@ class ResultList(list):
|
|||||||
try:
|
try:
|
||||||
d = date(entry)
|
d = date(entry)
|
||||||
if d:
|
if d:
|
||||||
default = datetime.utcnow()
|
default = utcnow().replace(day=15)
|
||||||
default = datetime(default.year, default.month, 15)
|
d = parse_date(d[0].text, assume_utc=True, default=default)
|
||||||
d = parser.parse(d[0].text, default=default)
|
|
||||||
else:
|
else:
|
||||||
d = None
|
d = None
|
||||||
except:
|
except:
|
||||||
|
@ -65,6 +65,10 @@ def _metadata_from_formats(formats):
|
|||||||
|
|
||||||
return mi
|
return mi
|
||||||
|
|
||||||
|
def is_recipe(filename):
    '''
    Return True if `filename` starts with ``calibre`` and the part before
    its last dot ends with ``_recipe_out``.

    A name with no dot at all never matches, because nothing precedes the
    last dot.
    '''
    if not filename.startswith('calibre'):
        return False
    stem = filename.rpartition('.')[0]
    return stem.endswith('_recipe_out')
|
||||||
|
|
||||||
def get_metadata(stream, stream_type='lrf', use_libprs_metadata=False):
|
def get_metadata(stream, stream_type='lrf', use_libprs_metadata=False):
|
||||||
if stream_type: stream_type = stream_type.lower()
|
if stream_type: stream_type = stream_type.lower()
|
||||||
if stream_type in ('html', 'html', 'xhtml', 'xhtm', 'xml'):
|
if stream_type in ('html', 'html', 'xhtml', 'xhtm', 'xml'):
|
||||||
@ -84,11 +88,10 @@ def get_metadata(stream, stream_type='lrf', use_libprs_metadata=False):
|
|||||||
return opf
|
return opf
|
||||||
|
|
||||||
mi = MetaInformation(None, None)
|
mi = MetaInformation(None, None)
|
||||||
if prefs['read_file_metadata']:
|
|
||||||
mi = get_file_type_metadata(stream, stream_type)
|
|
||||||
|
|
||||||
name = os.path.basename(getattr(stream, 'name', ''))
|
name = os.path.basename(getattr(stream, 'name', ''))
|
||||||
base = metadata_from_filename(name)
|
base = metadata_from_filename(name)
|
||||||
|
if is_recipe(name) or prefs['read_file_metadata']:
|
||||||
|
mi = get_file_type_metadata(stream, stream_type)
|
||||||
if base.title == os.path.splitext(name)[0] and base.authors is None:
|
if base.title == os.path.splitext(name)[0] and base.authors is None:
|
||||||
# Assume that there was no metadata in the file and the user set pattern
|
# Assume that there was no metadata in the file and the user set pattern
|
||||||
# to match meta info from the file name did not match.
|
# to match meta info from the file name did not match.
|
||||||
|
@ -11,13 +11,11 @@ __docformat__ = 'restructuredtext en'
|
|||||||
|
|
||||||
from struct import pack, unpack
|
from struct import pack, unpack
|
||||||
from cStringIO import StringIO
|
from cStringIO import StringIO
|
||||||
from datetime import datetime
|
|
||||||
|
|
||||||
from calibre.ebooks.mobi import MobiError
|
from calibre.ebooks.mobi import MobiError
|
||||||
from calibre.ebooks.mobi.writer import rescale_image, MAX_THUMB_DIMEN
|
from calibre.ebooks.mobi.writer import rescale_image, MAX_THUMB_DIMEN
|
||||||
from calibre.ebooks.mobi.langcodes import iana2mobi
|
from calibre.ebooks.mobi.langcodes import iana2mobi
|
||||||
|
from calibre.utils.date import now as nowf
|
||||||
import struct
|
|
||||||
|
|
||||||
class StreamSlicer(object):
|
class StreamSlicer(object):
|
||||||
|
|
||||||
@ -105,11 +103,12 @@ class MetadataUpdater(object):
|
|||||||
have_exth = self.have_exth = (flags & 0x40) != 0
|
have_exth = self.have_exth = (flags & 0x40) != 0
|
||||||
self.cover_record = self.thumbnail_record = None
|
self.cover_record = self.thumbnail_record = None
|
||||||
self.timestamp = None
|
self.timestamp = None
|
||||||
|
|
||||||
self.pdbrecords = self.get_pdbrecords()
|
self.pdbrecords = self.get_pdbrecords()
|
||||||
|
|
||||||
|
self.original_exth_records = {}
|
||||||
if not have_exth:
|
if not have_exth:
|
||||||
self.create_exth()
|
self.create_exth()
|
||||||
|
self.have_exth = True
|
||||||
# Fetch timestamp, cover_record, thumbnail_record
|
# Fetch timestamp, cover_record, thumbnail_record
|
||||||
self.fetchEXTHFields()
|
self.fetchEXTHFields()
|
||||||
|
|
||||||
@ -131,14 +130,18 @@ class MetadataUpdater(object):
|
|||||||
content = exth[pos + 8: pos + size]
|
content = exth[pos + 8: pos + size]
|
||||||
pos += size
|
pos += size
|
||||||
|
|
||||||
|
self.original_exth_records[id] = content
|
||||||
|
|
||||||
if id == 106:
|
if id == 106:
|
||||||
self.timestamp = content
|
self.timestamp = content
|
||||||
elif id == 201:
|
elif id == 201:
|
||||||
rindex, = self.cover_rindex, = unpack('>I', content)
|
rindex, = self.cover_rindex, = unpack('>i', content)
|
||||||
self.cover_record = self.record(rindex + image_base)
|
if rindex > 0 :
|
||||||
|
self.cover_record = self.record(rindex + image_base)
|
||||||
elif id == 202:
|
elif id == 202:
|
||||||
rindex, = self.thumbnail_rindex, = unpack('>I', content)
|
rindex, = self.thumbnail_rindex, = unpack('>i', content)
|
||||||
self.thumbnail_record = self.record(rindex + image_base)
|
if rindex > 0 :
|
||||||
|
self.thumbnail_record = self.record(rindex + image_base)
|
||||||
|
|
||||||
def patch(self, off, new_record0):
|
def patch(self, off, new_record0):
|
||||||
# Save the current size of each record
|
# Save the current size of each record
|
||||||
@ -181,14 +184,14 @@ class MetadataUpdater(object):
|
|||||||
off = self.pdbrecords[section][0]
|
off = self.pdbrecords[section][0]
|
||||||
self.patch(off, new)
|
self.patch(off, new)
|
||||||
|
|
||||||
def create_exth(self, exth=None):
|
def create_exth(self, new_title=None, exth=None):
|
||||||
# Add an EXTH block to record 0, rewrite the stream
|
# Add an EXTH block to record 0, rewrite the stream
|
||||||
# self.hexdump(self.record0)
|
# self.hexdump(self.record0)
|
||||||
|
|
||||||
# Fetch the title
|
# Fetch the existing title
|
||||||
title_offset, = struct.unpack('>L', self.record0[0x54:0x58])
|
title_offset, = unpack('>L', self.record0[0x54:0x58])
|
||||||
title_length, = struct.unpack('>L', self.record0[0x58:0x5c])
|
title_length, = unpack('>L', self.record0[0x58:0x5c])
|
||||||
title_in_file, = struct.unpack('%ds' % (title_length), self.record0[title_offset:title_offset + title_length])
|
title_in_file, = unpack('%ds' % (title_length), self.record0[title_offset:title_offset + title_length])
|
||||||
|
|
||||||
# Adjust length to accommodate PrimaryINDX if necessary
|
# Adjust length to accommodate PrimaryINDX if necessary
|
||||||
mobi_header_length, = unpack('>L', self.record0[0x14:0x18])
|
mobi_header_length, = unpack('>L', self.record0[0x14:0x18])
|
||||||
@ -207,14 +210,21 @@ class MetadataUpdater(object):
|
|||||||
exth = ['EXTH', pack('>II', 12, 0), pad]
|
exth = ['EXTH', pack('>II', 12, 0), pad]
|
||||||
exth = ''.join(exth)
|
exth = ''.join(exth)
|
||||||
|
|
||||||
# Update title_offset
|
# Update title_offset, title_len if new_title
|
||||||
self.record0[0x54:0x58] = pack('>L', 0x10 + mobi_header_length + len(exth))
|
self.record0[0x54:0x58] = pack('>L', 0x10 + mobi_header_length + len(exth))
|
||||||
|
if new_title:
|
||||||
|
self.record0[0x58:0x5c] = pack('>L', len(new_title))
|
||||||
|
|
||||||
# Create an updated Record0
|
# Create an updated Record0
|
||||||
new_record0 = StringIO()
|
new_record0 = StringIO()
|
||||||
new_record0.write(self.record0[:0x10 + mobi_header_length])
|
new_record0.write(self.record0[:0x10 + mobi_header_length])
|
||||||
new_record0.write(exth)
|
new_record0.write(exth)
|
||||||
new_record0.write(title_in_file)
|
if new_title:
|
||||||
|
#new_record0.write(new_title.encode(self.codec, 'replace'))
|
||||||
|
new_title = (new_title or _('Unknown')).encode(self.codec, 'replace')
|
||||||
|
new_record0.write(new_title)
|
||||||
|
else:
|
||||||
|
new_record0.write(title_in_file)
|
||||||
|
|
||||||
# Pad to a 4-byte boundary
|
# Pad to a 4-byte boundary
|
||||||
trail = len(new_record0.getvalue()) % 4
|
trail = len(new_record0.getvalue()) % 4
|
||||||
@ -244,7 +254,7 @@ class MetadataUpdater(object):
|
|||||||
def get_pdbrecords(self):
|
def get_pdbrecords(self):
|
||||||
pdbrecords = []
|
pdbrecords = []
|
||||||
for i in xrange(self.nrecs):
|
for i in xrange(self.nrecs):
|
||||||
offset, a1,a2,a3,a4 = struct.unpack('>LBBBB', self.data[78+i*8:78+i*8+8])
|
offset, a1,a2,a3,a4 = unpack('>LBBBB', self.data[78+i*8:78+i*8+8])
|
||||||
flags, val = a1, a2<<16|a3<<8|a4
|
flags, val = a1, a2<<16|a3<<8|a4
|
||||||
pdbrecords.append( [offset, flags, val] )
|
pdbrecords.append( [offset, flags, val] )
|
||||||
return pdbrecords
|
return pdbrecords
|
||||||
@ -275,6 +285,10 @@ class MetadataUpdater(object):
|
|||||||
return StreamSlicer(self.stream, start, stop)
|
return StreamSlicer(self.stream, start, stop)
|
||||||
|
|
||||||
def update(self, mi):
|
def update(self, mi):
|
||||||
|
def pop_exth_record(exth_id):
|
||||||
|
if exth_id in self.original_exth_records:
|
||||||
|
self.original_exth_records.pop(exth_id)
|
||||||
|
|
||||||
if self.type != "BOOKMOBI":
|
if self.type != "BOOKMOBI":
|
||||||
raise MobiError("Setting metadata only supported for MOBI files of type 'BOOK'.\n"
|
raise MobiError("Setting metadata only supported for MOBI files of type 'BOOK'.\n"
|
||||||
"\tThis is a '%s' file of type '%s'" % (self.type[0:4], self.type[4:8]))
|
"\tThis is a '%s' file of type '%s'" % (self.type[0:4], self.type[4:8]))
|
||||||
@ -289,35 +303,53 @@ class MetadataUpdater(object):
|
|||||||
if mi.author_sort and pas:
|
if mi.author_sort and pas:
|
||||||
authors = mi.author_sort
|
authors = mi.author_sort
|
||||||
recs.append((100, authors.encode(self.codec, 'replace')))
|
recs.append((100, authors.encode(self.codec, 'replace')))
|
||||||
|
pop_exth_record(100)
|
||||||
elif mi.authors:
|
elif mi.authors:
|
||||||
authors = '; '.join(mi.authors)
|
authors = '; '.join(mi.authors)
|
||||||
recs.append((100, authors.encode(self.codec, 'replace')))
|
recs.append((100, authors.encode(self.codec, 'replace')))
|
||||||
|
pop_exth_record(100)
|
||||||
if mi.publisher:
|
if mi.publisher:
|
||||||
recs.append((101, mi.publisher.encode(self.codec, 'replace')))
|
recs.append((101, mi.publisher.encode(self.codec, 'replace')))
|
||||||
|
pop_exth_record(101)
|
||||||
if mi.comments:
|
if mi.comments:
|
||||||
recs.append((103, mi.comments.encode(self.codec, 'replace')))
|
recs.append((103, mi.comments.encode(self.codec, 'replace')))
|
||||||
|
pop_exth_record(103)
|
||||||
if mi.isbn:
|
if mi.isbn:
|
||||||
recs.append((104, mi.isbn.encode(self.codec, 'replace')))
|
recs.append((104, mi.isbn.encode(self.codec, 'replace')))
|
||||||
|
pop_exth_record(104)
|
||||||
if mi.tags:
|
if mi.tags:
|
||||||
subjects = '; '.join(mi.tags)
|
subjects = '; '.join(mi.tags)
|
||||||
recs.append((105, subjects.encode(self.codec, 'replace')))
|
recs.append((105, subjects.encode(self.codec, 'replace')))
|
||||||
|
pop_exth_record(105)
|
||||||
if mi.pubdate:
|
if mi.pubdate:
|
||||||
recs.append((106, str(mi.pubdate).encode(self.codec, 'replace')))
|
recs.append((106, str(mi.pubdate).encode(self.codec, 'replace')))
|
||||||
|
pop_exth_record(106)
|
||||||
elif mi.timestamp:
|
elif mi.timestamp:
|
||||||
recs.append((106, str(mi.timestamp).encode(self.codec, 'replace')))
|
recs.append((106, str(mi.timestamp).encode(self.codec, 'replace')))
|
||||||
|
pop_exth_record(106)
|
||||||
elif self.timestamp:
|
elif self.timestamp:
|
||||||
recs.append((106, self.timestamp))
|
recs.append((106, self.timestamp))
|
||||||
|
pop_exth_record(106)
|
||||||
else:
|
else:
|
||||||
recs.append((106, str(datetime.now()).encode(self.codec, 'replace')))
|
recs.append((106, nowf().isoformat().encode(self.codec, 'replace')))
|
||||||
|
pop_exth_record(106)
|
||||||
if self.cover_record is not None:
|
if self.cover_record is not None:
|
||||||
recs.append((201, pack('>I', self.cover_rindex)))
|
recs.append((201, pack('>I', self.cover_rindex)))
|
||||||
recs.append((203, pack('>I', 0)))
|
recs.append((203, pack('>I', 0)))
|
||||||
|
pop_exth_record(201)
|
||||||
|
pop_exth_record(203)
|
||||||
if self.thumbnail_record is not None:
|
if self.thumbnail_record is not None:
|
||||||
recs.append((202, pack('>I', self.thumbnail_rindex)))
|
recs.append((202, pack('>I', self.thumbnail_rindex)))
|
||||||
|
pop_exth_record(202)
|
||||||
|
|
||||||
if getattr(self, 'encryption_type', -1) != 0:
|
if getattr(self, 'encryption_type', -1) != 0:
|
||||||
raise MobiError('Setting metadata in DRMed MOBI files is not supported.')
|
raise MobiError('Setting metadata in DRMed MOBI files is not supported.')
|
||||||
|
|
||||||
|
# Restore any original EXTH fields that weren't modified/updated
|
||||||
|
for id in sorted(self.original_exth_records):
|
||||||
|
recs.append((id, self.original_exth_records[id]))
|
||||||
|
recs = sorted(recs, key=lambda x:(x[0],x[0]))
|
||||||
|
|
||||||
exth = StringIO()
|
exth = StringIO()
|
||||||
for code, data in recs:
|
for code, data in recs:
|
||||||
exth.write(pack('>II', code, len(data) + 8))
|
exth.write(pack('>II', code, len(data) + 8))
|
||||||
@ -332,7 +364,7 @@ class MetadataUpdater(object):
|
|||||||
raise MobiError('No existing EXTH record. Cannot update metadata.')
|
raise MobiError('No existing EXTH record. Cannot update metadata.')
|
||||||
|
|
||||||
self.record0[92:96] = iana2mobi(mi.language)
|
self.record0[92:96] = iana2mobi(mi.language)
|
||||||
self.create_exth(exth)
|
self.create_exth(exth=exth, new_title=mi.title)
|
||||||
|
|
||||||
# Fetch updated timestamp, cover_record, thumbnail_record
|
# Fetch updated timestamp, cover_record, thumbnail_record
|
||||||
self.fetchEXTHFields()
|
self.fetchEXTHFields()
|
||||||
|
@ -12,12 +12,12 @@ from urllib import unquote
|
|||||||
from urlparse import urlparse
|
from urlparse import urlparse
|
||||||
|
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
from dateutil import parser
|
|
||||||
|
|
||||||
from calibre.ebooks.chardet import xml_to_unicode
|
from calibre.ebooks.chardet import xml_to_unicode
|
||||||
from calibre.constants import __appname__, __version__, filesystem_encoding
|
from calibre.constants import __appname__, __version__, filesystem_encoding
|
||||||
from calibre.ebooks.metadata.toc import TOC
|
from calibre.ebooks.metadata.toc import TOC
|
||||||
from calibre.ebooks.metadata import MetaInformation, string_to_authors
|
from calibre.ebooks.metadata import MetaInformation, string_to_authors
|
||||||
|
from calibre.utils.date import parse_date, isoformat
|
||||||
|
|
||||||
|
|
||||||
class Resource(object):
|
class Resource(object):
|
||||||
@ -272,6 +272,10 @@ class Spine(ResourceCollection):
|
|||||||
self.id = idfunc(self.path)
|
self.id = idfunc(self.path)
|
||||||
self.idref = None
|
self.idref = None
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return 'Spine.Item(path=%r, id=%s, is_linear=%s)' % \
|
||||||
|
(self.path, self.id, self.is_linear)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def from_opf_spine_element(itemrefs, manifest):
|
def from_opf_spine_element(itemrefs, manifest):
|
||||||
s = Spine(manifest)
|
s = Spine(manifest)
|
||||||
@ -280,7 +284,7 @@ class Spine(ResourceCollection):
|
|||||||
if idref is not None:
|
if idref is not None:
|
||||||
path = s.manifest.path_for_id(idref)
|
path = s.manifest.path_for_id(idref)
|
||||||
if path:
|
if path:
|
||||||
r = Spine.Item(s.manifest.id_for_path, path, is_path=True)
|
r = Spine.Item(lambda x:idref, path, is_path=True)
|
||||||
r.is_linear = itemref.get('linear', 'yes') == 'yes'
|
r.is_linear = itemref.get('linear', 'yes') == 'yes'
|
||||||
r.idref = idref
|
r.idref = idref
|
||||||
s.append(r)
|
s.append(r)
|
||||||
@ -441,6 +445,8 @@ class OPF(object):
|
|||||||
guide_path = XPath('descendant::*[re:match(name(), "guide", "i")]/*[re:match(name(), "reference", "i")]')
|
guide_path = XPath('descendant::*[re:match(name(), "guide", "i")]/*[re:match(name(), "reference", "i")]')
|
||||||
|
|
||||||
title = MetadataField('title', formatter=lambda x: re.sub(r'\s+', ' ', x))
|
title = MetadataField('title', formatter=lambda x: re.sub(r'\s+', ' ', x))
|
||||||
|
title_sort = MetadataField('title_sort', formatter=lambda x:
|
||||||
|
re.sub(r'\s+', ' ', x), is_dc=False)
|
||||||
publisher = MetadataField('publisher')
|
publisher = MetadataField('publisher')
|
||||||
language = MetadataField('language')
|
language = MetadataField('language')
|
||||||
comments = MetadataField('description')
|
comments = MetadataField('description')
|
||||||
@ -449,12 +455,14 @@ class OPF(object):
|
|||||||
series = MetadataField('series', is_dc=False)
|
series = MetadataField('series', is_dc=False)
|
||||||
series_index = MetadataField('series_index', is_dc=False, formatter=float, none_is=1)
|
series_index = MetadataField('series_index', is_dc=False, formatter=float, none_is=1)
|
||||||
rating = MetadataField('rating', is_dc=False, formatter=int)
|
rating = MetadataField('rating', is_dc=False, formatter=int)
|
||||||
pubdate = MetadataField('date', formatter=parser.parse)
|
pubdate = MetadataField('date', formatter=parse_date)
|
||||||
publication_type = MetadataField('publication_type', is_dc=False)
|
publication_type = MetadataField('publication_type', is_dc=False)
|
||||||
timestamp = MetadataField('timestamp', is_dc=False, formatter=parser.parse)
|
timestamp = MetadataField('timestamp', is_dc=False,
|
||||||
|
formatter=parse_date)
|
||||||
|
|
||||||
|
|
||||||
def __init__(self, stream, basedir=os.getcwdu(), unquote_urls=True):
|
def __init__(self, stream, basedir=os.getcwdu(), unquote_urls=True,
|
||||||
|
populate_spine=True):
|
||||||
if not hasattr(stream, 'read'):
|
if not hasattr(stream, 'read'):
|
||||||
stream = open(stream, 'rb')
|
stream = open(stream, 'rb')
|
||||||
raw = stream.read()
|
raw = stream.read()
|
||||||
@ -477,7 +485,7 @@ class OPF(object):
|
|||||||
self.manifest = Manifest.from_opf_manifest_element(m, basedir)
|
self.manifest = Manifest.from_opf_manifest_element(m, basedir)
|
||||||
self.spine = None
|
self.spine = None
|
||||||
s = self.spine_path(self.root)
|
s = self.spine_path(self.root)
|
||||||
if s:
|
if populate_spine and s:
|
||||||
self.spine = Spine.from_opf_spine_element(s, self.manifest)
|
self.spine = Spine.from_opf_spine_element(s, self.manifest)
|
||||||
self.guide = None
|
self.guide = None
|
||||||
guide = self.guide_path(self.root)
|
guide = self.guide_path(self.root)
|
||||||
@ -584,6 +592,15 @@ class OPF(object):
|
|||||||
if x.get('id', None) == idref:
|
if x.get('id', None) == idref:
|
||||||
yield x.get('href', '')
|
yield x.get('href', '')
|
||||||
|
|
||||||
|
def first_spine_item(self):
|
||||||
|
items = self.iterspine()
|
||||||
|
if not items:
|
||||||
|
return None
|
||||||
|
idref = items[0].get('idref', '')
|
||||||
|
for x in self.itermanifest():
|
||||||
|
if x.get('id', None) == idref:
|
||||||
|
return x.get('href', None)
|
||||||
|
|
||||||
def create_spine_item(self, idref):
|
def create_spine_item(self, idref):
|
||||||
ans = etree.Element('{%s}itemref'%self.NAMESPACES['opf'], idref=idref)
|
ans = etree.Element('{%s}itemref'%self.NAMESPACES['opf'], idref=idref)
|
||||||
ans.tail = '\n\t\t'
|
ans.tail = '\n\t\t'
|
||||||
@ -675,29 +692,6 @@ class OPF(object):
|
|||||||
|
|
||||||
return property(fget=fget, fset=fset)
|
return property(fget=fget, fset=fset)
|
||||||
|
|
||||||
@dynamic_property
|
|
||||||
def title_sort(self):
|
|
||||||
|
|
||||||
def fget(self):
|
|
||||||
matches = self.title_path(self.metadata)
|
|
||||||
if matches:
|
|
||||||
for match in matches:
|
|
||||||
ans = match.get('{%s}file-as'%self.NAMESPACES['opf'], None)
|
|
||||||
if not ans:
|
|
||||||
ans = match.get('file-as', None)
|
|
||||||
if ans:
|
|
||||||
return ans
|
|
||||||
|
|
||||||
def fset(self, val):
|
|
||||||
matches = self.title_path(self.metadata)
|
|
||||||
if matches:
|
|
||||||
for key in matches[0].attrib:
|
|
||||||
if key.endswith('file-as'):
|
|
||||||
matches[0].attrib.pop(key)
|
|
||||||
matches[0].set('file-as', unicode(val))
|
|
||||||
|
|
||||||
return property(fget=fget, fset=fset)
|
|
||||||
|
|
||||||
@dynamic_property
|
@dynamic_property
|
||||||
def tags(self):
|
def tags(self):
|
||||||
|
|
||||||
@ -869,7 +863,8 @@ class OPF(object):
|
|||||||
def smart_update(self, mi):
|
def smart_update(self, mi):
|
||||||
for attr in ('title', 'authors', 'author_sort', 'title_sort',
|
for attr in ('title', 'authors', 'author_sort', 'title_sort',
|
||||||
'publisher', 'series', 'series_index', 'rating',
|
'publisher', 'series', 'series_index', 'rating',
|
||||||
'isbn', 'language', 'tags', 'category', 'comments'):
|
'isbn', 'language', 'tags', 'category', 'comments',
|
||||||
|
'pubdate'):
|
||||||
val = getattr(mi, attr, None)
|
val = getattr(mi, attr, None)
|
||||||
if val is not None and val != [] and val != (None, None):
|
if val is not None and val != [] and val != (None, None):
|
||||||
setattr(self, attr, val)
|
setattr(self, attr, val)
|
||||||
@ -1041,12 +1036,12 @@ def metadata_to_opf(mi, as_string=True):
|
|||||||
elem.text = text.strip()
|
elem.text = text.strip()
|
||||||
metadata.append(elem)
|
metadata.append(elem)
|
||||||
|
|
||||||
factory(DC('title'), mi.title, mi.title_sort)
|
factory(DC('title'), mi.title)
|
||||||
for au in mi.authors:
|
for au in mi.authors:
|
||||||
factory(DC('creator'), au, mi.author_sort, 'aut')
|
factory(DC('creator'), au, mi.author_sort, 'aut')
|
||||||
factory(DC('contributor'), mi.book_producer, __appname__, 'bkp')
|
factory(DC('contributor'), mi.book_producer, __appname__, 'bkp')
|
||||||
if hasattr(mi.pubdate, 'isoformat'):
|
if hasattr(mi.pubdate, 'isoformat'):
|
||||||
factory(DC('date'), mi.pubdate.isoformat())
|
factory(DC('date'), isoformat(mi.pubdate))
|
||||||
factory(DC('language'), mi.language)
|
factory(DC('language'), mi.language)
|
||||||
if mi.category:
|
if mi.category:
|
||||||
factory(DC('type'), mi.category)
|
factory(DC('type'), mi.category)
|
||||||
@ -1069,9 +1064,11 @@ def metadata_to_opf(mi, as_string=True):
|
|||||||
if mi.rating is not None:
|
if mi.rating is not None:
|
||||||
meta('rating', str(mi.rating))
|
meta('rating', str(mi.rating))
|
||||||
if hasattr(mi.timestamp, 'isoformat'):
|
if hasattr(mi.timestamp, 'isoformat'):
|
||||||
meta('timestamp', mi.timestamp.isoformat())
|
meta('timestamp', isoformat(mi.timestamp))
|
||||||
if mi.publication_type:
|
if mi.publication_type:
|
||||||
meta('publication_type', mi.publication_type)
|
meta('publication_type', mi.publication_type)
|
||||||
|
if mi.title_sort:
|
||||||
|
meta('title_sort', mi.title_sort)
|
||||||
|
|
||||||
metadata[-1].tail = '\n' +(' '*4)
|
metadata[-1].tail = '\n' +(' '*4)
|
||||||
|
|
||||||
@ -1088,12 +1085,12 @@ def metadata_to_opf(mi, as_string=True):
|
|||||||
|
|
||||||
|
|
||||||
def test_m2o():
|
def test_m2o():
|
||||||
from datetime import datetime
|
from calibre.utils.date import now as nowf
|
||||||
from cStringIO import StringIO
|
from cStringIO import StringIO
|
||||||
mi = MetaInformation('test & title', ['a"1', "a'2"])
|
mi = MetaInformation('test & title', ['a"1', "a'2"])
|
||||||
mi.title_sort = 'a\'"b'
|
mi.title_sort = 'a\'"b'
|
||||||
mi.author_sort = 'author sort'
|
mi.author_sort = 'author sort'
|
||||||
mi.pubdate = datetime.now()
|
mi.pubdate = nowf()
|
||||||
mi.language = 'en'
|
mi.language = 'en'
|
||||||
mi.category = 'test'
|
mi.category = 'test'
|
||||||
mi.comments = 'what a fun book\n\n'
|
mi.comments = 'what a fun book\n\n'
|
||||||
@ -1103,7 +1100,7 @@ def test_m2o():
|
|||||||
mi.series = 's"c\'l&<>'
|
mi.series = 's"c\'l&<>'
|
||||||
mi.series_index = 3.34
|
mi.series_index = 3.34
|
||||||
mi.rating = 3
|
mi.rating = 3
|
||||||
mi.timestamp = datetime.now()
|
mi.timestamp = nowf()
|
||||||
mi.publication_type = 'ooooo'
|
mi.publication_type = 'ooooo'
|
||||||
mi.rights = 'yes'
|
mi.rights = 'yes'
|
||||||
mi.cover = 'asd.jpg'
|
mi.cover = 'asd.jpg'
|
||||||
|
@ -13,6 +13,9 @@ from calibre.ptempfile import PersistentTemporaryFile
|
|||||||
from calibre.libunrar import extract_member, names
|
from calibre.libunrar import extract_member, names
|
||||||
|
|
||||||
def get_metadata(stream):
|
def get_metadata(stream):
|
||||||
|
from calibre.ebooks.metadata.archive import is_comic
|
||||||
|
from calibre.ebooks.metadata.meta import get_metadata
|
||||||
|
|
||||||
path = getattr(stream, 'name', False)
|
path = getattr(stream, 'name', False)
|
||||||
if not path:
|
if not path:
|
||||||
pt = PersistentTemporaryFile('_rar-meta.rar')
|
pt = PersistentTemporaryFile('_rar-meta.rar')
|
||||||
@ -21,6 +24,8 @@ def get_metadata(stream):
|
|||||||
path = pt.name
|
path = pt.name
|
||||||
path = os.path.abspath(path)
|
path = os.path.abspath(path)
|
||||||
file_names = list(names(path))
|
file_names = list(names(path))
|
||||||
|
if is_comic(file_names):
|
||||||
|
return get_metadata(stream, 'cbr')
|
||||||
for f in file_names:
|
for f in file_names:
|
||||||
stream_type = os.path.splitext(f)[1].lower()
|
stream_type = os.path.splitext(f)[1].lower()
|
||||||
if stream_type:
|
if stream_type:
|
||||||
@ -29,8 +34,7 @@ def get_metadata(stream):
|
|||||||
'rb', 'imp', 'pdf', 'lrf'):
|
'rb', 'imp', 'pdf', 'lrf'):
|
||||||
data = extract_member(path, match=None, name=f)[1]
|
data = extract_member(path, match=None, name=f)[1]
|
||||||
stream = StringIO(data)
|
stream = StringIO(data)
|
||||||
from calibre.ebooks.metadata.meta import get_metadata
|
|
||||||
return get_metadata(stream, stream_type)
|
return get_metadata(stream, stream_type)
|
||||||
raise ValueError('No ebook found in RAR archive')
|
raise ValueError('No ebook found in RAR archive')
|
||||||
|
|
||||||
|
|
||||||
|
@ -8,15 +8,21 @@ from cStringIO import StringIO
|
|||||||
|
|
||||||
|
|
||||||
def get_metadata(stream):
|
def get_metadata(stream):
|
||||||
|
from calibre.ebooks.metadata.meta import get_metadata
|
||||||
|
from calibre.ebooks.metadata.archive import is_comic
|
||||||
stream_type = None
|
stream_type = None
|
||||||
zf = ZipFile(stream, 'r')
|
zf = ZipFile(stream, 'r')
|
||||||
for f in zf.namelist():
|
names = zf.namelist()
|
||||||
|
if is_comic(names):
|
||||||
|
# Is probably a comic
|
||||||
|
return get_metadata(stream, 'cbz')
|
||||||
|
|
||||||
|
for f in names:
|
||||||
stream_type = os.path.splitext(f)[1].lower()
|
stream_type = os.path.splitext(f)[1].lower()
|
||||||
if stream_type:
|
if stream_type:
|
||||||
stream_type = stream_type[1:]
|
stream_type = stream_type[1:]
|
||||||
if stream_type in ('lit', 'opf', 'prc', 'mobi', 'fb2', 'epub',
|
if stream_type in ('lit', 'opf', 'prc', 'mobi', 'fb2', 'epub',
|
||||||
'rb', 'imp', 'pdf', 'lrf'):
|
'rb', 'imp', 'pdf', 'lrf'):
|
||||||
from calibre.ebooks.metadata.meta import get_metadata
|
|
||||||
stream = StringIO(zf.read(f))
|
stream = StringIO(zf.read(f))
|
||||||
return get_metadata(stream, stream_type)
|
return get_metadata(stream, stream_type)
|
||||||
raise ValueError('No ebook found in ZIP archive')
|
raise ValueError('No ebook found in ZIP archive')
|
||||||
|
@ -4,13 +4,11 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
|||||||
Read data from .mobi files
|
Read data from .mobi files
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import datetime
|
|
||||||
import functools
|
import functools
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import struct
|
import struct
|
||||||
import textwrap
|
import textwrap
|
||||||
|
|
||||||
import cStringIO
|
import cStringIO
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@ -23,6 +21,7 @@ from lxml import html, etree
|
|||||||
|
|
||||||
from calibre import entity_to_unicode, CurrentDir
|
from calibre import entity_to_unicode, CurrentDir
|
||||||
from calibre.utils.filenames import ascii_filename
|
from calibre.utils.filenames import ascii_filename
|
||||||
|
from calibre.utils.date import parse_date
|
||||||
from calibre.ptempfile import TemporaryDirectory
|
from calibre.ptempfile import TemporaryDirectory
|
||||||
from calibre.ebooks import DRMError
|
from calibre.ebooks import DRMError
|
||||||
from calibre.ebooks.chardet import ENCODING_PATS
|
from calibre.ebooks.chardet import ENCODING_PATS
|
||||||
@ -68,7 +67,10 @@ class EXTHHeader(object):
|
|||||||
pass
|
pass
|
||||||
elif id == 503: # Long title
|
elif id == 503: # Long title
|
||||||
if not title or title == _('Unknown'):
|
if not title or title == _('Unknown'):
|
||||||
title = content
|
try:
|
||||||
|
title = content.decode(codec)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
#else:
|
#else:
|
||||||
# print 'unknown record', id, repr(content)
|
# print 'unknown record', id, repr(content)
|
||||||
if title:
|
if title:
|
||||||
@ -96,8 +98,7 @@ class EXTHHeader(object):
|
|||||||
self.mi.tags = list(set(self.mi.tags))
|
self.mi.tags = list(set(self.mi.tags))
|
||||||
elif id == 106:
|
elif id == 106:
|
||||||
try:
|
try:
|
||||||
self.mi.publish_date = datetime.datetime.strptime(
|
self.mi.pubdate = parse_date(content, as_utc=False)
|
||||||
content, '%Y-%m-%d', ).date()
|
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
elif id == 108:
|
elif id == 108:
|
||||||
|
@ -310,6 +310,7 @@ class Serializer(object):
|
|||||||
text = text.replace('&', '&')
|
text = text.replace('&', '&')
|
||||||
text = text.replace('<', '<')
|
text = text.replace('<', '<')
|
||||||
text = text.replace('>', '>')
|
text = text.replace('>', '>')
|
||||||
|
text = text.replace(u'\u00AD', '') # Soft-hyphen
|
||||||
if quot:
|
if quot:
|
||||||
text = text.replace('"', '"')
|
text = text.replace('"', '"')
|
||||||
self.buffer.write(encode(text))
|
self.buffer.write(encode(text))
|
||||||
@ -610,12 +611,21 @@ class MobiWriter(object):
|
|||||||
if (i>firstSequentialNode) and self._ctoc_map[i-1]['klass'] != 'section':
|
if (i>firstSequentialNode) and self._ctoc_map[i-1]['klass'] != 'section':
|
||||||
if offset != previousOffset + previousLength :
|
if offset != previousOffset + previousLength :
|
||||||
self._oeb.log.warning("*** TOC discontinuity: nodes are not sequential ***")
|
self._oeb.log.warning("*** TOC discontinuity: nodes are not sequential ***")
|
||||||
self._oeb.log.warning(" node %03d: '%s' offset: 0x%X length: 0x%X" % \
|
self._oeb.log.info(" node %03d: '%s' offset: 0x%X length: 0x%X" % \
|
||||||
(i-1, entries[i-1].title, previousOffset, previousLength) )
|
(i-1, entries[i-1].title, previousOffset, previousLength) )
|
||||||
self._oeb.log.warning(" node %03d: '%s' offset: 0x%X != 0x%06X" % \
|
self._oeb.log.warning(" node %03d: '%s' offset: 0x%X != 0x%06X" % \
|
||||||
(i, child.title, offset, previousOffset + previousLength) )
|
(i, child.title, offset, previousOffset + previousLength) )
|
||||||
self._oeb.log.warning("\tnode data %03d: %s" % (i-1, self._ctoc_map[i-1]) )
|
# self._oeb.log.warning("\tnode data %03d: %s" % (i-1, self._ctoc_map[i-1]) )
|
||||||
self._oeb.log.warning("\tnode data %03d: %s" % (i, self._ctoc_map[i]) )
|
# self._oeb.log.warning("\tnode data %03d: %s" % (i, self._ctoc_map[i]) )
|
||||||
|
# Dump the offending entry
|
||||||
|
self._oeb.log.info("...")
|
||||||
|
for z in range(i-6 if i-6 > 0 else 0, i+6 if i+6 < len(entries) else len(entries)):
|
||||||
|
if z == i:
|
||||||
|
self._oeb.log.warning("child %03d: %s" % (z, entries[z]))
|
||||||
|
else:
|
||||||
|
self._oeb.log.info("child %03d: %s" % (z, entries[z]))
|
||||||
|
self._oeb.log.info("...")
|
||||||
|
|
||||||
self._oeb.log.warning('_generate_indexed_navpoints: Failed to generate index')
|
self._oeb.log.warning('_generate_indexed_navpoints: Failed to generate index')
|
||||||
# Zero out self._HTMLRecords, return False
|
# Zero out self._HTMLRecords, return False
|
||||||
self._HTMLRecords = []
|
self._HTMLRecords = []
|
||||||
|
@ -16,9 +16,10 @@ from urllib import unquote as urlunquote
|
|||||||
from urlparse import urljoin
|
from urlparse import urljoin
|
||||||
|
|
||||||
from lxml import etree, html
|
from lxml import etree, html
|
||||||
|
from cssutils import CSSParser
|
||||||
|
|
||||||
import calibre
|
import calibre
|
||||||
from cssutils import CSSParser
|
from calibre.constants import filesystem_encoding
|
||||||
from calibre.translations.dynamic import translate
|
from calibre.translations.dynamic import translate
|
||||||
from calibre.ebooks.chardet import xml_to_unicode
|
from calibre.ebooks.chardet import xml_to_unicode
|
||||||
from calibre.ebooks.oeb.entitydefs import ENTITYDEFS
|
from calibre.ebooks.oeb.entitydefs import ENTITYDEFS
|
||||||
@ -434,10 +435,18 @@ class DirContainer(object):
|
|||||||
|
|
||||||
def namelist(self):
|
def namelist(self):
|
||||||
names = []
|
names = []
|
||||||
for root, dirs, files in os.walk(self.rootdir):
|
base = self.rootdir
|
||||||
|
if isinstance(base, unicode):
|
||||||
|
base = base.encode(filesystem_encoding)
|
||||||
|
for root, dirs, files in os.walk(base):
|
||||||
for fname in files:
|
for fname in files:
|
||||||
fname = os.path.join(root, fname)
|
fname = os.path.join(root, fname)
|
||||||
fname = fname.replace('\\', '/')
|
fname = fname.replace('\\', '/')
|
||||||
|
if not isinstance(fname, unicode):
|
||||||
|
try:
|
||||||
|
fname = fname.decode(filesystem_encoding)
|
||||||
|
except:
|
||||||
|
continue
|
||||||
names.append(fname)
|
names.append(fname)
|
||||||
return names
|
return names
|
||||||
|
|
||||||
@ -842,8 +851,10 @@ class Manifest(object):
|
|||||||
self.oeb.log.warn('File %r appears to be a HTML fragment'%self.href)
|
self.oeb.log.warn('File %r appears to be a HTML fragment'%self.href)
|
||||||
nroot = etree.fromstring('<html><body/></html>')
|
nroot = etree.fromstring('<html><body/></html>')
|
||||||
parent = nroot[0]
|
parent = nroot[0]
|
||||||
for child in list(data):
|
for child in list(data.iter()):
|
||||||
child.getparent().remove(child)
|
oparent = child.getparent()
|
||||||
|
if oparent is not None:
|
||||||
|
oparent.remove(child)
|
||||||
parent.append(child)
|
parent.append(child)
|
||||||
data = nroot
|
data = nroot
|
||||||
|
|
||||||
@ -1567,14 +1578,17 @@ class TOC(object):
|
|||||||
parent = etree.Element(NCX('navMap'))
|
parent = etree.Element(NCX('navMap'))
|
||||||
for node in self.nodes:
|
for node in self.nodes:
|
||||||
id = node.id or unicode(uuid.uuid4())
|
id = node.id or unicode(uuid.uuid4())
|
||||||
attrib = {'id': id, 'playOrder': str(node.play_order)}
|
po = node.play_order
|
||||||
|
if po == 0:
|
||||||
|
po = 1
|
||||||
|
attrib = {'id': id, 'playOrder': str(po)}
|
||||||
if node.klass:
|
if node.klass:
|
||||||
attrib['class'] = node.klass
|
attrib['class'] = node.klass
|
||||||
point = element(parent, NCX('navPoint'), attrib=attrib)
|
point = element(parent, NCX('navPoint'), attrib=attrib)
|
||||||
label = etree.SubElement(point, NCX('navLabel'))
|
label = etree.SubElement(point, NCX('navLabel'))
|
||||||
title = node.title
|
title = node.title
|
||||||
if title:
|
if title:
|
||||||
title = re.sub(r'\s', ' ', title)
|
title = re.sub(r'\s+', ' ', title)
|
||||||
element(label, NCX('text')).text = title
|
element(label, NCX('text')).text = title
|
||||||
element(point, NCX('content'), src=urlunquote(node.href))
|
element(point, NCX('content'), src=urlunquote(node.href))
|
||||||
node.to_ncx(point)
|
node.to_ncx(point)
|
||||||
|
@ -120,7 +120,10 @@ class EbookIterator(object):
|
|||||||
bad_map = {}
|
bad_map = {}
|
||||||
font_family_pat = re.compile(r'font-family\s*:\s*([^;]+)')
|
font_family_pat = re.compile(r'font-family\s*:\s*([^;]+)')
|
||||||
for csspath in css_files:
|
for csspath in css_files:
|
||||||
css = open(csspath, 'rb').read().decode('utf-8', 'replace')
|
try:
|
||||||
|
css = open(csspath, 'rb').read().decode('utf-8', 'replace')
|
||||||
|
except:
|
||||||
|
continue
|
||||||
for match in re.compile(r'@font-face\s*{([^}]+)}').finditer(css):
|
for match in re.compile(r'@font-face\s*{([^}]+)}').finditer(css):
|
||||||
block = match.group(1)
|
block = match.group(1)
|
||||||
family = font_family_pat.search(block)
|
family = font_family_pat.search(block)
|
||||||
@ -181,8 +184,9 @@ class EbookIterator(object):
|
|||||||
if hasattr(self.pathtoopf, 'manifest'):
|
if hasattr(self.pathtoopf, 'manifest'):
|
||||||
self.pathtoopf = write_oebbook(self.pathtoopf, self.base)
|
self.pathtoopf = write_oebbook(self.pathtoopf, self.base)
|
||||||
|
|
||||||
|
self.opf = getattr(plumber.input_plugin, 'optimize_opf_parsing', None)
|
||||||
self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
|
if self.opf is None:
|
||||||
|
self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
|
||||||
self.language = self.opf.language
|
self.language = self.opf.language
|
||||||
if self.language:
|
if self.language:
|
||||||
self.language = self.language.lower()
|
self.language = self.language.lower()
|
||||||
|
@ -7,7 +7,7 @@ __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
|||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import os
|
import os
|
||||||
from datetime import datetime
|
from calibre.utils.date import isoformat, now
|
||||||
|
|
||||||
def meta_info_to_oeb_metadata(mi, m, log):
|
def meta_info_to_oeb_metadata(mi, m, log):
|
||||||
from calibre.ebooks.oeb.base import OPF
|
from calibre.ebooks.oeb.base import OPF
|
||||||
@ -60,10 +60,10 @@ def meta_info_to_oeb_metadata(mi, m, log):
|
|||||||
m.add('subject', t)
|
m.add('subject', t)
|
||||||
if mi.pubdate is not None:
|
if mi.pubdate is not None:
|
||||||
m.clear('date')
|
m.clear('date')
|
||||||
m.add('date', mi.pubdate.isoformat())
|
m.add('date', isoformat(mi.pubdate))
|
||||||
if mi.timestamp is not None:
|
if mi.timestamp is not None:
|
||||||
m.clear('timestamp')
|
m.clear('timestamp')
|
||||||
m.add('timestamp', mi.timestamp.isoformat())
|
m.add('timestamp', isoformat(mi.timestamp))
|
||||||
if mi.rights is not None:
|
if mi.rights is not None:
|
||||||
m.clear('rights')
|
m.clear('rights')
|
||||||
m.add('rights', mi.rights)
|
m.add('rights', mi.rights)
|
||||||
@ -71,7 +71,7 @@ def meta_info_to_oeb_metadata(mi, m, log):
|
|||||||
m.clear('publication_type')
|
m.clear('publication_type')
|
||||||
m.add('publication_type', mi.publication_type)
|
m.add('publication_type', mi.publication_type)
|
||||||
if not m.timestamp:
|
if not m.timestamp:
|
||||||
m.add('timestamp', datetime.now().isoformat())
|
m.add('timestamp', isoformat(now()))
|
||||||
|
|
||||||
|
|
||||||
class MergeMetadata(object):
|
class MergeMetadata(object):
|
||||||
|
@ -35,7 +35,10 @@ class RescaleImages(object):
|
|||||||
if not raw: continue
|
if not raw: continue
|
||||||
if qt:
|
if qt:
|
||||||
img = QImage(10, 10, QImage.Format_ARGB32_Premultiplied)
|
img = QImage(10, 10, QImage.Format_ARGB32_Premultiplied)
|
||||||
if not img.loadFromData(raw): continue
|
try:
|
||||||
|
if not img.loadFromData(raw): continue
|
||||||
|
except:
|
||||||
|
continue
|
||||||
width, height = img.width(), img.height()
|
width, height = img.width(), img.height()
|
||||||
else:
|
else:
|
||||||
f = cStringIO.StringIO(raw)
|
f = cStringIO.StringIO(raw)
|
||||||
|
@ -42,9 +42,9 @@ class Writer(FormatWriter):
|
|||||||
pml = unicode(pmlmlizer.extract_content(oeb_book, self.opts)).encode('cp1252', 'replace')
|
pml = unicode(pmlmlizer.extract_content(oeb_book, self.opts)).encode('cp1252', 'replace')
|
||||||
|
|
||||||
text, text_sizes = self._text(pml)
|
text, text_sizes = self._text(pml)
|
||||||
chapter_index = self._index_item(r'(?s)\\C(?P<val>[0-4)="(?P<text>.+?)"', pml)
|
chapter_index = self._index_item(r'(?s)\\C(?P<val>[0-4])="(?P<text>.+?)"', pml)
|
||||||
chapter_index += self.index_item(r'(?s)\\X(?P<val>[0-4])(?P<text>.+?)\\X[0-4]', pml)
|
chapter_index += self._index_item(r'(?s)\\X(?P<val>[0-4])(?P<text>.+?)\\X[0-4]', pml)
|
||||||
chapter_index += self.index_item(r'(?s)\\x(?P<text>.+?)\\x', pml)
|
chapter_index += self._index_item(r'(?s)\\x(?P<text>.+?)\\x', pml)
|
||||||
link_index = self._index_item(r'(?s)\\Q="(?P<text>.+?)"', pml)
|
link_index = self._index_item(r'(?s)\\Q="(?P<text>.+?)"', pml)
|
||||||
images = self._images(oeb_book.manifest, pmlmlizer.image_hrefs)
|
images = self._images(oeb_book.manifest, pmlmlizer.image_hrefs)
|
||||||
metadata = [self._metadata(metadata)]
|
metadata = [self._metadata(metadata)]
|
||||||
|
@ -169,6 +169,8 @@ int main(int argc, char **argv) {
|
|||||||
char *memblock;
|
char *memblock;
|
||||||
ifstream::pos_type size;
|
ifstream::pos_type size;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
map<string,string> info;
|
||||||
|
Reflow *reflow = NULL;
|
||||||
|
|
||||||
|
|
||||||
if (argc != 2) {
|
if (argc != 2) {
|
||||||
@ -189,9 +191,13 @@ int main(int argc, char **argv) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
Reflow reflow(memblock, size);
|
reflow = new Reflow(memblock, size);
|
||||||
reflow.render();
|
info = reflow->get_info();
|
||||||
vector<char> *data = reflow.render_first_page();
|
for (map<string,string>::const_iterator it = info.begin() ; it != info.end(); it++ ) {
|
||||||
|
cout << (*it).first << " : " << (*it).second << endl;
|
||||||
|
}
|
||||||
|
//reflow->render();
|
||||||
|
vector<char> *data = reflow->render_first_page();
|
||||||
ofstream file("cover.png", ios::binary);
|
ofstream file("cover.png", ios::binary);
|
||||||
file.write(&((*data)[0]), data->size());
|
file.write(&((*data)[0]), data->size());
|
||||||
delete data;
|
delete data;
|
||||||
@ -200,7 +206,7 @@ int main(int argc, char **argv) {
|
|||||||
cerr << e.what() << endl;
|
cerr << e.what() << endl;
|
||||||
ret = 1;
|
ret = 1;
|
||||||
}
|
}
|
||||||
|
delete reflow;
|
||||||
delete[] memblock;
|
delete[] memblock;
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
@ -6,7 +6,7 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import sys
|
import sys, os
|
||||||
|
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
|
||||||
@ -47,6 +47,10 @@ class Image(Element):
|
|||||||
return '<img src="%s" width="%dpx" height="%dpx"/>' % \
|
return '<img src="%s" width="%dpx" height="%dpx"/>' % \
|
||||||
(self.src, int(self.width), int(self.height))
|
(self.src, int(self.width), int(self.height))
|
||||||
|
|
||||||
|
def dump(self, f):
|
||||||
|
f.write(self.to_html())
|
||||||
|
f.write('\n')
|
||||||
|
|
||||||
|
|
||||||
class Text(Element):
|
class Text(Element):
|
||||||
|
|
||||||
@ -91,6 +95,10 @@ class Text(Element):
|
|||||||
def to_html(self):
|
def to_html(self):
|
||||||
return self.raw
|
return self.raw
|
||||||
|
|
||||||
|
def dump(self, f):
|
||||||
|
f.write(self.to_html().encode('utf-8'))
|
||||||
|
f.write('\n')
|
||||||
|
|
||||||
class FontSizeStats(dict):
|
class FontSizeStats(dict):
|
||||||
|
|
||||||
def __init__(self, stats):
|
def __init__(self, stats):
|
||||||
@ -143,6 +151,14 @@ class Column(object):
|
|||||||
def add(self, elem):
|
def add(self, elem):
|
||||||
if elem in self.elements: return
|
if elem in self.elements: return
|
||||||
self.elements.append(elem)
|
self.elements.append(elem)
|
||||||
|
self._post_add()
|
||||||
|
|
||||||
|
def prepend(self, elem):
|
||||||
|
if elem in self.elements: return
|
||||||
|
self.elements.insert(0, elem)
|
||||||
|
self._post_add()
|
||||||
|
|
||||||
|
def _post_add(self):
|
||||||
self.elements.sort(cmp=lambda x,y:cmp(x.bottom,y.bottom))
|
self.elements.sort(cmp=lambda x,y:cmp(x.bottom,y.bottom))
|
||||||
self.top = self.elements[0].top
|
self.top = self.elements[0].top
|
||||||
self.bottom = self.elements[-1].bottom
|
self.bottom = self.elements[-1].bottom
|
||||||
@ -183,6 +199,11 @@ class Column(object):
|
|||||||
return None
|
return None
|
||||||
return self.elements[idx-1]
|
return self.elements[idx-1]
|
||||||
|
|
||||||
|
def dump(self, f, num):
|
||||||
|
f.write('******** Column %d\n\n'%num)
|
||||||
|
for elem in self.elements:
|
||||||
|
elem.dump(f)
|
||||||
|
|
||||||
|
|
||||||
class Box(list):
|
class Box(list):
|
||||||
|
|
||||||
@ -262,7 +283,6 @@ class Region(object):
|
|||||||
max_lines = max(max_lines, len(c))
|
max_lines = max(max_lines, len(c))
|
||||||
return max_lines
|
return max_lines
|
||||||
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def is_small(self):
|
def is_small(self):
|
||||||
return self.line_count < 3
|
return self.line_count < 3
|
||||||
@ -283,7 +303,6 @@ class Region(object):
|
|||||||
mc = self.columns[0]
|
mc = self.columns[0]
|
||||||
return mc
|
return mc
|
||||||
|
|
||||||
print
|
|
||||||
for c in singleton.columns:
|
for c in singleton.columns:
|
||||||
for elem in c:
|
for elem in c:
|
||||||
col = most_suitable_column(elem)
|
col = most_suitable_column(elem)
|
||||||
@ -304,6 +323,51 @@ class Region(object):
|
|||||||
for x in self.columns:
|
for x in self.columns:
|
||||||
yield x
|
yield x
|
||||||
|
|
||||||
|
def absorb_regions(self, regions, at):
|
||||||
|
for region in regions:
|
||||||
|
self.absorb_region(region, at)
|
||||||
|
|
||||||
|
def absorb_region(self, region, at):
|
||||||
|
if len(region.columns) <= len(self.columns):
|
||||||
|
for i in range(len(region.columns)):
|
||||||
|
src, dest = region.columns[i], self.columns[i]
|
||||||
|
if at != 'bottom':
|
||||||
|
src = reversed(list(iter(src)))
|
||||||
|
for elem in src:
|
||||||
|
func = dest.add if at == 'bottom' else dest.prepend
|
||||||
|
func(elem)
|
||||||
|
|
||||||
|
else:
|
||||||
|
col_map = {}
|
||||||
|
for i, col in enumerate(region.columns):
|
||||||
|
max_overlap, max_overlap_index = 0, 0
|
||||||
|
for j, dcol in enumerate(self.columns):
|
||||||
|
sint = Interval(col.left, col.right)
|
||||||
|
dint = Interval(dcol.left, dcol.right)
|
||||||
|
width = sint.intersection(dint).width
|
||||||
|
if width > max_overlap:
|
||||||
|
max_overlap = width
|
||||||
|
max_overlap_index = j
|
||||||
|
col_map[i] = max_overlap_index
|
||||||
|
lines = max(map(len, region.columns))
|
||||||
|
if at == 'bottom':
|
||||||
|
lines = range(lines)
|
||||||
|
else:
|
||||||
|
lines = range(lines-1, -1, -1)
|
||||||
|
for i in lines:
|
||||||
|
for j, src in enumerate(region.columns):
|
||||||
|
dest = self.columns[col_map[j]]
|
||||||
|
if i < len(src):
|
||||||
|
func = dest.add if at == 'bottom' else dest.prepend
|
||||||
|
func(src.elements[i])
|
||||||
|
|
||||||
|
def dump(self, f):
|
||||||
|
f.write('############################################################\n')
|
||||||
|
f.write('########## Region (%d columns) ###############\n'%len(self.columns))
|
||||||
|
f.write('############################################################\n\n')
|
||||||
|
for i, col in enumerate(self.columns):
|
||||||
|
col.dump(f, i)
|
||||||
|
|
||||||
def linearize(self):
|
def linearize(self):
|
||||||
self.elements = []
|
self.elements = []
|
||||||
for x in self.columns:
|
for x in self.columns:
|
||||||
@ -376,7 +440,8 @@ class Page(object):
|
|||||||
self.font_size_stats[t.font_size] = 0
|
self.font_size_stats[t.font_size] = 0
|
||||||
self.font_size_stats[t.font_size] += len(t.text_as_string)
|
self.font_size_stats[t.font_size] += len(t.text_as_string)
|
||||||
self.average_text_height += t.height
|
self.average_text_height += t.height
|
||||||
self.average_text_height /= len(self.texts)
|
if len(self.texts):
|
||||||
|
self.average_text_height /= len(self.texts)
|
||||||
|
|
||||||
self.font_size_stats = FontSizeStats(self.font_size_stats)
|
self.font_size_stats = FontSizeStats(self.font_size_stats)
|
||||||
|
|
||||||
@ -431,31 +496,78 @@ class Page(object):
|
|||||||
if not current_region.is_empty:
|
if not current_region.is_empty:
|
||||||
self.regions.append(current_region)
|
self.regions.append(current_region)
|
||||||
|
|
||||||
|
if self.opts.verbose > 2:
|
||||||
|
self.debug_dir = 'page-%d'%self.number
|
||||||
|
os.mkdir(self.debug_dir)
|
||||||
|
self.dump_regions('pre-coalesce')
|
||||||
|
|
||||||
self.coalesce_regions()
|
self.coalesce_regions()
|
||||||
|
self.dump_regions('post-coalesce')
|
||||||
|
|
||||||
|
def dump_regions(self, fname):
|
||||||
|
fname = 'regions-'+fname+'.txt'
|
||||||
|
with open(os.path.join(self.debug_dir, fname), 'wb') as f:
|
||||||
|
f.write('Page #%d\n\n'%self.number)
|
||||||
|
for region in self.regions:
|
||||||
|
region.dump(f)
|
||||||
|
|
||||||
def coalesce_regions(self):
|
def coalesce_regions(self):
|
||||||
# find contiguous sets of small regions
|
# find contiguous sets of small regions
|
||||||
# absorb into a neighboring region (prefer the one with number of cols
|
# absorb into a neighboring region (prefer the one with number of cols
|
||||||
# closer to the avg number of cols in the set, if equal use larger
|
# closer to the avg number of cols in the set, if equal use larger
|
||||||
# region)
|
# region)
|
||||||
# merge contiguous regions that can contain each other
|
|
||||||
absorbed = set([])
|
|
||||||
found = True
|
found = True
|
||||||
|
absorbed = set([])
|
||||||
|
processed = set([])
|
||||||
while found:
|
while found:
|
||||||
found = False
|
found = False
|
||||||
for i, region in enumerate(self.regions):
|
for i, region in enumerate(self.regions):
|
||||||
if region.is_small:
|
if region in absorbed:
|
||||||
|
continue
|
||||||
|
if region.is_small and region not in processed:
|
||||||
found = True
|
found = True
|
||||||
regions = []
|
processed.add(region)
|
||||||
|
regions = [region]
|
||||||
|
end = i+1
|
||||||
for j in range(i+1, len(self.regions)):
|
for j in range(i+1, len(self.regions)):
|
||||||
|
end = j
|
||||||
if self.regions[j].is_small:
|
if self.regions[j].is_small:
|
||||||
regions.append(self.regions[j])
|
regions.append(self.regions[j])
|
||||||
else:
|
else:
|
||||||
break
|
break
|
||||||
prev = None if i == 0 else i-1
|
prev_region = None if i == 0 else i-1
|
||||||
next = j if self.regions[j] not in regions else None
|
next_region = end if end < len(self.regions) and self.regions[end] not in regions else None
|
||||||
|
absorb_at = 'bottom'
|
||||||
|
if prev_region is None and next_region is not None:
|
||||||
|
absorb_into = next_region
|
||||||
|
absorb_at = 'top'
|
||||||
|
elif next_region is None and prev_region is not None:
|
||||||
|
absorb_into = prev_region
|
||||||
|
elif prev_region is None and next_region is None:
|
||||||
|
if len(regions) > 1:
|
||||||
|
absorb_into = i
|
||||||
|
regions = regions[1:]
|
||||||
|
else:
|
||||||
|
absorb_into = None
|
||||||
|
else:
|
||||||
|
absorb_into = prev_region
|
||||||
|
if self.regions[next_region].line_count >= \
|
||||||
|
self.regions[prev_region].line_count:
|
||||||
|
avg_column_count = sum([len(r.columns) for r in
|
||||||
|
regions])/float(len(regions))
|
||||||
|
if self.regions[next_region].line_count > \
|
||||||
|
self.regions[prev_region].line_count \
|
||||||
|
or abs(avg_column_count -
|
||||||
|
len(self.regions[prev_region].columns)) \
|
||||||
|
> abs(avg_column_count -
|
||||||
|
len(self.regions[next_region].columns)):
|
||||||
|
absorb_into = next_region
|
||||||
|
absorb_at = 'top'
|
||||||
|
if absorb_into is not None:
|
||||||
|
self.regions[absorb_into].absorb_regions(regions, absorb_at)
|
||||||
|
absorbed.update(regions)
|
||||||
|
for region in absorbed:
|
||||||
|
self.regions.remove(region)
|
||||||
|
|
||||||
def sort_into_columns(self, elem, neighbors):
|
def sort_into_columns(self, elem, neighbors):
|
||||||
neighbors.add(elem)
|
neighbors.add(elem)
|
||||||
@ -575,8 +687,9 @@ class PDFDocument(object):
|
|||||||
for elem in self.elements:
|
for elem in self.elements:
|
||||||
html.extend(elem.to_html())
|
html.extend(elem.to_html())
|
||||||
html += ['</body>', '</html>']
|
html += ['</body>', '</html>']
|
||||||
|
raw = (u'\n'.join(html)).replace('</strong><strong>', '')
|
||||||
with open('index.html', 'wb') as f:
|
with open('index.html', 'wb') as f:
|
||||||
f.write((u'\n'.join(html)).encode('utf-8'))
|
f.write(raw.encode('utf-8'))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -182,10 +182,10 @@ class PML_HTMLizer(object):
|
|||||||
return pml
|
return pml
|
||||||
|
|
||||||
def strip_pml(self, pml):
|
def strip_pml(self, pml):
|
||||||
pml = re.sub(r'\\C\d=".+*"', '', pml)
|
pml = re.sub(r'\\C\d=".*"', '', pml)
|
||||||
pml = re.sub(r'\\Fn=".+*"', '', pml)
|
pml = re.sub(r'\\Fn=".*"', '', pml)
|
||||||
pml = re.sub(r'\\Sd=".+*"', '', pml)
|
pml = re.sub(r'\\Sd=".*"', '', pml)
|
||||||
pml = re.sub(r'\\.=".+*"', '', pml)
|
pml = re.sub(r'\\.=".*"', '', pml)
|
||||||
pml = re.sub(r'\\X\d', '', pml)
|
pml = re.sub(r'\\X\d', '', pml)
|
||||||
pml = re.sub(r'\\S[pbd]', '', pml)
|
pml = re.sub(r'\\S[pbd]', '', pml)
|
||||||
pml = re.sub(r'\\Fn', '', pml)
|
pml = re.sub(r'\\Fn', '', pml)
|
||||||
|
@ -27,7 +27,7 @@ from calibre.ebooks.rtf2xml import headings_to_sections, \
|
|||||||
paragraph_def, convert_to_tags, output, copy, \
|
paragraph_def, convert_to_tags, output, copy, \
|
||||||
list_numbers, info, pict, table_info, fonts, paragraphs, \
|
list_numbers, info, pict, table_info, fonts, paragraphs, \
|
||||||
body_styles, preamble_rest, group_styles, \
|
body_styles, preamble_rest, group_styles, \
|
||||||
inline, correct_unicode
|
inline
|
||||||
from calibre.ebooks.rtf2xml.old_rtf import OldRtf
|
from calibre.ebooks.rtf2xml.old_rtf import OldRtf
|
||||||
|
|
||||||
"""
|
"""
|
||||||
@ -256,15 +256,6 @@ class ParseRtf:
|
|||||||
)
|
)
|
||||||
pict_obj.process_pict()
|
pict_obj.process_pict()
|
||||||
self.__bracket_match('pict_data_info')
|
self.__bracket_match('pict_data_info')
|
||||||
correct_uni_obj = correct_unicode.CorrectUnicode(
|
|
||||||
in_file = self.__temp_file,
|
|
||||||
bug_handler = RtfInvalidCodeException,
|
|
||||||
copy = self.__copy,
|
|
||||||
run_level = self.__run_level,
|
|
||||||
exception_handler = InvalidRtfException,
|
|
||||||
)
|
|
||||||
correct_uni_obj.correct_unicode()
|
|
||||||
self.__bracket_match('correct_unicode_info')
|
|
||||||
combine_obj = combine_borders.CombineBorders(
|
combine_obj = combine_borders.CombineBorders(
|
||||||
in_file = self.__temp_file,
|
in_file = self.__temp_file,
|
||||||
bug_handler = RtfInvalidCodeException,
|
bug_handler = RtfInvalidCodeException,
|
||||||
|
@ -1,94 +0,0 @@
|
|||||||
#########################################################################
|
|
||||||
# #
|
|
||||||
# #
|
|
||||||
# copyright 2002 Paul Henry Tremblay #
|
|
||||||
# #
|
|
||||||
# This program is distributed in the hope that it will be useful, #
|
|
||||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
||||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
|
|
||||||
# General Public License for more details. #
|
|
||||||
# #
|
|
||||||
# You should have received a copy of the GNU General Public License #
|
|
||||||
# along with this program; if not, write to the Free Software #
|
|
||||||
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA #
|
|
||||||
# 02111-1307 USA #
|
|
||||||
# #
|
|
||||||
# #
|
|
||||||
#########################################################################
|
|
||||||
import os, re, tempfile
|
|
||||||
from calibre.ebooks.rtf2xml import copy
|
|
||||||
class CorrectUnicode:
|
|
||||||
"""
|
|
||||||
corrects sequences such as \u201c\'F0\'BE
|
|
||||||
Where \'F0\'BE has to be eliminated.
|
|
||||||
"""
|
|
||||||
def __init__(self,
|
|
||||||
in_file,
|
|
||||||
exception_handler,
|
|
||||||
bug_handler,
|
|
||||||
copy = None,
|
|
||||||
run_level = 1,
|
|
||||||
):
|
|
||||||
self.__file = in_file
|
|
||||||
self.__bug_handler = bug_handler
|
|
||||||
self.__copy = copy
|
|
||||||
self.__run_level = run_level
|
|
||||||
self.__write_to = tempfile.mktemp()
|
|
||||||
self.__exception_handler = exception_handler
|
|
||||||
self.__bug_handler = bug_handler
|
|
||||||
self.__state = 'outside'
|
|
||||||
self.__utf_exp = re.compile(r'&#x(.*?);')
|
|
||||||
def __process_token(self, line):
|
|
||||||
if self.__state == 'outside':
|
|
||||||
if line[:5] == 'tx<ut':
|
|
||||||
self.__handle_unicode(line)
|
|
||||||
else:
|
|
||||||
self.__write_obj.write(line)
|
|
||||||
elif self.__state == 'after':
|
|
||||||
if line[:5] == 'tx<hx':
|
|
||||||
pass
|
|
||||||
elif line[:5] == 'tx<ut':
|
|
||||||
self.__handle_unicode(line)
|
|
||||||
else:
|
|
||||||
self.__state = 'outside'
|
|
||||||
self.__write_obj.write(line)
|
|
||||||
else:
|
|
||||||
raise 'should\'t happen'
|
|
||||||
def __handle_unicode(self, line):
|
|
||||||
token = line[16:]
|
|
||||||
match_obj = re.search(self.__utf_exp, token)
|
|
||||||
if match_obj:
|
|
||||||
uni_char = match_obj.group(1)
|
|
||||||
dec_num = int(uni_char, 16)
|
|
||||||
if dec_num > 57343 and dec_num < 63743:
|
|
||||||
self.__state = 'outside'
|
|
||||||
else:
|
|
||||||
self.__write_obj.write(line)
|
|
||||||
self.__state = 'after'
|
|
||||||
else:
|
|
||||||
self.__write_obj.write(line)
|
|
||||||
self.__state = 'outside'
|
|
||||||
def correct_unicode(self):
|
|
||||||
"""
|
|
||||||
Requires:
|
|
||||||
nothing
|
|
||||||
Returns:
|
|
||||||
nothing (changes the original file)
|
|
||||||
Logic:
|
|
||||||
Read one line in at a time.
|
|
||||||
"""
|
|
||||||
read_obj = open(self.__file, 'r')
|
|
||||||
self.__write_obj = open(self.__write_to, 'w')
|
|
||||||
line_to_read = 1
|
|
||||||
while line_to_read:
|
|
||||||
line_to_read = read_obj.readline()
|
|
||||||
line = line_to_read
|
|
||||||
self.__token_info = line[:16]
|
|
||||||
self.__process_token(line)
|
|
||||||
read_obj.close()
|
|
||||||
self.__write_obj.close()
|
|
||||||
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
|
||||||
if self.__copy:
|
|
||||||
copy_obj.copy_file(self.__write_to, "correct_unicode.data")
|
|
||||||
copy_obj.rename(self.__write_to, self.__file)
|
|
||||||
os.remove(self.__write_to)
|
|