updated from main branch
244
Changelog.yaml
@ -4,6 +4,250 @@
|
||||
# for important features/bug fixes.
|
||||
# Also, each release can have new and improved recipes.
|
||||
|
||||
- version: 0.6.40
|
||||
date: 2010-02-12
|
||||
|
||||
new features:
|
||||
- title: "Ability to perform exact match and regular expression based searches."
|
||||
type: major
|
||||
tickets: [4830]
|
||||
description: >
|
||||
"You can now perform exact match searches by prefixing your search term with an =.
|
||||
So for example, tag:=fiction will match all tags named fiction, but not tags named
|
||||
non-fiction. Similarly, you can use regular expression based searches by prefixing
|
||||
the search term by ~."
|
||||
|
||||
- title: "Autodetect if a zip/rar file is actually a comic and if so, import it as CBZ/CBR"
|
||||
tickets: [4753]
|
||||
|
||||
- title: "Add plugin to automatically extract an ebook during import if it is in a zip/rar archive"
|
||||
|
||||
- title: "Linux source install: Install a calibre environment module to ease the integration of calibre into other python projects"
|
||||
|
||||
bug fixes:
|
||||
- title: "Fix regression in 0.6.39 that broke the LRF viewer"
|
||||
|
||||
- title: "ZIP/EPUB files: Try to detect file name encoding instead of assuming the name is encoded in UTF-8. Also correctly
|
||||
encode the extracted file name in the local filesystem encoding."
|
||||
|
||||
- title: "HTML Input: Handle HTML fragments more gracefully"
|
||||
tickets: [4854]
|
||||
|
||||
- title: "Zip files: Workaround invalid zip files that contain end-of-file comments but set comment size to zero"
|
||||
|
||||
- title: "Restore the recipe for the Wired daily feed."
|
||||
tickets: [4871]
|
||||
|
||||
- title: "MOBI metadata: Preserve original EXTH records when not overwritten by calibre metadata."
|
||||
|
||||
- title: "Catalog generation: Improved series sorting. All books not in a series are now grouped together"
|
||||
|
||||
- title: "Fix occasional threading related crash when using the ChooseFormatDialog"
|
||||
|
||||
- title: "Catalog generation: Various fixes for handling invalid data"
|
||||
|
||||
new recipes:
|
||||
- title: Sueddeutsche Zeitung
|
||||
author: Darko Miletic
|
||||
|
||||
improved recipes:
|
||||
- Pagina 12
|
||||
- Variety
|
||||
- Toronto Sun
|
||||
- Telegraph UK
|
||||
- Danas
|
||||
- Dilbert
|
||||
|
||||
- version: 0.6.39
|
||||
date: 2010-02-09
|
||||
|
||||
new features:
|
||||
- title: "Add ability to control how author sort strings are automatically generated from author strings, via the config file tweaks.py"
|
||||
|
||||
- title: "Handle broken EPUB files from Project Gutenberg that have invalid OCF containers"
|
||||
tickets: [4832]
|
||||
|
||||
bug fixes:
|
||||
- title: "Fix regression in 0.6.38 that broke setting bookmarks in the viewer"
|
||||
|
||||
- title: "HTML Input: Ignore filenames that are encoded incorrectly."
|
||||
|
||||
new recipes:
|
||||
|
||||
- title: Radikal
|
||||
author: Darko Miletic
|
||||
|
||||
|
||||
- version: 0.6.38
|
||||
date: 2010-02-09
|
||||
|
||||
new features:
|
||||
- title: "Driver for the Irex DR 800"
|
||||
|
||||
- title: "Driver for the Booq e-book reader"
|
||||
|
||||
- title: "Allow automatic series increment algorithm to be tweaked by editing the config file tweaks.py"
|
||||
|
||||
- title: "Various improvements to the catalog generation. Larger thumbnails in EPUB output and better series sorting. Better handling of html markup in the comments."
|
||||
|
||||
- title: "MOBI Output: Make font used for generated masthead images user customizable."
|
||||
|
||||
bug fixes:
|
||||
- title: "E-book viewer: Make bookmarking (and remembering last open position) more robust. For linux source installs, you must have Qt 4.6"
|
||||
tickets: [4812]
|
||||
|
||||
- title: "Fix conversion/import of HTML files with very long href links on windows"
|
||||
tickets: [4783]
|
||||
|
||||
- title: "Don't read metadata from filenames for download news, even if the user has the read metadata from filename option set"
|
||||
tickets: [4758]
|
||||
|
||||
- title: "Don't allow leading or trailing space in tags and series. Also normalize all internal spaces to a single space"
|
||||
tickets: [4809]
|
||||
|
||||
- title: "E-book viewer: Toolbars remember their position"
|
||||
tickets: [4811]
|
||||
|
||||
- title: "Fix year being repeated when editing date in main library screen on windows"
|
||||
tickets: [4829]
|
||||
|
||||
- title: "News download: Fix downloading of images from URLs with an ampersand in them"
|
||||
|
||||
- title: "Linux source install: unbundle cssutils, it is now an external dependency"
|
||||
|
||||
- title: "MOBI metadata: Fix regression that broke setting of titles in some MOBI files"
|
||||
|
||||
- title: "EPUB metadata: Extract the cover image from the html it is embedded in if possible, instead of rendering the html. Removes the white margins on covers and speeds up cover extraction"
|
||||
|
||||
- title: "Fix regression in PDB output"
|
||||
|
||||
- title: "News download: Remove <base> tags automatically"
|
||||
|
||||
- title: "Searching on device: Ignore unicode errors"
|
||||
|
||||
|
||||
new recipes:
|
||||
- title: Courier Press
|
||||
author: Krittika Goyal
|
||||
|
||||
- title: zive.sk and iliterature.cz
|
||||
author: Abelturd
|
||||
|
||||
- title: El Comercio, Digital Spy UK, Gizmodo, News Straits Times, Read It Later, TidBits
|
||||
author: Darko Miletic
|
||||
|
||||
improved recipes:
|
||||
- Jerusalem Post
|
||||
- Clarin
|
||||
- La Nacion
|
||||
- Harvard Business Review
|
||||
- People US Mashup
|
||||
- The New Republic
|
||||
- "Pagina 12"
|
||||
- Discover Magazine
|
||||
- Metro Montreal
|
||||
|
||||
- version: 0.6.37
|
||||
date: 2010-02-01
|
||||
|
||||
new features:
|
||||
- title: "E-book viewer: Add support for viewing SVG images"
|
||||
type: major
|
||||
|
||||
- title: "Add category of Recently added books when generating catalog in e-book format"
|
||||
|
||||
- title: "OS X: Allow adding of books to calibre via drag and drop on the calibre dock icon"
|
||||
|
||||
- title: "Add support for masthead images when downloading news for the Kindle"
|
||||
|
||||
- title: "MOBI metadata: Allow setting of metadata in old PRC files without EXTH headers as well"
|
||||
|
||||
bug fixes:
|
||||
- title: Changing the date in Dutch
|
||||
tickets: [4732]
|
||||
|
||||
- title: "Fix regression that broke sending files to unupdated PRS 500s"
|
||||
|
||||
- title: "MOBI Input: Ignore width and height percentage measures for <img> tags."
|
||||
tickets: [4726]
|
||||
|
||||
- title: "EPUB Output: Remove <img> tags that point to the internet for their images as this causes the ever delicate ADE to crash."
|
||||
tickets: [4692]
|
||||
|
||||
- title: "Comic Input: Handle UTF-8 BOM when converting a cbc file"
|
||||
tickets: [4683]
|
||||
|
||||
- title: "Allow rating to be cleared via the Bulk metadata edit dialog"
|
||||
tickets: [4693]
|
||||
|
||||
- title: "Add workaround for broken linux systems with multiply encoded file names"
|
||||
tickets: [4721]
|
||||
|
||||
- title: Fix bug preventing the use of indices when setting save to disk templates
|
||||
tickets: [4710]
|
||||
|
||||
- title: "Linux device mounting. Use filetype of auto to allow non vfat filesystems to be mounted"
|
||||
tickets: [4707]
|
||||
|
||||
- title: "Catalog generation: Make sorting of numbers in title as text optional"
|
||||
|
||||
- title: "Fix error while sending book with non-ascii character in title/author to device on linux"
|
||||
tickets: [4690]
|
||||
|
||||
- title: "Fix reset cover in edit meta information dialog does not actually remove cover"
|
||||
tickets: [4731]
|
||||
|
||||
new recipes:
|
||||
- title: Kamera Bild
|
||||
author: Darko Miletic
|
||||
|
||||
- title: The Online Photographer
|
||||
author: Darko Miletic
|
||||
|
||||
- title: The Luminous Landscape
|
||||
author: Darko Miletic
|
||||
|
||||
- title: Slovo
|
||||
author: Abelturd
|
||||
|
||||
- title: Various Danish newspapers
|
||||
author: Darko Miletic
|
||||
|
||||
- title: Heraldo de Aragon
|
||||
author: Lorenzo Vigentini
|
||||
|
||||
- title: Orange County Register
|
||||
author: Lorenzo Vigentini
|
||||
|
||||
- title: Open Left
|
||||
author: Xanthan Gum
|
||||
|
||||
- title: Michelle Malkin
|
||||
author: Walt Anthony
|
||||
|
||||
- title: The Metro Montreal
|
||||
author: Jerry Clapperton
|
||||
|
||||
- title: The Gazette
|
||||
author: Jerry Clapperton
|
||||
|
||||
- title: Macleans Magazine
|
||||
author: Nick Redding
|
||||
|
||||
- title: NY Times Sunday Book Review
|
||||
author: Krittika Goyal
|
||||
|
||||
- title: Various Italian newspapers
|
||||
author: Lorenzo Vigentini
|
||||
|
||||
|
||||
improved recipes:
|
||||
- The Irish Times
|
||||
- Washington Post
|
||||
- NIN
|
||||
- The Discover Magazine
|
||||
- Pagina 12
|
||||
|
||||
- version: 0.6.36
|
||||
date: 2010-01-25
|
||||
|
||||
|
@ -27,7 +27,7 @@ p.tags {
|
||||
|
||||
p.description {
|
||||
text-align:left;
|
||||
font-style:italic;
|
||||
font-style:normal;
|
||||
margin-top: 0em;
|
||||
}
|
||||
|
||||
@ -55,6 +55,14 @@ p.author_index {
|
||||
text-indent: 0em;
|
||||
}
|
||||
|
||||
p.series {
|
||||
text-align: left;
|
||||
margin-top:0px;
|
||||
margin-bottom:0px;
|
||||
margin-left:2em;
|
||||
text-indent:-2em;
|
||||
}
|
||||
|
||||
p.read_book {
|
||||
text-align:left;
|
||||
margin-top:0px;
|
||||
@ -71,3 +79,9 @@ p.unread_book {
|
||||
text-indent:-2em;
|
||||
}
|
||||
|
||||
hr.series_divider {
|
||||
width:50%;
|
||||
margin-left:1em;
|
||||
margin-top:0em;
|
||||
margin-bottom:0em;
|
||||
}
|
||||
|
27
resources/default_tweaks.py
Normal file
@ -0,0 +1,27 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
'''
|
||||
Contains various tweaks that affect calibre behavior. Only edit this file if
|
||||
you know what you are doing. If you delete this file, it will be recreated from
|
||||
defaults.
|
||||
'''
|
||||
|
||||
|
||||
# The algorithm used to assign a new book in an existing series a series number.
|
||||
# Possible values are:
|
||||
# next - Next available number
|
||||
# const - Assign the number 1 always
|
||||
series_index_auto_increment = 'next'
|
||||
|
||||
|
||||
|
||||
# The algorithm used to copy author to author_sort
|
||||
# Possible values are:
|
||||
# invert: use "fn ln" -> "ln, fn" (the original algorithm)
|
||||
# copy : copy author to author_sort without modification
|
||||
# comma : use 'copy' if there is a ',' in the name, otherwise use 'invert'
|
||||
author_sort_copy_method = 'invert'
|
157
resources/images/catalog.svg
Normal file
@ -0,0 +1,157 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<!-- Generator: Adobe Illustrator 14.0.0, SVG Export Plug-In . SVG Version: 6.00 Build 43363) -->
|
||||
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
|
||||
<svg version="1.1" id="svg2" xmlns:svg="http://www.w3.org/2000/svg" xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
|
||||
xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px" width="128px" height="128px"
|
||||
viewBox="0 0 128 128" enable-background="new 0 0 128 128" xml:space="preserve">
|
||||
<filter id="filter5365">
|
||||
<feGaussianBlur stdDeviation="1.3829225" inkscape:collect="always" id="feGaussianBlur5367"></feGaussianBlur>
|
||||
</filter>
|
||||
<g id="layer1">
|
||||
</g>
|
||||
<g id="layer2">
|
||||
<polygon id="rect3200" opacity="0.5722" fill="#0000A4" enable-background="new " points="167.5,297.005 171.429,297.005
|
||||
171.429,297.005 "/>
|
||||
<g id="path5265" filter="url(#filter5365)">
|
||||
<polygon fill="#362D2D" points="21.951,79.904 70.397,63.09 119.953,80.636 70.397,97.084 "/>
|
||||
<polygon fill="none" stroke="#362D2D" stroke-width="1.2507" stroke-linejoin="bevel" points="21.951,79.904 70.397,63.09
|
||||
119.953,80.636 70.397,97.084 "/>
|
||||
</g>
|
||||
<g id="path5267" filter="url(#filter5365)">
|
||||
<path fill="#362D2D" d="M118.639,100.902v1.724l-46.437,15.432c-3.723-9.284-1.901-16.34,0.089-20.69l46.883-15.518l-6.34,2.068
|
||||
l2.322,16.553L118.639,100.902z"/>
|
||||
<path fill="none" stroke="#362D2D" stroke-width="1.9" d="M118.639,100.902v1.724l-46.437,15.432
|
||||
c-3.723-9.284-1.901-16.34,0.089-20.69l46.883-15.518l-6.34,2.068l2.322,16.553L118.639,100.902z"/>
|
||||
</g>
|
||||
<g id="path5269" filter="url(#filter5365)">
|
||||
<path fill="#362D2D" d="M70.711,98.81l47.581-15.743l0.29,18.582l-47.56,15.986c0,0-1.515-3.455-1.942-9.812
|
||||
C68.936,101.726,70.711,98.81,70.711,98.81z"/>
|
||||
<path fill="none" stroke="#362D2D" stroke-width="2.1" d="M70.711,98.81l47.581-15.743l0.29,18.582l-47.56,15.986
|
||||
c0,0-1.515-3.455-1.942-9.812C68.936,101.726,70.711,98.81,70.711,98.81z"/>
|
||||
</g>
|
||||
<g id="path5271" filter="url(#filter5365)">
|
||||
<path fill="#362D2D" d="M21.479,79.607l49.115,17.501c-3.287,7.816-2.385,15.202,0.982,23.019l-50.008-16.208
|
||||
C17.974,94.288,17.113,87.874,21.479,79.607z"/>
|
||||
<path fill="none" stroke="#362D2D" stroke-width="1.6" d="M21.479,79.607l49.115,17.501c-3.287,7.816-2.385,15.202,0.982,23.019
|
||||
l-50.008-16.208C17.974,94.288,17.113,87.874,21.479,79.607z"/>
|
||||
</g>
|
||||
<g id="path5273" filter="url(#filter5365)">
|
||||
<path fill="#362D2D" d="M120.871,99.092v4.827l-50.008,16.897l-49.651-15.863c-4.763-11.162-1.987-18.682,0.714-25.346
|
||||
l49.651-16.724l48.579,17.242v3.449l-2.143,1.033l0.357,14.139L120.871,99.092z"/>
|
||||
<path fill="none" stroke="#362D2D" stroke-width="2.7" stroke-linejoin="bevel" d="M120.871,99.092v4.827l-50.008,16.897
|
||||
l-49.651-15.863c-4.763-11.162-1.987-18.682,0.714-25.346l49.651-16.724l48.579,17.242v3.449l-2.143,1.033l0.357,14.139
|
||||
L120.871,99.092z"/>
|
||||
</g>
|
||||
<path id="path5385" fill="#78CE4F" d="M19.316,78.05l48.438-17.414l49.548,18.171L67.754,95.842L19.316,78.05z"/>
|
||||
<path id="path5387" fill="none" stroke="#0F973B" stroke-width="1.9" d="M115.988,99.796v1.786l-46.43,15.982
|
||||
c-3.722-9.616-1.901-16.924,0.09-21.43l46.875-16.07l-6.34,2.143l2.322,17.143L115.988,99.796z"/>
|
||||
|
||||
<radialGradient id="path5389_1_" cx="498.3457" cy="267.1621" r="27.1927" gradientTransform="matrix(-0.064 0.175 1.8694 0.6835 -425.1342 -169.6643)" gradientUnits="userSpaceOnUse">
|
||||
<stop offset="0" style="stop-color:#B5FFA6"/>
|
||||
<stop offset="1" style="stop-color:#76E976"/>
|
||||
</radialGradient>
|
||||
<path id="path5389" fill="url(#path5389_1_)" stroke="#003131" stroke-width="1.6" stroke-opacity="0.9608" d="M18.845,77.742
|
||||
l49.107,18.125c-3.287,8.096-2.385,15.744,0.981,23.84l-50-16.786C15.339,92.946,14.479,86.304,18.845,77.742z"/>
|
||||
<path id="path5391" fill="none" stroke="#003131" stroke-width="2.7" stroke-linejoin="bevel" stroke-opacity="0.9608" d="
|
||||
M118.22,97.921v5l-50,17.5l-49.643-16.429c-4.762-11.561-1.987-19.348,0.714-26.25l49.642-17.321l48.572,17.857v3.571l-2.143,1.071
|
||||
l0.356,14.644L118.22,97.921z"/>
|
||||
<path id="path5393" fill="#FFFFFF" d="M68.068,97.629l47.572-16.305l0.29,19.245l-47.194,16.423c0,0-1.424-2.819-2.12-10.029
|
||||
C66.471,100.649,68.068,97.629,68.068,97.629z"/>
|
||||
<g id="path5419" filter="url(#filter5365)">
|
||||
<polygon fill="#362D2D" points="8.737,52.047 57.183,35.233 106.738,52.778 57.183,69.227 "/>
|
||||
<polygon fill="none" stroke="#362D2D" stroke-width="1.2507" stroke-linejoin="bevel" points="8.737,52.047 57.183,35.233
|
||||
106.738,52.778 57.183,69.227 "/>
|
||||
</g>
|
||||
<g id="path5421" filter="url(#filter5365)">
|
||||
<path fill="#362D2D" d="M105.424,73.045v1.724L58.988,90.2c-3.723-9.284-1.902-16.34,0.089-20.69l46.882-15.518l-6.341,2.069
|
||||
l2.322,16.552L105.424,73.045z"/>
|
||||
<path fill="none" stroke="#362D2D" stroke-width="1.9" d="M105.424,73.045v1.724L58.988,90.2
|
||||
c-3.723-9.284-1.902-16.34,0.089-20.69l46.882-15.518l-6.341,2.069l2.322,16.552L105.424,73.045z"/>
|
||||
</g>
|
||||
<g id="path5423" filter="url(#filter5365)">
|
||||
<path fill="#362D2D" d="M57.497,70.953l47.581-15.744l0.289,18.582L57.809,89.777c0,0-1.515-3.455-1.942-9.812
|
||||
C55.721,73.869,57.497,70.953,57.497,70.953z"/>
|
||||
<path fill="none" stroke="#362D2D" stroke-width="2.1" d="M57.497,70.953l47.581-15.744l0.289,18.582L57.809,89.777
|
||||
c0,0-1.515-3.455-1.942-9.812C55.721,73.869,57.497,70.953,57.497,70.953z"/>
|
||||
</g>
|
||||
<g id="path5425" filter="url(#filter5365)">
|
||||
<path fill="#362D2D" d="M8.265,51.751l49.116,17.501c-3.288,7.816-2.385,15.201,0.982,23.018L8.354,76.062
|
||||
C4.759,66.431,3.899,60.017,8.265,51.751z"/>
|
||||
<path fill="none" stroke="#362D2D" stroke-width="1.6" d="M8.265,51.751l49.116,17.501c-3.288,7.816-2.385,15.201,0.982,23.018
|
||||
L8.354,76.062C4.759,66.431,3.899,60.017,8.265,51.751z"/>
|
||||
</g>
|
||||
<g id="path5427" filter="url(#filter5365)">
|
||||
<path fill="#362D2D" d="M107.656,71.234v4.828L57.648,92.959L7.998,77.097C3.234,65.934,6.011,58.415,8.712,51.751l49.651-16.725
|
||||
l48.58,17.242v3.448l-2.144,1.035l0.357,14.139L107.656,71.234z"/>
|
||||
<path fill="none" stroke="#362D2D" stroke-width="2.7" stroke-linejoin="bevel" d="M107.656,71.234v4.828L57.648,92.959
|
||||
L7.998,77.097C3.234,65.934,6.011,58.415,8.712,51.751l49.651-16.725l48.58,17.242v3.448l-2.144,1.035l0.357,14.139
|
||||
L107.656,71.234z"/>
|
||||
</g>
|
||||
<path id="path5431" fill="#60BAFF" stroke="#003244" stroke-width="1.2507" stroke-linejoin="bevel" d="M6.102,50.193L54.54,32.779
|
||||
l49.548,18.171L54.54,67.985L6.102,50.193z"/>
|
||||
<path id="path5433" fill="none" stroke="#0056D5" stroke-width="2.8104" d="M102.768,71.76v1.803L56.35,89.701
|
||||
c-3.721-9.71-1.901-17.089,0.089-21.639l46.865-16.229l-6.338,2.164l2.321,17.312L102.768,71.76z"/>
|
||||
|
||||
<radialGradient id="path5435_1_" cx="316.8916" cy="261.2949" r="27.1937" gradientTransform="matrix(-0.0902 0.2793 1.9257 0.6218 -445.576 -180.1955)" gradientUnits="userSpaceOnUse">
|
||||
<stop offset="0" style="stop-color:#789DED"/>
|
||||
<stop offset="1" style="stop-color:#2381E8"/>
|
||||
</radialGradient>
|
||||
<path id="path5435" fill="url(#path5435_1_)" stroke="#003244" stroke-width="1.6" d="M5.63,49.885L54.738,68.01
|
||||
c-3.287,8.096-2.385,15.744,0.982,23.84l-50-16.785C2.125,65.09,1.265,58.447,5.63,49.885z"/>
|
||||
<path id="path5437" fill="none" stroke="#003244" stroke-width="2.7" stroke-linejoin="bevel" d="M105.006,70.064v5l-50,17.5
|
||||
L5.363,76.135c-4.762-11.561-1.987-19.348,0.714-26.25L55.72,32.564l48.571,17.857v3.572l-2.143,1.071l0.357,14.643L105.006,70.064
|
||||
z"/>
|
||||
<path id="path5439" fill="#FFFFFF" d="M54.854,69.772l47.573-16.306l0.29,19.245L55.522,89.135c0,0-1.425-2.819-2.121-10.028
|
||||
C53.256,72.793,54.854,69.772,54.854,69.772z"/>
|
||||
<g id="path5447" filter="url(#filter5365)">
|
||||
<polygon fill="#362D2D" points="25.88,28.119 74.326,11.305 123.882,28.85 74.326,45.299 "/>
|
||||
<polygon fill="none" stroke="#362D2D" stroke-width="1.2507" stroke-linejoin="bevel" points="25.88,28.119 74.326,11.305
|
||||
123.882,28.85 74.326,45.299 "/>
|
||||
</g>
|
||||
<g id="path5449" filter="url(#filter5365)">
|
||||
<path fill="#362D2D" d="M122.567,49.116v1.724L76.131,66.271c-3.723-9.284-1.902-16.34,0.09-20.69l46.883-15.518l-6.341,2.069
|
||||
l2.321,16.552L122.567,49.116z"/>
|
||||
<path fill="none" stroke="#362D2D" stroke-width="1.9" d="M122.567,49.116v1.724L76.131,66.271
|
||||
c-3.723-9.284-1.902-16.34,0.09-20.69l46.883-15.518l-6.341,2.069l2.321,16.552L122.567,49.116z"/>
|
||||
</g>
|
||||
<g id="path5451" filter="url(#filter5365)">
|
||||
<path fill="#362D2D" d="M74.641,47.024l47.58-15.744l0.289,18.582L74.951,65.849c0,0-1.514-3.455-1.941-9.812
|
||||
C72.863,49.94,74.641,47.024,74.641,47.024z"/>
|
||||
<path fill="none" stroke="#362D2D" stroke-width="2.1" d="M74.641,47.024l47.58-15.744l0.289,18.582L74.951,65.849
|
||||
c0,0-1.514-3.455-1.941-9.812C72.863,49.94,74.641,47.024,74.641,47.024z"/>
|
||||
</g>
|
||||
<g id="path5453" filter="url(#filter5365)">
|
||||
<path fill="#362D2D" d="M25.408,27.822l49.115,17.5c-3.287,7.816-2.385,15.202,0.982,23.018L25.498,52.133
|
||||
C21.902,42.502,21.042,36.088,25.408,27.822z"/>
|
||||
<path fill="none" stroke="#362D2D" stroke-width="1.6" d="M25.408,27.822l49.115,17.5c-3.287,7.816-2.385,15.202,0.982,23.018
|
||||
L25.498,52.133C21.902,42.502,21.042,36.088,25.408,27.822z"/>
|
||||
</g>
|
||||
<g id="path5455" filter="url(#filter5365)">
|
||||
<path fill="#362D2D" d="M124.8,47.306v4.828L74.791,69.03L25.14,53.168c-4.763-11.163-1.987-18.682,0.714-25.346l49.651-16.725
|
||||
l48.58,17.242v3.449l-2.145,1.034l0.357,14.139L124.8,47.306z"/>
|
||||
<path fill="none" stroke="#362D2D" stroke-width="2.7" stroke-linejoin="bevel" d="M124.8,47.306v4.828L74.791,69.03L25.14,53.168
|
||||
c-4.763-11.163-1.987-18.682,0.714-25.346l49.651-16.725l48.58,17.242v3.449l-2.145,1.034l0.357,14.139L124.8,47.306z"/>
|
||||
</g>
|
||||
<path id="path5459" fill="#FF7272" d="M23.245,26.264L71.684,8.85l49.547,18.171L71.684,44.057L23.245,26.264z"/>
|
||||
<path id="path5461" fill="none" stroke="#CF0505" stroke-width="1.9" d="M119.916,48.01v1.786L73.488,65.778
|
||||
c-3.723-9.616-1.902-16.923,0.089-21.429l46.875-16.071l-6.339,2.143l2.32,17.143L119.916,48.01z"/>
|
||||
|
||||
<radialGradient id="path5463_1_" cx="14.938" cy="-466.4766" r="27.3207" gradientTransform="matrix(2.5834 0.998 0.0835 -0.2162 46.7076 -68.8071)" gradientUnits="userSpaceOnUse">
|
||||
<stop offset="0" style="stop-color:#FD8A8A"/>
|
||||
<stop offset="1" style="stop-color:#FF7878"/>
|
||||
</radialGradient>
|
||||
<path id="path5463" fill="url(#path5463_1_)" stroke="#600101" stroke-width="1.6" d="M22.773,25.957l49.107,18.125
|
||||
c-3.287,8.095-2.385,15.744,0.982,23.839l-50-18.806C19.268,39.14,18.408,34.518,22.773,25.957z"/>
|
||||
|
||||
<linearGradient id="path3311_1_" gradientUnits="userSpaceOnUse" x1="-1.3145" y1="103.2168" x2="67.4683" y2="103.2168" gradientTransform="matrix(1 0 0 -1 5.4287 129.1426)">
|
||||
<stop offset="0" style="stop-color:#FFFFFF"/>
|
||||
<stop offset="1" style="stop-color:#FFFFFF;stop-opacity:0.2471"/>
|
||||
</linearGradient>
|
||||
<path id="path3311" fill="url(#path3311_1_)" d="M23.904,25.736L72.342,8.322l49.548,18.171L72.342,43.529L23.904,25.736z"/>
|
||||
<path id="path5465" fill="none" stroke="#600101" stroke-width="2.7" stroke-linejoin="bevel" d="M122.148,46.135v5l-50,17.5
|
||||
l-49.39-18.701c-4.762-11.562-2.239-17.076,0.461-23.977L72.863,8.635l48.57,17.857v3.571l-2.143,1.071l0.357,14.643
|
||||
L122.148,46.135z"/>
|
||||
<path id="path5467" fill="#FFFFFF" d="M71.997,45.844l47.573-16.306l0.289,19.246L72.666,65.206c0,0-1.426-2.819-2.121-10.028
|
||||
C70.399,48.864,71.997,45.844,71.997,45.844z"/>
|
||||
</g>
|
||||
</svg>
|
After Width: | Height: | Size: 11 KiB |
BIN
resources/images/news/digitalspy_uk.png
Normal file
After Width: | Height: | Size: 1.3 KiB |
BIN
resources/images/news/elcomercio.png
Normal file
After Width: | Height: | Size: 764 B |
BIN
resources/images/news/gizmodo.png
Normal file
After Width: | Height: | Size: 640 B |
BIN
resources/images/news/kamerabild.png
Normal file
After Width: | Height: | Size: 838 B |
BIN
resources/images/news/newsstraitstimes.png
Normal file
After Width: | Height: | Size: 816 B |
BIN
resources/images/news/radikal_tr.png
Normal file
After Width: | Height: | Size: 1.9 KiB |
BIN
resources/images/news/readitlater.png
Normal file
After Width: | Height: | Size: 810 B |
BIN
resources/images/news/sueddeutschezeitung.png
Normal file
After Width: | Height: | Size: 492 B |
BIN
resources/images/news/theluminouslandscape.png
Normal file
After Width: | Height: | Size: 769 B |
BIN
resources/images/news/tidbits.png
Normal file
After Width: | Height: | Size: 783 B |
37
resources/kathemerini.recipe
Normal file
@ -0,0 +1,37 @@
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class Kathimerini(BasicNewsRecipe):
    '''News recipe for the Greek newspaper Kathimerini, built from its RSS feeds.'''

    title = 'Kathimerini'
    __author__ = 'Pan'
    description = 'News from Greece'
    max_articles_per_feed = 100
    oldest_article = 100
    publisher = 'Kathimerini'
    category = 'news, GR'
    language = 'el'
    no_stylesheets = True

    # Both boundaries target the same <td class="news"> cell.
    remove_tags_before = dict(name='td', attrs={'class':'news'})
    remove_tags_after = dict(name='td', attrs={'class':'news'})
    remove_attributes = ['width', 'src', 'header', 'footer']

    # (section title, feed URL) pairs. The URLs must not carry surrounding
    # whitespace: they are handed to the downloader as-is.
    feeds = [
        (u'\u03a0\u03bf\u03bb\u03b9\u03c4\u03b9\u03ba\u03ae',  # Politics
         'http://wk.kathimerini.gr/xml_files/politics.xml'),
        (u'\u0395\u03bb\u03bb\u03ac\u03b4\u03b1',  # Greece
         'http://wk.kathimerini.gr/xml_files/ell.xml'),
        (u'\u039a\u03cc\u03c3\u03bc\u03bf\u03c2',  # World
         'http://wk.kathimerini.gr/xml_files/world.xml'),
        (u'\u039f\u03b9\u03ba\u03bf\u03bd\u03bf\u03bc\u03af\u03b1',  # Economy
         'http://wk.kathimerini.gr/xml_files/economy_1.xml'),
        (u'\u0395\u03c0\u03b9\u03c7\u03b5\u03b9\u03c1\u03ae\u03c3\u03b5\u03b9\u03c2',  # Business
         'http://wk.kathimerini.gr/xml_files/economy_2.xml'),
        (u'\u0394\u03b9\u03b5\u03b8\u03bd\u03ae\u03c2 \u039f\u03b9\u03ba\u03bf\u03bd\u03bf\u03bc\u03af\u03b1',  # International economy
         'http://wk.kathimerini.gr/xml_files/economy_3.xml'),
        (u'\u03a0\u03bf\u03bb\u03b9\u03c4\u03b9\u03c3\u03bc\u03cc\u03c2',  # Culture
         'http://wk.kathimerini.gr/xml_files/civ.xml'),
        (u'\u039c\u03cc\u03bd\u03b9\u03bc\u03b5\u03c2 \u03a3\u03c4\u03ae\u03bb\u03b5\u03c2',  # Regular columns
         'http://wk.kathimerini.gr/xml_files/st.xml'),
    ]

    def print_version(self, url):
        '''Return the URL to download for the article at ``url``.

        The ``/4dcgi/`` path prefix of news.kathimerini.gr article links is
        rewritten to ``/4dcgi/4dcgi/``; any other URL is returned unchanged.
        '''
        return url.replace('http://news.kathimerini.gr/4dcgi/', 'http://news.kathimerini.gr/4dcgi/4dcgi/')
|
||||
|
||||
|
45
resources/recipes/ZIVE.sk.recipe
Normal file
@ -0,0 +1,45 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
|
||||
|
||||
|
||||
class ZiveRecipe(BasicNewsRecipe):
    '''News recipe for ZIVE.sk (Slovak-language), built from the site's RSS feed.'''

    __license__ = 'GPL v3'
    __author__ = 'Abelturd'
    language = 'sk'
    version = 1

    title = u'ZIVE.sk'
    publisher = u''
    category = u'News, Newspaper'
    description = u'Naj\u010d\xedtanej\u0161\xed denn\xedk opo\u010d\xedta\u010doch, IT a internete. '
    encoding = 'UTF-8'

    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
    remove_empty_feeds = True

    no_stylesheets = True
    remove_javascript = True
    cover_url = 'http://www.zive.sk/Client.Images/Logos/logo-zive-sk.gif'

    # Single feed; its title is Slovak for "All articles".
    feeds = [
        (u'V\u0161etky \u010dl\xe1nky', u'http://www.zive.sk/rss/sc-47/default.aspx'),
    ]

    # Strip the "Pokra...ie" paragraph from the page. The match is non-greedy
    # on purpose: with re.DOTALL a greedy ``.*`` would run up to the LAST
    # ``ie</strong></p>`` in the document and delete everything in between.
    preprocess_regexps = [
        (re.compile(r'<p><p><strong>Pokra.*?ie</strong></p>', re.DOTALL|re.IGNORECASE),
         lambda match: ''),
    ]

    remove_tags = []

    # Only the headline, the author line and the article text container are kept.
    keep_only_tags = [
        dict(name='h1'),
        dict(name='span', attrs={'class':'arlist-data-info-author'}),
        dict(name='div', attrs={'class':'bbtext font-resizer-area'}),
    ]
    extra_css = '''
h1 {font-size:140%;font-family:georgia,serif; font-weight:bold}
h3 {font-size:115%;font-family:georgia,serif; font-weight:bold}
'''
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
clarin.com
|
||||
'''
|
||||
@ -21,7 +21,8 @@ class Clarin(BasicNewsRecipe):
|
||||
cover_url = strftime('http://www.clarin.com/diario/%Y/%m/%d/portada.jpg')
|
||||
encoding = 'cp1252'
|
||||
language = 'es'
|
||||
extra_css = ' .Txt{ font-family: sans-serif } .Volan{ font-family: sans-serif; font-size: x-small} .Pie{ font-family: sans-serif; font-size: x-small} .Copete{font-family: sans-serif; font-size: large} .Hora{font-family: sans-serif; font-size: large} .Autor{font-family: sans-serif; font-size: small} '
|
||||
masthead_url = 'http://www.clarin.com/shared/v10/img/Hd/lg_Clarin.gif'
|
||||
extra_css = ' body{font-family: Arial,Helvetica,sans-serif} h2{font-family: Georgia,"Times New Roman",Times,serif; font-size: xx-large} .Volan,.Pie,.Autor{ font-size: x-small} .Copete,.Hora{font-size: large} '
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
|
26
resources/recipes/courrier.recipe
Normal file
@ -0,0 +1,26 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
|
||||
class CourierPress(BasicNewsRecipe):
    '''News recipe for the Courier Press headlines feed.'''

    title = u'Courier Press'
    language = 'en'
    __author__ = 'Krittika Goyal'
    oldest_article = 1  # days
    max_articles_per_feed = 25

    # ``no_stylesheets`` is the BasicNewsRecipe switch for dropping site CSS;
    # an attribute named ``remove_stylesheets`` is not read by the base class.
    no_stylesheets = True
    remove_tags = [
        dict(name='iframe'),
    ]

    feeds = [
        ('Courier Press',
         'http://www.courierpress.com/rss/headlines/news/'),
    ]

    def preprocess_html(self, soup):
        '''Reduce a downloaded page to just its article body.

        The ``<div id="article_body">`` element is moved into a fresh,
        otherwise empty HTML document, which is returned. If the page has
        no such element, ``soup`` is returned unchanged rather than failing
        on the insertion of ``None``.
        '''
        story = soup.find(name='div', attrs={'id':'article_body'})
        if story is None:
            return soup
        skeleton = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
        skeleton.find(name='body').insert(0, story)
        return skeleton
|
@ -1,64 +1,63 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
danas.rs
|
||||
'''
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
|
||||
class Danas(BasicNewsRecipe):
|
||||
title = 'Danas'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'Vesti'
|
||||
description = 'Dnevne novine sa vestima iz sveta, politike, ekonomije, kulture, sporta, Beograda, Novog Sada i cele Srbije.'
|
||||
publisher = 'Danas d.o.o.'
|
||||
category = 'news, politics, Serbia'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = False
|
||||
use_embedded_content = False
|
||||
encoding = 'utf-8'
|
||||
masthead_url = 'http://www.danas.rs/images/basic/danas.gif'
|
||||
language = 'sr'
|
||||
lang = 'sr-Latn-RS'
|
||||
direction = 'ltr'
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .article_description,body,.lokacija{font-family: Tahoma,Arial,Helvetica,sans1,sans-serif} .nadNaslov,h1,.preamble{font-family: Georgia,"Times New Roman",Times,serif1,serif} .antrfileText{border-left: 2px solid #999999; color:#666666; margin-left: 0.8em; padding-left: 1.2em; margin-bottom: 0; margin-top: 0} h2,.datum,.lokacija,.autor{font-size: small} .antrfileNaslov{border-left: 2px solid #999999; color:#666666; margin-left: 0.8em; padding-left: 1.2em; font-weight:bold; margin-bottom: 0; margin-top: 0} img{margin-bottom: 0.8em} '
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
, 'pretty_print' : True
|
||||
}
|
||||
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'left'})]
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':['width_1_4','metaClanka','baner']})
|
||||
,dict(name='div', attrs={'id':'comments'})
|
||||
,dict(name=['object','link'])
|
||||
,dict(name=['object','link','iframe'])
|
||||
]
|
||||
|
||||
feeds = [
|
||||
(u'Vesti' , u'http://www.danas.rs/rss/rss.asp' )
|
||||
,(u'Periskop', u'http://www.danas.rs/rss/rss.asp?column_id=4')
|
||||
(u'Politika' , u'http://www.danas.rs/rss/rss.asp?column_id=27')
|
||||
,(u'Hronika' , u'http://www.danas.rs/rss/rss.asp?column_id=2' )
|
||||
,(u'Drustvo' , u'http://www.danas.rs/rss/rss.asp?column_id=24')
|
||||
,(u'Dijalog' , u'http://www.danas.rs/rss/rss.asp?column_id=1' )
|
||||
,(u'Ekonomija', u'http://www.danas.rs/rss/rss.asp?column_id=6' )
|
||||
,(u'Svet' , u'http://www.danas.rs/rss/rss.asp?column_id=25')
|
||||
,(u'Srbija' , u'http://www.danas.rs/rss/rss.asp?column_id=28')
|
||||
,(u'Kultura' , u'http://www.danas.rs/rss/rss.asp?column_id=5' )
|
||||
,(u'Sport' , u'http://www.danas.rs/rss/rss.asp?column_id=13')
|
||||
,(u'Scena' , u'http://www.danas.rs/rss/rss.asp?column_id=42')
|
||||
,(u'Feljton' , u'http://www.danas.rs/rss/rss.asp?column_id=19')
|
||||
,(u'Periskop' , u'http://www.danas.rs/rss/rss.asp?column_id=4' )
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
    """Clean up a downloaded page before it is converted.

    Inserts a Content-Language <meta> tag (value taken from self.lang) as the
    first child of <head>, renames every table-related element to <div> while
    dropping its layout attributes, and finally removes inline ``style``
    attributes from the whole document. The same soup object is returned.
    """
    lang_meta = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
    soup.head.insert(0,lang_meta)

    # Presentation-only attributes that are stripped from former table nodes.
    layout_attrs = (
        'style', 'font', 'valign',
        'colspan', 'width', 'height',
        'rowspan', 'summary', 'align',
        'cellspacing', 'cellpadding',
        'frames', 'rules', 'border',
    )
    table_names = ['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']

    for node in soup.body.findAll(name=table_names):
        node.name = 'div'
        for attr_name in layout_attrs:
            # has_key() tests the tag's attributes (not its children).
            if node.has_key(attr_name):
                del node[attr_name]

    for styled in soup.findAll(style=True):
        del styled['style']

    return soup
|
||||
|
||||
def print_version(self, url):
    """Return *url* with the ``&action=print`` query argument appended."""
    print_suffix = '&action=print'
    return url + print_suffix
|
||||
|
||||
|
43
resources/recipes/digitalspy_uk.recipe
Normal file
@ -0,0 +1,43 @@
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
www.digitalspy.co.uk
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class DigitalSpyUK(BasicNewsRecipe):
    """Recipe for the UK edition of Digital Spy (www.digitalspy.co.uk)."""

    # --- publication metadata -------------------------------------------
    title = 'Digital Spy - UK Edition'
    __author__ = 'Darko Miletic'
    description = 'Entertainment news about the biggest TV shows, films and celebrities, updated around the clock.'
    publisher = 'Digital Spy Limited.'
    category = 'news, showbiz, big brother, x factor, torchwood, doctor who, tv, media, sky, freeview, cable'
    language = 'en_GB'

    # --- fetch settings ---------------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'cp1252'
    use_embedded_content = False
    remove_empty_feeds = True
    extra_css = ' body{font-family: Verdana,Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .info{font-size: small} '

    # The metadata above is forwarded unchanged to the conversion step.
    conversion_options = dict(
        comment=description,
        tags=category,
        publisher=publisher,
        language=language
    )

    # --- page clean-up ----------------------------------------------------
    remove_tags = [{'name': ['link']}]
    remove_attributes = ['height', 'width']
    keep_only_tags = [{'name': 'div', 'attrs': {'id': 'article'}}]

    feeds = [
        (u'News', u'http://www.digitalspy.co.uk/rss/zones/gb/all.xml'),
        (u'Big Brother', u'http://www.digitalspy.co.uk/rss/zones/gb/bigbrother.xml'),
        (u'Entertainment', u'http://www.digitalspy.co.uk/rss/zones/gb/entertainment.xml'),
        (u'General', u'http://www.digitalspy.co.uk/rss/zones/gb/general.xml'),
        (u'Media', u'http://www.digitalspy.co.uk/rss/zones/gb/media.xml'),
    ]
|
||||
|
@ -3,6 +3,7 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
http://www.dilbert.com
|
||||
'''
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
@ -28,6 +29,12 @@ class DosisDiarias(BasicNewsRecipe):
|
||||
|
||||
feeds = [(u'Dilbert', u'http://feeds.dilbert.com/DilbertDailyStrip' )]
|
||||
|
||||
preprocess_regexps = [
|
||||
(re.compile('strip\..*\.gif', re.DOTALL|re.IGNORECASE),
|
||||
lambda match: 'strip.zoom.gif')
|
||||
]
|
||||
|
||||
|
||||
def get_article_url(self, article):
    """Return the article's ``feedburner_origlink`` entry, or None if absent."""
    original_link = article.get('feedburner_origlink')
    return original_link
|
||||
|
||||
|
@ -4,7 +4,7 @@ __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
'''
|
||||
doscovermagazine.com
|
||||
discovermagazine.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
@ -19,21 +19,15 @@ class DiscoverMagazine(BasicNewsRecipe):
|
||||
oldest_article = 33
|
||||
max_articles_per_feed = 20
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
encoding = 'utf-8'
|
||||
|
||||
extra_css = '.headline {font-size: x-large;} \n .fact {padding-top: 10pt}'
|
||||
|
||||
remove_tags_before = dict(id='articlePage')
|
||||
remove_tags = [dict(name='div', attrs={'id':['searchModule', 'mainMenu', 'tool-box']}),
|
||||
dict(name='img', attrs={'src':'http://discovermagazine.com/onebyone.gif'})]
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'articlePage'})]
|
||||
|
||||
remove_tags = [dict(attrs={'id':['buttons', 'tool-box', 'teaser', 'already-subscriber', 'teaser-suite', 'related-articles', 'relatedItem', 'box-popular', 'box-blogs', 'box-news', 'footer']}),
|
||||
dict(attrs={'class':'popularNewsBox'}),
|
||||
dict(name=['img', 'style', 'head'])]
|
||||
|
||||
remove_tags_after = dict(id='articlePage')
|
||||
remove_tags_after = [dict(name='div', attrs={'class':'articlebody'})]
|
||||
|
||||
feeds = [
|
||||
(u'Technology', u'http://discovermagazine.com/topics/technology/rss.xml'),
|
||||
|
26
resources/recipes/eksiazki.recipe
Normal file
@ -0,0 +1,26 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v2'
|
||||
__copyright__ = u'2010, Tomasz Dlugosz <tomek3d@gmail.com>'
|
||||
'''
|
||||
eksiazki.org
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class eksiazki(BasicNewsRecipe):
    """Recipe for the eksiazki.org blog (Polish site about e-paper and e-books)."""

    title = u'eksiazki.org'
    # Previously misspelled as "desciption", which left the recipe without a
    # description; the other recipes all use the name "description".
    description = u'Twoje centrum wiedzy o epapierze i ebookach'
    language = 'pl'
    __author__ = u'Tomasz D\u0142ugosz'
    no_stylesheets = True
    remove_javascript = True

    feeds = [(u'wpisy', u'http://www.eksiazki.org/feed/')]

    # Only the div#content-body container is kept; comment counters, the tab
    # widget and the previous/next navigation inside it are dropped.
    keep_only_tags = [dict(name='div', attrs={'id':'content-body'})]
    remove_tags = [
        dict(name='span', attrs={'class':'nr_comm'}),
        dict(name='div', attrs={'id':'tabsContainer'}),
        dict(name='div', attrs={'class':'next_previous_links'})]
|
38
resources/recipes/elcomercio.recipe
Normal file
@ -0,0 +1,38 @@
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
elcomercio.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class ElComercio(BasicNewsRecipe):
    """Recipe for El Comercio (elcomercio.com), built from the feed's embedded content."""

    title = 'El Comercio '
    __author__ = 'Darko Miletic'
    # The original text here was the Gizmodo blurb, copied over by mistake;
    # it also ended up in the book's comment metadata via conversion_options.
    description = 'News from Ecuador'
    publisher = 'GRUPO EL COMERCIO C.A.'
    category = 'news, Ecuador, politics'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = True
    language = 'es'
    masthead_url = 'http://ww1.elcomercio.com/nv_images/headers/EC/logo_new_08.gif'
    extra_css = ' body{font-family: Arial,Verdana,sans-serif} img{margin-bottom: 1em} '

    # Metadata passed on to the conversion step.
    conversion_options = {
        'comment' : description
        , 'tags' : category
        , 'publisher' : publisher
        , 'language' : language
    }

    remove_attributes = ['width','height']

    feeds = [(u'Articles', u'http://ww1.elcomercio.com/rss/titulares1.xml')]

    def preprocess_html(self, soup):
        """Return the soup after running it through self.adeify_images()."""
        return self.adeify_images(soup)
|
||||
|
40
resources/recipes/gizmodo.recipe
Normal file
@ -0,0 +1,40 @@
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
gizmodo.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Gizmodo(BasicNewsRecipe):
    """Recipe for gizmodo.com, built from the full-content feed."""

    # --- publication metadata -------------------------------------------
    title = 'Gizmodo'
    __author__ = 'Darko Miletic'
    description = "Gizmodo, the gadget guide. So much in love with shiny new toys, it's unnatural."
    publisher = 'gizmodo.com'
    category = 'news, IT, Internet, gadgets'
    language = 'en'
    masthead_url = 'http://cache.gawkerassets.com/assets/gizmodo.com/img/logo.png'

    # --- fetch settings ---------------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = True
    extra_css = ' body{font-family: "Lucida Grande",Helvetica,Arial,sans-serif} img{margin-bottom: 1em} '

    # The metadata above is forwarded unchanged to the conversion step.
    conversion_options = dict(
        comment=description,
        tags=category,
        publisher=publisher,
        language=language
    )

    # --- page clean-up ----------------------------------------------------
    remove_attributes = ['width', 'height']
    remove_tags = [{'name': 'div', 'attrs': {'class': 'feedflare'}}]
    remove_tags_after = {'name': 'div', 'attrs': {'class': 'feedflare'}}

    feeds = [
        (u'Articles', u'http://feeds.gawker.com/gizmodo/full'),
    ]

    def preprocess_html(self, soup):
        """Hand the soup to self.adeify_images() and return its result."""
        adjusted = self.adeify_images(soup)
        return adjusted
|
||||
|
@ -18,7 +18,8 @@ class HBR(BasicNewsRecipe):
|
||||
remove_tags = [dict(id=['mastheadContainer', 'magazineHeadline',
|
||||
'articleToolbarTopRD', 'pageRightSubColumn', 'pageRightColumn',
|
||||
'todayOnHBRListWidget', 'mostWidget', 'keepUpWithHBR',
|
||||
'mailingListTout', 'partnerCenter', 'pageFooter']),
|
||||
'mailingListTout', 'partnerCenter', 'pageFooter',
|
||||
'articleToolbarTop', 'articleToolbarBottom', 'articleToolbarRD']),
|
||||
dict(name='iframe')]
|
||||
extra_css = '''
|
||||
a {font-family:Georgia,"Times New Roman",Times,serif; font-style:italic; color:#000000; }
|
||||
|
47
resources/recipes/iliteratura_cz.recipe
Normal file
@ -0,0 +1,47 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
|
||||
class SmeRecipe(BasicNewsRecipe):
    """Recipe for iLiteratura.cz, downloaded through the site's print pages.

    NOTE(review): the class name appears to be carried over from another
    recipe; it is kept unchanged so existing references keep working.
    """
    __license__ = 'GPL v3'
    __author__ = 'Abelturd'
    # ISO 639-1 code for Czech. The previous value 'cz' is the country code,
    # not a language code.
    language = 'cs'
    version = 1

    title = u'iLiteratura.cz'
    publisher = u''
    category = u'News, Newspaper'
    description = u'O LITERATU\u0158E V CEL\xc9M SV\u011aT\u011a A DOMA'
    cover_url = 'http://www.iliteratura.cz/1_vzhled/1/iliteratura.gif'

    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
    remove_empty_feeds = True

    no_stylesheets = True
    remove_javascript = True

    feeds = [(u'\u010cl\xe1nky', u'http://www.iliteratura.cz/rss.asp')]

    keep_only_tags = []

    remove_tags = [dict(name='table'),dict(name='h3')]

    # Cut everything from the "Souvisej..." <h3> heading up to </body>.
    preprocess_regexps = [
        (re.compile(r'<h3>Souvisej.*</body>', re.DOTALL|re.IGNORECASE),
         lambda match: ''),
    ]

    def print_version(self, url):
        """Map a feed article URL to the site's print ("tisk") page.

        The numeric article id is taken from the ``ID=<digits>`` part of
        *url*. When the URL carries no such id it is returned unchanged
        instead of failing on a missing regex match.
        """
        m = re.search('(?<=ID=)[0-9]+', url)
        if m is None:
            return url
        return u'http://www.iliteratura.cz/clanek.asp?polozkaID=' + str(m.group(0)) + '&c=tisk'

    extra_css = '''
        h1 {font-size:140%;font-family:georgia,serif; font-weight:bold}
        h3 {font-size:115%;font-family:georgia,serif; font-weight:bold}
        '''
|
67
resources/recipes/ilsole24ore.recipe
Normal file
@ -0,0 +1,67 @@
|
||||
#!/usr/bin/env python
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'Lorenzo Vigentini & Edwin van Maastrigt'
|
||||
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com> and Edwin van Maastrigt <evanmaastrigt at gmail.com>'
|
||||
__description__ = 'Financial news daily paper - v1.02 (30, January 2010)'
|
||||
|
||||
'''
|
||||
http://www.ilsole24ore.com/
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class ilsole(BasicNewsRecipe):
    """Recipe for il Sole 24 Ore, downloaded through the "_PRN" print pages."""

    author = 'Lorenzo Vigentini & Edwin van Maastrigt'
    # Every other recipe exposes its author as __author__; provide it under
    # that name too while keeping the original attribute.
    __author__ = author
    description = 'Financial news daily paper'

    cover_url = 'http://www.ilsole24ore.com/img2009/header/t_logosole.gif'
    title = u'il Sole 24 Ore '
    publisher = 'italiaNews'
    category = 'News, finance, economy, politics'

    language = 'it'
    timefmt = '[%a, %d %b, %Y]'

    oldest_article = 2
    max_articles_per_feed = 50
    use_embedded_content = False

    remove_javascript = True
    no_stylesheets = True

    def get_article_url(self, article):
        """Return the entry's ``id``, falling back to ``guid``, else None."""
        return article.get('id', article.get('guid', None))

    def print_version(self, url):
        """Return the print-page URL for *url*.

        Drops the query string (everything from the first '?') and rewrites
        ``.shtml`` to ``_PRN.shtml``. partition() is used rather than
        rpartition() so that a URL without any '?' is kept whole; with
        rpartition() such a URL produced an empty string.
        """
        link, sep, params = url.partition('?')
        return link.replace('.shtml', '_PRN.shtml')

    keep_only_tags = [
        dict(name='div', attrs={'class':'txt'})
    ]
    remove_tags = [dict(name='br')]

    feeds = [
        (u'Prima pagina', u'http://www.ilsole24ore.com/rss/primapagina.xml'),
        (u'Norme e tributi', u'http://www.ilsole24ore.com/rss/norme-tributi.xml'),
        (u'Finanza e mercati', u'http://www.ilsole24ore.com/rss/finanza-mercati.xml'),
        (u'Economia e lavoro', u'http://www.ilsole24ore.com/rss/economia-lavoro.xml'),
        (u'Italia', u'http://www.ilsole24ore.com/rss/italia.xml'),
        (u'Mondo', u'http://www.ilsole24ore.com/rss/mondo.xml'),
        (u'Tecnologia e business', u'http://www.ilsole24ore.com/rss/tecnologia-business.xml'),
        (u'Cultura e tempo libero', u'http://www.ilsole24ore.com/rss/tempolibero-cultura.xml'),
        (u'Sport', u'http://www.ilsole24ore.com/rss/sport.xml'),
        (u'Professionisti 24', u'http://www.ilsole24ore.com/rss/prof_home.xml')
    ]

    extra_css = '''
        html, body, table, tr, td, h1, h2, h3, h4, h5, h6, p, a, span, br, img {margin:0;padding:0;border:0;font-size:12px;font-family:Arial;}
        .linkHighlight {color:#0292c6;}
        .txt {border-bottom:1px solid #7c7c7c;padding-bottom:20px;text-align:justify;}
        .txt p {line-height:18px;}
        .txt span {line-height:22px;}
        .title h3 {color:#7b7b7b;}
        .title h4 {color:#08526e;font-size:26px;font-family:"Times New Roman";font-weight:normal;}
        '''
|
||||
|
@ -10,13 +10,8 @@ class JerusalemPost(BasicNewsRecipe):
|
||||
__author__ = 'Kovid Goyal'
|
||||
max_articles_per_feed = 10
|
||||
no_stylesheets = True
|
||||
remove_tags_before = {'class':'byline'}
|
||||
remove_tags = [
|
||||
{'class':['artAdBlock clearboth', 'tbartop', 'divdot_vrttbox',
|
||||
'slideshow']},
|
||||
dict(id=['artFontButtons', 'artRelatedBlock']),
|
||||
]
|
||||
remove_tags_after = {'id':'artTxtBlock'}
|
||||
remove_tags_before = {'class':'jp-grid-content'}
|
||||
remove_tags_after = {'id':'body_val'}
|
||||
|
||||
feeds = [ ('Front Page', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333346'),
|
||||
('Israel News', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1178443463156'),
|
||||
@ -25,7 +20,9 @@ class JerusalemPost(BasicNewsRecipe):
|
||||
('Editorials', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333211'),
|
||||
]
|
||||
|
||||
def postprocess_html(self, soup, first):
|
||||
for tag in soup.findAll(name=['table', 'tr', 'td']):
|
||||
tag.name = 'div'
|
||||
def preprocess_html(self, soup):
|
||||
for x in soup.findAll(name=['form', 'input']):
|
||||
x.name = 'div'
|
||||
for x in soup.findAll('body', style=True):
|
||||
del x['style']
|
||||
return soup
|
46
resources/recipes/kamerabild.recipe
Normal file
@ -0,0 +1,46 @@
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
www.kamerabild.se
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Kamerabild(BasicNewsRecipe):
    """Recipe for Kamera & Bild (www.kamerabild.se)."""

    # --- publication metadata -------------------------------------------
    title = 'Kamera & Bild'
    __author__ = 'Darko Miletic'
    description = 'Photo News from Sweden'
    publisher = 'politiken.dk'
    category = 'news, photograph, Sweden'
    language = 'sv'

    # --- fetch settings ---------------------------------------------------
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_empty_feeds = True
    use_embedded_content = False
    encoding = 'utf8'

    extra_css = ' body{font-family: Verdana,Arial,Helvetica,sans-serif } .title{font-weight: bold} .pricerunnerAdContainer{border-bottom: 1px solid; border-top: 1px solid; margin-top: 0.5em; margin-bottom: 0.5em} .elementTeaserKicker{font-weight: bold; color: #AE0A10} '

    # The metadata above is forwarded unchanged to the conversion step.
    conversion_options = dict(
        comment=description,
        tags=category,
        publisher=publisher,
        language=language
    )

    feeds = [
        (u'Articles', u'http://www.kamerabild.se/cmlink/Nyheter-fran-KAMERA-BILD-1.43315.xml'),
    ]

    # --- page clean-up ----------------------------------------------------
    keep_only_tags = [{'name': 'div', 'attrs': {'class': 'container'}}]
    remove_tags_after = {'name': 'div', 'attrs': {'class': 'editor'}}
    remove_tags = [
        {'name': ['object', 'link', 'iframe']},
        {'name': 'div', 'attrs': {'class': ['pricerunner_head', 'sideBar', 'img']}},
    ]

    def preprocess_html(self, soup):
        """Drop every inline ``style`` attribute, then return adeify_images(soup)."""
        for styled in soup.findAll(style=True):
            del styled['style']
        return self.adeify_images(soup)
|
||||
|
||||
|
@ -1,7 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
lanacion.com.ar
|
||||
'''
|
||||
@ -12,28 +10,34 @@ class Lanacion(BasicNewsRecipe):
|
||||
title = 'La Nacion'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'Noticias de Argentina y el resto del mundo'
|
||||
publisher = 'La Nacion'
|
||||
publisher = 'La Nacion S.A.'
|
||||
category = 'news, politics, Argentina'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
use_embedded_content = False
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
language = 'es'
|
||||
encoding = 'cp1252'
|
||||
masthead_url = 'http://www.lanacion.com.ar/imgs/layout/logos/ln341x47.gif'
|
||||
extra_css = ' h1{font-family: Georgia,serif} body{font-family: Arial,sans-serif} img{margin-top: 0.5em; margin-bottom: 0.2em} .notaEpigrafe{font-size: x-small} '
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher': publisher
|
||||
, 'language' : language
|
||||
}
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':'nota floatFix'})]
|
||||
remove_tags = [
|
||||
dict(name='div' , attrs={'class':'notaComentario floatFix noprint' })
|
||||
,dict(name='ul' , attrs={'class':'cajaHerramientas cajaTop noprint'})
|
||||
,dict(name='div' , attrs={'class':'cajaHerramientas noprint' })
|
||||
,dict(attrs={'class':['titulosMultimedia','derecha','techo color']})
|
||||
,dict(name=['iframe','embed','object'])
|
||||
]
|
||||
remove_attributes = ['height','width']
|
||||
|
||||
feeds = [
|
||||
(u'Ultimas noticias' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?origen=2' )
|
||||
@ -51,10 +55,4 @@ class Lanacion(BasicNewsRecipe):
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
|
||||
soup.head.insert(0,mtag)
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
|
||||
language = 'es'
|
||||
return self.adeify_images(soup)
|
||||
|
89
resources/recipes/lescienze.recipe
Normal file
@ -0,0 +1,89 @@
|
||||
#!/usr/bin/env python
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'Lorenzo Vigentini'
|
||||
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
|
||||
__version__ = 'v1.01'
|
||||
__date__ = '10, January 2010'
|
||||
__description__ = 'Monthly Italian edition of Scientific American'
|
||||
|
||||
'''
|
||||
http://lescienze.espresso.repubblica.it/
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class leScienze(BasicNewsRecipe):
    """Recipe for le Scienze, the Italian edition of Scientific American."""

    author = 'Lorenzo Vigentini'
    # Every other recipe exposes its author as __author__; provide it under
    # that name too while keeping the original attribute.
    __author__ = author
    description = 'Monthly Italian edition of Scientific American'

    cover_url = 'http://lescienze.espresso.repubblica.it/images/logo_lescienze.gif'
    title = 'le Scienze'
    publisher = 'Gruppo editoriale lEspresso'
    category = 'Science, general interest'

    language = 'it'
    encoding = 'cp1252'
    timefmt = '[%a, %d %b, %Y]'

    oldest_article = 31
    max_articles_per_feed = 20
    use_embedded_content = False
    # NOTE(review): other recipes spell this setting "recursions"; as written
    # this name is presumably ignored by the framework. Left untouched because
    # renaming it would switch on link following - confirm the intent first.
    recursion = 10

    remove_javascript = True
    no_stylesheets = True

    keep_only_tags = [
        dict(name='div', attrs={'class':'bigbox'})
    ]

    remove_tags = [
        dict(name='span',attrs={'class':'linkindice'}),
        dict(name='div',attrs={'class':'box-commenti'}),
        dict(name='div',attrs={'id':['rssdiv','blocco']})
    ]
    remove_tags_after = [dict(name='div',attrs={'class':'box-commenti'})]

    # One feed per subject area.
    feeds = [
        (u'Antropologia', u'http://data.kataweb.it/rss/scienze/antropologia'),
        (u'Archeologia', u'http://data.kataweb.it/rss/scienze/archeologia'),
        (u'Arte e Musica', u'http://data.kataweb.it/rss/scienze/arte_e_musica'),
        (u'Astrofisica', u'http://data.kataweb.it/rss/scienze/astrofisica'),
        (u'Astronautica', u'http://data.kataweb.it/rss/scienze/astronautica'),
        (u'Astronomia', u'http://data.kataweb.it/rss/scienze/astronomia_e_cosmologia'),
        (u'Biologia', u'http://data.kataweb.it/rss/scienze/biologia'),
        (u'Chimica', u'http://data.kataweb.it/rss/scienze/chimica'),
        (u'Ecologia & ambiente', u'http://data.kataweb.it/rss/scienze/ecologia_e_ambiente'),
        (u'Economia', u'http://data.kataweb.it/rss/scienze/Economia'),
        (u'Fisica', u'http://data.kataweb.it/rss/scienze/Fisica'),
        (u'Informatica', u'http://data.kataweb.it/rss/scienze/informatica_e_telecomunicazioni'),
        (u'Ingegneria', u'http://data.kataweb.it/rss/scienze/ingegneria_e_tecnologia'),
        (u'Matematica', u'http://data.kataweb.it/rss/scienze/Matematica'),
        (u'Medicina', u'http://data.kataweb.it/rss/scienze/Medicina'),
        (u'Paleontologia', u'http://data.kataweb.it/rss/scienze/Paleontologia'),
        (u'Recensioni', u'http://data.kataweb.it/rss/scienze/Recensioni'),
        (u'Psicologia', u'http://data.kataweb.it/rss/scienze/psicologie_e_scienze_cognitive'),
        (u'Scienze della Terra', u'http://data.kataweb.it/rss/scienze/scienze_della_terra'),
        (u'Scienze dello spazio', u'http://data.kataweb.it/rss/scienze/scienze_dello_spazio'),
        (u'Scienze naturali', u'http://data.kataweb.it/rss/scienze/scienze_naturali'),
        (u'Scienze sociali', u'http://data.kataweb.it/rss/scienze/scienze_sociali'),
        (u'Statistica', u'http://data.kataweb.it/rss/scienze/statistica'),
        (u'Storia della scienza', u'http://data.kataweb.it/rss/scienze/storia_della_scienza')
    ]

    extra_css = '''
        h1 {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:18px;}
        h2 {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:18px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:16px; }
        h3 {color:#333333;font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px;}
        h4 {color:#333333; font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; }
        h5 {color:#333333; font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:12px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; text-transform:uppercase;}
        .occhiello {color:#666666;display:block;font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:13px;font-size-adjust:none;font-stretch:normal;font-style:normal;font-variant:normal;font-weight:bold;line-height:15px;}
        .titolo {font-weight:bold;}
        .label {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:12px;font-size-adjust:none;font-stretch:normal;font-style:normal;font-variant:normal;font-weight:bold;height:15px;line-height:15px;text-transform:uppercase;}
        .firma {color:#333333;font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:12px; font-size-adjust:none; font-stretch:normal; font-style:italic; font-variant:normal; font-weight:bold; line-height:15px; text-decoration:none;}
        .testo {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:10px;}
        '''
|
||||
|
||||
|
||||
|
@ -4,21 +4,26 @@ class Metro_Montreal(BasicNewsRecipe):
|
||||
|
||||
title = u'M\xe9tro Montr\xe9al'
|
||||
__author__ = 'Jerry Clapperton'
|
||||
description = u'Le quotidien le plus branch\xe9 sur le monde'
|
||||
language = 'fr'
|
||||
description = 'Le quotidien le plus branch\xe9 sur le monde'
|
||||
language = 'fr'
|
||||
|
||||
oldest_article = 7
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 20
|
||||
use_embedded_content = False
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
encoding = 'utf-8'
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
encoding = 'utf-8'
|
||||
extra_css = '.headline {font-size: x-large;} \n .fact {padding-top: 10pt}'
|
||||
|
||||
extra_css = '.headline {font-size: x-large;} \n .fact {padding-top: 10pt}'
|
||||
remove_tags = [dict(attrs={'id':'buttons'})]
|
||||
|
||||
remove_tags = [dict(attrs={'id':'buttons'}), dict(name=['img', 'style'])]
|
||||
|
||||
feeds = [(u"L'info", u'http://journalmetro.com/linfo/rss'), (u'Monde', u'http://journalmetro.com/monde/rss'), (u'Culture', u'http://journalmetro.com/culture/rss'), (u'Sports', u'http://journalmetro.com/sports/rss'), (u'Paroles', u'http://journalmetro.com/paroles/rss')]
|
||||
feeds = [
|
||||
(u"L'info", u'http://journalmetro.com/linfo/rss'),
|
||||
(u'Monde', u'http://journalmetro.com/monde/rss'),
|
||||
(u'Culture', u'http://journalmetro.com/culture/rss'),
|
||||
(u'Sports', u'http://journalmetro.com/sports/rss'),
|
||||
(u'Paroles', u'http://journalmetro.com/paroles/rss')
|
||||
]
|
||||
|
||||
def print_version(self, url):
    """Return the print URL: 'article' becomes 'ArticlePrint', plus '?language=fr'."""
    print_path = url.replace('article', 'ArticlePrint')
    return print_path + '?language=fr'
|
||||
|
35
resources/recipes/newsstraitstimes.recipe
Normal file
@ -0,0 +1,35 @@
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
www.nst.com.my
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Newstraitstimes(BasicNewsRecipe):
    """Recipe for the New Straits Times, Malaysia (www.nst.com.my)."""

    title = 'New Straits Times from Malaysia'
    __author__ = 'Darko Miletic'
    description = 'Learning Curve, Sunday People, New Straits Times from Malaysia'
    publisher = 'nst.com.my'
    category = 'news, politics, Malaysia'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'cp1252'
    use_embedded_content = False
    language = 'en'
    masthead_url = 'http://www.nst.com.my/Current_News/NST/Images/new-nstonline.jpg'

    # Metadata passed on to the conversion step.
    conversion_options = {
        'comment' : description
        , 'tags' : category
        , 'publisher' : publisher
        , 'language' : language
    }

    remove_tags = [dict(name=['link','table'])]
    # Must be a list of tag specs, like in every other recipe; a bare dict
    # would be iterated key by key ('name', 'attrs') instead of being used
    # as one spec.
    keep_only_tags = [dict(name='div',attrs={'id':'haidah'})]

    feeds = [(u'Articles', u'http://www.nst.com.my/rss/allSec')]
|
||||
|
@ -74,7 +74,6 @@ class Nin(BasicNewsRecipe):
|
||||
feedpage = self.index_to_soup(feedlink)
|
||||
self.report_progress(0, _('Fetching feed')+' %s...'%(section))
|
||||
inarts = []
|
||||
count2 = 0
|
||||
for art in feedpage.findAll('span',attrs={'class':'artTitle'}):
|
||||
alink = art.parent
|
||||
url = self.PREFIX + alink['href']
|
||||
|
@ -5,7 +5,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
'''
|
||||
nytimes.com
|
||||
'''
|
||||
import re
|
||||
import re, time
|
||||
from calibre import entity_to_unicode
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString, Comment
|
||||
|
@ -37,7 +37,7 @@ class NYTimes(BasicNewsRecipe):
|
||||
dict(name=['script', 'noscript', 'style'])]
|
||||
encoding = decode
|
||||
no_stylesheets = True
|
||||
extra_css = 'h1 {font: sans-serif large;}\n.byline {font:monospace;}'
|
||||
extra_css = 'h1 {font-face:sans-serif; font-size:2em; font-weight:bold;}\n.byline {font:monospace;}\n.bold {font-weight:bold;}'
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
|
56
resources/recipes/nytimesbook.recipe
Normal file
@ -0,0 +1,56 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
|
||||
class NewYorkTimesBookReview(BasicNewsRecipe):
    """Recipe for the New York Times Sunday Book Review feed.

    Follow-up pages of multi-page reviews (links matching ``pagewanted=N``)
    are fetched as well; see match_regexps and recursions.
    """
    title = u'New York Times Book Review'
    language = 'en'
    __author__ = 'Krittika Goyal'
    oldest_article = 8 #days
    max_articles_per_feed = 1000
    recursions = 2
    #encoding = 'latin1'

    # Was "remove_stylesheets", a name no other recipe uses; the setting the
    # other recipes rely on to drop site stylesheets is "no_stylesheets".
    no_stylesheets = True
    #remove_tags_before = dict(name='h1', attrs={'class':'heading'})
    remove_tags_after = dict(name='div', attrs={'id':'authorId'})
    remove_tags = [
        dict(name='iframe'),
        dict(name=['div', 'a'], attrs={'class':['enlargeThis', 'jumpLink']}),
        dict(name='div', attrs={'id':['sidebarArticles', 'toolsRight']}),
        #dict(name='ul', attrs={'class':'article-tools'}),
        #dict(name='ul', attrs={'class':'articleTools'}),
    ]
    match_regexps = [
        r'http://www.nytimes.com/.+pagewanted=[2-9]+'
    ]

    feeds = [
        ('New York Times Sunday Book Review',
         'http://feeds.nytimes.com/nyt/rss/SundayBookReview'),
    ]

    def preprocess_html(self, soup):
        """Reduce the page to a minimal document holding only div#article.

        If the page has no div#article the soup is returned unchanged rather
        than trying to insert None into the new body.
        """
        story = soup.find(name='div', attrs={'id':'article'})
        if story is None:
            return soup
        #td = heading.findParent(name='td')
        #td.extract()
        soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
        body = soup.find(name='body')
        body.insert(0, story)
        #for x in soup.findAll(name='p', text=lambda x:x and '-->' in x):
            #p = x.findParent('p')
            #if p is not None:
                #p.extract()
        return soup

    def postprocess_html(self, soup, first):
        """Remove page-navigation links; on follow-up pages also drop repeats.

        When *first* is false the first <h1> and the element with class
        "timestamp" are removed, if present.
        """
        for div in soup.findAll(id='pageLinks'):
            div.extract()
        if not first:
            h1 = soup.find('h1')
            if h1 is not None:
                h1.extract()
            t = soup.find(attrs={'class':'timestamp'})
            if t is not None:
                t.extract()
        return soup
|
@ -1,13 +1,12 @@
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
pagina12.com.ar
|
||||
'''
|
||||
|
||||
import time
|
||||
from calibre import strftime
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
|
||||
class Pagina12(BasicNewsRecipe):
|
||||
title = 'Pagina - 12'
|
||||
@ -16,13 +15,14 @@ class Pagina12(BasicNewsRecipe):
|
||||
publisher = 'La Pagina S.A.'
|
||||
category = 'news, politics, Argentina'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
max_articles_per_feed = 200
|
||||
no_stylesheets = True
|
||||
encoding = 'cp1252'
|
||||
use_embedded_content = False
|
||||
language = 'es'
|
||||
remove_empty_feeds = True
|
||||
extra_css = ' body{font-family: sans-serif} '
|
||||
masthead_url = 'http://www.pagina12.com.ar/commons/imgs/logo-home.gif'
|
||||
extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} #autor{font-weight: bold} #fecha,#epigrafe{font-size: 0.9em; margin: 5px} #imagen{border: 1px solid black; margin: 0 0 1.25em 1.25em; width: 232px } '
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
@ -45,14 +45,24 @@ class Pagina12(BasicNewsRecipe):
|
||||
,(u'NO' , u'http://www.pagina12.com.ar/diario/rss/no.xml' )
|
||||
,(u'Las/12' , u'http://www.pagina12.com.ar/diario/rss/las12.xml' )
|
||||
,(u'Soy' , u'http://www.pagina12.com.ar/diario/rss/soy.xml' )
|
||||
,(u'M2' , u'http://www.pagina12.com.ar/diario/rss/futuro.xml' )
|
||||
,(u'Futuro' , u'http://www.pagina12.com.ar/diario/rss/futuro.xml' )
|
||||
,(u'M2' , u'http://www.pagina12.com.ar/diario/rss/m2.xml' )
|
||||
,(u'Rosario/12' , u'http://www.pagina12.com.ar/diario/rss/rosario.xml' )
|
||||
]
|
||||
|
||||
def print_version(self, url):
    """Return the printer-friendly address for the article at ``url``.

    Each occurrence of the site root in ``url`` is replaced by the same root
    followed by ``imprimir/``; other URLs come back unchanged.
    """
    site_root = 'http://www.pagina12.com.ar/'
    print_root = 'http://www.pagina12.com.ar/imprimir/'
    return url.replace(site_root, print_root)
|
||||
|
||||
def get_cover_url(self):
|
||||
imgnames = ['tapan.jpg','tapagn.jpg','tapan_gr.jpg','tapagn.jpg','tapagn.jpg','tapan.jpg','tapagn.jpg']
|
||||
weekday = time.localtime().tm_wday
|
||||
return strftime('http://www.pagina12.com.ar/fotos/%Y%m%d/diario/') + imgnames[weekday]
|
||||
rawc = self.index_to_soup('http://www.pagina12.com.ar/diario/principal/diario/index.html',True)
|
||||
rawc2 = re.sub(r'PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN','PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"',rawc)
|
||||
soup = BeautifulSoup(rawc2,fromEncoding=self.encoding,smartQuotesTo=None)
|
||||
for image in soup.findAll('img',alt=True):
|
||||
if image['alt'].startswith('Tapa de la fecha'):
|
||||
return image['src']
|
||||
return None
|
||||
|
||||
def preprocess_html(self, soup):
    """Delete the inline ``style`` attribute from every element that has one.

    Returns the same ``soup`` object after modifying it in place.
    """
    styled_nodes = soup.findAll(style=True)
    for node in styled_nodes:
        del node['style']
    return soup
|
@ -51,6 +51,7 @@ class PeopleMag(BasicNewsRecipe):
|
||||
dict(name='div', attrs={'class':'sharelinkcont'}),
|
||||
dict(name='div', attrs={'class':'categories'}),
|
||||
dict(name='ul', attrs={'class':'categories'}),
|
||||
dict(name='div', attrs={'class':'related_content'}),
|
||||
dict(name='div', attrs={'id':'promo'}),
|
||||
dict(name='div', attrs={'class':'linksWrapper'}),
|
||||
dict(name='p', attrs={'class':'tag tvnews'}),
|
||||
|
45
resources/recipes/radikal_tr.recipe
Normal file
@ -0,0 +1,45 @@
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
radikal.com.tr
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Radikal_tr(BasicNewsRecipe):
    """Recipe for the columnists ("Yazarlar") feed of radikal.com.tr.

    Articles are downloaded through the site's print view and cut down to
    the span between the first ``<h1>`` and the ``haberDetayYazi`` element.
    """

    # --- metadata ---------------------------------------------------------
    title = 'Radikal - Turkey'
    __author__ = 'Darko Miletic'
    description = 'News from Turkey'
    publisher = 'radikal'
    category = 'news, politics, Turkey'
    language = 'tr'
    masthead_url = 'http://www.radikal.com.tr/D/i/1/V2/radikal_logo.jpg'

    # --- download behaviour -----------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 150
    no_stylesheets = True
    encoding = 'cp1254'
    use_embedded_content = False
    extra_css = ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .article_description,body{font-family: Arial,Verdana,Helvetica,sans1,sans-serif } '

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # --- page clean-up ----------------------------------------------------
    remove_tags = [dict(name=['embed', 'iframe', 'object', 'link', 'base'])]
    remove_tags_before = dict(name='h1')
    remove_tags_after = dict(attrs={'id': 'haberDetayYazi'})

    feeds = [(u'Yazarlar', u'http://www.radikal.com.tr/d/rss/RssYazarlar.xml')]

    def print_version(self, url):
        """Build the print-view URL from the ``ArticleID=`` value in ``url``.

        Everything after the last ``ArticleID=`` is used as the id; when the
        marker is absent the whole ``url`` is appended unchanged.
        """
        article_id = url.split('ArticleID=')[-1]
        print_base = 'http://www.radikal.com.tr/Default.aspx?aType=HaberYazdir&ArticleID='
        return print_base + article_id

    def preprocess_html(self, soup):
        """Hand the page to ``adeify_images`` and return its result."""
        return self.adeify_images(soup)
|
||||
|
64
resources/recipes/readitlater.recipe
Normal file
@ -0,0 +1,64 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
readitlaterlist.com
|
||||
'''
|
||||
|
||||
from calibre import strftime
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Readitlater(BasicNewsRecipe):
    """Recipe that downloads the unread list of a readitlaterlist.com account.

    The article index is scraped from the account's HTML "unread" page
    rather than from an RSS feed, so ``parse_index`` is overridden.
    """

    title = 'Read It Later'
    __author__ = 'Darko Miletic'
    description = '''Personalized news feeds. Go to readitlaterlist.com to
                     setup up your news. Fill in your account
                     username, and optionally you can add password.'''
    publisher = 'readitlater.com'
    category = 'news, custom'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    needs_subscription = True
    INDEX = u'http://readitlaterlist.com'
    LOGIN = INDEX + u'/l'

    # Each entry is an HTML index page, not an RSS feed (see parse_index).
    feeds = [(u'Unread articles', INDEX + u'/unread')]

    def get_browser(self):
        """Return a browser, logged in when a username is configured.

        The first form on the ``LOGIN`` page is filled with the username
        (field ``feed_id``) and, if one is set, the password.
        """
        br = BasicNewsRecipe.get_browser()
        if self.username is not None:
            br.open(self.LOGIN)
            br.select_form(nr=0)
            br['feed_id'] = self.username
            if self.password is not None:
                br['password'] = self.password
            br.submit()
        return br

    def parse_index(self):
        """Scrape every index page in ``feeds`` into ``(title, articles)`` pairs.

        Each ``<li>`` of the ``<ul id="list">`` element that holds an
        ``<a class="text" href=...>`` link becomes one article dict with the
        keys ``title``, ``date``, ``url`` and ``description``.  A page
        without that list yields a feed with no articles.
        """
        totalfeeds = []
        for feedtitle, feedurl in self.get_feeds():
            self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
            articles = []
            soup = self.index_to_soup(feedurl)
            ritem = soup.find('ul', attrs={'id': 'list'})
            # find() returns None when the list is missing; treat that as an
            # empty feed instead of failing with AttributeError.
            entries = ritem.findAll('li') if ritem is not None else []
            for item in entries:
                atag = item.find('a', attrs={'class': 'text'})
                if atag and atag.has_key('href'):
                    articles.append({
                        'title': self.tag_to_string(item.div),
                        'date': strftime(self.timefmt),
                        'url': self.INDEX + atag['href'],
                        'description': '',
                    })
            totalfeeds.append((feedtitle, articles))
        return totalfeeds
|
||||
|
107
resources/recipes/sueddeutschezeitung.recipe
Normal file
@ -0,0 +1,107 @@
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
www.sueddeutsche.de/sz/
|
||||
'''
|
||||
|
||||
import urllib
|
||||
from calibre import strftime
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class SueddeutcheZeitung(BasicNewsRecipe):
    """Recipe for the daily edition pages under www.sueddeutsche.de/sz/.

    The section indexes are HTML pages addressed by today's date, so
    ``parse_index`` scrapes them instead of reading RSS feeds.
    """

    title = 'Sueddeutche Zeitung'
    __author__ = 'Darko Miletic'
    description = 'News from Germany. Access to paid content.'
    publisher = 'Sueddeutche Zeitung'
    category = 'news, politics, Germany'
    no_stylesheets = True
    oldest_article = 2
    encoding = 'cp1252'
    needs_subscription = True
    remove_empty_feeds = True
    PREFIX = 'http://www.sueddeutsche.de'
    # Evaluated once, when the class body is executed.
    INDEX = PREFIX + strftime('/sz/%Y-%m-%d/')
    LOGIN = PREFIX + '/app/lbox/index.html'
    use_embedded_content = False
    masthead_url = 'http://pix.sueddeutsche.de/img/g_.gif'
    language = 'de'
    extra_css = ' body{font-family: Arial,Helvetica,sans-serif} '

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
        'linearize_tables': True,
    }

    remove_attributes = ['height', 'width']

    def get_browser(self):
        """Return a browser that has opened ``INDEX`` and, if credentials
        are configured, posted them to the ``LOGIN`` URL."""
        br = BasicNewsRecipe.get_browser()
        br.open(self.INDEX)
        if self.username is not None and self.password is not None:
            data = urllib.urlencode({
                'login_name': self.username,
                'login_passwort': self.password,
                'lboxaction': 'doLogin',
                'passtxt': 'Passwort',
                'referer': self.INDEX,
                'x': '22',
                'y': '7',
            })
            br.open(self.LOGIN, data)
        return br

    remove_tags = [
        dict(attrs={'class': 'hidePrint'}),
        dict(name=['link', 'object', 'embed', 'base', 'iframe']),
    ]
    remove_tags_before = dict(name='h2')
    remove_tags_after = dict(attrs={'class': 'author'})

    feeds = [
        (u'Politik', INDEX + 'politik/'),
        (u'Seite drei', INDEX + 'seitedrei/'),
        (u'Meinungsseite', INDEX + 'meinungsseite/'),
        (u'Wissen', INDEX + 'wissen/'),
        (u'Panorama', INDEX + 'panorama/'),
        (u'Feuilleton', INDEX + 'feuilleton/'),
        (u'Medien', INDEX + 'medien/'),
        (u'Wirtschaft', INDEX + 'wirtschaft/'),
        (u'Sport', INDEX + 'sport/'),
        (u'Bayern', INDEX + 'bayern/'),
        (u'Muenchen', INDEX + 'muenchen/'),
        (u'jetzt.de', INDEX + 'jetzt.de/'),
    ]

    def parse_index(self):
        """Scrape every section page in ``feeds`` into ``(title, articles)`` pairs.

        Articles are the ``<td class="topthema">`` cells of the element with
        class ``szprintd``.  The link inside the ``Titel`` element supplies
        the url and title; the cell's first ``<p>`` (minus any ``<script>``)
        supplies the description.  Missing pieces are skipped rather than
        raising: a page without the table gives an empty feed, a cell
        without a title link is ignored, and a cell without ``<p>`` gets an
        empty description.
        """
        totalfeeds = []
        for feedtitle, feedurl in self.get_feeds():
            self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
            articles = []
            soup = self.index_to_soup(feedurl)
            tbl = soup.find(attrs={'class': 'szprintd'})
            if tbl is None:
                cells = []
            else:
                cells = tbl.findAll(name='td', attrs={'class': 'topthema'})
            for item in cells:
                heading = item.find(attrs={'class': 'Titel'})
                atag = heading.a if heading is not None else None
                if atag is None or not atag.get('href'):
                    continue
                ptag = item.find('p')
                description = ''
                if ptag is not None:
                    stag = ptag.find('script')
                    if stag:
                        stag.extract()
                    description = self.tag_to_string(ptag)
                articles.append({
                    'title': self.tag_to_string(atag),
                    'date': strftime(self.timefmt),
                    'url': self.PREFIX + atag['href'],
                    'description': description,
                })
            totalfeeds.append((feedtitle, articles))
        return totalfeeds

    def print_version(self, url):
        """Return the print page address: ``url`` with ``print.html`` appended."""
        return url + 'print.html'
|
||||
|
@ -9,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class TelegraphUK(BasicNewsRecipe):
|
||||
title = u'Telegraph.co.uk'
|
||||
__author__ = 'Darko Miletic'
|
||||
__author__ = 'Darko Miletic and Sujata Raman'
|
||||
description = 'News from United Kingdom'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
@ -18,23 +18,26 @@ class TelegraphUK(BasicNewsRecipe):
|
||||
|
||||
use_embedded_content = False
|
||||
|
||||
extra_css = '''
|
||||
h1{font-family :Arial,Helvetica,sans-serif; font-size:large; }
|
||||
h2{font-family :Arial,Helvetica,sans-serif; font-size:x-small; color:#444444}
|
||||
.story{font-family :Arial,Helvetica,sans-serif; font-size: x-small;}
|
||||
.byline{color:#666666; font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
|
||||
a{color:#234B7B; }
|
||||
.imageExtras{color:#666666; font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
|
||||
'''
|
||||
extra_css = '''
|
||||
h1{font-family :Arial,Helvetica,sans-serif; font-size:large; }
|
||||
h2{font-family :Arial,Helvetica,sans-serif; font-size:x-small; color:#444444;}
|
||||
.story{font-family :Arial,Helvetica,sans-serif; font-size: x-small;}
|
||||
.byline{color:#666666; font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
|
||||
a{color:#234B7B; }
|
||||
.imageExtras{color:#666666; font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
|
||||
'''
|
||||
|
||||
keep_only_tags = [
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'class':'storyHead'})
|
||||
,dict(name='div', attrs={'class':'story' })
|
||||
#,dict(name='div', attrs={'class':['slideshowHD gutterUnder',"twoThirds gutter","caption" ] })
|
||||
]
|
||||
remove_tags = [dict(name='div', attrs={'class':['related_links_inline',"imgindex","next","prev","gutterUnder"]})]
|
||||
]
|
||||
remove_tags = [dict(name='div', attrs={'class':['related_links_inline',"imgindex","next","prev","gutterUnder",'ssImgHide','imageExtras','ssImg hide']})
|
||||
#,dict(name='div', attrs={'class':['toolshideoneQuarter']})
|
||||
,dict(name='span', attrs={'class':['num','placeComment']})
|
||||
]
|
||||
|
||||
feeds = [
|
||||
feeds = [
|
||||
(u'UK News' , u'http://www.telegraph.co.uk/news/uknews/rss' )
|
||||
,(u'World News' , u'http://www.telegraph.co.uk/news/worldnews/rss' )
|
||||
,(u'Politics' , u'http://www.telegraph.co.uk/news/newstopics/politics/rss' )
|
||||
@ -45,7 +48,7 @@ class TelegraphUK(BasicNewsRecipe):
|
||||
,(u'Earth News' , u'http://www.telegraph.co.uk/earth/earthnews/rss' )
|
||||
,(u'Comment' , u'http://www.telegraph.co.uk/comment/rss' )
|
||||
,(u'How about that?', u'http://www.telegraph.co.uk/news/newstopics/howaboutthat/rss' )
|
||||
]
|
||||
]
|
||||
|
||||
def get_article_url(self, article):
|
||||
|
||||
@ -57,3 +60,15 @@ class TelegraphUK(BasicNewsRecipe):
|
||||
return url
|
||||
|
||||
|
||||
def postprocess_html(self,soup,first):
    """Remove certain ``<p>`` elements from ``<div class="byline">`` blocks.

    A paragraph is extracted when ``getattr(first_child, "Comments", True)``
    is truthy, which is the case whenever its first child has no
    ``Comments`` attribute.  Paragraphs without children are left alone.
    Returns the same ``soup``; ``first`` is not used.
    """
    for bylineTag in soup.findAll(name='div', attrs={'class':'byline'}):
        for pTag in bylineTag.findAll(name='p'):
            # An empty <p> has no first child; indexing it would raise
            # IndexError and abort processing of the whole article.
            if not pTag.contents:
                continue
            # NOTE(review): presumably meant to drop the "Comments" link
            # paragraph; confirm how the parser resolves this attribute.
            if getattr(pTag.contents[0],"Comments",True):
                pTag.extract()
    return soup
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -1,22 +0,0 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class The_Gazette(BasicNewsRecipe):
|
||||
|
||||
cover_url = 'file:///D:/Documents/Pictures/Covers/The_Gazette.jpg'
|
||||
title = u'The Gazette'
|
||||
__author__ = 'Jerry Clapperton'
|
||||
description = 'Montreal news in English'
|
||||
language = 'en_CA'
|
||||
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 20
|
||||
use_embedded_content = False
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
encoding = 'utf-8'
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'id':['storyheader','page1']})]
|
||||
|
||||
extra_css = '.headline {font-size: x-large;} \n .fact {padding-top: 10pt}'
|
||||
|
||||
feeds = [(u'News', u'http://feeds.canada.com/canwest/F297'), (u'Opinion', u'http://feeds.canada.com/canwest/F7383'), (u'Arts', u'http://feeds.canada.com/canwest/F7366'), (u'Life', u'http://rss.canada.com/get/?F6934'), (u'Business', u'http://feeds.canada.com/canwest/F6939'), (u'Travel', u'http://rss.canada.com/get/?F6938'), (u'Health', u'http://feeds.canada.com/canwest/F7397'), (u'Technology', u'http://feeds.canada.com/canwest/F7411')]
|
@ -9,6 +9,7 @@ class The_New_Republic(BasicNewsRecipe):
|
||||
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':['print-logo', 'print-site_name', 'img-left', 'print-source_url']}),
|
||||
@ -21,14 +22,15 @@ class The_New_Republic(BasicNewsRecipe):
|
||||
('Economy', 'http://www.tnr.com/rss/articles/Economy'),
|
||||
('Environment and Energy', 'http://www.tnr.com/rss/articles/Environment-%2526-Energy'),
|
||||
('Health Care', 'http://www.tnr.com/rss/articles/Health-Care'),
|
||||
('Urban Policy', 'http://www.tnr.com/rss/articles/Urban-Policy'),
|
||||
('Metro Policy', 'http://www.tnr.com/rss/articles/Metro-Policy'),
|
||||
('World', 'http://www.tnr.com/rss/articles/World'),
|
||||
('Film', 'http://www.tnr.com/rss/articles/Film'),
|
||||
('Books', 'http://www.tnr.com/rss/articles/books'),
|
||||
('The Book', 'http://www.tnr.com/rss/book'),
|
||||
('Jonathan Chait', 'http://www.tnr.com/rss/blogs/Jonathan-Chait'),
|
||||
('The Plank', 'http://www.tnr.com/rss/blogs/The-Plank'),
|
||||
('The Treatment', 'http://www.tnr.com/rss/blogs/The-Treatment'),
|
||||
('The Spine', 'http://www.tnr.com/rss/blogs/The-Spine'),
|
||||
('The Stash', 'http://www.tnr.com/rss/blogs/The-Stash'),
|
||||
('The Vine', 'http://www.tnr.com/rss/blogs/The-Vine'),
|
||||
('The Avenue', 'http://www.tnr.com/rss/blogs/The-Avenue'),
|
||||
('William Galston', 'http://www.tnr.com/rss/blogs/William-Galston'),
|
||||
@ -40,3 +42,4 @@ class The_New_Republic(BasicNewsRecipe):
|
||||
|
||||
def print_version(self, url):
    """Return the print-view address for the article at ``url``.

    Each occurrence of the site root is replaced by the root followed by
    ``print/``; URLs that do not contain the root are returned unchanged.
    """
    site_root = 'http://www.tnr.com/'
    print_root = 'http://www.tnr.com/print/'
    return url.replace(site_root, print_root)
|
||||
|
||||
|
37
resources/recipes/theluminouslandscape.recipe
Normal file
@ -0,0 +1,37 @@
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
luminous-landscape.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class theluminouslandscape(BasicNewsRecipe):
    """Recipe for the "What's new" feed of luminous-landscape.com.

    Article bodies are taken from the feed itself
    (``use_embedded_content`` is true).
    """

    # --- metadata ---------------------------------------------------------
    title = 'The Luminous Landscape'
    __author__ = 'Darko Miletic'
    description = 'A photography news and information website in the form of a weblog with multiple authors who write on a variety of photography and art-photography related issues.'
    publisher = 'The Luminous Landscape '
    category = 'news, blog, photograph, international'
    language = 'en'

    # --- download behaviour -----------------------------------------------
    oldest_article = 15
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_empty_feeds = True
    use_embedded_content = True
    encoding = 'cp1252'

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    feeds = [(u"What's new", u'http://www.luminous-landscape.com/whatsnew/rssfeed.php')]
    remove_tags = [dict(name=['object', 'link', 'iframe'])]

    def preprocess_html(self, soup):
        """Hand the page to ``adeify_images`` and return its result."""
        return self.adeify_images(soup)
|
||||
|
||||
|
41
resources/recipes/theonlinephotographer.recipe
Normal file
@ -0,0 +1,41 @@
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
theonlinephotographer.typepad.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class theonlinephotographer(BasicNewsRecipe):
    """Recipe for the article feed of theonlinephotographer.typepad.com.

    Each article page is trimmed to the span between the ``entry-header``
    heading and the ``entry-footer`` block.
    """

    # --- metadata ---------------------------------------------------------
    title = 'The Online Photographer'
    __author__ = 'Darko Miletic'
    description = 'A photography news and information website in the form of a weblog with multiple authors who write on a variety of photography and art-photography related issues.'
    publisher = 'The Online Photographer'
    category = 'news, blog, photograph, international'
    language = 'en'

    # --- download behaviour -----------------------------------------------
    oldest_article = 15
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_empty_feeds = True
    use_embedded_content = False
    encoding = 'utf8'

    extra_css = ' body{font-family: Georgia,"Times New Roman",serif } '

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # --- sources and page clean-up ----------------------------------------
    feeds = [(u'Articles', u'http://feeds.feedburner.com/typepad/ZSjz')]
    remove_tags_before = dict(name='h3', attrs={'class': 'entry-header'})
    remove_tags_after = dict(name='div', attrs={'class': 'entry-footer'})
    remove_tags = [dict(name=['object', 'link', 'iframe'])]

    def preprocess_html(self, soup):
        """Hand the page to ``adeify_images`` and return its result."""
        return self.adeify_images(soup)
|
||||
|
||||
|
53
resources/recipes/tidbits.recipe
Normal file
@ -0,0 +1,53 @@
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
db.tidbits.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class TidBITS(BasicNewsRecipe):
    """Recipe for the per-topic RSS feeds published at db.tidbits.com.

    Article bodies come from the feeds themselves
    (``use_embedded_content`` is true).
    """

    # --- metadata ---------------------------------------------------------
    title = 'TidBITS: Mac News for the Rest of Us'
    __author__ = 'Darko Miletic'
    description = 'Insightful news, reviews, and analysis of the Macintosh and Internet worlds'
    publisher = 'TidBITS Publishing Inc.'
    category = 'news, Apple, Macintosh, IT, Internet'
    language = 'en'
    masthead_url = 'http://db.tidbits.com/images/tblogo9.gif'

    # --- download behaviour -----------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = True
    remove_empty_feeds = True
    extra_css = ' body{font-family: Georgia,"Times New Roman",Times,serif} '

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # --- page clean-up ----------------------------------------------------
    remove_attributes = ['width', 'height']
    remove_tags = [dict(name='small')]
    remove_tags_after = dict(name='small')

    # (feed title, feed url) pairs, one per topic
    feeds = [
        (u'Business Apps', u'http://db.tidbits.com/feeds/business.rss'),
        (u'Entertainment', u'http://db.tidbits.com/feeds/entertainment.rss'),
        (u'External Links', u'http://db.tidbits.com/feeds/links.rss'),
        (u'Home Mac', u'http://db.tidbits.com/feeds/home.rss'),
        (u'Inside TidBITS', u'http://db.tidbits.com/feeds/inside.rss'),
        (u'iPod & iPhone', u'http://db.tidbits.com/feeds/ipod-iphone.rss'),
        (u'Just for Fun', u'http://db.tidbits.com/feeds/fun.rss'),
        (u'Macs & Mac OS X', u'http://db.tidbits.com/feeds/macs.rss'),
        (u'Media Creation', u'http://db.tidbits.com/feeds/creative.rss'),
        (u'Networking & Communications', u'http://db.tidbits.com/feeds/net.rss'),
        (u'Opinion & Editorial', u'http://db.tidbits.com/feeds/opinion.rss'),
        (u'Support & Problem Solving', u'http://db.tidbits.com/feeds/support.rss'),
        (u'Safe Computing', u'http://db.tidbits.com/feeds/security.rss'),
        (u'Tech News', u'http://db.tidbits.com/feeds/tech.rss'),
        (u'Software Watchlist', u'http://db.tidbits.com/feeds/watchlist.rss'),
    ]
|
@ -10,7 +10,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class TorontoSun(BasicNewsRecipe):
|
||||
title = 'Toronto SUN'
|
||||
__author__ = 'Darko Miletic'
|
||||
__author__ = 'Darko Miletic and Sujata Raman'
|
||||
description = 'News from Canada'
|
||||
publisher = 'Toronto Sun'
|
||||
category = 'news, politics, Canada'
|
||||
@ -21,25 +21,50 @@ class TorontoSun(BasicNewsRecipe):
|
||||
encoding = 'cp1252'
|
||||
language = 'en_CA'
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
}
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
}
|
||||
|
||||
keep_only_tags =[
|
||||
dict(name='div', attrs={'class':'articleHead'})
|
||||
,dict(name='div', attrs={'id':'channelContent'})
|
||||
]
|
||||
remove_tags = [
|
||||
dict(name='div',attrs={'class':['leftBox','bottomBox clear','bottomBox','breadCrumb']})
|
||||
,dict(name=['link','iframe','object'])
|
||||
,dict(name='a',attrs={'rel':'swap'})
|
||||
,dict(name='ul',attrs={'class':'tabs dl contentSwap'})
|
||||
]
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'class':['articleHead','leftBox']})
|
||||
,dict(name='div', attrs={'id':'channelContent'})
|
||||
,dict(name='div', attrs={'id':'rotateBox'})
|
||||
,dict(name='img')
|
||||
]
|
||||
remove_tags = [
|
||||
dict(name='div',attrs={'class':['bottomBox clear','bottomBox','breadCrumb','articleControls thin','articleControls thin short','extraVideoList']})
|
||||
,dict(name='h2',attrs={'class':'microhead'})
|
||||
,dict(name='div',attrs={'id':'commentsBottom'})
|
||||
,dict(name=['link','iframe','object'])
|
||||
,dict(name='a',attrs={'rel':'swap'})
|
||||
,dict(name='a',attrs={'href':'/news/haiti/'})
|
||||
,dict(name='ul',attrs={'class':['tabs dl contentSwap','micrositeNav clearIt hList','galleryNav rotateNav']})
|
||||
]
|
||||
|
||||
remove_tags_after = [
|
||||
dict(name='div',attrs={'class':'bottomBox clear'})
|
||||
,dict(name='div',attrs={'class':'rotateBox'})
|
||||
,dict(name='div',attrs={'id':'contentSwap'})
|
||||
]
|
||||
|
||||
|
||||
extra_css = '''
|
||||
h1{font-family :Arial,Helvetica,sans-serif; font-size:large;}
|
||||
h2{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#666666;}
|
||||
h3{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000;}
|
||||
p{font-family :Arial,Helvetica,sans-serif; font-size:x-small;}
|
||||
.bold{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;color:#444444;margin-left: 0px;}
|
||||
.subheading{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000; font-weight: bold;}
|
||||
.byline{color:#666666; font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
|
||||
.byline span{color:#666666; font-family :Arial,Helvetica,sans-serif; font-size: xx-small; text-transform: uppercase;}
|
||||
.updated{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
|
||||
.galleryCaption{font-family :Arial,Helvetica,sans-serif; font-size: x-small;}
|
||||
.galleryUpdated{font-family :Arial,Helvetica,sans-serif; font-size: x-small;}
|
||||
'''
|
||||
|
||||
remove_tags_after = dict(name='div',attrs={'class':'bottomBox clear'})
|
||||
|
||||
feeds = [
|
||||
(u'News' , u'http://www.torontosun.com/news/rss.xml' )
|
||||
@ -48,3 +73,19 @@ class TorontoSun(BasicNewsRecipe):
|
||||
,(u'World' , u'http://www.torontosun.com/news/world/rss.xml' )
|
||||
,(u'Money' , u'http://www.torontosun.com/money/rss.xml' )
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
    """Point ``<img>`` tags at the image named in their ``src`` query string.

    For a ``src`` such as ``...?src=<image-url>&size=<W>x<H>`` the tag's
    ``src`` becomes ``<image-url>`` and ``width`` / ``height`` are set from
    the value of the last query parameter.  Images whose ``src`` has no
    ``=`` or whose last parameter is not of the form ``<W>x<H>`` are left
    untouched.  Returns the same ``soup``.
    """
    for img in soup.findAll('img', src=True):
        # Text after the first '=' of the query part; empty when no '='.
        query = img.get('src').split('?')[-1].partition('=')[-1]
        if not query:
            continue
        parts = query.split('&')
        dims = parts[-1].partition('=')[-1].split('x')
        if len(dims) < 2:
            # No usable size: rewriting would leave the tag half-updated
            # (and previously raised IndexError), so skip this image.
            continue
        img['src'] = parts[0].partition('=')[0]
        img['width'] = dims[0]
        img['height'] = dims[1]
    return soup
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
66
resources/recipes/tuttosport.recipe
Normal file
@ -0,0 +1,66 @@
|
||||
#!/usr/bin/env python
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'Lorenzo Vigentini'
|
||||
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
|
||||
__version__ = 'v1.01'
|
||||
__date__ = '30, January 2010'
|
||||
__description__ = 'Sport daily news from Italy'
|
||||
|
||||
'''www.tuttosport.com'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class tuttosport(BasicNewsRecipe):
    """Recipe for the RSS feeds of the Italian sports daily tuttosport.com.

    Articles are fetched through the site's ``?print`` view and reduced to
    the headline, image, abstract and body containers.
    """

    author = 'Lorenzo Vigentini'
    description = 'Sport daily news from Italy'

    cover_url = 'http://www.tuttosport.com/res/imgs/logo_TuttoSport.png'
    title = 'Tuttosport'
    publisher = 'Nuova Editoriale Sportiva S.r.l'
    category = 'Sport News'

    language = 'it'
    timefmt = '[%a, %d %b, %Y]'

    oldest_article = 2
    max_articles_per_feed = 20
    use_embedded_content = False
    # NOTE(review): the documented BasicNewsRecipe option appears to be
    # `recursions`; confirm whether this attribute has any effect.
    recursion = 10

    remove_javascript = True
    no_stylesheets = True

    def print_version(self, url):
        """Return the print view of ``url``.

        The URL is cut to its first ten ``/``-separated segments and
        ``?print`` is appended.
        """
        segments = url.split('/')
        printURL = '/'.join(segments[0:10]) + '?print'
        return printURL

    keep_only_tags = [
        dict(name='h2', attrs={'class': 'tit_Article'}),
        dict(name='div', attrs={'class': ['box_Img img_L ', 'txt_ArticleAbstract', 'txt_Article txtBox_cms']}),
    ]

    # (feed title, feed url) pairs.  The Formula 1 and Moto entries were
    # spelled 'hhttp://', which is not a fetchable URL scheme.
    feeds = [
        (u'Primo piano', u'http://www.tuttosport.com/rss/primo_piano.xml'),
        (u'Cronanca', u'http://www.tuttosport.com/rss/Cronaca-205.xml'),
        (u'Lettere al direttore', u'http://blog.tuttosport.com/direttore/feed'),
        (u'Calcio', u'http://www.tuttosport.com/rss/Calcio-3.xml'),
        (u'Speciale Derby', u'http://www.tuttosport.com/rss/Speciale-derby-310.xml'),
        (u'Formula 1', u'http://www.tuttosport.com/rss/Formula-1-7.xml'),
        (u'Moto', u'http://www.tuttosport.com/rss/Moto-8.xml'),
        (u'Basket', u'http://www.tuttosport.com/rss/Basket-9.xml'),
        (u'Altri Sport', u'http://www.tuttosport.com/rss/Altri-Sport-2.xml'),
        (u'Tuttosport League', u'http://www.tuttosport.com/rss/Tuttosport-League-245.xml'),
        (u'Scommesse', u'http://www.tuttosport.com/rss/Scommesse-286.xml'),
    ]

    extra_css = '''
                body {font-family: Arial, Verdana, sans-serif; margin-bottom: 3em;}
                h1 {color:#9C3A0B;font-family:Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:20px;}
                h3 {color:#9C3A0B;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:15px;}
                h2.tit_Article {color:#9C3A0B;margin: 15px 8px 0; margin-bottom: 1px; border-bottom: 3px solid;}
                .txt_ArticleAbstract {color:#4080AE;clear: both; margin: 3px 8px;}
                .txt_Article {clear: both; margin: 8px 8px 12px;}
                .txt_Author {float: right;}
                .txt_ArticleAuthor {clear: both; margin: 8px;}
                '''
|
@ -1,7 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
www.variety.com
|
||||
'''
|
||||
@ -20,6 +18,8 @@ class Variety(BasicNewsRecipe):
|
||||
publisher = 'Red Business Information'
|
||||
category = 'Entertainment Industry News, Daily Variety, Movie Reviews, TV, Awards, Oscars, Cannes, Box Office, Hollywood'
|
||||
language = 'en'
|
||||
masthead_url = 'http://a330.g.akamai.net/7/330/23382/20090528190853/www.variety.com/graphics/variety/Variety_logo_green_tm.gif'
|
||||
extra_css = ' body{font-family: Georgia,"Times New Roman",Times,Courier,serif } img{margin-bottom: 1em} '
|
||||
|
||||
conversion_options = {
|
||||
'comments' : description
|
||||
@ -41,6 +41,6 @@ class Variety(BasicNewsRecipe):
|
||||
catid = catidr.partition('&')[0]
|
||||
return 'http://www.variety.com/index.asp?layout=print_story&articleid=' + artid + '&categoryid=' + catid
|
||||
|
||||
def get_article_url(self, article):
|
||||
return article.get('feedburner_origlink', None)
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
return self.adeify_images(soup)
|
||||
|
95
resources/recipes/winter_olympics.recipe
Normal file
@ -0,0 +1,95 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Starson17'
|
||||
'''
|
||||
www.nbcolympics.com
|
||||
'''
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Olympics_2010(BasicNewsRecipe):
    """Recipe for the NBC Olympics (www.nbcolympics.com) 2010 RSS feeds.

    Only the "Latest News" feed of each sport is enabled; the "Most Popular"
    and "Top News" variants are kept as comments so they can be switched on
    easily.  The full list of feeds is published at
    http://www.nbcolympics.com/rss/index.html
    """

    title = u'NBC Olympics 2010'
    __author__ = 'Starson17'
    description = 'Olympics 2010'
    cover_url = 'http://www.digitaljournal.com/img/1/1/2/1/i/4/7/6/o/WinterOlympics2010-logo.jpg'
    publisher = 'Olympics 2010'
    tags = 'Olympics news'
    language = 'en'
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True
    # recursions = 3
    oldest_article = 7
    max_articles_per_feed = 10

    # NOTE(review): the trailing space in 'Article ' is reproduced from the
    # original recipe -- presumably it mirrors the site's markup; confirm
    # before "cleaning it up".
    keep_only_tags = [dict(name='div', attrs={'class':['Article ','ArticleGallery']}),
                     ]

    remove_tags = [dict(name='div', attrs={'id':['RelatedTagsBox','ShareBox']}),
                   dict(name='div', attrs={'class':['DateUtilities','PhotoGallery BoxRight','Frame','ToolBox']}),
                  ]

    # RSS feeds are at: http://www.nbcolympics.com/rss/index.html
    feeds = [
        ('NBCOlympics.com - News', 'http://www.nbcolympics.com/rss/newscenter/mostpopular.xml'),
        ('NBCOlympics.com - News - Top Stories', 'http://www.nbcolympics.com/rss/newscenter/topstories.xml'),
        ('NBCOlympics.com - News - Latest Headlines', 'http://www.nbcolympics.com/rss/newscenter/latestnews.xml'),
        # ('NBCOlympics.com - Photos', 'http://www.nbcolympics.com/rss/photos/mostpopular.xml'),
        # ('NBCOlympics.com - Photos - Editorial Picks', 'http://www.nbcolympics.com/rss/photos/editorialpicks.xml'),
        # ('NBCOlympics.com - Photos - Latest Slideshows', 'http://www.nbcolympics.com/rss/photos/latestslideshows.xml'),
        ('NBCOlympics.com - Team USA - Latest news', 'http://www.nbcolympics.com/rss/countries/team-usa/index.xml'),
        # ('NBCOlympics.com - Team USA - Latest Slideshows', 'http://www.nbcolympics.com/rss/countries/team-usa/photos/index.xml'),
        # ('NBCOlympics.com - Team USA - Video', 'http://www.nbcolympics.com/rss/countries/team-usa/video/index.xml'),
        # ('NBCOlympics.com - Alpine Skiing - Most Popular News', 'http://www.nbcolympics.com/rss/sport=AS/mostpopular.xml'),
        # ('NBCOlympics.com - Alpine Skiing - Top News', 'http://www.nbcolympics.com/rss/sport=AS/topnews.xml'),
        ('NBCOlympics.com - Alpine Skiing - Latest News', 'http://www.nbcolympics.com/rss/sport=AS/latestnews.xml'),
        # ('NBCOlympics.com - Biathlon - Most Popular News', 'http://www.nbcolympics.com/rss/sport=BT/mostpopular.xml'),
        # ('NBCOlympics.com - Biathlon - Top News', 'http://www.nbcolympics.com/rss/sport=BT/topnews.xml'),
        ('NBCOlympics.com - Biathlon - Latest News', 'http://www.nbcolympics.com/rss/sport=BT/latestnews.xml'),
        # ('NBCOlympics.com - Bobsled - Most Popular News', 'http://www.nbcolympics.com/rss/sport=BS/mostpopular.xml'),
        # ('NBCOlympics.com - Bobsled - Top News', 'http://www.nbcolympics.com/rss/sport=BS/topnews.xml'),
        ('NBCOlympics.com - Bobsled - Latest News', 'http://www.nbcolympics.com/rss/sport=BS/latestnews.xml'),
        # ('NBCOlympics.com - Cross-Country - Most Popular News', 'http://www.nbcolympics.com/rss/sport=CC/mostpopular.xml'),
        # ('NBCOlympics.com - Cross-Country - Top News', 'http://www.nbcolympics.com/rss/sport=CC/topnews.xml'),
        ('NBCOlympics.com - Cross-Country - Latest News', 'http://www.nbcolympics.com/rss/sport=CC/latestnews.xml'),
        # ('NBCOlympics.com - Curling - Most Popular News', 'http://www.nbcolympics.com/rss/sport=CU/mostpopular.xml'),
        # ('NBCOlympics.com - Curling - Top News', 'http://www.nbcolympics.com/rss/sport=CU/topnews.xml'),
        ('NBCOlympics.com - Curling - Latest News', 'http://www.nbcolympics.com/rss/sport=CU/latestnews.xml'),
        # ('NBCOlympics.com - Figure Skating - Most Popular News', 'http://www.nbcolympics.com/rss/sport=FS/mostpopular.xml'),
        # ('NBCOlympics.com - Figure Skating - Top News', 'http://www.nbcolympics.com/rss/sport=FS/topnews.xml'),
        ('NBCOlympics.com - Figure Skating - Latest News', 'http://www.nbcolympics.com/rss/sport=FS/latestnews.xml'),
        # ('NBCOlympics.com - Freestyle Skiing - Most Popular News', 'http://www.nbcolympics.com/rss/sport=FR/mostpopular.xml'),
        # ('NBCOlympics.com - Freestyle Skiing - Top News', 'http://www.nbcolympics.com/rss/sport=FR/topnews.xml'),
        ('NBCOlympics.com - Freestyle Skiing - Latest News', 'http://www.nbcolympics.com/rss/sport=FR/latestnews.xml'),
        # ('NBCOlympics.com - Hockey - Most Popular News', 'http://www.nbcolympics.com/rss/sport=IH/mostpopular.xml'),
        # ('NBCOlympics.com - Hockey - Top News', 'http://www.nbcolympics.com/rss/sport=IH/topnews.xml'),
        ('NBCOlympics.com - Hockey - Latest News', 'http://www.nbcolympics.com/rss/sport=IH/latestnews.xml'),
        # ('NBCOlympics.com - Luge - Most Popular News', 'http://www.nbcolympics.com/rss/sport=LG/mostpopular.xml'),
        # ('NBCOlympics.com - Luge - Top News', 'http://www.nbcolympics.com/rss/sport=LG/topnews.xml'),
        ('NBCOlympics.com - Luge - Latest News', 'http://www.nbcolympics.com/rss/sport=LG/latestnews.xml'),
        # ('NBCOlympics.com - Nordic Combined - Most Popular News', 'http://www.nbcolympics.com/rss/sport=NC/mostpopular.xml'),
        # ('NBCOlympics.com - Nordic Combined - Top News', 'http://www.nbcolympics.com/rss/sport=NC/topnews.xml'),
        ('NBCOlympics.com - Nordic Combined - Latest News', 'http://www.nbcolympics.com/rss/sport=NC/latestnews.xml'),
        # ('NBCOlympics.com - Short Track - Most Popular News', 'http://www.nbcolympics.com/rss/sport=ST/mostpopular.xml'),
        # ('NBCOlympics.com - Short Track - Top News', 'http://www.nbcolympics.com/rss/sport=ST/topnews.xml'),
        ('NBCOlympics.com - Short Track - Latest News', 'http://www.nbcolympics.com/rss/sport=ST/latestnews.xml'),
        # ('NBCOlympics.com - Skeleton - Most Popular News', 'http://www.nbcolympics.com/rss/sport=SN/mostpopular.xml'),
        # ('NBCOlympics.com - Skeleton - Top News', 'http://www.nbcolympics.com/rss/sport=SN/topnews.xml'),
        ('NBCOlympics.com - Skeleton - Latest News', 'http://www.nbcolympics.com/rss/sport=SN/latestnews.xml'),
        # ('NBCOlympics.com - Ski Jumping - Most Popular News', 'http://www.nbcolympics.com/rss/sport=SJ/mostpopular.xml'),
        # ('NBCOlympics.com - Ski Jumping - Top News', 'http://www.nbcolympics.com/rss/sport=SJ/topnews.xml'),
        ('NBCOlympics.com - Ski Jumping - Latest News', 'http://www.nbcolympics.com/rss/sport=SJ/latestnews.xml'),
        # ('NBCOlympics.com - Snowboarding - Most Popular News', 'http://www.nbcolympics.com/rss/sport=SB/mostpopular.xml'),
        # ('NBCOlympics.com - Snowboarding - Top News', 'http://www.nbcolympics.com/rss/sport=SB/topnews.xml'),
        ('NBCOlympics.com - Snowboarding - Latest News', 'http://www.nbcolympics.com/rss/sport=SB/latestnews.xml'),
        # Speed Skating previously pointed at sport=AS, i.e. it duplicated the
        # Alpine Skiing feed.  NOTE(review): SS follows the discipline-code
        # pattern of the other feeds -- confirm against the RSS index page.
        # ('NBCOlympics.com - Speed Skating - Most Popular News', 'http://www.nbcolympics.com/rss/sport=SS/mostpopular.xml'),
        # ('NBCOlympics.com - Speed Skating - Top News', 'http://www.nbcolympics.com/rss/sport=SS/topnews.xml'),
        ('NBCOlympics.com - Speed Skating - Latest News', 'http://www.nbcolympics.com/rss/sport=SS/latestnews.xml'),
    ]

    extra_css = '''
                h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
                h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
                p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
                body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
                '''
|
@ -20,6 +20,7 @@ class Wired(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
encoding = 'utf-8'
|
||||
use_embedded_content = False
|
||||
masthead_url = 'http://www.wired.com/images/home/wired_logo.gif'
|
||||
language = 'en'
|
||||
extra_css = ' body{font-family: sans-serif} .entryDescription li {display: inline; list-style-type: none} '
|
||||
index = 'http://www.wired.com/magazine/'
|
||||
@ -38,14 +39,34 @@ class Wired(BasicNewsRecipe):
|
||||
dict(name=['object','embed','iframe','link'])
|
||||
,dict(name='div', attrs={'class':['podcast_storyboard','tweetmeme_button']})
|
||||
]
|
||||
remove_attributes = ['height','width']
|
||||
|
||||
|
||||
#feeds = [(u'Articles' , u'http://www.wired.com/magazine/feed/' )]
|
||||
|
||||
def parse_index(self):
|
||||
totalfeeds = []
|
||||
|
||||
soup = self.index_to_soup(self.index)
|
||||
soup = self.index_to_soup(self.index)
|
||||
majorf = soup.find('div',attrs={'class':'index'})
|
||||
if majorf:
|
||||
pfarticles = []
|
||||
firsta = majorf.find(attrs={'class':'spread-header'})
|
||||
if firsta:
|
||||
pfarticles.append({
|
||||
'title' :self.tag_to_string(firsta.a)
|
||||
,'date' :strftime(self.timefmt)
|
||||
,'url' :'http://www.wired.com' + firsta.a['href']
|
||||
,'description':''
|
||||
})
|
||||
for itt in majorf.findAll('li'):
|
||||
itema = itt.find('a',href=True)
|
||||
if itema:
|
||||
pfarticles.append({
|
||||
'title' :self.tag_to_string(itema)
|
||||
,'date' :strftime(self.timefmt)
|
||||
,'url' :'http://www.wired.com' + itema['href']
|
||||
,'description':''
|
||||
})
|
||||
totalfeeds.append(('Cover', pfarticles))
|
||||
features = soup.find('div',attrs={'id':'my-glider'})
|
||||
if features:
|
||||
farticles = []
|
||||
|
44
resources/recipes/wired_daily.recipe
Normal file
@ -0,0 +1,44 @@
|
||||
#!/usr/bin/env python
|
||||
__license__ = 'GPL v3'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Wired_Daily(BasicNewsRecipe):
    """Recipe for the daily Wired feeds, fetched through the print pages."""

    title = 'Wired Daily Edition'
    __author__ = 'Kovid Goyal'
    description = 'Technology news'
    timefmt = ' [%Y%b%d %H%M]'
    language = 'en'

    no_stylesheets = True

    # Drop everything that precedes the main content container ...
    remove_tags_before = dict(name='div', id='content')
    # ... and strip these elements (matched by id, class or tag name).
    remove_tags = [
        dict(id=[
            'social_tools', 'outerWrapper', 'sidebar',
            'footer', 'advertisement', 'blog_subscription_unit',
            'brightcove_component',
        ]),
        {'class':'entryActions'},
        dict(name=['noscript', 'script']),
    ]

    feeds = [
        ('Top News', 'http://feeds.wired.com/wired/index'),
        ('Culture', 'http://feeds.wired.com/wired/culture'),
        ('Software', 'http://feeds.wired.com/wired/software'),
        ('Mac', 'http://feeds.feedburner.com/cultofmac/bFow'),
        ('Gadgets', 'http://feeds.wired.com/wired/gadgets'),
        ('Cars', 'http://feeds.wired.com/wired/cars'),
        ('Entertainment', 'http://feeds.wired.com/wired/entertainment'),
        ('Gaming', 'http://feeds.wired.com/wired/gaming'),
        ('Science', 'http://feeds.wired.com/wired/science'),
        ('Med Tech', 'http://feeds.wired.com/wired/medtech'),
        ('Politics', 'http://feeds.wired.com/wired/politics'),
        ('Tech Biz', 'http://feeds.wired.com/wired/techbiz'),
        ('Commentary', 'http://feeds.wired.com/wired/commentary'),
    ]

    def print_version(self, url):
        """Return *url* with the site root replaced by its /print/ variant."""
        site_root = 'http://www.wired.com/'
        return url.replace(site_root, site_root + 'print/')
|
||||
|
||||
|
@ -215,7 +215,7 @@ class WSJ(BasicNewsRecipe):
|
||||
# first, check if there is an h3 tag which provides a section name
|
||||
stag = divtag.find('h3')
|
||||
if stag:
|
||||
if stag.parent['class'] == 'dynamic':
|
||||
if stag.parent.get('class', '') == 'dynamic':
|
||||
# a carousel of articles is too complex to extract a section name
|
||||
# for each article, so we'll just call the section "Carousel"
|
||||
section_name = 'Carousel'
|
||||
|
30
resources/tanea.recipe
Normal file
@ -0,0 +1,30 @@
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class TaNea(BasicNewsRecipe):
    """Recipe for the Greek newspaper Ta Nea (www.tanea.gr)."""

    title = u'Ta Nea'
    __author__ = 'Pan'
    oldest_article = 1
    max_articles_per_feed = 100
    no_stylesheets = True

    remove_tags_before = dict(name='div',attrs={'id':'print-body'})
    remove_tags_after = dict(name='div',attrs={'id':'text'})

    # The feed titles are Greek.  They were previously written as UTF-8 byte
    # sequences inside unicode literals (u'\xce\x95...'), which produces
    # Latin-1 mojibake; they are now spelled with real code point escapes.
    feeds = [
        # Ellada (Greece)
        (u'\u0395\u03bb\u03bb\u03ac\u03b4\u03b1',
         u'http://www.tanea.gr/default.asp?pid=66&la=1'),
        # Kosmos (World)
        (u'\u039a\u03cc\u03c3\u03bc\u03bf\u03c2',
         u'http://www.tanea.gr/default.asp?pid=67&la=1'),
        # Oikonomia (Economy)
        (u'\u039f\u03b9\u03ba\u03bf\u03bd\u03bf\u03bc\u03af\u03b1',
         u'http://www.tanea.gr/default.asp?pid=68&la=1'),
        # Politismos (Culture)
        (u'\u03a0\u03bf\u03bb\u03b9\u03c4\u03b9\u03c3\u03bc\u03cc\u03c2',
         u'http://www.tanea.gr/default.asp?pid=69&la=1'),
        # Gnomes (Opinions)
        (u'\u0393\u03bd\u03ce\u03bc\u03b5\u03c2',
         u'http://www.tanea.gr/default.asp?pid=79&la=1'),
        # Ripes
        (u'\u03a1\u03b9\u03c0\u03ad\u03c2',
         u'http://www.tanea.gr/default.asp?pid=80&la=1'),
        # Aichmes
        (u'\u0391\u03b9\u03c7\u03bc\u03ad\u03c2',
         u'http://www.tanea.gr/default.asp?pid=81&la=1'),
    ]

    def print_version(self, url):
        """Map an article URL (pid=2) to its printer-friendly page (pid=96)."""
        return url.replace('http://www.tanea.gr/default.asp?pid=2', 'http://www.tanea.gr/default.asp?pid=96')
|
@ -20,37 +20,8 @@ function selector(elem) {
|
||||
return sel;
|
||||
}
|
||||
|
||||
function find_closest_enclosing_block(top) {
|
||||
var START = top-1000;
|
||||
var STOP = top;
|
||||
var matches = [];
|
||||
var elem, temp;
|
||||
var width = 1000;
|
||||
|
||||
for (y = START; y < STOP; y += 20) {
|
||||
for ( x = 0; x < width; x += 20) {
|
||||
elem = document.elementFromPoint(x, y);
|
||||
try {
|
||||
elem = $(elem);
|
||||
temp = elem.offset().top
|
||||
matches.push(elem);
|
||||
if (Math.abs(temp - START) < 25) { y = STOP; break}
|
||||
} catch(error) {}
|
||||
}
|
||||
}
|
||||
|
||||
var miny = Math.abs(matches[0].offset().top - START), min_elem = matches[0];
|
||||
|
||||
for (i = 1; i < matches.length; i++) {
|
||||
elem = matches[i];
|
||||
temp = Math.abs(elem.offset().top - START);
|
||||
if ( temp < miny ) { miny = temp; min_elem = elem; }
|
||||
}
|
||||
return min_elem;
|
||||
}
|
||||
|
||||
function calculate_bookmark(y) {
|
||||
var elem = find_closest_enclosing_block(y);
|
||||
function calculate_bookmark(y, node) {
|
||||
var elem = $(node);
|
||||
var sel = selector(elem);
|
||||
var ratio = (y - elem.offset().top)/elem.height();
|
||||
if (ratio > 1) { ratio = 1; }
|
||||
|
@ -399,7 +399,7 @@ class BuildPDF2XML(Command):
|
||||
objects.append(obj)
|
||||
|
||||
if self.newer(dest, objects):
|
||||
cmd = ['g++', '-g', '-o', dest]+objects+['-lpoppler', '-lMagickWand',
|
||||
cmd = ['g++', '-ggdb', '-o', dest]+objects+['-lpoppler', '-lMagickWand',
|
||||
'-lpng', '-lpthread']
|
||||
if iswindows:
|
||||
cmd = [msvc.linker] + '/INCREMENTAL:NO /DEBUG /NODEFAULTLIB:libcmt.lib'.split()
|
||||
|
@ -137,8 +137,20 @@ class Develop(Command):
|
||||
self.setup_mount_helper()
|
||||
self.install_files()
|
||||
self.run_postinstall()
|
||||
self.install_env_module()
|
||||
self.success()
|
||||
|
||||
def install_env_module(self):
    """Write ``init_calibre.py`` into the staging root's Python lib dir.

    The file content is the HEADER template filled in with
    ``self.template_args()``.  If the lib directory does not exist, only a
    warning is emitted and nothing is written.
    """
    from distutils import sysconfig
    libdir = sysconfig.get_python_lib(prefix=self.opts.staging_root)
    if not os.path.exists(libdir):
        self.warn('Cannot install calibre environment module to: '+libdir)
        return
    path = os.path.join(libdir, 'init_calibre.py')
    self.info('Installing calibre environment module: '+path)
    with open(path, 'wb') as f:
        f.write(HEADER.format(**self.template_args()))
|
||||
|
||||
def setup_mount_helper(self):
|
||||
def warn():
|
||||
self.warn('Failed to compile mount helper. Auto mounting of',
|
||||
@ -180,13 +192,20 @@ class Develop(Command):
|
||||
functions[typ]):
|
||||
self.write_template(name, mod, func)
|
||||
|
||||
def template_args(self):
    """Return the substitution values shared by the launcher templates.

    Keys: ``path`` (lib dir), ``resources`` (share dir), ``executables``
    (bin dir) and ``extensions`` (``<libdir>/calibre/plugins``).
    """
    args = dict(
        path=self.libdir,
        resources=self.sharedir,
        executables=self.bindir,
    )
    args['extensions'] = self.j(self.libdir, 'calibre', 'plugins')
    return args
|
||||
|
||||
def write_template(self, name, mod, func):
|
||||
template = COMPLETE_TEMPLATE if name == 'calibre-complete' else TEMPLATE
|
||||
script = template.format(
|
||||
module=mod, func=func,
|
||||
path=self.libdir, resources=self.sharedir,
|
||||
executables=self.bindir,
|
||||
extensions=self.j(self.libdir, 'calibre', 'plugins'))
|
||||
args = self.template_args()
|
||||
args['module'] = mod
|
||||
args['func'] = func
|
||||
script = template.format(**args)
|
||||
path = self.j(self.staging_bindir, name)
|
||||
if not os.path.exists(self.staging_bindir):
|
||||
os.makedirs(self.staging_bindir)
|
||||
|
@ -15,7 +15,7 @@ class Rsync(Command):
|
||||
|
||||
description = 'Sync source tree from development machine'
|
||||
|
||||
SYNC_CMD = ('rsync -avz --exclude src/calibre/plugins '
|
||||
SYNC_CMD = ('rsync -avz --delete --exclude src/calibre/plugins '
|
||||
'--exclude src/calibre/manual --exclude src/calibre/trac '
|
||||
'--exclude .bzr --exclude .build --exclude .svn --exclude build --exclude dist '
|
||||
'--exclude "*.pyc" --exclude "*.pyo" --exclude "*.swp" --exclude "*.swo" '
|
||||
|
@ -48,7 +48,9 @@ class Resources(Command):
|
||||
dest = self.j(self.RESOURCES, 'builtin_recipes.xml')
|
||||
if self.newer(dest, files):
|
||||
self.info('\tCreating builtin_recipes.xml')
|
||||
open(dest, 'wb').write(serialize_builtin_recipes())
|
||||
xml = serialize_builtin_recipes()
|
||||
with open(dest, 'wb') as f:
|
||||
f.write(xml)
|
||||
|
||||
dest = self.j(self.RESOURCES, 'ebook-convert-complete.pickle')
|
||||
files = []
|
||||
|
@ -378,10 +378,11 @@ def strftime(fmt, t=None):
|
||||
t = time.localtime()
|
||||
early_year = t[0] < 1900
|
||||
if early_year:
|
||||
replacement = 1900 if t[0]%4 == 0 else 1901
|
||||
fmt = fmt.replace('%Y', '_early year hack##')
|
||||
t = list(t)
|
||||
orig_year = t[0]
|
||||
t[0] = 1900
|
||||
t[0] = replacement
|
||||
ans = None
|
||||
if iswindows:
|
||||
if isinstance(fmt, unicode):
|
||||
|
@ -2,7 +2,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
__appname__ = 'calibre'
|
||||
__version__ = '0.6.36'
|
||||
__version__ = '0.6.40'
|
||||
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
|
||||
|
||||
import re
|
||||
|
@ -7,6 +7,7 @@ import os
|
||||
import glob
|
||||
from calibre.customize import FileTypePlugin, MetadataReaderPlugin, MetadataWriterPlugin
|
||||
from calibre.constants import numeric_version
|
||||
from calibre.ebooks.metadata.archive import ArchiveExtract
|
||||
|
||||
class HTML2ZIP(FileTypePlugin):
|
||||
name = 'HTML to ZIP'
|
||||
@ -416,9 +417,10 @@ from calibre.devices.hanlin.driver import HANLINV3, HANLINV5, BOOX
|
||||
from calibre.devices.blackberry.driver import BLACKBERRY
|
||||
from calibre.devices.cybook.driver import CYBOOK
|
||||
from calibre.devices.eb600.driver import EB600, COOL_ER, SHINEBOOK, \
|
||||
POCKETBOOK360, GER2, ITALICA, ECLICTO, DBOOK, INVESBOOK
|
||||
POCKETBOOK360, GER2, ITALICA, ECLICTO, DBOOK, INVESBOOK, \
|
||||
BOOQ
|
||||
from calibre.devices.iliad.driver import ILIAD
|
||||
from calibre.devices.irexdr.driver import IREXDR1000
|
||||
from calibre.devices.irexdr.driver import IREXDR1000, IREXDR800
|
||||
from calibre.devices.jetbook.driver import JETBOOK
|
||||
from calibre.devices.kindle.driver import KINDLE, KINDLE2, KINDLE_DX
|
||||
from calibre.devices.nook.driver import NOOK
|
||||
@ -430,11 +432,11 @@ from calibre.devices.eslick.driver import ESLICK
|
||||
from calibre.devices.nuut2.driver import NUUT2
|
||||
from calibre.devices.iriver.driver import IRIVER_STORY
|
||||
from calibre.devices.binatone.driver import README
|
||||
from calibre.devices.hanvon.driver import N516
|
||||
from calibre.devices.hanvon.driver import N516, EB511
|
||||
|
||||
from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon
|
||||
from calibre.library.catalog import CSV_XML, EPUB_MOBI
|
||||
plugins = [HTML2ZIP, PML2PMLZ, GoogleBooks, ISBNDB, Amazon, CSV_XML, EPUB_MOBI]
|
||||
plugins = [HTML2ZIP, PML2PMLZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon, CSV_XML, EPUB_MOBI]
|
||||
plugins += [
|
||||
ComicInput,
|
||||
EPUBInput,
|
||||
@ -477,6 +479,7 @@ plugins += [
|
||||
CYBOOK,
|
||||
ILIAD,
|
||||
IREXDR1000,
|
||||
IREXDR800,
|
||||
JETBOOK,
|
||||
SHINEBOOK,
|
||||
POCKETBOOK360,
|
||||
@ -500,9 +503,11 @@ plugins += [
|
||||
DBOOK,
|
||||
INVESBOOK,
|
||||
BOOX,
|
||||
BOOQ,
|
||||
EB600,
|
||||
README,
|
||||
N516,
|
||||
EB511,
|
||||
]
|
||||
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
||||
x.__name__.endswith('MetadataReader')]
|
||||
|
@ -20,7 +20,7 @@ class ANDROID(USBMS):
|
||||
VENDOR_ID = {
|
||||
0x0bb4 : { 0x0c02 : [0x100], 0x0c01 : [0x100]},
|
||||
0x22b8 : { 0x41d9 : [0x216]},
|
||||
0x18d1 : { 0x4e11 : [0x0100]},
|
||||
0x18d1 : { 0x4e11 : [0x0100], 0x4e12: [0x0100]},
|
||||
}
|
||||
EBOOK_DIR_MAIN = ['wordplayer/calibretransfer', 'eBooks/import', 'Books']
|
||||
EXTRA_CUSTOMIZATION_MESSAGE = _('Comma separated list of directories to '
|
||||
|
@ -184,3 +184,14 @@ class INVESBOOK(EB600):
|
||||
VENDOR_NAME = 'INVES_E6'
|
||||
WINDOWS_MAIN_MEM = '00INVES_E600'
|
||||
WINDOWS_CARD_A_MEM = '00INVES_E600'
|
||||
|
||||
class BOOQ(EB600):
    """EB600 variant for the Booq reader; only identification data differs."""

    name = 'Booq Device Interface'
    gui_name = 'Booq'

    FORMATS = [
        'epub', 'mobi', 'prc', 'fb2', 'pdf',
        'doc', 'rtf', 'txt', 'html',
    ]

    # Strings used to recognise the device and its storage on Windows.
    VENDOR_NAME = 'NETRONIX'
    WINDOWS_MAIN_MEM = 'EB600'
    WINDOWS_CARD_A_MEM = 'EB600'
|
||||
|
||||
|
@ -126,3 +126,15 @@ class BOOX(HANLINV3):
|
||||
|
||||
EBOOK_DIR_MAIN = 'MyBooks'
|
||||
EBOOK_DIR_CARD_A = 'MyBooks'
|
||||
|
||||
|
||||
def windows_sort_drives(self, drives):
    """Swap the main and card A drives when card A sorts before main.

    :param drives: mapping that may contain the keys 'main' and 'carda'.
    :return: the same mapping, modified in place when a swap was needed.
    """
    main, card = drives.get('main', None), drives.get('carda', None)
    # Both drives must be present (and truthy) before they are compared.
    if card and main and card < main:
        drives['main'], drives['carda'] = card, main
    return drives
|
||||
|
||||
|
||||
|
@ -7,6 +7,7 @@ __docformat__ = 'restructuredtext en'
|
||||
'''
|
||||
Device driver for Hanvon devices
|
||||
'''
|
||||
import re
|
||||
|
||||
from calibre.devices.usbms.driver import USBMS
|
||||
|
||||
@ -32,3 +33,25 @@ class N516(USBMS):
|
||||
|
||||
EBOOK_DIR_MAIN = 'e_book'
|
||||
SUPPORTS_SUB_DIRS = True
|
||||
|
||||
class EB511(USBMS):
    """USB mass storage driver for the Elonex EB 511 eBook reader."""

    name = 'Elonex EB 511 driver'
    gui_name = 'EB 511'
    description = _('Communicate with the Elonex EB 511 eBook reader.')
    author = 'Kovid Goyal'
    supported_platforms = ['windows', 'osx', 'linux']

    FORMATS = ['epub', 'html', 'pdf', 'txt']

    # USB identification values
    VENDOR_ID = [0x45e]
    PRODUCT_ID = [0xffff]
    BCD = [0x0]

    MAIN_MEMORY_VOLUME_LABEL = 'EB 511 Internal Memory'

    # Books are stored below this directory; sub directories are allowed.
    EBOOK_DIR_MAIN = 'e_book'
    SUPPORTS_SUB_DIRS = True

    # Pattern used to find the main memory volume on OS X.
    OSX_MAIN_MEM_VOL_PAT = re.compile(r'/eReader')
|
||||
|
||||
|
||||
|
@ -36,3 +36,14 @@ class IREXDR1000(USBMS):
|
||||
EBOOK_DIR_MAIN = 'ebooks'
|
||||
DELETE_EXTS = ['.mbp']
|
||||
SUPPORTS_SUB_DIRS = True
|
||||
|
||||
class IREXDR800(IREXDR1000):
    """IRex DR 800 driver: the DR 1000 driver with the overrides below."""

    name = 'IRex Digital Reader 800 Device Interface'
    description = _('Communicate with the IRex Digital Reader 800')

    PRODUCT_ID = [0x002]
    WINDOWS_MAIN_MEM = 'DR800'
    FORMATS = ['epub', 'html', 'pdf', 'txt']

    EBOOK_DIR_MAIN = 'Books'
    # Overrides the base class list, which contains '.mbp'.
    DELETE_EXTS = []
|
||||
|
||||
|
@ -192,17 +192,15 @@ class PRS505(CLI, Device):
|
||||
fix_ids(*booklists)
|
||||
if not os.path.exists(self._main_prefix):
|
||||
os.makedirs(self._main_prefix)
|
||||
f = open(self._main_prefix + self.__class__.MEDIA_XML, 'wb')
|
||||
booklists[0].write(f)
|
||||
f.close()
|
||||
with open(self._main_prefix + self.__class__.MEDIA_XML, 'wb') as f:
|
||||
booklists[0].write(f)
|
||||
|
||||
def write_card_prefix(prefix, listid):
|
||||
if prefix is not None and hasattr(booklists[listid], 'write'):
|
||||
if not os.path.exists(prefix):
|
||||
os.makedirs(prefix)
|
||||
f = open(prefix + self.__class__.CACHE_XML, 'wb')
|
||||
booklists[listid].write(f)
|
||||
f.close()
|
||||
with open(prefix + self.__class__.CACHE_XML, 'wb') as f:
|
||||
booklists[listid].write(f)
|
||||
write_card_prefix(self._card_a_prefix, 1)
|
||||
write_card_prefix(self._card_b_prefix, 2)
|
||||
|
||||
|
@ -70,6 +70,19 @@ def extract_cover_from_embedded_svg(html, base, log):
|
||||
if href and os.access(path, os.R_OK):
|
||||
return open(path, 'rb').read()
|
||||
|
||||
def extract_calibre_cover(raw, base, log):
    """Return the bytes of the cover image referenced by a cover-only page.

    The page qualifies only when it contains none of the text-bearing tags
    searched for below and exactly one ``<img>`` whose ``alt`` attribute is
    ``'cover'``.

    :param raw: HTML source of the page.
    :param base: directory against which the image ``src`` is resolved.
    :param log: unused here; kept so the signature matches the other
                cover extractors.
    :return: the image file contents, or None when the page does not
             qualify or the image file does not exist.
    """
    from calibre.ebooks.BeautifulSoup import BeautifulSoup
    soup = BeautifulSoup(raw)
    text_tag = soup.find(name=['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'span',
        'font', 'br'])
    images = soup.findAll('img')
    if text_tag is not None or len(images) != 1:
        return None
    img = images[0]
    if img.get('alt', '') != 'cover':
        return None
    # The src uses URL style '/' separators; rebuild it as a local path.
    path = os.path.join(base, *img['src'].split('/'))
    if not os.path.exists(path):
        return None
    # Close the file deterministically instead of leaking the handle.
    with open(path, 'rb') as f:
        return f.read()
|
||||
|
||||
def render_html_svg_workaround(path_to_html, log, width=590, height=750):
|
||||
from calibre.ebooks.oeb.base import SVG_NS
|
||||
raw = open(path_to_html, 'rb').read()
|
||||
@ -80,6 +93,11 @@ def render_html_svg_workaround(path_to_html, log, width=590, height=750):
|
||||
os.path.dirname(path_to_html), log)
|
||||
except:
|
||||
pass
|
||||
if data is None:
|
||||
try:
|
||||
data = extract_calibre_cover(raw, os.path.dirname(path_to_html), log)
|
||||
except:
|
||||
pass
|
||||
if data is None:
|
||||
renderer = render_html(path_to_html, width, height)
|
||||
data = getattr(renderer, 'data', None)
|
||||
|
@ -12,6 +12,7 @@ from calibre.customize.ui import input_profiles, output_profiles, \
|
||||
run_plugins_on_preprocess, run_plugins_on_postprocess
|
||||
from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
|
||||
from calibre.ptempfile import PersistentTemporaryDirectory
|
||||
from calibre.utils.date import parse_date
|
||||
from calibre import extract, walk
|
||||
|
||||
DEBUG_README=u'''
|
||||
@ -65,7 +66,7 @@ class Plumber(object):
|
||||
metadata_option_names = [
|
||||
'title', 'authors', 'title_sort', 'author_sort', 'cover', 'comments',
|
||||
'publisher', 'series', 'series_index', 'rating', 'isbn',
|
||||
'tags', 'book_producer', 'language'
|
||||
'tags', 'book_producer', 'language', 'pubdate', 'timestamp'
|
||||
]
|
||||
|
||||
def __init__(self, input, output, log, report_progress=DummyReporter(),
|
||||
@ -461,6 +462,14 @@ OptionRecommendation(name='language',
|
||||
recommended_value=None, level=OptionRecommendation.LOW,
|
||||
help=_('Set the language.')),
|
||||
|
||||
OptionRecommendation(name='pubdate',
|
||||
recommended_value=None, level=OptionRecommendation.LOW,
|
||||
help=_('Set the publication date.')),
|
||||
|
||||
OptionRecommendation(name='timestamp',
|
||||
recommended_value=None, level=OptionRecommendation.LOW,
|
||||
help=_('Set the book timestamp (used by the date column in calibre).')),
|
||||
|
||||
]
|
||||
|
||||
input_fmt = os.path.splitext(self.input)[1]
|
||||
@ -619,6 +628,14 @@ OptionRecommendation(name='language',
|
||||
except ValueError:
|
||||
self.log.warn(_('Values of series index and rating must'
|
||||
' be numbers. Ignoring'), val)
|
||||
continue
|
||||
elif x in ('timestamp', 'pubdate'):
|
||||
try:
|
||||
val = parse_date(val, assume_utc=x=='pubdate')
|
||||
except:
|
||||
self.log.exception(_('Failed to parse date/time') + ' ' +
|
||||
unicode(val))
|
||||
continue
|
||||
setattr(mi, x, val)
|
||||
|
||||
|
||||
|
@ -132,6 +132,8 @@ class EPUBInput(InputFormatPlugin):
|
||||
|
||||
self.rationalize_cover(opf, log)
|
||||
|
||||
self.optimize_opf_parsing = opf
|
||||
|
||||
with open('content.opf', 'wb') as nopf:
|
||||
nopf.write(opf.render())
|
||||
|
||||
|
@ -256,7 +256,20 @@ class EPUBOutput(OutputFormatPlugin):
|
||||
Perform various markup transforms to get the output to render correctly
|
||||
in the quirky ADE.
|
||||
'''
|
||||
from calibre.ebooks.oeb.base import XPath, XHTML, OEB_STYLES, barename
|
||||
from calibre.ebooks.oeb.base import XPath, XHTML, OEB_STYLES, barename, urlunquote
|
||||
|
||||
# ADE cries big wet tears when it encounters an invalid fragment
|
||||
# identifier in the NCX toc.
|
||||
frag_pat = re.compile(r'[-A-Za-z0-9_:.]+$')
|
||||
for node in self.oeb.toc.iter():
|
||||
href = getattr(node, 'href', None)
|
||||
if hasattr(href, 'partition'):
|
||||
base, _, frag = href.partition('#')
|
||||
frag = urlunquote(frag)
|
||||
if frag and frag_pat.match(frag) is None:
|
||||
self.log.warn(
|
||||
'Removing invalid fragment identifier %r from TOC'%frag)
|
||||
node.href = base
|
||||
|
||||
for x in self.oeb.spine:
|
||||
root = x.data
|
||||
|
@ -111,7 +111,7 @@ class HTMLFile(object):
|
||||
raise IOError(msg)
|
||||
raise IgnoreFile(msg, err.errno)
|
||||
|
||||
self.is_binary = not bool(self.HTML_PAT.search(src[:4096]))
|
||||
self.is_binary = level > 0 and not bool(self.HTML_PAT.search(src[:4096]))
|
||||
if not self.is_binary:
|
||||
if encoding is None:
|
||||
encoding = xml_to_unicode(src[:4096], verbose=verbose)[-1]
|
||||
@ -408,7 +408,10 @@ class HTMLInput(InputFormatPlugin):
|
||||
return link_
|
||||
if base and not os.path.isabs(link):
|
||||
link = os.path.join(base, link)
|
||||
link = os.path.abspath(link)
|
||||
try:
|
||||
link = os.path.abspath(link)
|
||||
except:
|
||||
return link_
|
||||
if not os.access(link, os.R_OK):
|
||||
return link_
|
||||
if os.path.isdir(link):
|
||||
|
@ -50,6 +50,7 @@ from pylrf import (LrfWriter, LrfObject, LrfTag, LrfToc,
|
||||
STREAM_COMPRESSED, LrfTagStream, LrfStreamBase, IMAGE_TYPE_ENCODING,
|
||||
BINDING_DIRECTION_ENCODING, LINE_TYPE_ENCODING, LrfFileStream,
|
||||
STREAM_FORCE_COMPRESSED)
|
||||
from calibre.utils.date import isoformat
|
||||
|
||||
DEFAULT_SOURCE_ENCODING = "cp1252" # default is us-windows character set
|
||||
DEFAULT_GENREADING = "fs" # default is yes to both lrf and lrs
|
||||
@ -852,7 +853,7 @@ class DocInfo(object):
|
||||
self.thumbnail = None
|
||||
self.language = "en"
|
||||
self.creator = None
|
||||
self.creationdate = date.today().isoformat()
|
||||
self.creationdate = str(isoformat(date.today()))
|
||||
self.producer = "%s v%s"%(__appname__, __version__)
|
||||
self.numberofpages = "0"
|
||||
|
||||
|
@ -10,9 +10,11 @@ import os, mimetypes, sys, re
|
||||
from urllib import unquote, quote
|
||||
from urlparse import urlparse
|
||||
|
||||
|
||||
from calibre import relpath
|
||||
|
||||
from calibre.utils.config import tweaks
|
||||
from calibre.utils.date import isoformat
|
||||
|
||||
_author_pat = re.compile(',?\s+(and|with)\s+', re.IGNORECASE)
|
||||
def string_to_authors(raw):
|
||||
raw = raw.replace('&&', u'\uffff')
|
||||
@ -27,6 +29,9 @@ def authors_to_string(authors):
|
||||
return ''
|
||||
|
||||
def author_to_author_sort(author):
|
||||
method = tweaks['author_sort_copy_method']
|
||||
if method == 'copy' or (method == 'comma' and author.count(',') > 0):
|
||||
return author
|
||||
tokens = author.split()
|
||||
tokens = tokens[-1:] + tokens[:-1]
|
||||
if len(tokens) > 1:
|
||||
@ -340,9 +345,9 @@ class MetaInformation(object):
|
||||
if self.rating is not None:
|
||||
fmt('Rating', self.rating)
|
||||
if self.timestamp is not None:
|
||||
fmt('Timestamp', self.timestamp.isoformat(' '))
|
||||
fmt('Timestamp', isoformat(self.timestamp))
|
||||
if self.pubdate is not None:
|
||||
fmt('Published', self.pubdate.isoformat(' '))
|
||||
fmt('Published', isoformat(self.pubdate))
|
||||
if self.rights is not None:
|
||||
fmt('Rights', unicode(self.rights))
|
||||
if self.lccn:
|
||||
|
@ -7,12 +7,11 @@ __docformat__ = 'restructuredtext en'
|
||||
Fetch metadata using Amazon AWS
|
||||
'''
|
||||
import sys, re
|
||||
from datetime import datetime
|
||||
|
||||
from lxml import etree
|
||||
from dateutil import parser
|
||||
|
||||
from calibre import browser
|
||||
from calibre.utils.date import parse_date, utcnow
|
||||
from calibre.ebooks.metadata import MetaInformation, string_to_authors
|
||||
|
||||
AWS_NS = 'http://webservices.amazon.com/AWSECommerceService/2005-10-05'
|
||||
@ -44,9 +43,8 @@ def get_social_metadata(title, authors, publisher, isbn):
|
||||
try:
|
||||
d = root.findtext('.//'+AWS('PublicationDate'))
|
||||
if d:
|
||||
default = datetime.utcnow()
|
||||
default = datetime(default.year, default.month, 15)
|
||||
d = parser.parse(d[0].text, default=default)
|
||||
default = utcnow().replace(day=15)
|
||||
d = parse_date(d[0].text, assume_utc=True, default=default)
|
||||
mi.pubdate = d
|
||||
except:
|
||||
pass
|
||||
|
65
src/calibre/ebooks/metadata/archive.py
Normal file
@ -0,0 +1,65 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os
|
||||
from contextlib import closing
|
||||
|
||||
from calibre.customize import FileTypePlugin
|
||||
|
||||
def is_comic(list_of_names):
    '''
    Return True if the given archive member names look like comic pages.

    The names are treated as a comic when they all share exactly one file
    extension (compared case-insensitively) and that extension is one of
    jpg, jpeg or png. An empty list is never a comic.

    Note that a name containing no dot is used whole as its "extension",
    so callers that want such entries ignored must filter them out first.

    :param list_of_names: iterable of member file names (strings)
    :return: True or False
    '''
    extensions = set([x.rpartition('.')[-1].lower() for x in list_of_names])
    # A one-element set is a subset of the allowed extensions exactly when
    # its only member is allowed. This avoids the iterator .next() method,
    # which exists only on Python 2.
    return len(extensions) == 1 and \
            extensions.issubset(('jpg', 'jpeg', 'png'))
|
||||
|
||||
class ArchiveExtract(FileTypePlugin):
    '''
    File type plugin that inspects zip/rar archives.

    If the archive looks like a comic (see :func:`is_comic`) it is copied
    to a temporary file with a .cbz/.cbr extension. If it contains exactly
    one member with a dot in its name and that member is a supported
    e-book format, the member is extracted to a temporary file. Otherwise
    the archive path is returned unchanged.
    '''

    name = 'Archive Extract'
    author = 'Kovid Goyal'
    description = _('Extract common e-book formats from archives '
        '(zip/rar) files. Also try to autodetect if they are actually '
        'cbz/cbr files.')
    file_types = set(['zip', 'rar'])
    supported_platforms = ['windows', 'osx', 'linux']
    on_import = True

    def run(self, archive):
        '''
        Process the archive at the path `archive`.

        :param archive: path to a zip or rar file. It is treated as rar
                        when the path ends with .rar (case-insensitive),
                        otherwise as zip.
        :return: path to the file that should be used in place of the
                 archive: a temporary .cbz/.cbr copy, a temporary file
                 holding the single extracted e-book, or `archive` itself
                 when nothing applies.
        '''
        import shutil

        is_rar = archive.lower().endswith('.rar')
        zf = None
        if is_rar:
            from calibre.libunrar import extract_member, names
            fnames = names(archive)
        else:
            from calibre.utils.zipfile import ZipFile
            zf = ZipFile(archive, 'r')

        # Everything below runs under try/finally so that the zip handle
        # is released on every return path and on errors.
        try:
            if zf is not None:
                fnames = zf.namelist()

            # Only entries that have an extension take part in detection
            fnames = [x for x in fnames if '.' in x]
            if is_comic(fnames):
                ext = '.cbr' if is_rar else '.cbz'
                of = self.temporary_file('_archive_extract'+ext)
                with closing(of):
                    with open(archive, 'rb') as f:
                        # Copy in chunks instead of loading the whole
                        # archive into memory
                        shutil.copyfileobj(f, of)
                return of.name

            # Extraction is only attempted for single-member archives
            if len(fnames) != 1:
                return archive
            fname = fnames[0]
            ext = os.path.splitext(fname)[1][1:]
            if ext.lower() not in ('lit', 'epub', 'mobi', 'prc', 'rtf', 'pdf',
                    'mp3', 'pdb', 'azw', 'azw1'):
                return archive

            of = self.temporary_file('_archive_extract.'+ext)
            with closing(of):
                if is_rar:
                    data = extract_member(archive, match=None, name=fname)[1]
                else:
                    data = zf.read(fname)
                of.write(data)
            return of.name
        finally:
            if zf is not None:
                zf.close()
|
||||
|
@ -15,6 +15,7 @@ from calibre.ebooks.metadata import string_to_authors, authors_to_sort_string, \
|
||||
title_sort, MetaInformation
|
||||
from calibre.ebooks.lrf.meta import LRFMetaFile
|
||||
from calibre import prints
|
||||
from calibre.utils.date import parse_date
|
||||
|
||||
USAGE='%%prog ebook_file [' + _('options') + ']\n' + \
|
||||
_('''
|
||||
@ -69,6 +70,8 @@ def config():
|
||||
help=_('Set the book producer.'))
|
||||
c.add_opt('language', ['-l', '--language'],
|
||||
help=_('Set the language.'))
|
||||
c.add_opt('pubdate', ['-d', '--date'],
|
||||
help=_('Set the published date.'))
|
||||
|
||||
c.add_opt('get_cover', ['--get-cover'],
|
||||
help=_('Get the cover from the ebook and save it at as the '
|
||||
@ -132,6 +135,8 @@ def do_set_metadata(opts, mi, stream, stream_type):
|
||||
mi.series = opts.series.strip()
|
||||
if getattr(opts, 'series_index', None) is not None:
|
||||
mi.series_index = float(opts.series_index.strip())
|
||||
if getattr(opts, 'pubdate', None) is not None:
|
||||
mi.pubdate = parse_date(opts.pubdate, assume_utc=False, as_utc=False)
|
||||
|
||||
if getattr(opts, 'cover', None) is not None:
|
||||
ext = os.path.splitext(opts.cover)[1].replace('.', '').upper()
|
||||
|
@ -5,7 +5,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
'''Read meta information from epub files'''
|
||||
|
||||
import os
|
||||
import os, re
|
||||
from cStringIO import StringIO
|
||||
from contextlib import closing
|
||||
|
||||
@ -29,15 +29,15 @@ class Container(dict):
|
||||
def __init__(self, stream=None):
|
||||
if not stream: return
|
||||
soup = BeautifulStoneSoup(stream.read())
|
||||
container = soup.find('container')
|
||||
container = soup.find(name=re.compile(r'container$', re.I))
|
||||
if not container:
|
||||
raise OCFException("<container/> element missing")
|
||||
raise OCFException("<container> element missing")
|
||||
if container.get('version', None) != '1.0':
|
||||
raise EPubException("unsupported version of OCF")
|
||||
rootfiles = container.find('rootfiles')
|
||||
rootfiles = container.find(re.compile(r'rootfiles$', re.I))
|
||||
if not rootfiles:
|
||||
raise EPubException("<rootfiles/> element missing")
|
||||
for rootfile in rootfiles.findAll('rootfile'):
|
||||
for rootfile in rootfiles.findAll(re.compile(r'rootfile$', re.I)):
|
||||
try:
|
||||
self[rootfile['media-type']] = rootfile['full-path']
|
||||
except KeyError:
|
||||
@ -69,7 +69,7 @@ class OCFReader(OCF):
|
||||
self.opf_path = self.container[OPF.MIMETYPE]
|
||||
try:
|
||||
with closing(self.open(self.opf_path)) as f:
|
||||
self.opf = OPF(f, self.root)
|
||||
self.opf = OPF(f, self.root, populate_spine=False)
|
||||
except KeyError:
|
||||
raise EPubException("missing OPF package file")
|
||||
|
||||
@ -101,10 +101,9 @@ class OCFDirReader(OCFReader):
|
||||
def get_cover(opf, opf_path, stream):
|
||||
from calibre.ebooks import render_html_svg_workaround
|
||||
from calibre.utils.logging import default_log
|
||||
spine = list(opf.spine_items())
|
||||
if not spine:
|
||||
cpage = opf.first_spine_item()
|
||||
if not cpage:
|
||||
return
|
||||
cpage = spine[0]
|
||||
with TemporaryDirectory('_epub_meta') as tdir:
|
||||
with CurrentDir(tdir):
|
||||
stream.seek(0)
|
||||
|
@ -6,14 +6,13 @@ __docformat__ = 'restructuredtext en'
|
||||
import sys, textwrap
|
||||
from urllib import urlencode
|
||||
from functools import partial
|
||||
from datetime import datetime
|
||||
|
||||
from lxml import etree
|
||||
from dateutil import parser
|
||||
|
||||
from calibre import browser, preferred_encoding
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
from calibre.utils.config import OptionParser
|
||||
from calibre.utils.date import parse_date, utcnow
|
||||
|
||||
NAMESPACES = {
|
||||
'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/',
|
||||
@ -156,9 +155,8 @@ class ResultList(list):
|
||||
try:
|
||||
d = date(entry)
|
||||
if d:
|
||||
default = datetime.utcnow()
|
||||
default = datetime(default.year, default.month, 15)
|
||||
d = parser.parse(d[0].text, default=default)
|
||||
default = utcnow().replace(day=15)
|
||||
d = parse_date(d[0].text, assume_utc=True, default=default)
|
||||
else:
|
||||
d = None
|
||||
except:
|
||||
|
@ -65,6 +65,10 @@ def _metadata_from_formats(formats):
|
||||
|
||||
return mi
|
||||
|
||||
def is_recipe(filename):
    '''
    Return True if `filename` starts with ``calibre`` and, once everything
    from its last dot onwards is removed, ends with ``_recipe_out``.

    A name containing no dot at all never matches, because the part before
    the (missing) last dot is empty.
    '''
    if not filename.startswith('calibre'):
        return False
    stem = filename.rpartition('.')[0]
    return stem.endswith('_recipe_out')
|
||||
|
||||
def get_metadata(stream, stream_type='lrf', use_libprs_metadata=False):
|
||||
if stream_type: stream_type = stream_type.lower()
|
||||
if stream_type in ('html', 'html', 'xhtml', 'xhtm', 'xml'):
|
||||
@ -84,11 +88,10 @@ def get_metadata(stream, stream_type='lrf', use_libprs_metadata=False):
|
||||
return opf
|
||||
|
||||
mi = MetaInformation(None, None)
|
||||
if prefs['read_file_metadata']:
|
||||
mi = get_file_type_metadata(stream, stream_type)
|
||||
|
||||
name = os.path.basename(getattr(stream, 'name', ''))
|
||||
base = metadata_from_filename(name)
|
||||
if is_recipe(name) or prefs['read_file_metadata']:
|
||||
mi = get_file_type_metadata(stream, stream_type)
|
||||
if base.title == os.path.splitext(name)[0] and base.authors is None:
|
||||
# Assume that there was no metadata in the file and the user set pattern
|
||||
# to match meta info from the file name did not match.
|
||||
|
@ -11,13 +11,11 @@ __docformat__ = 'restructuredtext en'
|
||||
|
||||
from struct import pack, unpack
|
||||
from cStringIO import StringIO
|
||||
from datetime import datetime
|
||||
|
||||
from calibre.ebooks.mobi import MobiError
|
||||
from calibre.ebooks.mobi.writer import rescale_image, MAX_THUMB_DIMEN
|
||||
from calibre.ebooks.mobi.langcodes import iana2mobi
|
||||
|
||||
import struct
|
||||
from calibre.utils.date import now as nowf
|
||||
|
||||
class StreamSlicer(object):
|
||||
|
||||
@ -105,11 +103,12 @@ class MetadataUpdater(object):
|
||||
have_exth = self.have_exth = (flags & 0x40) != 0
|
||||
self.cover_record = self.thumbnail_record = None
|
||||
self.timestamp = None
|
||||
|
||||
self.pdbrecords = self.get_pdbrecords()
|
||||
|
||||
self.original_exth_records = {}
|
||||
if not have_exth:
|
||||
self.create_exth()
|
||||
|
||||
self.have_exth = True
|
||||
# Fetch timestamp, cover_record, thumbnail_record
|
||||
self.fetchEXTHFields()
|
||||
|
||||
@ -131,14 +130,18 @@ class MetadataUpdater(object):
|
||||
content = exth[pos + 8: pos + size]
|
||||
pos += size
|
||||
|
||||
self.original_exth_records[id] = content
|
||||
|
||||
if id == 106:
|
||||
self.timestamp = content
|
||||
elif id == 201:
|
||||
rindex, = self.cover_rindex, = unpack('>I', content)
|
||||
self.cover_record = self.record(rindex + image_base)
|
||||
rindex, = self.cover_rindex, = unpack('>i', content)
|
||||
if rindex > 0 :
|
||||
self.cover_record = self.record(rindex + image_base)
|
||||
elif id == 202:
|
||||
rindex, = self.thumbnail_rindex, = unpack('>I', content)
|
||||
self.thumbnail_record = self.record(rindex + image_base)
|
||||
rindex, = self.thumbnail_rindex, = unpack('>i', content)
|
||||
if rindex > 0 :
|
||||
self.thumbnail_record = self.record(rindex + image_base)
|
||||
|
||||
def patch(self, off, new_record0):
|
||||
# Save the current size of each record
|
||||
@ -181,14 +184,14 @@ class MetadataUpdater(object):
|
||||
off = self.pdbrecords[section][0]
|
||||
self.patch(off, new)
|
||||
|
||||
def create_exth(self, exth=None):
|
||||
def create_exth(self, new_title=None, exth=None):
|
||||
# Add an EXTH block to record 0, rewrite the stream
|
||||
# self.hexdump(self.record0)
|
||||
|
||||
# Fetch the title
|
||||
title_offset, = struct.unpack('>L', self.record0[0x54:0x58])
|
||||
title_length, = struct.unpack('>L', self.record0[0x58:0x5c])
|
||||
title_in_file, = struct.unpack('%ds' % (title_length), self.record0[title_offset:title_offset + title_length])
|
||||
# Fetch the existing title
|
||||
title_offset, = unpack('>L', self.record0[0x54:0x58])
|
||||
title_length, = unpack('>L', self.record0[0x58:0x5c])
|
||||
title_in_file, = unpack('%ds' % (title_length), self.record0[title_offset:title_offset + title_length])
|
||||
|
||||
# Adjust length to accommodate PrimaryINDX if necessary
|
||||
mobi_header_length, = unpack('>L', self.record0[0x14:0x18])
|
||||
@ -207,14 +210,21 @@ class MetadataUpdater(object):
|
||||
exth = ['EXTH', pack('>II', 12, 0), pad]
|
||||
exth = ''.join(exth)
|
||||
|
||||
# Update title_offset
|
||||
# Update title_offset, title_len if new_title
|
||||
self.record0[0x54:0x58] = pack('>L', 0x10 + mobi_header_length + len(exth))
|
||||
if new_title:
|
||||
self.record0[0x58:0x5c] = pack('>L', len(new_title))
|
||||
|
||||
# Create an updated Record0
|
||||
new_record0 = StringIO()
|
||||
new_record0.write(self.record0[:0x10 + mobi_header_length])
|
||||
new_record0.write(exth)
|
||||
new_record0.write(title_in_file)
|
||||
if new_title:
|
||||
#new_record0.write(new_title.encode(self.codec, 'replace'))
|
||||
new_title = (new_title or _('Unknown')).encode(self.codec, 'replace')
|
||||
new_record0.write(new_title)
|
||||
else:
|
||||
new_record0.write(title_in_file)
|
||||
|
||||
# Pad to a 4-byte boundary
|
||||
trail = len(new_record0.getvalue()) % 4
|
||||
@ -244,7 +254,7 @@ class MetadataUpdater(object):
|
||||
def get_pdbrecords(self):
|
||||
pdbrecords = []
|
||||
for i in xrange(self.nrecs):
|
||||
offset, a1,a2,a3,a4 = struct.unpack('>LBBBB', self.data[78+i*8:78+i*8+8])
|
||||
offset, a1,a2,a3,a4 = unpack('>LBBBB', self.data[78+i*8:78+i*8+8])
|
||||
flags, val = a1, a2<<16|a3<<8|a4
|
||||
pdbrecords.append( [offset, flags, val] )
|
||||
return pdbrecords
|
||||
@ -275,6 +285,10 @@ class MetadataUpdater(object):
|
||||
return StreamSlicer(self.stream, start, stop)
|
||||
|
||||
def update(self, mi):
|
||||
def pop_exth_record(exth_id):
|
||||
if exth_id in self.original_exth_records:
|
||||
self.original_exth_records.pop(exth_id)
|
||||
|
||||
if self.type != "BOOKMOBI":
|
||||
raise MobiError("Setting metadata only supported for MOBI files of type 'BOOK'.\n"
|
||||
"\tThis is a '%s' file of type '%s'" % (self.type[0:4], self.type[4:8]))
|
||||
@ -289,35 +303,53 @@ class MetadataUpdater(object):
|
||||
if mi.author_sort and pas:
|
||||
authors = mi.author_sort
|
||||
recs.append((100, authors.encode(self.codec, 'replace')))
|
||||
pop_exth_record(100)
|
||||
elif mi.authors:
|
||||
authors = '; '.join(mi.authors)
|
||||
recs.append((100, authors.encode(self.codec, 'replace')))
|
||||
pop_exth_record(100)
|
||||
if mi.publisher:
|
||||
recs.append((101, mi.publisher.encode(self.codec, 'replace')))
|
||||
pop_exth_record(101)
|
||||
if mi.comments:
|
||||
recs.append((103, mi.comments.encode(self.codec, 'replace')))
|
||||
pop_exth_record(103)
|
||||
if mi.isbn:
|
||||
recs.append((104, mi.isbn.encode(self.codec, 'replace')))
|
||||
pop_exth_record(104)
|
||||
if mi.tags:
|
||||
subjects = '; '.join(mi.tags)
|
||||
recs.append((105, subjects.encode(self.codec, 'replace')))
|
||||
pop_exth_record(105)
|
||||
if mi.pubdate:
|
||||
recs.append((106, str(mi.pubdate).encode(self.codec, 'replace')))
|
||||
pop_exth_record(106)
|
||||
elif mi.timestamp:
|
||||
recs.append((106, str(mi.timestamp).encode(self.codec, 'replace')))
|
||||
pop_exth_record(106)
|
||||
elif self.timestamp:
|
||||
recs.append((106, self.timestamp))
|
||||
pop_exth_record(106)
|
||||
else:
|
||||
recs.append((106, str(datetime.now()).encode(self.codec, 'replace')))
|
||||
recs.append((106, nowf().isoformat().encode(self.codec, 'replace')))
|
||||
pop_exth_record(106)
|
||||
if self.cover_record is not None:
|
||||
recs.append((201, pack('>I', self.cover_rindex)))
|
||||
recs.append((203, pack('>I', 0)))
|
||||
pop_exth_record(201)
|
||||
pop_exth_record(203)
|
||||
if self.thumbnail_record is not None:
|
||||
recs.append((202, pack('>I', self.thumbnail_rindex)))
|
||||
pop_exth_record(202)
|
||||
|
||||
if getattr(self, 'encryption_type', -1) != 0:
|
||||
raise MobiError('Setting metadata in DRMed MOBI files is not supported.')
|
||||
|
||||
# Restore any original EXTH fields that weren't modified/updated
|
||||
for id in sorted(self.original_exth_records):
|
||||
recs.append((id, self.original_exth_records[id]))
|
||||
recs = sorted(recs, key=lambda x:(x[0],x[0]))
|
||||
|
||||
exth = StringIO()
|
||||
for code, data in recs:
|
||||
exth.write(pack('>II', code, len(data) + 8))
|
||||
@ -332,7 +364,7 @@ class MetadataUpdater(object):
|
||||
raise MobiError('No existing EXTH record. Cannot update metadata.')
|
||||
|
||||
self.record0[92:96] = iana2mobi(mi.language)
|
||||
self.create_exth(exth)
|
||||
self.create_exth(exth=exth, new_title=mi.title)
|
||||
|
||||
# Fetch updated timestamp, cover_record, thumbnail_record
|
||||
self.fetchEXTHFields()
|
||||
|
@ -12,12 +12,12 @@ from urllib import unquote
|
||||
from urlparse import urlparse
|
||||
|
||||
from lxml import etree
|
||||
from dateutil import parser
|
||||
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
from calibre.constants import __appname__, __version__, filesystem_encoding
|
||||
from calibre.ebooks.metadata.toc import TOC
|
||||
from calibre.ebooks.metadata import MetaInformation, string_to_authors
|
||||
from calibre.utils.date import parse_date, isoformat
|
||||
|
||||
|
||||
class Resource(object):
|
||||
@ -272,6 +272,10 @@ class Spine(ResourceCollection):
|
||||
self.id = idfunc(self.path)
|
||||
self.idref = None
|
||||
|
||||
def __repr__(self):
    # Debugging representation showing the item's path, id and
    # is_linear attributes.
    fields = (self.path, self.id, self.is_linear)
    return 'Spine.Item(path=%r, id=%s, is_linear=%s)' % fields
|
||||
|
||||
@staticmethod
|
||||
def from_opf_spine_element(itemrefs, manifest):
|
||||
s = Spine(manifest)
|
||||
@ -280,7 +284,7 @@ class Spine(ResourceCollection):
|
||||
if idref is not None:
|
||||
path = s.manifest.path_for_id(idref)
|
||||
if path:
|
||||
r = Spine.Item(s.manifest.id_for_path, path, is_path=True)
|
||||
r = Spine.Item(lambda x:idref, path, is_path=True)
|
||||
r.is_linear = itemref.get('linear', 'yes') == 'yes'
|
||||
r.idref = idref
|
||||
s.append(r)
|
||||
@ -441,6 +445,8 @@ class OPF(object):
|
||||
guide_path = XPath('descendant::*[re:match(name(), "guide", "i")]/*[re:match(name(), "reference", "i")]')
|
||||
|
||||
title = MetadataField('title', formatter=lambda x: re.sub(r'\s+', ' ', x))
|
||||
title_sort = MetadataField('title_sort', formatter=lambda x:
|
||||
re.sub(r'\s+', ' ', x), is_dc=False)
|
||||
publisher = MetadataField('publisher')
|
||||
language = MetadataField('language')
|
||||
comments = MetadataField('description')
|
||||
@ -449,12 +455,14 @@ class OPF(object):
|
||||
series = MetadataField('series', is_dc=False)
|
||||
series_index = MetadataField('series_index', is_dc=False, formatter=float, none_is=1)
|
||||
rating = MetadataField('rating', is_dc=False, formatter=int)
|
||||
pubdate = MetadataField('date', formatter=parser.parse)
|
||||
pubdate = MetadataField('date', formatter=parse_date)
|
||||
publication_type = MetadataField('publication_type', is_dc=False)
|
||||
timestamp = MetadataField('timestamp', is_dc=False, formatter=parser.parse)
|
||||
timestamp = MetadataField('timestamp', is_dc=False,
|
||||
formatter=parse_date)
|
||||
|
||||
|
||||
def __init__(self, stream, basedir=os.getcwdu(), unquote_urls=True):
|
||||
def __init__(self, stream, basedir=os.getcwdu(), unquote_urls=True,
|
||||
populate_spine=True):
|
||||
if not hasattr(stream, 'read'):
|
||||
stream = open(stream, 'rb')
|
||||
raw = stream.read()
|
||||
@ -477,7 +485,7 @@ class OPF(object):
|
||||
self.manifest = Manifest.from_opf_manifest_element(m, basedir)
|
||||
self.spine = None
|
||||
s = self.spine_path(self.root)
|
||||
if s:
|
||||
if populate_spine and s:
|
||||
self.spine = Spine.from_opf_spine_element(s, self.manifest)
|
||||
self.guide = None
|
||||
guide = self.guide_path(self.root)
|
||||
@ -584,6 +592,15 @@ class OPF(object):
|
||||
if x.get('id', None) == idref:
|
||||
yield x.get('href', '')
|
||||
|
||||
def first_spine_item(self):
    '''
    Return the href of the manifest entry referenced by the first spine
    element, or None when the spine is empty or no manifest entry has a
    matching id.
    '''
    spine_refs = self.iterspine()
    if spine_refs:
        wanted = spine_refs[0].get('idref', '')
        # The first manifest entry whose id matches wins
        for entry in self.itermanifest():
            if entry.get('id', None) == wanted:
                return entry.get('href', None)
    return None
|
||||
|
||||
def create_spine_item(self, idref):
|
||||
ans = etree.Element('{%s}itemref'%self.NAMESPACES['opf'], idref=idref)
|
||||
ans.tail = '\n\t\t'
|
||||
@ -675,29 +692,6 @@ class OPF(object):
|
||||
|
||||
return property(fget=fget, fset=fset)
|
||||
|
||||
@dynamic_property
|
||||
def title_sort(self):
|
||||
|
||||
def fget(self):
|
||||
matches = self.title_path(self.metadata)
|
||||
if matches:
|
||||
for match in matches:
|
||||
ans = match.get('{%s}file-as'%self.NAMESPACES['opf'], None)
|
||||
if not ans:
|
||||
ans = match.get('file-as', None)
|
||||
if ans:
|
||||
return ans
|
||||
|
||||
def fset(self, val):
|
||||
matches = self.title_path(self.metadata)
|
||||
if matches:
|
||||
for key in matches[0].attrib:
|
||||
if key.endswith('file-as'):
|
||||
matches[0].attrib.pop(key)
|
||||
matches[0].set('file-as', unicode(val))
|
||||
|
||||
return property(fget=fget, fset=fset)
|
||||
|
||||
@dynamic_property
|
||||
def tags(self):
|
||||
|
||||
@ -869,7 +863,8 @@ class OPF(object):
|
||||
def smart_update(self, mi):
|
||||
for attr in ('title', 'authors', 'author_sort', 'title_sort',
|
||||
'publisher', 'series', 'series_index', 'rating',
|
||||
'isbn', 'language', 'tags', 'category', 'comments'):
|
||||
'isbn', 'language', 'tags', 'category', 'comments',
|
||||
'pubdate'):
|
||||
val = getattr(mi, attr, None)
|
||||
if val is not None and val != [] and val != (None, None):
|
||||
setattr(self, attr, val)
|
||||
@ -1041,12 +1036,12 @@ def metadata_to_opf(mi, as_string=True):
|
||||
elem.text = text.strip()
|
||||
metadata.append(elem)
|
||||
|
||||
factory(DC('title'), mi.title, mi.title_sort)
|
||||
factory(DC('title'), mi.title)
|
||||
for au in mi.authors:
|
||||
factory(DC('creator'), au, mi.author_sort, 'aut')
|
||||
factory(DC('contributor'), mi.book_producer, __appname__, 'bkp')
|
||||
if hasattr(mi.pubdate, 'isoformat'):
|
||||
factory(DC('date'), mi.pubdate.isoformat())
|
||||
factory(DC('date'), isoformat(mi.pubdate))
|
||||
factory(DC('language'), mi.language)
|
||||
if mi.category:
|
||||
factory(DC('type'), mi.category)
|
||||
@ -1069,9 +1064,11 @@ def metadata_to_opf(mi, as_string=True):
|
||||
if mi.rating is not None:
|
||||
meta('rating', str(mi.rating))
|
||||
if hasattr(mi.timestamp, 'isoformat'):
|
||||
meta('timestamp', mi.timestamp.isoformat())
|
||||
meta('timestamp', isoformat(mi.timestamp))
|
||||
if mi.publication_type:
|
||||
meta('publication_type', mi.publication_type)
|
||||
if mi.title_sort:
|
||||
meta('title_sort', mi.title_sort)
|
||||
|
||||
metadata[-1].tail = '\n' +(' '*4)
|
||||
|
||||
@ -1088,12 +1085,12 @@ def metadata_to_opf(mi, as_string=True):
|
||||
|
||||
|
||||
def test_m2o():
|
||||
from datetime import datetime
|
||||
from calibre.utils.date import now as nowf
|
||||
from cStringIO import StringIO
|
||||
mi = MetaInformation('test & title', ['a"1', "a'2"])
|
||||
mi.title_sort = 'a\'"b'
|
||||
mi.author_sort = 'author sort'
|
||||
mi.pubdate = datetime.now()
|
||||
mi.pubdate = nowf()
|
||||
mi.language = 'en'
|
||||
mi.category = 'test'
|
||||
mi.comments = 'what a fun book\n\n'
|
||||
@ -1103,7 +1100,7 @@ def test_m2o():
|
||||
mi.series = 's"c\'l&<>'
|
||||
mi.series_index = 3.34
|
||||
mi.rating = 3
|
||||
mi.timestamp = datetime.now()
|
||||
mi.timestamp = nowf()
|
||||
mi.publication_type = 'ooooo'
|
||||
mi.rights = 'yes'
|
||||
mi.cover = 'asd.jpg'
|
||||
|
@ -13,6 +13,9 @@ from calibre.ptempfile import PersistentTemporaryFile
|
||||
from calibre.libunrar import extract_member, names
|
||||
|
||||
def get_metadata(stream):
|
||||
from calibre.ebooks.metadata.archive import is_comic
|
||||
from calibre.ebooks.metadata.meta import get_metadata
|
||||
|
||||
path = getattr(stream, 'name', False)
|
||||
if not path:
|
||||
pt = PersistentTemporaryFile('_rar-meta.rar')
|
||||
@ -21,6 +24,8 @@ def get_metadata(stream):
|
||||
path = pt.name
|
||||
path = os.path.abspath(path)
|
||||
file_names = list(names(path))
|
||||
if is_comic(file_names):
|
||||
return get_metadata(stream, 'cbr')
|
||||
for f in file_names:
|
||||
stream_type = os.path.splitext(f)[1].lower()
|
||||
if stream_type:
|
||||
@ -29,7 +34,6 @@ def get_metadata(stream):
|
||||
'rb', 'imp', 'pdf', 'lrf'):
|
||||
data = extract_member(path, match=None, name=f)[1]
|
||||
stream = StringIO(data)
|
||||
from calibre.ebooks.metadata.meta import get_metadata
|
||||
return get_metadata(stream, stream_type)
|
||||
raise ValueError('No ebook found in RAR archive')
|
||||
|
||||
|
@ -8,15 +8,21 @@ from cStringIO import StringIO
|
||||
|
||||
|
||||
def get_metadata(stream):
|
||||
from calibre.ebooks.metadata.meta import get_metadata
|
||||
from calibre.ebooks.metadata.archive import is_comic
|
||||
stream_type = None
|
||||
zf = ZipFile(stream, 'r')
|
||||
for f in zf.namelist():
|
||||
names = zf.namelist()
|
||||
if is_comic(names):
|
||||
# Is probably a comic
|
||||
return get_metadata(stream, 'cbz')
|
||||
|
||||
for f in names:
|
||||
stream_type = os.path.splitext(f)[1].lower()
|
||||
if stream_type:
|
||||
stream_type = stream_type[1:]
|
||||
if stream_type in ('lit', 'opf', 'prc', 'mobi', 'fb2', 'epub',
|
||||
'rb', 'imp', 'pdf', 'lrf'):
|
||||
from calibre.ebooks.metadata.meta import get_metadata
|
||||
stream = StringIO(zf.read(f))
|
||||
return get_metadata(stream, stream_type)
|
||||
raise ValueError('No ebook found in ZIP archive')
|
||||
|
@ -4,13 +4,11 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
Read data from .mobi files
|
||||
'''
|
||||
|
||||
import datetime
|
||||
import functools
|
||||
import os
|
||||
import re
|
||||
import struct
|
||||
import textwrap
|
||||
|
||||
import cStringIO
|
||||
|
||||
try:
|
||||
@ -23,6 +21,7 @@ from lxml import html, etree
|
||||
|
||||
from calibre import entity_to_unicode, CurrentDir
|
||||
from calibre.utils.filenames import ascii_filename
|
||||
from calibre.utils.date import parse_date
|
||||
from calibre.ptempfile import TemporaryDirectory
|
||||
from calibre.ebooks import DRMError
|
||||
from calibre.ebooks.chardet import ENCODING_PATS
|
||||
@ -68,7 +67,10 @@ class EXTHHeader(object):
|
||||
pass
|
||||
elif id == 503: # Long title
|
||||
if not title or title == _('Unknown'):
|
||||
title = content
|
||||
try:
|
||||
title = content.decode(codec)
|
||||
except:
|
||||
pass
|
||||
#else:
|
||||
# print 'unknown record', id, repr(content)
|
||||
if title:
|
||||
@ -96,8 +98,7 @@ class EXTHHeader(object):
|
||||
self.mi.tags = list(set(self.mi.tags))
|
||||
elif id == 106:
|
||||
try:
|
||||
self.mi.publish_date = datetime.datetime.strptime(
|
||||
content, '%Y-%m-%d', ).date()
|
||||
self.mi.pubdate = parse_date(content, as_utc=False)
|
||||
except:
|
||||
pass
|
||||
elif id == 108:
|
||||
|
@ -310,6 +310,7 @@ class Serializer(object):
|
||||
text = text.replace('&', '&')
|
||||
text = text.replace('<', '<')
|
||||
text = text.replace('>', '>')
|
||||
text = text.replace(u'\u00AD', '') # Soft-hyphen
|
||||
if quot:
|
||||
text = text.replace('"', '"')
|
||||
self.buffer.write(encode(text))
|
||||
@ -610,12 +611,21 @@ class MobiWriter(object):
|
||||
if (i>firstSequentialNode) and self._ctoc_map[i-1]['klass'] != 'section':
|
||||
if offset != previousOffset + previousLength :
|
||||
self._oeb.log.warning("*** TOC discontinuity: nodes are not sequential ***")
|
||||
self._oeb.log.warning(" node %03d: '%s' offset: 0x%X length: 0x%X" % \
|
||||
self._oeb.log.info(" node %03d: '%s' offset: 0x%X length: 0x%X" % \
|
||||
(i-1, entries[i-1].title, previousOffset, previousLength) )
|
||||
self._oeb.log.warning(" node %03d: '%s' offset: 0x%X != 0x%06X" % \
|
||||
(i, child.title, offset, previousOffset + previousLength) )
|
||||
self._oeb.log.warning("\tnode data %03d: %s" % (i-1, self._ctoc_map[i-1]) )
|
||||
self._oeb.log.warning("\tnode data %03d: %s" % (i, self._ctoc_map[i]) )
|
||||
# self._oeb.log.warning("\tnode data %03d: %s" % (i-1, self._ctoc_map[i-1]) )
|
||||
# self._oeb.log.warning("\tnode data %03d: %s" % (i, self._ctoc_map[i]) )
|
||||
# Dump the offending entry
|
||||
self._oeb.log.info("...")
|
||||
for z in range(i-6 if i-6 > 0 else 0, i+6 if i+6 < len(entries) else len(entries)):
|
||||
if z == i:
|
||||
self._oeb.log.warning("child %03d: %s" % (z, entries[z]))
|
||||
else:
|
||||
self._oeb.log.info("child %03d: %s" % (z, entries[z]))
|
||||
self._oeb.log.info("...")
|
||||
|
||||
self._oeb.log.warning('_generate_indexed_navpoints: Failed to generate index')
|
||||
# Zero out self._HTMLRecords, return False
|
||||
self._HTMLRecords = []
|
||||
|
@ -16,9 +16,10 @@ from urllib import unquote as urlunquote
|
||||
from urlparse import urljoin
|
||||
|
||||
from lxml import etree, html
|
||||
from cssutils import CSSParser
|
||||
|
||||
import calibre
|
||||
from cssutils import CSSParser
|
||||
from calibre.constants import filesystem_encoding
|
||||
from calibre.translations.dynamic import translate
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
from calibre.ebooks.oeb.entitydefs import ENTITYDEFS
|
||||
@ -434,10 +435,18 @@ class DirContainer(object):
|
||||
|
||||
def namelist(self):
|
||||
names = []
|
||||
for root, dirs, files in os.walk(self.rootdir):
|
||||
base = self.rootdir
|
||||
if isinstance(base, unicode):
|
||||
base = base.encode(filesystem_encoding)
|
||||
for root, dirs, files in os.walk(base):
|
||||
for fname in files:
|
||||
fname = os.path.join(root, fname)
|
||||
fname = fname.replace('\\', '/')
|
||||
if not isinstance(fname, unicode):
|
||||
try:
|
||||
fname = fname.decode(filesystem_encoding)
|
||||
except:
|
||||
continue
|
||||
names.append(fname)
|
||||
return names
|
||||
|
||||
@ -842,8 +851,10 @@ class Manifest(object):
|
||||
self.oeb.log.warn('File %r appears to be a HTML fragment'%self.href)
|
||||
nroot = etree.fromstring('<html><body/></html>')
|
||||
parent = nroot[0]
|
||||
for child in list(data):
|
||||
child.getparent().remove(child)
|
||||
for child in list(data.iter()):
|
||||
oparent = child.getparent()
|
||||
if oparent is not None:
|
||||
oparent.remove(child)
|
||||
parent.append(child)
|
||||
data = nroot
|
||||
|
||||
@ -1567,14 +1578,17 @@ class TOC(object):
|
||||
parent = etree.Element(NCX('navMap'))
|
||||
for node in self.nodes:
|
||||
id = node.id or unicode(uuid.uuid4())
|
||||
attrib = {'id': id, 'playOrder': str(node.play_order)}
|
||||
po = node.play_order
|
||||
if po == 0:
|
||||
po = 1
|
||||
attrib = {'id': id, 'playOrder': str(po)}
|
||||
if node.klass:
|
||||
attrib['class'] = node.klass
|
||||
point = element(parent, NCX('navPoint'), attrib=attrib)
|
||||
label = etree.SubElement(point, NCX('navLabel'))
|
||||
title = node.title
|
||||
if title:
|
||||
title = re.sub(r'\s', ' ', title)
|
||||
title = re.sub(r'\s+', ' ', title)
|
||||
element(label, NCX('text')).text = title
|
||||
element(point, NCX('content'), src=urlunquote(node.href))
|
||||
node.to_ncx(point)
|
||||
|
@ -120,7 +120,10 @@ class EbookIterator(object):
|
||||
bad_map = {}
|
||||
font_family_pat = re.compile(r'font-family\s*:\s*([^;]+)')
|
||||
for csspath in css_files:
|
||||
css = open(csspath, 'rb').read().decode('utf-8', 'replace')
|
||||
try:
|
||||
css = open(csspath, 'rb').read().decode('utf-8', 'replace')
|
||||
except:
|
||||
continue
|
||||
for match in re.compile(r'@font-face\s*{([^}]+)}').finditer(css):
|
||||
block = match.group(1)
|
||||
family = font_family_pat.search(block)
|
||||
@ -181,8 +184,9 @@ class EbookIterator(object):
|
||||
if hasattr(self.pathtoopf, 'manifest'):
|
||||
self.pathtoopf = write_oebbook(self.pathtoopf, self.base)
|
||||
|
||||
|
||||
self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
|
||||
self.opf = getattr(plumber.input_plugin, 'optimize_opf_parsing', None)
|
||||
if self.opf is None:
|
||||
self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
|
||||
self.language = self.opf.language
|
||||
if self.language:
|
||||
self.language = self.language.lower()
|
||||
|
@ -7,7 +7,7 @@ __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os
|
||||
from datetime import datetime
|
||||
from calibre.utils.date import isoformat, now
|
||||
|
||||
def meta_info_to_oeb_metadata(mi, m, log):
|
||||
from calibre.ebooks.oeb.base import OPF
|
||||
@ -60,10 +60,10 @@ def meta_info_to_oeb_metadata(mi, m, log):
|
||||
m.add('subject', t)
|
||||
if mi.pubdate is not None:
|
||||
m.clear('date')
|
||||
m.add('date', mi.pubdate.isoformat())
|
||||
m.add('date', isoformat(mi.pubdate))
|
||||
if mi.timestamp is not None:
|
||||
m.clear('timestamp')
|
||||
m.add('timestamp', mi.timestamp.isoformat())
|
||||
m.add('timestamp', isoformat(mi.timestamp))
|
||||
if mi.rights is not None:
|
||||
m.clear('rights')
|
||||
m.add('rights', mi.rights)
|
||||
@ -71,7 +71,7 @@ def meta_info_to_oeb_metadata(mi, m, log):
|
||||
m.clear('publication_type')
|
||||
m.add('publication_type', mi.publication_type)
|
||||
if not m.timestamp:
|
||||
m.add('timestamp', datetime.now().isoformat())
|
||||
m.add('timestamp', isoformat(now()))
|
||||
|
||||
|
||||
class MergeMetadata(object):
|
||||
|
@ -35,7 +35,10 @@ class RescaleImages(object):
|
||||
if not raw: continue
|
||||
if qt:
|
||||
img = QImage(10, 10, QImage.Format_ARGB32_Premultiplied)
|
||||
if not img.loadFromData(raw): continue
|
||||
try:
|
||||
if not img.loadFromData(raw): continue
|
||||
except:
|
||||
continue
|
||||
width, height = img.width(), img.height()
|
||||
else:
|
||||
f = cStringIO.StringIO(raw)
|
||||
|
@ -42,9 +42,9 @@ class Writer(FormatWriter):
|
||||
pml = unicode(pmlmlizer.extract_content(oeb_book, self.opts)).encode('cp1252', 'replace')
|
||||
|
||||
text, text_sizes = self._text(pml)
|
||||
chapter_index = self._index_item(r'(?s)\\C(?P<val>[0-4)="(?P<text>.+?)"', pml)
|
||||
chapter_index += self.index_item(r'(?s)\\X(?P<val>[0-4])(?P<text>.+?)\\X[0-4]', pml)
|
||||
chapter_index += self.index_item(r'(?s)\\x(?P<text>.+?)\\x', pml)
|
||||
chapter_index = self._index_item(r'(?s)\\C(?P<val>[0-4])="(?P<text>.+?)"', pml)
|
||||
chapter_index += self._index_item(r'(?s)\\X(?P<val>[0-4])(?P<text>.+?)\\X[0-4]', pml)
|
||||
chapter_index += self._index_item(r'(?s)\\x(?P<text>.+?)\\x', pml)
|
||||
link_index = self._index_item(r'(?s)\\Q="(?P<text>.+?)"', pml)
|
||||
images = self._images(oeb_book.manifest, pmlmlizer.image_hrefs)
|
||||
metadata = [self._metadata(metadata)]
|
||||
|
@ -169,6 +169,8 @@ int main(int argc, char **argv) {
|
||||
char *memblock;
|
||||
ifstream::pos_type size;
|
||||
int ret = 0;
|
||||
map<string,string> info;
|
||||
Reflow *reflow = NULL;
|
||||
|
||||
|
||||
if (argc != 2) {
|
||||
@ -189,9 +191,13 @@ int main(int argc, char **argv) {
|
||||
}
|
||||
|
||||
try {
|
||||
Reflow reflow(memblock, size);
|
||||
reflow.render();
|
||||
vector<char> *data = reflow.render_first_page();
|
||||
reflow = new Reflow(memblock, size);
|
||||
info = reflow->get_info();
|
||||
for (map<string,string>::const_iterator it = info.begin() ; it != info.end(); it++ ) {
|
||||
cout << (*it).first << " : " << (*it).second << endl;
|
||||
}
|
||||
//reflow->render();
|
||||
vector<char> *data = reflow->render_first_page();
|
||||
ofstream file("cover.png", ios::binary);
|
||||
file.write(&((*data)[0]), data->size());
|
||||
delete data;
|
||||
@ -200,7 +206,7 @@ int main(int argc, char **argv) {
|
||||
cerr << e.what() << endl;
|
||||
ret = 1;
|
||||
}
|
||||
|
||||
delete reflow;
|
||||
delete[] memblock;
|
||||
return ret;
|
||||
}
|
||||
|
@ -6,7 +6,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import sys
|
||||
import sys, os
|
||||
|
||||
from lxml import etree
|
||||
|
||||
@ -47,6 +47,10 @@ class Image(Element):
|
||||
return '<img src="%s" width="%dpx" height="%dpx"/>' % \
|
||||
(self.src, int(self.width), int(self.height))
|
||||
|
||||
def dump(self, f):
    """Write this image's HTML markup to *f*, followed by a newline.

    f -- any object with a ``write`` method
    """
    markup = self.to_html()
    for piece in (markup, '\n'):
        f.write(piece)
|
||||
|
||||
|
||||
class Text(Element):
|
||||
|
||||
@ -91,6 +95,10 @@ class Text(Element):
|
||||
def to_html(self):
|
||||
return self.raw
|
||||
|
||||
def dump(self, f):
    """Write this text element's HTML, encoded as UTF-8, to *f*.

    A newline is written after the encoded markup.

    f -- any object with a ``write`` method
    """
    encoded = self.to_html().encode('utf-8')
    for piece in (encoded, '\n'):
        f.write(piece)
|
||||
|
||||
class FontSizeStats(dict):
|
||||
|
||||
def __init__(self, stats):
|
||||
@ -143,6 +151,14 @@ class Column(object):
|
||||
def add(self, elem):
    """Append *elem* to this column's elements unless it is already there.

    After a successful append ``self._post_add()`` is called; adding an
    element that is already present is a no-op.
    """
    if elem not in self.elements:
        self.elements.append(elem)
        self._post_add()
|
||||
|
||||
def prepend(self, elem):
    """Insert *elem* at the front of this column's elements unless present.

    After a successful insert ``self._post_add()`` is called; prepending an
    element that is already present is a no-op.
    """
    if elem not in self.elements:
        self.elements.insert(0, elem)
        self._post_add()
|
||||
|
||||
def _post_add(self):
|
||||
self.elements.sort(cmp=lambda x,y:cmp(x.bottom,y.bottom))
|
||||
self.top = self.elements[0].top
|
||||
self.bottom = self.elements[-1].bottom
|
||||
@ -183,6 +199,11 @@ class Column(object):
|
||||
return None
|
||||
return self.elements[idx-1]
|
||||
|
||||
def dump(self, f, num):
    """Write a debugging dump of this column to *f*.

    A header line carrying the column number is written first, then every
    element in ``self.elements`` is asked to dump itself to *f*.

    f   -- any object with a ``write`` method
    num -- column number shown in the header
    """
    header = '******** Column %d\n\n' % num
    f.write(header)
    for member in self.elements:
        member.dump(f)
|
||||
|
||||
|
||||
class Box(list):
|
||||
|
||||
@ -262,7 +283,6 @@ class Region(object):
|
||||
max_lines = max(max_lines, len(c))
|
||||
return max_lines
|
||||
|
||||
|
||||
@property
|
||||
def is_small(self):
|
||||
return self.line_count < 3
|
||||
@ -283,7 +303,6 @@ class Region(object):
|
||||
mc = self.columns[0]
|
||||
return mc
|
||||
|
||||
print
|
||||
for c in singleton.columns:
|
||||
for elem in c:
|
||||
col = most_suitable_column(elem)
|
||||
@ -304,6 +323,51 @@ class Region(object):
|
||||
for x in self.columns:
|
||||
yield x
|
||||
|
||||
def absorb_regions(self, regions, at):
    """Absorb every region in *regions* into this one, in iteration order.

    regions -- iterable of regions to absorb
    at      -- passed unchanged to ``absorb_region`` for each region
    """
    absorb = self.absorb_region
    for candidate in regions:
        absorb(candidate, at)
|
||||
|
||||
def absorb_region(self, region, at):
    """Move the elements of *region*'s columns into this region's columns.

    region -- the region whose column elements are absorbed
    at     -- 'bottom' adds elements with ``Column.add``; any other value
              inserts them with ``Column.prepend``

    When *region* has no more columns than this region, columns are paired
    by position. Otherwise each source column is mapped to the destination
    column whose horizontal extent overlaps it the most (the first column
    when nothing overlaps) and elements are transferred line by line.
    """
    to_bottom = at == 'bottom'

    if len(region.columns) <= len(self.columns):
        for src, dest in zip(region.columns, self.columns):
            insert = dest.add if to_bottom else dest.prepend
            # Prepending walks the source backwards so that the original
            # top-to-bottom order is kept in the destination.
            pending = src if to_bottom else reversed(list(src))
            for elem in pending:
                insert(elem)
        return

    # More source columns than destinations: pick, for every source column,
    # the destination with the widest horizontal overlap.
    targets = []
    for col in region.columns:
        best_width, best_index = 0, 0
        for j, dcol in enumerate(self.columns):
            overlap = Interval(col.left, col.right).intersection(
                    Interval(dcol.left, dcol.right)).width
            if overlap > best_width:
                best_width, best_index = overlap, j
        targets.append(best_index)

    depth = max(map(len, region.columns))
    if to_bottom:
        order = range(depth)
    else:
        order = range(depth - 1, -1, -1)

    for i in order:
        for j, src in enumerate(region.columns):
            dest = self.columns[targets[j]]
            if i >= len(src):
                continue
            if to_bottom:
                dest.add(src.elements[i])
            else:
                dest.prepend(src.elements[i])
|
||||
|
||||
def dump(self, f):
    """Write a debugging dump of this region to *f*.

    A three-line banner stating the number of columns is written first,
    then each column dumps itself together with its index.

    f -- any object with a ``write`` method
    """
    banner = (
        '############################################################\n',
        '########## Region (%d columns) ###############\n' % len(self.columns),
        '############################################################\n\n',
    )
    for text in banner:
        f.write(text)
    for index, column in enumerate(self.columns):
        column.dump(f, index)
|
||||
|
||||
def linearize(self):
|
||||
self.elements = []
|
||||
for x in self.columns:
|
||||
@ -376,7 +440,8 @@ class Page(object):
|
||||
self.font_size_stats[t.font_size] = 0
|
||||
self.font_size_stats[t.font_size] += len(t.text_as_string)
|
||||
self.average_text_height += t.height
|
||||
self.average_text_height /= len(self.texts)
|
||||
if len(self.texts):
|
||||
self.average_text_height /= len(self.texts)
|
||||
|
||||
self.font_size_stats = FontSizeStats(self.font_size_stats)
|
||||
|
||||
@ -431,31 +496,78 @@ class Page(object):
|
||||
if not current_region.is_empty:
|
||||
self.regions.append(current_region)
|
||||
|
||||
if self.opts.verbose > 2:
|
||||
self.debug_dir = 'page-%d'%self.number
|
||||
os.mkdir(self.debug_dir)
|
||||
self.dump_regions('pre-coalesce')
|
||||
|
||||
self.coalesce_regions()
|
||||
self.dump_regions('post-coalesce')
|
||||
|
||||
def dump_regions(self, fname):
    """Dump all regions of this page to a text file in ``self.debug_dir``.

    fname -- label for the dump; the file is named 'regions-<fname>.txt'

    The file starts with a line carrying the page number, followed by the
    dump of every region in ``self.regions``.
    """
    target = os.path.join(self.debug_dir, 'regions-' + fname + '.txt')
    with open(target, 'wb') as out:
        out.write('Page #%d\n\n' % self.number)
        for region in self.regions:
            region.dump(out)
|
||||
|
||||
def coalesce_regions(self):
|
||||
# find contiguous sets of small regions
|
||||
# absorb into a neighboring region (prefer the one with number of cols
|
||||
# closer to the avg number of cols in the set, if equal use larger
|
||||
# region)
|
||||
# merge contiguous regions that can contain each other
|
||||
absorbed = set([])
|
||||
found = True
|
||||
absorbed = set([])
|
||||
processed = set([])
|
||||
while found:
|
||||
found = False
|
||||
for i, region in enumerate(self.regions):
|
||||
if region.is_small:
|
||||
if region in absorbed:
|
||||
continue
|
||||
if region.is_small and region not in processed:
|
||||
found = True
|
||||
regions = []
|
||||
processed.add(region)
|
||||
regions = [region]
|
||||
end = i+1
|
||||
for j in range(i+1, len(self.regions)):
|
||||
end = j
|
||||
if self.regions[j].is_small:
|
||||
regions.append(self.regions[j])
|
||||
else:
|
||||
break
|
||||
prev = None if i == 0 else i-1
|
||||
next = j if self.regions[j] not in regions else None
|
||||
|
||||
|
||||
prev_region = None if i == 0 else i-1
|
||||
next_region = end if end < len(self.regions) and self.regions[end] not in regions else None
|
||||
absorb_at = 'bottom'
|
||||
if prev_region is None and next_region is not None:
|
||||
absorb_into = next_region
|
||||
absorb_at = 'top'
|
||||
elif next_region is None and prev_region is not None:
|
||||
absorb_into = prev_region
|
||||
elif prev_region is None and next_region is None:
|
||||
if len(regions) > 1:
|
||||
absorb_into = i
|
||||
regions = regions[1:]
|
||||
else:
|
||||
absorb_into = None
|
||||
else:
|
||||
absorb_into = prev_region
|
||||
if self.regions[next_region].line_count >= \
|
||||
self.regions[prev_region].line_count:
|
||||
avg_column_count = sum([len(r.columns) for r in
|
||||
regions])/float(len(regions))
|
||||
if self.regions[next_region].line_count > \
|
||||
self.regions[prev_region].line_count \
|
||||
or abs(avg_column_count -
|
||||
len(self.regions[prev_region].columns)) \
|
||||
> abs(avg_column_count -
|
||||
len(self.regions[next_region].columns)):
|
||||
absorb_into = next_region
|
||||
absorb_at = 'top'
|
||||
if absorb_into is not None:
|
||||
self.regions[absorb_into].absorb_regions(regions, absorb_at)
|
||||
absorbed.update(regions)
|
||||
for region in absorbed:
|
||||
self.regions.remove(region)
|
||||
|
||||
def sort_into_columns(self, elem, neighbors):
|
||||
neighbors.add(elem)
|
||||
@ -575,8 +687,9 @@ class PDFDocument(object):
|
||||
for elem in self.elements:
|
||||
html.extend(elem.to_html())
|
||||
html += ['</body>', '</html>']
|
||||
raw = (u'\n'.join(html)).replace('</strong><strong>', '')
|
||||
with open('index.html', 'wb') as f:
|
||||
f.write((u'\n'.join(html)).encode('utf-8'))
|
||||
f.write(raw.encode('utf-8'))
|
||||
|
||||
|
||||
|
||||
|
@ -182,10 +182,10 @@ class PML_HTMLizer(object):
|
||||
return pml
|
||||
|
||||
def strip_pml(self, pml):
|
||||
pml = re.sub(r'\\C\d=".+*"', '', pml)
|
||||
pml = re.sub(r'\\Fn=".+*"', '', pml)
|
||||
pml = re.sub(r'\\Sd=".+*"', '', pml)
|
||||
pml = re.sub(r'\\.=".+*"', '', pml)
|
||||
pml = re.sub(r'\\C\d=".*"', '', pml)
|
||||
pml = re.sub(r'\\Fn=".*"', '', pml)
|
||||
pml = re.sub(r'\\Sd=".*"', '', pml)
|
||||
pml = re.sub(r'\\.=".*"', '', pml)
|
||||
pml = re.sub(r'\\X\d', '', pml)
|
||||
pml = re.sub(r'\\S[pbd]', '', pml)
|
||||
pml = re.sub(r'\\Fn', '', pml)
|
||||
|
@ -27,7 +27,7 @@ from calibre.ebooks.rtf2xml import headings_to_sections, \
|
||||
paragraph_def, convert_to_tags, output, copy, \
|
||||
list_numbers, info, pict, table_info, fonts, paragraphs, \
|
||||
body_styles, preamble_rest, group_styles, \
|
||||
inline, correct_unicode
|
||||
inline
|
||||
from calibre.ebooks.rtf2xml.old_rtf import OldRtf
|
||||
|
||||
"""
|
||||
@ -256,15 +256,6 @@ class ParseRtf:
|
||||
)
|
||||
pict_obj.process_pict()
|
||||
self.__bracket_match('pict_data_info')
|
||||
correct_uni_obj = correct_unicode.CorrectUnicode(
|
||||
in_file = self.__temp_file,
|
||||
bug_handler = RtfInvalidCodeException,
|
||||
copy = self.__copy,
|
||||
run_level = self.__run_level,
|
||||
exception_handler = InvalidRtfException,
|
||||
)
|
||||
correct_uni_obj.correct_unicode()
|
||||
self.__bracket_match('correct_unicode_info')
|
||||
combine_obj = combine_borders.CombineBorders(
|
||||
in_file = self.__temp_file,
|
||||
bug_handler = RtfInvalidCodeException,
|
||||
|
@ -1,94 +0,0 @@
|
||||
#########################################################################
|
||||
# #
|
||||
# #
|
||||
# copyright 2002 Paul Henry Tremblay #
|
||||
# #
|
||||
# This program is distributed in the hope that it will be useful, #
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
|
||||
# General Public License for more details. #
|
||||
# #
|
||||
# You should have received a copy of the GNU General Public License #
|
||||
# along with this program; if not, write to the Free Software #
|
||||
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA #
|
||||
# 02111-1307 USA #
|
||||
# #
|
||||
# #
|
||||
#########################################################################
|
||||
import os, re, tempfile
|
||||
from calibre.ebooks.rtf2xml import copy
|
||||
class CorrectUnicode:
    r"""
    Corrects sequences such as \u201c\'F0\'BE, where the trailing \'F0\'BE
    has to be eliminated.

    The input file is read one token line at a time:

    * a line starting with 'tx<ut' is a unicode token; the text from
      column 16 on is searched for an '&#x...;' entity
    * a unicode token whose code point lies strictly between 0xDFFF and
      0xF8FF is dropped
    * any other unicode token is kept, and the 'tx<hx' lines directly
      following it are dropped
    * every other line is copied through unchanged
    """

    def __init__(self,
            in_file,
            exception_handler,
            bug_handler,
            copy = None,
            run_level = 1,
            ):
        """
        in_file -- path of the token file; it is rewritten in place
        exception_handler -- stored; not used by this class itself
        bug_handler -- exception class raised on internal errors and
            handed on to copy.Copy
        copy -- when true, the corrected output is also saved as
            "correct_unicode.data"
        run_level -- stored; not used by this class itself
        """
        self.__file = in_file
        self.__bug_handler = bug_handler
        self.__copy = copy
        self.__run_level = run_level
        # The temporary file is created by correct_unicode(), not here.
        self.__write_to = None
        self.__exception_handler = exception_handler
        self.__state = 'outside'
        self.__utf_exp = re.compile(r'&#x(.*?);')

    def __process_token(self, line):
        """Route one input line according to the current state.

        'outside' -- not directly after a kept unicode token
        'after'   -- directly after a kept unicode token; 'tx<hx' lines
                     are swallowed until some other kind of line appears

        Raises the bug handler exception if the state is neither of these.
        """
        is_unicode = line[:5] == 'tx<ut'
        if self.__state == 'outside':
            if is_unicode:
                self.__handle_unicode(line)
            else:
                self.__write_obj.write(line)
        elif self.__state == 'after':
            if line[:5] == 'tx<hx':
                # Hex token trailing a unicode character: drop it.
                pass
            elif is_unicode:
                self.__handle_unicode(line)
            else:
                self.__state = 'outside'
                self.__write_obj.write(line)
        else:
            # String exceptions cannot be raised; use the bug handler.
            raise self.__bug_handler(
                'unexpected state %r in CorrectUnicode' % (self.__state,))

    def __handle_unicode(self, line):
        """Keep or drop a unicode token line and update the state."""
        token = line[16:]
        match_obj = self.__utf_exp.search(token)
        if match_obj is None:
            self.__write_obj.write(line)
            self.__state = 'outside'
            return
        dec_num = int(match_obj.group(1), 16)
        # 57343 == 0xDFFF and 63743 == 0xF8FF: code points strictly in
        # between (part of the Unicode Private Use Area) are dropped.
        if 57343 < dec_num < 63743:
            self.__state = 'outside'
        else:
            self.__write_obj.write(line)
            self.__state = 'after'

    def __filter_tokens(self):
        """Stream the input file through the state machine into the temp file."""
        read_obj = open(self.__file, 'r')
        try:
            self.__write_obj = open(self.__write_to, 'w')
            try:
                for line in read_obj:
                    self.__process_token(line)
            finally:
                self.__write_obj.close()
        finally:
            read_obj.close()

    def correct_unicode(self):
        """
        Requires:
            nothing
        Returns:
            nothing (changes the original file)
        Logic:
            Read one line in at a time, writing the lines that are kept to
            a temporary file, then replace the original file with it. The
            temporary file is removed even when an error occurs.
        """
        self.__state = 'outside'
        # mkstemp creates the file atomically, unlike the race-prone
        # mktemp; only the path is needed, so the descriptor is closed.
        fd, self.__write_to = tempfile.mkstemp()
        os.close(fd)
        try:
            self.__filter_tokens()
            copy_obj = copy.Copy(bug_handler = self.__bug_handler)
            if self.__copy:
                copy_obj.copy_file(self.__write_to, "correct_unicode.data")
            copy_obj.rename(self.__write_to, self.__file)
        finally:
            os.remove(self.__write_to)
|