Mirror of https://github.com/kovidgoyal/calibre.git
Sync to trunk.
commit 07dab2d5ae

217  Changelog.yaml
@@ -4,6 +4,223 @@
# for important features/bug fixes.
# Also, each release can have new and improved recipes.

- version: 0.6.42
  date: 2010-02-20

  bug fixes:
    - title: "Fix regression that broke catalog generation from the Graphical User Interface in 0.6.41"

    - title: "Fix right edge of comics like Dilbert and xkcd getting cut off on the SONY reader. More generally, take page margins into account when rescaling images to fit in the selected output profile."

- version: 0.6.41
  date: 2010-02-19

  new features:
    - title: "Make calibre timezone aware. This required lots of internal changes, so I may have broken something"
      type: major

    - title: "Allow editing of metadata in DRMed MOBI files"
      type: major

    - title: "ebook-convert: Allow passing URLs as argument to --cover"
      tickets: [4909]

    - title: "OS X/linux driver for EB511"

    - title: "ebook-meta: Allow changing of published date"

    - title: "Make replacing of files in ZIP archives faster and (hopefully) more robust"

    - title: "Speed optimization for viewing large EPUB files"

    - title: "Speed up parsing of OPF files"
      tickets: [4908]

  bug fixes:
    - title: "Fix drag and drop of multiple books to OS X dock icon"
      tickets: [4849]

    - title: "MOBI Output: Encode titles as UTF-8 in the PalmDoc header as well as the EXTH header, since there are apparently MOBI readers that use the title from the PalmDoc header in preference to the title from the EXTH header."

    - title: "MOBI Output: Remove soft hyphens as the Kindle doesn't support them."
      tickets: [4887]

    - title: "Fix Boox main memory and SD card swapped on windows"

    - title: "Fix sending large ebook files to devices"
      tickets: [4896]

    - title: "EPUB Output: Strip invalid anchors from NCX TOC as Adobe Digital Editions cries when it sees one"
      tickets: [4907]

    - title: "EPUB metadata: Don't set title_sort as a file_as attribute, as the brain-dead OPF spec doesn't allow this"

    - title: "Make publishing the content server via mDNS a little more robust"

    - title: "Content server: Use new exact matching for greater precision when generating OPDS catalogs. Also fix regression that broke browsing by Tags on Stanza."

    - title: "Proper fix for breakage in LRF viewer caused by API change in QGraphicsItem in Qt 4.6"

  new recipes:
    - title: Various Polish news sources
      author: Tomasz Dlugosz

    - title: Que Leer, Wired UK
      author: Darko Miletic

    - title: Kathimerini and Ta Nea
      author: Pan

    - title: Winter Olympics
      author: Starson17

  improved recipes:
    - Wired Magazine

- version: 0.6.40
  date: 2010-02-12

  new features:
    - title: "Ability to perform exact match and regular expression based searches."
      type: major
      tickets: [4830]
      description: >
        "You can now perform exact match searches by prefixing your search term with an =.
        So for example, tag:=fiction will match all tags named fiction, but not tags named
        non-fiction. Similarly, you can use regular expression based searches by prefixing
        the search term by ~."

    - title: "Autodetect if a zip/rar file is actually a comic and if so, import it as CBZ/CBR"
      tickets: [4753]

    - title: "Add plugin to automatically extract an ebook during import if it is in a zip/rar archive"

    - title: "Linux source install: Install a calibre environment module to ease the integration of calibre into other python projects"

  bug fixes:
    - title: "Fix regression in 0.6.39 that broke the LRF viewer"

    - title: "ZIP/EPUB files: Try to detect file name encoding instead of assuming the name is encoded in UTF-8. Also correctly encode the extracted file name in the local filesystem encoding."

    - title: "HTML Input: Handle HTML fragments more gracefully"
      tickets: [4854]

    - title: "Zip files: Workaround invalid zip files that contain end-of-file comments but set comment size to zero"

    - title: "Restore the recipe for the Wired daily feed."
      tickets: [4871]

    - title: "MOBI metadata: Preserve original EXTH records when not overwritten by calibre metadata."

    - title: "Catalog generation: Improved series sorting. All books not in a series are now grouped together"

    - title: "Fix occasional threading related crash when using the ChooseFormatDialog"

    - title: "Catalog generation: Various fixes for handling invalid data"

  new recipes:
    - title: Sueddeutsche Zeitung
      author: Darko Miletic

  improved recipes:
    - Pagina 12
    - Variety
    - Toronto Sun
    - Telegraph UK
    - Danas
    - Dilbert

- version: 0.6.39
  date: 2010-02-09

  new features:
    - title: "Add ability to control how author sort strings are automatically generated from author strings, via the config file tweaks.py"

    - title: "Handle broken EPUB files from Project Gutenberg that have invalid OCF containers"
      tickets: [4832]

  bug fixes:
    - title: "Fix regression in 0.6.38 that broke setting bookmarks in the viewer"

    - title: "HTML Input: Ignore filenames that are encoded incorrectly."

  new recipes:
    - title: Radikal
      author: Darko Miletic

- version: 0.6.38
  date: 2010-02-09

  new features:
    - title: "Driver for the Irex DR 800"

    - title: "Driver for the Booq e-book reader"

    - title: "Allow automatic series increment algorithm to be tweaked by editing the config file tweaks.py"

    - title: "Various improvements to the catalog generation. Larger thumbnails in EPUB output and better series sorting. Better handling of html markup in the comments."

    - title: "MOBI Output: Make font used for generated masthead images user customizable."

  bug fixes:
    - title: "E-book viewer: Make bookmarking (and remembering last open position) more robust. For linux source installs, you must have Qt 4.6"
      tickets: [4812]

    - title: "Fix conversion/import of HTML files with very long href links on windows"
      tickets: [4783]

    - title: "Don't read metadata from filenames for downloaded news, even if the user has the read metadata from filename option set"
      tickets: [4758]

    - title: "Don't allow leading or trailing space in tags and series. Also normalize all internal spaces to a single space"
      tickets: [4809]

    - title: "E-book viewer: Toolbars remember their position"
      tickets: [4811]

    - title: "Fix year being repeated when editing date in main library screen on windows"
      tickets: [4829]

    - title: "News download: Fix downloading of images from URLs with an ampersand in them"

    - title: "Linux source install: unbundle cssutils, it is now an external dependency"

    - title: "MOBI metadata: Fix regression that broke setting of titles in some MOBI files"

    - title: "EPUB metadata: Extract the cover image from the html it is embedded in if possible, instead of rendering the html. Removes the white margins on covers and speeds up cover extraction"

    - title: "Fix regression in PDB output"

    - title: "News download: Remove <base> tags automatically"

    - title: "Searching on device: Ignore unicode errors"

  new recipes:
    - title: Courier Press
      author: Krittika Goyal

    - title: zive.sk and iliterature.cz
      author: Abelturd

    - title: El Comercio, Digital Spy UK, Gizmodo, New Straits Times, Read It Later, TidBits
      author: Darko Miletic

  improved recipes:
    - Jerusalem Post
    - Clarin
    - La Nacion
    - Harvard Business Review
    - People US Mashup
    - The New Republic
    - "Pagina 12"
    - Discover Magazine
    - Metro Montreal

- version: 0.6.37
  date: 2010-02-01
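As an aside, the exact-match and regular-expression search prefixes introduced in 0.6.40 look like this in practice (hypothetical queries, following the description above):

    tag:=fiction      exact match: finds the tag "fiction" but not "non-fiction"
    tag:~^non-        regular expression: finds tags beginning with "non-"

Similarly, the 0.6.41 change to ebook-convert means a cover can be fetched from the web during conversion; the file names and URL below are illustrative:

    ebook-convert book.html book.epub --cover http://example.com/cover.jpg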
@@ -79,3 +79,9 @@ p.unread_book {
    text-indent:-2em;
}

hr.series_divider {
    width:50%;
    margin-left:1em;
    margin-top:0em;
    margin-bottom:0em;
}
27  resources/default_tweaks.py  (new file)
@@ -0,0 +1,27 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__   = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

'''
Contains various tweaks that affect calibre behavior. Only edit this file if
you know what you are doing. If you delete this file, it will be recreated from
defaults.
'''

# The algorithm used to assign a new book in an existing series a series number.
# Possible values are:
# next  - Next available number
# const - Assign the number 1 always
series_index_auto_increment = 'next'


# The algorithm used to copy author to author_sort
# Possible values are:
# invert: use "fn ln" -> "ln, fn" (the original algorithm)
# copy  : copy author to author_sort without modification
# comma : use 'copy' if there is a ',' in the name, otherwise use 'invert'
author_sort_copy_method = 'invert'
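To make the tweak semantics concrete, here is a minimal sketch of how the three author_sort_copy_method values could behave; the helper name is hypothetical and this is not calibre's actual implementation:

    def author_to_author_sort(author, method='invert'):
        # Hypothetical illustration of the tweak values documented above.
        if method == 'copy':
            # 'copy': use the author string unchanged
            return author
        if method == 'comma' and ',' in author:
            # 'comma': a comma suggests the name is already in sort order
            return author
        # 'invert' (and 'comma' without a comma): "fn ln" -> "ln, fn"
        parts = author.split()
        if len(parts) < 2:
            return author
        return parts[-1] + ', ' + ' '.join(parts[:-1])

    # author_to_author_sort('Kovid Goyal')           -> 'Goyal, Kovid'
    # author_to_author_sort('Goyal, Kovid', 'comma') -> 'Goyal, Kovid'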
157  resources/images/catalog.svg  (new file)
@@ -0,0 +1,157 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- Generator: Adobe Illustrator 14.0.0, SVG Export Plug-In . SVG Version: 6.00 Build 43363) -->
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<svg version="1.1" id="svg2" xmlns:svg="http://www.w3.org/2000/svg" xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
  xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px" width="128px" height="128px"
  viewBox="0 0 128 128" enable-background="new 0 0 128 128" xml:space="preserve">
<filter id="filter5365">
  <feGaussianBlur stdDeviation="1.3829225" inkscape:collect="always" id="feGaussianBlur5367"></feGaussianBlur>
</filter>
<g id="layer1">
</g>
<g id="layer2">
  <polygon id="rect3200" opacity="0.5722" fill="#0000A4" enable-background="new " points="167.5,297.005 171.429,297.005 171.429,297.005 "/>
  <g id="path5265" filter="url(#filter5365)">
    <polygon fill="#362D2D" points="21.951,79.904 70.397,63.09 119.953,80.636 70.397,97.084 "/>
    <polygon fill="none" stroke="#362D2D" stroke-width="1.2507" stroke-linejoin="bevel" points="21.951,79.904 70.397,63.09 119.953,80.636 70.397,97.084 "/>
  </g>
  <g id="path5267" filter="url(#filter5365)">
    <path fill="#362D2D" d="M118.639,100.902v1.724l-46.437,15.432c-3.723-9.284-1.901-16.34,0.089-20.69l46.883-15.518l-6.34,2.068 l2.322,16.553L118.639,100.902z"/>
    <path fill="none" stroke="#362D2D" stroke-width="1.9" d="M118.639,100.902v1.724l-46.437,15.432 c-3.723-9.284-1.901-16.34,0.089-20.69l46.883-15.518l-6.34,2.068l2.322,16.553L118.639,100.902z"/>
  </g>
  <g id="path5269" filter="url(#filter5365)">
    <path fill="#362D2D" d="M70.711,98.81l47.581-15.743l0.29,18.582l-47.56,15.986c0,0-1.515-3.455-1.942-9.812 C68.936,101.726,70.711,98.81,70.711,98.81z"/>
    <path fill="none" stroke="#362D2D" stroke-width="2.1" d="M70.711,98.81l47.581-15.743l0.29,18.582l-47.56,15.986 c0,0-1.515-3.455-1.942-9.812C68.936,101.726,70.711,98.81,70.711,98.81z"/>
  </g>
  <g id="path5271" filter="url(#filter5365)">
    <path fill="#362D2D" d="M21.479,79.607l49.115,17.501c-3.287,7.816-2.385,15.202,0.982,23.019l-50.008-16.208 C17.974,94.288,17.113,87.874,21.479,79.607z"/>
    <path fill="none" stroke="#362D2D" stroke-width="1.6" d="M21.479,79.607l49.115,17.501c-3.287,7.816-2.385,15.202,0.982,23.019 l-50.008-16.208C17.974,94.288,17.113,87.874,21.479,79.607z"/>
  </g>
  <g id="path5273" filter="url(#filter5365)">
    <path fill="#362D2D" d="M120.871,99.092v4.827l-50.008,16.897l-49.651-15.863c-4.763-11.162-1.987-18.682,0.714-25.346 l49.651-16.724l48.579,17.242v3.449l-2.143,1.033l0.357,14.139L120.871,99.092z"/>
    <path fill="none" stroke="#362D2D" stroke-width="2.7" stroke-linejoin="bevel" d="M120.871,99.092v4.827l-50.008,16.897 l-49.651-15.863c-4.763-11.162-1.987-18.682,0.714-25.346l49.651-16.724l48.579,17.242v3.449l-2.143,1.033l0.357,14.139 L120.871,99.092z"/>
  </g>
  <path id="path5385" fill="#78CE4F" d="M19.316,78.05l48.438-17.414l49.548,18.171L67.754,95.842L19.316,78.05z"/>
  <path id="path5387" fill="none" stroke="#0F973B" stroke-width="1.9" d="M115.988,99.796v1.786l-46.43,15.982 c-3.722-9.616-1.901-16.924,0.09-21.43l46.875-16.07l-6.34,2.143l2.322,17.143L115.988,99.796z"/>

  <radialGradient id="path5389_1_" cx="498.3457" cy="267.1621" r="27.1927" gradientTransform="matrix(-0.064 0.175 1.8694 0.6835 -425.1342 -169.6643)" gradientUnits="userSpaceOnUse">
    <stop offset="0" style="stop-color:#B5FFA6"/>
    <stop offset="1" style="stop-color:#76E976"/>
  </radialGradient>
  <path id="path5389" fill="url(#path5389_1_)" stroke="#003131" stroke-width="1.6" stroke-opacity="0.9608" d="M18.845,77.742 l49.107,18.125c-3.287,8.096-2.385,15.744,0.981,23.84l-50-16.786C15.339,92.946,14.479,86.304,18.845,77.742z"/>
  <path id="path5391" fill="none" stroke="#003131" stroke-width="2.7" stroke-linejoin="bevel" stroke-opacity="0.9608" d=" M118.22,97.921v5l-50,17.5l-49.643-16.429c-4.762-11.561-1.987-19.348,0.714-26.25l49.642-17.321l48.572,17.857v3.571l-2.143,1.071 l0.356,14.644L118.22,97.921z"/>
  <path id="path5393" fill="#FFFFFF" d="M68.068,97.629l47.572-16.305l0.29,19.245l-47.194,16.423c0,0-1.424-2.819-2.12-10.029 C66.471,100.649,68.068,97.629,68.068,97.629z"/>
  <g id="path5419" filter="url(#filter5365)">
    <polygon fill="#362D2D" points="8.737,52.047 57.183,35.233 106.738,52.778 57.183,69.227 "/>
    <polygon fill="none" stroke="#362D2D" stroke-width="1.2507" stroke-linejoin="bevel" points="8.737,52.047 57.183,35.233 106.738,52.778 57.183,69.227 "/>
  </g>
  <g id="path5421" filter="url(#filter5365)">
    <path fill="#362D2D" d="M105.424,73.045v1.724L58.988,90.2c-3.723-9.284-1.902-16.34,0.089-20.69l46.882-15.518l-6.341,2.069 l2.322,16.552L105.424,73.045z"/>
    <path fill="none" stroke="#362D2D" stroke-width="1.9" d="M105.424,73.045v1.724L58.988,90.2 c-3.723-9.284-1.902-16.34,0.089-20.69l46.882-15.518l-6.341,2.069l2.322,16.552L105.424,73.045z"/>
  </g>
  <g id="path5423" filter="url(#filter5365)">
    <path fill="#362D2D" d="M57.497,70.953l47.581-15.744l0.289,18.582L57.809,89.777c0,0-1.515-3.455-1.942-9.812 C55.721,73.869,57.497,70.953,57.497,70.953z"/>
    <path fill="none" stroke="#362D2D" stroke-width="2.1" d="M57.497,70.953l47.581-15.744l0.289,18.582L57.809,89.777 c0,0-1.515-3.455-1.942-9.812C55.721,73.869,57.497,70.953,57.497,70.953z"/>
  </g>
  <g id="path5425" filter="url(#filter5365)">
    <path fill="#362D2D" d="M8.265,51.751l49.116,17.501c-3.288,7.816-2.385,15.201,0.982,23.018L8.354,76.062 C4.759,66.431,3.899,60.017,8.265,51.751z"/>
    <path fill="none" stroke="#362D2D" stroke-width="1.6" d="M8.265,51.751l49.116,17.501c-3.288,7.816-2.385,15.201,0.982,23.018 L8.354,76.062C4.759,66.431,3.899,60.017,8.265,51.751z"/>
  </g>
  <g id="path5427" filter="url(#filter5365)">
    <path fill="#362D2D" d="M107.656,71.234v4.828L57.648,92.959L7.998,77.097C3.234,65.934,6.011,58.415,8.712,51.751l49.651-16.725 l48.58,17.242v3.448l-2.144,1.035l0.357,14.139L107.656,71.234z"/>
    <path fill="none" stroke="#362D2D" stroke-width="2.7" stroke-linejoin="bevel" d="M107.656,71.234v4.828L57.648,92.959 L7.998,77.097C3.234,65.934,6.011,58.415,8.712,51.751l49.651-16.725l48.58,17.242v3.448l-2.144,1.035l0.357,14.139 L107.656,71.234z"/>
  </g>
  <path id="path5431" fill="#60BAFF" stroke="#003244" stroke-width="1.2507" stroke-linejoin="bevel" d="M6.102,50.193L54.54,32.779 l49.548,18.171L54.54,67.985L6.102,50.193z"/>
  <path id="path5433" fill="none" stroke="#0056D5" stroke-width="2.8104" d="M102.768,71.76v1.803L56.35,89.701 c-3.721-9.71-1.901-17.089,0.089-21.639l46.865-16.229l-6.338,2.164l2.321,17.312L102.768,71.76z"/>

  <radialGradient id="path5435_1_" cx="316.8916" cy="261.2949" r="27.1937" gradientTransform="matrix(-0.0902 0.2793 1.9257 0.6218 -445.576 -180.1955)" gradientUnits="userSpaceOnUse">
    <stop offset="0" style="stop-color:#789DED"/>
    <stop offset="1" style="stop-color:#2381E8"/>
  </radialGradient>
  <path id="path5435" fill="url(#path5435_1_)" stroke="#003244" stroke-width="1.6" d="M5.63,49.885L54.738,68.01 c-3.287,8.096-2.385,15.744,0.982,23.84l-50-16.785C2.125,65.09,1.265,58.447,5.63,49.885z"/>
  <path id="path5437" fill="none" stroke="#003244" stroke-width="2.7" stroke-linejoin="bevel" d="M105.006,70.064v5l-50,17.5 L5.363,76.135c-4.762-11.561-1.987-19.348,0.714-26.25L55.72,32.564l48.571,17.857v3.572l-2.143,1.071l0.357,14.643L105.006,70.064 z"/>
  <path id="path5439" fill="#FFFFFF" d="M54.854,69.772l47.573-16.306l0.29,19.245L55.522,89.135c0,0-1.425-2.819-2.121-10.028 C53.256,72.793,54.854,69.772,54.854,69.772z"/>
  <g id="path5447" filter="url(#filter5365)">
    <polygon fill="#362D2D" points="25.88,28.119 74.326,11.305 123.882,28.85 74.326,45.299 "/>
    <polygon fill="none" stroke="#362D2D" stroke-width="1.2507" stroke-linejoin="bevel" points="25.88,28.119 74.326,11.305 123.882,28.85 74.326,45.299 "/>
  </g>
  <g id="path5449" filter="url(#filter5365)">
    <path fill="#362D2D" d="M122.567,49.116v1.724L76.131,66.271c-3.723-9.284-1.902-16.34,0.09-20.69l46.883-15.518l-6.341,2.069 l2.321,16.552L122.567,49.116z"/>
    <path fill="none" stroke="#362D2D" stroke-width="1.9" d="M122.567,49.116v1.724L76.131,66.271 c-3.723-9.284-1.902-16.34,0.09-20.69l46.883-15.518l-6.341,2.069l2.321,16.552L122.567,49.116z"/>
  </g>
  <g id="path5451" filter="url(#filter5365)">
    <path fill="#362D2D" d="M74.641,47.024l47.58-15.744l0.289,18.582L74.951,65.849c0,0-1.514-3.455-1.941-9.812 C72.863,49.94,74.641,47.024,74.641,47.024z"/>
    <path fill="none" stroke="#362D2D" stroke-width="2.1" d="M74.641,47.024l47.58-15.744l0.289,18.582L74.951,65.849 c0,0-1.514-3.455-1.941-9.812C72.863,49.94,74.641,47.024,74.641,47.024z"/>
  </g>
  <g id="path5453" filter="url(#filter5365)">
    <path fill="#362D2D" d="M25.408,27.822l49.115,17.5c-3.287,7.816-2.385,15.202,0.982,23.018L25.498,52.133 C21.902,42.502,21.042,36.088,25.408,27.822z"/>
    <path fill="none" stroke="#362D2D" stroke-width="1.6" d="M25.408,27.822l49.115,17.5c-3.287,7.816-2.385,15.202,0.982,23.018 L25.498,52.133C21.902,42.502,21.042,36.088,25.408,27.822z"/>
  </g>
  <g id="path5455" filter="url(#filter5365)">
    <path fill="#362D2D" d="M124.8,47.306v4.828L74.791,69.03L25.14,53.168c-4.763-11.163-1.987-18.682,0.714-25.346l49.651-16.725 l48.58,17.242v3.449l-2.145,1.034l0.357,14.139L124.8,47.306z"/>
    <path fill="none" stroke="#362D2D" stroke-width="2.7" stroke-linejoin="bevel" d="M124.8,47.306v4.828L74.791,69.03L25.14,53.168 c-4.763-11.163-1.987-18.682,0.714-25.346l49.651-16.725l48.58,17.242v3.449l-2.145,1.034l0.357,14.139L124.8,47.306z"/>
  </g>
  <path id="path5459" fill="#FF7272" d="M23.245,26.264L71.684,8.85l49.547,18.171L71.684,44.057L23.245,26.264z"/>
  <path id="path5461" fill="none" stroke="#CF0505" stroke-width="1.9" d="M119.916,48.01v1.786L73.488,65.778 c-3.723-9.616-1.902-16.923,0.089-21.429l46.875-16.071l-6.339,2.143l2.32,17.143L119.916,48.01z"/>

  <radialGradient id="path5463_1_" cx="14.938" cy="-466.4766" r="27.3207" gradientTransform="matrix(2.5834 0.998 0.0835 -0.2162 46.7076 -68.8071)" gradientUnits="userSpaceOnUse">
    <stop offset="0" style="stop-color:#FD8A8A"/>
    <stop offset="1" style="stop-color:#FF7878"/>
  </radialGradient>
  <path id="path5463" fill="url(#path5463_1_)" stroke="#600101" stroke-width="1.6" d="M22.773,25.957l49.107,18.125 c-3.287,8.095-2.385,15.744,0.982,23.839l-50-18.806C19.268,39.14,18.408,34.518,22.773,25.957z"/>

  <linearGradient id="path3311_1_" gradientUnits="userSpaceOnUse" x1="-1.3145" y1="103.2168" x2="67.4683" y2="103.2168" gradientTransform="matrix(1 0 0 -1 5.4287 129.1426)">
    <stop offset="0" style="stop-color:#FFFFFF"/>
    <stop offset="1" style="stop-color:#FFFFFF;stop-opacity:0.2471"/>
  </linearGradient>
  <path id="path3311" fill="url(#path3311_1_)" d="M23.904,25.736L72.342,8.322l49.548,18.171L72.342,43.529L23.904,25.736z"/>
  <path id="path5465" fill="none" stroke="#600101" stroke-width="2.7" stroke-linejoin="bevel" d="M122.148,46.135v5l-50,17.5 l-49.39-18.701c-4.762-11.562-2.239-17.076,0.461-23.977L72.863,8.635l48.57,17.857v3.571l-2.143,1.071l0.357,14.643 L122.148,46.135z"/>
  <path id="path5467" fill="#FFFFFF" d="M71.997,45.844l47.573-16.306l0.289,19.246L72.666,65.206c0,0-1.426-2.819-2.121-10.028 C70.399,48.864,71.997,45.844,71.997,45.844z"/>
</g>
</svg>
BIN  resources/images/news/radikal_tr.png  (new file, 1.9 KiB)
BIN  resources/images/news/sueddeutschezeitung.png  (new file, 492 B)
BIN  resources/images/news/wired_uk.png  (new file, 647 B)
37  resources/kathemerini.recipe  (new file)
@@ -0,0 +1,37 @@
from calibre.web.feeds.recipes import BasicNewsRecipe

class Kathimerini(BasicNewsRecipe):
    title = 'Kathimerini'
    __author__ = 'Pan'
    description = 'News from Greece'
    max_articles_per_feed = 100
    oldest_article = 100
    publisher = 'Kathimerini'
    category = 'news, GR'
    language = 'el'
    no_stylesheets = True
    remove_tags_before = dict(name='td',attrs={'class':'news'})
    remove_tags_after = dict(name='td',attrs={'class':'news'})
    remove_attributes = ['width', 'src', 'header', 'footer']

    feeds = [(u'\u03a0\u03bf\u03bb\u03b9\u03c4\u03b9\u03ba\u03ae',
              'http://wk.kathimerini.gr/xml_files/politics.xml'),
             (u'\u0395\u03bb\u03bb\u03ac\u03b4\u03b1',
              'http://wk.kathimerini.gr/xml_files/ell.xml'),
             (u'\u039a\u03cc\u03c3\u03bc\u03bf\u03c2',
              'http://wk.kathimerini.gr/xml_files/world.xml'),
             (u'\u039f\u03b9\u03ba\u03bf\u03bd\u03bf\u03bc\u03af\u03b1',
              'http://wk.kathimerini.gr/xml_files/economy_1.xml'),
             (u'\u0395\u03c0\u03b9\u03c7\u03b5\u03b9\u03c1\u03ae\u03c3\u03b5\u03b9\u03c2',
              'http://wk.kathimerini.gr/xml_files/economy_2.xml'),
             (u'\u0394\u03b9\u03b5\u03b8\u03bd\u03ae\u03c2 \u039f\u03b9\u03ba\u03bf\u03bd\u03bf\u03bc\u03af\u03b1',
              'http://wk.kathimerini.gr/xml_files/economy_3.xml'),
             (u'\u03a0\u03bf\u03bb\u03b9\u03c4\u03b9\u03c3\u03bc\u03cc\u03c2',
              'http://wk.kathimerini.gr/xml_files/civ.xml'),
             (u'\u039c\u03cc\u03bd\u03b9\u03bc\u03b5\u03c2 \u03a3\u03c4\u03ae\u03bb\u03b5\u03c2',
              'http://wk.kathimerini.gr/xml_files/st.xml')]

    def print_version(self, url):
        return url.replace('http://news.kathimerini.gr/4dcgi/', 'http://news.kathimerini.gr/4dcgi/4dcgi/')
@@ -1,6 +1,6 @@

__license__ = 'GPL v3'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
clarin.com
'''
@@ -21,7 +21,8 @@ class Clarin(BasicNewsRecipe):
    cover_url = strftime('http://www.clarin.com/diario/%Y/%m/%d/portada.jpg')
    encoding = 'cp1252'
    language = 'es'
    extra_css = ' .Txt{ font-family: sans-serif } .Volan{ font-family: sans-serif; font-size: x-small} .Pie{ font-family: sans-serif; font-size: x-small} .Copete{font-family: sans-serif; font-size: large} .Hora{font-family: sans-serif; font-size: large} .Autor{font-family: sans-serif; font-size: small} '
    masthead_url = 'http://www.clarin.com/shared/v10/img/Hd/lg_Clarin.gif'
    extra_css = ' body{font-family: Arial,Helvetica,sans-serif} h2{font-family: Georgia,"Times New Roman",Times,serif; font-size: xx-large} .Volan,.Pie,.Autor{ font-size: x-small} .Copete,.Hora{font-size: large} '

    conversion_options = {
        'comment' : description
26  resources/recipes/courrier.recipe  (new file)
@@ -0,0 +1,26 @@
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup

class CourierPress(BasicNewsRecipe):
    title = u'Courier Press'
    language = 'en'
    __author__ = 'Krittika Goyal'
    oldest_article = 1 # days
    max_articles_per_feed = 25

    remove_stylesheets = True
    remove_tags = [
        dict(name='iframe'),
    ]

    feeds = [
        ('Courier Press',
         'http://www.courierpress.com/rss/headlines/news/'),
    ]

    def preprocess_html(self, soup):
        story = soup.find(name='div', attrs={'id':'article_body'})
        soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
        body = soup.find(name='body')
        body.insert(0, story)
        return soup
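As a usage note, a recipe file like the one above can be fed straight to ebook-convert to build an ebook from the feeds; the output file name below is illustrative, and --test restricts the run to a few articles per feed for quick iteration:

    ebook-convert courrier.recipe courier_press.epub --test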
@@ -1,64 +1,63 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
danas.rs
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag

class Danas(BasicNewsRecipe):
    title = 'Danas'
    __author__ = 'Darko Miletic'
    description = 'Vesti'
    description = 'Dnevne novine sa vestima iz sveta, politike, ekonomije, kulture, sporta, Beograda, Novog Sada i cele Srbije.'
    publisher = 'Danas d.o.o.'
    category = 'news, politics, Serbia'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = False
    use_embedded_content = False
    encoding = 'utf-8'
    masthead_url = 'http://www.danas.rs/images/basic/danas.gif'
    language = 'sr'
    lang = 'sr-Latn-RS'
    direction = 'ltr'
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .article_description,body,.lokacija{font-family: Tahoma,Arial,Helvetica,sans1,sans-serif} .nadNaslov,h1,.preamble{font-family: Georgia,"Times New Roman",Times,serif1,serif} .antrfileText{border-left: 2px solid #999999; color:#666666; margin-left: 0.8em; padding-left: 1.2em; margin-bottom: 0; margin-top: 0} h2,.datum,.lokacija,.autor{font-size: small} .antrfileNaslov{border-left: 2px solid #999999; color:#666666; margin-left: 0.8em; padding-left: 1.2em; font-weight:bold; margin-bottom: 0; margin-top: 0} img{margin-bottom: 0.8em} '

    conversion_options = {
        'comment' : description
        , 'tags' : category
        , 'publisher' : publisher
        , 'language' : language
        , 'pretty_print' : True
    }

    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    keep_only_tags = [dict(name='div', attrs={'id':'left'})]
    remove_tags = [
        dict(name='div', attrs={'class':['width_1_4','metaClanka','baner']})
        ,dict(name='div', attrs={'id':'comments'})
        ,dict(name=['object','link'])
        ,dict(name=['object','link','iframe'])
    ]

    feeds = [
        (u'Vesti'    , u'http://www.danas.rs/rss/rss.asp'             )
        ,(u'Periskop', u'http://www.danas.rs/rss/rss.asp?column_id=4')
    feeds = [
        (u'Politika'  , u'http://www.danas.rs/rss/rss.asp?column_id=27')
        ,(u'Hronika'  , u'http://www.danas.rs/rss/rss.asp?column_id=2' )
        ,(u'Drustvo'  , u'http://www.danas.rs/rss/rss.asp?column_id=24')
        ,(u'Dijalog'  , u'http://www.danas.rs/rss/rss.asp?column_id=1' )
        ,(u'Ekonomija', u'http://www.danas.rs/rss/rss.asp?column_id=6' )
        ,(u'Svet'     , u'http://www.danas.rs/rss/rss.asp?column_id=25')
        ,(u'Srbija'   , u'http://www.danas.rs/rss/rss.asp?column_id=28')
        ,(u'Kultura'  , u'http://www.danas.rs/rss/rss.asp?column_id=5' )
        ,(u'Sport'    , u'http://www.danas.rs/rss/rss.asp?column_id=13')
        ,(u'Scena'    , u'http://www.danas.rs/rss/rss.asp?column_id=42')
        ,(u'Feljton'  , u'http://www.danas.rs/rss/rss.asp?column_id=19')
        ,(u'Periskop' , u'http://www.danas.rs/rss/rss.asp?column_id=4' )
    ]

    def preprocess_html(self, soup):
        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
        soup.head.insert(0,mlang)
        attribs = [ 'style','font','valign'
                    ,'colspan','width','height'
                    ,'rowspan','summary','align'
                    ,'cellspacing','cellpadding'
                    ,'frames','rules','border'
                  ]
        for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
            item.name = 'div'
            for attrib in attribs:
                if item.has_key(attrib):
                    del item[attrib]
        for item in soup.findAll(style=True):
            del item['style']
        return soup

    def print_version(self, url):
        return url + '&action=print'
60  resources/recipes/di.recipe  (new file)
@@ -0,0 +1,60 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__author__ = 'Mori'
__version__ = 'v. 0.5'
'''
di.com.pl
'''

from calibre.web.feeds.news import BasicNewsRecipe
import re

class DziennikInternautowRecipe(BasicNewsRecipe):
    __author__ = 'Mori'
    language = 'pl'

    title = u'Dziennik Internautow'
    publisher = u'Dziennik Internaut\u00f3w Sp. z o.o.'
    description = u'Internet w \u017cyciu i biznesie. Porady, wywiady, interwencje, bezpiecze\u0144stwo w Sieci, technologia.'

    max_articles_per_feed = 100
    oldest_article = 7
    cover_url = 'http://di.com.pl/pic/logo_di_norm.gif'

    no_stylesheets = True
    remove_javascript = True
    encoding = 'utf-8'

    extra_css = '''
        .fotodesc{font-size: 75%;}
        .pub_data{font-size: 75%;}
        .fotonews{clear: both; padding-top: 10px; padding-bottom: 10px;}
        #pub_foto{font-size: 75%; float: left; padding-right: 10px;}
    '''

    feeds = [
        (u'Dziennik Internautów', u'http://feeds.feedburner.com/glowny-di')
    ]

    keep_only_tags = [
        dict(name = 'div', attrs = {'id' : 'pub_head'}),
        dict(name = 'div', attrs = {'id' : 'pub_content'})
    ]

    remove_tags = [
        dict(name = 'div', attrs = {'class' : 'poradniki_context'}),
        dict(name = 'div', attrs = {'class' : 'uniBox'}),
        dict(name = 'object', attrs = {}),
        dict(name = 'h3', attrs = {})
    ]

    preprocess_regexps = [
        (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
        [
            (r', <a href="http://di.com.pl/komentarze,.*?</div>', lambda match: '</div>'),
            (r'<div class="fotonews".*?">', lambda match: '<div class="fotonews">'),
            (r'http://di.com.pl/pic/photo/mini/', lambda match: 'http://di.com.pl/pic/photo/oryginal/'),
            (r'\s*</', lambda match: '</'),
        ]
    ]
@@ -3,6 +3,7 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
http://www.dilbert.com
'''
import re

from calibre.web.feeds.recipes import BasicNewsRecipe

@@ -28,6 +29,12 @@ class DosisDiarias(BasicNewsRecipe):

    feeds = [(u'Dilbert', u'http://feeds.dilbert.com/DilbertDailyStrip' )]

    preprocess_regexps = [
        (re.compile('strip\..*\.gif', re.DOTALL|re.IGNORECASE),
         lambda match: 'strip.zoom.gif')
    ]

    def get_article_url(self, article):
        return article.get('feedburner_origlink', None)
49  resources/recipes/eclicto.recipe  (new file)
@@ -0,0 +1,49 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__author__ = 'Mori'
__version__ = 'v. 0.1'
'''
blog.eclicto.pl
'''

from calibre.web.feeds.news import BasicNewsRecipe
import re

class BlogeClictoRecipe(BasicNewsRecipe):
    __author__ = 'Mori'
    language = 'pl'

    title = u'Blog eClicto'
    publisher = u'Blog eClicto'
    description = u'Blog o e-papierze i e-bookach'

    max_articles_per_feed = 100
    cover_url = 'http://blog.eclicto.pl/wordpress/wp-content/themes/blog_eclicto/g/logo.gif'

    no_stylesheets = True
    remove_javascript = True
    encoding = 'utf-8'

    extra_css = '''
        img{float: left; padding-right: 10px; padding-bottom: 5px;}
    '''

    feeds = [
        (u'Blog eClicto', u'http://blog.eclicto.pl/feed/')
    ]

    remove_tags = [
        dict(name = 'span', attrs = {'id' : 'tags'})
    ]

    remove_tags_after = [
        dict(name = 'div', attrs = {'class' : 'post'})
    ]

    preprocess_regexps = [
        (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
        [
            (r'\s*</', lambda match: '</'),
        ]
    ]
26  resources/recipes/eksiazki.recipe  (new file)
@@ -0,0 +1,26 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = u'2010, Tomasz Dlugosz <tomek3d@gmail.com>'
'''
eksiazki.org
'''

from calibre.web.feeds.news import BasicNewsRecipe

class eksiazki(BasicNewsRecipe):

    title = u'eKsiazki.org'
    description = u'Twoje centrum wiedzy o ePapierze i eBookach'
    language = 'pl'
    __author__ = u'Tomasz D\u0142ugosz'
    no_stylesheets = True
    remove_javascript = True

    feeds = [(u'eKsiazki.org', u'http://www.eksiazki.org/feed/')]

    keep_only_tags = [dict(name='div', attrs={'id':'content-body'})]
    remove_tags = [
        dict(name='span', attrs={'class':'nr_comm'}),
        dict(name='div', attrs={'id':'tabsContainer'}),
        dict(name='div', attrs={'class':'next_previous_links'})]
@@ -1,17 +1,41 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from calibre.web.feeds.news import BasicNewsRecipe

class HoustonChronicle(BasicNewsRecipe):

    title = u'The Houston Chronicle'
    description = 'News from Houston, Texas'
    __author__ = 'Kovid Goyal'
    __author__ = 'Kovid Goyal and Sujata Raman'
    language = 'en'
    timefmt = ' [%a, %d %b, %Y]'
    no_stylesheets = True

    keep_only_tags = [dict(id=['story-head', 'story'])]
    remove_tags = [dict(id=['share-module', 'resource-box',
        'resource-box-header'])]
    keep_only_tags = [
        dict(id=['story-head', 'story'])
    ]

    remove_tags = [
        dict(id=['share-module', 'resource-box',
            'resource-box-header'])
    ]

    extra_css = '''
        h1{font-family :Arial,Helvetica,sans-serif; font-size:large;}
        h2{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#666666;}
        h3{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000;}
        h4{font-family :Arial,Helvetica,sans-serif; font-size: x-small;}
        p{font-family :Arial,Helvetica,sans-serif; font-size:x-small;}
        #story-head h1{font-family :Arial,Helvetica,sans-serif; font-size: xx-large;}
        #story-head h2{font-family :Arial,Helvetica,sans-serif; font-size: small; color:#000000;}
        #story-head h3{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
        #story-head h4{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
        #story{font-family :Arial,Helvetica,sans-serif; font-size:xx-small;}
        #Text-TextSubhed BoldCond PoynterAgateZero h3{color:#444444;font-family :Arial,Helvetica,sans-serif; font-size:small;}
        .p260x p{font-family :Arial,Helvetica,serif; font-size:x-small;font-style:italic;}
        .p260x h6{color:#777777;font-family :Arial,Helvetica,sans-serif; font-size:xx-small;}
    '''

    def parse_index(self):
        soup = self.index_to_soup('http://www.chron.com/news/')
@@ -64,3 +88,6 @@ class HoustonChronicle(BasicNewsRecipe):
            feeds.append((current_section, current_articles))
        return feeds
38  resources/recipes/interia_fakty.recipe  (new file)
@@ -0,0 +1,38 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = u'2010, Tomasz Dlugosz <tomek3d@gmail.com>'
'''
fakty.interia.pl
'''

from calibre.web.feeds.news import BasicNewsRecipe

class InteriaFakty(BasicNewsRecipe):
    title = u'Interia.pl - Fakty'
    language = 'pl'
    oldest_article = 7
    __author__ = u'Tomasz D\u0142ugosz'
    simultaneous_downloads = 2
    no_stylesheets = True
    remove_javascript = True
    max_articles_per_feed = 100

    feeds = [(u'Kraj', u'http://kanaly.rss.interia.pl/kraj.xml'),
             (u'\u015awiat', u'http://kanaly.rss.interia.pl/swiat.xml'),
             (u'Wiadomo\u015bci dnia', u'http://kanaly.rss.interia.pl/fakty.xml'),
             (u'Przegl\u0105d prasy', u'http://kanaly.rss.interia.pl/przeglad_prasy.xml'),
             (u'Wywiady', u'http://kanaly.rss.interia.pl/wywiady.xml'),
             (u'Ciekawostki', u'http://kanaly.rss.interia.pl/ciekawostki.xml')]

    keep_only_tags = [dict(name='div', attrs={'id':'article'})]

    remove_tags = [
        dict(name='div', attrs={'class':'box fontSizeSwitch'}),
        dict(name='div', attrs={'class':'clear'}),
        dict(name='div', attrs={'class':'embed embedLeft articleEmbedArticleList articleEmbedArticleListTitle'}),
        dict(name='span', attrs={'class':'keywords'})]

    extra_css = '''
        h2 { font-size: 1.2em; }
    '''
71  resources/recipes/interia_sport.recipe  (new file)
@@ -0,0 +1,71 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = u'2010, Tomasz Dlugosz <tomek3d@gmail.com>'
'''
sport.interia.pl
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe

class InteriaSport(BasicNewsRecipe):
    title = u'Interia.pl - Sport'
    language = 'pl'
    oldest_article = 7
    __author__ = u'Tomasz D\u0142ugosz'
    simultaneous_downloads = 3
    no_stylesheets = True
    remove_javascript = True
    max_articles_per_feed = 100

    feeds = [(u'Wydarzenia sportowe', u'http://kanaly.rss.interia.pl/sport.xml'),
             (u'Pi\u0142ka no\u017cna', u'http://kanaly.rss.interia.pl/pilka_nozna.xml'),
             (u'Siatk\xf3wka', u'http://kanaly.rss.interia.pl/siatkowka.xml'),
             (u'Koszyk\xf3wka', u'http://kanaly.rss.interia.pl/koszykowka.xml'),
             (u'NBA', u'http://kanaly.rss.interia.pl/nba.xml'),
             (u'Kolarstwo', u'http://kanaly.rss.interia.pl/kolarstwo.xml'),
             (u'\u017bu\u017cel', u'http://kanaly.rss.interia.pl/zuzel.xml'),
             (u'Tenis', u'http://kanaly.rss.interia.pl/tenis.xml')]

    keep_only_tags = [dict(name='div', attrs={'id':'article'})]

    remove_tags = [dict(name='div', attrs={'class':'object gallery'})]

    extra_css = '''
        .articleDate {
            font-size: 0.5em;
            color: black;
        }

        .articleFoto {
            display: block;
            font-family: sans;
            font-size: 0.5em;
            text-indent: 0;
            color: black;
        }

        .articleText {
            display: block;
            margin-bottom: 1em;
            margin-left: 0;
            margin-right: 0;
            margin-top: 1em;
            color: black;
        }

        .articleLead {
            font-size: 1.2em;
        }
    '''

    preprocess_regexps = [
        (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
        [
            (r'<p><a href.*?</a></p>', lambda match: ''),
            # FIXME
            #(r'(<div id="newsAddContent">)(.*?)(<a href=".*">)(.*?)(</a>)', lambda match: '\1\2\4'),
            (r'<p>(<i>)?<b>(ZOBACZ|CZYTAJ) T.*?</div>', lambda match: '</div>')
        ]
    ]
@@ -10,22 +10,19 @@ class JerusalemPost(BasicNewsRecipe):
    __author__ = 'Kovid Goyal'
    max_articles_per_feed = 10
    no_stylesheets = True
    remove_tags_before = {'class':'byline'}
    remove_tags = [
        {'class':['artAdBlock clearboth', 'tbartop', 'divdot_vrttbox',
            'slideshow']},
        dict(id=['artFontButtons', 'artRelatedBlock']),
    ]
    remove_tags_after = {'id':'artTxtBlock'}

    remove_tags_before = {'class':'jp-grid-content'}
    remove_tags_after = {'id':'body_val'}

    feeds = [ ('Front Page', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333346'),
              ('Israel News', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1178443463156'),
              ('Middle East News', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333498'),
              ('International News', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1178443463144'),
              ('Editorials', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333211'),
            ]

    def postprocess_html(self, soup, first):
        for tag in soup.findAll(name=['table', 'tr', 'td']):
            tag.name = 'div'
        return soup

    def preprocess_html(self, soup):
        for x in soup.findAll(name=['form', 'input']):
            x.name = 'div'
        for x in soup.findAll('body', style=True):
            del x['style']
        return soup
@@ -1,7 +1,5 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
lanacion.com.ar
'''
@@ -12,28 +10,34 @@ class Lanacion(BasicNewsRecipe):
    title = 'La Nacion'
    __author__ = 'Darko Miletic'
    description = 'Noticias de Argentina y el resto del mundo'
    publisher = 'La Nacion'
    publisher = 'La Nacion S.A.'
    category = 'news, politics, Argentina'
    oldest_article = 2
    max_articles_per_feed = 100
    use_embedded_content = False
    remove_javascript = True
    no_stylesheets = True
    language = 'es'
    encoding = 'cp1252'
    masthead_url = 'http://www.lanacion.com.ar/imgs/layout/logos/ln341x47.gif'
    extra_css = ' h1{font-family: Georgia,serif} body{font-family: Arial,sans-serif} img{margin-top: 0.5em; margin-bottom: 0.2em} .notaEpigrafe{font-size: x-small} '

    html2lrf_options = [
        '--comment', description
        , '--category', category
        , '--publisher', publisher
    ]

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
    conversion_options = {
        'comment' : description
        , 'tags' : category
        , 'publisher': publisher
        , 'language' : language
    }

    keep_only_tags = [dict(name='div', attrs={'class':'nota floatFix'})]
    remove_tags = [
        dict(name='div' , attrs={'class':'notaComentario floatFix noprint' })
        ,dict(name='ul' , attrs={'class':'cajaHerramientas cajaTop noprint'})
        ,dict(name='div' , attrs={'class':'cajaHerramientas noprint' })
        ,dict(attrs={'class':['titulosMultimedia','derecha','techo color']})
        ,dict(name=['iframe','embed','object'])
    ]
    remove_attributes = ['height','width']

    feeds = [
        (u'Ultimas noticias' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?origen=2' )
@@ -51,10 +55,4 @@ class Lanacion(BasicNewsRecipe):
    ]

    def preprocess_html(self, soup):
        mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
        soup.head.insert(0,mtag)
        for item in soup.findAll(style=True):
            del item['style']
        return soup

    language = 'es'
        return self.adeify_images(soup)
43  resources/recipes/legeartis.recipe  (new file)
@@ -0,0 +1,43 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__author__ = 'Mori'
__version__ = 'v. 0.1'
'''
olgierd.bblog.pl
'''

from calibre.web.feeds.news import BasicNewsRecipe

class LegeArtisRecipe(BasicNewsRecipe):
    __author__ = 'Mori'
    language = 'pl'

    title = u'Lege Artis'
    publisher = u'Olgierd Rudak'
    description = u'Wszystko, co chcieliby\u015bcie wiedzie\u0107 o prawie, ale wstydzicie si\u0119 zapyta\u0107'

    max_articles_per_feed = 100

    no_stylesheets = True
    remove_javascript = True

    extra_css = '''
        img{clear: both;}
    '''

    feeds = [
        (u'Lege Artis', u'http://olgierd.bblog.pl/rss/rss20.xml')
    ]

    keep_only_tags = [
        dict(name = 'div', attrs = {'class' : 'post_title'}),
        dict(name = 'div', attrs = {'class' : 'post_date'}),
        dict(name = 'div', attrs = {'class' : 'post_content'})
    ]

    remove_tags = [
        dict(name = 'div', attrs = {'id' : 'bb_tools'}),
        dict(name = 'div', attrs = {'class' : 'post_comments'}),
        dict(name = 'object', attrs = {})
    ]
49  resources/recipes/legitymizm.recipe  (new file)
@@ -0,0 +1,49 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = u'2010, Tomasz Dlugosz <tomek3d@gmail.com>'
'''
legitymizm.org
'''

from calibre.web.feeds.news import BasicNewsRecipe

class Legitymizm(BasicNewsRecipe):
    title = u'Organizacja Monarchist\xf3w Polskich'
    language = 'pl'
    oldest_article = 7
    __author__ = u'Tomasz D\u0142ugosz'
    max_articles_per_feed = 100
    cover_url = 'http://www.legitymizm.org/img_omp/logo.gif'
    no_stylesheets = True

    feeds = [(u'Aktualno\u015bci i publicystyka', u'http://www.legitymizm.org/rss.php')]

    keep_only_tags = [dict(name='div', attrs={'id':'szeroka_kolumna'})]
    remove_tags = [dict(name = 'div', attrs = {'class' : 'koniec_tresci_wlasciwej'}),
                   dict(name = 'div', attrs = {'class' : 'return'})]

    extra_css = '''
        body { font-family: Georgia, 'Times New Roman', Times, serif; }
        h1 { color: #898981; font-weight: normal; font-size: 26px; letter-spacing: -1px; line-height: 23px; text-align: left; }
        h2, h3 { font-weight: normal; font-size: 20px; line-height: 23px; letter-spacing: -1px; margin: 0 0 3px 0; text-align: left; }
        #szeroka_kolumna { float: left; line-height: 20px; }
        #szeroka_kolumna ul.wykaz { list-style-type: none; margin: 0 0 1.2em 0; padding: 0; }
        #szeroka_kolumna ul.wykaz li.wykaz_2 { font-weight: bold; margin: 0.6em 0 0 0; }
        #szeroka_kolumna ul.wykaz a { text-decoration: none; }
        #szeroka_kolumna ul.wykaz li.wykaz_1, #szeroka_kolumna ul.wykaz li.wykaz_2 ul li { list-style-type: square; color: #898981; text-transform: none; font-weight: normal; padding: 0; }
        #szeroka_kolumna ul.wykaz li.wykaz_1 { margin: 0 0 0 1.3em; }
        #szeroka_kolumna ul.wykaz li.wykaz_2 ul { margin: 0; padding: 0 0 0 1.3em; }
        #szeroka_kolumna h3.autor { background-color: #898981; color: #f9f9f8; margin: -25px 0px 30px 0; text-align: left; padding: 0 0 0 2px; }
        .tresc_wlasciwa { border-top: 1px solid #898981; padding: 30px 0px 0px 0px; position: relative; }
        #cytat { font-size: 11px; line-height: 19px; font-style: italic; text-align: justify; }
        #cytat img { width: 100px; height: 105px; float: right; margin: 3px 0 0 10px; }
        .duzy_cytat { padding: 20px 20px 10px 20px; margin: 0 0 1.2em 0; }
        #szeroka_kolumna img, #szeroka_kolumna object { padding: 3px; border: 1px solid #898981; }
        #szeroka_kolumna img.ilustracja { margin: 0px 10px 0 0; float: left; }
        p { margin: 0 0 1.2em 0; }
        #cytat p.sentencja { margin: 0; }
        #cytat p.sentencja:first-letter { font-size: 44px; line-height: 33px; margin: 0 2px 0 0; font-style: normal; float: left; display: block; }
        p.autor { text-transform: uppercase; color: #898981; font-style: normal; text-align: left; }
    '''
26  resources/recipes/michalkiewicz.recipe  (new file)
@@ -0,0 +1,26 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2010, Tomasz Dlugosz <tomek3d@gmail.com>'
'''
michalkiewicz.pl
'''

from calibre.web.feeds.news import BasicNewsRecipe

#

class michalkiewicz(BasicNewsRecipe):
    title = u'Stanis\u0142aw Michalkiewicz'
    description = u'Strona autorska * felietony * artyku\u0142y * komentarze'
    __author__ = u'Tomasz D\u0142ugosz'
    language = 'pl'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True

    keep_only_tags = [dict(name='div', attrs={'class':'modul_srodek'})]
    remove_tags = [dict(name='ul', attrs={'class':'menu'})]

    feeds = [(u'Teksty', u'http://www.michalkiewicz.pl/rss.xml')]
35  resources/recipes/nczas.recipe  (new file)
@@ -0,0 +1,35 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2010, Tomasz Dlugosz <tomek3d@gmail.com>'
'''
nczas.com
'''

from calibre.web.feeds.news import BasicNewsRecipe

#

class NCzas(BasicNewsRecipe):
    title = u'Najwy\u017cszy Czas!'
    description = u'Najwy\u017cszy Czas!\nwydanie internetowe'
    __author__ = u'Tomasz D\u0142ugosz'
    language = 'pl'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    cover_url = 'http://nczas.com/wp-content/themes/default/grafika/logo.png'

    keep_only_tags = [dict(name='div', attrs={'class':'trescartykulu'})]

    feeds = [(u'Najwy\u017cszy Czas!', u'http://nczas.com/feed/')]

    def postprocess_html(self, soup, first):
        for tag in soup.findAll(name='img', alt=""):
            tag.extract()

        for item in soup.findAll(align="right"):
            del item['align']

        return soup
@@ -37,7 +37,7 @@ class NYTimes(BasicNewsRecipe):
        dict(name=['script', 'noscript', 'style'])]
    encoding = decode
    no_stylesheets = True
    extra_css = 'h1 {font: sans-serif large;}\n.byline {font:monospace;}'
    extra_css = 'h1 {font-face:sans-serif; font-size:2em; font-weight:bold;}\n.byline {font:monospace;}\n.bold {font-weight:bold;}'

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
@@ -15,14 +15,14 @@ class Pagina12(BasicNewsRecipe):
    publisher = 'La Pagina S.A.'
    category = 'news, politics, Argentina'
    oldest_article = 2
    max_articles_per_feed = 100
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'cp1252'
    use_embedded_content = False
    language = 'es'
    remove_empty_feeds = True
    masthead_url = 'http://www.pagina12.com.ar/commons/imgs/logo-home.gif'
    extra_css = ' body{font-family: Arial,Helvetica,sans-serif } h2{color: #028CCD} img{margin-bottom: 0.4em} .epigrafe{font-size: x-small; background-color: #EBEAE5; color: #565144 } .intro{font-size: 1.1em} '
    extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} #autor{font-weight: bold} #fecha,#epigrafe{font-size: 0.9em; margin: 5px} #imagen{border: 1px solid black; margin: 0 0 1.25em 1.25em; width: 232px } '

    conversion_options = {
        'comment' : description
@@ -45,7 +45,9 @@ class Pagina12(BasicNewsRecipe):
        ,(u'NO'         , u'http://www.pagina12.com.ar/diario/rss/no.xml'      )
        ,(u'Las/12'     , u'http://www.pagina12.com.ar/diario/rss/las12.xml'   )
        ,(u'Soy'        , u'http://www.pagina12.com.ar/diario/rss/soy.xml'     )
        ,(u'M2'         , u'http://www.pagina12.com.ar/diario/rss/futuro.xml'  )
        ,(u'Futuro'     , u'http://www.pagina12.com.ar/diario/rss/futuro.xml'  )
        ,(u'M2'         , u'http://www.pagina12.com.ar/diario/rss/m2.xml'      )
        ,(u'Rosario/12' , u'http://www.pagina12.com.ar/diario/rss/rosario.xml' )
    ]

    def print_version(self, url):
@@ -60,3 +62,7 @@ class Pagina12(BasicNewsRecipe):
            return image['src']
        return None

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return soup
56 resources/recipes/queleer.recipe Normal file
@ -0,0 +1,56 @@
__license__   = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.que-leer.com
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe

class QueLeer(BasicNewsRecipe):
    title                 = 'Que Leer'
    __author__            = 'Darko Miletic'
    description           = 'Libros, Novedades en libros, Criticas, Noticias libro'
    publisher             = 'MC Ediciones, S.A.'
    category              = 'news, books, criticas, libros'
    oldest_article        = 7
    max_articles_per_feed = 200
    no_stylesheets        = True
    encoding              = 'utf-8'
    use_embedded_content  = False
    language              = 'es'
    remove_empty_feeds    = True
    masthead_url          = 'http://www.que-leer.com/wp-content/themes/queleer/images/backgrounds/que-leer.jpg'
    extra_css             = ' body{font-family: Arial,sans-serif } img{margin-bottom: 0.4em} '

    conversion_options = {
        'comment'    : description
        , 'tags'     : category
        , 'publisher': publisher
        , 'language' : language
    }

    preprocess_regexps = [(re.compile(r'<h2 class="izq">.*?</body>', re.DOTALL|re.IGNORECASE),lambda match: '')]

    remove_tags = [
        dict(attrs={'class':['post-ratings','post-ratings-loading','sociable','toc-anchor']})
        ,dict(name=['object','embed','iframe','link'])
        ,dict(attrs={'id':'left'})
    ]

    remove_tags_after = dict(attrs={'class':'sociable'})
    remove_attributes = ['width','height']
    keep_only_tags    = [dict(attrs={'class':'post'})]

    feeds = [(u'Articulos', u'http://www.que-leer.com/feed')]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        url = 'http://www.que-leer.com/comprar-libros-tienda-que-leer/libros-recomendados'
        fitem = soup.find('a',href=url)
        if fitem:
            par = fitem.parent
            par.extract()
        return self.adeify_images(soup)
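The preprocess_regexps entry above runs against the raw page source before parsing: each entry is a (compiled pattern, substitution callable) pair, and here it deletes everything from the first <h2 class="izq"> through </body>. A minimal illustration on a made-up snippet (not from que-leer.com):

    import re

    rule = (re.compile(r'<h2 class="izq">.*?</body>', re.DOTALL|re.IGNORECASE),
            lambda match: '')
    raw = '<body><p>article</p><h2 class="izq">related</h2>junk</body>'
    print rule[0].sub(rule[1], raw)   # -> '<body><p>article</p>'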
45 resources/recipes/radikal_tr.recipe Normal file
@ -0,0 +1,45 @@
__license__   = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
radikal.com.tr
'''

from calibre.web.feeds.news import BasicNewsRecipe

class Radikal_tr(BasicNewsRecipe):
    title                 = 'Radikal - Turkey'
    __author__            = 'Darko Miletic'
    description           = 'News from Turkey'
    publisher             = 'radikal'
    category              = 'news, politics, Turkey'
    oldest_article        = 2
    max_articles_per_feed = 150
    no_stylesheets        = True
    encoding              = 'cp1254'
    use_embedded_content  = False
    masthead_url          = 'http://www.radikal.com.tr/D/i/1/V2/radikal_logo.jpg'
    language              = 'tr'
    extra_css             = ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .article_description,body{font-family: Arial,Verdana,Helvetica,sans1,sans-serif } '

    conversion_options = {
        'comment'    : description
        , 'tags'     : category
        , 'publisher': publisher
        , 'language' : language
    }

    remove_tags        = [dict(name=['embed','iframe','object','link','base'])]
    remove_tags_before = dict(name='h1')
    remove_tags_after  = dict(attrs={'id':'haberDetayYazi'})

    feeds = [(u'Yazarlar', u'http://www.radikal.com.tr/d/rss/RssYazarlar.xml')]

    def print_version(self, url):
        articleid = url.rpartition('ArticleID=')[2]
        return 'http://www.radikal.com.tr/Default.aspx?aType=HaberYazdir&ArticleID=' + articleid

    def preprocess_html(self, soup):
        return self.adeify_images(soup)
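print_version above relies on str.rpartition, which splits on the last occurrence of the separator, so the article id survives even if 'ArticleID=' appeared earlier in the URL. A quick trace with an invented URL:

    url = 'http://www.radikal.com.tr/Default.aspx?aType=Detay&ArticleID=981234'
    articleid = url.rpartition('ArticleID=')[2]   # '981234'
    print 'http://www.radikal.com.tr/Default.aspx?aType=HaberYazdir&ArticleID=' + articleid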
@ -7,10 +7,11 @@ sfgate.com
'''

from calibre.web.feeds.news import BasicNewsRecipe
import re

class SanFranciscoChronicle(BasicNewsRecipe):
    title       = u'San Francisco Chronicle'
    __author__  = u'Darko Miletic'
    __author__  = u'Darko Miletic and Sujata Raman'
    description = u'San Francisco news'
    language    = 'en'

@ -19,13 +20,56 @@ class SanFranciscoChronicle(BasicNewsRecipe):
    no_stylesheets       = True
    use_embedded_content = False

    remove_tags_before = {'class':'articleheadings'}
    remove_tags_after  = dict(name='div', attrs={'id':'articlecontent' })
    remove_tags = [
        dict(name='div', attrs={'class':'tools tools_top'})
        ,dict(name='div', attrs={'id':'articlebox' })
    ]

    remove_tags_before = {'id':'printheader'}

    remove_tags = [
        dict(name='div',attrs={'id':'printheader'})
        ,dict(name='a', attrs={'href':re.compile('http://ads\.pheedo\.com.*')})
        ,dict(name='div',attrs={'id':'footer'})
    ]

    extra_css = '''
        h1{font-family :Arial,Helvetica,sans-serif; font-size:large;}
        h2{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#666666;}
        h3{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000;}
        h4{font-family :Arial,Helvetica,sans-serif; font-size: x-small;}
        p{font-family :Arial,Helvetica,sans-serif; font-size:x-small;}
        .byline{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
        .date{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
        .dtlcomment{font-style:italic;}
        .georgia h3{font-family :Arial,Helvetica,sans-serif; font-size:x-small; color:#000000;}
    '''

    feeds = [
        (u'Top News Stories', u'http://www.sfgate.com/rss/feeds/news.xml')
    ]

    def print_version(self,url):
        url = url + "&type=printable"
        return url

    def get_article_url(self, article):
        print str(article['title_detail']['value'])
        url = article.get('guid',None)
        url = "http://www.sfgate.com/cgi-bin/article.cgi?f=" + url
        if "Presented By:" in str(article['title_detail']['value']):
            url = ''
        return url
107 resources/recipes/sueddeutschezeitung.recipe Normal file
@ -0,0 +1,107 @@
__license__   = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.sueddeutsche.de/sz/
'''

import urllib
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe

class SueddeutscheZeitung(BasicNewsRecipe):
    title              = 'Sueddeutsche Zeitung'
    __author__         = 'Darko Miletic'
    description        = 'News from Germany. Access to paid content.'
    publisher          = 'Sueddeutsche Zeitung'
    category           = 'news, politics, Germany'
    no_stylesheets     = True
    oldest_article     = 2
    encoding           = 'cp1252'
    needs_subscription = True
    remove_empty_feeds = True
    PREFIX             = 'http://www.sueddeutsche.de'
    INDEX              = PREFIX + strftime('/sz/%Y-%m-%d/')
    LOGIN              = PREFIX + '/app/lbox/index.html'
    use_embedded_content = False
    masthead_url       = 'http://pix.sueddeutsche.de/img/g_.gif'
    language           = 'de'
    extra_css          = ' body{font-family: Arial,Helvetica,sans-serif} '

    conversion_options = {
        'comment'           : description
        , 'tags'            : category
        , 'publisher'       : publisher
        , 'language'        : language
        , 'linearize_tables': True
    }

    remove_attributes = ['height','width']

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
        br.open(self.INDEX)
        if self.username is not None and self.password is not None:
            data = urllib.urlencode({ 'login_name'    :self.username
                                     ,'login_passwort':self.password
                                     ,'lboxaction'    :'doLogin'
                                     ,'passtxt'       :'Passwort'
                                     ,'referer'       :self.INDEX
                                     ,'x'             :'22'
                                     ,'y'             :'7'
                                    })
            br.open(self.LOGIN,data)
        return br

    remove_tags = [
        dict(attrs={'class':'hidePrint'})
        ,dict(name=['link','object','embed','base','iframe'])
    ]
    remove_tags_before = dict(name='h2')
    remove_tags_after  = dict(attrs={'class':'author'})

    feeds = [
        (u'Politik'        , INDEX + 'politik/'      )
        ,(u'Seite drei'    , INDEX + 'seitedrei/'    )
        ,(u'Meinungsseite' , INDEX + 'meinungsseite/')
        ,(u'Wissen'        , INDEX + 'wissen/'       )
        ,(u'Panorama'      , INDEX + 'panorama/'     )
        ,(u'Feuilleton'    , INDEX + 'feuilleton/'   )
        ,(u'Medien'        , INDEX + 'medien/'       )
        ,(u'Wirtschaft'    , INDEX + 'wirtschaft/'   )
        ,(u'Sport'         , INDEX + 'sport/'        )
        ,(u'Bayern'        , INDEX + 'bayern/'       )
        ,(u'Muenchen'      , INDEX + 'muenchen/'     )
        ,(u'jetzt.de'      , INDEX + 'jetzt.de/'     )
    ]

    def parse_index(self):
        totalfeeds = []
        lfeeds = self.get_feeds()
        for feedobj in lfeeds:
            feedtitle, feedurl = feedobj
            self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
            articles = []
            soup = self.index_to_soup(feedurl)
            tbl = soup.find(attrs={'class':'szprintd'})
            for item in tbl.findAll(name='td',attrs={'class':'topthema'}):
                atag = item.find(attrs={'class':'Titel'}).a
                ptag = item.find('p')
                stag = ptag.find('script')
                if stag:
                    stag.extract()
                url = self.PREFIX + atag['href']
                title = self.tag_to_string(atag)
                description = self.tag_to_string(ptag)
                articles.append({
                    'title'       :title
                    ,'date'       :strftime(self.timefmt)
                    ,'url'        :url
                    ,'description':description
                })
            totalfeeds.append((feedtitle, articles))
        return totalfeeds

    def print_version(self, url):
        return url + 'print.html'
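The parse_index above bypasses RSS and scrapes each dated section page directly; the method's contract is to return a list of (feed title, articles) tuples, where each article is a dict with title, url, date and description keys. A stripped-down skeleton of that contract (the selector is a placeholder, not the site's real markup):

    def parse_index(self):
        feeds = []
        for feedtitle, feedurl in self.get_feeds():
            soup = self.index_to_soup(feedurl)
            articles = [{'title': self.tag_to_string(a), 'url': a['href'],
                         'date': '', 'description': ''}
                        for a in soup.findAll('a', attrs={'class': 'headline'})]  # placeholder selector
            feeds.append((feedtitle, articles))
        return feeds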
@ -9,8 +9,8 @@ from calibre.web.feeds.news import BasicNewsRecipe

class TelegraphUK(BasicNewsRecipe):
    title       = u'Telegraph.co.uk'
    __author__  = 'Darko Miletic'
    __author__  = 'Darko Miletic and Sujata Raman'
    description = 'News from United Kingdom'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
@ -18,23 +18,26 @@ class TelegraphUK(BasicNewsRecipe):

    use_embedded_content = False

    extra_css = '''
        h1{font-family :Arial,Helvetica,sans-serif; font-size:large; }
        h2{font-family :Arial,Helvetica,sans-serif; font-size:x-small; color:#444444}
        .story{font-family :Arial,Helvetica,sans-serif; font-size: x-small;}
        .byline{color:#666666; font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
        a{color:#234B7B; }
        .imageExtras{color:#666666; font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
    '''

    extra_css = '''
        h1{font-family :Arial,Helvetica,sans-serif; font-size:large; }
        h2{font-family :Arial,Helvetica,sans-serif; font-size:x-small; color:#444444;}
        .story{font-family :Arial,Helvetica,sans-serif; font-size: x-small;}
        .byline{color:#666666; font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
        a{color:#234B7B; }
        .imageExtras{color:#666666; font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
    '''

    keep_only_tags = [
        dict(name='div', attrs={'class':'storyHead'})
        ,dict(name='div', attrs={'class':'story' })
        #,dict(name='div', attrs={'class':['slideshowHD gutterUnder',"twoThirds gutter","caption" ] })
    ]
    remove_tags = [dict(name='div', attrs={'class':['related_links_inline',"imgindex","next","prev","gutterUnder"]})]
    remove_tags = [dict(name='div', attrs={'class':['related_links_inline',"imgindex","next","prev","gutterUnder",'ssImgHide','imageExtras','ssImg hide']})
        #,dict(name='div', attrs={'class':['toolshideoneQuarter']})
        ,dict(name='span', attrs={'class':['num','placeComment']})
    ]

    feeds = [
        (u'UK News'         , u'http://www.telegraph.co.uk/news/uknews/rss'                 )
        ,(u'World News'     , u'http://www.telegraph.co.uk/news/worldnews/rss'              )
        ,(u'Politics'       , u'http://www.telegraph.co.uk/news/newstopics/politics/rss'    )
@ -45,15 +48,27 @@ class TelegraphUK(BasicNewsRecipe):
        ,(u'Earth News'     , u'http://www.telegraph.co.uk/earth/earthnews/rss'             )
        ,(u'Comment'        , u'http://www.telegraph.co.uk/comment/rss'                     )
        ,(u'How about that?', u'http://www.telegraph.co.uk/news/newstopics/howaboutthat/rss')
    ]

    def get_article_url(self, article):
        url = article.get('guid', None)
        if 'picture-galleries' in url or 'pictures' in url or 'picturegalleries' in url :
            url = None
        return url

    def postprocess_html(self,soup,first):
        for bylineTag in soup.findAll(name='div', attrs={'class':'byline'}):
            for pTag in bylineTag.findAll(name='p'):
                if getattr(pTag.contents[0],"Comments",True):
                    pTag.extract()
        return soup
@ -10,7 +10,7 @@ from calibre.web.feeds.news import BasicNewsRecipe

class TorontoSun(BasicNewsRecipe):
    title       = 'Toronto SUN'
    __author__  = 'Darko Miletic'
    __author__  = 'Darko Miletic and Sujata Raman'
    description = 'News from Canada'
    publisher   = 'Toronto Sun'
    category    = 'news, politics, Canada'
@ -21,25 +21,50 @@ class TorontoSun(BasicNewsRecipe):
    encoding = 'cp1252'
    language = 'en_CA'

    conversion_options = {
        'comment'    : description
        , 'tags'     : category
        , 'publisher': publisher
        , 'language' : language
    }

    keep_only_tags =[
        dict(name='div', attrs={'class':'articleHead'})
        ,dict(name='div', attrs={'id':'channelContent'})
    ]
    remove_tags = [
        dict(name='div',attrs={'class':['leftBox','bottomBox clear','bottomBox','breadCrumb']})
        ,dict(name=['link','iframe','object'])
        ,dict(name='a',attrs={'rel':'swap'})
        ,dict(name='ul',attrs={'class':'tabs dl contentSwap'})
    ]
    keep_only_tags = [
        dict(name='div', attrs={'class':['articleHead','leftBox']})
        ,dict(name='div', attrs={'id':'channelContent'})
        ,dict(name='div', attrs={'id':'rotateBox'})
        ,dict(name='img')
    ]
    remove_tags = [
        dict(name='div',attrs={'class':['bottomBox clear','bottomBox','breadCrumb','articleControls thin','articleControls thin short','extraVideoList']})
        ,dict(name='h2',attrs={'class':'microhead'})
        ,dict(name='div',attrs={'id':'commentsBottom'})
        ,dict(name=['link','iframe','object'])
        ,dict(name='a',attrs={'rel':'swap'})
        ,dict(name='a',attrs={'href':'/news/haiti/'})
        ,dict(name='ul',attrs={'class':['tabs dl contentSwap','micrositeNav clearIt hList','galleryNav rotateNav']})
    ]

    remove_tags_after = [
        dict(name='div',attrs={'class':'bottomBox clear'})
        ,dict(name='div',attrs={'class':'rotateBox'})
        ,dict(name='div',attrs={'id':'contentSwap'})
    ]

    extra_css = '''
        h1{font-family :Arial,Helvetica,sans-serif; font-size:large;}
        h2{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#666666;}
        h3{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000;}
        p{font-family :Arial,Helvetica,sans-serif; font-size:x-small;}
        .bold{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;color:#444444;margin-left: 0px;}
        .subheading{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000; font-weight: bold;}
        .byline{color:#666666; font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
        .byline span{color:#666666; font-family :Arial,Helvetica,sans-serif; font-size: xx-small; text-transform: uppercase;}
        .updated{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
        .galleryCaption{font-family :Arial,Helvetica,sans-serif; font-size: x-small;}
        .galleryUpdated{font-family :Arial,Helvetica,sans-serif; font-size: x-small;}
    '''

    remove_tags_after = dict(name='div',attrs={'class':'bottomBox clear'})

    feeds = [
        (u'News' , u'http://www.torontosun.com/news/rss.xml' )
@ -48,3 +73,19 @@ class TorontoSun(BasicNewsRecipe):
        ,(u'World' , u'http://www.torontosun.com/news/world/rss.xml' )
        ,(u'Money' , u'http://www.torontosun.com/money/rss.xml' )
    ]

    def preprocess_html(self, soup):
        ##To fetch images from the specified source
        for img in soup.findAll('img', src=True):
            url = img.get('src').split('?')[-1].partition('=')[-1]
            if url:
                img['src']    = url.split('&')[0].partition('=')[0]
                img['width']  = url.split('&')[-1].partition('=')[-1].split('x')[0]
                img['height'] = url.split('&')[-1].partition('=')[-1].split('x')[1]
        return soup
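The image rewrite in preprocess_html above assumes the site serves pictures through a resizer URL whose query string carries the real source plus a WxH size token. A worked trace of the string slicing on a hypothetical URL of that shape:

    url = 'http://host/resize?src=http://host/img.jpg&size=620x400'
    payload = url.split('?')[-1].partition('=')[-1]       # 'http://host/img.jpg&size=620x400'
    src     = payload.split('&')[0].partition('=')[0]     # 'http://host/img.jpg'
    dims    = payload.split('&')[-1].partition('=')[-1]   # '620x400'
    width, height = dims.split('x')                       # ('620', '400')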
@ -1,7 +1,5 @@
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.variety.com
'''
@ -20,8 +18,10 @@ class Variety(BasicNewsRecipe):
    publisher    = 'Red Business Information'
    category     = 'Entertainment Industry News, Daily Variety, Movie Reviews, TV, Awards, Oscars, Cannes, Box Office, Hollywood'
    language     = 'en'
    masthead_url = 'http://a330.g.akamai.net/7/330/23382/20090528190853/www.variety.com/graphics/variety/Variety_logo_green_tm.gif'
    extra_css    = ' body{font-family: Georgia,"Times New Roman",Times,Courier,serif } img{margin-bottom: 1em} '

    conversion_options = {
        'comments'  : description
        ,'tags'     : category
        ,'language' : language
@ -31,7 +31,7 @@ class Variety(BasicNewsRecipe):
    remove_tags = [dict(name=['object','link','map'])]

    keep_only_tags = [dict(name='div', attrs={'id':'article'})]

    feeds = [(u'News & Articles', u'http://feeds.feedburner.com/variety/headlines' )]

    def print_version(self, url):
@ -41,6 +41,6 @@ class Variety(BasicNewsRecipe):
        catid = catidr.partition('&')[0]
        return 'http://www.variety.com/index.asp?layout=print_story&articleid=' + artid + '&categoryid=' + catid

    def get_article_url(self, article):
        return article.get('feedburner_origlink', None)

    def preprocess_html(self, soup):
        return self.adeify_images(soup)
95 resources/recipes/winter_olympics.recipe Normal file
@ -0,0 +1,95 @@
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = '2010, Starson17'
'''
www.nbcolympics.com
'''
from calibre.web.feeds.news import BasicNewsRecipe

class Olympics_2010(BasicNewsRecipe):
    title       = u'NBC Olympics 2010'
    __author__  = 'Starson17'
    description = 'Olympics 2010'
    cover_url   = 'http://www.digitaljournal.com/img/1/1/2/1/i/4/7/6/o/WinterOlympics2010-logo.jpg'
    publisher   = 'Olympics 2010'
    tags        = 'Olympics news'
    language    = 'en'
    use_embedded_content = False
    no_stylesheets    = True
    remove_javascript = True
    # recursions = 3
    oldest_article = 7
    max_articles_per_feed = 10

    keep_only_tags = [dict(name='div', attrs={'class':['Article ','ArticleGallery']}),
    ]

    remove_tags = [dict(name='div', attrs={'id':['RelatedTagsBox','ShareBox']}),
        dict(name='div', attrs={'class':['DateUtilities','PhotoGallery BoxRight','Frame','ToolBox']}),
    ]

    # RSS feeds are at: http://www.nbcolympics.com/rss/index.html
    feeds = [
        ('NBCOlympics.com - News', 'http://www.nbcolympics.com/rss/newscenter/mostpopular.xml'),
        ('NBCOlympics.com - News - Top Stories', 'http://www.nbcolympics.com/rss/newscenter/topstories.xml'),
        ('NBCOlympics.com - News - Latest Headlines', 'http://www.nbcolympics.com/rss/newscenter/latestnews.xml'),
        # ('NBCOlympics.com - Photos', 'http://www.nbcolympics.com/rss/photos/mostpopular.xml'),
        # ('NBCOlympics.com - Photos - Editorial Picks', 'http://www.nbcolympics.com/rss/photos/editorialpicks.xml'),
        # ('NBCOlympics.com - Photos - Latest Slideshows', 'http://www.nbcolympics.com/rss/photos/latestslideshows.xml'),
        ('NBCOlympics.com - Team USA - Latest news', 'http://www.nbcolympics.com/rss/countries/team-usa/index.xml'),
        # ('NBCOlympics.com - Team USA - Latest Slideshows', 'http://www.nbcolympics.com/rss/countries/team-usa/photos/index.xml'),
        # ('NBCOlympics.com - Team USA - Video', 'http://www.nbcolympics.com/rss/countries/team-usa/video/index.xml'),
        # ('NBCOlympics.com - Alpine Skiing - Most Popular News', 'http://www.nbcolympics.com/rss/sport=AS/mostpopular.xml'),
        # ('NBCOlympics.com - Alpine Skiing - Top News', 'http://www.nbcolympics.com/rss/sport=AS/topnews.xml'),
        ('NBCOlympics.com - Alpine Skiing - Latest News', 'http://www.nbcolympics.com/rss/sport=AS/latestnews.xml'),
        # ('NBCOlympics.com - Biathlon - Most Popular News', 'http://www.nbcolympics.com/rss/sport=BT/mostpopular.xml'),
        # ('NBCOlympics.com - Biathlon - Top News', 'http://www.nbcolympics.com/rss/sport=BT/topnews.xml'),
        ('NBCOlympics.com - Biathlon - Latest News', 'http://www.nbcolympics.com/rss/sport=BT/latestnews.xml'),
        # ('NBCOlympics.com - Bobsled - Most Popular News', 'http://www.nbcolympics.com/rss/sport=BS/mostpopular.xml'),
        # ('NBCOlympics.com - Bobsled - Top News', 'http://www.nbcolympics.com/rss/sport=BS/topnews.xml'),
        ('NBCOlympics.com - Bobsled - Latest News', 'http://www.nbcolympics.com/rss/sport=BS/latestnews.xml'),
        # ('NBCOlympics.com - Cross-Country - Most Popular News', 'http://www.nbcolympics.com/rss/sport=CC/mostpopular.xml'),
        # ('NBCOlympics.com - Cross-Country - Top News', 'http://www.nbcolympics.com/rss/sport=CC/topnews.xml'),
        ('NBCOlympics.com - Cross-Country - Latest News', 'http://www.nbcolympics.com/rss/sport=CC/latestnews.xml'),
        # ('NBCOlympics.com - Curling - Most Popular News', 'http://www.nbcolympics.com/rss/sport=CU/mostpopular.xml'),
        # ('NBCOlympics.com - Curling - Top News', 'http://www.nbcolympics.com/rss/sport=CU/topnews.xml'),
        ('NBCOlympics.com - Curling - Latest News', 'http://www.nbcolympics.com/rss/sport=CU/latestnews.xml'),
        # ('NBCOlympics.com - Figure Skating - Most Popular News', 'http://www.nbcolympics.com/rss/sport=FS/mostpopular.xml'),
        # ('NBCOlympics.com - Figure Skating - Top News', 'http://www.nbcolympics.com/rss/sport=FS/topnews.xml'),
        ('NBCOlympics.com - Figure Skating - Latest News', 'http://www.nbcolympics.com/rss/sport=FS/latestnews.xml'),
        # ('NBCOlympics.com - Freestyle Skiing - Most Popular News', 'http://www.nbcolympics.com/rss/sport=FR/mostpopular.xml'),
        # ('NBCOlympics.com - Freestyle Skiing - Top News', 'http://www.nbcolympics.com/rss/sport=FR/topnews.xml'),
        ('NBCOlympics.com - Freestyle Skiing - Latest News', 'http://www.nbcolympics.com/rss/sport=FR/latestnews.xml'),
        # ('NBCOlympics.com - Hockey - Most Popular News', 'http://www.nbcolympics.com/rss/sport=IH/mostpopular.xml'),
        # ('NBCOlympics.com - Hockey - Top News', 'http://www.nbcolympics.com/rss/sport=IH/topnews.xml'),
        ('NBCOlympics.com - Hockey - Latest News', 'http://www.nbcolympics.com/rss/sport=IH/latestnews.xml'),
        # ('NBCOlympics.com - Luge - Most Popular News', 'http://www.nbcolympics.com/rss/sport=LG/mostpopular.xml'),
        # ('NBCOlympics.com - Luge - Top News', 'http://www.nbcolympics.com/rss/sport=LG/topnews.xml'),
        ('NBCOlympics.com - Luge - Latest News', 'http://www.nbcolympics.com/rss/sport=LG/latestnews.xml'),
        # ('NBCOlympics.com - Nordic Combined - Most Popular News', 'http://www.nbcolympics.com/rss/sport=NC/mostpopular.xml'),
        # ('NBCOlympics.com - Nordic Combined - Top News', 'http://www.nbcolympics.com/rss/sport=NC/topnews.xml'),
        ('NBCOlympics.com - Nordic Combined - Latest News', 'http://www.nbcolympics.com/rss/sport=NC/latestnews.xml'),
        # ('NBCOlympics.com - Short Track - Most Popular News', 'http://www.nbcolympics.com/rss/sport=ST/mostpopular.xml'),
        # ('NBCOlympics.com - Short Track - Top News', 'http://www.nbcolympics.com/rss/sport=ST/topnews.xml'),
        ('NBCOlympics.com - Short Track - Latest News', 'http://www.nbcolympics.com/rss/sport=ST/latestnews.xml'),
        # ('NBCOlympics.com - Skeleton - Most Popular News', 'http://www.nbcolympics.com/rss/sport=SN/mostpopular.xml'),
        # ('NBCOlympics.com - Skeleton - Top News', 'http://www.nbcolympics.com/rss/sport=SN/topnews.xml'),
        ('NBCOlympics.com - Skeleton - Latest News', 'http://www.nbcolympics.com/rss/sport=SN/latestnews.xml'),
        # ('NBCOlympics.com - Ski Jumping - Most Popular News', 'http://www.nbcolympics.com/rss/sport=SJ/mostpopular.xml'),
        # ('NBCOlympics.com - Ski Jumping - Top News', 'http://www.nbcolympics.com/rss/sport=SJ/topnews.xml'),
        ('NBCOlympics.com - Ski Jumping - Latest News', 'http://www.nbcolympics.com/rss/sport=SJ/latestnews.xml'),
        # ('NBCOlympics.com - Snowboarding - Most Popular News', 'http://www.nbcolympics.com/rss/sport=SB/mostpopular.xml'),
        # ('NBCOlympics.com - Snowboarding - Top News', 'http://www.nbcolympics.com/rss/sport=SB/topnews.xml'),
        ('NBCOlympics.com - Snowboarding - Latest News', 'http://www.nbcolympics.com/rss/sport=SB/latestnews.xml'),
        # ('NBCOlympics.com - Speed Skating - Most Popular News', 'http://www.nbcolympics.com/rss/sport=AS/mostpopular.xml'),
        # ('NBCOlympics.com - Speed Skating - Top News', 'http://www.nbcolympics.com/rss/sport=AS/topnews.xml'),
        ('NBCOlympics.com - Speed Skating - Latest News', 'http://www.nbcolympics.com/rss/sport=AS/latestnews.xml'),
    ]

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''
@ -20,6 +20,7 @@ class Wired(BasicNewsRecipe):
    no_stylesheets       = True
    encoding             = 'utf-8'
    use_embedded_content = False
    masthead_url         = 'http://www.wired.com/images/home/wired_logo.gif'
    language             = 'en'
    extra_css            = ' body{font-family: sans-serif} .entryDescription li {display: inline; list-style-type: none} '
    index                = 'http://www.wired.com/magazine/'
@ -38,14 +39,34 @@ class Wired(BasicNewsRecipe):
        dict(name=['object','embed','iframe','link'])
        ,dict(name='div', attrs={'class':['podcast_storyboard','tweetmeme_button']})
    ]
    remove_attributes = ['height','width']

    #feeds = [(u'Articles' , u'http://www.wired.com/magazine/feed/' )]

    def parse_index(self):
        totalfeeds = []

        soup = self.index_to_soup(self.index)
        majorf = soup.find('div',attrs={'class':'index'})
        if majorf:
            pfarticles = []
            firsta = majorf.find(attrs={'class':'spread-header'})
            if firsta:
                pfarticles.append({
                    'title'       :self.tag_to_string(firsta.a)
                    ,'date'       :strftime(self.timefmt)
                    ,'url'        :'http://www.wired.com' + firsta.a['href']
                    ,'description':''
                })
            for itt in majorf.findAll('li'):
                itema = itt.find('a',href=True)
                if itema:
                    pfarticles.append({
                        'title'       :self.tag_to_string(itema)
                        ,'date'       :strftime(self.timefmt)
                        ,'url'        :'http://www.wired.com' + itema['href']
                        ,'description':''
                    })
            totalfeeds.append(('Cover', pfarticles))
        features = soup.find('div',attrs={'id':'my-glider'})
        if features:
            farticles = []
44
resources/recipes/wired_daily.recipe
Normal file
44
resources/recipes/wired_daily.recipe
Normal file
@ -0,0 +1,44 @@
|
||||
#!/usr/bin/env python
|
||||
__license__ = 'GPL v3'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Wired_Daily(BasicNewsRecipe):
|
||||
|
||||
title = 'Wired Daily Edition'
|
||||
__author__ = 'Kovid Goyal'
|
||||
description = 'Technology news'
|
||||
timefmt = ' [%Y%b%d %H%M]'
|
||||
language = 'en'
|
||||
|
||||
no_stylesheets = True
|
||||
|
||||
remove_tags_before = dict(name='div', id='content')
|
||||
remove_tags = [dict(id=['social_tools', 'outerWrapper', 'sidebar',
|
||||
'footer', 'advertisement', 'blog_subscription_unit',
|
||||
'brightcove_component']),
|
||||
{'class':'entryActions'},
|
||||
dict(name=['noscript', 'script'])]
|
||||
|
||||
feeds = [
|
||||
('Top News', 'http://feeds.wired.com/wired/index'),
|
||||
('Culture', 'http://feeds.wired.com/wired/culture'),
|
||||
('Software', 'http://feeds.wired.com/wired/software'),
|
||||
('Mac', 'http://feeds.feedburner.com/cultofmac/bFow'),
|
||||
('Gadgets', 'http://feeds.wired.com/wired/gadgets'),
|
||||
('Cars', 'http://feeds.wired.com/wired/cars'),
|
||||
('Entertainment', 'http://feeds.wired.com/wired/entertainment'),
|
||||
('Gaming', 'http://feeds.wired.com/wired/gaming'),
|
||||
('Science', 'http://feeds.wired.com/wired/science'),
|
||||
('Med Tech', 'http://feeds.wired.com/wired/medtech'),
|
||||
('Politics', 'http://feeds.wired.com/wired/politics'),
|
||||
('Tech Biz', 'http://feeds.wired.com/wired/techbiz'),
|
||||
('Commentary', 'http://feeds.wired.com/wired/commentary'),
|
||||
]
|
||||
|
||||
def print_version(self, url):
|
||||
return url.replace('http://www.wired.com/', 'http://www.wired.com/print/')
|
||||
|
||||
|
74 resources/recipes/wired_uk.recipe Normal file
@ -0,0 +1,74 @@
__license__   = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.wired.co.uk
'''

from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe

class Wired_UK(BasicNewsRecipe):
    title                 = 'Wired Magazine - UK edition'
    __author__            = 'Darko Miletic'
    description           = 'Gaming news'
    publisher             = 'Conde Nast Digital'
    category              = 'news, games, IT, gadgets'
    oldest_article        = 32
    max_articles_per_feed = 100
    no_stylesheets        = True
    encoding              = 'utf-8'
    use_embedded_content  = False
    masthead_url          = 'http://www.wired.co.uk/_/media/wired-logo_UK.gif'
    language              = 'en_GB'
    extra_css             = ' body{font-family: Palatino,"Palatino Linotype","Times New Roman",Times,serif} img{margin-bottom: 0.8em } .img-descr{font-family: Tahoma,Arial,Helvetica,sans-serif; font-size: 0.6875em; display: block} '
    index                 = 'http://www.wired.co.uk/wired-magazine.aspx'

    conversion_options = {
        'comment'    : description
        , 'tags'     : category
        , 'publisher': publisher
        , 'language' : language
    }

    keep_only_tags = [dict(name='div', attrs={'class':'article-box'})]
    remove_tags = [
        dict(name=['object','embed','iframe','link'])
        ,dict(attrs={'class':['opts','comment','stories']})
    ]
    remove_tags_after = dict(name='div',attrs={'class':'stories'})
    remove_attributes = ['height','width']

    def parse_index(self):
        totalfeeds = []
        soup = self.index_to_soup(self.index)
        maincontent = soup.find('div',attrs={'class':'main-content'})
        mfeed = []
        if maincontent:
            st = maincontent.find(attrs={'class':'most-wired-box'})
            if st:
                for itt in st.findAll('a',href=True):
                    url   = 'http://www.wired.co.uk' + itt['href']
                    title = self.tag_to_string(itt)
                    description = ''
                    date = strftime(self.timefmt)
                    mfeed.append({
                        'title'       :title
                        ,'date'       :date
                        ,'url'        :url
                        ,'description':description
                    })
        totalfeeds.append(('Articles', mfeed))
        return totalfeeds

    def get_cover_url(self):
        cover_url = None
        soup = self.index_to_soup(self.index)
        cover_item = soup.find('span', attrs={'class':'cover'})
        if cover_item:
            cover_url = cover_item.img['src']
        return cover_url

    def print_version(self, url):
        return url + '?page=all'
@ -50,7 +50,11 @@ class WallStreetJournal(BasicNewsRecipe):
        br.select_form(nr=0)
        br['user'] = self.username
        br['password'] = self.password
        br.submit()
        res = br.submit()
        raw = res.read()
        if 'Welcome,' not in raw:
            raise ValueError('Failed to log in to wsj.com, check your '
                    'username and password')
        return br

    def postprocess_html(self, soup, first):
@ -69,8 +73,10 @@ class WallStreetJournal(BasicNewsRecipe):
        soup = self.wsj_get_index()

        year = strftime('%Y')
        for x in soup.findAll('td', attrs={'class':'b14'}):
        for x in soup.findAll('td', height='25', attrs={'class':'b14'}):
            txt = self.tag_to_string(x).strip()
            txt = txt.replace(u'\xa0', ' ')
            txt = txt.encode('ascii', 'ignore')
            if year in txt:
                self.timefmt = ' [%s]'%txt
                break
30 resources/tanea.recipe Normal file
@ -0,0 +1,30 @@
from calibre.web.feeds.recipes import BasicNewsRecipe

class TaNea(BasicNewsRecipe):
    title      = u'Ta Nea'
    __author__ = 'Pan'
    oldest_article = 1
    max_articles_per_feed = 100
    no_stylesheets = True

    remove_tags_before = dict(name='div',attrs={'id':'print-body'})
    remove_tags_after  = dict(name='div',attrs={'id':'text'})

    feeds = [
        (u'\xce\x95\xce\xbb\xce\xbb\xce\xac\xce\xb4\xce\xb1',
            u'http://www.tanea.gr/default.asp?pid=66&la=1'),
        (u'\xce\x9a\xcf\x8c\xcf\x83\xce\xbc\xce\xbf\xcf\x82',
            u'http://www.tanea.gr/default.asp?pid=67&la=1'),
        (u'\xce\x9f\xce\xb9\xce\xba\xce\xbf\xce\xbd\xce\xbf\xce\xbc\xce\xaf\xce\xb1',
            u'http://www.tanea.gr/default.asp?pid=68&la=1'),
        (u'\xce\xa0\xce\xbf\xce\xbb\xce\xb9\xcf\x84\xce\xb9\xcf\x83\xce\xbc\xcf\x8c\xcf\x82',
            u'http://www.tanea.gr/default.asp?pid=69&la=1'),
        (u'\xce\x93\xce\xbd\xcf\x8e\xce\xbc\xce\xb5\xcf\x82',
            u'http://www.tanea.gr/default.asp?pid=79&la=1'),
        (u'\xce\xa1\xce\xb9\xcf\x80\xce\xad\xcf\x82',
            u'http://www.tanea.gr/default.asp?pid=80&la=1'),
        (u'\xce\x91\xce\xb9\xcf\x87\xce\xbc\xce\xad\xcf\x82',
            u'http://www.tanea.gr/default.asp?pid=81&la=1')]

    def print_version(self, url):
        return url.replace('http://www.tanea.gr/default.asp?pid=2', 'http://www.tanea.gr/default.asp?pid=96')
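The u'\xce...' feed titles above are UTF-8 byte sequences that were typed into unicode literals; they render correctly once re-encoded. For example, the first one decodes to the Greek section name:

    name = u'\xce\x95\xce\xbb\xce\xbb\xce\xac\xce\xb4\xce\xb1'
    print name.encode('latin-1').decode('utf-8')   # -> Ελλάδα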
@ -20,37 +20,8 @@ function selector(elem) {
    return sel;
}

function find_closest_enclosing_block(top) {
    var START = top-1000;
    var STOP = top;
    var matches = [];
    var elem, temp;
    var width = 1000;

    for (y = START; y < STOP; y += 20) {
        for ( x = 0; x < width; x += 20) {
            elem = document.elementFromPoint(x, y);
            try {
                elem = $(elem);
                temp = elem.offset().top
                matches.push(elem);
                if (Math.abs(temp - START) < 25) { y = STOP; break}
            } catch(error) {}
        }
    }

    var miny = Math.abs(matches[0].offset().top - START), min_elem = matches[0];

    for (i = 1; i < matches.length; i++) {
        elem = matches[i];
        temp = Math.abs(elem.offset().top - START);
        if ( temp < miny ) { miny = temp; min_elem = elem; }
    }
    return min_elem;
}

function calculate_bookmark(y) {
    var elem = find_closest_enclosing_block(y);
function calculate_bookmark(y, node) {
    var elem = $(node);
    var sel = selector(elem);
    var ratio = (y - elem.offset().top)/elem.height();
    if (ratio > 1) { ratio = 1; }
@ -399,7 +399,7 @@ class BuildPDF2XML(Command):
            objects.append(obj)

        if self.newer(dest, objects):
            cmd = ['g++', '-g', '-o', dest]+objects+['-lpoppler', '-lMagickWand',
            cmd = ['g++', '-ggdb', '-o', dest]+objects+['-lpoppler', '-lMagickWand',
                    '-lpng', '-lpthread']
            if iswindows:
                cmd = [msvc.linker] + '/INCREMENTAL:NO /DEBUG /NODEFAULTLIB:libcmt.lib'.split()
@ -137,8 +137,20 @@ class Develop(Command):
        self.setup_mount_helper()
        self.install_files()
        self.run_postinstall()
        self.install_env_module()
        self.success()

    def install_env_module(self):
        import distutils.sysconfig as s
        libdir = s.get_python_lib(prefix=self.opts.staging_root)
        if os.path.exists(libdir):
            path = os.path.join(libdir, 'init_calibre.py')
            self.info('Installing calibre environment module: '+path)
            with open(path, 'wb') as f:
                f.write(HEADER.format(**self.template_args()))
        else:
            self.warn('Cannot install calibre environment module to: '+libdir)

    def setup_mount_helper(self):
        def warn():
            self.warn('Failed to compile mount helper. Auto mounting of',
@ -180,13 +192,20 @@ class Develop(Command):
                functions[typ]):
            self.write_template(name, mod, func)

    def template_args(self):
        return {
            'path':self.libdir,
            'resources':self.sharedir,
            'executables':self.bindir,
            'extensions':self.j(self.libdir, 'calibre', 'plugins')
        }

    def write_template(self, name, mod, func):
        template = COMPLETE_TEMPLATE if name == 'calibre-complete' else TEMPLATE
        script = template.format(
            module=mod, func=func,
            path=self.libdir, resources=self.sharedir,
            executables=self.bindir,
            extensions=self.j(self.libdir, 'calibre', 'plugins'))
        args = self.template_args()
        args['module'] = mod
        args['func'] = func
        script = template.format(**args)
        path = self.j(self.staging_bindir, name)
        if not os.path.exists(self.staging_bindir):
            os.makedirs(self.staging_bindir)
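The refactor above funnels both the environment module and the launcher scripts through a single template_args() dict, so a new placeholder only has to be added in one place; write_template() just augments the dict and calls str.format. A toy version of the same pattern (TEMPLATE and the values here are illustrative, not calibre's real ones):

    TEMPLATE = '#!/usr/bin/env python\nimport sys\nsys.path.insert(0, {path!r})\nfrom {module} import {func}\n{func}()\n'
    args = {'path': '/src', 'resources': '/res', 'executables': '/bin',
            'extensions': '/src/calibre/plugins'}   # stand-ins for template_args()
    args['module'] = 'calibre.gui2.main'
    args['func'] = 'main'
    print TEMPLATE.format(**args)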
@ -15,7 +15,7 @@ class Rsync(Command):

    description = 'Sync source tree from development machine'

    SYNC_CMD = ('rsync -avz --exclude src/calibre/plugins '
    SYNC_CMD = ('rsync -avz --delete --exclude src/calibre/plugins '
            '--exclude src/calibre/manual --exclude src/calibre/trac '
            '--exclude .bzr --exclude .build --exclude .svn --exclude build --exclude dist '
            '--exclude "*.pyc" --exclude "*.pyo" --exclude "*.swp" --exclude "*.swo" '
@ -378,10 +378,11 @@ def strftime(fmt, t=None):
        t = time.localtime()
    early_year = t[0] < 1900
    if early_year:
        replacement = 1900 if t[0]%4 == 0 else 1901
        fmt = fmt.replace('%Y', '_early year hack##')
        t = list(t)
        orig_year = t[0]
        t[0] = 1900
        t[0] = replacement
    ans = None
    if iswindows:
        if isinstance(fmt, unicode):
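The hunk above tightens calibre's pre-1900 workaround: time.strftime rejects years before 1900 on several platforms, so the year is swapped for a safe stand-in while formatting and the real year is spliced back via the '_early year hack##' placeholder. Choosing 1900 for years divisible by 4 and 1901 otherwise keeps the stand-in in step with the original year's four-year cycle. A self-contained sketch of the idea (not calibre's exact code):

    import time

    def early_year_strftime(fmt, t):
        t = list(t)           # t is a struct_time-like 9-item sequence
        orig_year = t[0]
        if orig_year < 1900:
            t[0] = 1900 if orig_year % 4 == 0 else 1901   # formatting-safe stand-in
            fmt = fmt.replace('%Y', '@YEAR@')
            return time.strftime(fmt, tuple(t)).replace('@YEAR@', str(orig_year))
        return time.strftime(fmt, tuple(t))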
@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = 'calibre'
__version__ = '0.6.37'
__version__ = '0.6.42'
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"

import re
@ -139,12 +139,11 @@ class FileTypePlugin(Plugin):
    #: to the database
    on_import = False

    #: If True, this plugin is run whenever an any2* tool
    #: is used, on the file passed to the any2* tool.
    #: If True, this plugin is run just before a conversion
    on_preprocess = False

    #: If True, this plugin is run after an any2* tool is
    #: used, on the final file produced by the tool.
    #: If True, this plugin is run after conversion
    #: on the final file produced by the conversion output plugin.
    on_postprocess = False

    type = _('File type')
@ -249,6 +248,7 @@ class CatalogPlugin(Plugin):
    #:            dest = 'catalog_title',
    #:            help = (_('Title of generated catalog. \nDefault:') + " '" +
    #:            '%default' + "'"))]
    #: cli_options parsed in library.cli:catalog_option_parser()

    cli_options = []
@ -275,9 +275,10 @@ class CatalogPlugin(Plugin):
    def get_output_fields(self, opts):
        # Return a list of requested fields, with opts.sort_by first
        all_fields = set(
            ['author_sort','authors','comments','cover','formats', 'id','isbn','pubdate','publisher','rating',
            'series_index','series','size','tags','timestamp',
            'title','uuid'])
            ['author_sort','authors','comments','cover','formats',
            'id','isbn','pubdate','publisher','rating',
            'series_index','series','size','tags','timestamp',
            'title','uuid'])

        fields = all_fields
        if opts.fields != 'all':
@ -7,6 +7,7 @@ import os
import glob
from calibre.customize import FileTypePlugin, MetadataReaderPlugin, MetadataWriterPlugin
from calibre.constants import numeric_version
from calibre.ebooks.metadata.archive import ArchiveExtract

class HTML2ZIP(FileTypePlugin):
    name = 'HTML to ZIP'
@ -404,9 +405,10 @@ from calibre.devices.hanlin.driver import HANLINV3, HANLINV5, BOOX
from calibre.devices.blackberry.driver import BLACKBERRY
from calibre.devices.cybook.driver import CYBOOK
from calibre.devices.eb600.driver import EB600, COOL_ER, SHINEBOOK, \
    POCKETBOOK360, GER2, ITALICA, ECLICTO, DBOOK, INVESBOOK
    POCKETBOOK360, GER2, ITALICA, ECLICTO, DBOOK, INVESBOOK, \
    BOOQ
from calibre.devices.iliad.driver import ILIAD
from calibre.devices.irexdr.driver import IREXDR1000
from calibre.devices.irexdr.driver import IREXDR1000, IREXDR800
from calibre.devices.jetbook.driver import JETBOOK
from calibre.devices.kindle.driver import KINDLE, KINDLE2, KINDLE_DX
from calibre.devices.nook.driver import NOOK
@ -418,11 +420,11 @@ from calibre.devices.eslick.driver import ESLICK
from calibre.devices.nuut2.driver import NUUT2
from calibre.devices.iriver.driver import IRIVER_STORY
from calibre.devices.binatone.driver import README
from calibre.devices.hanvon.driver import N516
from calibre.devices.hanvon.driver import N516, EB511

from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon
from calibre.library.catalog import CSV_XML, EPUB_MOBI
plugins = [HTML2ZIP, PML2PMLZ, GoogleBooks, ISBNDB, Amazon, CSV_XML, EPUB_MOBI]
plugins = [HTML2ZIP, PML2PMLZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon, CSV_XML, EPUB_MOBI]
plugins += [
    ComicInput,
    EPUBInput,
@ -464,6 +466,7 @@ plugins += [
    CYBOOK,
    ILIAD,
    IREXDR1000,
    IREXDR800,
    JETBOOK,
    SHINEBOOK,
    POCKETBOOK360,
@ -487,9 +490,11 @@ plugins += [
    DBOOK,
    INVESBOOK,
    BOOX,
    BOOQ,
    EB600,
    README,
    N516,
    EB511,
]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
        x.__name__.endswith('MetadataReader')]
@ -20,7 +20,7 @@ class ANDROID(USBMS):
    VENDOR_ID = {
        0x0bb4 : { 0x0c02 : [0x100], 0x0c01 : [0x100]},
        0x22b8 : { 0x41d9 : [0x216]},
        0x18d1 : { 0x4e11 : [0x0100]},
        0x18d1 : { 0x4e11 : [0x0100], 0x4e12: [0x0100]},
    }
    EBOOK_DIR_MAIN = ['wordplayer/calibretransfer', 'eBooks/import', 'Books']
    EXTRA_CUSTOMIZATION_MESSAGE = _('Comma separated list of directories to '
@ -184,3 +184,14 @@ class INVESBOOK(EB600):
    VENDOR_NAME        = 'INVES_E6'
    WINDOWS_MAIN_MEM   = '00INVES_E600'
    WINDOWS_CARD_A_MEM = '00INVES_E600'

class BOOQ(EB600):
    name = 'Booq Device Interface'
    gui_name = 'Booq'

    FORMATS = ['epub', 'mobi', 'prc', 'fb2', 'pdf', 'doc', 'rtf', 'txt', 'html']

    VENDOR_NAME        = 'NETRONIX'
    WINDOWS_MAIN_MEM   = 'EB600'
    WINDOWS_CARD_A_MEM = 'EB600'
@ -126,3 +126,15 @@ class BOOX(HANLINV3):

    EBOOK_DIR_MAIN   = 'MyBooks'
    EBOOK_DIR_CARD_A = 'MyBooks'

    def windows_sort_drives(self, drives):
        main = drives.get('main', None)
        card = drives.get('carda', None)
        if card and main and card < main:
            drives['main'] = card
            drives['carda'] = main
        return drives
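windows_sort_drives above appears to compensate for Windows sometimes enumerating the Boox SD card ahead of main memory; the swap assigns the lower drive letter back to main. For example (drive letters invented):

    drives = {'main': 'F:\\', 'carda': 'E:\\'}
    # 'E:\\' < 'F:\\', so after windows_sort_drives:
    #   {'main': 'E:\\', 'carda': 'F:\\'}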
@ -7,6 +7,7 @@ __docformat__ = 'restructuredtext en'
'''
Device driver for Hanvon devices
'''
import re

from calibre.devices.usbms.driver import USBMS

@ -32,3 +33,25 @@ class N516(USBMS):

    EBOOK_DIR_MAIN    = 'e_book'
    SUPPORTS_SUB_DIRS = True

class EB511(USBMS):
    name = 'Elonex EB 511 driver'
    gui_name = 'EB 511'
    description = _('Communicate with the Elonex EB 511 eBook reader.')
    author = 'Kovid Goyal'
    supported_platforms = ['windows', 'osx', 'linux']

    FORMATS = ['epub', 'html', 'pdf', 'txt']

    VENDOR_ID  = [0x45e]
    PRODUCT_ID = [0xffff]
    BCD        = [0x0]

    MAIN_MEMORY_VOLUME_LABEL = 'EB 511 Internal Memory'

    EBOOK_DIR_MAIN    = 'e_book'
    SUPPORTS_SUB_DIRS = True

    OSX_MAIN_MEM_VOL_PAT = re.compile(r'/eReader')
@ -36,3 +36,14 @@ class IREXDR1000(USBMS):
    EBOOK_DIR_MAIN    = 'ebooks'
    DELETE_EXTS       = ['.mbp']
    SUPPORTS_SUB_DIRS = True

class IREXDR800(IREXDR1000):
    name = 'IRex Digital Reader 800 Device Interface'
    description = _('Communicate with the IRex Digital Reader 800')
    PRODUCT_ID = [0x002]
    WINDOWS_MAIN_MEM = 'DR800'
    FORMATS = ['epub', 'html', 'pdf', 'txt']

    EBOOK_DIR_MAIN = 'Books'
    DELETE_EXTS = []
@ -192,17 +192,15 @@ class PRS505(CLI, Device):
        fix_ids(*booklists)
        if not os.path.exists(self._main_prefix):
            os.makedirs(self._main_prefix)
        f = open(self._main_prefix + self.__class__.MEDIA_XML, 'wb')
        booklists[0].write(f)
        f.close()
        with open(self._main_prefix + self.__class__.MEDIA_XML, 'wb') as f:
            booklists[0].write(f)

        def write_card_prefix(prefix, listid):
            if prefix is not None and hasattr(booklists[listid], 'write'):
                if not os.path.exists(prefix):
                    os.makedirs(prefix)
                f = open(prefix + self.__class__.CACHE_XML, 'wb')
                booklists[listid].write(f)
                f.close()
                with open(prefix + self.__class__.CACHE_XML, 'wb') as f:
                    booklists[listid].write(f)
        write_card_prefix(self._card_a_prefix, 1)
        write_card_prefix(self._card_b_prefix, 2)
@ -4,8 +4,7 @@ __license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'

import os
import shutil
import os, shutil, time

from calibre.devices.errors import PathError

@ -50,11 +49,12 @@ class CLI(object):
        d = os.path.dirname(path)
        if not os.path.exists(d):
            os.makedirs(d)
        with open(path, 'wb') as dest:
        with open(path, 'w+b') as dest:
            try:
                shutil.copyfileobj(infile, dest)
            except IOError:
                print 'WARNING: First attempt to send file to device failed'
                time.sleep(0.2)
                infile.seek(0)
                dest.seek(0)
                dest.truncate()
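The put_file change above adds a single retry for flaky device writes: the destination is now opened 'w+b' so that, on IOError, both streams can rewind and the partial copy can be truncated before trying again. The same shape as a standalone helper (names are mine, not calibre's; the hunk is truncated, so the second copy after the cleanup is an assumption):

    import shutil, time

    def copy_with_one_retry(infile, dest):
        # dest must be opened 'w+b' so it can be rewound and truncated
        try:
            shutil.copyfileobj(infile, dest)
        except IOError:
            time.sleep(0.2)
            infile.seek(0)
            dest.seek(0)
            dest.truncate()
            shutil.copyfileobj(infile, dest)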
@ -70,6 +70,19 @@ def extract_cover_from_embedded_svg(html, base, log):
    if href and os.access(path, os.R_OK):
        return open(path, 'rb').read()

def extract_calibre_cover(raw, base, log):
    from calibre.ebooks.BeautifulSoup import BeautifulSoup
    soup = BeautifulSoup(raw)
    matches = soup.find(name=['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'span',
        'font', 'br'])
    images = soup.findAll('img')
    if matches is None and len(images) == 1 and \
            images[0].get('alt', '')=='cover':
        img = images[0]
        img = os.path.join(base, *img['src'].split('/'))
        if os.path.exists(img):
            return open(img, 'rb').read()

def render_html_svg_workaround(path_to_html, log, width=590, height=750):
    from calibre.ebooks.oeb.base import SVG_NS
    raw = open(path_to_html, 'rb').read()
@ -80,6 +93,11 @@ def render_html_svg_workaround(path_to_html, log, width=590, height=750):
            os.path.dirname(path_to_html), log)
    except:
        pass
    if data is None:
        try:
            data = extract_calibre_cover(raw, os.path.dirname(path_to_html), log)
        except:
            pass
    if data is None:
        renderer = render_html(path_to_html, width, height)
        data = getattr(renderer, 'data', None)
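extract_calibre_cover above encodes a simple heuristic: an HTML page is treated as a generated cover page when it contains no text-level tags and exactly one <img alt="cover">. The check in isolation, on a made-up snippet (any BeautifulSoup works; calibre ships its own copy):

    from BeautifulSoup import BeautifulSoup

    raw = '<html><body><img src="cover.jpg" alt="cover"/></body></html>'
    soup = BeautifulSoup(raw)
    text = soup.find(name=['h1','h2','h3','h4','h5','h6','p','span','font','br'])
    images = soup.findAll('img')
    print text is None and len(images) == 1 and images[0].get('alt', '') == 'cover'
    # -> True for the snippet above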
8 src/calibre/ebooks/chm/__init__.py Normal file
@ -0,0 +1,8 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'

'''
Used for chm input
'''
384
src/calibre/ebooks/chm/input.py
Normal file
384
src/calibre/ebooks/chm/input.py
Normal file
@ -0,0 +1,384 @@
|
||||
from __future__ import with_statement
|
||||
''' CHM File decoding support '''
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>,' \
|
||||
' and Alex Bramley <a.bramley at gmail.com>.'
|
||||
|
||||
import os, shutil, uuid
|
||||
from tempfile import mkdtemp
|
||||
from mimetypes import guess_type as guess_mimetype
|
||||
|
||||
from BeautifulSoup import BeautifulSoup
|
||||
from lxml import html
|
||||
from pychm.chm import CHMFile
|
||||
from pychm.chmlib import (
|
||||
CHM_RESOLVE_SUCCESS, CHM_ENUMERATE_NORMAL,
|
||||
chm_enumerate,
|
||||
)
|
||||
|
||||
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
||||
from calibre.utils.config import OptionParser
|
||||
from calibre.ebooks.metadata.toc import TOC
|
||||
from calibre.utils.localization import get_lang
|
||||
from calibre.utils.filenames import ascii_filename
|
||||
|
||||
|
||||
def match_string(s1, s2_already_lowered):
|
||||
if s1 is not None and s2_already_lowered is not None:
|
||||
if s1.lower()==s2_already_lowered:
|
||||
return True
|
||||
return False
|
||||
|
||||
def option_parser():
|
||||
parser = OptionParser(usage=_('%prog [options] mybook.chm'))
|
||||
parser.add_option('--output-dir', '-d', default='.', help=_('Output directory. Defaults to current directory'), dest='output')
|
||||
parser.add_option('--verbose', default=False, action='store_true', dest='verbose')
|
||||
parser.add_option("-t", "--title", action="store", type="string", \
|
||||
dest="title", help=_("Set the book title"))
|
||||
parser.add_option('--title-sort', action='store', type='string', default=None,
|
||||
dest='title_sort', help=_('Set sort key for the title'))
|
||||
parser.add_option("-a", "--author", action="store", type="string", \
|
||||
dest="author", help=_("Set the author"))
|
||||
parser.add_option('--author-sort', action='store', type='string', default=None,
|
||||
dest='author_sort', help=_('Set sort key for the author'))
|
||||
parser.add_option("-c", "--category", action="store", type="string", \
|
||||
dest="category", help=_("The category this book belongs"
|
||||
" to. E.g.: History"))
|
||||
parser.add_option("--thumbnail", action="store", type="string", \
|
||||
dest="thumbnail", help=_("Path to a graphic that will be"
|
||||
" set as this files' thumbnail"))
|
||||
parser.add_option("--comment", action="store", type="string", \
|
||||
dest="freetext", help=_("Path to a txt file containing a comment."))
|
||||
parser.add_option("--get-thumbnail", action="store_true", \
|
||||
dest="get_thumbnail", default=False, \
|
||||
help=_("Extract thumbnail from LRF file"))
|
||||
parser.add_option('--publisher', default=None, help=_('Set the publisher'))
|
||||
parser.add_option('--classification', default=None, help=_('Set the book classification'))
|
||||
parser.add_option('--creator', default=None, help=_('Set the book creator'))
|
||||
parser.add_option('--producer', default=None, help=_('Set the book producer'))
|
||||
parser.add_option('--get-cover', action='store_true', default=False,
|
||||
help=_('Extract cover from LRF file. Note that the LRF format has no defined cover, so we use some heuristics to guess the cover.'))
|
||||
parser.add_option('--bookid', action='store', type='string', default=None,
|
||||
dest='book_id', help=_('Set book ID'))
|
||||
parser.add_option('--font-delta', action='store', type='int', default=0,
|
||||
dest='font_delta', help=_('Set font delta'))
|
||||
return parser
|
||||
|
||||
class CHMError(Exception):
|
||||
pass
|
||||
|
||||
class CHMReader(CHMFile):
|
||||
def __init__(self, input, log):
|
||||
CHMFile.__init__(self)
|
||||
if not self.LoadCHM(input):
|
||||
raise CHMError("Unable to open CHM file '%s'"%(input,))
|
||||
self.log = log
|
||||
self._sourcechm = input
|
||||
self._contents = None
|
||||
self._playorder = 0
|
||||
self._metadata = False
|
||||
self._extracted = False
|
||||
|
||||
# location of '.hhc' file, which is the CHM TOC.
|
||||
self.root, ext = os.path.splitext(self.topics.lstrip('/'))
|
||||
self.hhc_path = self.root + ".hhc"
|
||||
|
||||
|
||||
def _parse_toc(self, ul, basedir=os.getcwdu()):
|
||||
toc = TOC(play_order=self._playorder, base_path=basedir, text='')
|
||||
self._playorder += 1
|
||||
for li in ul('li', recursive=False):
|
||||
href = li.object('param', {'name': 'Local'})[0]['value']
|
||||
if href.count('#'):
|
||||
href, frag = href.split('#')
|
||||
else:
|
||||
frag = None
|
||||
name = self._deentity(li.object('param', {'name': 'Name'})[0]['value'])
|
||||
#print "========>", name
|
||||
toc.add_item(href, frag, name, play_order=self._playorder)
|
||||
self._playorder += 1
|
||||
if li.ul:
|
||||
child = self._parse_toc(li.ul)
|
||||
child.parent = toc
|
||||
toc.append(child)
|
||||
#print toc
|
||||
return toc
|
||||
|
||||
|
||||
def GetFile(self, path):
|
||||
# have to have abs paths for ResolveObject, but Contents() deliberately
|
||||
# makes them relative. So we don't have to worry, re-add the leading /.
|
||||
# note this path refers to the internal CHM structure
|
||||
if path[0] != '/':
|
||||
path = '/' + path
|
||||
res, ui = self.ResolveObject(path)
|
||||
if res != CHM_RESOLVE_SUCCESS:
|
||||
raise CHMError("Unable to locate '%s' within CHM file '%s'"%(path, self.filename))
|
||||
size, data = self.RetrieveObject(ui)
|
||||
if size == 0:
|
||||
raise CHMError("'%s' is zero bytes in length!"%(path,))
|
||||
return data
|
||||
|
||||
def ExtractFiles(self, output_dir=os.getcwdu()):
|
||||
for path in self.Contents():
|
||||
lpath = os.path.join(output_dir, path)
|
||||
self._ensure_dir(lpath)
|
||||
data = self.GetFile(path)
|
||||
with open(lpath, 'wb') as f:
|
||||
if guess_mimetype(path)[0] == ('text/html'):
|
||||
data = self._reformat(data)
|
||||
f.write(data)
|
||||
#subprocess.call(['extract_chmLib.exe', self._sourcechm, output_dir])
|
||||
self._extracted = True
|
||||
|
||||
def _reformat(self, data):
|
||||
try:
|
||||
soup = BeautifulSoup(data)
|
||||
except UnicodeEncodeError:
|
||||
# hit some strange encoding problems...
|
||||
print "Unable to parse html for cleaning, leaving it :("
|
||||
return data
|
||||
# nuke javascript...
|
||||
[s.extract() for s in soup('script')]
|
||||
# remove forward and back nav bars from the top/bottom of each page
|
||||
# cos they really fuck with the flow of things and generally waste space
|
||||
# since we can't use [a,b] syntax to select arbitrary items from a list
|
||||
# we'll have to do this manually...
|
||||
t = soup('table')
|
||||
if t:
|
||||
if (t[0].previousSibling is None
|
||||
or t[0].previousSibling.previousSibling is None):
|
||||
t[0].extract()
|
||||
if (t[-1].nextSibling is None
|
||||
or t[-1].nextSibling.nextSibling is None):
|
||||
t[-1].extract()
|
||||
# for some very odd reason each page's content appears to be in a table
|
||||
# too. and this table has sub-tables for random asides... grr.
|
||||
|
||||
# some images seem to be broken in some chm's :/
|
||||
for img in soup('img'):
|
||||
try:
|
||||
# some are supposedly "relative"... lies.
|
||||
while img['src'].startswith('../'): img['src'] = img['src'][3:]
|
||||
# some have ";<junk>" at the end.
|
||||
img['src'] = img['src'].split(';')[0]
|
||||
except KeyError:
|
||||
# and some don't even have a src= ?!
|
||||
pass
|
||||
# now give back some pretty html.
|
||||
return soup.prettify()

    def Contents(self):
        if self._contents is not None:
            return self._contents
        paths = []
        def get_paths(chm, ui, ctx):
            # skip directories
            # note this path refers to the internal CHM structure
            if ui.path[-1] != '/':
                # and make paths relative
                paths.append(ui.path.lstrip('/'))
        chm_enumerate(self.file, CHM_ENUMERATE_NORMAL, get_paths, None)
        self._contents = paths
        return self._contents

    def _ensure_dir(self, path):
        dir = os.path.dirname(path)
        if not os.path.isdir(dir):
            os.makedirs(dir)

    def extract_content(self, output_dir=os.getcwdu()):
        self.ExtractFiles(output_dir=output_dir)

class CHMInput(InputFormatPlugin):

    name        = 'CHM Input'
    author      = 'Kovid Goyal and Alex Bramley'
    description = 'Convert CHM files to OEB'
    file_types  = set(['chm'])

    options = set([
        OptionRecommendation(name='dummy_option', recommended_value=False,
            help=_('dummy option until real options are determined.')),
    ])

    def _chmtohtml(self, output_dir, chm_path, no_images, log):
        log.debug('Opening CHM file')
        rdr = CHMReader(chm_path, log)
        log.debug('Extracting CHM to %s' % output_dir)
        rdr.extract_content(output_dir)
        return rdr.hhc_path

    def convert(self, stream, options, file_ext, log, accelerators):
        from calibre.ebooks.metadata.chm import get_metadata_

        log.debug('Processing CHM...')
        tdir = mkdtemp(prefix='chm2oeb_')
        from calibre.customize.ui import plugin_for_input_format
        html_input = plugin_for_input_format('html')
        for opt in html_input.options:
            setattr(options, opt.option.name, opt.recommended_value)
        options.input_encoding = 'utf-8'
        no_images = False #options.no_images
        chm_name = stream.name
        #chm_data = stream.read()

        # close the stream so the CHM can be opened by the external library
        stream.close()
        log.debug('tdir=%s' % tdir)
        log.debug('stream.name=%s' % stream.name)
        mainname = self._chmtohtml(tdir, chm_name, no_images, log)
        mainpath = os.path.join(tdir, mainname)

        metadata = get_metadata_(tdir)

        odi = options.debug_pipeline
        options.debug_pipeline = None
        # try a custom conversion:
        #oeb = self._create_oebbook(mainpath, tdir, options, log, metadata)
        # try using the html converter:
        htmlpath = self._create_html_root(mainpath, log)
        oeb = self._create_oebbook_html(htmlpath, tdir, options, log, metadata)
        options.debug_pipeline = odi
        #log.debug('DEBUG: Not removing tempdir %s' % tdir)
        shutil.rmtree(tdir)
        return oeb

    def _create_oebbook_html(self, htmlpath, basedir, opts, log, mi):
        # use the HTMLInput plugin to generate the book
        from calibre.ebooks.html.input import HTMLInput
        opts.breadth_first = True
        htmlinput = HTMLInput(None)
        oeb = htmlinput.create_oebbook(htmlpath, basedir, opts, log, mi)
        return oeb

    def _create_oebbook(self, hhcpath, basedir, opts, log, mi):
        from calibre.ebooks.conversion.plumber import create_oebbook
        from calibre.ebooks.oeb.base import DirContainer
        oeb = create_oebbook(log, None, opts, self,
                encoding=opts.input_encoding, populate=False)
        self.oeb = oeb

        metadata = oeb.metadata
        if mi.title:
            metadata.add('title', mi.title)
        if mi.authors:
            for a in mi.authors:
                metadata.add('creator', a, attrib={'role':'aut'})
        if mi.publisher:
            metadata.add('publisher', mi.publisher)
        if mi.isbn:
            metadata.add('identifier', mi.isbn, attrib={'scheme':'ISBN'})
        if not metadata.language:
            oeb.logger.warn(u'Language not specified')
            metadata.add('language', get_lang())
        if not metadata.creator:
            oeb.logger.warn('Creator not specified')
            metadata.add('creator', _('Unknown'))
        if not metadata.title:
            oeb.logger.warn('Title not specified')
            metadata.add('title', _('Unknown'))

        bookid = str(uuid.uuid4())
        metadata.add('identifier', bookid, id='uuid_id', scheme='uuid')
        for ident in metadata.identifier:
            if 'id' in ident.attrib:
                self.oeb.uid = metadata.identifier[0]
                break

        hhcdata = self._read_file(hhcpath)
        hhcroot = html.fromstring(hhcdata)
        chapters = self._process_nodes(hhcroot)
        #print "============================="
        #print "Printing hhcroot"
        #print etree.tostring(hhcroot, pretty_print=True)
        #print "============================="
        log.debug('Found %d section nodes' % len(chapters))

        if len(chapters) > 0:
            path0 = chapters[0][1]
            subpath = os.path.dirname(path0)
            htmlpath = os.path.join(basedir, subpath)

            oeb.container = DirContainer(htmlpath, log)
            for chapter in chapters:
                title = chapter[0]
                basename = os.path.basename(chapter[1])
                self._add_item(oeb, title, basename)

            oeb.container = DirContainer(htmlpath, oeb.log)
        return oeb

    def _create_html_root(self, hhcpath, log):
        hhcdata = self._read_file(hhcpath)
        hhcroot = html.fromstring(hhcdata)
        chapters = self._process_nodes(hhcroot)
        #print "============================="
        #print "Printing hhcroot"
        #print etree.tostring(hhcroot, pretty_print=True)
        #print "============================="
        log.debug('Found %d section nodes' % len(chapters))
        htmlpath = os.path.splitext(hhcpath)[0] + ".html"
        f = open(htmlpath, 'wb')
        f.write("<HTML><HEAD></HEAD><BODY>\r\n")

        if chapters:
            path0 = chapters[0][1]
            subpath = os.path.dirname(path0)

            for chapter in chapters:
                title = chapter[0]
                rsrcname = os.path.basename(chapter[1])
                rsrcpath = os.path.join(subpath, rsrcname)
                # title should already be url encoded
                url = "<br /><a href=" + rsrcpath + ">" + title + " </a>\r\n"
                f.write(url)

        f.write("</BODY></HTML>")
        f.close()
        return htmlpath

    def _read_file(self, name):
        f = open(name, 'rb')
        data = f.read()
        f.close()
        return data

    def _visit_node(self, node, chapters, depth):
        # check that node is a normal node (not a comment, DOCTYPE, etc.)
        # (normal nodes have string tags)
        if isinstance(node.tag, basestring):
            if match_string(node.tag, 'object') and match_string(node.attrib['type'], 'text/sitemap'):
                for child in node:
                    if match_string(child.tag, 'param') and match_string(child.attrib['name'], 'name'):
                        chapter_title = child.attrib['value']
                    if match_string(child.tag, 'param') and match_string(child.attrib['name'], 'local'):
                        chapter_path = child.attrib['value']
                if chapter_title is not None and chapter_path is not None:
                    chapter = [chapter_title, chapter_path, depth]
                    chapters.append(chapter)
            if node.tag == "UL":
                depth = depth + 1
            if node.tag == "/UL":
                depth = depth - 1

    def _process_nodes(self, root):
        chapters = []
        depth = 0
        for node in root.iter():
            self._visit_node(node, chapters, depth)
        return chapters

    def _add_item(self, oeb, title, path):
        bname = os.path.basename(path)
        id, href = oeb.manifest.generate(id='html',
                href=ascii_filename(bname))
        item = oeb.manifest.add(id, href, 'text/html')
        item.html_input_href = bname
        oeb.spine.add(item, True)
        oeb.toc.add(title, item.href)

@@ -233,14 +233,18 @@ def create_option_parser(args, log):
     return parser, plumber

+def abspath(x):
+    if x.startswith('http:') or x.startswith('https:'):
+        return x
+    return os.path.abspath(os.path.expanduser(x))
+
 def main(args=sys.argv):
     log = Log()
     parser, plumber = create_option_parser(args, log)
     opts = parser.parse_args(args)[0]
-    y = lambda q : os.path.abspath(os.path.expanduser(q))
     for x in ('read_metadata_from_opf', 'cover'):
         if getattr(opts, x, None) is not None:
-            setattr(opts, x, y(getattr(opts, x)))
+            setattr(opts, x, abspath(getattr(opts, x)))
     recommendations = [(n.dest, getattr(opts, n.dest),
                         OptionRecommendation.HIGH) \
                                     for n in parser.options_iter()
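A quick behavior sketch of the new abspath helper (paths illustrative; the second result depends on the user's home directory): URLs pass through unchanged so --cover can accept them, while local paths are expanded and absolutized.

    >>> abspath('http://example.com/cover.jpg')
    'http://example.com/cover.jpg'
    >>> abspath('~/covers/c.jpg')
    '/home/user/covers/c.jpg'
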
@@ -12,6 +12,7 @@ from calibre.customize.ui import input_profiles, output_profiles, \
         run_plugins_on_preprocess, run_plugins_on_postprocess
 from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
 from calibre.ptempfile import PersistentTemporaryDirectory
+from calibre.utils.date import parse_date
 from calibre import extract, walk

 DEBUG_README=u'''

@@ -65,7 +66,7 @@ class Plumber(object):
     metadata_option_names = [
         'title', 'authors', 'title_sort', 'author_sort', 'cover', 'comments',
         'publisher', 'series', 'series_index', 'rating', 'isbn',
-        'tags', 'book_producer', 'language'
+        'tags', 'book_producer', 'language', 'pubdate', 'timestamp'
         ]

     def __init__(self, input, output, log, report_progress=DummyReporter(),

@@ -423,7 +424,7 @@ OptionRecommendation(name='author_sort',

 OptionRecommendation(name='cover',
     recommended_value=None, level=OptionRecommendation.LOW,
-    help=_('Set the cover to the specified file.')),
+    help=_('Set the cover to the specified file or URL')),

 OptionRecommendation(name='comments',
     recommended_value=None, level=OptionRecommendation.LOW,

@@ -461,6 +462,14 @@ OptionRecommendation(name='language',
     recommended_value=None, level=OptionRecommendation.LOW,
     help=_('Set the language.')),

+OptionRecommendation(name='pubdate',
+    recommended_value=None, level=OptionRecommendation.LOW,
+    help=_('Set the publication date.')),
+
+OptionRecommendation(name='timestamp',
+    recommended_value=None, level=OptionRecommendation.LOW,
+    help=_('Set the book timestamp (used by the date column in calibre).')),
+
 ]

 input_fmt = os.path.splitext(self.input)[1]

@@ -619,8 +628,30 @@ OptionRecommendation(name='language',
             except ValueError:
                 self.log.warn(_('Values of series index and rating must'
                     ' be numbers. Ignoring'), val)
                 continue
+            elif x in ('timestamp', 'pubdate'):
+                try:
+                    val = parse_date(val, assume_utc=x=='pubdate')
+                except:
+                    self.log.exception(_('Failed to parse date/time') + ' ' +
+                            unicode(val))
+                    continue
             setattr(mi, x, val)

+    def download_cover(self, url):
+        from calibre import browser
+        from PIL import Image
+        from cStringIO import StringIO
+        from calibre.ptempfile import PersistentTemporaryFile
+        self.log('Downloading cover from %r'%url)
+        br = browser()
+        raw = br.open_novisit(url).read()
+        buf = StringIO(raw)
+        pt = PersistentTemporaryFile('.jpg')
+        pt.close()
+        img = Image.open(buf)
+        img.convert('RGB').save(pt.name)
+        return pt.name
+
     def read_user_metadata(self):
         '''

@@ -638,6 +669,8 @@ OptionRecommendation(name='language',
             mi = MetaInformation(opf)
         self.opts_to_mi(mi)
         if mi.cover:
+            if mi.cover.startswith('http:') or mi.cover.startswith('https:'):
+                mi.cover = self.download_cover(mi.cover)
             mi.cover_data = ('', open(mi.cover, 'rb').read())
             mi.cover = None
         self.user_metadata = mi
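The new pubdate/timestamp values arrive as plain strings and are converted by calibre.utils.date.parse_date, imported at the top of this hunk; a rough sketch of the conversion (the date string is hypothetical):

    from calibre.utils.date import parse_date
    val = '2010-02-19'                          # e.g. from --pubdate
    pubdate = parse_date(val, assume_utc=True)  # timezone-aware datetime
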
@@ -753,6 +786,7 @@ OptionRecommendation(name='language',
                 self.oeb = create_oebbook(self.log, self.oeb, self.opts,
                         self.input_plugin)
         self.input_plugin.postprocess_book(self.oeb, self.opts, self.log)
+        self.opts.is_image_collection = self.input_plugin.is_image_collection
         pr = CompositeProgressReporter(0.34, 0.67, self.ui_reporter)
         self.flush()
         if self.opts.debug_pipeline is not None:

@@ -132,6 +132,8 @@ class EPUBInput(InputFormatPlugin):

         self.rationalize_cover(opf, log)

+        self.optimize_opf_parsing = opf
+
         with open('content.opf', 'wb') as nopf:
             nopf.write(opf.render())

@@ -256,7 +256,20 @@ class EPUBOutput(OutputFormatPlugin):
         Perform various markup transforms to get the output to render correctly
         in the quirky ADE.
         '''
-        from calibre.ebooks.oeb.base import XPath, XHTML, OEB_STYLES, barename
+        from calibre.ebooks.oeb.base import XPath, XHTML, OEB_STYLES, barename, urlunquote
+
+        # ADE cries big wet tears when it encounters an invalid fragment
+        # identifier in the NCX toc.
+        frag_pat = re.compile(r'[-A-Za-z0-9_:.]+$')
+        for node in self.oeb.toc.iter():
+            href = getattr(node, 'href', None)
+            if hasattr(href, 'partition'):
+                base, _, frag = href.partition('#')
+                frag = urlunquote(frag)
+                if frag and frag_pat.match(frag) is None:
+                    self.log.warn(
+                            'Removing invalid fragment identifier %r from TOC'%frag)
+                    node.href = base

         for x in self.oeb.spine:
             root = x.data
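What the fragment pattern accepts and rejects, illustratively:

    >>> import re
    >>> frag_pat = re.compile(r'[-A-Za-z0-9_:.]+$')
    >>> bool(frag_pat.match('chapter-1.2'))
    True
    >>> bool(frag_pat.match('bad frag!'))   # spaces/punctuation: dropped from the TOC
    False
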
@@ -111,7 +111,7 @@ class HTMLFile(object):
                 raise IOError(msg)
             raise IgnoreFile(msg, err.errno)

-        self.is_binary = not bool(self.HTML_PAT.search(src[:4096]))
+        self.is_binary = level > 0 and not bool(self.HTML_PAT.search(src[:4096]))
         if not self.is_binary:
             if encoding is None:
                 encoding = xml_to_unicode(src[:4096], verbose=verbose)[-1]

@@ -408,7 +408,10 @@ class HTMLInput(InputFormatPlugin):
             return link_
         if base and not os.path.isabs(link):
             link = os.path.join(base, link)
-        link = os.path.abspath(link)
+        try:
+            link = os.path.abspath(link)
+        except:
+            return link_
         if not os.access(link, os.R_OK):
             return link_
         if os.path.isdir(link):

@@ -50,6 +50,7 @@ from pylrf import (LrfWriter, LrfObject, LrfTag, LrfToc,
         STREAM_COMPRESSED, LrfTagStream, LrfStreamBase, IMAGE_TYPE_ENCODING,
         BINDING_DIRECTION_ENCODING, LINE_TYPE_ENCODING, LrfFileStream,
         STREAM_FORCE_COMPRESSED)
+from calibre.utils.date import isoformat

 DEFAULT_SOURCE_ENCODING = "cp1252"      # default is us-windows character set
 DEFAULT_GENREADING      = "fs"          # default is yes to both lrf and lrs

@@ -852,7 +853,7 @@ class DocInfo(object):
         self.thumbnail = None
         self.language = "en"
         self.creator = None
-        self.creationdate = date.today().isoformat()
+        self.creationdate = str(isoformat(date.today()))
         self.producer = "%s v%s"%(__appname__, __version__)
         self.numberofpages = "0"

@@ -10,9 +10,11 @@ import os, mimetypes, sys, re
 from urllib import unquote, quote
 from urlparse import urlparse

 from calibre import relpath

+from calibre.utils.config import tweaks
+from calibre.utils.date import isoformat

 _author_pat = re.compile(',?\s+(and|with)\s+', re.IGNORECASE)
 def string_to_authors(raw):
     raw = raw.replace('&&', u'\uffff')

@@ -27,6 +29,9 @@ def authors_to_string(authors):
         return ''

 def author_to_author_sort(author):
+    method = tweaks['author_sort_copy_method']
+    if method == 'copy' or (method == 'comma' and author.count(',') > 0):
+        return author
     tokens = author.split()
     tokens = tokens[-1:] + tokens[:-1]
     if len(tokens) > 1:

@@ -340,9 +345,9 @@ class MetaInformation(object):
         if self.rating is not None:
             fmt('Rating', self.rating)
         if self.timestamp is not None:
-            fmt('Timestamp', self.timestamp.isoformat(' '))
+            fmt('Timestamp', isoformat(self.timestamp))
         if self.pubdate is not None:
-            fmt('Published', self.pubdate.isoformat(' '))
+            fmt('Published', isoformat(self.pubdate))
         if self.rights is not None:
             fmt('Rights', unicode(self.rights))
         if self.lccn:

@@ -7,12 +7,11 @@ __docformat__ = 'restructuredtext en'
 Fetch metadata using Amazon AWS
 '''
 import sys, re
-from datetime import datetime

 from lxml import etree
-from dateutil import parser

 from calibre import browser
+from calibre.utils.date import parse_date, utcnow
 from calibre.ebooks.metadata import MetaInformation, string_to_authors

 AWS_NS = 'http://webservices.amazon.com/AWSECommerceService/2005-10-05'

@@ -44,9 +43,8 @@ def get_social_metadata(title, authors, publisher, isbn):
     try:
         d = root.findtext('.//'+AWS('PublicationDate'))
         if d:
-            default = datetime.utcnow()
-            default = datetime(default.year, default.month, 15)
-            d = parser.parse(d[0].text, default=default)
+            default = utcnow().replace(day=15)
+            d = parse_date(d[0].text, assume_utc=True, default=default)
             mi.pubdate = d
     except:
         pass
src/calibre/ebooks/metadata/archive.py (new file, 65 lines)
@@ -0,0 +1,65 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import with_statement
+
+__license__   = 'GPL v3'
+__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import os
+from contextlib import closing
+
+from calibre.customize import FileTypePlugin
+
+def is_comic(list_of_names):
+    extensions = set([x.rpartition('.')[-1].lower() for x in list_of_names])
+    return len(extensions) == 1 and iter(extensions).next() in ('jpg', 'jpeg', 'png')
+
+class ArchiveExtract(FileTypePlugin):
+    name = 'Archive Extract'
+    author = 'Kovid Goyal'
+    description = _('Extract common e-book formats from archive '
+            '(zip/rar) files. Also try to autodetect if they are actually '
+            'cbz/cbr files.')
+    file_types = set(['zip', 'rar'])
+    supported_platforms = ['windows', 'osx', 'linux']
+    on_import = True
+
+    def run(self, archive):
+        is_rar = archive.lower().endswith('.rar')
+        if is_rar:
+            from calibre.libunrar import extract_member, names
+        else:
+            from calibre.utils.zipfile import ZipFile
+            zf = ZipFile(archive, 'r')
+
+        if is_rar:
+            fnames = names(archive)
+        else:
+            fnames = zf.namelist()
+
+        fnames = [x for x in fnames if '.' in x]
+        if is_comic(fnames):
+            ext = '.cbr' if is_rar else '.cbz'
+            of = self.temporary_file('_archive_extract'+ext)
+            with open(archive, 'rb') as f:
+                of.write(f.read())
+            of.close()
+            return of.name
+        if len(fnames) > 1 or not fnames:
+            return archive
+        fname = fnames[0]
+        ext = os.path.splitext(fname)[1][1:]
+        if ext.lower() not in ('lit', 'epub', 'mobi', 'prc', 'rtf', 'pdf',
+                'mp3', 'pdb', 'azw', 'azw1'):
+            return archive
+
+        of = self.temporary_file('_archive_extract.'+ext)
+        with closing(of):
+            if is_rar:
+                data = extract_member(archive, match=None, name=fname)[1]
+                of.write(data)
+            else:
+                of.write(zf.read(fname))
+        return of.name
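A small illustration of the comic auto-detection (hypothetical name lists):

    >>> is_comic(['p001.jpg', 'p002.jpg', 'p003.jpg'])
    True
    >>> is_comic(['book.epub', 'cover.jpg'])   # mixed extensions, not a comic
    False
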
@@ -15,6 +15,7 @@ from calibre.ebooks.metadata import string_to_authors, authors_to_sort_string, \
     title_sort, MetaInformation
 from calibre.ebooks.lrf.meta import LRFMetaFile
 from calibre import prints
+from calibre.utils.date import parse_date

 USAGE='%%prog ebook_file [' + _('options') + ']\n' + \
 _('''

@@ -69,6 +70,8 @@ def config():
           help=_('Set the book producer.'))
     c.add_opt('language', ['-l', '--language'],
           help=_('Set the language.'))
+    c.add_opt('pubdate', ['-d', '--date'],
+          help=_('Set the published date.'))

     c.add_opt('get_cover', ['--get-cover'],
           help=_('Get the cover from the ebook and save it as the '

@@ -132,6 +135,8 @@ def do_set_metadata(opts, mi, stream, stream_type):
         mi.series = opts.series.strip()
     if getattr(opts, 'series_index', None) is not None:
         mi.series_index = float(opts.series_index.strip())
+    if getattr(opts, 'pubdate', None) is not None:
+        mi.pubdate = parse_date(opts.pubdate, assume_utc=False, as_utc=False)

     if getattr(opts, 'cover', None) is not None:
         ext = os.path.splitext(opts.cover)[1].replace('.', '').upper()

@@ -5,7 +5,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'

 '''Read meta information from epub files'''

-import os
+import os, re
 from cStringIO import StringIO
 from contextlib import closing

@@ -29,15 +29,15 @@ class Container(dict):
     def __init__(self, stream=None):
         if not stream: return
         soup = BeautifulStoneSoup(stream.read())
-        container = soup.find('container')
+        container = soup.find(name=re.compile(r'container$', re.I))
         if not container:
-            raise OCFException("<container/> element missing")
+            raise OCFException("<container> element missing")
         if container.get('version', None) != '1.0':
             raise EPubException("unsupported version of OCF")
-        rootfiles = container.find('rootfiles')
+        rootfiles = container.find(re.compile(r'rootfiles$', re.I))
         if not rootfiles:
             raise EPubException("<rootfiles/> element missing")
-        for rootfile in rootfiles.findAll('rootfile'):
+        for rootfile in rootfiles.findAll(re.compile(r'rootfile$', re.I)):
             try:
                 self[rootfile['media-type']] = rootfile['full-path']
             except KeyError:

@@ -69,7 +69,7 @@ class OCFReader(OCF):
         self.opf_path = self.container[OPF.MIMETYPE]
         try:
             with closing(self.open(self.opf_path)) as f:
-                self.opf = OPF(f, self.root)
+                self.opf = OPF(f, self.root, populate_spine=False)
         except KeyError:
             raise EPubException("missing OPF package file")
@@ -101,10 +101,9 @@ class OCFDirReader(OCFReader):
 def get_cover(opf, opf_path, stream):
     from calibre.ebooks import render_html_svg_workaround
     from calibre.utils.logging import default_log
-    spine = list(opf.spine_items())
-    if not spine:
+    cpage = opf.first_spine_item()
+    if not cpage:
         return
-    cpage = spine[0]
     with TemporaryDirectory('_epub_meta') as tdir:
         with CurrentDir(tdir):
             stream.seek(0)

@@ -6,14 +6,13 @@ __docformat__ = 'restructuredtext en'
 import sys, textwrap
 from urllib import urlencode
 from functools import partial
-from datetime import datetime

 from lxml import etree
-from dateutil import parser

 from calibre import browser, preferred_encoding
 from calibre.ebooks.metadata import MetaInformation
 from calibre.utils.config import OptionParser
+from calibre.utils.date import parse_date, utcnow

 NAMESPACES = {
     'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/',

@@ -156,9 +155,8 @@ class ResultList(list):
         try:
             d = date(entry)
             if d:
-                default = datetime.utcnow()
-                default = datetime(default.year, default.month, 15)
-                d = parser.parse(d[0].text, default=default)
+                default = utcnow().replace(day=15)
+                d = parse_date(d[0].text, assume_utc=True, default=default)
             else:
                 d = None
         except:

@@ -65,7 +65,22 @@ def _metadata_from_formats(formats):

     return mi

+def is_recipe(filename):
+    return filename.startswith('calibre') and \
+            filename.rpartition('.')[0].endswith('_recipe_out')
+
+def get_metadata(stream, stream_type='lrf', use_libprs_metadata=False):
+    pos = 0
+    if hasattr(stream, 'tell'):
+        pos = stream.tell()
+    try:
+        return _get_metadata(stream, stream_type, use_libprs_metadata)
+    finally:
+        if hasattr(stream, 'seek'):
+            stream.seek(pos)
+
-def get_metadata(stream, stream_type='lrf', use_libprs_metadata=False):
+def _get_metadata(stream, stream_type, use_libprs_metadata):
     if stream_type: stream_type = stream_type.lower()
     if stream_type in ('html', 'htm', 'xhtml', 'xhtm', 'xml'):
         stream_type = 'html'

@@ -84,11 +99,10 @@ def get_metadata(stream, stream_type='lrf', use_libprs_metadata=False):
         return opf

     mi = MetaInformation(None, None)
-    if prefs['read_file_metadata']:
-        mi = get_file_type_metadata(stream, stream_type)
-
     name = os.path.basename(getattr(stream, 'name', ''))
     base = metadata_from_filename(name)
+    if is_recipe(name) or prefs['read_file_metadata']:
+        mi = get_file_type_metadata(stream, stream_type)
     if base.title == os.path.splitext(name)[0] and base.authors is None:
         # Assume that there was no metadata in the file and the user set pattern
         # to match meta info from the file name did not match.
@@ -11,13 +11,11 @@ __docformat__ = 'restructuredtext en'

 from struct import pack, unpack
 from cStringIO import StringIO
-from datetime import datetime

 from calibre.ebooks.mobi import MobiError
 from calibre.ebooks.mobi.writer import rescale_image, MAX_THUMB_DIMEN
 from calibre.ebooks.mobi.langcodes import iana2mobi
-
-import struct
+from calibre.utils.date import now as nowf

 class StreamSlicer(object):

@@ -87,6 +85,8 @@ class StreamSlicer(object):
         self._stream.truncate(value)

 class MetadataUpdater(object):
+    DRM_KEY_SIZE = 48
+
     def __init__(self, stream):
         self.stream = stream
         data = self.data = StreamSlicer(stream)

@@ -105,14 +105,32 @@ class MetadataUpdater(object):
         have_exth = self.have_exth = (flags & 0x40) != 0
         self.cover_record = self.thumbnail_record = None
         self.timestamp = None

         self.pdbrecords = self.get_pdbrecords()

+        self.drm_block = None
+        if self.encryption_type != 0:
+            if self.have_exth:
+                self.drm_block = self.fetchDRMdata()
+            else:
+                raise MobiError('Unable to set metadata on DRM file without EXTH header')
+
         self.original_exth_records = {}
         if not have_exth:
             self.create_exth()
+            self.have_exth = True
         # Fetch timestamp, cover_record, thumbnail_record
         self.fetchEXTHFields()

+    def fetchDRMdata(self):
+        ''' Fetch the DRM keys '''
+        drm_offset = int(unpack('>I', self.record0[0xa8:0xac])[0])
+        self.drm_key_count = int(unpack('>I', self.record0[0xac:0xb0])[0])
+        drm_keys = ''
+        for x in range(self.drm_key_count):
+            base_addr = drm_offset + (x * self.DRM_KEY_SIZE)
+            drm_keys += self.record0[base_addr:base_addr + self.DRM_KEY_SIZE]
+        return drm_keys
+
     def fetchEXTHFields(self):
         stream = self.stream
         record0 = self.record0
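For orientation, the arithmetic fetchDRMdata performs over record 0, with made-up offsets (DRM_KEY_SIZE is the real constant, the rest is hypothetical):

    DRM_KEY_SIZE = 48
    drm_offset, drm_key_count = 0x1000, 2   # as read from 0xa8 / 0xac
    spans = [(drm_offset + x*DRM_KEY_SIZE, drm_offset + (x+1)*DRM_KEY_SIZE)
             for x in range(drm_key_count)]
    # spans == [(4096, 4144), (4144, 4192)]
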
@@ -131,14 +149,18 @@ class MetadataUpdater(object):
             content = exth[pos + 8: pos + size]
             pos += size

+            self.original_exth_records[id] = content
+
             if id == 106:
                 self.timestamp = content
             elif id == 201:
-                rindex, = self.cover_rindex, = unpack('>I', content)
-                self.cover_record = self.record(rindex + image_base)
+                rindex, = self.cover_rindex, = unpack('>i', content)
+                if rindex > 0:
+                    self.cover_record = self.record(rindex + image_base)
             elif id == 202:
-                rindex, = self.thumbnail_rindex, = unpack('>I', content)
-                self.thumbnail_record = self.record(rindex + image_base)
+                rindex, = self.thumbnail_rindex, = unpack('>i', content)
+                if rindex > 0:
+                    self.thumbnail_record = self.record(rindex + image_base)

     def patch(self, off, new_record0):
         # Save the current size of each record

@@ -181,14 +203,15 @@ class MetadataUpdater(object):
         off = self.pdbrecords[section][0]
         self.patch(off, new)

-    def create_exth(self, exth=None):
+    def create_exth(self, new_title=None, exth=None):
         # Add an EXTH block to record 0, rewrite the stream
         # self.hexdump(self.record0)
+        if isinstance(new_title, unicode):
+            new_title = new_title.encode(self.codec, 'replace')

-        # Fetch the title
-        title_offset, = struct.unpack('>L', self.record0[0x54:0x58])
-        title_length, = struct.unpack('>L', self.record0[0x58:0x5c])
-        title_in_file, = struct.unpack('%ds' % (title_length), self.record0[title_offset:title_offset + title_length])
+        # Fetch the existing title
+        title_offset, = unpack('>L', self.record0[0x54:0x58])
+        title_length, = unpack('>L', self.record0[0x58:0x5c])
+        title_in_file, = unpack('%ds' % (title_length), self.record0[title_offset:title_offset + title_length])

         # Adjust length to accommodate PrimaryINDX if necessary
         mobi_header_length, = unpack('>L', self.record0[0x14:0x18])

@@ -207,22 +230,30 @@ class MetadataUpdater(object):
             exth = ['EXTH', pack('>II', 12, 0), pad]
             exth = ''.join(exth)

-        # Update title_offset
-        self.record0[0x54:0x58] = pack('>L', 0x10 + mobi_header_length + len(exth))
+        # Update drm_offset(0xa8), title_offset(0x54)
+        if self.encryption_type != 0:
+            self.record0[0xa8:0xac] = pack('>L', 0x10 + mobi_header_length + len(exth))
+            self.record0[0xb0:0xb4] = pack('>L', len(self.drm_block))
+            self.record0[0x54:0x58] = pack('>L', 0x10 + mobi_header_length + len(exth) + len(self.drm_block))
+        else:
+            self.record0[0x54:0x58] = pack('>L', 0x10 + mobi_header_length + len(exth))
+
+        if new_title:
+            self.record0[0x58:0x5c] = pack('>L', len(new_title))

         # Create an updated Record0
         new_record0 = StringIO()
         new_record0.write(self.record0[:0x10 + mobi_header_length])
         new_record0.write(exth)
-        new_record0.write(title_in_file)
+        if self.encryption_type != 0:
+            new_record0.write(self.drm_block)
+        new_record0.write(new_title if new_title else title_in_file)

         # Pad to a 4-byte boundary
         trail = len(new_record0.getvalue()) % 4
         pad = '\0' * (4 - trail) # Always pad w/ at least 1 byte
         new_record0.write(pad)

         #self.hexdump(new_record0.getvalue())

         # Rebuild the stream, update the pdbrecords pointers
         self.patchSection(0, new_record0.getvalue())
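As a reminder of the record 0 layout relied on above, a sketch of reading the existing title (record0 stands in for the real header bytes):

    from struct import unpack
    title_offset, = unpack('>L', record0[0x54:0x58])
    title_length, = unpack('>L', record0[0x58:0x5c])
    title_in_file = record0[title_offset:title_offset + title_length]
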
@@ -244,7 +275,7 @@ class MetadataUpdater(object):
     def get_pdbrecords(self):
         pdbrecords = []
         for i in xrange(self.nrecs):
-            offset, a1,a2,a3,a4 = struct.unpack('>LBBBB', self.data[78+i*8:78+i*8+8])
+            offset, a1,a2,a3,a4 = unpack('>LBBBB', self.data[78+i*8:78+i*8+8])
             flags, val = a1, a2<<16|a3<<8|a4
             pdbrecords.append( [offset, flags, val] )
         return pdbrecords

@@ -275,6 +306,10 @@ class MetadataUpdater(object):
         return StreamSlicer(self.stream, start, stop)

     def update(self, mi):
+        def pop_exth_record(exth_id):
+            if exth_id in self.original_exth_records:
+                self.original_exth_records.pop(exth_id)
+
         if self.type != "BOOKMOBI":
             raise MobiError("Setting metadata only supported for MOBI files of type 'BOOK'.\n"
                             "\tThis is a '%s' file of type '%s'" % (self.type[0:4], self.type[4:8]))

@@ -289,34 +324,49 @@ class MetadataUpdater(object):
         if mi.author_sort and pas:
             authors = mi.author_sort
             recs.append((100, authors.encode(self.codec, 'replace')))
+            pop_exth_record(100)
         elif mi.authors:
             authors = '; '.join(mi.authors)
             recs.append((100, authors.encode(self.codec, 'replace')))
+            pop_exth_record(100)
         if mi.publisher:
             recs.append((101, mi.publisher.encode(self.codec, 'replace')))
+            pop_exth_record(101)
         if mi.comments:
             recs.append((103, mi.comments.encode(self.codec, 'replace')))
+            pop_exth_record(103)
         if mi.isbn:
             recs.append((104, mi.isbn.encode(self.codec, 'replace')))
+            pop_exth_record(104)
         if mi.tags:
             subjects = '; '.join(mi.tags)
             recs.append((105, subjects.encode(self.codec, 'replace')))
+            pop_exth_record(105)
         if mi.pubdate:
             recs.append((106, str(mi.pubdate).encode(self.codec, 'replace')))
+            pop_exth_record(106)
         elif mi.timestamp:
             recs.append((106, str(mi.timestamp).encode(self.codec, 'replace')))
+            pop_exth_record(106)
         elif self.timestamp:
             recs.append((106, self.timestamp))
+            pop_exth_record(106)
         else:
-            recs.append((106, str(datetime.now()).encode(self.codec, 'replace')))
+            recs.append((106, nowf().isoformat().encode(self.codec, 'replace')))
+            pop_exth_record(106)
         if self.cover_record is not None:
             recs.append((201, pack('>I', self.cover_rindex)))
             recs.append((203, pack('>I', 0)))
+            pop_exth_record(201)
+            pop_exth_record(203)
         if self.thumbnail_record is not None:
             recs.append((202, pack('>I', self.thumbnail_rindex)))
+            pop_exth_record(202)

-        if getattr(self, 'encryption_type', -1) != 0:
-            raise MobiError('Setting metadata in DRMed MOBI files is not supported.')
+        # Restore any original EXTH fields that weren't updated
+        for id in sorted(self.original_exth_records):
+            recs.append((id, self.original_exth_records[id]))
+        recs = sorted(recs, key=lambda x:(x[0],x[0]))

         exth = StringIO()
         for code, data in recs:

@@ -332,7 +382,7 @@ class MetadataUpdater(object):
             raise MobiError('No existing EXTH record. Cannot update metadata.')

         self.record0[92:96] = iana2mobi(mi.language)
-        self.create_exth(exth)
+        self.create_exth(exth=exth, new_title=mi.title)

         # Fetch updated timestamp, cover_record, thumbnail_record
         self.fetchEXTHFields()
@@ -12,12 +12,12 @@ from urllib import unquote
 from urlparse import urlparse

 from lxml import etree
-from dateutil import parser

 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.constants import __appname__, __version__, filesystem_encoding
 from calibre.ebooks.metadata.toc import TOC
 from calibre.ebooks.metadata import MetaInformation, string_to_authors
+from calibre.utils.date import parse_date, isoformat


 class Resource(object):

@@ -272,6 +272,10 @@ class Spine(ResourceCollection):
             self.id = idfunc(self.path)
             self.idref = None

+        def __repr__(self):
+            return 'Spine.Item(path=%r, id=%s, is_linear=%s)' % \
+                    (self.path, self.id, self.is_linear)
+
         @staticmethod
         def from_opf_spine_element(itemrefs, manifest):
             s = Spine(manifest)

@@ -280,7 +284,7 @@ class Spine(ResourceCollection):
             if idref is not None:
                 path = s.manifest.path_for_id(idref)
                 if path:
-                    r = Spine.Item(s.manifest.id_for_path, path, is_path=True)
+                    r = Spine.Item(lambda x:idref, path, is_path=True)
                     r.is_linear = itemref.get('linear', 'yes') == 'yes'
                     r.idref = idref
                     s.append(r)

@@ -441,6 +445,8 @@ class OPF(object):
     guide_path = XPath('descendant::*[re:match(name(), "guide", "i")]/*[re:match(name(), "reference", "i")]')

     title = MetadataField('title', formatter=lambda x: re.sub(r'\s+', ' ', x))
+    title_sort = MetadataField('title_sort', formatter=lambda x:
+                                re.sub(r'\s+', ' ', x), is_dc=False)
     publisher = MetadataField('publisher')
     language = MetadataField('language')
     comments = MetadataField('description')

@@ -449,12 +455,14 @@ class OPF(object):
     series = MetadataField('series', is_dc=False)
     series_index = MetadataField('series_index', is_dc=False, formatter=float, none_is=1)
     rating = MetadataField('rating', is_dc=False, formatter=int)
-    pubdate = MetadataField('date', formatter=parser.parse)
+    pubdate = MetadataField('date', formatter=parse_date)
     publication_type = MetadataField('publication_type', is_dc=False)
-    timestamp = MetadataField('timestamp', is_dc=False, formatter=parser.parse)
+    timestamp = MetadataField('timestamp', is_dc=False,
+                              formatter=parse_date)

-    def __init__(self, stream, basedir=os.getcwdu(), unquote_urls=True):
+    def __init__(self, stream, basedir=os.getcwdu(), unquote_urls=True,
+            populate_spine=True):
         if not hasattr(stream, 'read'):
             stream = open(stream, 'rb')
         raw = stream.read()

@@ -477,7 +485,7 @@ class OPF(object):
         self.manifest = Manifest.from_opf_manifest_element(m, basedir)
         self.spine = None
         s = self.spine_path(self.root)
-        if s:
+        if populate_spine and s:
             self.spine = Spine.from_opf_spine_element(s, self.manifest)
         self.guide = None
         guide = self.guide_path(self.root)

@@ -584,6 +592,15 @@ class OPF(object):
             if x.get('id', None) == idref:
                 yield x.get('href', '')

+    def first_spine_item(self):
+        items = self.iterspine()
+        if not items:
+            return None
+        idref = items[0].get('idref', '')
+        for x in self.itermanifest():
+            if x.get('id', None) == idref:
+                return x.get('href', None)
+
     def create_spine_item(self, idref):
         ans = etree.Element('{%s}itemref'%self.NAMESPACES['opf'], idref=idref)
         ans.tail = '\n\t\t'

@@ -675,29 +692,6 @@ class OPF(object):

         return property(fget=fget, fset=fset)

-    @dynamic_property
-    def title_sort(self):
-
-        def fget(self):
-            matches = self.title_path(self.metadata)
-            if matches:
-                for match in matches:
-                    ans = match.get('{%s}file-as'%self.NAMESPACES['opf'], None)
-                    if not ans:
-                        ans = match.get('file-as', None)
-                    if ans:
-                        return ans
-
-        def fset(self, val):
-            matches = self.title_path(self.metadata)
-            if matches:
-                for key in matches[0].attrib:
-                    if key.endswith('file-as'):
-                        matches[0].attrib.pop(key)
-                matches[0].set('file-as', unicode(val))
-
-        return property(fget=fget, fset=fset)
-
     @dynamic_property
     def tags(self):
@@ -869,7 +863,8 @@ class OPF(object):
     def smart_update(self, mi):
         for attr in ('title', 'authors', 'author_sort', 'title_sort',
                      'publisher', 'series', 'series_index', 'rating',
-                     'isbn', 'language', 'tags', 'category', 'comments'):
+                     'isbn', 'language', 'tags', 'category', 'comments',
+                     'pubdate'):
             val = getattr(mi, attr, None)
             if val is not None and val != [] and val != (None, None):
                 setattr(self, attr, val)

@@ -1041,12 +1036,12 @@ def metadata_to_opf(mi, as_string=True):
         elem.text = text.strip()
         metadata.append(elem)

-    factory(DC('title'), mi.title, mi.title_sort)
+    factory(DC('title'), mi.title)
     for au in mi.authors:
         factory(DC('creator'), au, mi.author_sort, 'aut')
     factory(DC('contributor'), mi.book_producer, __appname__, 'bkp')
     if hasattr(mi.pubdate, 'isoformat'):
-        factory(DC('date'), mi.pubdate.isoformat())
+        factory(DC('date'), isoformat(mi.pubdate))
     factory(DC('language'), mi.language)
     if mi.category:
         factory(DC('type'), mi.category)

@@ -1069,9 +1064,11 @@ def metadata_to_opf(mi, as_string=True):
     if mi.rating is not None:
         meta('rating', str(mi.rating))
     if hasattr(mi.timestamp, 'isoformat'):
-        meta('timestamp', mi.timestamp.isoformat())
+        meta('timestamp', isoformat(mi.timestamp))
     if mi.publication_type:
         meta('publication_type', mi.publication_type)
+    if mi.title_sort:
+        meta('title_sort', mi.title_sort)

     metadata[-1].tail = '\n' +(' '*4)

@@ -1088,12 +1085,12 @@ def metadata_to_opf(mi, as_string=True):


 def test_m2o():
-    from datetime import datetime
+    from calibre.utils.date import now as nowf
     from cStringIO import StringIO
     mi = MetaInformation('test & title', ['a"1', "a'2"])
     mi.title_sort = 'a\'"b'
     mi.author_sort = 'author sort'
-    mi.pubdate = datetime.now()
+    mi.pubdate = nowf()
     mi.language = 'en'
     mi.category = 'test'
     mi.comments = 'what a fun book\n\n'

@@ -1103,7 +1100,7 @@ def test_m2o():
     mi.series = 's"c\'l&<>'
     mi.series_index = 3.34
     mi.rating = 3
-    mi.timestamp = datetime.now()
+    mi.timestamp = nowf()
     mi.publication_type = 'ooooo'
     mi.rights = 'yes'
     mi.cover = 'asd.jpg'
|
@ -8,11 +8,15 @@ Read metadata from RAR archives
|
||||
'''
|
||||
|
||||
import os
|
||||
from cStringIO import StringIO
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
|
||||
from calibre.ptempfile import PersistentTemporaryFile, TemporaryDirectory
|
||||
from calibre.libunrar import extract_member, names
|
||||
from calibre import CurrentDir
|
||||
|
||||
def get_metadata(stream):
|
||||
from calibre.ebooks.metadata.archive import is_comic
|
||||
from calibre.ebooks.metadata.meta import get_metadata
|
||||
|
||||
path = getattr(stream, 'name', False)
|
||||
if not path:
|
||||
pt = PersistentTemporaryFile('_rar-meta.rar')
|
||||
@ -21,16 +25,19 @@ def get_metadata(stream):
|
||||
path = pt.name
|
||||
path = os.path.abspath(path)
|
||||
file_names = list(names(path))
|
||||
if is_comic(file_names):
|
||||
return get_metadata(stream, 'cbr')
|
||||
for f in file_names:
|
||||
stream_type = os.path.splitext(f)[1].lower()
|
||||
if stream_type:
|
||||
stream_type = stream_type[1:]
|
||||
if stream_type in ('lit', 'opf', 'prc', 'mobi', 'fb2', 'epub',
|
||||
'rb', 'imp', 'pdf', 'lrf'):
|
||||
data = extract_member(path, match=None, name=f)[1]
|
||||
stream = StringIO(data)
|
||||
from calibre.ebooks.metadata.meta import get_metadata
|
||||
with TemporaryDirectory() as tdir:
|
||||
with CurrentDir(tdir):
|
||||
stream = extract_member(path, match=None, name=f,
|
||||
as_file=True)[1]
|
||||
return get_metadata(stream, stream_type)
|
||||
raise ValueError('No ebook found in RAR archive')
|
||||
|
||||
raise ValueError('No ebook found in RAR archive')
|
||||
|
||||
|
||||
|
@ -3,22 +3,31 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
import os
|
||||
from zipfile import ZipFile
|
||||
from cStringIO import StringIO
|
||||
|
||||
from calibre.utils.zipfile import ZipFile
|
||||
from calibre.ptempfile import TemporaryDirectory
|
||||
from calibre import CurrentDir
|
||||
|
||||
def get_metadata(stream):
|
||||
from calibre.ebooks.metadata.meta import get_metadata
|
||||
from calibre.ebooks.metadata.archive import is_comic
|
||||
stream_type = None
|
||||
zf = ZipFile(stream, 'r')
|
||||
for f in zf.namelist():
|
||||
names = zf.namelist()
|
||||
if is_comic(names):
|
||||
# Is probably a comic
|
||||
return get_metadata(stream, 'cbz')
|
||||
|
||||
for f in names:
|
||||
stream_type = os.path.splitext(f)[1].lower()
|
||||
if stream_type:
|
||||
stream_type = stream_type[1:]
|
||||
if stream_type in ('lit', 'opf', 'prc', 'mobi', 'fb2', 'epub',
|
||||
'rb', 'imp', 'pdf', 'lrf'):
|
||||
from calibre.ebooks.metadata.meta import get_metadata
|
||||
stream = StringIO(zf.read(f))
|
||||
return get_metadata(stream, stream_type)
|
||||
with TemporaryDirectory() as tdir:
|
||||
with CurrentDir(tdir):
|
||||
path = zf.extract(f)
|
||||
return get_metadata(open(path, 'rb'), stream_type)
|
||||
raise ValueError('No ebook found in ZIP archive')
|
||||
|
||||
|
||||
|
@ -4,13 +4,11 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
Read data from .mobi files
|
||||
'''
|
||||
|
||||
import datetime
|
||||
import functools
|
||||
import os
|
||||
import re
|
||||
import struct
|
||||
import textwrap
|
||||
|
||||
import cStringIO
|
||||
|
||||
try:
|
||||
@ -23,6 +21,7 @@ from lxml import html, etree
|
||||
|
||||
from calibre import entity_to_unicode, CurrentDir
|
||||
from calibre.utils.filenames import ascii_filename
|
||||
from calibre.utils.date import parse_date
|
||||
from calibre.ptempfile import TemporaryDirectory
|
||||
from calibre.ebooks import DRMError
|
||||
from calibre.ebooks.chardet import ENCODING_PATS
|
||||
@ -68,7 +67,10 @@ class EXTHHeader(object):
|
||||
pass
|
||||
elif id == 503: # Long title
|
||||
if not title or title == _('Unknown'):
|
||||
title = content
|
||||
try:
|
||||
title = content.decode(codec)
|
||||
except:
|
||||
pass
|
||||
#else:
|
||||
# print 'unknown record', id, repr(content)
|
||||
if title:
|
||||
@ -96,8 +98,7 @@ class EXTHHeader(object):
|
||||
self.mi.tags = list(set(self.mi.tags))
|
||||
elif id == 106:
|
||||
try:
|
||||
self.mi.publish_date = datetime.datetime.strptime(
|
||||
content, '%Y-%m-%d', ).date()
|
||||
self.mi.pubdate = parse_date(content, as_utc=False)
|
||||
except:
|
||||
pass
|
||||
elif id == 108:
|
||||
@ -795,10 +796,11 @@ class MobiReader(object):
|
||||
def get_metadata(stream):
|
||||
from calibre.utils.logging import Log
|
||||
log = Log()
|
||||
|
||||
mi = MetaInformation(os.path.basename(stream.name), [_('Unknown')])
|
||||
try:
|
||||
mh = MetadataHeader(stream, log)
|
||||
if mh.title and mh.title != _('Unknown'):
|
||||
mi.title = mh.title
|
||||
|
||||
if mh.exth is not None:
|
||||
if mh.exth.mi is not None:
|
||||
@ -817,10 +819,15 @@ def get_metadata(stream):
|
||||
else:
|
||||
data = mh.section_data(mh.first_image_index)
|
||||
buf = cStringIO.StringIO(data)
|
||||
im = PILImage.open(buf)
|
||||
obuf = cStringIO.StringIO()
|
||||
im.convert('RGBA').save(obuf, format='JPEG')
|
||||
mi.cover_data = ('jpg', obuf.getvalue())
|
||||
try:
|
||||
im = PILImage.open(buf)
|
||||
except:
|
||||
log.exception('Failed to read MOBI cover')
|
||||
else:
|
||||
obuf = cStringIO.StringIO()
|
||||
im.convert('RGB').save(obuf, format='JPEG')
|
||||
mi.cover_data = ('jpg', obuf.getvalue())
|
||||
except:
|
||||
log.exception()
|
||||
log.filter_level = Log.DEBUG
|
||||
log.exception('Failed to read MOBI metadata')
|
||||
return mi
|
||||
|
@@ -310,6 +310,7 @@ class Serializer(object):
         text = text.replace('&', '&amp;')
         text = text.replace('<', '&lt;')
         text = text.replace('>', '&gt;')
+        text = text.replace(u'\u00AD', '') # Soft-hyphen
         if quot:
             text = text.replace('"', '&quot;')
         self.buffer.write(encode(text))

@@ -610,12 +611,21 @@ class MobiWriter(object):
             if (i>firstSequentialNode) and self._ctoc_map[i-1]['klass'] != 'section':
                 if offset != previousOffset + previousLength:
                     self._oeb.log.warning("*** TOC discontinuity: nodes are not sequential ***")
-                    self._oeb.log.warning(" node %03d: '%s' offset: 0x%X length: 0x%X" % \
+                    self._oeb.log.info(" node %03d: '%s' offset: 0x%X length: 0x%X" % \
                         (i-1, entries[i-1].title, previousOffset, previousLength) )
                     self._oeb.log.warning(" node %03d: '%s' offset: 0x%X != 0x%06X" % \
                         (i, child.title, offset, previousOffset + previousLength) )
-                    self._oeb.log.warning("\tnode data %03d: %s" % (i-1, self._ctoc_map[i-1]) )
-                    self._oeb.log.warning("\tnode data %03d: %s" % (i, self._ctoc_map[i]) )
+                    # self._oeb.log.warning("\tnode data %03d: %s" % (i-1, self._ctoc_map[i-1]) )
+                    # self._oeb.log.warning("\tnode data %03d: %s" % (i, self._ctoc_map[i]) )
+                    # Dump the offending entry
+                    self._oeb.log.info("...")
+                    for z in range(i-6 if i-6 > 0 else 0, i+6 if i+6 < len(entries) else len(entries)):
+                        if z == i:
+                            self._oeb.log.warning("child %03d: %s" % (z, entries[z]))
+                        else:
+                            self._oeb.log.info("child %03d: %s" % (z, entries[z]))
+                    self._oeb.log.info("...")

                     self._oeb.log.warning('_generate_indexed_navpoints: Failed to generate index')
                     # Zero out self._HTMLRecords, return False
                     self._HTMLRecords = []

@@ -1366,7 +1376,7 @@ class MobiWriter(object):
                 self._text_length,
                 self._text_nrecords-1, RECORD_SIZE, 0, 0)) # 0 - 15 (0x0 - 0xf)
         uid = random.randint(0, 0xffffffff)
-        title = str(metadata.title[0])
+        title = unicode(metadata.title[0]).encode('utf-8')
         # The MOBI Header

         # 0x0 - 0x3
@@ -16,9 +16,10 @@ from urllib import unquote as urlunquote
 from urlparse import urljoin

 from lxml import etree, html
-from cssutils import CSSParser

 import calibre
+from cssutils import CSSParser
 from calibre.constants import filesystem_encoding
 from calibre.translations.dynamic import translate
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.ebooks.oeb.entitydefs import ENTITYDEFS

@@ -434,10 +435,18 @@ class DirContainer(object):

     def namelist(self):
         names = []
-        for root, dirs, files in os.walk(self.rootdir):
+        base = self.rootdir
+        if isinstance(base, unicode):
+            base = base.encode(filesystem_encoding)
+        for root, dirs, files in os.walk(base):
             for fname in files:
                 fname = os.path.join(root, fname)
                 fname = fname.replace('\\', '/')
+                if not isinstance(fname, unicode):
+                    try:
+                        fname = fname.decode(filesystem_encoding)
+                    except:
+                        continue
                 names.append(fname)
         return names

@@ -842,8 +851,10 @@ class Manifest(object):
             self.oeb.log.warn('File %r appears to be a HTML fragment'%self.href)
             nroot = etree.fromstring('<html><body/></html>')
             parent = nroot[0]
-            for child in list(data):
-                child.getparent().remove(child)
+            for child in list(data.iter()):
+                oparent = child.getparent()
+                if oparent is not None:
+                    oparent.remove(child)
                 parent.append(child)
             data = nroot

@@ -1567,14 +1578,17 @@ class TOC(object):
         parent = etree.Element(NCX('navMap'))
         for node in self.nodes:
             id = node.id or unicode(uuid.uuid4())
-            attrib = {'id': id, 'playOrder': str(node.play_order)}
+            po = node.play_order
+            if po == 0:
+                po = 1
+            attrib = {'id': id, 'playOrder': str(po)}
             if node.klass:
                 attrib['class'] = node.klass
             point = element(parent, NCX('navPoint'), attrib=attrib)
             label = etree.SubElement(point, NCX('navLabel'))
             title = node.title
             if title:
-                title = re.sub(r'\s', ' ', title)
+                title = re.sub(r'\s+', ' ', title)
             element(label, NCX('text')).text = title
             element(point, NCX('content'), src=urlunquote(node.href))
             node.to_ncx(point)
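Two small illustrations of the TOC fixes above (values made up): playOrder 0 is clamped to 1 because NCX play orders are expected to start at 1, and the \s+ pattern collapses whitespace runs instead of replacing each character:

    >>> import re
    >>> re.sub(r'\s', ' ', 'a\n\n b')    # old: one space per whitespace char
    'a   b'
    >>> re.sub(r'\s+', ' ', 'a\n\n b')   # new: the whole run becomes one space
    'a b'
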
@@ -120,7 +120,10 @@ class EbookIterator(object):
         bad_map = {}
         font_family_pat = re.compile(r'font-family\s*:\s*([^;]+)')
         for csspath in css_files:
-            css = open(csspath, 'rb').read().decode('utf-8', 'replace')
+            try:
+                css = open(csspath, 'rb').read().decode('utf-8', 'replace')
+            except:
+                continue
             for match in re.compile(r'@font-face\s*{([^}]+)}').finditer(css):
                 block = match.group(1)
                 family = font_family_pat.search(block)

@@ -181,8 +184,9 @@ class EbookIterator(object):
         if hasattr(self.pathtoopf, 'manifest'):
             self.pathtoopf = write_oebbook(self.pathtoopf, self.base)

-        self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
+        self.opf = getattr(plumber.input_plugin, 'optimize_opf_parsing', None)
+        if self.opf is None:
+            self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
         self.language = self.opf.language
         if self.language:
             self.language = self.language.lower()
@@ -7,7 +7,7 @@ __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

 import os
-from datetime import datetime
+from calibre.utils.date import isoformat, now

 def meta_info_to_oeb_metadata(mi, m, log):
     from calibre.ebooks.oeb.base import OPF

@@ -60,10 +60,10 @@ def meta_info_to_oeb_metadata(mi, m, log):
         m.add('subject', t)
     if mi.pubdate is not None:
         m.clear('date')
-        m.add('date', mi.pubdate.isoformat())
+        m.add('date', isoformat(mi.pubdate))
     if mi.timestamp is not None:
         m.clear('timestamp')
-        m.add('timestamp', mi.timestamp.isoformat())
+        m.add('timestamp', isoformat(mi.timestamp))
     if mi.rights is not None:
         m.clear('rights')
         m.add('rights', mi.rights)

@@ -71,7 +71,7 @@ def meta_info_to_oeb_metadata(mi, m, log):
         m.clear('publication_type')
         m.add('publication_type', mi.publication_type)
     if not m.timestamp:
-        m.add('timestamp', datetime.now().isoformat())
+        m.add('timestamp', isoformat(now()))


 class MergeMetadata(object):
@@ -29,6 +29,9 @@ class RescaleImages(object):

         page_width, page_height = self.opts.dest.width, self.opts.dest.height
+        if not getattr(self.opts, 'is_image_collection', False):
+            page_width -= (self.opts.margin_left + self.opts.margin_right) * self.opts.dest.dpi/72.
+            page_height -= (self.opts.margin_top + self.opts.margin_bottom) * self.opts.dest.dpi/72.
         for item in self.oeb.manifest:
             if item.media_type.startswith('image'):
                 raw = item.data

@@ -53,7 +56,8 @@ class RescaleImages(object):
                 scaled, new_width, new_height = fit_image(width, height,
                         page_width, page_height)
                 if scaled:
-                    self.log('Rescaling image', item.href)
+                    self.log('Rescaling image from %dx%d to %dx%d'%(
+                        width, height, new_width, new_height), item.href)
                     if qt:
                         img = img.scaled(new_width, new_height,
                                 Qt.IgnoreAspectRatio, Qt.SmoothTransformation)
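To make the margin correction concrete, made-up profile numbers (600x800 px screen, 150 dpi, 10 pt margins on every side):

    page_width, page_height, dpi, margin = 600, 800, 150, 10
    page_width  -= 2 * margin * dpi / 72.   # -> ~558.3 px usable
    page_height -= 2 * margin * dpi / 72.   # -> ~758.3 px usable
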
@@ -169,6 +169,8 @@ int main(int argc, char **argv) {
     char *memblock;
     ifstream::pos_type size;
     int ret = 0;
+    map<string,string> info;
+    Reflow *reflow = NULL;

     if (argc != 2) {

@@ -189,9 +191,13 @@ int main(int argc, char **argv) {
     }

     try {
-        Reflow reflow(memblock, size);
-        reflow.render();
-        vector<char> *data = reflow.render_first_page();
+        reflow = new Reflow(memblock, size);
+        info = reflow->get_info();
+        for (map<string,string>::const_iterator it = info.begin() ; it != info.end(); it++ ) {
+            cout << (*it).first << " : " << (*it).second << endl;
+        }
+        //reflow->render();
+        vector<char> *data = reflow->render_first_page();
         ofstream file("cover.png", ios::binary);
         file.write(&((*data)[0]), data->size());
         delete data;

@@ -200,7 +206,7 @@ int main(int argc, char **argv) {
         cerr << e.what() << endl;
         ret = 1;
     }

+    delete reflow;
     delete[] memblock;
     return ret;
 }
@ -6,7 +6,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import sys
|
||||
import sys, os
|
||||
|
||||
from lxml import etree
|
||||
|
||||
@ -47,6 +47,10 @@ class Image(Element):
|
||||
return '<img src="%s" width="%dpx" height="%dpx"/>' % \
|
||||
(self.src, int(self.width), int(self.height))
|
||||
|
||||
def dump(self, f):
|
||||
f.write(self.to_html())
|
||||
f.write('\n')
|
||||
|
||||
|
||||
class Text(Element):
|
||||
|
||||
@ -91,6 +95,10 @@ class Text(Element):
|
||||
def to_html(self):
|
||||
return self.raw
|
||||
|
||||
def dump(self, f):
|
||||
f.write(self.to_html().encode('utf-8'))
|
||||
f.write('\n')
|
||||
|
||||
class FontSizeStats(dict):
|
||||
|
||||
def __init__(self, stats):
|
||||
@ -143,6 +151,14 @@ class Column(object):
|
||||
def add(self, elem):
|
||||
if elem in self.elements: return
|
||||
self.elements.append(elem)
|
||||
self._post_add()
|
||||
|
||||
def prepend(self, elem):
|
||||
if elem in self.elements: return
|
||||
self.elements.insert(0, elem)
|
||||
self._post_add()
|
||||
|
||||
def _post_add(self):
|
||||
self.elements.sort(cmp=lambda x,y:cmp(x.bottom,y.bottom))
|
||||
self.top = self.elements[0].top
|
||||
self.bottom = self.elements[-1].bottom
|
||||
@ -183,6 +199,11 @@ class Column(object):
|
||||
return None
|
||||
return self.elements[idx-1]
|
||||
|
||||
def dump(self, f, num):
|
||||
f.write('******** Column %d\n\n'%num)
|
||||
for elem in self.elements:
|
||||
elem.dump(f)
|
||||
|
||||
|
||||
class Box(list):
|
||||
|
||||
@ -282,7 +303,6 @@ class Region(object):
|
||||
mc = self.columns[0]
|
||||
return mc
|
||||
|
||||
print
|
||||
for c in singleton.columns:
|
||||
for elem in c:
|
||||
col = most_suitable_column(elem)
|
||||
@@ -303,6 +323,51 @@ class Region(object):
        for x in self.columns:
            yield x

    def absorb_regions(self, regions, at):
        for region in regions:
            self.absorb_region(region, at)

    def absorb_region(self, region, at):
        if len(region.columns) <= len(self.columns):
            for i in range(len(region.columns)):
                src, dest = region.columns[i], self.columns[i]
                if at != 'bottom':
                    src = reversed(list(iter(src)))
                for elem in src:
                    func = dest.add if at == 'bottom' else dest.prepend
                    func(elem)

        else:
            col_map = {}
            for i, col in enumerate(region.columns):
                max_overlap, max_overlap_index = 0, 0
                for j, dcol in enumerate(self.columns):
                    sint = Interval(col.left, col.right)
                    dint = Interval(dcol.left, dcol.right)
                    width = sint.intersection(dint).width
                    if width > max_overlap:
                        max_overlap = width
                        max_overlap_index = j
                col_map[i] = max_overlap_index
            lines = max(map(len, region.columns))
            if at == 'bottom':
                lines = range(lines)
            else:
                lines = range(lines-1, -1, -1)
            for i in lines:
                for j, src in enumerate(region.columns):
                    dest = self.columns[col_map[j]]
                    if i < len(src):
                        func = dest.add if at == 'bottom' else dest.prepend
                        func(src.elements[i])

    def dump(self, f):
        f.write('############################################################\n')
        f.write('########## Region (%d columns) ###############\n'%len(self.columns))
        f.write('############################################################\n\n')
        for i, col in enumerate(self.columns):
            col.dump(f, i)

    def linearize(self):
        self.elements = []
        for x in self.columns:
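
Note: when the absorbed region has more columns than the absorbing one, absorb_region maps each source column to the destination column whose horizontal extent overlaps it most. A self-contained sketch of that greedy mapping; the small Interval class is a stand-in for the one the reflow code uses:

    class Interval(object):
        def __init__(self, left, right):
            self.left, self.right = left, right
            self.width = max(0, right - left)

        def intersection(self, other):
            # Overlap of two intervals; disjoint intervals get width 0.
            return Interval(max(self.left, other.left), min(self.right, other.right))

    def map_columns(src_cols, dest_cols):
        # src_cols/dest_cols: lists of (left, right) pixel ranges.
        col_map = {}
        for i, (l, r) in enumerate(src_cols):
            best, best_width = 0, 0
            for j, (dl, dr) in enumerate(dest_cols):
                w = Interval(l, r).intersection(Interval(dl, dr)).width
                if w > best_width:
                    best, best_width = j, w
            col_map[i] = best
        return col_map

    print(map_columns([(0, 100), (120, 200), (210, 300)], [(0, 150), (160, 300)]))
    # {0: 0, 1: 1, 2: 1} -- the middle column overlaps the right-hand one more
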
@@ -375,7 +440,8 @@ class Page(object):
                self.font_size_stats[t.font_size] = 0
            self.font_size_stats[t.font_size] += len(t.text_as_string)
            self.average_text_height += t.height
        self.average_text_height /= len(self.texts)
        if len(self.texts):
            self.average_text_height /= len(self.texts)

        self.font_size_stats = FontSizeStats(self.font_size_stats)

@@ -430,7 +496,20 @@ class Page(object):
        if not current_region.is_empty:
            self.regions.append(current_region)

        if self.opts.verbose > 2:
            self.debug_dir = 'page-%d'%self.number
            os.mkdir(self.debug_dir)
            self.dump_regions('pre-coalesce')

        self.coalesce_regions()
        self.dump_regions('post-coalesce')

    def dump_regions(self, fname):
        fname = 'regions-'+fname+'.txt'
        with open(os.path.join(self.debug_dir, fname), 'wb') as f:
            f.write('Page #%d\n\n'%self.number)
            for region in self.regions:
                region.dump(f)

    def coalesce_regions(self):
        # find contiguous sets of small regions
@@ -439,47 +518,57 @@ class Page(object):
        # region)
        found = True
        absorbed = set([])
        processed = set([])
        while found:
            found = False
            for i, region in enumerate(self.regions):
                if region.is_small:
                    if region in absorbed:
                        continue
                if region.is_small and region not in processed:
                    found = True
                    regions = []
                    processed.add(region)
                    regions = [region]
                    end = i+1
                    for j in range(i+1, len(self.regions)):
                        end = j
                        if self.regions[j].is_small:
                            regions.append(self.regions[j])
                        else:
                            break
                    prev_region = None if i == 0 else i-1
                    next_region = j if self.regions[j] not in regions else None
                    next_region = end if end < len(self.regions) and self.regions[end] not in regions else None
                    absorb_at = 'bottom'
                    if prev_region is None and next_region is not None:
                        absorb_into = next_region
                        absorb_at = 'top'
                    elif next_region is None and prev_region is not None:
                        absorb_into = prev_region
                    elif prev_region is None and next_region is None:
                        if len(regions) > 1:
                            absorb_into = regions[0]
                            absorb_into = i
                            regions = regions[1:]
                        else:
                            absorb_into = None
                    else:
                        absorb_into = prev_region
                        if next_region.line_count >= prev_region.line_count:
                        if self.regions[next_region].line_count >= \
                                self.regions[prev_region].line_count:
                            avg_column_count = sum([len(r.columns) for r in
                                regions])/float(len(regions))
                            if next_region.line_count > prev_region.line_count \
                                or abs(avg_column_count - len(prev_region.columns)) \
                                > abs(avg_column_count - len(next_region.columns)):
                            if self.regions[next_region].line_count > \
                                    self.regions[prev_region].line_count \
                                or abs(avg_column_count -
                                    len(self.regions[prev_region].columns)) \
                                > abs(avg_column_count -
                                    len(self.regions[next_region].columns)):
                                absorb_into = next_region
                                absorb_at = 'top'
                    if absorb_into is not None:
                        absorb_into.absorb_region(regions)
                        self.regions[absorb_into].absorb_regions(regions, absorb_at)
                        absorbed.update(regions)
                    i = j
            for region in absorbed:
                self.regions.remove(region)


    def sort_into_columns(self, elem, neighbors):
        neighbors.add(elem)
        neighbors = sorted(neighbors, cmp=lambda x,y:cmp(x.left, y.left))
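
Note: the rewritten coalesce_regions tracks regions by index and decides whether a run of small regions is absorbed upward into the preceding region or downward into the following one: the neighbor is only reconsidered when the following region has at least as many lines, with the run's average column count as a tie-breaker. A condensed sketch of just that decision, with hypothetical line_count/columns attributes standing in for the real Region objects:

    def pick_absorber(prev_region, next_region, small_regions):
        # Returns (which neighbor absorbs the run, where it is inserted).
        if prev_region is None:
            return 'next', 'top'
        if next_region is None:
            return 'prev', 'bottom'
        if next_region.line_count >= prev_region.line_count:
            avg_cols = sum(len(r.columns) for r in small_regions) / float(len(small_regions))
            if (next_region.line_count > prev_region.line_count
                    or abs(avg_cols - len(prev_region.columns))
                     > abs(avg_cols - len(next_region.columns))):
                # The following region is taller, or its column layout is
                # closer to the run being absorbed: merge at its top.
                return 'next', 'top'
        return 'prev', 'bottom'
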
@@ -598,8 +687,9 @@ class PDFDocument(object):
        for elem in self.elements:
            html.extend(elem.to_html())
        html += ['</body>', '</html>']
        raw = (u'\n'.join(html)).replace('</strong><strong>', '')
        with open('index.html', 'wb') as f:
            f.write((u'\n'.join(html)).encode('utf-8'))
            f.write(raw.encode('utf-8'))


@@ -182,10 +182,10 @@ class PML_HTMLizer(object):
        return pml

    def strip_pml(self, pml):
        pml = re.sub(r'\\C\d=".+*"', '', pml)
        pml = re.sub(r'\\Fn=".+*"', '', pml)
        pml = re.sub(r'\\Sd=".+*"', '', pml)
        pml = re.sub(r'\\.=".+*"', '', pml)
        pml = re.sub(r'\\C\d=".*"', '', pml)
        pml = re.sub(r'\\Fn=".*"', '', pml)
        pml = re.sub(r'\\Sd=".*"', '', pml)
        pml = re.sub(r'\\.=".*"', '', pml)
        pml = re.sub(r'\\X\d', '', pml)
        pml = re.sub(r'\\S[pbd]', '', pml)
        pml = re.sub(r'\\Fn', '', pml)
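
Note: this change is more than cosmetic. `".+*"` is not a valid pattern — a `*` directly after `+` is a "multiple repeat" error in Python's re module — so each of the old substitutions would raise instead of stripping anything. A quick check, runnable on its own:

    import re

    try:
        re.compile(r'\\C\d=".+*"')   # old pattern: '*' cannot follow '+'
    except re.error as e:
        print('invalid:', e)         # invalid: multiple repeat

    print(re.sub(r'\\C\d=".*"', '', r'\C0="Chapter" text'))  # ' text'
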
@@ -131,9 +131,9 @@ class RtfTokenParser():
        if isString(self.tokens[i].name, "\\'"):
            i = i + 1
            if not isinstance(self.tokens[i], tokenData):
                raise BaseException('Error: token8bitChar without data.')
                raise Exception('Error: token8bitChar without data.')
            if len(self.tokens[i].data) < 2:
                raise BaseException('Error: token8bitChar without data.')
                raise Exception('Error: token8bitChar without data.')
            newTokens.append(token8bitChar(self.tokens[i].data[0:2]))
            if len(self.tokens[i].data) > 2:
                newTokens.append(tokenData(self.tokens[i].data[2:]))
@@ -195,7 +195,7 @@ class RtfTokenParser():
                    i = i + 1
                    j = j + 1
                    continue
                raise BaseException('Error: incorect utf replacement.')
                raise Exception('Error: incorect utf replacement.')

        #calibre rtf2xml does not support utfreplace
        replace = []
@@ -248,7 +248,7 @@ class RtfTokenizer():

            if isChar(self.rtfData[i], '\\'):
                if i + 1 >= len(self.rtfData):
                    raise BaseException('Error: Control character found at the end of the document.')
                    raise Exception('Error: Control character found at the end of the document.')

                if lastDataStart > -1:
                    self.tokens.append(tokenData(self.rtfData[lastDataStart : i]))
@@ -269,7 +269,7 @@ class RtfTokenizer():
                        i = i + 1

                    if not consumed:
                        raise BaseException('Error (at:%d): Control Word without end.'%(tokenStart))
                        raise Exception('Error (at:%d): Control Word without end.'%(tokenStart))

                    #we have numeric argument before delimiter
                    if isChar(self.rtfData[i], '-') or isDigit(self.rtfData[i]):
@@ -283,10 +283,10 @@ class RtfTokenizer():
                            l = l + 1
                            i = i + 1
                            if l > 10 :
                                raise BaseException('Error (at:%d): Too many digits in control word numeric argument.'%[tokenStart])
                                raise Exception('Error (at:%d): Too many digits in control word numeric argument.'%[tokenStart])

                        if not consumed:
                            raise BaseException('Error (at:%d): Control Word without numeric argument end.'%[tokenStart])
                            raise Exception('Error (at:%d): Control Word without numeric argument end.'%[tokenStart])

                    separator = ''
                    if isChar(self.rtfData[i], ' '):

@@ -27,7 +27,7 @@ from calibre.ebooks.rtf2xml import headings_to_sections, \
    paragraph_def, convert_to_tags, output, copy, \
    list_numbers, info, pict, table_info, fonts, paragraphs, \
    body_styles, preamble_rest, group_styles, \
    inline, correct_unicode
    inline
from calibre.ebooks.rtf2xml.old_rtf import OldRtf

"""
@@ -256,15 +256,6 @@ class ParseRtf:
            )
            pict_obj.process_pict()
            self.__bracket_match('pict_data_info')
            correct_uni_obj = correct_unicode.CorrectUnicode(
                in_file = self.__temp_file,
                bug_handler = RtfInvalidCodeException,
                copy = self.__copy,
                run_level = self.__run_level,
                exception_handler = InvalidRtfException,
            )
            correct_uni_obj.correct_unicode()
            self.__bracket_match('correct_unicode_info')
            combine_obj = combine_borders.CombineBorders(
                in_file = self.__temp_file,
                bug_handler = RtfInvalidCodeException,

@@ -1,94 +0,0 @@
#########################################################################
#                                                                       #
#                                                                       #
#   copyright 2002 Paul Henry Tremblay                                  #
#                                                                       #
#   This program is distributed in the hope that it will be useful,     #
#   but WITHOUT ANY WARRANTY; without even the implied warranty of      #
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU   #
#   General Public License for more details.                            #
#                                                                       #
#   You should have received a copy of the GNU General Public License   #
#   along with this program; if not, write to the Free Software         #
#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA            #
#   02111-1307 USA                                                      #
#                                                                       #
#                                                                       #
#########################################################################
import os, re, tempfile

from calibre.ebooks.rtf2xml import copy

class CorrectUnicode:
    """
    corrects sequences such as \u201c\'F0\'BE
    Where \'F0\'BE has to be eliminated.
    """
    def __init__(self,
            in_file,
            exception_handler,
            bug_handler,
            copy = None,
            run_level = 1,
        ):
        self.__file = in_file
        self.__bug_handler = bug_handler
        self.__copy = copy
        self.__run_level = run_level
        self.__write_to = tempfile.mktemp()
        self.__exception_handler = exception_handler
        self.__bug_handler = bug_handler
        self.__state = 'outside'
        self.__utf_exp = re.compile(r'&#x(.*?);')

    def __process_token(self, line):
        if self.__state == 'outside':
            if line[:5] == 'tx<ut':
                self.__handle_unicode(line)
            else:
                self.__write_obj.write(line)
        elif self.__state == 'after':
            if line[:5] == 'tx<hx':
                pass
            elif line[:5] == 'tx<ut':
                self.__handle_unicode(line)
            else:
                self.__state = 'outside'
                self.__write_obj.write(line)
        else:
            raise 'should\'t happen'

    def __handle_unicode(self, line):
        token = line[16:]
        match_obj = re.search(self.__utf_exp, token)
        if match_obj:
            uni_char = match_obj.group(1)
            dec_num = int(uni_char, 16)
            if dec_num > 57343 and dec_num < 63743:
                self.__state = 'outside'
            else:
                self.__write_obj.write(line)
                self.__state = 'after'
        else:
            self.__write_obj.write(line)
            self.__state = 'outside'

    def correct_unicode(self):
        """
        Requires:
            nothing
        Returns:
            nothing (changes the original file)
        Logic:
            Read one line in at a time.
        """
        read_obj = open(self.__file, 'r')
        self.__write_obj = open(self.__write_to, 'w')
        line_to_read = 1
        while line_to_read:
            line_to_read = read_obj.readline()
            line = line_to_read
            self.__token_info = line[:16]
            self.__process_token(line)
        read_obj.close()
        self.__write_obj.close()
        copy_obj = copy.Copy(bug_handler = self.__bug_handler)
        if self.__copy:
            copy_obj.copy_file(self.__write_to, "correct_unicode.data")
        copy_obj.rename(self.__write_to, self.__file)
        os.remove(self.__write_to)
@@ -72,7 +72,7 @@ class Tokenize:
        return line

    def __compile_expressions(self):
        self.__ms_hex_exp = re.compile(r"\\\'(..)")
        self.__utf_exp = re.compile(r"\\u(-?\d{3,6})")
        self.__utf_exp = re.compile(r"\\u(-?\d{3,6}) {0,1}")
        self.__splitexp = re.compile(r"(\\[\\{}]|{|}|\\[^\s\\{}&]+(?:\s)?)")
        self.__par_exp = re.compile(r'\\$')
        self.__mixed_exp = re.compile(r"(\\[a-zA-Z]+\d+)(\D+)")

@@ -2,9 +2,11 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
""" The GUI """
import os
from threading import RLock

from PyQt4.QtCore import QVariant, QFileInfo, QObject, SIGNAL, QBuffer, Qt, QSize, \
                         QByteArray, QTranslator, QCoreApplication, QThread, \
                         QEvent
                         QEvent, QTimer, pyqtSignal
from PyQt4.QtGui import QFileDialog, QMessageBox, QPixmap, QFileIconProvider, \
                        QIcon, QTableView, QApplication, QDialog, QPushButton

@@ -234,16 +236,17 @@ def human_readable(size):
    return size + " " + suffix

class Dispatcher(QObject):
    '''Convenience class to ensure that a function call always happens in the GUI thread'''
    SIGNAL = SIGNAL('dispatcher(PyQt_PyObject,PyQt_PyObject)')
    '''Convenience class to ensure that a function call always happens in the
    thread the reciver was created in.'''
    dispatch_signal = pyqtSignal(object, object)

    def __init__(self, func):
        QObject.__init__(self)
        self.func = func
        self.connect(self, self.SIGNAL, self.dispatch, Qt.QueuedConnection)
        self.dispatch_signal.connect(self.dispatch, type=Qt.QueuedConnection)

    def __call__(self, *args, **kwargs):
        self.emit(self.SIGNAL, args, kwargs)
        self.dispatch_signal.emit(args, kwargs)

    def dispatch(self, args, kwargs):
        self.func(*args, **kwargs)
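
Note: the new-style pyqtSignal replaces the string-based SIGNAL machinery; because the connection is queued, calling a Dispatcher from a worker thread re-delivers the call in the event loop of the thread the Dispatcher was created in. A hedged usage sketch — update_progress and the worker are hypothetical, not calibre code, and a running QApplication event loop is assumed:

    from threading import Thread

    def update_progress(percent, msg=''):
        print('%d%% %s' % (percent, msg))

    # Created in the receiving (GUI) thread, so update_progress runs there:
    update = Dispatcher(update_progress)

    # Any worker thread can then invoke it safely; the queued connection
    # delivers the call asynchronously in the Dispatcher's home thread.
    Thread(target=update, args=(42,), kwargs={'msg': 'converting'}).start()
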
@@ -533,6 +536,8 @@ class Application(QApplication):
        self._translator = None
        self.load_translations()
        qt_app = self
        self._file_open_paths = []
        self._file_open_lock = RLock()

        if islinux:
            self.setStyleSheet('''
@@ -545,6 +550,12 @@ class Application(QApplication):
            }
            ''')

    def _send_file_open_events(self):
        with self._file_open_lock:
            if self._file_open_paths:
                self.file_event_hook(self._file_open_paths)
                self._file_open_paths = []


    def load_translations(self):
        if self._translator is not None:
@@ -557,7 +568,9 @@ class Application(QApplication):
        if callable(self.file_event_hook) and e.type() == QEvent.FileOpen:
            path = unicode(e.file())
            if os.access(path, os.R_OK):
                self.file_event_hook(path)
                with self._file_open_lock:
                    self._file_open_paths.append(path)
                QTimer.singleShot(1000, self._send_file_open_events)
            return True
        else:
            return QApplication.event(self, e)
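
Note: this is the fix for dragging multiple books onto the OS X dock icon — instead of firing file_event_hook once per QFileOpen event, paths are buffered under a lock and flushed in one batch a second later, so a multi-file drop produces a single add. The same debounce pattern in miniature, with threading.Timer standing in for QTimer.singleShot so the sketch runs without Qt:

    import threading

    class Batcher(object):
        def __init__(self, callback, delay=1.0):
            self.callback, self.delay = callback, delay
            self.lock = threading.RLock()
            self.pending = []

        def add(self, path):
            with self.lock:
                self.pending.append(path)
            # One flush per burst is enough; later timers find an empty list.
            threading.Timer(self.delay, self.flush).start()

        def flush(self):
            with self.lock:
                if self.pending:
                    self.callback(self.pending)
                    self.pending = []

    b = Batcher(lambda paths: print('opening', paths))
    for p in ('a.epub', 'b.epub', 'c.epub'):
        b.add(p)
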
@@ -23,7 +23,7 @@ def gui_convert(input, output, recommendations, notification=DummyReporter(),

    plumber.run()

def gui_catalog(fmt, title, dbspec, ids, out_file_name, sync, fmt_options,
def gui_catalog(fmt, title, dbspec, ids, out_file_name, sync, fmt_options, connected_device,
        notification=DummyReporter(), log=None):
    if log is None:
        log = Log()
@@ -44,6 +44,7 @@ def gui_catalog(fmt, title, dbspec, ids, out_file_name, sync, fmt_options,
    # Populate opts
    # opts.gui_search_text = something
    opts.catalog_title = title
    opts.connected_device = connected_device
    opts.ids = ids
    opts.search_text = None
    opts.sort_by = None

@@ -115,6 +115,9 @@
       <property name="decimals">
        <number>1</number>
       </property>
       <property name="maximum">
        <double>200.000000000000000</double>
       </property>
      </widget>
     </item>
     <item row="1" column="0">
@@ -135,6 +138,9 @@
       <property name="decimals">
        <number>1</number>
       </property>
       <property name="maximum">
        <double>200.000000000000000</double>
       </property>
      </widget>
     </item>
     <item row="2" column="0">
@@ -155,6 +161,9 @@
       <property name="decimals">
        <number>1</number>
       </property>
       <property name="maximum">
        <double>200.000000000000000</double>
       </property>
      </widget>
     </item>
     <item row="3" column="0">
@@ -175,6 +184,9 @@
       <property name="decimals">
        <number>1</number>
       </property>
       <property name="maximum">
        <double>200.000000000000000</double>
       </property>
      </widget>
     </item>
    </layout>

@@ -12,7 +12,8 @@ class ChooseFormatDialog(QDialog, Ui_ChooseFormatDialog):
        QDialog.__init__(self, window)
        Ui_ChooseFormatDialog.__init__(self)
        self.setupUi(self)
        self.connect(self.formats, SIGNAL('activated(QModelIndex)'), lambda i: self.accept())
        self.connect(self.formats, SIGNAL('activated(QModelIndex)'),
                self.activated_slot)

        self.msg.setText(msg)
        for format in formats:
@@ -20,6 +21,15 @@ class ChooseFormatDialog(QDialog, Ui_ChooseFormatDialog):
                    format.upper()))
        self._formats = formats
        self.formats.setCurrentRow(0)
        self._format = None

    def activated_slot(self, *args):
        self.accept()

    def format(self):
        return self._formats[self.formats.currentRow()]
        return self._format

    def accept(self):
        self._format = self._formats[self.formats.currentRow()]
        return QDialog.accept(self)

@@ -174,7 +174,7 @@
     <item>
      <widget class="QCheckBox" name="opt_overwrite_author_title_metadata">
       <property name="text">
        <string>Overwrite & author/title by default when fetching metadata</string>
        <string>&Overwrite author and title by default when fetching metadata</string>
       </property>
      </widget>
     </item>
@@ -612,7 +612,7 @@
     <item row="0" column="0" colspan="2">
      <widget class="QLabel" name="label_22">
       <property name="text">
        <string>calibre can send your books to you (or your reader) by email</string>
        <string>calibre can send your books to you (or your reader) by email. Emails will be automatically sent for downloaded news to all email addresses that have Auto-send checked.</string>
       </property>
       <property name="wordWrap">
        <bool>true</bool>

@@ -119,7 +119,7 @@
     <item>
      <widget class="QCheckBox" name="opt_overwrite_author_title_metadata">
       <property name="text">
        <string>Overwrite &author/title with author/title of selected book</string>
        <string>Overwrite author and title with author and title of selected book</string>
       </property>
      </widget>
     </item>

@@ -10,7 +10,6 @@ import os
import re
import time
import traceback
from datetime import datetime, timedelta

from PyQt4.Qt import SIGNAL, QObject, QCoreApplication, Qt, QTimer, QThread, QDate, \
                     QPixmap, QListWidgetItem, QDialog
@@ -28,7 +27,8 @@ from calibre.ebooks.metadata import authors_to_sort_string, string_to_authors, \
from calibre.ebooks.metadata.library_thing import cover_from_isbn
from calibre import islinux
from calibre.ebooks.metadata.meta import get_metadata
from calibre.utils.config import prefs
from calibre.utils.config import prefs, tweaks
from calibre.utils.date import qt_to_dt
from calibre.customize.ui import run_plugins_on_import, get_isbndb_key
from calibre.gui2.dialogs.config.social import SocialMetadata

@@ -354,12 +354,10 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
        self.comments.setPlainText(comments if comments else '')
        cover = self.db.cover(row)
        pubdate = db.pubdate(self.id, index_is_id=True)
        self.local_timezone_offset = timedelta(seconds=time.timezone) - timedelta(hours=time.daylight)
        pubdate = pubdate - self.local_timezone_offset
        self.pubdate.setDate(QDate(pubdate.year, pubdate.month,
            pubdate.day))
        timestamp = db.timestamp(self.id, index_is_id=True)
        timestamp = timestamp - self.local_timezone_offset
        self.orig_timestamp = timestamp
        self.date.setDate(QDate(timestamp.year, timestamp.month,
            timestamp.day))

@@ -399,6 +397,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
        if not pm.isNull():
            self.cover.setPixmap(pm)
            self.cover_data = cover
        self.original_series_name = unicode(self.series.text()).strip()

    def validate_isbn(self, isbn):
        isbn = unicode(isbn).strip()
@@ -582,7 +581,6 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
        if book.isbn: self.isbn.setText(book.isbn)
        if book.pubdate:
            d = book.pubdate
            d = d - self.local_timezone_offset
            self.pubdate.setDate(QDate(d.year, d.month, d.day))
        summ = book.comments
        if summ:
@@ -610,10 +608,13 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
    def increment_series_index(self):
        if self.db is not None:
            try:
                series = unicode(self.series.text())
                if series:
                    ns = self.db.get_next_series_num_for(series)
                series = unicode(self.series.text()).strip()
                if series and series != self.original_series_name:
                    ns = 1
                    if tweaks['series_index_auto_increment'] == 'next':
                        ns = self.db.get_next_series_num_for(series)
                    self.series_index.setValue(ns)
                    self.original_series_name = series
            except:
                traceback.print_exc()

@@ -645,18 +646,19 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
            re.sub(r'[^0-9a-zA-Z]', '', unicode(self.isbn.text())), notify=False)
        self.db.set_rating(self.id, 2*self.rating.value(), notify=False)
        self.db.set_publisher(self.id, qstring_to_unicode(self.publisher.currentText()), notify=False)
        self.db.set_tags(self.id, qstring_to_unicode(self.tags.text()).split(','), notify=False)
        self.db.set_series(self.id, qstring_to_unicode(self.series.currentText()), notify=False)
        self.db.set_tags(self.id, [x.strip() for x in
            unicode(self.tags.text()).split(',')], notify=False)
        self.db.set_series(self.id,
            unicode(self.series.currentText()).strip(), notify=False)
        self.db.set_series_index(self.id, self.series_index.value(), notify=False)
        self.db.set_comment(self.id, qstring_to_unicode(self.comments.toPlainText()), notify=False)
        d = self.pubdate.date()
        d = datetime(d.year(), d.month(), d.day())
        d = d + self.local_timezone_offset
        d = qt_to_dt(d)
        self.db.set_pubdate(self.id, d)
        d = self.date.date()
        d = datetime(d.year(), d.month(), d.day())
        d = d + self.local_timezone_offset
        self.db.set_timestamp(self.id, d)
        d = qt_to_dt(d)
        if d.date() != self.orig_timestamp.date():
            self.db.set_timestamp(self.id, d)

        if self.cover_changed:
            if self.cover_data is not None:
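
Note: these hunks replace hand-rolled offset arithmetic (time.timezone/time.daylight) with qt_to_dt, part of this release's move to timezone-aware dates: values are stored as aware UTC datetimes and converted to local time only at the GUI edge. A sketch of what such a QDate-to-UTC conversion involves — an illustration of the idea, not calibre's actual qt_to_dt, and it assumes dateutil is available:

    from datetime import datetime
    from dateutil.tz import tzlocal, tzutc   # assumed available for the sketch

    def qdate_to_utc(year, month, day):
        # Interpret the picked date as local midnight, then normalize to UTC
        # so the stored value is timezone independent.
        local = datetime(year, month, day, tzinfo=tzlocal())
        return local.astimezone(tzutc())

    print(qdate_to_utc(2010, 2, 19))
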
@@ -7,7 +7,7 @@ __docformat__ = 'restructuredtext en'
Scheduler for automated recipe downloads
'''

from datetime import datetime, timedelta
from datetime import timedelta

from PyQt4.Qt import QDialog, SIGNAL, Qt, QTime, QObject, QMenu, \
        QAction, QIcon, QMutex, QTimer
@@ -17,6 +17,7 @@ from calibre.gui2.search_box import SearchBox2
from calibre.gui2 import config as gconf, error_dialog
from calibre.web.feeds.recipes.model import RecipeModel
from calibre.ptempfile import PersistentTemporaryFile
from calibre.utils.date import utcnow

class SchedulerDialog(QDialog, Ui_Dialog):

@@ -185,7 +186,7 @@ class SchedulerDialog(QDialog, Ui_Dialog):
        self.day.setCurrentIndex(day+1)
        self.time.setTime(QTime(hour, minute))

        d = datetime.utcnow() - last_downloaded
        d = utcnow() - last_downloaded
        def hm(x): return (x-x%3600)//3600, (x%3600 - (x%3600)%60)//60
        hours, minutes = hm(d.seconds)
        tm = _('%d days, %d hours and %d minutes ago')%(d.days, hours, minutes)

@@ -5,21 +5,30 @@ from PyQt4.QtGui import QDialog

from calibre.gui2.dialogs.search_ui import Ui_Dialog
from calibre.gui2 import qstring_to_unicode

from calibre.library.database2 import CONTAINS_MATCH, EQUALS_MATCH

class SearchDialog(QDialog, Ui_Dialog):

    def __init__(self, *args):
        QDialog.__init__(self, *args)
        self.setupUi(self)
        self.mc = ''

    def tokens(self, raw):
        phrases = re.findall(r'\s+".*?"\s+', raw)
        phrases = re.findall(r'\s*".*?"\s*', raw)
        for f in phrases:
            raw = raw.replace(f, ' ')
        return [t.strip() for t in phrases + raw.split()]
        phrases = [t.strip('" ') for t in phrases]
        return ['"' + self.mc + t + '"' for t in phrases + [r.strip() for r in raw.split()]]

    def search_string(self):
        mk = self.matchkind.currentIndex()
        if mk == CONTAINS_MATCH:
            self.mc = ''
        elif mk == EQUALS_MATCH:
            self.mc = '='
        else:
            self.mc = '~'
        all, any, phrase, none = map(lambda x: unicode(x.text()),
                (self.all, self.any, self.phrase, self.none))
        all, any, none = map(self.tokens, (all, any, none))
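
Note: with the match-kind prefix folded into each token, a query built by search_string carries the exact-match or regexp marker on every term. The tokenizer's behavior, lifted out as a standalone function so it can be run in isolation:

    import re

    def tokens(raw, mc='='):
        # Same logic as SearchDialog.tokens above, with mc as set by
        # search_string for EQUALS_MATCH.
        phrases = re.findall(r'\s*".*?"\s*', raw)
        for f in phrases:
            raw = raw.replace(f, ' ')
        phrases = [t.strip('" ') for t in phrases]
        return ['"' + mc + t + '"' for t in phrases + [r.strip() for r in raw.split()]]

    print(tokens('fiction "science fiction"'))
    # ['"=science fiction"', '"=fiction"']
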
@@ -104,7 +104,64 @@
      </widget>
     </item>
     <item>
      <widget class="QLabel" name="label_5" >
      <widget class="QGroupBox" name="groupBox" >
       <property name="maximumSize" >
        <size>
         <width>16777215</width>
         <height>60</height>
        </size>
       </property>
       <layout class="QHBoxLayout" name="horizontalLayout_5" >
        <item>
         <widget class="QLabel" name="label_5" >
          <property name="text" >
           <string>What kind of match to use:</string>
          </property>
          <property name="buddy" >
           <cstring>matchkind</cstring>
          </property>
         </widget>
        </item>
        <item>
         <widget class="QComboBox" name="matchkind">
          <item>
           <property name="text">
            <string>Contains: the word or phrase matches anywhere in the metadata</string>
           </property>
          </item>
          <item>
           <property name="text">
            <string>Equals: the word or phrase must match an entire metadata field</string>
           </property>
          </item>
          <item>
           <property name="text">
            <string>Regular expression: the expression must match anywhere in the metadata</string>
           </property>
          </item>
         </widget>
        </item>
        <item>
         <widget class="QLabel" name="label_51" >
          <property name="sizePolicy">
           <sizepolicy hsizetype="Preferred" vsizetype="Preferred">
            <horstretch>40</horstretch>
            <verstretch>0</verstretch>
           </sizepolicy>
          </property>
          <property name="text" >
           <string> </string>
          </property>
          <property name="buddy" >
           <cstring>matchkind</cstring>
          </property>
         </widget>
        </item>
       </layout>
      </widget>
     </item>
     <item>
      <widget class="QLabel" name="label_6" >
       <property name="maximumSize" >
        <size>
         <width>16777215</width>

@@ -1,11 +1,11 @@
from calibre.ebooks.metadata import authors_to_string
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import os, textwrap, traceback, time, re
from datetime import timedelta, datetime
from operator import attrgetter

import os, textwrap, traceback, re, shutil
from operator import attrgetter
from math import cos, sin, pi
from contextlib import closing

from PyQt4.QtGui import QTableView, QAbstractItemView, QColor, \
                        QItemDelegate, QPainterPath, QLinearGradient, QBrush, \
                        QPen, QStyle, QPainter, \
@@ -17,13 +17,16 @@ from PyQt4.QtCore import QAbstractTableModel, QVariant, Qt, pyqtSignal, \
from calibre import strftime
from calibre.ptempfile import PersistentTemporaryFile
from calibre.utils.pyparsing import ParseException
from calibre.library.database2 import FIELD_MAP
from calibre.library.database2 import FIELD_MAP, _match, CONTAINS_MATCH, EQUALS_MATCH, REGEXP_MATCH
from calibre.gui2 import NONE, TableView, qstring_to_unicode, config, \
                         error_dialog
from calibre.gui2.widgets import EnLineEdit, TagsLineEdit
from calibre.utils.search_query_parser import SearchQueryParser
from calibre.ebooks.metadata.meta import set_metadata as _set_metadata
from calibre.ebooks.metadata import string_to_authors, fmt_sidx
from calibre.ebooks.metadata import string_to_authors, fmt_sidx, \
        authors_to_string
from calibre.utils.config import tweaks
from calibre.utils.date import dt_factory, qt_to_dt, isoformat

class LibraryDelegate(QItemDelegate):
    COLOR = QColor("blue")
@@ -97,7 +100,10 @@ class DateDelegate(QStyledItemDelegate):

    def createEditor(self, parent, option, index):
        qde = QStyledItemDelegate.createEditor(self, parent, option, index)
        qde.setDisplayFormat(unicode(qde.displayFormat()).replace('yy', 'yyyy'))
        stdformat = unicode(qde.displayFormat())
        if 'yyyy' not in stdformat:
            stdformat = stdformat.replace('yy', 'yyyy')
        qde.setDisplayFormat(stdformat)
        qde.setMinimumDate(QDate(101,1,1))
        qde.setCalendarPopup(True)
        return qde
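
Note: the guard added above matters because a blind replace('yy', 'yyyy') applied to a locale format that already uses a four-digit year turns 'yyyy' into 'yyyyyyyy'. The behavior, in isolation:

    fmt = 'dd/MM/yyyy'
    print(fmt.replace('yy', 'yyyy'))      # dd/MM/yyyyyyyy -- the old bug
    if 'yyyy' not in fmt:
        fmt = fmt.replace('yy', 'yyyy')   # the new code leaves it alone
    print(fmt)                            # dd/MM/yyyy
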
@@ -465,8 +471,11 @@ class BooksModel(QAbstractTableModel):
                    break
            if format is not None:
                pt = PersistentTemporaryFile(suffix='.'+format)
                pt.write(self.db.format(id, format, index_is_id=True))
                pt.flush()
                with closing(self.db.format(id, format, index_is_id=True,
                    as_file=True)) as src:
                    shutil.copyfileobj(src, pt)
                    pt.flush()
                pt.seek(0)
                if set_metadata:
                    _set_metadata(pt, self.db.get_metadata(id, get_cover=True, index_is_id=True),
                                  format)
@@ -499,8 +508,10 @@ class BooksModel(QAbstractTableModel):
                    break
            if format is not None:
                pt = PersistentTemporaryFile(suffix='.'+format)
                pt.write(self.db.format(row, format))
                pt.flush()
                with closing(self.db.format(row, format, as_file=True)) as src:
                    shutil.copyfileobj(src, pt)
                    pt.flush()
                pt.seek(0)
                if set_metadata:
                    _set_metadata(pt, self.db.get_metadata(row, get_cover=True),
                                  format)
@@ -563,13 +574,11 @@ class BooksModel(QAbstractTableModel):
        def timestamp(r):
            dt = self.db.data[r][tmdx]
            if dt:
                dt = dt - timedelta(seconds=time.timezone) + timedelta(hours=time.daylight)
                return QDate(dt.year, dt.month, dt.day)

        def pubdate(r):
            dt = self.db.data[r][pddx]
            if dt:
                dt = dt - timedelta(seconds=time.timezone) + timedelta(hours=time.daylight)
                return QDate(dt.year, dt.month, dt.day)

        def rating(r):
@@ -657,21 +666,20 @@ class BooksModel(QAbstractTableModel):
                    self.db.set_series_index(id, float(match.group(1)))
                    val = pat.sub('', val).strip()
                elif val:
                    ni = self.db.get_next_series_num_for(val)
                    if ni != 1:
                        self.db.set_series_index(id, ni)
                    if tweaks['series_index_auto_increment'] == 'next':
                        ni = self.db.get_next_series_num_for(val)
                        if ni != 1:
                            self.db.set_series_index(id, ni)
                if val:
                    self.db.set_series(id, val)
            elif column == 'timestamp':
                if val.isNull() or not val.isValid():
                    return False
                dt = datetime(val.year(), val.month(), val.day()) + timedelta(seconds=time.timezone) - timedelta(hours=time.daylight)
                self.db.set_timestamp(id, dt)
                self.db.set_timestamp(id, qt_to_dt(val, as_utc=False))
            elif column == 'pubdate':
                if val.isNull() or not val.isValid():
                    return False
                dt = datetime(val.year(), val.month(), val.day()) + timedelta(seconds=time.timezone) - timedelta(hours=time.daylight)
                self.db.set_pubdate(id, dt)
                self.db.set_pubdate(id, qt_to_dt(val, as_utc=False))
            else:
                self.db.set(row, column, val)
            self.emit(SIGNAL("dataChanged(QModelIndex, QModelIndex)"), \
@@ -888,7 +896,20 @@ class OnDeviceSearch(SearchQueryParser):

    def get_matches(self, location, query):
        location = location.lower().strip()
        query = query.lower().strip()

        matchkind = CONTAINS_MATCH
        if len(query) > 1:
            if query.startswith('\\'):
                query = query[1:]
            elif query.startswith('='):
                matchkind = EQUALS_MATCH
                query = query[1:]
            elif query.startswith('~'):
                matchkind = REGEXP_MATCH
                query = query[1:]
        if matchkind != REGEXP_MATCH: ### leave case in regexps because it can be significant e.g. \S \W \D
            query = query.lower()

        if location not in ('title', 'author', 'tag', 'all', 'format'):
            return set([])
        matches = set([])
@@ -899,13 +920,24 @@ class OnDeviceSearch(SearchQueryParser):
            'tag':lambda x: ','.join(getattr(x, 'tags')).lower(),
            'format':lambda x: os.path.splitext(x.path)[1].lower()
        }
        for i, v in enumerate(locations):
            locations[i] = q[v]
        for i, r in enumerate(self.model.db):
            for loc in locations:
        for index, row in enumerate(self.model.db):
            for locvalue in locations:
                accessor = q[locvalue]
                try:
                    if query in loc(r):
                        matches.add(i)
                    ### Can't separate authors because comma is used for name sep and author sep
                    ### Exact match might not get what you want. For that reason, turn author
                    ### exactmatch searches into contains searches.
                    if locvalue == 'author' and matchkind == EQUALS_MATCH:
                        m = CONTAINS_MATCH
                    else:
                        m = matchkind

                    if locvalue == 'tag':
                        vals = accessor(row).split(',')
                    else:
                        vals = [accessor(row)]
                    if _match(query, vals, m):
                        matches.add(index)
                    break
                except ValueError: # Unicode errors
                    import traceback
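
Note: _match is imported from calibre.library.database2 alongside the three match-kind constants; its body is not shown in this diff, so here is a plausible reading of its contract, reimplemented as a sketch (not the library's actual code):

    import re

    CONTAINS_MATCH, EQUALS_MATCH, REGEXP_MATCH = 0, 1, 2

    def _match(query, values, matchkind):
        # True if query matches any candidate value under the given kind.
        for v in values:
            v = v.strip()
            if matchkind == EQUALS_MATCH and query == v:
                return True
            if matchkind == REGEXP_MATCH and re.search(query, v):
                return True
            if matchkind == CONTAINS_MATCH and query in v:
                return True
        return False

    print(_match('fiction', ['science fiction'], EQUALS_MATCH))    # False
    print(_match('fiction', ['science fiction'], CONTAINS_MATCH))  # True
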
@@ -1003,7 +1035,8 @@ class DeviceBooksModel(BooksModel):
        def datecmp(x, y):
            x = self.db[x].datetime
            y = self.db[y].datetime
            return cmp(datetime(*x[0:6]), datetime(*y[0:6]))
            return cmp(dt_factory(x, assume_utc=True), dt_factory(y,
                assume_utc=True))
        def sizecmp(x, y):
            x, y = int(self.db[x].size), int(self.db[y].size)
            return cmp(x, y)
@@ -1052,10 +1085,8 @@ class DeviceBooksModel(BooksModel):
            type = ext[1:].lower()
            data[_('Format')] = type
            data[_('Path')] = item.path
            dt = item.datetime
            dt = datetime(*dt[0:6])
            dt = dt - timedelta(seconds=time.timezone) + timedelta(hours=time.daylight)
            data[_('Timestamp')] = strftime('%a %b %d %H:%M:%S %Y', dt.timetuple())
            dt = dt_factory(item.datetime, assume_utc=True)
            data[_('Timestamp')] = isoformat(dt, sep=' ', as_utc=False)
            data[_('Tags')] = ', '.join(item.tags)
            self.emit(SIGNAL('new_bookdisplay_data(PyQt_PyObject)'), data)

@@ -1090,8 +1121,7 @@ class DeviceBooksModel(BooksModel):
                return QVariant(BooksView.human_readable(size))
            elif col == 3:
                dt = self.db[self.map[row]].datetime
                dt = datetime(*dt[0:6])
                dt = dt - timedelta(seconds=time.timezone) + timedelta(hours=time.daylight)
                dt = dt_factory(dt, assume_utc=True, as_utc=False)
                return QVariant(strftime(BooksView.TIME_FMT, dt.timetuple()))
            elif col == 4:
                tags = self.db[self.map[row]].tags

@@ -79,6 +79,8 @@ class _Canvas(QGraphicsRectItem):
        pen = QPen()
        pen.setStyle(Qt.NoPen)
        self.setPen(pen)
        if not hasattr(self, 'children'):
            self.children = self.childItems

    def layout_block(self, block, x, y):
        if isinstance(block, TextBlock):
Some files were not shown because too many files have changed in this diff.