Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-09 03:04:10 -04:00)

commit bb5a05a792: merge from trunk

Changelog.yaml: 176 lines changed. The hunk @@ -4,6 +4,182 @@ adds entries for releases 0.7.33 and 0.7.32 above the existing 0.7.31 entry (date: 2010-11-27); the surrounding header comments are unchanged context:

- version: 0.7.33
  date: 2010-12-10

  new features:
    - title: "Language sensitive sorting"
      type: major
      description: >
        "calibre now sorts using language specific rules. The language used is
        the language of the calibre interface, which can be changed via
        Preferences->Look & Feel. There is also a tweak that allows you to use a
        different language from the one used for the calibre interface. Powered
        by the ICU library."

    - title: "Add an action to merge only formats and leave metadata alone (Shift+Alt+M)"
      tickets: [7709]

    - title: "Add a tweak to control which custom columns are displayed in the Book details panel."

    - title: "Implement a more sophisticated 'functional programming' template language. See the User Manual for details."

    - title: "Speed up deleting of large numbers of books and show progress while doing so"

    - title: "Adding books: Don't refresh the Tag Browser while adding multiple books. Should speed up the adding of large numbers of books."

    - title: "Edit metadata dialog: When trying to download metadata, if there are multiple matches indicate which matches have a cover and summary in the list. Also add an option to automatically download the cover of the selected match."

    - title: "Drivers for the HTC Legend and Samsung Epic"

    - title: "FB2 Output: Convert SVG images in the input document to raster images"

    - title: "News download: Localize the navigation bars in the downloaded news to the language the user has selected for their calibre interface"

  bug fixes:
    - title: "Various fixes to the Title Case function"
      tickets: [7846]

    - title: "Content server: Fix --url-prefix being ignored for links at the Top level"

    - title: "News download: When generating periodicals for the SONY use the local timezone in the SONY specific metadata"

    - title: "Fix bug in cover cache that could cause it to keep a large number of covers in memory. Showed up when adding large numbers of books to calibre."
      tickets: [7813]

    - title: "Adding books: Run in the main thread to prevent unfortunate interactions with the metadata backup. Also fix regression that broke the Abort button."

    - title: "Fix a crash on OS X if OK is clicked in the edit metadata dialog while downloading a cover"
      tickets: [7716]

    - title: "E-book viewer: Fix a regression that prevented bookmarks from working with some EPUB files"
      tickets: [7812]

    - title: "Save to disk: Refactor to not open a database connection in the worker process. Also fix a bug that could lead to save failures not being reported."

    - title: "Fix regression in 0.7.32 that broke opening formats in the ebook viewer from the edit metadata dialog"

    - title: "FB2 Output: Generate output 100% compliant with the FB2 spec"

    - title: "Fix Saved search dropdown box losing the selected search"
      tickets: [7787]

    - title: "TXT Output: Fix an issue where the br to space conversion was not being handled properly."

  improved recipes:
    - Le Monde
    - Ming Pao
    - New Yorker

  new recipes:
    - title: "ToyoKeizai News and Nikkei Social News"
      author: "Hiroshi Miura"

    - title: "St. Louis Post Dispatch"
      author: "cisaak"

    - title: "Heise Open and Technology Review"
      author: "Anton Gillert"


- version: 0.7.32
  date: 2010-12-03

  new features:
    - title: "All new linux binary build, with updated libraries and replacing cx_Freeze with my own C python launcher code."

    - title: "Edit metadata dialog: Add Next and Previous buttons and show cover size in tooltip"
      tickets: [7706, 7711]

    - title: "A new custom column type: Enumeration. This column can take one of a user defined set of values."

    - title: "PML Output: Add option to reduce image sizes/bit depth to allow PML Output to be used with DropBook"

    - title: "TXT Output: Add option to generate Markdown output. Turn <br> tags into spaces."

    - title: "Add a count function to the template language. Make author_sort searchable."

    - title: "Various consistency and usability enhancements to the search box."
      tickets: [7726]
      description: >
        "Always select the first book in the result set of a search. Similar
        books searches are added to search history. Search history order is no
        longer randomized. When focusing the search box with a keyboard
        shortcut, select all text. If you press Enter in the search box, the
        search is executed and the book list is automatically focused."

    - title: "Drivers for the Samsung Fascinate and PocketBook 902"

    - title: "FB2 Output: Add option to create FB2 sections based on internal file structure of input file (useful for EPUB files that have been split on chapter boundaries). Also add options to mark h1/h2/h3 tags as section titles in the FB2 file."
      tickets: [7738]

    - title: "Metadata jacket: Add publisher information to jacket."

    - title: "Catalog generation: Allow use of custom columns as well as tags to indicate read books. Note that your previously saved read books setting will be lost."

    - title: "Bulk metadata edit dialog: Add an Apply button to allow you to perform multiple operations in sequence"

    - title: "Allow drag and drop of books onto user categories. If you drag a book from a particular column (say authors) and drop it onto a user category, the column value will be added to the user category. So for authors, the authors will be added to the user category."

    - title: "Check Library can now check and repair the has_cover cache"

    - title: "Allow GUI plugins to be distributed in ZIP files. See http://www.mobileread.com/forums/showthread.php?t=108774"

    - title: "Allow searching by the number of tags/authors/formats/etc. See User Manual for details."

    - title: "Tiny speed up when loading large libraries and make various metadata editing tasks a little faster by reducing the number of times the Tag Browser is refreshed"

  bug fixes:
    - title: "E-book viewer: Fix broken backwards searching"

    - title: "Fix custom ratings column values being displayed incorrectly in book details area"
      tickets: [7740]

    - title: "Fix book details dialog not using internal viewer to view ebooks"
      tickets: [7424]

    - title: "MOBI Output: When the input document does not explicitly specify a size for images, set the size to be the natural size of the image. This works around Amazon's *truly wonderful* MOBI renderer's tendency to expand images that do not have a width and height specified."

    - title: "Conversion pipeline: Fix bug that caused height/width specified in %/em of screen size to be incorrectly calculated by a factor of 72./DPI"

    - title: "Conversion pipeline: Respect max-width and max-height when calculating the effective size of an element"

    - title: "Conversion pipeline: Do not override CSS for images with the value of the img width/height attributes, unless no CSS is specified for the image"

    - title: "E-book viewer: Resize automatically to fit on smaller screens"

    - title: "Use the same MIME database on all platforms that calibre runs on, works around python 2.7's crazy insistence on reading MIME data from the registry"

    - title: "Kobo driver: Allow html, txt and rtf documents to be deleted"

    - title: "Always overwrite title/author metadata when downloading metadata for books added by ISBN"

    - title: "Nook Color profile: Reduce screen height to 900px"

    - title: "Fix regression that broke RTF conversion on some linux systems"

    - title: "Fix bug that could break searching after copying and deleting a book from the current library"
      tickets: [7459]

  improved recipes:
    - NZZ
    - Frankfurter Rundschau
    - JiJi Press
    - Revista Muy Interesante

  new recipes:
    - title: "Global Times"
      author: "malfi"

    - title: "The Philosopher's Magazine"
      author: "Darko Miletic"

    - title: "Poughkeepsie Journal"
      author: "weebl"

    - title: "Business Spectator and ABC Australia"
      author: "Dean Cording"

    - title: "La Rioja and NacionRed"
      author: "Arturo Martinez Nieves"

    - title: "Animal Politico"
      author: "leamsi"

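The 0.7.33 template-language rewrite and the 0.7.32 count() function above both surface in ordinary template expressions. A quick, illustrative sketch of the syntax (the sample text is invented; the User Manual is the authoritative reference):

    {tags:count(,)}                          -> number of tags on the book
    {series:test(in a series,standalone)}    -> branches on whether series is set

Expressions like these work anywhere calibre accepts a template, for example in Save-to-disk filename patterns.
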
Tweaks file:

@@ -181,19 +181,25 @@ max_content_server_tags_shown=5
 # content_server_will_display is a list of custom fields to be displayed.
 # content_server_wont_display is a list of custom fields not to be displayed.
 # wont_display has priority over will_display.
-# The special value '*' means all custom fields.
+# The special value '*' means all custom fields. The value [] means no entries.
 # Defaults:
 # content_server_will_display = ['*']
-# content_server_wont_display = ['']
+# content_server_wont_display = []
 # Examples:
 # To display only the custom fields #mytags and #genre:
 # content_server_will_display = ['#mytags', '#genre']
-# content_server_wont_display = ['']
+# content_server_wont_display = []
 # To display all fields except #mycomments:
 # content_server_will_display = ['*']
 # content_server_wont_display['#mycomments']
 content_server_will_display = ['*']
-content_server_wont_display = ['']
+content_server_wont_display = []
 
+# Same as above (content server) but for the book details pane. Same syntax.
+# As above, this tweak affects only display of custom fields. The standard
+# fields are not affected
+book_details_will_display = ['*']
+book_details_wont_display = []
 
 # Set the maximum number of sort 'levels' that calibre will use to resort the

@@ -217,3 +223,15 @@ generate_cover_foot_font = None
 # open_viewer, do_nothing, edit_cell. Default: open_viewer.
 # Example: doubleclick_on_library_view = 'do_nothing'
 doubleclick_on_library_view = 'open_viewer'
+
+# Language to use when sorting. Setting this tweak will force sorting to use the
+# collating order for the specified language. This might be useful if you run
+# calibre in English but want sorting to work in the language where you live.
+# Set the tweak to the desired ISO 639-1 language code, in lower case.
+# You can find the list of supported locales at
+# http://publib.boulder.ibm.com/infocenter/iseries/v5r3/topic/nls/rbagsicusortsequencetables.htm
+# Default: locale_for_sorting = ''  -- use the language calibre displays in
+# Example: locale_for_sorting = 'fr' -- sort using French rules.
+# Example: locale_for_sorting = 'nb' -- sort using Norwegian rules.
+locale_for_sorting = ''
New binary image files (not shown):
  resources/images/news/novaya_gazeta.png (610 B)
  resources/images/news/tpm_uk.png (873 B)
  resources/images/news/vedomosti.png (693 B)

Book jacket stylesheet:

@@ -36,22 +36,37 @@
 /*
 ** Title
 */
-.cbj_title {
+table.cbj_header td.cbj_title {
     font-size: x-large;
+    font-style: italic;
+    text-align: center;
+}
+
+/*
+** Series
+*/
+table.cbj_header td.cbj_series {
+    font-size: medium;
     text-align: center;
 }
 
 /*
 ** Author
 */
-.cbj_author {
+table.cbj_header td.cbj_author {
     font-size: medium;
     text-align: center;
-    margin-bottom: 1ex;
 }
 
 /*
-** Table containing Series, Publication Year, Rating and Tags
+** Publisher/published
+*/
+table.cbj_header td.cbj_pubdata {
+    text-align: center;
+}
+
+/*
+** Table containing Rating and Tags
 */
 table.cbj_header {
     width: 100%;

@@ -62,9 +77,8 @@ table.cbj_header {
 */
 table.cbj_header td.cbj_label {
     font-family: sans-serif;
-    font-weight: bold;
     text-align: right;
-    width: 40%;
+    width: 33%;
 }
 
 /*

@@ -73,9 +87,23 @@ table.cbj_header td.cbj_label {
 table.cbj_header td.cbj_content {
     font-family: sans-serif;
     text-align: left;
-    width:60%;
+    width:67%;
 }
 
+/*
+** Metadata divider
+*/
+hr.metadata_divider {
+    width:90%;
+    margin-left:5%;
+    border-top: solid white 0px;
+    border-right: solid white 0px;
+    border-bottom: solid black 1px;
+    border-left: solid white 0px;
+}
+
+
 /*
 ** To skip a banner item (Series|Published|Rating|Tags),
 ** edit the appropriate CSS rule below.

Book jacket template:

@@ -6,17 +6,24 @@
 </head>
 <body>
 <div class="cbj_banner">
-    <div class="cbj_title">{title}</div>
-    <div class="cbj_author">{author}</div>
     <table class="cbj_header">
-        <tr class="cbj_series">
-            <td class="cbj_label">{series_label}:</td>
-            <td class="cbj_content">{series}</td>
+        <tr>
+            <td class="cbj_title" colspan="2">{title}</td>
         </tr>
-        <tr class="cbj_pubdate">
-            <td class="cbj_label">{pubdate_label}:</td>
-            <td class="cbj_content">{pubdate}</td>
+        <tr>
+            <td class="cbj_series" colspan="2">{series}</td>
         </tr>
+        <tr>
+            <td class="cbj_author" colspan="2">{author}</td>
+        </tr>
+        <tr>
+            <td class="cbj_pubdata" colspan="2">{publisher} ({pubdate})</td>
+        </tr>
+
+        <tr>
+            <td class="cbj_author" colspan="2"><hr class="metadata_divider" /></td>
+        </tr>
+
         <tr class="cbj_rating">
             <td class="cbj_label">{rating_label}:</td>
             <td class="cbj_content">{rating}</td>
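
The {title}, {series}, {author}, {publisher}, {pubdate} and {rating} markers in this template are substituted with book metadata when the jacket page is generated; conceptually it is ordinary Python string formatting (a sketch of the idea, not the generator's actual code):

    # Sketch: how a placeholder row in the jacket template gets filled.
    row = '<td class="cbj_pubdata" colspan="2">{publisher} ({pubdate})</td>'
    print(row.format(publisher='Penguin', pubdate='2010'))
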
resources/mime.types (new file, 1381 lines; diff not shown because the file is too large)

resources/recipes/abc_au.recipe (new file, 54 lines):

__license__ = 'GPL v3'
__copyright__ = '2010, Dean Cording'
'''
abc.net.au/news
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe

class ABCNews(BasicNewsRecipe):
    title = 'ABC News'
    __author__ = 'Dean Cording'
    description = 'News from Australia'
    masthead_url = 'http://www.abc.net.au/news/assets/v5/images/common/logo-news.png'
    cover_url = 'http://www.abc.net.au/news/assets/v5/images/common/logo-news.png'

    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = False
    #delay = 1
    use_embedded_content = False
    encoding = 'utf8'
    publisher = 'ABC News'
    category = 'News, Australia, World'
    language = 'en_AU'
    publication_type = 'newsportal'
    preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
        'linearize_tables': False,
    }

    keep_only_tags = dict(id='article')

    remove_tags = [dict(attrs={'class': ['related', 'tags']}),
                   dict(id='statepromo')]

    remove_attributes = ['width', 'height']

    feeds = [
        ('Top Stories', 'http://www.abc.net.au/news/syndicate/topstoriesrss.xml'),
        ('Canberra', 'http://www.abc.net.au/news/indexes/idx-act/rss.xml'),
        ('Sydney', 'http://www.abc.net.au/news/indexes/sydney/rss.xml'),
        ('Melbourne', 'http://www.abc.net.au/news/indexes/melbourne/rss.xml'),
        ('Brisbane', 'http://www.abc.net.au/news/indexes/brisbane/rss.xml'),
        ('Perth', 'http://www.abc.net.au/news/indexes/perth/rss.xml'),
        ('Australia', 'http://www.abc.net.au/news/indexes/idx-australia/rss.xml'),
        ('World', 'http://www.abc.net.au/news/indexes/world/rss.xml'),
        ('Business', 'http://www.abc.net.au/news/indexes/business/rss.xml'),
        ('Science and Technology', 'http://www.abc.net.au/news/tag/science-and-technology/rss.xml'),
    ]
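
A recipe file like this is self-contained, so it can be exercised from the command line before it ships; calibre's standard converter accepts a recipe file directly, e.g.

    ebook-convert abc_au.recipe output.epub --test

where --test limits the download to a couple of articles per feed, keeping the edit-and-retry loop short.
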
resources/recipes/business_spectator.recipe (new file, 48 lines):

__license__ = 'GPL v3'
__copyright__ = '2010, Dean Cording'
'''
businessspectator.com.au
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe

class BusinessSpectator(BasicNewsRecipe):
    title = 'Business Spectator'
    __author__ = 'Dean Cording'
    description = 'Australian Business News & commentary delivered the way you want it.'
    masthead_url = 'http://www.businessspectator.com.au/bs.nsf/logo-business-spectator.gif'
    cover_url = masthead_url

    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    #delay = 1
    use_embedded_content = False
    encoding = 'utf8'
    publisher = 'Business Spectator'
    category = 'News, Australia, Business'
    language = 'en_AU'
    publication_type = 'newsportal'
    preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
        'linearize_tables': False,
    }

    keep_only_tags = [dict(id='storyHeader'), dict(id='body-html')]

    remove_tags = [dict(attrs={'class': 'hql'})]

    remove_attributes = ['width', 'height', 'style']

    feeds = [
        ('Top Stories', 'http://www.businessspectator.com.au/top-stories.rss'),
        ('Alan Kohler', 'http://www.businessspectator.com.au/bs.nsf/RSS?readform&type=spectators&cat=Alan%20Kohler'),
        ('Robert Gottliebsen', 'http://www.businessspectator.com.au/bs.nsf/RSS?readform&type=spectators&cat=Robert%20Gottliebsen'),
        ('Stephen Bartholomeusz', 'http://www.businessspectator.com.au/bs.nsf/RSS?readform&type=spectators&cat=Stephen%20Bartholomeusz'),
        ('Daily Dossier', 'http://www.businessspectator.com.au/bs.nsf/RSS?readform&type=kgb&cat=dossier'),
        ('Australia', 'http://www.businessspectator.com.au/bs.nsf/RSS?readform&type=region&cat=australia'),
    ]

resources/recipes/esenja.recipe (new file, 87 lines):

#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2010, matek09, matek09@gmail.com'

from calibre.web.feeds.news import BasicNewsRecipe
import re

class Esensja(BasicNewsRecipe):

    title = u'Esensja'
    __author__ = 'matek09'
    description = 'Monthly magazine'
    encoding = 'utf-8'
    no_stylesheets = True
    language = 'pl'
    remove_javascript = True
    HREF = '0'

    #keep_only_tags = []
    #keep_only_tags.append(dict(name='div', attrs={'class': 'article'}))
    remove_tags_before = dict(dict(name='div', attrs={'class': 't-title'}))
    remove_tags_after = dict(dict(name='img', attrs={'src': '../../../2000/01/img/tab_bot.gif'}))

    remove_tags = []
    remove_tags.append(dict(name='img', attrs={'src': '../../../2000/01/img/tab_top.gif'}))
    remove_tags.append(dict(name='img', attrs={'src': '../../../2000/01/img/tab_bot.gif'}))
    remove_tags.append(dict(name='div', attrs={'class': 't-title2 nextpage'}))

    extra_css = '''
        .t-title {font-size: x-large; font-weight: bold; text-align: left}
        .t-author {font-size: x-small; text-align: left}
        .t-title2 {font-size: x-small; font-style: italic; text-align: left}
        .text {font-size: small; text-align: left}
        .annot-ref {font-style: italic; text-align: left}
    '''

    preprocess_regexps = [(re.compile(r'alt="[^"]*"'),
                           lambda match: '')]

    def parse_index(self):
        soup = self.index_to_soup('http://www.esensja.pl/magazyn/')
        a = soup.find('a', attrs={'href': re.compile('.*/index.html')})
        year = a['href'].split('/')[0]
        month = a['href'].split('/')[1]
        self.HREF = 'http://www.esensja.pl/magazyn/' + year + '/' + month + '/iso/'
        soup = self.index_to_soup(self.HREF + '01.html')
        self.cover_url = 'http://www.esensja.pl/magazyn/' + year + '/' + month + '/img/ilustr/cover_b.jpg'
        feeds = []
        intro = soup.find('div', attrs={'class': 'n-title'})
        introduction = {'title': self.tag_to_string(intro.a),
                        'url': self.HREF + intro.a['href'],
                        'date': '',
                        'description': ''}
        chapter = 'Wprowadzenie'
        subchapter = ''
        articles = []
        articles.append(introduction)
        for tag in intro.findAllNext(attrs={'class': ['chapter', 'subchapter', 'n-title']}):
            if tag.name in 'td':
                if len(articles) > 0:
                    section = chapter
                    if len(subchapter) > 0:
                        section += ' - ' + subchapter
                    feeds.append((section, articles))
                    articles = []
                if tag['class'] == 'chapter':
                    chapter = self.tag_to_string(tag).capitalize()
                    subchapter = ''
                else:
                    subchapter = self.tag_to_string(tag)
                continue
            articles.append({'title': self.tag_to_string(tag.a), 'url': self.HREF + tag.a['href'], 'date': '', 'description': ''})

            a = self.index_to_soup(self.HREF + tag.a['href'])
            i = 1
            while True:
                div = a.find('div', attrs={'class': 't-title2 nextpage'})
                if div is not None:
                    a = self.index_to_soup(self.HREF + div.a['href'])
                    articles.append({'title': self.tag_to_string(tag.a) + ' c. d. ' + str(i), 'url': self.HREF + div.a['href'], 'date': '', 'description': ''})
                    i = i + 1
                else:
                    break

        return feeds

Frankfurter Rundschau recipe (rewritten):

@@ -1,67 +1,61 @@
-#!/usr/bin/env python
-
 __license__ = 'GPL v3'
-__copyright__ = '2009, Justus Bisser <justus.bisser at gmail.com>'
+__copyright__ = '2010, Christian Schmitt'
+
 '''
 fr-online.de
 '''
-import re
-
-from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.web.feeds.recipes import BasicNewsRecipe
 
-class Spiegel_ger(BasicNewsRecipe):
+class FROnlineRecipe(BasicNewsRecipe):
     title = 'Frankfurter Rundschau'
-    __author__ = 'Justus Bisser'
-    description = "Dies ist die Online-Ausgabe der Frankfurter Rundschau. Um die abgerufenen individuell einzustellen bearbeiten sie die Liste im erweiterten Modus. Die Feeds findet man auf http://www.fr-online.de/verlagsservice/fr_newsreader/?em_cnt=574255"
+    __author__ = 'maccs'
+    description = 'Nachrichten aus D und aller Welt'
+    encoding = 'utf-8'
+    masthead_url = 'http://www.fr-online.de/image/view/-/1474018/data/823552/-/logo.png'
     publisher = 'Druck- und Verlagshaus Frankfurt am Main GmbH'
-    category = 'FR Online, Frankfurter Rundschau, Nachrichten, News,Dienste, RSS, RSS, Feedreader, Newsfeed, iGoogle, Netvibes, Widget'
-    oldest_article = 7
-    max_articles_per_feed = 100
+    category = 'news, germany, world'
     language = 'de'
-    lang = 'de-DE'
-    no_stylesheets = True
+    publication_type = 'newspaper'
     use_embedded_content = False
-    #encoding = 'cp1252'
+    remove_javascript = True
+    no_stylesheets = True
+    oldest_article = 1          # Increase this number if you're interested in older articles
+    max_articles_per_feed = 50  # Seems a reasonable number to me
+    extra_css = '''
+        body { font-family: "arial", "verdana", "geneva", sans-serif; font-size: 12px; margin: 0px; background-color: #ffffff;}
+        .imgSubline{background-color: #f4f4f4; font-size: 0.8em;}
+        .p--heading-1 {font-weight: bold;}
+        .calibre_navbar {font-size: 0.8em; font-family: "arial", "verdana", "geneva", sans-serif;}
+    '''
+    remove_tags = [dict(name='div', attrs={'id': 'Logo'})]
+    cover_url = 'http://www.fr-online.de/image/view/-/1474018/data/823552/-/logo.png'
+    cover_margins = (100, 150, '#ffffff')
 
-    conversion_options = {
-        'comment': description,
-        'tags': category,
-        'publisher': publisher,
-        'language': lang,
-    }
-    recursions = 0
-    max_articles_per_feed = 100
-    #keep_only_tags = [dict(name='div', attrs={'class':'text'})]
-    #tags_remove = [dict(name='div', attrs={'style':'text-align: left; margin: 4px 0px 0px 4px; width: 200px; float: right;'})]
-    remove_attributes = ['style']
     feeds = []
-    #remove_tags_before = [dict(name='div', attrs={'style':'padding-left: 0px;'})]
-    #remove_tags_after = [dict(name='div', attrs={'class':'box_head_text'})]
-
-    # enable for all news
-    allNews = 0
-    if allNews:
-        feeds = [(u'Frankfurter Rundschau', u'http://www.fr-online.de/rss/sport/index.xml')]
-    else:
-        #select the feeds you like
-        feeds = [(u'Nachrichten', u'http://www.fr-online.de/rss/politik/index.xml')]
-        feeds.append((u'Kommentare und Analysen', u'http://www.fr-online.de/rss/meinung/index.xml'))
-        feeds.append((u'Dokumentationen', u'http://www.fr-online.de/rss/dokumentation/index.xml'))
-        feeds.append((u'Deutschlandtrend', u'http://www.fr-online.de/rss/deutschlandtrend/index.xml'))
-        feeds.append((u'Wirtschaft', u'http://www.fr-online.de/rss/wirtschaft/index.xml'))
-        feeds.append((u'Sport', u'http://www.fr-online.de/rss/sport/index.xml'))
-        feeds.append((u'Feuilleton', u'http://www.fr-online.de/rss/feuilleton/index.xml'))
-        feeds.append((u'Panorama', u'http://www.fr-online.de/rss/panorama/index.xml'))
-        feeds.append((u'Rhein Main und Hessen', u'http://www.fr-online.de/rss/hessen/index.xml'))
-        feeds.append((u'Fitness und Gesundheit', u'http://www.fr-online.de/rss/fit/index.xml'))
-        feeds.append((u'Multimedia', u'http://www.fr-online.de/rss/multimedia/index.xml'))
-        feeds.append((u'Wissen und Bildung', u'http://www.fr-online.de/rss/wissen/index.xml'))
+    feeds.append(('Startseite', u'http://www.fr-online.de/home/-/1472778/1472778/-/view/asFeed/-/index.xml'))
+    feeds.append(('Politik', u'http://www.fr-online.de/politik/-/1472596/1472596/-/view/asFeed/-/index.xml'))
+    feeds.append(('Meinung', u'http://www.fr-online.de/politik/meinung/-/1472602/1472602/-/view/asFeed/-/index.xml'))
+    feeds.append(('Wirtschaft', u'http://www.fr-online.de/wirtschaft/-/1472780/1472780/-/view/asFeed/-/index.xml'))
+    feeds.append(('Sport', u'http://www.fr-online.de/sport/-/1472784/1472784/-/view/asFeed/-/index.xml'))
+    feeds.append(('Eintracht Frankfurt', u'http://www.fr-online.de/sport/eintracht-frankfurt/-/1473446/1473446/-/view/asFeed/-/index.xml'))
+    feeds.append(('Kultur und Medien', u'http://www.fr-online.de/kultur/-/1472786/1472786/-/view/asFeed/-/index.xml'))
+    feeds.append(('Panorama', u'http://www.fr-online.de/panorama/-/1472782/1472782/-/view/asFeed/-/index.xml'))
+    feeds.append(('Frankfurt', u'http://www.fr-online.de/frankfurt/-/1472798/1472798/-/view/asFeed/-/index.xml'))
+    feeds.append(('Rhein-Main', u'http://www.fr-online.de/rhein-main/-/1472796/1472796/-/view/asFeed/-/index.xml'))
+    feeds.append(('Hanau', u'http://www.fr-online.de/rhein-main/hanau/-/1472866/1472866/-/view/asFeed/-/index.xml'))
+    feeds.append(('Darmstadt', u'http://www.fr-online.de/rhein-main/darmstadt/-/1472858/1472858/-/view/asFeed/-/index.xml'))
+    feeds.append(('Wiesbaden', u'http://www.fr-online.de/rhein-main/wiesbaden/-/1472860/1472860/-/view/asFeed/-/index.xml'))
+    feeds.append(('Offenbach', u'http://www.fr-online.de/rhein-main/offenbach/-/1472856/1472856/-/view/asFeed/-/index.xml'))
+    feeds.append(('Bad Homburg', u'http://www.fr-online.de/rhein-main/bad-homburg/-/1472864/1472864/-/view/asFeed/-/index.xml'))
+    feeds.append(('Digital', u'http://www.fr-online.de/digital/-/1472406/1472406/-/view/asFeed/-/index.xml'))
+    feeds.append(('Wissenschaft', u'http://www.fr-online.de/wissenschaft/-/1472788/1472788/-/view/asFeed/-/index.xml'))
 
-    def get_article_url(self, article):
-        url = article.link
-        regex = re.compile("0C[0-9]{6,8}0A?")
-        liste = regex.findall(url)
-        string = liste.pop(0)
-        string = string[2:len(string)-1]
-        return "http://www.fr-online.de/_em_cms/_globals/print.php?em_cnt=" + string
+    def print_version(self, url):
+        return url.replace('index.html', 'view/printVersion/-/index.html')

resources/recipes/globaltimes.recipe (new file, 46 lines):

from calibre.web.feeds.news import BasicNewsRecipe
import re

class globaltimes(BasicNewsRecipe):
    title = u'Global Times'
    __author__ = 'malfi'
    language = 'zh'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    cover_url = 'http://enhimg2.huanqiu.com/images/logo.png'
    language = 'en'
    keep_only_tags = []
    keep_only_tags.append(dict(name='div', attrs={'id': 'content'}))
    remove_tags = []
    remove_tags.append(dict(name='div', attrs={'class': 'location'}))
    remove_tags.append(dict(name='div', attrs={'class': 'contentpage'}))
    remove_tags.append(dict(name='li', attrs={'id': 'pl'}))

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''

    def parse_index(self):
        catnames = {}
        catnames["http://china.globaltimes.cn/chinanews/"] = "China Politics"
        catnames["http://china.globaltimes.cn/diplomacy/"] = "China Diplomacy"
        catnames["http://military.globaltimes.cn/china/"] = "China Military"
        catnames["http://business.globaltimes.cn/china-economy/"] = "China Economy"
        catnames["http://world.globaltimes.cn/asia-pacific/"] = "Asia Pacific"
        feeds = []

        for cat in catnames.keys():
            articles = []
            soup = self.index_to_soup(cat)
            for a in soup.findAll('a', attrs={'href': re.compile(cat + "201[0-9]-[0-1][0-9]/[0-9][0-9][0-9][0-9][0-9][0-9].html")}):
                url = a['href'].strip()
                myarticle = ({'title': self.tag_to_string(a), 'url': url, 'description': '', 'date': ''})
                self.log("found %s" % url)
                articles.append(myarticle)
                self.log("Adding URL %s\n" % url)
            if articles:
                feeds.append((catnames[cat], articles))
        return feeds

resources/recipes/heise_open.recipe (new file, 38 lines):

__license__ = 'GPL v3'
__copyright__ = '2010, Anton Gillert <atx at binaryninja.de>'

'''
Fetch Heise Open.
'''
from calibre.web.feeds.news import BasicNewsRecipe


class HeiseOpenDe(BasicNewsRecipe):

    title = 'Heise Open'
    description = 'Opensource news from Germany'
    __author__ = 'Anton Gillert'
    use_embedded_content = False
    language = 'de'
    timefmt = ' [%d %b %Y]'
    max_articles_per_feed = 40
    no_stylesheets = True

    feeds = [('Heise Open', 'http://www.heise.de/open/news/news-atom.xml')]

    def print_version(self, url):
        return url + '?view=print'

    remove_tags = [dict(id='navi_top'),
                   dict(id='navi_bottom'),
                   dict(name='div', attrs={'class': 'navi_top_logo'}),
                   dict(name='img', attrs={'src': '/open/icons/open_logo_2009_weiss.gif'}),
                   dict(name='h5', attrs={'style': 'margin: 0.5em 0;'}),
                   dict(name='p', attrs={'class': 'news_datum'}),
                   dict(name='p', attrs={'class': 'size80'})]
    remove_tags_after = [dict(name='p', attrs={'class': 'size80'})]

    def get_cover_url(self):
        return 'http://www.heise.de/open/icons/open_logo_2009_weiss.gif'

resources/recipes/histmag.recipe (new file, 59 lines):

#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2010, matek09, matek09@gmail.com'

from calibre.web.feeds.news import BasicNewsRecipe
import re

class Histmag(BasicNewsRecipe):

    title = u'Histmag'
    __author__ = 'matek09'
    description = u"Artykuly historyczne i publicystyczne"
    encoding = 'utf-8'
    no_stylesheets = True
    language = 'pl'
    remove_javascript = True
    #max_articles_per_feed = 1
    remove_tags_before = dict(dict(name='div', attrs={'id': 'article'}))
    remove_tags_after = dict(dict(name='h2', attrs={'class': 'komentarze'}))
    #keep_only_tags = []
    #keep_only_tags.append(dict(name='h2'))
    #keep_only_tags.append(dict(name='p'))

    remove_tags = []
    remove_tags.append(dict(name='p', attrs={'class': 'podpis'}))
    remove_tags.append(dict(name='h2', attrs={'class': 'komentarze'}))
    remove_tags.append(dict(name='img', attrs={'src': 'style/buttons/wesprzyjnas-1.jpg'}))

    preprocess_regexps = [(re.compile(r'</span>'), lambda match: '</span><br><br>'),
                          (re.compile(r'<span>'), lambda match: '<br><br><span>')]
    extra_css = '''
        .left {font-size: x-small}
        .right {font-size: x-small}
    '''

    def find_articles(self, soup):
        articles = []
        for div in soup.findAll('div', attrs={'class': 'text'}):
            articles.append({
                'title': self.tag_to_string(div.h3.a),
                'url': 'http://www.histmag.org/' + div.h3.a['href'],
                'date': self.tag_to_string(div.next('p')).split('|')[0],
                'description': self.tag_to_string(div.next('p', podpis=False)),
            })
        return articles

    def parse_index(self):
        soup = self.index_to_soup('http://histmag.org/?arc=4&dx=0')
        feeds = []
        feeds.append((u"Artykuly historyczne", self.find_articles(soup)))
        soup = self.index_to_soup('http://histmag.org/?arc=5&dx=0')
        feeds.append((u"Artykuly publicystyczne", self.find_articles(soup)))
        soup = self.index_to_soup('http://histmag.org/?arc=1&dx=0')
        feeds.append((u"Wydarzenia", self.find_articles(soup)))

        return feeds

resources/recipes/kompiutierra.recipe (new file, 36 lines):

#!/usr/bin/python
# -*- coding: utf-8 -*-

__license__ = 'GPL v3'
__copyright__ = '2010, Vadim Dyadkin, dyadkin@gmail.com'
__author__ = 'Vadim Dyadkin'

from calibre.web.feeds.news import BasicNewsRecipe

class Computerra(BasicNewsRecipe):
    title = u'\u041a\u043e\u043c\u043f\u044c\u044e\u0442\u0435\u0440\u0440\u0430'
    recursion = 50
    oldest_article = 100
    __author__ = 'Vadim Dyadkin'
    max_articles_per_feed = 100
    use_embedded_content = False
    simultaneous_downloads = 5
    language = 'ru'
    description = u'\u041a\u043e\u043c\u043f\u044c\u044e\u0442\u0435\u0440\u044b, \u043e\u043a\u043e\u043b\u043e\u043d\u0430\u0443\u0447\u043d\u044b\u0435 \u0438 \u043e\u043a\u043e\u043b\u043e\u0444\u0438\u043b\u043e\u0441\u043e\u0444\u0441\u043a\u0438\u0435 \u0441\u0442\u0430\u0442\u044c\u0438, \u0433\u0430\u0434\u0436\u0435\u0442\u044b.'

    keep_only_tags = [dict(name='div', attrs={'id': 'content'}),]

    feeds = [(u'\u041a\u043e\u043c\u043f\u044c\u044e\u0442\u0435\u0440\u0440\u0430', 'http://feeds.feedburner.com/ct_news/'),]

    remove_tags = [dict(name='div', attrs={'id': ['fin', 'idc-container', 'idc-noscript',]}),
                   dict(name='ul', attrs={'class': "related_post"}),
                   dict(name='p', attrs={'class': 'info'}),
                   dict(name='a', attrs={'rel': 'tag', 'class': 'twitter-share-button', 'type': 'button_count'}),
                   dict(name='h2', attrs={}),]

    extra_css = 'body { text-align: justify; }'

    def get_article_url(self, article):
        return article.get('feedburner:origLink', article.get('guid'))

resources/recipes/la_rioja.recipe (new file, 54 lines):

__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.larioja.com
'''

from calibre.web.feeds.news import BasicNewsRecipe

class LaRioja(BasicNewsRecipe):
    title = 'La Rioja'
    __author__ = 'Arturo Martinez Nieves'
    description = 'Noticias de La Rioja y el resto del mundo'
    publisher = 'La Rioja'
    category = 'news, politics, Spain'
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'cp1252'
    use_embedded_content = False
    language = 'es'
    remove_empty_feeds = True
    masthead_url = 'http://www.larioja.com/includes/manuales/larioja/include-lariojapapeldigital-zonac-fondocabecera01.jpg'
    extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .photo-caption{font-size: x-small} '

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    keep_only_tags = [
        dict(attrs={'id': 'title'}),
        dict(attrs={'class': ['overhead', 'headline', 'subhead', 'date', 'text', 'noticia_cont', 'desarrollo']}),
    ]
    remove_tags = [dict(name='ul')]
    remove_attributes = ['width', 'height']

    feeds = [
        (u'Ultimas Noticias', u'http://www.larioja.com/rss/feeds/ultima.xml'),
        (u'Portada', u'http://www.larioja.com/rss/feeds/portada.xml'),
        (u'Mundo', u'http://www.larioja.com/rss/feeds/mundo.xml'),
        (u'Espana', u'http://www.larioja.com/rss/feeds/espana.xml'),
        (u'Region', u'http://www.larioja.com/rss/feeds/region.xml'),
        (u'Comarcas', u'http://www.larioja.com/rss/feeds/comarcas.xml'),
        (u'Deportes', u'http://www.larioja.com/rss/feeds/deportes.xml'),
        (u'Economia', u'http://www.larioja.com/rss/feeds/economia.xml'),
        (u'Cultura', u'http://www.larioja.com/rss/feeds/cultura.xml'),
        (u'Opinion', u'http://www.larioja.com/rss/feeds/opinion.xml'),
        (u'Sociedad', u'http://www.larioja.com/rss/feeds/sociedad.xml'),
    ]

Le Monde recipe (rewritten):

@@ -1,107 +1,90 @@
-#!/usr/bin/env python
-
-__license__ = 'GPL v3'
-__copyright__ = '2008, Mathieu Godlewski <mathieu at godlewski.fr>'
-'''
-lemonde.fr
-'''
-
 import re
-from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
 
 class LeMonde(BasicNewsRecipe):
-    title = 'LeMonde.fr'
-    __author__ = 'Mathieu Godlewski and Sujata Raman'
-    description = 'Global news in french'
-    oldest_article = 3
-    language = 'fr'
-
-    max_articles_per_feed = 30
+    title = 'Le Monde'
+    __author__ = 'veezh'
+    description = 'Actualités'
+    oldest_article = 1
+    max_articles_per_feed = 100
+
     no_stylesheets = True
-    remove_javascript = True
-
-    extra_css = '''
-        .dateline{color:#666666;font-family:verdana,sans-serif;font-size:x-small;}
-        .author{font-family:verdana,sans-serif;font-size:x-small;color:#222222;}
-        .articleImage{color:#666666;font-family:verdana,sans-serif;font-size:x-small;}
-        .mainText{font-family:Georgia,serif;color:#222222;}
-        .LM_articleText{font-family:Arial,Helvetica,sans-serif;}
-        .LM_titleZone{font-family:Arial,Helvetica,sans-serif;}
-        .mainContent{font-family:Georgia,serif;}
-        .LM_content{font-family:Georgia,serif;}
-        .LM_caption{font-family:Georgia,serif;font-size:-small;}
-        .LM_imageSource{font-family:Arial,Helvetica,sans-serif;font-size:x-small;color:#666666;}
-        h1{font-family:Arial,Helvetica,sans-serif;font-size:medium;color:#000000;}
-        .post{font-family:Arial,Helvetica,sans-serif;}
-        .mainTitle{font-family:Georgia,serif;}
-        .content{font-family:Georgia,serif;}
-        .entry{font-family:Georgia,serif;}
-        h2{font-family:Arial,Helvetica,sans-serif;font-size:large;}
-        small{font-family:Arial,Helvetica,sans-serif; color:#ED1B23;}
-    '''
+    #delay = 1
+    use_embedded_content = False
+    encoding = 'cp1252'
+    publisher = 'lemonde.fr'
+    language = 'fr'
+    conversion_options = {
+        'comments': description,
+        'language': language,
+        'publisher': publisher,
+        'linearize_tables': True,
+    }
+
+    remove_empty_feeds = True
+
+    filterDuplicates = True
+
+    def preprocess_html(self, soup):
+        for alink in soup.findAll('a'):
+            if alink.string is not None:
+                tstr = alink.string
+                alink.replaceWith(tstr)
+        return soup
+
+    preprocess_regexps = [
+        (re.compile(r' \''), lambda match: ' ‘'),
+        (re.compile(r'\''), lambda match: '’'),
+        (re.compile(r'"<'), lambda match: ' »<'),
+        (re.compile(r'>"'), lambda match: '>« '),
+        (re.compile(r'’"'), lambda match: '’« '),
+        (re.compile(r' "'), lambda match: ' « '),
+        (re.compile(r'" '), lambda match: ' » '),
+        (re.compile(r'\("'), lambda match: '(« '),
+        (re.compile(r'"\)'), lambda match: ' »)'),
+        (re.compile(r'"\.'), lambda match: ' ».'),
+        (re.compile(r'",'), lambda match: ' »,'),
+        (re.compile(r'"\?'), lambda match: ' »?'),
+        (re.compile(r'":'), lambda match: ' »:'),
+        (re.compile(r'";'), lambda match: ' »;'),
+        (re.compile(r'"\!'), lambda match: ' »!'),
+        (re.compile(r' :'), lambda match: ' :'),
+        (re.compile(r' ;'), lambda match: ' ;'),
+        (re.compile(r' \?'), lambda match: ' ?'),
+        (re.compile(r' \!'), lambda match: ' !'),
+        (re.compile(r'\s»'), lambda match: ' »'),
+        (re.compile(r'«\s'), lambda match: '« '),
+        (re.compile(r' %'), lambda match: ' %'),
+        (re.compile(r'\.jpg » border='), lambda match: '.jpg'),
+        (re.compile(r'\.png » border='), lambda match: '.png'),
+    ]
+
+    keep_only_tags = [
+        dict(name='div', attrs={'class': ['contenu']})
+    ]
+
+    remove_tags_after = [dict(id='appel_temoignage')]
+
+    def get_article_url(self, article):
+        link = article.get('link')
+        if 'blog' not in link:
+            return link
 
     feeds = [
-        ('A la Une', 'http://www.lemonde.fr/rss/une.xml'),
-        ('International', 'http://www.lemonde.fr/rss/sequence/0,2-3210,1-0,0.xml'),
-        ('Europe', 'http://www.lemonde.fr/rss/sequence/0,2-3214,1-0,0.xml'),
-        ('Societe', 'http://www.lemonde.fr/rss/sequence/0,2-3224,1-0,0.xml'),
-        ('Economie', 'http://www.lemonde.fr/rss/sequence/0,2-3234,1-0,0.xml'),
-        ('Medias', 'http://www.lemonde.fr/rss/sequence/0,2-3236,1-0,0.xml'),
-        ('Rendez-vous', 'http://www.lemonde.fr/rss/sequence/0,2-3238,1-0,0.xml'),
-        ('Sports', 'http://www.lemonde.fr/rss/sequence/0,2-3242,1-0,0.xml'),
-        ('Planete', 'http://www.lemonde.fr/rss/sequence/0,2-3244,1-0,0.xml'),
-        ('Culture', 'http://www.lemonde.fr/rss/sequence/0,2-3246,1-0,0.xml'),
-        ('Technologies', 'http://www.lemonde.fr/rss/sequence/0,2-651865,1-0,0.xml'),
-        ('Cinema', 'http://www.lemonde.fr/rss/sequence/0,2-3476,1-0,0.xml'),
-        ('Voyages', 'http://www.lemonde.fr/rss/sequence/0,2-3546,1-0,0.xml'),
-        ('Livres', 'http://www.lemonde.fr/rss/sequence/0,2-3260,1-0,0.xml'),
-        ('Examens', 'http://www.lemonde.fr/rss/sequence/0,2-3404,1-0,0.xml'),
-        ('Opinions', 'http://www.lemonde.fr/rss/sequence/0,2-3232,1-0,0.xml')
+        ('A la une', 'http://www.lemonde.fr/rss/une.xml'),
+        ('International', 'http://www.lemonde.fr/rss/tag/international.xml'),
+        ('Europe', 'http://www.lemonde.fr/rss/tag/europe.xml'),
+        (u'Société', 'http://www.lemonde.fr/rss/tag/societe.xml'),
+        ('Economie', 'http://www.lemonde.fr/rss/tag/economie.xml'),
+        (u'Médias', 'http://www.lemonde.fr/rss/tag/actualite-medias.xml'),
+        (u'Planète', 'http://www.lemonde.fr/rss/tag/planete.xml'),
+        ('Culture', 'http://www.lemonde.fr/rss/tag/culture.xml'),
+        ('Technologies', 'http://www.lemonde.fr/rss/tag/technologies.xml'),
+        ('Livres', 'http://www.lemonde.fr/rss/tag/livres.xml'),
     ]
-    keep_only_tags = [dict(name='div', attrs={'id': ["mainTitle", "mainContent", "LM_content", "content"]}),
-                      dict(name='div', attrs={'class': ["post"]})
-                      ]
-
-    remove_tags = [dict(name='img', attrs={'src': 'http://medias.lemonde.fr/mmpub/img/lgo/lemondefr_pet.gif'}),
-                   dict(name='div', attrs={'id': 'xiti-logo-noscript'}),
-                   dict(name='br', attrs={}),
-                   dict(name='iframe', attrs={}),
-                   dict(name='table', attrs={'id': ["toolBox"]}),
-                   dict(name='table', attrs={'class': ["bottomToolBox"]}),
-                   dict(name='div', attrs={'class': ["pageNavigation", "LM_pagination", "fenetreBoxesContainer", "breakingNews", "LM_toolsBottom", "LM_comments", "LM_tools", "pave_meme_sujet_hidden", "boxMemeSujet"]}),
-                   dict(name='div', attrs={'id': ["miniUne", "LM_sideBar"]}),
-                   ]
-
-    preprocess_regexps = [(re.compile(i[0], re.IGNORECASE|re.DOTALL), i[1]) for i in
-        [
-            (r'<html.*(<div class="post".*?>.*?</div>.*?<div class="entry">.*?</div>).*You can start editing here.*</html>', lambda match: '<html><body>'+match.group(1)+'</body></html>'),
-            (r'<p> </p>', lambda match: ''),
-            (r'<img src="http://medias\.lemonde\.fr/mmpub/img/let/(.)\.gif"[^>]*><div class=ar-txt>', lambda match: '<div class=ar-txt>'+match.group(1).upper()),
-            (r'<img src="http://medias\.lemonde\.fr/mmpub/img/let/q(.)\.gif"[^>]*><div class=ar-txt>', lambda match: '<div class=ar-txt>"'+match.group(1).upper()),
-            (r'(<div class=desc><b>.*</b></div>).*</body>', lambda match: match.group(1)),
-        ]
-    ]
-
-    article_match_regexps = [(re.compile(i)) for i in
-        [
-            (r'http://www\.lemonde\.fr/\S+/article/.*'),
-            (r'http://www\.lemonde\.fr/\S+/portfolio/.*'),
-            (r'http://www\.lemonde\.fr/\S+/article_interactif/.*'),
-            (r'http://\S+\.blog\.lemonde\.fr/.*'),
-        ]
-    ]
-
-    # def print_version(self, url):
-    #     return re.sub('http://www\.lemonde\.fr/.*_([0-9]+)_[0-9]+\.html.*','http://www.lemonde.fr/web/imprimer_element/0,40-0,50-\\1,0.html', url)
-
-    # Used to filter duplicated articles
-    articles_list = []
-
-    # cover_url='http://abonnes.lemonde.fr/titresdumonde/'+date.today().strftime("%y%m%d")+'/1.jpg'
 
     def get_cover_url(self):
         cover_url = None
         soup = self.index_to_soup('http://www.lemonde.fr/web/monde_pdf/0,33-0,1-0,0.html')

@@ -111,42 +94,3 @@ class LeMonde(BasicNewsRecipe):
         cover_url = link_item.img['src']
 
         return cover_url
-
-    def get_article_url(self, article):
-        url = article.get('link', None)
-        url = url[0:url.find("#")]
-        if url in self.articles_list:
-            self.log_debug(_('Skipping duplicated article: %s') % url)
-            return False
-        if self.is_article_wanted(url):
-            self.articles_list.append(url)
-            if '/portfolio/' in url or '/video/' in url:
-                url = None
-            return url
-        self.log_debug(_('Skipping filtered article: %s') % url)
-        url = article.get('guid', None)
-        return False
-
-    def is_article_wanted(self, url):
-        if self.article_match_regexps:
-            for m in self.article_match_regexps:
-                if m.search(url):
-                    return True
-            return False
-        return False
-
-    def preprocess_html(self, soup):
-        for item in soup.findAll(style=True):
-            del item['style']
-        for item in soup.findAll(face=True):
-            del item['face']
-        for tag in soup.findAll(name=['ul', 'li']):
-            tag.name = 'div'
-        return soup

Mainichi Daily News recipe:

@@ -4,6 +4,7 @@ __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
 www.mainichi.jp
 '''
 
+import re
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class MainichiDailyNews(BasicNewsRecipe):

@@ -22,3 +23,18 @@ class MainichiDailyNews(BasicNewsRecipe):
     remove_tags = [{'class':"RelatedArticle"}]
     remove_tags_after = {'class':"Credit"}
+
+    def parse_feeds(self):
+
+        feeds = BasicNewsRecipe.parse_feeds(self)
+
+        for curfeed in feeds:
+            delList = []
+            for a,curarticle in enumerate(curfeed.articles):
+                if re.search(r'pheedo.jp', curarticle.url):
+                    delList.append(curarticle)
+            if len(delList)>0:
+                for d in delList:
+                    index = curfeed.articles.index(d)
+                    curfeed.articles[index:index+1] = []
+
+        return feeds

@ -1,4 +1,5 @@
 from calibre.web.feeds.news import BasicNewsRecipe
+import re
 
 class MainichiDailyITNews(BasicNewsRecipe):
     title = u'\u6bce\u65e5\u65b0\u805e(IT&\u5bb6\u96fb)'
@ -16,3 +17,18 @@ class MainichiDailyITNews(BasicNewsRecipe):
     remove_tags = [{'class':"RelatedArticle"}]
     remove_tags_after = {'class':"Credit"}
 
+    def parse_feeds(self):
+
+        feeds = BasicNewsRecipe.parse_feeds(self)
+
+        for curfeed in feeds:
+            delList = []
+            for a,curarticle in enumerate(curfeed.articles):
+                if re.search(r'pheedo.jp', curarticle.url):
+                    delList.append(curarticle)
+            if len(delList)>0:
+                for d in delList:
+                    index = curfeed.articles.index(d)
+                    curfeed.articles[index:index+1] = []
+
+        return feeds
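Both Mainichi hunks add the identical parse_feeds override: pheedo.jp tracker entries are collected into delList first, then spliced out by index, so the article list is never modified while it is being iterated. The core idea as a standalone sketch (plain lists standing in for calibre's Feed objects):

    import re

    def drop_tracker_articles(urls):
        # phase 1: mark - never delete from a list while iterating over it
        doomed = [u for u in urls if re.search(r'pheedo\.jp', u)]
        # phase 2: sweep
        for u in doomed:
            urls.remove(u)
        return urls

    urls = ['http://mainichi.jp/select/1.html', 'http://a.pheedo.jp/click?2']
    print drop_tracker_articles(urls)  # ['http://mainichi.jp/select/1.html']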
@ -1,8 +1,9 @@
 __license__   = 'GPL v3'
 __copyright__ = '2010, Eddie Lau'
 '''
-modified from Singtao Toronto calibre recipe by rty
 Change Log:
+2010/12/07: add entertainment section, use newspaper front page as ebook cover, suppress date display in section list
+    (to avoid wrong date display in case the user generates the ebook in a time zone different from HKT)
 2010/11/22: add English section, remove eco-news section which is not updated daily, correct
             ordering of articles
 2010/11/12: add news image and eco-news section
@ -17,14 +18,15 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
 from contextlib import nested
 
 
-from calibre import __appname__, strftime
+from calibre import __appname__
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
 from calibre.ebooks.metadata.opf2 import OPFCreator
 from calibre.ebooks.metadata.toc import TOC
 from calibre.ebooks.metadata import MetaInformation
-from calibre.utils.date import now as nowf
 
 class MPHKRecipe(BasicNewsRecipe):
+    IsKindleUsed = True  # to avoid generating periodical in which CJK characters can't be displayed in section/article view
+
     title = 'Ming Pao - Hong Kong'
     oldest_article = 1
     max_articles_per_feed = 100
@ -39,13 +41,13 @@ class MPHKRecipe(BasicNewsRecipe):
     encoding = 'Big5-HKSCS'
     recursions = 0
     conversion_options = {'linearize_tables':True}
-    extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;}'
-    #extra_css = 'img {float:right; margin:4px;}'
+    timefmt = ''
+    extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
     masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
     keep_only_tags = [dict(name='h1'),
-                      #dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page
+                      dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
                       dict(attrs={'class':['photo']}),
-                      dict(attrs={'id':['newscontent']}),
+                      dict(attrs={'id':['newscontent']}), # entertainment page content
                       dict(attrs={'id':['newscontent01','newscontent02']})]
     remove_tags = [dict(name='style'),
                    dict(attrs={'id':['newscontent135']})] # for the finance page
@ -55,51 +57,68 @@ class MPHKRecipe(BasicNewsRecipe):
                           lambda match: '<h1>'),
                          (re.compile(r'</h5>', re.DOTALL|re.IGNORECASE),
                           lambda match: '</h1>'),
+                         (re.compile(r'<p><a href=.+?</a></p>', re.DOTALL|re.IGNORECASE), # for entertainment page
+                          lambda match: '')
                         ]
 
     def image_url_processor(cls, baseurl, url):
         # trick: break the url at the first occurance of digit, add an additional
         # '_' at the front
         # not working, may need to move this to preprocess_html() method
-        #minIdx = 10000
+        # minIdx = 10000
-        #i0 = url.find('0')
+        # i0 = url.find('0')
-        #if i0 >= 0 and i0 < minIdx:
+        # if i0 >= 0 and i0 < minIdx:
         #   minIdx = i0
-        #i1 = url.find('1')
+        # i1 = url.find('1')
-        #if i1 >= 0 and i1 < minIdx:
+        # if i1 >= 0 and i1 < minIdx:
         #   minIdx = i1
-        #i2 = url.find('2')
+        # i2 = url.find('2')
-        #if i2 >= 0 and i2 < minIdx:
+        # if i2 >= 0 and i2 < minIdx:
         #   minIdx = i2
-        #i3 = url.find('3')
+        # i3 = url.find('3')
-        #if i3 >= 0 and i0 < minIdx:
+        # if i3 >= 0 and i0 < minIdx:
         #   minIdx = i3
-        #i4 = url.find('4')
+        # i4 = url.find('4')
-        #if i4 >= 0 and i4 < minIdx:
+        # if i4 >= 0 and i4 < minIdx:
         #   minIdx = i4
-        #i5 = url.find('5')
+        # i5 = url.find('5')
-        #if i5 >= 0 and i5 < minIdx:
+        # if i5 >= 0 and i5 < minIdx:
         #   minIdx = i5
-        #i6 = url.find('6')
+        # i6 = url.find('6')
-        #if i6 >= 0 and i6 < minIdx:
+        # if i6 >= 0 and i6 < minIdx:
         #   minIdx = i6
-        #i7 = url.find('7')
+        # i7 = url.find('7')
-        #if i7 >= 0 and i7 < minIdx:
+        # if i7 >= 0 and i7 < minIdx:
         #   minIdx = i7
-        #i8 = url.find('8')
+        # i8 = url.find('8')
-        #if i8 >= 0 and i8 < minIdx:
+        # if i8 >= 0 and i8 < minIdx:
         #   minIdx = i8
-        #i9 = url.find('9')
+        # i9 = url.find('9')
-        #if i9 >= 0 and i9 < minIdx:
+        # if i9 >= 0 and i9 < minIdx:
         #   minIdx = i9
-        #return url[0:minIdx] + '_' + url[minIdx+1:]
         return url
 
-    def get_fetchdate(self):
+    def get_dtlocal(self):
         dt_utc = datetime.datetime.utcnow()
         # convert UTC to local hk time - at around HKT 6.00am, all news are available
         dt_local = dt_utc - datetime.timedelta(-2.0/24)
-        return dt_local.strftime("%Y%m%d")
+        return dt_local
+
+    def get_fetchdate(self):
+        return self.get_dtlocal().strftime("%Y%m%d")
+
+    def get_fetchday(self):
+        # convert UTC to local hk time - at around HKT 6.00am, all news are available
+        return self.get_dtlocal().strftime("%d")
+
+    def get_cover_url(self):
+        cover = 'http://news.mingpao.com/' + self.get_fetchdate() + '/' + self.get_fetchdate() + '_' + self.get_fetchday() + 'gacov.jpg'
+        br = BasicNewsRecipe.get_browser()
+        try:
+            br.open(cover)
+        except:
+            cover = None
+        return cover
 
     def parse_index(self):
         feeds = []
@ -127,9 +146,9 @@ class MPHKRecipe(BasicNewsRecipe):
         # if eco_articles:
         #     feeds.append((u'\u74b0\u4fdd Eco News', eco_articles))
         # special - entertainment
-        #ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
-        #if ent_articles:
-        #    feeds.append(('Entertainment', ent_articles))
+        ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
+        if ent_articles:
+            feeds.append((u'\u5f71\u8996 Entertainment', ent_articles))
         return feeds
 
     def parse_section(self, url):
@ -164,6 +183,7 @@ class MPHKRecipe(BasicNewsRecipe):
         return current_articles
 
     def parse_eco_section(self, url):
+        dateStr = self.get_fetchdate()
         soup = self.index_to_soup(url)
         divs = soup.findAll(attrs={'class': ['bullet']})
         current_articles = []
@ -173,23 +193,25 @@ class MPHKRecipe(BasicNewsRecipe):
             title = self.tag_to_string(a)
             url = a.get('href', False)
             url = 'http://tssl.mingpao.com/htm/marketing/eco/cfm/' +url
-            if url not in included_urls and url.rfind('Redirect') == -1:
+            if url not in included_urls and url.rfind('Redirect') == -1 and not url.rfind('.txt') == -1 and not url.rfind(dateStr) == -1:
                 current_articles.append({'title': title, 'url': url, 'description':''})
                 included_urls.append(url)
         return current_articles
 
-    #def parse_ent_section(self, url):
-    #    dateStr = self.get_fetchdate()
-    #    soup = self.index_to_soup(url)
-    #    a = soup.findAll('a', href=True)
-    #    current_articles = []
-    #    included_urls = []
-    #    for i in a:
-    #        title = self.tag_to_string(i)
-    #        url = 'http://ol.mingpao.com/cfm/' + i.get('href', False)
-    #        if url not in included_urls and not url.rfind('.txt') == -1 and not url.rfind(dateStr) == -1 and not title == '':
-    #            current_articles.append({'title': title, 'url': url, 'description': ''})
-    #    return current_articles
+    def parse_ent_section(self, url):
+        soup = self.index_to_soup(url)
+        a = soup.findAll('a', href=True)
+        a.reverse()
+        current_articles = []
+        included_urls = []
+        for i in a:
+            title = self.tag_to_string(i)
+            url = 'http://ol.mingpao.com/cfm/' + i.get('href', False)
+            if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('star') == -1):
+                current_articles.append({'title': title, 'url': url, 'description': ''})
+                included_urls.append(url)
+        current_articles.reverse()
+        return current_articles
 
     def preprocess_html(self, soup):
         for item in soup.findAll(style=True):
@ -201,21 +223,26 @@ class MPHKRecipe(BasicNewsRecipe):
         return soup
 
     def create_opf(self, feeds, dir=None):
-        #super(MPHKRecipe,self).create_opf(feeds, dir)
+        if self.IsKindleUsed == False:
+            super(MPHKRecipe,self).create_opf(feeds, dir)
+            return
         if dir is None:
             dir = self.output_dir
         title = self.short_title()
-        if self.output_profile.periodical_date_in_title:
-            title += strftime(self.timefmt)
+        title += ' ' + self.get_fetchdate()
+        #if self.output_profile.periodical_date_in_title:
+        #    title += strftime(self.timefmt)
         mi = MetaInformation(title, [__appname__])
         mi.publisher = __appname__
         mi.author_sort = __appname__
        mi.publication_type = self.publication_type+':'+self.short_title()
-        mi.timestamp = nowf()
+        #mi.timestamp = nowf()
+        mi.timestamp = self.get_dtlocal()
         mi.comments = self.description
         if not isinstance(mi.comments, unicode):
             mi.comments = mi.comments.decode('utf-8', 'replace')
-        mi.pubdate = nowf()
+        #mi.pubdate = nowf()
+        mi.pubdate = self.get_dtlocal()
         opf_path = os.path.join(dir, 'index.opf')
         ncx_path = os.path.join(dir, 'index.ncx')
         opf = OPFCreator(dir, mi)
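The nowf() replacement above rides on one subtle trick: get_dtlocal() shifts UTC by +2 hours (a timedelta of -2.0/24 days is subtracted), not the +8 of real Hong Kong time. Since HKT is UTC+8, UTC+2 equals HKT-6, so the computed fetch date only rolls over at 6:00am Hong Kong time, when the day's paper is fully online. A quick check of that arithmetic, a pure-stdlib sketch assuming nothing beyond the recipe's own formula:

    import datetime

    def fetch_date(dt_utc):
        # UTC+2 == HKT-6: the date flips at 06:00 HKT, not at midnight
        return (dt_utc - datetime.timedelta(-2.0/24)).strftime("%Y%m%d")

    # 21:50 UTC on Dec 9 is 05:50 HKT on Dec 10 -> still counted as Dec 9
    print fetch_date(datetime.datetime(2010, 12, 9, 21, 50))  # 20101209
    # 22:10 UTC on Dec 9 is 06:10 HKT on Dec 10 -> counted as Dec 10
    print fetch_date(datetime.datetime(2010, 12, 9, 22, 10))  # 20101210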
11 resources/recipes/nacionred.recipe Normal file
@ -0,0 +1,11 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1291022049(BasicNewsRecipe):
+    title = u'NacionRed.com'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    language = 'es'
+    __author__ = 'Arturo Martinez Nieves'
+
+    feeds = [(u'NacionRed.com', u'http://feeds.weblogssl.com/nacionred?format=xml')]
+
@ -22,8 +22,19 @@ class NewYorker(BasicNewsRecipe):
     masthead_url = 'http://www.newyorker.com/css/i/hed/logo.gif'
     extra_css = """
                 body {font-family: "Times New Roman",Times,serif}
-                .articleauthor{color: #9F9F9F; font-family: Arial, sans-serif; font-size: small; text-transform: uppercase}
-                .rubric{color: #CD0021; font-family: Arial, sans-serif; font-size: small; text-transform: uppercase}
+                .articleauthor{color: #9F9F9F;
+                               font-family: Arial, sans-serif;
+                               font-size: small;
+                               text-transform: uppercase}
+                .rubric,.dd,h6#credit{color: #CD0021;
+                                      font-family: Arial, sans-serif;
+                                      font-size: small;
+                                      text-transform: uppercase}
+                .descender:first-letter{display: inline; font-size: xx-large; font-weight: bold}
+                .dd,h6#credit{color: gray}
+                .c{display: block}
+                .caption,h2#articleintro{font-style: italic}
+                .caption{font-size: small}
                 """
 
     conversion_options = {
@ -39,7 +50,7 @@ class NewYorker(BasicNewsRecipe):
                       ]
     remove_tags = [
                      dict(name=['meta','iframe','base','link','embed','object'])
-                    ,dict(attrs={'class':['utils','articleRailLinks','icons'] })
+                    ,dict(attrs={'class':['utils','socialUtils','articleRailLinks','icons'] })
                     ,dict(attrs={'id':['show-header','show-footer'] })
                   ]
     remove_attributes = ['lang']
@ -59,3 +70,13 @@ class NewYorker(BasicNewsRecipe):
         cover_url = 'http://www.newyorker.com' + cover_item['src'].strip()
         return cover_url
 
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        auth = soup.find(attrs={'id':'articleauthor'})
+        if auth:
+            alink = auth.find('a')
+            if alink and alink.string is not None:
+                txt = alink.string
+                alink.replaceWith(txt)
+        return soup
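The new preprocess_html unlinks the byline: the <a> inside #articleauthor is swapped for its bare text so the author name no longer renders as a hyperlink. The same move in isolation, a minimal sketch using the BeautifulSoup 3 API these recipes import from calibre:

    from calibre.ebooks.BeautifulSoup import BeautifulSoup

    soup = BeautifulSoup('<div id="articleauthor"><a href="/x">Jane Doe</a></div>')
    auth = soup.find(attrs={'id': 'articleauthor'})
    alink = auth.find('a')
    if alink and alink.string is not None:
        # replaceWith substitutes the whole tag with the given text node
        txt = alink.string
        alink.replaceWith(txt)
    print soup  # <div id="articleauthor">Jane Doe</div>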
@ -1,19 +1,22 @@
 #!/usr/bin/env python
 
 __license__ = 'GPL v3'
-__copyright__ = '2010, Mateusz Kielar, matek09@gmail.com'
+__copyright__ = '2010, matek09, matek09@gmail.com'
 
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class Newsweek(BasicNewsRecipe):
-    EDITION = 0
+    FIND_LAST_FULL_ISSUE = True
+    EDITION = '0'
+    EXCLUDE_LOCKED = True
+    LOCKED_ICO = 'http://www.newsweek.pl/bins/media/static/newsweek/img/ico_locked.gif'
 
     title = u'Newsweek Polska'
-    __author__ = 'Mateusz Kielar'
+    __author__ = 'matek09'
     description = 'Weekly magazine'
     encoding = 'utf-8'
     no_stylesheets = True
-    language = 'en'
+    language = 'pl'
     remove_javascript = True
 
     keep_only_tags =[]
@ -33,24 +36,42 @@ class Newsweek(BasicNewsRecipe):
     def print_version(self, url):
         return url.replace("http://www.newsweek.pl/artykuly/wydanie/" + str(self.EDITION), "http://www.newsweek.pl/artykuly") + '/print'
 
+    def is_locked(self, a):
+        if a.findNext('img')['src'] == 'http://www.newsweek.pl/bins/media/static/newsweek/img/ico_locked.gif':
+            return True
+        else:
+            return False
+
+    def is_full(self, issue_soup):
+        if len(issue_soup.findAll('img', attrs={'src' : 'http://www.newsweek.pl/bins/media/static/newsweek/img/ico_locked.gif'})) > 1:
+            return False
+        else:
+            return True
+
     def find_last_full_issue(self):
-        page = self.index_to_soup('http://www.newsweek.pl/Frames/IssueCover.aspx')
-        issue = 'http://www.newsweek.pl/Frames/' + page.find(lambda tag: tag.name == 'span' and not tag.attrs).a['href']
-        page = self.index_to_soup(issue)
-        issue = 'http://www.newsweek.pl/Frames/' + page.find(lambda tag: tag.name == 'span' and not tag.attrs).a['href']
-        page = self.index_to_soup(issue)
-        self.EDITION = page.find('a', attrs={'target' : '_parent'})['href'].replace('/wydania/','')
+        frame_url = 'http://www.newsweek.pl/Frames/IssueCover.aspx'
+        while True:
+            frame_soup = self.index_to_soup(frame_url)
+            self.EDITION = frame_soup.find('a', attrs={'target' : '_parent'})['href'].replace('/wydania/','')
+            issue_soup = self.index_to_soup('http://www.newsweek.pl/wydania/' + self.EDITION)
+            if self.is_full(issue_soup):
+                break
+            frame_url = 'http://www.newsweek.pl/Frames/' + frame_soup.find(lambda tag: tag.name == 'span' and not tag.attrs).a['href']
 
     def parse_index(self):
+        if self.FIND_LAST_FULL_ISSUE:
             self.find_last_full_issue()
-        soup = self.index_to_soup('http://www.newsweek.pl/wydania/' + str(self.EDITION))
+        soup = self.index_to_soup('http://www.newsweek.pl/wydania/' + self.EDITION)
         img = soup.find('img', id="ctl00_C1_PaperIsssueView_IssueImage", src=True)
         self.cover_url = img['src']
         feeds = []
         parent = soup.find(id='content-left-big')
         for txt in parent.findAll(attrs={'class':'txt_normal_red strong'}):
-            section = self.tag_to_string(txt).capitalize()
             articles = list(self.find_articles(txt))
+            if len(articles) > 0:
+                section = self.tag_to_string(txt).capitalize()
                 feeds.append((section, articles))
         return feeds
 
@ -58,9 +79,11 @@ class Newsweek(BasicNewsRecipe):
         for a in txt.findAllNext( attrs={'class':['strong','hr']}):
             if a.name in "div":
                 break
+            if (not self.FIND_LAST_FULL_ISSUE) & self.EXCLUDE_LOCKED & self.is_locked(a):
+                continue
             yield {
                 'title' : self.tag_to_string(a),
-                'url' : 'http://www.newsweek.pl'+a['href'],
+                'url' : 'http://www.newsweek.pl' + a['href'],
                 'date' : '',
                 'description' : ''
                 }
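One caveat worth flagging in the new locked-article guard: it chains its conditions with the bitwise & rather than the boolean and. On plain bools the truth value comes out the same, but & does not short-circuit, so is_locked(a) runs even when both flags already decide the outcome. A small illustrative sketch of the difference (the raising stub is hypothetical, standing in for an entry with no trailing <img>):

    def is_locked(a):
        # stand-in for the recipe's is_locked(); an image-less entry would blow up here
        raise AttributeError('no <img> follows this link')

    FIND_LAST_FULL_ISSUE = True
    EXCLUDE_LOCKED = True

    # boolean 'and' short-circuits, so the raising call is never reached:
    print (not FIND_LAST_FULL_ISSUE) and EXCLUDE_LOCKED and is_locked(None)  # False

    # bitwise '&' evaluates every operand eagerly; uncommenting this line raises:
    # print (not FIND_LAST_FULL_ISSUE) & EXCLUDE_LOCKED & is_locked(None)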
@ -32,12 +32,9 @@ class NikkeiNet_sub_life(BasicNewsRecipe):
     remove_tags_after = {'class':"cmn-pr_list"}
 
     feeds = [ (u'\u304f\u3089\u3057', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kurashi'),
-              (u'\u30b9\u30dd\u30fc\u30c4', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sports'),
-              (u'\u793e\u4f1a', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=shakai'),
              (u'\u30a8\u30b3', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=eco'),
              (u'\u5065\u5eb7', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kenkou'),
-              (u'\u7279\u96c6', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=special'),
-              (u'\u30e9\u30f3\u30ad\u30f3\u30b0', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=ranking')
+              (u'\u7279\u96c6', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=special')
            ]
 
     def get_browser(self):
102 resources/recipes/nikkei_sub_shakai.recipe Normal file
@ -0,0 +1,102 @@
+__license__   = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
+'''
+www.nikkei.com
+'''
+
+import re
+from calibre.web.feeds.recipes import BasicNewsRecipe
+import mechanize
+from calibre.ptempfile import PersistentTemporaryFile
+
+
+class NikkeiNet_sub_life(BasicNewsRecipe):
+    title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u751f\u6d3b)'
+    __author__ = 'Hiroshi Miura'
+    description = 'News and current market affairs from Japan'
+    cover_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
+    masthead_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
+    needs_subscription = True
+    oldest_article = 2
+    max_articles_per_feed = 20
+    language = 'ja'
+    remove_javascript = False
+    temp_files = []
+
+    remove_tags_before = {'class':"cmn-section cmn-indent"}
+    remove_tags = [
+        {'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
+        {'class':"cmn-article_keyword cmn-clearfix"},
+        {'class':"cmn-print_headline cmn-clearfix"},
+    ]
+    remove_tags_after = {'class':"cmn-pr_list"}
+
+    feeds = [
+        (u'\u793e\u4f1a', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=shakai')
+    ]
+
+    def get_browser(self):
+        br = BasicNewsRecipe.get_browser()
+
+        cj = mechanize.LWPCookieJar()
+        br.set_cookiejar(cj)
+
+        #br.set_debug_http(True)
+        #br.set_debug_redirects(True)
+        #br.set_debug_responses(True)
+
+        if self.username is not None and self.password is not None:
+            #print "----------------------------get login form--------------------------------------------"
+            # open login form
+            br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam')
+            response = br.response()
+            #print "----------------------------get login form---------------------------------------------"
+            #print "----------------------------set login form---------------------------------------------"
+            # remove disabled input which brings error on mechanize
+            response.set_data(response.get_data().replace("<input id=\"j_id48\"", "<!-- "))
+            response.set_data(response.get_data().replace("gm_home_on.gif\" />", " -->"))
+            br.set_response(response)
+            br.select_form(name='LA0010Form01')
+            br['LA0010Form01:LA0010Email']    = self.username
+            br['LA0010Form01:LA0010Password'] = self.password
+            br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
+            br.submit()
+            br.response()
+            #print "----------------------------send login form---------------------------------------------"
+            #print "----------------------------open news main page-----------------------------------------"
+            # open news site
+            br.open('http://www.nikkei.com/')
+            br.response()
+            #print "----------------------------www.nikkei.com BODY --------------------------------------"
+            #print response2.get_data()
+            #print "-------------------------^^-got auto redirect form----^^--------------------------------"
+            # forced redirect in default
+            br.select_form(nr=0)
+            br.submit()
+            response3 = br.response()
+            # return some cookie which should be set by Javascript
+            #print response3.geturl()
+            raw = response3.get_data()
+            #print "---------------------------response to form --------------------------------------------"
+            # grab cookie from JS and set it
+            redirectflag = re.search(r"var checkValue = '(\d+)';", raw, re.M).group(1)
+            br.select_form(nr=0)
+
+            self.temp_files.append(PersistentTemporaryFile('_fa.html'))
+            self.temp_files[-1].write("#LWP-Cookies-2.0\n")
+
+            self.temp_files[-1].write("Set-Cookie3: Cookie-dummy=Cookie-value; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
+            self.temp_files[-1].write("Set-Cookie3: redirectFlag="+redirectflag+"; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
+            self.temp_files[-1].close()
+            cj.load(self.temp_files[-1].name)
+
+            br.submit()
+
+        #br.set_debug_http(False)
+        #br.set_debug_redirects(False)
+        #br.set_debug_responses(False)
+        return br
+
+
+
+
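The login dance in this new recipe works around a cookie that nikkei.com normally sets from JavaScript: it scrapes checkValue out of the page source, writes it into a temporary file in LWP's Set-Cookie3 format, and loads that file into the cookie jar before submitting the redirect form. Just that trick in isolation, a sketch using the stdlib tempfile in place of calibre's PersistentTemporaryFile (the domain and cookie name follow the recipe; everything else is illustrative):

    import mechanize, tempfile

    def inject_cookie(cj, name, value, domain='.nikkei.com'):
        # mechanize cannot run the JavaScript that sets this cookie,
        # so hand-write it in LWP's Set-Cookie3 file format and load it
        f = tempfile.NamedTemporaryFile(suffix='.lwp', delete=False)
        f.write("#LWP-Cookies-2.0\n")
        f.write('Set-Cookie3: %s=%s; domain="%s"; path="/"; '
                'path_spec; secure; expires="2029-12-21 05:07:59Z"; version=0\n'
                % (name, value, domain))
        f.close()
        cj.load(f.name)

    br = mechanize.Browser()
    cj = mechanize.LWPCookieJar()
    br.set_cookiejar(cj)
    inject_cookie(cj, 'redirectFlag', '1')  # cookie now rides on every request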
@ -8,12 +8,15 @@ www.nin.co.rs
|
|||||||
import re
|
import re
|
||||||
from calibre import strftime
|
from calibre import strftime
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from contextlib import closing
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||||
|
from calibre import entity_to_unicode
|
||||||
|
|
||||||
class Nin(BasicNewsRecipe):
|
class Nin(BasicNewsRecipe):
|
||||||
title = 'NIN online'
|
title = 'NIN online'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Nedeljne Informativne Novine'
|
description = 'Nedeljne Informativne Novine'
|
||||||
publisher = 'NIN d.o.o.'
|
publisher = 'NIN d.o.o. - Ringier d.o.o.'
|
||||||
category = 'news, politics, Serbia'
|
category = 'news, politics, Serbia'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
delay = 1
|
delay = 1
|
||||||
@ -26,18 +29,29 @@ class Nin(BasicNewsRecipe):
|
|||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = 'sr'
|
language = 'sr'
|
||||||
publication_type = 'magazine'
|
publication_type = 'magazine'
|
||||||
extra_css = ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: Verdana, Lucida, sans1, sans-serif} .article_description{font-family: Verdana, Lucida, sans1, sans-serif} .artTitle{font-size: x-large; font-weight: bold; color: #900} .izjava{font-size: x-large; font-weight: bold} .columnhead{font-size: small; font-weight: bold;} img{margin-top:0.5em; margin-bottom: 0.7em} b{margin-top: 1em} '
|
extra_css = """
|
||||||
|
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
|
||||||
|
body{font-family: Verdana, Lucida, sans1, sans-serif}
|
||||||
|
.article_description{font-family: Verdana, Lucida, sans1, sans-serif}
|
||||||
|
.artTitle{font-size: x-large; font-weight: bold; color: #900}
|
||||||
|
.izjava{font-size: x-large; font-weight: bold}
|
||||||
|
.columnhead{font-size: small; font-weight: bold;}
|
||||||
|
img{margin-top:0.5em; margin-bottom: 0.7em; display: block}
|
||||||
|
b{margin-top: 1em}
|
||||||
|
"""
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment' : description
|
||||||
, 'tags' : category
|
, 'tags' : category
|
||||||
, 'publisher' : publisher
|
, 'publisher' : publisher
|
||||||
, 'language' : language
|
, 'language' : language
|
||||||
, 'linearize_tables' : True
|
|
||||||
}
|
}
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [
|
||||||
remove_attributes = ['height','width']
|
(re.compile(r'</body>.*?<html>', re.DOTALL|re.IGNORECASE),lambda match: '</body>')
|
||||||
|
,(re.compile(r'</html>.*?</html>', re.DOTALL|re.IGNORECASE),lambda match: '</html>')
|
||||||
|
,(re.compile(u'\u0110'), lambda match: u'\u00D0')
|
||||||
|
]
|
||||||
|
|
||||||
def get_browser(self):
|
def get_browser(self):
|
||||||
br = BasicNewsRecipe.get_browser()
|
br = BasicNewsRecipe.get_browser()
|
||||||
@ -50,7 +64,10 @@ class Nin(BasicNewsRecipe):
|
|||||||
return br
|
return br
|
||||||
|
|
||||||
keep_only_tags =[dict(name='td', attrs={'width':'520'})]
|
keep_only_tags =[dict(name='td', attrs={'width':'520'})]
|
||||||
|
remove_tags_before =dict(name='span', attrs={'class':'izjava'})
|
||||||
remove_tags_after =dict(name='html')
|
remove_tags_after =dict(name='html')
|
||||||
|
remove_tags = [dict(name=['object','link','iframe','meta','base'])]
|
||||||
|
remove_attributes=['border','background','height','width','align','valign']
|
||||||
|
|
||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
cover_url = None
|
cover_url = None
|
||||||
@ -63,7 +80,7 @@ class Nin(BasicNewsRecipe):
|
|||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
articles = []
|
articles = []
|
||||||
count = 0
|
count = 0
|
||||||
soup = self.index_to_soup(self.PREFIX)
|
soup = self.index_to_soup(self.INDEX)
|
||||||
for item in soup.findAll('a',attrs={'class':'lmeninavFont'}):
|
for item in soup.findAll('a',attrs={'class':'lmeninavFont'}):
|
||||||
count = count +1
|
count = count +1
|
||||||
if self.test and count > 2:
|
if self.test and count > 2:
|
||||||
@ -90,3 +107,45 @@ class Nin(BasicNewsRecipe):
|
|||||||
articles.append((section,inarts))
|
articles.append((section,inarts))
|
||||||
return articles
|
return articles
|
||||||
|
|
||||||
|
def index_to_soup(self, url_or_raw, raw=False):
|
||||||
|
if re.match(r'\w+://', url_or_raw):
|
||||||
|
open_func = getattr(self.browser, 'open_novisit', self.browser.open)
|
||||||
|
with closing(open_func(url_or_raw)) as f:
|
||||||
|
_raw = f.read()
|
||||||
|
if not _raw:
|
||||||
|
raise RuntimeError('Could not fetch index from %s'%url_or_raw)
|
||||||
|
else:
|
||||||
|
_raw = url_or_raw
|
||||||
|
if raw:
|
||||||
|
return _raw
|
||||||
|
if not isinstance(_raw, unicode) and self.encoding:
|
||||||
|
if callable(self.encoding):
|
||||||
|
_raw = self.encoding(_raw)
|
||||||
|
else:
|
||||||
|
_raw = _raw.decode(self.encoding, 'replace')
|
||||||
|
massage = list(BeautifulSoup.MARKUP_MASSAGE)
|
||||||
|
enc = 'cp1252' if callable(self.encoding) or self.encoding is None else self.encoding
|
||||||
|
massage.append((re.compile(r'&(\S+?);'), lambda match:
|
||||||
|
entity_to_unicode(match, encoding=enc)))
|
||||||
|
massage.append((re.compile(r'[\x00-\x08]+'), lambda match:
|
||||||
|
''))
|
||||||
|
return BeautifulSoup(_raw, markupMassage=massage)
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
for item in soup.findAll('div'):
|
||||||
|
if len(item.contents) == 0:
|
||||||
|
item.extract()
|
||||||
|
for item in soup.findAll(['td','tr']):
|
||||||
|
item.name='div'
|
||||||
|
for item in soup.findAll('img'):
|
||||||
|
if not item.has_key('alt'):
|
||||||
|
item['alt'] = 'image'
|
||||||
|
for tbl in soup.findAll('table'):
|
||||||
|
img = tbl.find('img')
|
||||||
|
if img:
|
||||||
|
img.extract()
|
||||||
|
tbl.replaceWith(img)
|
||||||
|
return soup
|
||||||
|
|
||||||
|
@ -282,9 +282,9 @@ class NYTimes(BasicNewsRecipe):
|
|||||||
br = BasicNewsRecipe.get_browser()
|
br = BasicNewsRecipe.get_browser()
|
||||||
if self.username is not None and self.password is not None:
|
if self.username is not None and self.password is not None:
|
||||||
br.open('http://www.nytimes.com/auth/login')
|
br.open('http://www.nytimes.com/auth/login')
|
||||||
br.select_form(name='login')
|
br.form = br.forms().next()
|
||||||
br['USERID'] = self.username
|
br['userid'] = self.username
|
||||||
br['PASSWORD'] = self.password
|
br['password'] = self.password
|
||||||
raw = br.submit().read()
|
raw = br.submit().read()
|
||||||
if 'Please try again' in raw:
|
if 'Please try again' in raw:
|
||||||
raise Exception('Your username and password are incorrect')
|
raise Exception('Your username and password are incorrect')
|
||||||
|
@ -282,9 +282,9 @@ class NYTimes(BasicNewsRecipe):
|
|||||||
br = BasicNewsRecipe.get_browser()
|
br = BasicNewsRecipe.get_browser()
|
||||||
if self.username is not None and self.password is not None:
|
if self.username is not None and self.password is not None:
|
||||||
br.open('http://www.nytimes.com/auth/login')
|
br.open('http://www.nytimes.com/auth/login')
|
||||||
br.select_form(name='login')
|
br.form = br.forms().next()
|
||||||
br['USERID'] = self.username
|
br['userid'] = self.username
|
||||||
br['PASSWORD'] = self.password
|
br['password'] = self.password
|
||||||
raw = br.submit().read()
|
raw = br.submit().read()
|
||||||
if 'Please try again' in raw:
|
if 'Please try again' in raw:
|
||||||
raise Exception('Your username and password are incorrect')
|
raise Exception('Your username and password are incorrect')
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
|
||||||
'''
|
'''
|
||||||
www.nzz.ch
|
www.nzz.ch
|
||||||
@ -20,6 +20,19 @@ class Nzz(BasicNewsRecipe):
|
|||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = 'de'
|
language = 'de'
|
||||||
|
extra_css = """
|
||||||
|
body{font-family: Georgia,"Times New Roman",Times,serif }
|
||||||
|
.artikel h3,.artikel h4,.bildLegende,.question,.autor{font-family: Arial,Verdana,Helvetica,sans-serif}
|
||||||
|
.bildLegende{font-size: small}
|
||||||
|
.autor{font-size: 0.9375em; color: #666666}
|
||||||
|
.quote{font-size: large !important;
|
||||||
|
font-style: italic;
|
||||||
|
font-weight: normal !important;
|
||||||
|
border-bottom: 1px dotted #BFBFBF;
|
||||||
|
border-top: 1px dotted #BFBFBF;
|
||||||
|
line-height: 1.25em}
|
||||||
|
.quelle{color: #666666; font-style: italic; white-space: nowrap}
|
||||||
|
"""
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comments' : description
|
'comments' : description
|
||||||
@ -28,12 +41,14 @@ class Nzz(BasicNewsRecipe):
|
|||||||
,'publisher' : publisher
|
,'publisher' : publisher
|
||||||
}
|
}
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'class':'article'})]
|
keep_only_tags = [dict(name='div', attrs={'class':'zone'})]
|
||||||
|
remove_tags_before = dict(name='p', attrs={'class':'dachzeile'})
|
||||||
|
remove_tags_after=dict(name='p', attrs={'class':'fussnote'})
|
||||||
|
remove_attributes=['width','height','lang']
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name=['object','link','base'])
|
dict(name=['object','link','base','meta','iframe'])
|
||||||
,dict(name='div',attrs={'class':['more','teaser','advXertXoriXals','legal']})
|
,dict(attrs={'id':'content_rectangle_1'})
|
||||||
,dict(name='div',attrs={'id':['popup-src','readercomments','google-ad','advXertXoriXals']})
|
,dict(attrs={'class':['weiterfuehrendeLinks','fussnote','video']})
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
@ -50,7 +65,7 @@ class Nzz(BasicNewsRecipe):
|
|||||||
,(u'Reisen' , u'http://www.nzz.ch/magazin/reisen?rss=true')
|
,(u'Reisen' , u'http://www.nzz.ch/magazin/reisen?rss=true')
|
||||||
]
|
]
|
||||||
|
|
||||||
def print_version(self, url):
|
def preprocess_html(self, soup):
|
||||||
return url + '?printview=true'
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
return self.adeify_images(soup)
|
||||||
|
@ -1,18 +1,18 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2010, Mateusz Kielar, matek09@gmail.com'
|
__copyright__ = '2010, matek09, matek09@gmail.com'
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class Polityka(BasicNewsRecipe):
|
class Polityka(BasicNewsRecipe):
|
||||||
|
|
||||||
title = u'Polityka'
|
title = u'Polityka'
|
||||||
__author__ = 'Mateusz Kielar'
|
__author__ = 'matek09'
|
||||||
description = 'Weekly magazine. Last archive issue'
|
description = 'Weekly magazine. Last archive issue'
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
language = 'en'
|
language = 'pl'
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
|
|
||||||
remove_tags_before = dict(dict(name = 'h2', attrs = {'class' : 'box_nag'}))
|
remove_tags_before = dict(dict(name = 'h2', attrs = {'class' : 'box_nag'}))
|
||||||
@ -48,7 +48,6 @@ class Polityka(BasicNewsRecipe):
|
|||||||
for div in box.findAll('div', attrs={'class': 'list_tresc'}):
|
for div in box.findAll('div', attrs={'class': 'list_tresc'}):
|
||||||
article_page = self.index_to_soup('http://archiwum.polityka.pl' + div.a['href'],)
|
article_page = self.index_to_soup('http://archiwum.polityka.pl' + div.a['href'],)
|
||||||
section = self.tag_to_string(article_page.find('h2', attrs = {'class' : 'box_nag'})).split('/')[0].lstrip().rstrip()
|
section = self.tag_to_string(article_page.find('h2', attrs = {'class' : 'box_nag'})).split('/')[0].lstrip().rstrip()
|
||||||
print section
|
|
||||||
if not articles.has_key(section):
|
if not articles.has_key(section):
|
||||||
articles[section] = []
|
articles[section] = []
|
||||||
articles[section].append( {
|
articles[section].append( {
|
||||||
|
19
resources/recipes/poughkeepsie_journal.recipe
Normal file
19
resources/recipes/poughkeepsie_journal.recipe
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class AdvancedUserRecipe1291143841(BasicNewsRecipe):
|
||||||
|
title = u'Poughkeepsipe Journal'
|
||||||
|
language = 'en'
|
||||||
|
__author__ = 'weebl'
|
||||||
|
oldest_article = 7
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
no_stylesheets = True
|
||||||
|
timefmt = ' [%a, %d %b, %Y]'
|
||||||
|
feeds = [(u'Local News', u'http://poughkeepsiejournal.com/apps/pbcs.dll/oversikt?Category=RSS01&mime=xml'),
|
||||||
|
(u'Local Business', u'http://poughkeepsiejournal.com/apps/pbcs.dll/oversikt?Category=RSS02&mime=xml'),
|
||||||
|
(u'Local Sports', u'http://poughkeepsiejournal.com/apps/pbcs.dll/oversikt?Category=RSS03&mime=xml'),
|
||||||
|
(u'Life', u'http://poughkeepsiejournal.com/apps/pbcs.dll/oversikt?Category=RSS04&mime=xml')]
|
||||||
|
remove_tags = [dict(name='img', attrs={'src':'/graphics/mastlogo.gif'})]
|
||||||
|
|
||||||
|
def print_version(self, url):
|
||||||
|
return url.replace('http://www.poughkeepsiejournal.com', 'http://www.poughkeepsiejournal.com/print')
|
||||||
|
|
70
resources/recipes/st_louis_post_dispatch.recipe
Normal file
70
resources/recipes/st_louis_post_dispatch.recipe
Normal file
@ -0,0 +1,70 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class AdvancedUserRecipe1282093204(BasicNewsRecipe):
|
||||||
|
title = u'St Louis Post-Dispatch'
|
||||||
|
__author__ = 'cisaak'
|
||||||
|
language = 'en'
|
||||||
|
|
||||||
|
oldest_article = 1
|
||||||
|
max_articles_per_feed = 15
|
||||||
|
masthead_url = 'http://farm5.static.flickr.com/4118/4929686950_0e22e2c88a.jpg'
|
||||||
|
|
||||||
|
feeds = [
|
||||||
|
(u'News-Bill McClellan', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=news%2Flocal%2fcolumns%2Fbill-mclellan&f=rss&t=article'),
|
||||||
|
(u'News-Columns', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=news%2Flocal%2Fcolumns*&l=50&f=rss&t=article'),
|
||||||
|
(u'News-Crime & Courtshttp://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=news%2Flocal%2Fcrime-and-courts&l=50&f=rss&t=article'),
|
||||||
|
(u'News-Deb Peterson', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=news%2Flocal%2fcolumns%2Fdeb-peterson&f=rss&t=article'),
|
||||||
|
(u'News-Education', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=news%2Flocal%2feducation&f=rss&t=article'),
|
||||||
|
(u'News-Government & Politics', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=news%2Flocal%2fgovt-and-politics&f=rss&t=article'),
|
||||||
|
(u'News-Local', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=news%2Flocal&f=rss&t=article'),
|
||||||
|
(u'News-Metro', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=news%2Flocal%2fmetro&f=rss&t=article'),
|
||||||
|
(u'News-Metro East', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=news%2Flocal%2fillinois&f=rss&t=article'),
|
||||||
|
(u'News-Missouri Out State', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=news%2Fstate-and-regional%2FMissouri&l=50&f=rss&t=article'),
|
||||||
|
(u'Opinion-Colleen Carroll Campbell', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=news%2Fopinion%2fcolumns%2Fcolleen-carroll-campbell&f=rss&t=article'),
|
||||||
|
(u'Opinion-Editorial', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=news%2Fopinion%2feditorial&f=rss&t=article'),
|
||||||
|
(u'Opinion-Kevin Horrigan', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=news%2Fopinion%2fcolumns%2Fkevin-horrigan&f=rss&t=article'),
|
||||||
|
(u'Opinion-Mailbag', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=news%2Fopinion%2fmailbag&f=rss&t=article'),
|
||||||
|
(u'Business Columns-Savvy Consumer', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=business%2Fcolumns%2Fsavvy-consumer&l=100&f=rss&t=article'),
|
||||||
|
(u'Business Columns-Lager Heads', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=business%2Fcolumns%2Flager-heads&l=100&f=rss&t=article'),
|
||||||
|
(u'Business Columns-Job Watch', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=business%2Fcolumns%2Fjob-watch&l=100&f=rss&t=article'),
|
||||||
|
(u'Business Columns-Steve Geigerich', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=business%2Fcolumns%2Fsteve-giegerich&l=100&f=rss&t=article'),
|
||||||
|
(u'Business Columns-David Nicklaus', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=business%2Fcolumns%2Fdavid-nicklaus&l=100&f=rss&t=article'),
|
||||||
|
(u'Business Columns-Jim Gallagher', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=business%2Fcolumns%2Fjim-gallagher&l=100&f=rss&t=article'),
|
||||||
|
(u'Business Columns-Building Blocks', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=business%2Fcolumns%2Fbuilding-blocks&l=100&f=rss&t=article'),
|
||||||
|
(u'Business', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=business*l&l=100&f=rss&t=article'),
|
||||||
|
(u'Business-Technology', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=business%2Ftechnology&l=50&f=rss&t=article'),
|
||||||
|
(u'Business-National', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=business%2Fnational-and-international&l=50&f=rss&t=article'),
|
||||||
|
(u'Travel', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=travel*&l=100&f=rss&t=article'),
|
||||||
|
(u'Sports', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=sports*&f=rss&t=article'),
|
||||||
|
(u'Sports-Baseball', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=sports%2Fbaseball%2Fprofessional&l=100&f=rss&t=article'),
|
||||||
|
(u'Sports-Bernie Miklasz', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=sports%2Fcolumns%2Fbernie-miklasz&l=50&f=rss&t=article'),
|
||||||
|
(u'Sports-Bryan Burwell', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=sports%2Fcolumns%2Fbryan-burwell&l=50&f=rss&t=article'),
|
||||||
|
(u'Sports-College', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=sports%2Fcollege*&l=100&f=rss&t=article'),
|
||||||
|
(u'Sports-Dan Caesar', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=sports%2Fcolumns%2Fdan-caesar&l=50&f=rss&t=article'),
|
||||||
|
(u'Sports-Football', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=sports%2Ffootball%2Fprofessional&l=100&f=rss&t=article'),
|
||||||
|
(u'Sports-Hockey', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=sports%2Fhockey%2Fprofessional&l=100&f=rss&t=article'),
|
||||||
|
(u'Sports-Illini', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=sports%2Fcollege%2Fillini&l=100&f=rss&t=article'),
|
||||||
|
(u'Sports-Jeff Gordon', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=sports%2Fcolumns%2Fjeff-gordon&l=100&f=rss&t=article'),
|
||||||
|
(u'Life & Style', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=lifestyles&l=100&f=rss&t=article'),
|
||||||
|
(u'Life & Style-Debra Bass', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=lifestyles%2Ffashion-and-style%2Fdebra-bass&l=100&f=rss&t=article'),
|
||||||
|
(u'Life & Style-Food and Cooking', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=lifestyles%2Ffood-and-cooking&l=100&f=rss&t=article'),
|
||||||
|
(u'Life & Style-Health/Medicine/Fitness', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=lifestyles%2Fhealth-med-fit&l=100&f=rss&t=article'),
|
||||||
|
(u'Life & Style-Joe Holleman', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=lifestyles%2Fcolumns%2Fjoe-holleman&l=100&f=rss&t=article'),
|
||||||
|
(u'Life & Style-Steals-and-Deals', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=lifestyles%2Fcolumns%2Fsteals-and-deals&l=100&f=rss&t=article'),
|
||||||
|
(u'Life & Style-Tim Townsend', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=lifestyles%2Ffaith-and-values%2Ftim-townsend&l=100&f=rss&t=article'),
|
||||||
|
(u'Entertainment', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=entertainment&l=100&f=rss&t=article'),
|
||||||
|
(u'Entertainment-Arts & Theatre', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=entertainment%2Farts-and-theatre&l=100&f=rss&t=article'),
|
||||||
|
(u'Entertainment-Books & Literature', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=entertainment%2Fbooks-and-literature&l=100&f=rss&t=article'),
|
||||||
|
(u'Entertainment-Dining', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=sports%2Fhockey%2Fprofessional&l=100&f=rss&t=article'),
|
||||||
|
(u'Entertainment-Events Calendar', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=entertainment%2Fevents-calendar&l=100&f=rss&t=article'),
|
||||||
|
(u'Entertainment-Gail Pennington', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=entertainment%2Ftelevision%2Fgail-pennington&l=100&f=rss&t=article'),
|
||||||
|
(u'Entertainment-Hip Hops', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=entertainment%2Fdining%2Fbars-and-clubs-other%2Fhip-hops&l=100&f=rss&t=article'),
|
||||||
|
(u'Entertainment-House-O-Fun', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=entertainment%2Fhouse-o-fun&l=100&f=rss&t=article'),
|
||||||
|
(u'Entertainment-Kevin C. Johnson', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=entertainment%2Fmusic%2Fkevin-johnson&l=100&f=rss&t=article')
|
||||||
|
]
|
||||||
|
remove_empty_feeds = True
|
||||||
|
remove_tags = [dict(name='div', attrs={'id':'blox-logo'}),dict(name='a')]
|
||||||
|
keep_only_tags = [dict(name='h1'), dict(name='p', attrs={'class':'byline'}), dict(name="div", attrs={'id':'blox-story-text'})]
|
||||||
|
extra_css = 'p {text-align: left;}'
|
||||||
|
|
||||||
|
|
@ -14,7 +14,7 @@ class TheHeiseOnline(BasicNewsRecipe):
|
|||||||
oldest_article = 3
|
oldest_article = 3
|
||||||
description = 'In association with Heise Online'
|
description = 'In association with Heise Online'
|
||||||
publisher = 'Heise Media UK Ltd.'
|
publisher = 'Heise Media UK Ltd.'
|
||||||
category = 'news, technology, security'
|
category = 'news, technology, security, OSS, internet'
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
language = 'en'
|
language = 'en'
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
@ -27,6 +27,12 @@ class TheHeiseOnline(BasicNewsRecipe):
|
|||||||
feeds = [
|
feeds = [
|
||||||
(u'The H News Feed', u'http://www.h-online.com/news/atom.xml')
|
(u'The H News Feed', u'http://www.h-online.com/news/atom.xml')
|
||||||
]
|
]
|
||||||
|
cover_url = 'http://www.h-online.com/icons/logo_theH.gif'
|
||||||
|
|
||||||
|
remove_tags = [
|
||||||
|
dict(id="logo"),
|
||||||
|
dict(id="footer")
|
||||||
|
]
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url + '?view=print'
|
return url + '?view=print'
|
||||||
|
68
resources/recipes/toyokeizai.recipe
Normal file
68
resources/recipes/toyokeizai.recipe
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
|
||||||
|
'''
|
||||||
|
www.toyokeizai.net
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
import re
|
||||||
|
|
||||||
|
class Toyokeizai(BasicNewsRecipe):
|
||||||
|
title = u'ToyoKeizai News'
|
||||||
|
__author__ = 'Hiroshi Miura'
|
||||||
|
oldest_article = 1
|
||||||
|
max_articles_per_feed = 50
|
||||||
|
description = 'Japanese traditional economy and business magazine, only for advanced subscribers supported'
|
||||||
|
publisher = 'Toyokeizai Shinbun Sha'
|
||||||
|
category = 'economy, magazine, japan'
|
||||||
|
language = 'ja'
|
||||||
|
encoding = 'euc-jp'
|
||||||
|
index = 'http://member.toyokeizai.net/news/'
|
||||||
|
remove_javascript = True
|
||||||
|
no_stylesheets = True
|
||||||
|
masthead_title = u'TOYOKEIZAI'
|
||||||
|
needs_subscription = True
|
||||||
|
timefmt = '[%y/%m/%d]'
|
||||||
|
recursions = 5
|
||||||
|
match_regexps =[ r'page/\d+']
|
||||||
|
|
||||||
|
keep_only_tags = [
|
||||||
|
dict(name='div', attrs={'class':['news']}),
|
||||||
|
dict(name='div', attrs={'class':["news_cont"]}),
|
||||||
|
dict(name='div', attrs={'class':["news_con"]}),
|
||||||
|
# dict(name='div', attrs={'class':["norightsMessage"]})
|
||||||
|
]
|
||||||
|
remove_tags = [{'class':"mt35 mgz"},
|
||||||
|
{'class':"mt20 newzia"},
|
||||||
|
{'class':"mt20 fontS"},
|
||||||
|
{'class':"bk_btn_m"},
|
||||||
|
dict(id='newzia_connect_member')
|
||||||
|
]
|
||||||
|
|
||||||
|
def parse_index(self):
|
||||||
|
feeds = []
|
||||||
|
soup = self.index_to_soup(self.index)
|
||||||
|
topstories = soup.find('ul',attrs={'class':'list6'})
|
||||||
|
if topstories:
|
||||||
|
newsarticles = []
|
||||||
|
for itt in topstories.findAll('li'):
|
||||||
|
itema = itt.find('a',href=True)
|
||||||
|
itemd = itt.find('span')
|
||||||
|
newsarticles.append({
|
||||||
|
'title' :itema.string
|
||||||
|
,'date' :re.compile(r"\- ").sub("",itemd.string)
|
||||||
|
,'url' :'http://member.toyokeizai.net' + itema['href']
|
||||||
|
,'description':itema['title']
|
||||||
|
})
|
||||||
|
feeds.append(('news', newsarticles))
|
||||||
|
return feeds
|
||||||
|
|
||||||
|
def get_browser(self):
|
||||||
|
br = BasicNewsRecipe.get_browser()
|
||||||
|
if self.username is not None and self.password is not None:
|
||||||
|
br.open('http://member.toyokeizai.net/norights/form/')
|
||||||
|
br.select_form(nr=0)
|
||||||
|
br['kaiin_id'] = self.username
|
||||||
|
br['password'] = self.password
|
||||||
|
br.submit()
|
||||||
|
return br
|
72
resources/recipes/tpm_uk.recipe
Normal file
72
resources/recipes/tpm_uk.recipe
Normal file
@ -0,0 +1,72 @@
__license__   = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.philosophypress.co.uk
'''

from calibre.web.feeds.news import BasicNewsRecipe

class TPM_uk(BasicNewsRecipe):
    title                 = "The Philosophers' Magazine"
    __author__            = 'Darko Miletic'
    description           = 'Title says it all'
    publisher             = "The Philosophers' Magazine"
    category              = 'philosophy, news'
    oldest_article        = 25
    max_articles_per_feed = 200
    no_stylesheets        = True
    encoding              = 'utf8'
    use_embedded_content  = False
    language              = 'en_GB'
    remove_empty_feeds    = True
    publication_type      = 'magazine'
    masthead_url          = 'http://www.philosophypress.co.uk/wp-content/themes/masterplan/tma/images/bg/sitelogo.png'
    extra_css             = """
        body{font-family: Helvetica,Arial,"Lucida Grande",Verdana,sans-serif }
        img{margin-bottom: 0.4em; display:block}
    """

    conversion_options = {
        'comment'   : description,
        'tags'      : category,
        'publisher' : publisher,
        'language'  : language
    }

    remove_tags = [
        dict(name=['meta','link','base','iframe','embed','object','img']),
        dict(attrs={'id':['respond','sharethis_0']}),
        dict(attrs={'class':'wp-caption-text'})
    ]
    keep_only_tags = [
        dict(attrs={'class':['post_cat','post_name','post_meta','post_text']}),
        dict(attrs={'id':'comments'})
    ]
    remove_attributes = ['lang','width','height']

    feeds = [
        (u'Columns'     , u'http://www.philosophypress.co.uk/?feed=rss2&cat=15' ),
        (u'Essays'      , u'http://www.philosophypress.co.uk/?feed=rss2&cat=19' ),
        (u'21st Century', u'http://www.philosophypress.co.uk/?feed=rss2&cat=101'),
        (u'Interviews'  , u'http://www.philosophypress.co.uk/?feed=rss2&cat=9'  ),
        (u'News'        , u'http://www.philosophypress.co.uk/?feed=rss2&cat=28' ),
        (u'Profiles'    , u'http://www.philosophypress.co.uk/?feed=rss2&cat=59' ),
        (u'Reviews'     , u'http://www.philosophypress.co.uk/?feed=rss2&cat=12' )
    ]

    def get_cover_url(self):
        soup = self.index_to_soup('http://www.philosophypress.co.uk/')
        for image in soup.findAll('img', title=True):
            if image['title'].startswith('Click to Subscribe'):
                return image['src']
        return None

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        for alink in soup.findAll('a', rel=True):
            if alink.string is not None:
                tstr = alink.string
                alink.replaceWith(tstr)
        return soup
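For illustration, the cover scraper above keys off the img title attribute. A self-contained equivalent of the loop in get_cover_url(), run against assumed markup (not taken from the live site):

    from BeautifulSoup import BeautifulSoup
    soup = BeautifulSoup('<img src="/covers/current.jpg" title="Click to Subscribe today"/>')
    for image in soup.findAll('img', title=True):
        if image['title'].startswith('Click to Subscribe'):
            print image['src']   # -> /covers/current.jpg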
37
resources/recipes/tr.recipe
Normal file
@ -0,0 +1,37 @@
__license__   = 'GPL v3'
__copyright__ = '2010, Anton Gillert <atx at binaryninja.de>'

'''
Fetch Technology Review.
'''
from time import strftime
from calibre.web.feeds.news import BasicNewsRecipe

class TechnologyReviewDe(BasicNewsRecipe):
    title = 'Technology Review'
    description = 'Technology news from Germany'
    __author__ = 'Anton Gillert'
    use_embedded_content = False
    language = 'de'
    timefmt = ' [%d %b %Y]'
    max_articles_per_feed = 40
    no_stylesheets = True

    feeds = [('Technology Review', 'http://www.heise.de/tr/news-atom.xml')]

    def print_version(self, url):
        return url + '?view=print'

    remove_tags = [dict(id='navi_top'),
                   dict(id='navi_bottom'),
                   dict(name='div', attrs={'class':'navi_top_logo'}),
                   dict(name='img', attrs={'src':'/tr/icons/tr_logo2006.gif'}),
                   dict(name='p', attrs={'class':'size80'})]
    remove_tags_after = [dict(name='p', attrs={'class':'size80'})]

    def get_cover_url(self):
        return 'http://www.heise-medien.de/presseinfo/bilder/tr/' + strftime("%y/tr%m%Y.jpg")
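Worked example of the cover URL pattern above (the run date is assumed for illustration):

    from time import strftime
    # Run in December 2010, "%y/tr%m%Y.jpg" expands to '10/tr122010.jpg', so
    # url == 'http://www.heise-medien.de/presseinfo/bilder/tr/10/tr122010.jpg'
    url = 'http://www.heise-medien.de/presseinfo/bilder/tr/' + strftime("%y/tr%m%Y.jpg")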
91
resources/recipes/wprost.recipe
Normal file
@ -0,0 +1,91 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2010, matek09, matek09@gmail.com'

from calibre.web.feeds.news import BasicNewsRecipe
import re

class Wprost(BasicNewsRecipe):
    EDITION = 0
    FIND_LAST_FULL_ISSUE = True
    EXCLUDE_LOCKED = True
    ICO_BLOCKED = 'http://www.wprost.pl/G/icons/ico_blocked.gif'

    title = u'Wprost'
    __author__ = 'matek09'
    description = 'Weekly magazine'
    encoding = 'ISO-8859-2'
    no_stylesheets = True
    language = 'pl'
    remove_javascript = True

    remove_tags_before = dict(name='div', attrs={'id':'print-layer'})
    remove_tags_after = dict(name='div', attrs={'id':'print-layer'})

    '''keep_only_tags = []
    keep_only_tags.append(dict(name = 'table', attrs = {'id' : 'title-table'}))
    keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'div-header'}))
    keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'div-content'}))
    keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'def element-autor'}))'''

    preprocess_regexps = [(re.compile(r'style="display: none;"'), lambda match: ''),
                          (re.compile(r'display: block;'), lambda match: '')]

    remove_tags = []
    remove_tags.append(dict(name='div', attrs={'class':'def element-date'}))
    remove_tags.append(dict(name='div', attrs={'class':'def silver'}))
    remove_tags.append(dict(name='div', attrs={'id':'content-main-column-right'}))

    extra_css = '''
        .div-header {font-size: x-small; font-weight: bold}
    '''
    #h2 {font-size: x-large; font-weight: bold}

    def is_blocked(self, a):
        if a.findNextSibling('img') is None:
            return False
        else:
            return True

    def find_last_issue(self):
        soup = self.index_to_soup('http://www.wprost.pl/archiwum/')
        a = 0
        if self.FIND_LAST_FULL_ISSUE:
            ico_blocked = soup.findAll('img', attrs={'src': self.ICO_BLOCKED})
            a = ico_blocked[-1].findNext('a', attrs={'title': re.compile('Zobacz spis tre.ci')})
        else:
            a = soup.find('a', attrs={'title': re.compile('Zobacz spis tre.ci')})
        self.EDITION = a['href'].replace('/tygodnik/?I=', '')
        self.cover_url = a.img['src']

    def parse_index(self):
        self.find_last_issue()
        soup = self.index_to_soup('http://www.wprost.pl/tygodnik/?I=' + self.EDITION)
        feeds = []
        for main_block in soup.findAll(attrs={'class':'main-block-s3 s3-head head-red3'}):
            articles = list(self.find_articles(main_block))
            if len(articles) > 0:
                section = self.tag_to_string(main_block)
                feeds.append((section, articles))
        return feeds

    def find_articles(self, main_block):
        for a in main_block.findAllNext(attrs={'style':['', 'padding-top: 15px;']}):
            if a.name in "td":
                break
            if self.EXCLUDE_LOCKED and self.is_blocked(a):
                continue
            yield {
                'title': self.tag_to_string(a),
                'url': 'http://www.wprost.pl' + a['href'],
                'date': '',
                'description': ''
            }
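For reference, a sketch of the structure parse_index() above must hand back to calibre: a list of (section title, list-of-article-dicts) pairs. The section name, title and URL below are placeholders:

    feeds = [
        ('Kraj', [
            {'title': 'Przykladowy artykul',            # placeholder title
             'url': 'http://www.wprost.pl/ar/123456/',  # placeholder URL
             'date': '',
             'description': ''},
        ]),
    ]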
@ -21,7 +21,7 @@ class YOLNews(BasicNewsRecipe):
     remove_javascript = True
     masthead_title = u'YOMIURI ONLINE'

-    remove_tags_before = {'class':"article-def"}
+    keep_only_tags = [{'class':"article-def"}]
     remove_tags = [{'class':"RelatedArticle"},
                    {'class':"sbtns"}
                   ]
@ -21,7 +21,7 @@ class YOLNews(BasicNewsRecipe):
     remove_javascript = True
     masthead_title = u"YOMIURI ONLINE"

-    remove_tags_before = {'class':"article-def"}
+    keep_only_tags = [{'class':"article-def"}]
     remove_tags = [{'class':"RelatedArticle"},
                    {'class':"sbtns"}
                   ]
@ -1,5 +1,5 @@
 " Project wide builtins
-let g:pyflakes_builtins += ["dynamic_property", "__", "P", "I", "lopen"]
+let g:pyflakes_builtins += ["dynamic_property", "__", "P", "I", "lopen", "icu_lower", "icu_upper", "icu_title"]

 python << EOFPY
 import os
@ -91,11 +91,15 @@ podofo_inc = '/usr/include/podofo'
 podofo_lib = '/usr/lib'
 chmlib_inc_dirs = chmlib_lib_dirs = []
 sqlite_inc_dirs = []
+icu_inc_dirs = []
+icu_lib_dirs = []

 if iswindows:
     prefix = r'C:\cygwin\home\kovid\sw'
     sw_inc_dir = os.path.join(prefix, 'include')
     sw_lib_dir = os.path.join(prefix, 'lib')
+    icu_inc_dirs = [sw_inc_dir]
+    icu_lib_dirs = [sw_lib_dir]
     sqlite_inc_dirs = [sw_inc_dir]
     fc_inc = os.path.join(sw_inc_dir, 'fontconfig')
     fc_lib = sw_lib_dir
@ -63,7 +63,8 @@ class Check(Command):

     description = 'Check for errors in the calibre source code'

-    BUILTINS = ['_', '__', 'dynamic_property', 'I', 'P', 'lopen']
+    BUILTINS = ['_', '__', 'dynamic_property', 'I', 'P', 'lopen', 'icu_lower',
+            'icu_upper', 'icu_title']
     CACHE = '.check-cache.pickle'

     def get_files(self, cache):
@ -18,7 +18,7 @@ __all__ = [
         'pypi_register', 'pypi_upload', 'upload_to_server',
         'upload_user_manual', 'upload_to_mobileread', 'upload_demo',
         'upload_to_sourceforge', 'upload_to_google_code',
-        'linux32', 'linux64', 'linux', 'linux_freeze', 'linux_freeze2',
+        'linux32', 'linux64', 'linux', 'linux_freeze',
        'osx32_freeze', 'osx', 'rsync', 'push',
        'win32_freeze', 'win32', 'win',
        'stage1', 'stage2', 'stage3', 'stage4', 'publish'
@ -79,10 +79,8 @@ from setup.installer.linux import Linux, Linux32, Linux64
 linux = Linux()
 linux32 = Linux32()
 linux64 = Linux64()
-from setup.installer.linux.freeze import LinuxFreeze
+from setup.installer.linux.freeze2 import LinuxFreeze
 linux_freeze = LinuxFreeze()
-from setup.installer.linux.freeze2 import LinuxFreeze2
-linux_freeze2 = LinuxFreeze2()

 from setup.installer.osx import OSX
 osx = OSX()
@ -18,7 +18,8 @@ from setup.build_environment import fc_inc, fc_lib, chmlib_inc_dirs, \
         QMAKE, msvc, MT, win_inc, win_lib, png_inc_dirs, win_ddk, \
         magick_inc_dirs, magick_lib_dirs, png_lib_dirs, png_libs, \
         magick_error, magick_libs, ft_lib_dirs, ft_libs, jpg_libs, \
-        jpg_lib_dirs, chmlib_lib_dirs, sqlite_inc_dirs
+        jpg_lib_dirs, chmlib_lib_dirs, sqlite_inc_dirs, icu_inc_dirs, \
+        icu_lib_dirs
 MT
 isunix = islinux or isosx or isfreebsd

@ -56,8 +57,25 @@ pdfreflow_libs = []
 if iswindows:
     pdfreflow_libs = ['advapi32', 'User32', 'Gdi32', 'zlib']

+icu_libs = ['icudata', 'icui18n', 'icuuc', 'icuio']
+icu_cflags = []
+if iswindows:
+    icu_libs = ['icudt', 'icuin', 'icuuc', 'icuio']
+if isosx:
+    icu_libs = ['icucore']
+    icu_cflags = ['-DU_DISABLE_RENAMING'] # Needed to use system libicucore.dylib
+
 extensions = [
+
+    Extension('icu',
+        ['calibre/utils/icu.c'],
+        libraries=icu_libs,
+        lib_dirs=icu_lib_dirs,
+        inc_dirs=icu_inc_dirs,
+        cflags=icu_cflags
+        ),
+
     Extension('sqlite_custom',
         ['calibre/library/sqlite_custom.c'],
         inc_dirs=sqlite_inc_dirs
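The platform switch above tracks stock ICU packaging (an assumption about ICU's conventions, not something stated in this diff): MSVC builds ship import libraries named icudt/icuin where Unix builds use icudata/icui18n, and OS X bundles a single renamed-symbol libicucore. Evaluated per platform, the selection logic yields:

    # linux:   icu_libs == ['icudata', 'icui18n', 'icuuc', 'icuio'], icu_cflags == []
    # windows: icu_libs == ['icudt', 'icuin', 'icuuc', 'icuio'],     icu_cflags == []
    # osx:     icu_libs == ['icucore'], icu_cflags == ['-DU_DISABLE_RENAMING']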
@ -17,7 +17,7 @@ class Linux32(VMInstaller):
     INSTALLER_EXT = 'tar.bz2'
     VM_NAME = 'gentoo32_build'
     VM = '/vmware/bin/gentoo32_build'
-    FREEZE_COMMAND = 'linux_freeze2'
+    FREEZE_COMMAND = 'linux_freeze'
     FREEZE_TEMPLATE = 'sudo python -OO setup.py {freeze_command}'

@ -14,23 +14,12 @@ from setup import Command, modules, basenames, functions, __version__, \

 SITE_PACKAGES = ['IPython', 'PIL', 'dateutil', 'dns', 'PyQt4', 'mechanize',
         'sip.so', 'BeautifulSoup.py', 'cssutils', 'encutils', 'lxml',
-        'sipconfig.py', 'xdg']
+        'sipconfig.py', 'xdg', 'dbus', '_dbus_bindings.so', 'dbus_bindings.py',
+        '_dbus_glib_bindings.so']

-gcc = subprocess.Popen(["gcc-config", "-c"], stdout=subprocess.PIPE).communicate()[0]
-chost, _, gcc = gcc.rpartition('-')
-stdcpp = '/usr/lib/gcc/%s/%s/libstdc++.so.?'%(chost.strip(), gcc.strip())
-stdcpp = glob.glob(stdcpp)[-1]
-is64bit = platform.architecture()[0] == '64bit'
-arch = 'x86_64' if is64bit else 'i686'
-ffi = '/usr/lib/libffi.so.5' if is64bit else '/usr/lib/gcc/i686-pc-linux-gnu/4.4.1/libffi.so.4'

 QTDIR = '/usr/lib/qt4'
 QTDLLS = ('QtCore', 'QtGui', 'QtNetwork', 'QtSvg', 'QtXml', 'QtWebKit', 'QtDBus')

 binary_includes = [
     '/usr/bin/pdftohtml',
     '/usr/lib/libwmflite-0.2.so.7',
@ -49,8 +38,6 @@ binary_includes = [
     '/usr/lib/libjpeg.so.8',
     '/usr/lib/libxslt.so.1',
     '/usr/lib/libgthread-2.0.so.0',
-    stdcpp,
-    ffi,
     '/usr/lib/libpng14.so.14',
     '/usr/lib/libexslt.so.0',
     '/usr/lib/libMagickWand.so.4',
@ -63,10 +50,18 @@ binary_includes = [
     '/lib/libreadline.so.6',
     '/usr/lib/libchm.so.0',
     '/usr/lib/liblcms2.so.2',
+    '/usr/lib/libicudata.so.46',
+    '/usr/lib/libicui18n.so.46',
+    '/usr/lib/libicuuc.so.46',
+    '/usr/lib/libicuio.so.46',
 ]
 binary_includes += [os.path.join(QTDIR, 'lib%s.so.4'%x) for x in QTDLLS]

-class LinuxFreeze2(Command):
+is64bit = platform.architecture()[0] == '64bit'
+arch = 'x86_64' if is64bit else 'i686'
+
+class LinuxFreeze(Command):

     def run(self, opts):
         self.drop_privileges()
@ -93,7 +88,21 @@ class LinuxFreeze2(Command):
         self.info('Copying libs...')
         os.mkdir(self.lib_dir)
         os.mkdir(self.bin_dir)
-        for x in binary_includes:
+
+        gcc = subprocess.Popen(["gcc-config", "-c"], stdout=subprocess.PIPE).communicate()[0]
+        chost, _, gcc = gcc.rpartition('-')
+        gcc_lib = '/usr/lib/gcc/%s/%s/'%(chost.strip(), gcc.strip())
+        stdcpp = gcc_lib+'libstdc++.so.?'
+        stdcpp = glob.glob(stdcpp)[-1]
+        ffi = gcc_lib+'libffi.so.?'
+        ffi = glob.glob(ffi)
+        if ffi:
+            ffi = ffi[-1]
+        else:
+            ffi = glob.glob('/usr/lib/libffi.so.?')[-1]
+
+        for x in binary_includes + [stdcpp, ffi]:
             dest = self.bin_dir if '/bin/' in x else self.lib_dir
             shutil.copy2(x, dest)
         shutil.copy2('/usr/lib/libpython%s.so.1.0'%self.py_ver, dest)
@ -268,7 +277,6 @@ class LinuxFreeze2(Command):
 base=`dirname $path`
 lib=$base/lib
 export LD_LIBRARY_PATH=$lib:$LD_LIBRARY_PATH
-export QT_PLUGIN_PATH=$lib/qt_plugins
 export MAGICK_CONFIGURE_PATH=$lib/ImageMagick/config
 export MAGICK_CODER_MODULE_PATH=$lib/ImageMagick/modules-Q16/coders
 export MAGICK_CODER_FILTER_PATH=$lib/ImageMagick/modules-Q16/filters
@ -336,12 +344,21 @@ class LinuxFreeze2(Command):
 def set_helper():
     __builtin__.help = _Helper()

+def set_qt_plugin_path():
+    import uuid
+    uuid.uuid4() # Workaround for libuuid/PyQt conflict
+    from PyQt4.Qt import QCoreApplication
+    paths = list(map(unicode, QCoreApplication.libraryPaths()))
+    paths.insert(0, sys.frozen_path + '/lib/qt_plugins')
+    QCoreApplication.setLibraryPaths(paths)
+
 def main():
     try:
         sys.argv[0] = sys.calibre_basename
         set_default_encoding()
         set_helper()
+        set_qt_plugin_path()
         mod = __import__(sys.calibre_module, fromlist=[1])
         func = getattr(mod, sys.calibre_function)
         return func()
@ -199,7 +199,7 @@ class Win32Freeze(Command, WixMixIn):
         for pat in ('*.dll',):
             for f in glob.glob(os.path.join(bindir, pat)):
                 ok = True
-                for ex in ('expatw',):
+                for ex in ('expatw', 'testplug'):
                     if ex in f.lower():
                         ok = False
                 if not ok: continue
@ -77,6 +77,15 @@ Test it on the target system with

 calibre-debug -c "import _imaging, _imagingmath, _imagingft, _imagingcms"

+ICU
+-------
+
+Download the win32 msvc9 binary from http://www.icu-project.org/download/4.4.html
+
+Note that 4.4 is the last version of ICU that can be compiled with (and comes precompiled for) msvc9
+
+Put the dlls into sw/bin, the unicode dir into sw/include, and the contents of lib into sw/lib
+
 Libunrar
 ----------
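A sketch of the resulting layout under the windows build prefix described by that note (the prefix path is the one the iswindows branch earlier in this diff uses):

    # C:\cygwin\home\kovid\sw\bin\icu*.dll      <- the ICU dlls
    # C:\cygwin\home\kovid\sw\include\unicode\  <- the 'unicode' header directory
    # C:\cygwin\home\kovid\sw\lib\*.lib         <- contents of the ICU lib/ directory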
@ -3,7 +3,7 @@ __license__ = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import uuid, sys, os, re, logging, time, mimetypes, \
+import uuid, sys, os, re, logging, time, \
         __builtin__, warnings, multiprocessing
 from urllib import getproxies
 __builtin__.__dict__['dynamic_property'] = lambda(func): func(None)
@ -19,43 +19,18 @@ from calibre.constants import iswindows, isosx, islinux, isfreebsd, isfrozen, \
         __appname__, __version__, __author__, \
         win32event, win32api, winerror, fcntl, \
         filesystem_encoding, plugins, config_dir
-from calibre.startup import winutil, winutilerror
+from calibre.startup import winutil, winutilerror, guess_type

-uuid.uuid4() # Imported before PyQt4 to workaround PyQt4 util-linux conflict on gentoo
+if islinux and not getattr(sys, 'frozen', False):
+    # Imported before PyQt4 to workaround PyQt4 util-linux conflict on gentoo
+    uuid.uuid4()

 if False:
+    # Prevent pyflakes from complaining
     winutil, winutilerror, __appname__, islinux, __version__
     fcntl, win32event, isfrozen, __author__, terminal_controller
-    winerror, win32api, isfreebsd
+    winerror, win32api, isfreebsd, guess_type

-mimetypes.add_type('application/epub+zip', '.epub')
-mimetypes.add_type('text/x-sony-bbeb+xml', '.lrs')
-mimetypes.add_type('application/xhtml+xml', '.xhtml')
-mimetypes.add_type('image/svg+xml', '.svg')
-mimetypes.add_type('text/fb2+xml', '.fb2')
-mimetypes.add_type('application/x-sony-bbeb', '.lrf')
-mimetypes.add_type('application/x-sony-bbeb', '.lrx')
-mimetypes.add_type('application/x-dtbncx+xml', '.ncx')
-mimetypes.add_type('application/adobe-page-template+xml', '.xpgt')
-mimetypes.add_type('application/x-font-opentype', '.otf')
-mimetypes.add_type('application/x-font-truetype', '.ttf')
-mimetypes.add_type('application/oebps-package+xml', '.opf')
-mimetypes.add_type('application/vnd.palm', '.pdb')
-mimetypes.add_type('application/x-mobipocket-ebook', '.mobi')
-mimetypes.add_type('application/x-mobipocket-ebook', '.prc')
-mimetypes.add_type('application/x-mobipocket-ebook', '.azw')
-mimetypes.add_type('application/x-cbz', '.cbz')
-mimetypes.add_type('application/x-cbr', '.cbr')
-mimetypes.add_type('application/x-koboreader-ebook', '.kobo')
-mimetypes.add_type('image/wmf', '.wmf')
-mimetypes.add_type('image/jpeg', '.jpg')
-mimetypes.add_type('image/jpeg', '.jpeg')
-mimetypes.add_type('image/png', '.png')
-mimetypes.add_type('image/gif', '.gif')
-mimetypes.add_type('image/bmp', '.bmp')
-mimetypes.add_type('image/svg+xml', '.svg')
-
-guess_type = mimetypes.guess_type
 import cssutils
 cssutils.log.setLevel(logging.WARN)
@ -2,7 +2,7 @@ __license__ = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 __appname__   = 'calibre'
-__version__   = '0.7.31'
+__version__   = '0.7.33'
 __author__    = "Kovid Goyal <kovid@kovidgoyal.net>"

 import re
@ -67,7 +67,8 @@ if plugins is None:
             'pdfreflow',
             'progress_indicator',
             'chmlib',
-            'chm_extra'
+            'chm_extra',
+            'icu',
         ] + \
         (['winutil'] if iswindows else []) + \
         (['usbobserver'] if isosx else []):
@ -37,6 +37,8 @@ class Plugin(_Plugin):
             self.fsizes.append((name, num, float(size)))
         self.fnames = dict((name, sz) for name, _, sz in self.fsizes if name)
         self.fnums = dict((num, sz) for _, num, sz in self.fsizes if num)
+        self.width_pts = self.width * 72./self.dpi
+        self.height_pts = self.height * 72./self.dpi

 # Input profiles {{{
 class InputProfile(Plugin):
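Worked example of the point-size fields added above (profile numbers assumed for illustration):

    # A profile 600 px wide and 800 px tall at dpi = 168 gives:
    width_pts  = 600 * 72./168   # ~257.1 pt
    height_pts = 800 * 72./168   # ~342.9 pt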
@ -19,9 +19,9 @@ class ANDROID(USBMS):

     VENDOR_ID   = {
             # HTC
-            0x0bb4 : { 0x0c02 : [0x100, 0x0227], 0x0c01 : [0x100, 0x0227], 0x0ff9
+            0x0bb4 : { 0x0c02 : [0x100, 0x0227, 0x0226], 0x0c01 : [0x100, 0x0227], 0x0ff9
                 : [0x0100, 0x0227, 0x0226], 0x0c87: [0x0100, 0x0227, 0x0226],
-                0xc92  : [0x100]},
+                0xc92  : [0x100], 0xc97: [0x226]},

             # Eken
             0x040d : { 0x8510 : [0x0001] },
@ -38,7 +38,7 @@ class ANDROID(USBMS):
                 0x227]},

             # Samsung
-            0x04e8 : { 0x681d : [0x0222, 0x0224, 0x0400],
+            0x04e8 : { 0x681d : [0x0222, 0x0223, 0x0224, 0x0400],
                 0x681c : [0x0222, 0x0224, 0x0400],
                 0x6640 : [0x0100],
                 },
@ -62,7 +62,8 @@ class ANDROID(USBMS):
             'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX']
     WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
             '__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
-            'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID']
+            'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
+            'SCH-I500_CARD', 'SPH-D700_CARD', 'MB810']
     WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
             'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID']

@ -2637,7 +2637,7 @@ class ITUNES(DriverBase):
             lb_added.composer.set(metadata_x.uuid)
             lb_added.description.set("%s %s" % (self.description_prefix,strftime('%Y-%m-%d %H:%M:%S')))
             lb_added.enabled.set(True)
-            lb_added.sort_artist.set(metadata_x.author_sort.title())
+            lb_added.sort_artist.set(icu_title(metadata_x.author_sort))
             lb_added.sort_name.set(metadata.title_sort)

@ -2648,7 +2648,7 @@ class ITUNES(DriverBase):
             db_added.composer.set(metadata_x.uuid)
             db_added.description.set("%s %s" % (self.description_prefix,strftime('%Y-%m-%d %H:%M:%S')))
             db_added.enabled.set(True)
-            db_added.sort_artist.set(metadata_x.author_sort.title())
+            db_added.sort_artist.set(icu_title(metadata_x.author_sort))
             db_added.sort_name.set(metadata.title_sort)

         if metadata_x.comments:
@ -2729,7 +2729,7 @@ class ITUNES(DriverBase):
             lb_added.Composer = metadata_x.uuid
             lb_added.Description = ("%s %s" % (self.description_prefix,strftime('%Y-%m-%d %H:%M:%S')))
             lb_added.Enabled = True
-            lb_added.SortArtist = metadata_x.author_sort.title()
+            lb_added.SortArtist = icu_title(metadata_x.author_sort)
             lb_added.SortName = metadata.title_sort

         if db_added:
@ -2739,7 +2739,7 @@ class ITUNES(DriverBase):
             db_added.Composer = metadata_x.uuid
             db_added.Description = ("%s %s" % (self.description_prefix,strftime('%Y-%m-%d %H:%M:%S')))
             db_added.Enabled = True
-            db_added.SortArtist = metadata_x.author_sort.title()
+            db_added.SortArtist = icu_title(metadata_x.author_sort)
             db_added.SortName = metadata.title_sort

         if metadata_x.comments:
@ -65,8 +65,9 @@ class ORIZON(CYBOOK):

     BCD = [0x319]

-    WINDOWS_MAIN_MEM = re.compile(r'CYBOOK_ORIZON__-FD')
-    WINDOWS_CARD_A_MEM = re.compile('CYBOOK_ORIZON__-SD')
+    VENDOR_NAME = ['BOOKEEN', 'LINUX']
+    WINDOWS_MAIN_MEM = re.compile(r'(CYBOOK_ORIZON__-FD)|(FILE-STOR_GADGET)')
+    WINDOWS_CARD_A_MEM = re.compile('(CYBOOK_ORIZON__-SD)|(FILE-STOR_GADGET)')

     EBOOK_DIR_MAIN = EBOOK_DIR_CARD_A = 'Digital Editions'

@ -229,7 +229,7 @@ class POCKETBOOK301(USBMS):

 class POCKETBOOK602(USBMS):

-    name = 'PocketBook Pro 602 Device Interface'
+    name = 'PocketBook Pro 602/902 Device Interface'
     description = _('Communicate with the PocketBook 602 reader.')
     author = 'Kovid Goyal'
     supported_platforms = ['windows', 'osx', 'linux']
@ -244,5 +244,5 @@ class POCKETBOOK602(USBMS):
     BCD = [0x0324]

     VENDOR_NAME = ''
-    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'PB602'
+    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['PB602', 'PB902']

@ -79,11 +79,11 @@ class KOBO(USBMS):

         # Determine the firmware version
         f = open(self.normalize_path(self._main_prefix + '.kobo/version'), 'r')
-        fwversion = f.readline().split(',')[2]
+        self.fwversion = f.readline().split(',')[2]
         f.close()
-        if fwversion != '1.0' and fwversion != '1.4':
+        if self.fwversion != '1.0' and self.fwversion != '1.4':
             self.has_kepubs = True
-        debug_print('Version of firmware: ', fwversion, 'Has kepubs:', self.has_kepubs)
+        debug_print('Version of firmware: ', self.fwversion, 'Has kepubs:', self.has_kepubs)

         self.booklist_class.rebuild_collections = self.rebuild_collections

@ -220,6 +220,7 @@ class KOBO(USBMS):
         # 2) volume_shorcover
         # 2) content

+        debug_print('delete_via_sql: ContentID: ', ContentID, 'ContentType: ', ContentType)
         connection = sqlite.connect(self.normalize_path(self._main_prefix + '.kobo/KoboReader.sqlite'))
         cursor = connection.cursor()
         t = (ContentID,)
@ -400,6 +401,12 @@ class KOBO(USBMS):
         elif extension == '.pdf' or extension == '.epub':
             # print "ePub or pdf"
             ContentType = 16
+        elif extension == '.rtf' or extension == '.txt' or extension == '.htm' or extension == '.html':
+            # print "txt"
+            if self.fwversion == '1.0' or self.fwversion == '1.4' or self.fwversion == '1.7.4':
+                ContentType = 999
+            else:
+                ContentType = 901
         else: # if extension == '.html' or extension == '.txt':
             ContentType = 999 # Yet another hack: to get around Kobo changing how ContentID is stored
         return ContentType
@ -11,8 +11,9 @@ from calibre.ebooks.metadata.book.base import Metadata
 from calibre.devices.mime import mime_type_ext
 from calibre.devices.interface import BookList as _BookList
 from calibre.constants import preferred_encoding
-from calibre import isbytestring
+from calibre import isbytestring, force_unicode
 from calibre.utils.config import prefs, tweaks
+from calibre.utils.icu import strcmp

 class Book(Metadata):
     def __init__(self, prefix, lpath, size=None, other=None):
@ -215,14 +216,17 @@ class CollectionsBookList(BookList):
             elif is_series:
                 if doing_dc:
                     collections[cat_name][lpath] = \
-                        (book, book.get('series_index', sys.maxint), '')
+                        (book, book.get('series_index', sys.maxint),
+                        book.get('title_sort', 'zzzz'))
                 else:
                     collections[cat_name][lpath] = \
-                        (book, book.get(attr+'_index', sys.maxint), '')
+                        (book, book.get(attr+'_index', sys.maxint),
+                        book.get('title_sort', 'zzzz'))
             else:
                 if lpath not in collections[cat_name]:
                     collections[cat_name][lpath] = \
-                        (book, book.get('title_sort', 'zzzz'), '')
+                        (book, book.get('title_sort', 'zzzz'),
+                        book.get('title_sort', 'zzzz'))
         # Sort collections
         result = {}

@ -230,14 +234,19 @@ class CollectionsBookList(BookList):
             x = xx[1]
             y = yy[1]
             if x is None and y is None:
+                # No sort_key needed here, because defaults are ascii
                 return cmp(xx[2], yy[2])
             if x is None:
                 return 1
             if y is None:
                 return -1
+            if isinstance(x, basestring) and isinstance(y, basestring):
+                c = strcmp(force_unicode(x), force_unicode(y))
+            else:
-            c = cmp(x, y)
+                c = cmp(x, y)
             if c != 0:
                 return c
+            # same as above -- no sort_key needed here
             return cmp(xx[2], yy[2])

         for category, lpaths in collections.items():
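A minimal sketch of what the new strcmp() branch buys (the sample strings are illustrative; as I read it, calibre.utils.icu.strcmp compares with ICU collation rather than raw byte order):

    from calibre import force_unicode
    from calibre.utils.icu import strcmp
    strcmp(force_unicode('apple'), force_unicode('Banana'))
    # < 0: 'apple' sorts first under collation, although plain cmp() would
    # put 'Banana' first because byte 'B' (0x42) precedes 'a' (0x61)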
@ -142,6 +142,9 @@ class EPUBOutput(OutputFormatPlugin):
     def convert(self, oeb, output_path, input_plugin, opts, log):
         self.log, self.opts, self.oeb = log, opts, oeb

+        #from calibre.ebooks.oeb.transforms.filenames import UniqueFilenames
+        #UniqueFilenames()(oeb, opts)
+
         self.workaround_ade_quirks()
         self.workaround_webkit_quirks()
         self.upshift_markup()
@ -6,9 +6,11 @@ __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

 from uuid import uuid4
+import time

 from calibre.constants import __appname__, __version__
 from calibre import strftime, prepare_string_for_xml as xml
+from calibre.utils.date import parse_date

 SONY_METADATA = u'''\
 <?xml version="1.0" encoding="utf-8"?>
@ -87,7 +89,8 @@ def sony_metadata(oeb):
         pass

     try:
-        date = unicode(m.date[0]).split('T')[0]
+        date = parse_date(unicode(m.date[0]),
+                as_utc=False).strftime('%Y-%m-%d')
     except:
         date = strftime('%Y-%m-%d')
     try:
@ -101,7 +104,7 @@ def sony_metadata(oeb):
             publisher=xml(publisher), issue_date=xml(date),
             language=xml(language))

-    updated = strftime('%Y-%m-%dT%H:%M:%SZ')
+    updated = strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())

     def cal_id(x):
         for k, v in x.attrib.items():
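Illustrative only (the input timestamp is assumed): parse_date() normalizes the ISO date strings commonly found in OPF metadata before the strftime reformatting used above:

    from calibre.utils.date import parse_date
    parse_date(u'2010-12-10T04:56:32+00:00', as_utc=False).strftime('%Y-%m-%d')
    # -> '2010-12-10', rendered in the local timezone rather than UTC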
@ -8,15 +8,11 @@ __docformat__ = 'restructuredtext en'
 Transform OEB content into FB2 markup
 '''

-import cStringIO
 from base64 import b64encode
+from datetime import datetime
+from mimetypes import types_map
 import re
+import uuid
-try:
-    from PIL import Image
-    Image
-except ImportError:
-    import Image

 from lxml import etree

@ -24,41 +20,13 @@ from calibre import prepare_string_for_xml
 from calibre.constants import __appname__, __version__
 from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
 from calibre.ebooks.oeb.stylizer import Stylizer
-from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
+from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES, OPF
+from calibre.utils.magick import Image

-TAG_MAP = {
-    'b' : 'strong',
-    'i' : 'emphasis',
-    'p' : 'p',
-    'li' : 'p',
-    'div': 'p',
-    'br' : 'p',
-}
-
-TAG_SPACE = []
-
-TAG_IMAGES = [
-    'img',
-]
-
-TAG_LINKS = [
-    'a',
-]
-
-BLOCK = [
-    'p',
-]
-
-STYLES = [
-    ('font-weight', {'bold' : 'strong', 'bolder' : 'strong'}),
-    ('font-style', {'italic' : 'emphasis'}),
-]

 class FB2MLizer(object):
     '''
-    Todo: * Ensure all style tags are inside of the p tags.
-          * Include more FB2 specific tags in the conversion.
-          * Handle reopening of a tag properly.
+    Todo: * Include more FB2 specific tags in the conversion.
+          * Handle a tags.
           * Figure out some way to turn oeb_book.toc items into <section><title>
           <p> to allow for readers to generate toc from the document.
     '''
@ -66,140 +34,136 @@ class FB2MLizer(object):
     def __init__(self, log):
         self.log = log
         self.image_hrefs = {}
-        self.link_hrefs = {}
+        self.reset_state()
+
+    def reset_state(self):
+        # Used to ensure text and tags are always within <p> and </p>
+        self.in_p = False
+        # Mapping of image names. OEB allows for images to have the same name but be stored
+        # in different directories. FB2 images are all in a flat layout so we rename all images
+        # into a sequential numbering system to ensure there are no collisions between image names.
+        self.image_hrefs = {}

     def extract_content(self, oeb_book, opts):
         self.log.info('Converting XHTML to FB2 markup...')
         self.oeb_book = oeb_book
         self.opts = opts

         return self.fb2mlize_spine()

     def fb2mlize_spine(self):
-        self.image_hrefs = {}
-        self.link_hrefs = {}
+        self.reset_state()
         output = [self.fb2_header()]
-        output.append(self.get_cover_page())
-        output.append(u'ghji87yhjko0Caliblre-toc-placeholder-for-insertion-later8ujko0987yjk')
         output.append(self.get_text())
-        output.append(self.fb2_body_footer())
         output.append(self.fb2mlize_images())
         output.append(self.fb2_footer())
-        output = ''.join(output).replace(u'ghji87yhjko0Caliblre-toc-placeholder-for-insertion-later8ujko0987yjk', self.get_toc())
-        output = self.clean_text(output)
-        if self.opts.sectionize_chapters:
-            output = self.sectionize_chapters(output)
-        return u'<?xml version="1.0" encoding="UTF-8"?>\n%s' % etree.tostring(etree.fromstring(output), encoding=unicode, pretty_print=True)
+        output = self.clean_text(u''.join(output))
+        if self.opts.pretty_print:
+            return u'<?xml version="1.0" encoding="UTF-8"?>\n%s' % etree.tostring(etree.fromstring(output), encoding=unicode, pretty_print=True)
+        else:
+            return u'<?xml version="1.0" encoding="UTF-8"?>' + output

     def clean_text(self, text):
+        text = re.sub(r'(?miu)<section>\s*</section>', '', text)
+        text = re.sub(r'(?miu)\s+</section>', '</section>', text)
+        text = re.sub(r'(?miu)</section><section>', '</section>\n\n<section>', text)
+
         text = re.sub(r'(?miu)<p>\s*</p>', '', text)
         text = re.sub(r'(?miu)\s+</p>', '</p>', text)
         text = re.sub(r'(?miu)</p><p>', '</p>\n\n<p>', text)
         return text

     def fb2_header(self):
-        author_first = u''
-        author_middle = u''
-        author_last = u''
+        metadata = {}
+        metadata['author_first'] = u''
+        metadata['author_middle'] = u''
+        metadata['author_last'] = u''
+        metadata['title'] = self.oeb_book.metadata.title[0].value
+        metadata['appname'] = __appname__
+        metadata['version'] = __version__
+        metadata['date'] = '%i.%i.%i' % (datetime.now().day, datetime.now().month, datetime.now().year)
+        metadata['lang'] = u''.join(self.oeb_book.metadata.lang) if self.oeb_book.metadata.lang else 'en'
+        metadata['id'] = None

         author_parts = self.oeb_book.metadata.creator[0].value.split(' ')

         if len(author_parts) == 1:
-            author_last = author_parts[0]
+            metadata['author_last'] = author_parts[0]
         elif len(author_parts) == 2:
-            author_first = author_parts[0]
-            author_last = author_parts[1]
+            metadata['author_first'] = author_parts[0]
+            metadata['author_last'] = author_parts[1]
         else:
-            author_first = author_parts[0]
-            author_middle = ' '.join(author_parts[1:-2])
-            author_last = author_parts[-1]
+            metadata['author_first'] = author_parts[0]
+            metadata['author_middle'] = ' '.join(author_parts[1:-2])
+            metadata['author_last'] = author_parts[-1]

-        return u'<FictionBook xmlns:xlink="http://www.w3.org/1999/xlink" ' \
-            'xmlns="http://www.gribuser.ru/xml/fictionbook/2.0">\n' \
-            '<description>\n<title-info>\n ' \
-            '<author>\n<first-name>%s</first-name>\n<middle-name>%s' \
-            '</middle-name>\n<last-name>%s</last-name>\n</author>\n' \
-            '<book-title>%s</book-title> ' \
-            '</title-info><document-info> ' \
-            '<program-used>%s - %s</program-used></document-info>\n' \
-            '</description>\n<body>\n<section>' % tuple(map(prepare_string_for_xml,
-                (author_first, author_middle,
-                    author_last, self.oeb_book.metadata.title[0].value,
-                    __appname__, __version__)))
-
-    def get_cover_page(self):
-        output = u''
-        if 'cover' in self.oeb_book.guide:
-            output += '<image xlink:href="#cover.jpg" />'
-            self.image_hrefs[self.oeb_book.guide['cover'].href] = 'cover.jpg'
-        if 'titlepage' in self.oeb_book.guide:
-            self.log.debug('Generating cover page...')
-            href = self.oeb_book.guide['titlepage'].href
-            item = self.oeb_book.manifest.hrefs[href]
-            if item.spine_position is None:
-                stylizer = Stylizer(item.data, item.href, self.oeb_book,
-                        self.opts, self.opts.output_profile)
-                output += ''.join(self.dump_text(item.data.find(XHTML('body')), stylizer, item))
-        return output
-
-    def get_toc(self):
-        toc = []
-        if self.opts.inline_toc:
-            self.log.debug('Generating table of contents...')
-            toc.append(u'<p>%s</p>' % _('Table of Contents:'))
-            for item in self.oeb_book.toc:
-                if item.href in self.link_hrefs.keys():
-                    toc.append('<p><a xlink:href="#%s">%s</a></p>\n' % (self.link_hrefs[item.href], item.title))
-                else:
-                    self.oeb.warn('Ignoring toc item: %s not found in document.' % item)
-        return ''.join(toc)
-
-    def sectionize_chapters(self, text):
-        def remove_p(t):
-            t = t.replace('<p>', '')
-            t = t.replace('</p>', '')
-            return t
-        text = re.sub(r'(?imsu)(<p>)\s*(?P<anchor><a\s+id="calibre_link-\d+"\s*/>)\s*(</p>)\s*(<p>)\s*(?P<strong><strong>.+?</strong>)\s*(</p>)', lambda mo: '</section><section>%s<title><p>%s</p></title>' % (mo.group('anchor'), remove_p(mo.group('strong'))), text)
-        text = re.sub(r'(?imsu)(<p>)\s*(?P<anchor><a\s+id="calibre_link-\d+"\s*/>)\s*(</p>)\s*(?P<strong><strong>.+?</strong>)', lambda mo: '</section><section>%s<title><p>%s</p></title>' % (mo.group('anchor'), remove_p(mo.group('strong'))), text)
-        text = re.sub(r'(?imsu)(?P<anchor><a\s+id="calibre_link-\d+"\s*/>)\s*(<p>)\s*(?P<strong><strong>.+?</strong>)\s*(</p>)', lambda mo: '</section><section>%s<title><p>%s</p></title>' % (mo.group('anchor'), remove_p(mo.group('strong'))), text)
-        text = re.sub(r'(?imsu)(<p>)\s*(?P<anchor><a\s+id="calibre_link-\d+"\s*/>)\s*(?P<strong><strong>.+?</strong>)\s*(</p>)', lambda mo: '</section><section>%s<title><p>%s</p></title>' % (mo.group('anchor'), remove_p(mo.group('strong'))), text)
-        text = re.sub(r'(?imsu)(?P<anchor><a\s+id="calibre_link-\d+"\s*/>)\s*(?P<strong><strong>.+?</strong>)', lambda mo: '</section><section>%s<title><p>%s</p></title>' % (mo.group('anchor'), remove_p(mo.group('strong'))), text)
-        return text
-
-    def get_text(self):
-        text = []
-        for item in self.oeb_book.spine:
-            self.log.debug('Converting %s to FictionBook2 XML' % item.href)
-            stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
-            text.append(self.add_page_anchor(item))
-            text += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
-        return ''.join(text)
-
-    def fb2_body_footer(self):
-        return u'\n</section>\n</body>'
+        identifiers = self.oeb_book.metadata['identifier']
+        for x in identifiers:
+            if x.get(OPF('scheme'), None).lower() == 'uuid' or unicode(x).startswith('urn:uuid:'):
+                metadata['id'] = unicode(x).split(':')[-1]
+                break
+        if metadata['id'] is None:
+            self.log.warn('No UUID identifier found')
+            metadata['id'] = str(uuid.uuid4())
+
+        for key, value in metadata.items():
+            metadata[key] = prepare_string_for_xml(value)
+
+        return u'<FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:xlink="http://www.w3.org/1999/xlink">' \
+            '<description>' \
+            '<title-info>' \
+            '<genre>antique</genre>' \
+            '<author>' \
+            '<first-name>%(author_first)s</first-name>' \
+            '<middle-name>%(author_middle)s</middle-name>' \
+            '<last-name>%(author_last)s</last-name>' \
+            '</author>' \
+            '<book-title>%(title)s</book-title>' \
+            '<lang>%(lang)s</lang>' \
+            '</title-info>' \
+            '<document-info>' \
+            '<author>' \
+            '<first-name></first-name>' \
+            '<middle-name></middle-name>' \
+            '<last-name></last-name>' \
+            '</author>' \
+            '<program-used>%(appname)s %(version)s</program-used>' \
+            '<date>%(date)s</date>' \
+            '<id>%(id)s</id>' \
+            '<version>1.0</version>' \
+            '</document-info>' \
+            '</description>' % metadata

     def fb2_footer(self):
         return u'</FictionBook>'

-    def add_page_anchor(self, page):
-        return self.get_anchor(page, '')
-
-    def get_anchor(self, page, aid):
-        aid = prepare_string_for_xml(aid)
-        aid = '%s#%s' % (page.href, aid)
-        if aid not in self.link_hrefs.keys():
-            self.link_hrefs[aid] = 'calibre_link-%s' % len(self.link_hrefs.keys())
-        aid = self.link_hrefs[aid]
-        return '<a id="%s" />' % aid
+    def get_text(self):
+        text = ['<body>']
+        for item in self.oeb_book.spine:
+            self.log.debug('Converting %s to FictionBook2 XML' % item.href)
+            stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
+            text.append('<section>')
+            text += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
+            text.append('</section>')
+        return ''.join(text) + '</body>'

     def fb2mlize_images(self):
+        '''
+        This function uses the self.image_hrefs dictionary mapping. It is populated by the dump_text function.
+        '''
         images = []
         for item in self.oeb_book.manifest:
+            # Don't write the image if it's not referenced in the document's text.
+            if item.href not in self.image_hrefs:
+                continue
             if item.media_type in OEB_RASTER_IMAGES:
                 try:
-                    im = Image.open(cStringIO.StringIO(item.data)).convert('RGB')
-                    data = cStringIO.StringIO()
-                    im.save(data, 'JPEG')
-                    data = data.getvalue()
+                    if not item.media_type == types_map['.jpeg'] or not item.media_type == types_map['.jpg']:
+                        im = Image()
+                        im.load(item.data)
+                        im.set_compression_quality(70)
+                        data = im.export('jpg')
                     raw_data = b64encode(data)
                     # Don't put the encoded image on a single line.
                     data = ''
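A sketch of the calibre.utils.magick calls introduced above, in isolation (raw_bytes stands in for item.data; the method names are the ones this diff itself uses):

    from calibre.utils.magick import Image
    im = Image()
    im.load(raw_bytes)               # decode whatever raster format the book contains
    im.set_compression_quality(70)   # quality setting used by lossy encoders
    jpeg_bytes = im.export('jpg')    # re-encode as JPEG for the FB2 <binary> element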
@@ -210,110 +174,167 @@ class FB2MLizer(object):
                    col = 1
                        col += 1
                        data += char
-                images.append('<binary id="%s" content-type="%s">%s\n</binary>' % (self.image_hrefs.get(item.href, '0000.JPEG'), item.media_type, data))
+                images.append('<binary id="%s" content-type="image/jpeg">%s\n</binary>' % (self.image_hrefs[item.href], data))
                except Exception as e:
-                    self.log.error('Error: Could not include file %s becuase ' \
+                    self.log.error('Error: Could not include file %s because ' \
                        '%s.' % (item.href, e))
        return ''.join(images)

-    def dump_text(self, elem, stylizer, page, tag_stack=[]):
-        if not isinstance(elem.tag, basestring) \
-           or namespace(elem.tag) != XHTML_NS:
-            return []
-
-        style = stylizer.style(elem)
-        if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
-           or style['visibility'] == 'hidden':
-            return []
-
-        fb2_text = []
-        tags = []
-
-        tag = barename(elem.tag)
-
-        if tag in TAG_IMAGES:
-            if elem.attrib.get('src', None):
-                if page.abshref(elem.attrib['src']) not in self.image_hrefs.keys():
-                    self.image_hrefs[page.abshref(elem.attrib['src'])] = '%s.jpg' % len(self.image_hrefs.keys())
-                fb2_text.append('<image xlink:href="#%s" />' % self.image_hrefs[page.abshref(elem.attrib['src'])])
-
-        if tag in TAG_LINKS:
-            href = elem.get('href')
-            if href:
-                href = prepare_string_for_xml(page.abshref(href))
-                href = href.replace('"', '&quot;')
-                if '://' in href:
-                    fb2_text.append('<a xlink:href="%s">' % href)
-                else:
-                    if href.startswith('#'):
-                        href = href[1:]
-                    if href not in self.link_hrefs.keys():
-                        self.link_hrefs[href] = 'calibre_link-%s' % len(self.link_hrefs.keys())
-                    href = self.link_hrefs[href]
-                    fb2_text.append('<a xlink:href="#%s">' % href)
-                tags.append('a')
-
-        # Anchor ids
-        id_name = elem.get('id')
-        if id_name:
-            fb2_text.append(self.get_anchor(page, id_name))
-
-        fb2_tag = TAG_MAP.get(tag, None)
-        if fb2_tag == 'p':
-            if 'p' in tag_stack+tags:
-                # Close all up to p. Close p. Reopen all closed tags including p.
-                all_tags = tag_stack+tags
-                closed_tags = []
-                all_tags.reverse()
-                for t in all_tags:
-                    fb2_text.append('</%s>' % t)
-                    closed_tags.append(t)
-                    if t == 'p':
-                        break
-                closed_tags.reverse()
-                for t in closed_tags:
-                    fb2_text.append('<%s>' % t)
-            else:
-                fb2_text.append('<p>')
-            tags.append('p')
-        elif fb2_tag and fb2_tag not in tag_stack+tags:
-            fb2_text.append('<%s>' % fb2_tag)
-            tags.append(fb2_tag)
-
-        # Processes style information
-        for s in STYLES:
-            style_tag = s[1].get(style[s[0]], None)
-            if style_tag and style_tag not in tag_stack+tags:
-                fb2_text.append('<%s>' % style_tag)
-                tags.append(style_tag)
-
-        if tag in TAG_SPACE:
-            if not fb2_text or fb2_text[-1] != ' ' or not fb2_text[-1].endswith(' '):
-                fb2_text.append(' ')
-
-        if hasattr(elem, 'text') and elem.text:
-            if 'p' not in tag_stack+tags:
-                fb2_text.append('<p>%s</p>' % prepare_string_for_xml(elem.text))
-            else:
-                fb2_text.append(prepare_string_for_xml(elem.text))
-
-        for item in elem:
-            fb2_text += self.dump_text(item, stylizer, page, tag_stack+tags)
-
-        tags.reverse()
-        fb2_text += self.close_tags(tags)
-
-        if hasattr(elem, 'tail') and elem.tail:
-            if 'p' not in tag_stack:
-                fb2_text.append('<p>%s</p>' % prepare_string_for_xml(elem.tail))
-            else:
-                fb2_text.append(prepare_string_for_xml(elem.tail))
-
-        return fb2_text
+    def ensure_p(self):
+        if self.in_p:
+            return [], []
+        else:
+            self.in_p = True
+            return ['<p>'], ['p']
+
+    def close_open_p(self, tags):
+        text = ['']
+        added_p = False
+
+        if self.in_p:
+            # Close all up to p. Close p. Reopen all closed tags including p.
+            closed_tags = []
+            tags.reverse()
+            for t in tags:
+                text.append('</%s>' % t)
+                closed_tags.append(t)
+                if t == 'p':
+                    break
+            closed_tags.reverse()
+            for t in closed_tags:
+                text.append('<%s>' % t)
+        else:
+            text.append('<p>')
+            added_p = True
+            self.in_p = True
+
+        return text, added_p
+
+    def handle_simple_tag(self, tag, tags):
+        s_out = []
+        s_tags = []
+        if tag not in tags:
+            p_out, p_tags = self.ensure_p()
+            s_out += p_out
+            s_tags += p_tags
+            s_out.append('<%s>' % tag)
+            s_tags.append(tag)
+        return s_out, s_tags
+
+    def dump_text(self, elem_tree, stylizer, page, tag_stack=[]):
+        '''
+        This function is intended to be used in a recursive manner. dump_text will
+        run through all elements in the elem_tree and call itself on each element.
+
+        self.image_hrefs will be populated by calling this function.
+
+        @param elem_tree: etree representation of XHTML content to be transformed.
+        @param stylizer: Used to track the style of elements within the tree.
+        @param page: OEB page used to determine absolute urls.
+        @param tag_stack: List of open FB2 tags to take into account.
+
+        @return: List of strings representing the XHTML converted to FB2 markup.
+        '''
+        # Ensure what we are converting is not a string and that the first tag is part of the XHTML namespace.
+        if not isinstance(elem_tree.tag, basestring) or namespace(elem_tree.tag) != XHTML_NS:
+            return []
+
+        style = stylizer.style(elem_tree)
+        if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') or style['visibility'] == 'hidden':
+            return []
+
+        # FB2 generated output.
+        fb2_out = []
+        # FB2 tags in the order they are opened. This will be used to close the tags.
+        tags = []
+        # First tag in tree
+        tag = barename(elem_tree.tag)
+
+        # Process the XHTML tag if it needs to be converted to an FB2 tag.
+        if tag == 'h1' and self.opts.h1_to_title or tag == 'h2' and self.opts.h2_to_title or tag == 'h3' and self.opts.h3_to_title:
+            fb2_out.append('<title>')
+            tags.append('title')
+        if tag == 'img':
+            if elem_tree.attrib.get('src', None):
+                # Only write the image tag if it is in the manifest.
+                if page.abshref(elem_tree.attrib['src']) in self.oeb_book.manifest.hrefs.keys():
+                    if page.abshref(elem_tree.attrib['src']) not in self.image_hrefs.keys():
+                        self.image_hrefs[page.abshref(elem_tree.attrib['src'])] = '_%s.jpg' % len(self.image_hrefs.keys())
+                    p_txt, p_tag = self.ensure_p()
+                    fb2_out += p_txt
+                    tags += p_tag
+                    fb2_out.append('<image xlink:href="#%s" />' % self.image_hrefs[page.abshref(elem_tree.attrib['src'])])
+        elif tag == 'br':
+            if self.in_p:
+                closed_tags = []
+                open_tags = tag_stack+tags
+                open_tags.reverse()
+                for t in open_tags:
+                    fb2_out.append('</%s>' % t)
+                    closed_tags.append(t)
+                    if t == 'p':
+                        break
+                fb2_out.append('<empty-line />')
+                closed_tags.reverse()
+                for t in closed_tags:
+                    fb2_out.append('<%s>' % t)
+            else:
+                fb2_out.append('<empty-line />')
+        elif tag in ('div', 'li', 'p'):
+            p_text, added_p = self.close_open_p(tag_stack+tags)
+            fb2_out += p_text
+            if added_p:
+                tags.append('p')
+        elif tag == 'b':
+            s_out, s_tags = self.handle_simple_tag('strong', tag_stack+tags)
+            fb2_out += s_out
+            tags += s_tags
+        elif tag == 'i':
+            s_out, s_tags = self.handle_simple_tag('emphasis', tag_stack+tags)
+            fb2_out += s_out
+            tags += s_tags
+
+        # Processes style information.
+        if style['font-style'] == 'italic':
+            s_out, s_tags = self.handle_simple_tag('emphasis', tag_stack+tags)
+            fb2_out += s_out
+            tags += s_tags
+        elif style['font-weight'] in ('bold', 'bolder'):
+            s_out, s_tags = self.handle_simple_tag('strong', tag_stack+tags)
+            fb2_out += s_out
+            tags += s_tags
+
+        # Process element text.
+        if hasattr(elem_tree, 'text') and elem_tree.text:
+            if not self.in_p:
+                fb2_out.append('<p>')
+            fb2_out.append(prepare_string_for_xml(elem_tree.text))
+            if not self.in_p:
+                fb2_out.append('</p>')
+
+        # Process sub-elements.
+        for item in elem_tree:
+            fb2_out += self.dump_text(item, stylizer, page, tag_stack+tags)
+
+        # Close open FB2 tags.
+        tags.reverse()
+        fb2_out += self.close_tags(tags)
+
+        # Process element text that comes after the close of the XHTML tag but before the next XHTML tag.
+        if hasattr(elem_tree, 'tail') and elem_tree.tail:
+            if not self.in_p:
+                fb2_out.append('<p>')
+            fb2_out.append(prepare_string_for_xml(elem_tree.tail))
+            if not self.in_p:
+                fb2_out.append('</p>')
+
+        return fb2_out

    def close_tags(self, tags):
        text = []
        for tag in tags:
            text.append('</%s>' % tag)
+            if tag == 'p':
+                self.in_p = False

        return text
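The refactor above replaces scans of `tag_stack` for an open `p` with a single `self.in_p` flag that `ensure_p` sets and `close_tags` clears. A standalone sketch of just that state machine (plain Python, no calibre imports; class and variable names are shortened stand-ins, not calibre identifiers):

class PState(object):
    def __init__(self):
        self.in_p = False

    def ensure_p(self):
        # Open a <p> only if one is not already open.
        if self.in_p:
            return [], []
        self.in_p = True
        return ['<p>'], ['p']

    def close_tags(self, tags):
        # Close tags innermost-first; closing a 'p' clears the flag.
        out = []
        for tag in tags:
            out.append('</%s>' % tag)
            if tag == 'p':
                self.in_p = False
        return out

s = PState()
out, tags = s.ensure_p()
out.append('text')
tags.reverse()
out += s.close_tags(tags)
print(''.join(out))   # -> <p>text</p>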
@@ -16,19 +16,27 @@ class FB2Output(OutputFormatPlugin):
    file_type = 'fb2'

    options = set([
-        OptionRecommendation(name='inline_toc',
-            recommended_value=False, level=OptionRecommendation.LOW,
-            help=_('Add Table of Contents to beginning of the book.')),
-        OptionRecommendation(name='sectionize_chapters',
-            recommended_value=False, level=OptionRecommendation.LOW,
-            help=_('Try to turn chapters into individual sections. ' \
-            'WARNING: ' \
-            'This option is experimental. It can cause conversion ' \
-            'to fail. It can also produce unexpected output.')),
+        OptionRecommendation(name='h1_to_title',
+            recommended_value=False, level=OptionRecommendation.LOW,
+            help=_('Wrap all h1 tags with fb2 title elements.')),
+        OptionRecommendation(name='h2_to_title',
+            recommended_value=False, level=OptionRecommendation.LOW,
+            help=_('Wrap all h2 tags with fb2 title elements.')),
+        OptionRecommendation(name='h3_to_title',
+            recommended_value=False, level=OptionRecommendation.LOW,
+            help=_('Wrap all h3 tags with fb2 title elements.')),
    ])

    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        from calibre.ebooks.oeb.transforms.jacket import linearize_jacket
+        from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer, Unavailable
+
+        try:
+            rasterizer = SVGRasterizer()
+            rasterizer(oeb_book, opts)
+        except Unavailable:
+            self.log.warn('SVG rasterizer unavailable, SVG will not be converted')
+
        linearize_jacket(oeb_book)

        fb2mlizer = FB2MLizer(log)
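The three new options gate the `<title>` wrapping seen in `dump_text` above. A small illustration of the boolean shape of that condition (a hypothetical helper for this note, not calibre code):

# Hypothetical illustration: what the new h1/h2/h3-to-title options decide
# for a given tag, mirroring the condition used in dump_text above.
def wraps_title(tag, h1_to_title=False, h2_to_title=False, h3_to_title=False):
    # Same boolean shape as the source: 'and' binds tighter than 'or'.
    return (tag == 'h1' and h1_to_title or tag == 'h2' and h2_to_title
            or tag == 'h3' and h3_to_title)

print(wraps_title('h1', h1_to_title=True))   # True  -> emits <title> ... </title>
print(wraps_title('h2', h1_to_title=True))   # False -> left as ordinary text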
@@ -18,9 +18,10 @@ def extract_alphanumeric(in_str=None):
    """
    # I'm sure this is really inefficient and
    # could be done with a lambda/map()
    #x.strip().title().replace(' ', "")
    out_str=[]
-    for x in in_str.title():
+    for x in in_str:
+        x = icu_title(x)
        if x.isalnum(): out_str.append(x)
    return ''.join(out_str)
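The change swaps `unicode.title()` on the whole string for calibre's ICU-backed `icu_title` applied one character at a time. A plain-Python stand-in showing the resulting behaviour (this sketch assumes `str.title()` approximates `icu_title` for single characters):

# Standalone sketch: title-casing each character individually before the
# isalnum() filter effectively upper-cases every kept letter.
def extract_alphanumeric_sketch(in_str):
    out = []
    for x in in_str:
        x = x.title()           # stand-in for calibre's icu_title(x)
        if x.isalnum():
            out.append(x)
    return ''.join(out)

print(extract_alphanumeric_sketch(u"foo-bar 42"))  # -> FOOBAR42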
516 src/calibre/ebooks/metadata/amazonfr.py Normal file
@@ -0,0 +1,516 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2010, sengian <sengian1@gmail.com>'

import sys, textwrap, re, traceback
from urllib import urlencode
from math import ceil

from lxml import html
from lxml.html import soupparser

from calibre.utils.date import parse_date, utcnow, replace_months
from calibre.utils.cleantext import clean_ascii_chars
from calibre import browser, preferred_encoding
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.metadata import MetaInformation, check_isbn, \
    authors_to_sort_string
from calibre.ebooks.metadata.fetch import MetadataSource
from calibre.utils.config import OptionParser
from calibre.library.comments import sanitize_comments_html


class AmazonFr(MetadataSource):

    name = 'Amazon French'
    description = _('Downloads metadata from amazon.fr')
    supported_platforms = ['windows', 'osx', 'linux']
    author = 'Sengian'
    version = (1, 0, 0)
    has_html_comments = True

    def fetch(self):
        try:
            self.results = search(self.title, self.book_author, self.publisher,
                self.isbn, max_results=10, verbose=self.verbose, lang='fr')
        except Exception, e:
            self.exception = e
            self.tb = traceback.format_exc()

class AmazonEs(MetadataSource):

    name = 'Amazon Spanish'
    description = _('Downloads metadata from amazon.com in spanish')
    supported_platforms = ['windows', 'osx', 'linux']
    author = 'Sengian'
    version = (1, 0, 0)
    has_html_comments = True

    def fetch(self):
        try:
            self.results = search(self.title, self.book_author, self.publisher,
                self.isbn, max_results=10, verbose=self.verbose, lang='es')
        except Exception, e:
            self.exception = e
            self.tb = traceback.format_exc()

class AmazonEn(MetadataSource):

    name = 'Amazon English'
    description = _('Downloads metadata from amazon.com in english')
    supported_platforms = ['windows', 'osx', 'linux']
    author = 'Sengian'
    version = (1, 0, 0)
    has_html_comments = True

    def fetch(self):
        try:
            self.results = search(self.title, self.book_author, self.publisher,
                self.isbn, max_results=10, verbose=self.verbose, lang='en')
        except Exception, e:
            self.exception = e
            self.tb = traceback.format_exc()

class AmazonDe(MetadataSource):

    name = 'Amazon German'
    description = _('Downloads metadata from amazon.de')
    supported_platforms = ['windows', 'osx', 'linux']
    author = 'Sengian'
    version = (1, 0, 0)
    has_html_comments = True

    def fetch(self):
        try:
            self.results = search(self.title, self.book_author, self.publisher,
                self.isbn, max_results=10, verbose=self.verbose, lang='de')
        except Exception, e:
            self.exception = e
            self.tb = traceback.format_exc()

class Amazon(MetadataSource):

    name = 'Amazon'
    description = _('Downloads metadata from amazon.com')
    supported_platforms = ['windows', 'osx', 'linux']
    author = 'Kovid Goyal & Sengian'
    version = (1, 1, 0)
    has_html_comments = True

    def fetch(self):
        # if not self.site_customization:
        #    return
        try:
            self.results = search(self.title, self.book_author, self.publisher,
                self.isbn, max_results=10, verbose=self.verbose, lang='all')
        except Exception, e:
            self.exception = e
            self.tb = traceback.format_exc()

    # @property
    # def string_customization_help(self):
    #    return _('You can select here the language for metadata search with amazon.com')


def report(verbose):
    if verbose:
        traceback.print_exc()


class Query(object):

    BASE_URL_ALL = 'http://www.amazon.com'
    BASE_URL_FR = 'http://www.amazon.fr'
    BASE_URL_DE = 'http://www.amazon.de'

    def __init__(self, title=None, author=None, publisher=None, isbn=None, keywords=None,
        max_results=20, rlang='all'):
        assert not(title is None and author is None and publisher is None \
            and isbn is None and keywords is None)
        assert (max_results < 21)

        self.max_results = int(max_results)
        self.renbres = re.compile(u'\s*(\d+)\s*')

        q = { 'search-alias' : 'stripbooks' ,
              'unfiltered' : '1',
              'field-keywords' : '',
              'field-author' : '',
              'field-title' : '',
              'field-isbn' : '',
              'field-publisher' : ''
              #get to amazon detailed search page to get all options
              # 'node' : '',
              # 'field-binding' : '',
              #before, during, after
              # 'field-dateop' : '',
              #month as number
              # 'field-datemod' : '',
              # 'field-dateyear' : '',
              #french only
              # 'field-collection' : '',
              #many options available
            }

        if rlang =='all':
            q['sort'] = 'relevanceexprank'
            self.urldata = self.BASE_URL_ALL
        elif rlang =='es':
            q['sort'] = 'relevanceexprank'
            q['field-language'] = 'Spanish'
            self.urldata = self.BASE_URL_ALL
        elif rlang =='en':
            q['sort'] = 'relevanceexprank'
            q['field-language'] = 'English'
            self.urldata = self.BASE_URL_ALL
        elif rlang =='fr':
            q['sort'] = 'relevancerank'
            self.urldata = self.BASE_URL_FR
        elif rlang =='de':
            q['sort'] = 'relevancerank'
            self.urldata = self.BASE_URL_DE
        self.baseurl = self.urldata

        if isbn is not None:
            q['field-isbn'] = isbn.replace('-', '')
        else:
            if title is not None:
                q['field-title'] = title
            if author is not None:
                q['field-author'] = author
            if publisher is not None:
                q['field-publisher'] = publisher
            if keywords is not None:
                q['field-keywords'] = keywords

        if isinstance(q, unicode):
            q = q.encode('utf-8')
        self.urldata += '/gp/search/ref=sr_adv_b/?' + urlencode(q)

    def __call__(self, browser, verbose, timeout = 5.):
        if verbose:
            print 'Query:', self.urldata

        try:
            raw = browser.open_novisit(self.urldata, timeout=timeout).read()
        except Exception, e:
            report(verbose)
            if callable(getattr(e, 'getcode', None)) and \
                    e.getcode() == 404:
                return
            raise
        if '<title>404 - ' in raw:
            return
        raw = xml_to_unicode(raw, strip_encoding_pats=True,
                resolve_entities=True)[0]

        try:
            feed = soupparser.fromstring(raw)
        except:
            try:
                #remove ASCII invalid chars
                feed = soupparser.fromstring(clean_ascii_chars(raw))
            except:
                return None, self.urldata

        #nb of page
        try:
            nbresults = self.renbres.findall(feed.xpath("//*[@class='resultCount']")[0].text)
        except:
            return None, self.urldata

        pages =[feed]
        if len(nbresults) > 1:
            nbpagetoquery = int(ceil(float(min(int(nbresults[2]), self.max_results))/ int(nbresults[1])))
            for i in xrange(2, nbpagetoquery + 1):
                try:
                    urldata = self.urldata + '&page=' + str(i)
                    raw = browser.open_novisit(urldata, timeout=timeout).read()
                except Exception, e:
                    continue
                if '<title>404 - ' in raw:
                    continue
                raw = xml_to_unicode(raw, strip_encoding_pats=True,
                        resolve_entities=True)[0]
                try:
                    feed = soupparser.fromstring(raw)
                except:
                    try:
                        #remove ASCII invalid chars
                        feed = soupparser.fromstring(clean_ascii_chars(raw))
                    except:
                        continue
                pages.append(feed)

        results = []
        for x in pages:
            results.extend([i.getparent().get('href') \
                for i in x.xpath("//a/span[@class='srTitle']")])
        return results[:self.max_results], self.baseurl

class ResultList(list):

    def __init__(self, baseurl, lang = 'all'):
        self.baseurl = baseurl
        self.lang = lang
        self.repub = re.compile(u'\((.*)\)')
        self.rerat = re.compile(u'([0-9.]+)')
        self.reattr = re.compile(r'<([a-zA-Z0-9]+)\s[^>]+>')
        self.reoutp = re.compile(r'(?s)<em>--This text ref.*?</em>')
        self.recom = re.compile(r'(?s)<!--.*?-->')
        self.republi = re.compile(u'(Editeur|Publisher|Verlag)', re.I)
        self.reisbn = re.compile(u'(ISBN-10|ISBN-10|ASIN)', re.I)
        self.relang = re.compile(u'(Language|Langue|Sprache)', re.I)
        self.reratelt = re.compile(u'(Average\s*Customer\s*Review|Moyenne\s*des\s*commentaires\s*client|Durchschnittliche\s*Kundenbewertung)', re.I)
        self.reprod = re.compile(u'(Product\s*Details|D.tails\s*sur\s*le\s*produit|Produktinformation)', re.I)

    def strip_tags_etree(self, etreeobj, invalid_tags):
        for (itag, rmv) in invalid_tags.iteritems():
            if rmv:
                for elts in etreeobj.getiterator(itag):
                    elts.drop_tree()
            else:
                for elts in etreeobj.getiterator(itag):
                    elts.drop_tag()

    def clean_entry(self, entry, invalid_tags = {'script': True},
                invalid_id = (), invalid_class=()):
        #invalid_tags: remove tag and keep content if False else remove
        #remove tags
        if invalid_tags:
            self.strip_tags_etree(entry, invalid_tags)
        #remove id
        if invalid_id:
            for eltid in invalid_id:
                elt = entry.get_element_by_id(eltid)
                if elt is not None:
                    elt.drop_tree()
        #remove class
        if invalid_class:
            for eltclass in invalid_class:
                elts = entry.find_class(eltclass)
                if elts is not None:
                    for elt in elts:
                        elt.drop_tree()

    def get_title(self, entry):
        title = entry.get_element_by_id('btAsinTitle')
        if title is not None:
            title = title.text
        return unicode(title.replace('\n', '').strip())

    def get_authors(self, entry):
        author = entry.get_element_by_id('btAsinTitle')
        while author.getparent().tag != 'div':
            author = author.getparent()
        author = author.getparent()
        authortext = []
        for x in author.getiterator('a'):
            authortext.append(unicode(x.text_content().strip()))
        return authortext

    def get_description(self, entry, verbose):
        try:
            description = entry.get_element_by_id("productDescription").find("div[@class='content']")
            inv_class = ('seeAll', 'emptyClear')
            inv_tags ={'img': True, 'a': False}
            self.clean_entry(description, invalid_tags=inv_tags, invalid_class=inv_class)
            description = html.tostring(description, method='html', encoding=unicode).strip()
            # remove all attributes from tags
            description = self.reattr.sub(r'<\1>', description)
            # Remove the notice about text referring to out of print editions
            description = self.reoutp.sub('', description)
            # Remove comments
            description = self.recom.sub('', description)
            return unicode(sanitize_comments_html(description))
        except:
            report(verbose)
            return None

    def get_tags(self, entry, browser, verbose):
        try:
            tags = entry.get_element_by_id('tagContentHolder')
            testptag = tags.find_class('see-all')
            if testptag:
                for x in testptag:
                    alink = x.xpath('descendant-or-self::a')
                    if alink:
                        if alink[0].get('class') == 'tgJsActive':
                            continue
                        link = self.baseurl + alink[0].get('href')
                        entry = self.get_individual_metadata(browser, link, verbose)
                        tags = entry.get_element_by_id('tagContentHolder')
                        break
            tags = [a.text for a in tags.getiterator('a') if a.get('rel') == 'tag']
        except:
            report(verbose)
            tags = []
        return tags

    def get_book_info(self, entry, mi, verbose):
        try:
            entry = entry.get_element_by_id('SalesRank').getparent()
        except:
            try:
                for z in entry.getiterator('h2'):
                    if self.reprod.search(z.text_content()):
                        entry = z.getparent().find("div[@class='content']/ul")
                        break
            except:
                report(verbose)
                return mi
        elts = entry.findall('li')
        #pub & date
        elt = filter(lambda x: self.republi.search(x.find('b').text), elts)
        if elt:
            pub = elt[0].find('b').tail
            mi.publisher = unicode(self.repub.sub('', pub).strip())
            d = self.repub.search(pub)
            if d is not None:
                d = d.group(1)
                try:
                    default = utcnow().replace(day=15)
                    if self.lang != 'all':
                        d = replace_months(d, self.lang)
                    d = parse_date(d, assume_utc=True, default=default)
                    mi.pubdate = d
                except:
                    report(verbose)
        #ISBN
        elt = filter(lambda x: self.reisbn.search(x.find('b').text), elts)
        if elt:
            isbn = elt[0].find('b').tail.replace('-', '').strip()
            if check_isbn(isbn):
                mi.isbn = unicode(isbn)
            elif len(elt) > 1:
                isbn = elt[1].find('b').tail.replace('-', '').strip()
                if check_isbn(isbn):
                    mi.isbn = unicode(isbn)
        #Langue
        elt = filter(lambda x: self.relang.search(x.find('b').text), elts)
        if elt:
            langue = elt[0].find('b').tail.strip()
            if langue:
                mi.language = unicode(langue)
        #ratings
        elt = filter(lambda x: self.reratelt.search(x.find('b').text), elts)
        if elt:
            ratings = elt[0].find_class('swSprite')
            if ratings:
                ratings = self.rerat.findall(ratings[0].get('title'))
                if len(ratings) == 2:
                    mi.rating = float(ratings[0])/float(ratings[1]) * 5
        return mi

    def fill_MI(self, entry, title, authors, browser, verbose):
        mi = MetaInformation(title, authors)
        mi.author_sort = authors_to_sort_string(authors)
        mi.comments = self.get_description(entry, verbose)
        mi = self.get_book_info(entry, mi, verbose)
        mi.tags = self.get_tags(entry, browser, verbose)
        return mi

    def get_individual_metadata(self, browser, linkdata, verbose):
        try:
            raw = browser.open_novisit(linkdata).read()
        except Exception, e:
            report(verbose)
            if callable(getattr(e, 'getcode', None)) and \
                    e.getcode() == 404:
                return
            raise
        if '<title>404 - ' in raw:
            report(verbose)
            return
        raw = xml_to_unicode(raw, strip_encoding_pats=True,
                resolve_entities=True)[0]
        try:
            return soupparser.fromstring(raw)
        except:
            try:
                #remove ASCII invalid chars
                return soupparser.fromstring(clean_ascii_chars(raw))
            except:
                report(verbose)
                return

    def populate(self, entries, browser, verbose=False):
        for x in entries:
            try:
                entry = self.get_individual_metadata(browser, x, verbose)
                # clean results
                # inv_ids = ('divsinglecolumnminwidth', 'sims.purchase', 'AutoBuyXGetY', 'A9AdsMiddleBoxTop')
                # inv_class = ('buyingDetailsGrid', 'productImageGrid')
                # inv_tags ={'script': True, 'style': True, 'form': False}
                # self.clean_entry(entry, invalid_id=inv_ids)
                title = self.get_title(entry)
                authors = self.get_authors(entry)
            except Exception, e:
                if verbose:
                    print 'Failed to get all details for an entry'
                    print e
                    print 'URL that failed:', x
                    report(verbose)
                continue
            self.append(self.fill_MI(entry, title, authors, browser, verbose))


def search(title=None, author=None, publisher=None, isbn=None,
           max_results=5, verbose=False, keywords=None, lang='all'):
    br = browser()
    entries, baseurl = Query(title=title, author=author, isbn=isbn, publisher=publisher,
        keywords=keywords, max_results=max_results,rlang=lang)(br, verbose)

    if entries is None or len(entries) == 0:
        return

    #List of entry
    ans = ResultList(baseurl, lang)
    ans.populate(entries, br, verbose)
    return ans

def option_parser():
    parser = OptionParser(textwrap.dedent(\
        _('''\
        %prog [options]

        Fetch book metadata from Amazon. You must specify one of title, author,
        ISBN, publisher or keywords. Will fetch a maximum of 10 matches,
        so you should make your query as specific as possible.
        You can choose the language for metadata retrieval:
        All & english & french & german & spanish
        '''
    )))
    parser.add_option('-t', '--title', help='Book title')
    parser.add_option('-a', '--author', help='Book author(s)')
    parser.add_option('-p', '--publisher', help='Book publisher')
    parser.add_option('-i', '--isbn', help='Book ISBN')
    parser.add_option('-k', '--keywords', help='Keywords')
    parser.add_option('-m', '--max-results', default=10,
        help='Maximum number of results to fetch')
    parser.add_option('-l', '--lang', default='all',
        help='Chosen language for metadata search (all, en, fr, es, de)')
    parser.add_option('-v', '--verbose', default=0, action='count',
        help='Be more verbose about errors')
    return parser

def main(args=sys.argv):
    parser = option_parser()
    opts, args = parser.parse_args(args)
    try:
        results = search(opts.title, opts.author, isbn=opts.isbn, publisher=opts.publisher,
            keywords=opts.keywords, verbose=opts.verbose, max_results=opts.max_results,
            lang=opts.lang)
    except AssertionError:
        report(True)
        parser.print_help()
        return 1
    if results is None or len(results) == 0:
        print 'No result found for this search!'
        return 0
    for result in results:
        print unicode(result).encode(preferred_encoding, 'replace')
        print

if __name__ == '__main__':
    sys.exit(main())
@@ -531,6 +531,8 @@ class Metadata(object):
            res = format_date(res, cmeta['display'].get('date_format','dd MMM yyyy'))
        elif datatype == 'bool':
            res = _('Yes') if res else _('No')
+        elif datatype == 'rating':
+            res = res/2
        return (name, unicode(res), orig_res, cmeta)

        # Translate aliases into the standard field name
390 src/calibre/ebooks/metadata/fictionwise.py Normal file
@@ -0,0 +1,390 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2010, sengian <sengian1@gmail.com>'
__docformat__ = 'restructuredtext en'

import sys, textwrap, re, traceback, socket
from urllib import urlencode

from lxml.html import soupparser, tostring

from calibre import browser, preferred_encoding
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.metadata import MetaInformation, check_isbn, \
    authors_to_sort_string
from calibre.library.comments import sanitize_comments_html
from calibre.ebooks.metadata.fetch import MetadataSource
from calibre.utils.config import OptionParser
from calibre.utils.date import parse_date, utcnow
from calibre.utils.cleantext import clean_ascii_chars

class Fictionwise(MetadataSource): # {{{

    author = 'Sengian'
    name = 'Fictionwise'
    description = _('Downloads metadata from Fictionwise')

    has_html_comments = True

    def fetch(self):
        try:
            self.results = search(self.title, self.book_author, self.publisher,
                self.isbn, max_results=10, verbose=self.verbose)
        except Exception, e:
            self.exception = e
            self.tb = traceback.format_exc()

    # }}}

class FictionwiseError(Exception):
    pass

def report(verbose):
    if verbose:
        traceback.print_exc()

class Query(object):

    BASE_URL = 'http://www.fictionwise.com/servlet/mw'

    def __init__(self, title=None, author=None, publisher=None, keywords=None, max_results=20):
        assert not(title is None and author is None and publisher is None and keywords is None)
        assert (max_results < 21)

        self.max_results = int(max_results)
        q = { 'template' : 'searchresults_adv.htm' ,
              'searchtitle' : '',
              'searchauthor' : '',
              'searchpublisher' : '',
              'searchkeyword' : '',
              #possibilities startoflast, fullname, lastfirst
              'searchauthortype' : 'startoflast',
              'searchcategory' : '',
              'searchcategory2' : '',
              'searchprice_s' : '0',
              'searchprice_e' : 'ANY',
              'searchformat' : '',
              'searchgeo' : 'US',
              'searchfwdatetype' : '',
              #maybe use dates fields if needed?
              #'sortorder' : 'DESC',
              #many options available: b.SortTitle, a.SortName,
              #b.DateFirstPublished, b.FWPublishDate
              'sortby' : 'b.SortTitle'
            }
        if title is not None:
            q['searchtitle'] = title
        if author is not None:
            q['searchauthor'] = author
        if publisher is not None:
            q['searchpublisher'] = publisher
        if keywords is not None:
            q['searchkeyword'] = keywords

        if isinstance(q, unicode):
            q = q.encode('utf-8')
        self.urldata = urlencode(q)

    def __call__(self, browser, verbose, timeout = 5.):
        if verbose:
            print _('Query: %s') % self.BASE_URL+self.urldata

        try:
            raw = browser.open_novisit(self.BASE_URL, self.urldata, timeout=timeout).read()
        except Exception, e:
            report(verbose)
            if callable(getattr(e, 'getcode', None)) and \
                    e.getcode() == 404:
                return
            if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
                raise FictionwiseError(_('Fictionwise timed out. Try again later.'))
            raise FictionwiseError(_('Fictionwise encountered an error.'))
        if '<title>404 - ' in raw:
            return
        raw = xml_to_unicode(raw, strip_encoding_pats=True,
                resolve_entities=True)[0]
        try:
            feed = soupparser.fromstring(raw)
        except:
            try:
                #remove ASCII invalid chars
                feed = soupparser.fromstring(clean_ascii_chars(raw))
            except:
                return None

        # get list of results as links
        results = feed.xpath("//table[3]/tr/td[2]/table/tr/td/p/table[2]/tr[@valign]")
        results = results[:self.max_results]
        results = [i.xpath('descendant-or-self::a')[0].get('href') for i in results]
        #return feed if no links ie normally a single book or nothing
        if not results:
            results = [feed]
        return results

class ResultList(list):

    BASE_URL = 'http://www.fictionwise.com'
    COLOR_VALUES = {'BLUE': 4, 'GREEN': 3, 'YELLOW': 2, 'RED': 1, 'NA': 0}

    def __init__(self):
        self.retitle = re.compile(r'\[[^\[\]]+\]')
        self.rechkauth = re.compile(r'.*book\s*by', re.I)
        self.redesc = re.compile(r'book\s*description\s*:\s*(<br[^>]+>)*(?P<desc>.*)<br[^>]*>.{,15}publisher\s*:', re.I)
        self.repub = re.compile(r'.*publisher\s*:\s*', re.I)
        self.redate = re.compile(r'.*release\s*date\s*:\s*', re.I)
        self.retag = re.compile(r'.*book\s*category\s*:\s*', re.I)
        self.resplitbr = re.compile(r'<br[^>]*>', re.I)
        self.recomment = re.compile(r'(?s)<!--.*?-->')
        self.reimg = re.compile(r'<img[^>]*>', re.I)
        self.resanitize = re.compile(r'\[HTML_REMOVED\]\s*', re.I)
        self.renbcom = re.compile('(?P<nbcom>\d+)\s*Reader Ratings:')
        self.recolor = re.compile('(?P<ncolor>[^/]+).gif')
        self.resplitbrdiv = re.compile(r'(<br[^>]+>|</?div[^>]*>)', re.I)
        self.reisbn = re.compile(r'.*ISBN\s*:\s*', re.I)

    def strip_tags_etree(self, etreeobj, invalid_tags):
        for (itag, rmv) in invalid_tags.iteritems():
            if rmv:
                for elts in etreeobj.getiterator(itag):
                    elts.drop_tree()
            else:
                for elts in etreeobj.getiterator(itag):
                    elts.drop_tag()

    def clean_entry(self, entry, invalid_tags = {'script': True},
                invalid_id = (), invalid_class=(), invalid_xpath = ()):
        #invalid_tags: remove tag and keep content if False else remove
        #remove tags
        if invalid_tags:
            self.strip_tags_etree(entry, invalid_tags)
        #remove xpath
        if invalid_xpath:
            for eltid in invalid_xpath:
                elt = entry.xpath(eltid)
                for el in elt:
                    el.drop_tree()
        #remove id
        if invalid_id:
            for eltid in invalid_id:
                elt = entry.get_element_by_id(eltid)
                if elt is not None:
                    elt.drop_tree()
        #remove class
        if invalid_class:
            for eltclass in invalid_class:
                elts = entry.find_class(eltclass)
                if elts is not None:
                    for elt in elts:
                        elt.drop_tree()

    def output_entry(self, entry, prettyout = True, htmlrm="\d+"):
        out = tostring(entry, pretty_print=prettyout)
        #try to work around tostring to remove this encoding for example
        reclean = re.compile('(\n+|\t+|\r+|&#'+htmlrm+';)')
        return reclean.sub('', out)

    def get_title(self, entry):
        title = entry.findtext('./')
        return self.retitle.sub('', title).strip()

    def get_authors(self, entry):
        authortext = entry.find('./br').tail
        if not self.rechkauth.search(authortext):
            return []
        authortext = self.rechkauth.sub('', authortext)
        return [a.strip() for a in authortext.split('&')]

    def get_rating(self, entrytable, verbose):
        nbcomment = tostring(entrytable.getprevious())
        try:
            nbcomment = self.renbcom.search(nbcomment).group("nbcom")
        except:
            report(verbose)
            return None
        hval = dict((self.COLOR_VALUES[self.recolor.search(image.get('src', default='NA.gif')).group("ncolor")],
            float(image.get('height', default=0))) \
                for image in entrytable.getiterator('img'))
        #ratings as x/5
        return float(1.25*sum(k*v for (k, v) in hval.iteritems())/sum(hval.itervalues()))

    def get_description(self, entry):
        description = self.output_entry(entry.xpath('./p')[1],htmlrm="")
        description = self.redesc.search(description)
        if not description or not description.group("desc"):
            return None
        #remove invalid tags
        description = self.reimg.sub('', description.group("desc"))
        description = self.recomment.sub('', description)
        description = self.resanitize.sub('', sanitize_comments_html(description))
        return _('SUMMARY:\n %s') % re.sub(r'\n\s+</p>','\n</p>', description)

    def get_publisher(self, entry):
        publisher = self.output_entry(entry.xpath('./p')[1])
        publisher = filter(lambda x: self.repub.search(x) is not None,
            self.resplitbr.split(publisher))
        if not len(publisher):
            return None
        publisher = self.repub.sub('', publisher[0])
        return publisher.split(',')[0].strip()

    def get_tags(self, entry):
        tag = self.output_entry(entry.xpath('./p')[1])
        tag = filter(lambda x: self.retag.search(x) is not None,
            self.resplitbr.split(tag))
        if not len(tag):
            return []
        return map(lambda x: x.strip(), self.retag.sub('', tag[0]).split('/'))

    def get_date(self, entry, verbose):
        date = self.output_entry(entry.xpath('./p')[1])
        date = filter(lambda x: self.redate.search(x) is not None,
            self.resplitbr.split(date))
        if not len(date):
            return None
        try:
            d = self.redate.sub('', date[0])
            if d:
                default = utcnow().replace(day=15)
                d = parse_date(d, assume_utc=True, default=default)
            else:
                d = None
        except:
            report(verbose)
            d = None
        return d

    def get_ISBN(self, entry):
        isbns = self.output_entry(entry.xpath('./p')[2])
        isbns = filter(lambda x: self.reisbn.search(x) is not None,
            self.resplitbrdiv.split(isbns))
        if not len(isbns):
            return None
        isbns = [self.reisbn.sub('', x) for x in isbns if check_isbn(self.reisbn.sub('', x))]
        return sorted(isbns, cmp=lambda x,y:cmp(len(x), len(y)))[-1]

    def fill_MI(self, entry, title, authors, ratings, verbose):
        mi = MetaInformation(title, authors)
        mi.rating = ratings
        mi.comments = self.get_description(entry)
        mi.publisher = self.get_publisher(entry)
        mi.tags = self.get_tags(entry)
        mi.pubdate = self.get_date(entry, verbose)
        mi.isbn = self.get_ISBN(entry)
        mi.author_sort = authors_to_sort_string(authors)
        return mi

    def get_individual_metadata(self, browser, linkdata, verbose):
        try:
            raw = browser.open_novisit(self.BASE_URL + linkdata).read()
        except Exception, e:
            report(verbose)
            if callable(getattr(e, 'getcode', None)) and \
                    e.getcode() == 404:
                return
            if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
                raise FictionwiseError(_('Fictionwise timed out. Try again later.'))
            raise FictionwiseError(_('Fictionwise encountered an error.'))
        if '<title>404 - ' in raw:
            report(verbose)
            return
        raw = xml_to_unicode(raw, strip_encoding_pats=True,
                resolve_entities=True)[0]
        try:
            return soupparser.fromstring(raw)
        except:
            try:
                #remove ASCII invalid chars
                return soupparser.fromstring(clean_ascii_chars(raw))
            except:
                return None

    def populate(self, entries, browser, verbose=False):
        inv_tags ={'script': True, 'a': False, 'font': False, 'strong': False, 'b': False,
            'ul': False, 'span': False}
        inv_xpath =('./table',)
        #single entry
        if len(entries) == 1 and not isinstance(entries[0], str):
            try:
                entry = entries[0].xpath("//table[3]/tr/td[2]/table[1]/tr/td/font/table/tr/td")[0]
                self.clean_entry(entry, invalid_tags=inv_tags, invalid_xpath=inv_xpath)
                title = self.get_title(entry)
                #maybe strengthen the search
                ratings = self.get_rating(entry.xpath("./p/table")[1], verbose)
                authors = self.get_authors(entry)
            except Exception, e:
                if verbose:
                    print _('Failed to get all details for an entry')
                    print e
                return
            self.append(self.fill_MI(entry, title, authors, ratings, verbose))
        else:
            #multiple entries
            for x in entries:
                try:
                    entry = self.get_individual_metadata(browser, x, verbose)
                    entry = entry.xpath("//table[3]/tr/td[2]/table[1]/tr/td/font/table/tr/td")[0]
                    self.clean_entry(entry, invalid_tags=inv_tags, invalid_xpath=inv_xpath)
                    title = self.get_title(entry)
                    #maybe strengthen the search
                    ratings = self.get_rating(entry.xpath("./p/table")[1], verbose)
                    authors = self.get_authors(entry)
                except Exception, e:
                    if verbose:
                        print _('Failed to get all details for an entry')
                        print e
                    continue
                self.append(self.fill_MI(entry, title, authors, ratings, verbose))


def search(title=None, author=None, publisher=None, isbn=None,
           min_viewability='none', verbose=False, max_results=5,
           keywords=None):
    br = browser()
    entries = Query(title=title, author=author, publisher=publisher,
        keywords=keywords, max_results=max_results)(br, verbose, timeout = 15.)

    #List of entry
    ans = ResultList()
    ans.populate(entries, br, verbose)
    return ans


def option_parser():
    parser = OptionParser(textwrap.dedent(\
        _('''\
        %prog [options]

        Fetch book metadata from Fictionwise. You must specify one of title, author,
        or keywords. No ISBN specification possible. Will fetch a maximum of 20 matches,
        so you should make your query as specific as possible.
        ''')
    ))
    parser.add_option('-t', '--title', help=_('Book title'))
    parser.add_option('-a', '--author', help=_('Book author(s)'))
    parser.add_option('-p', '--publisher', help=_('Book publisher'))
    parser.add_option('-k', '--keywords', help=_('Keywords'))
    parser.add_option('-m', '--max-results', default=20,
        help=_('Maximum number of results to fetch'))
    parser.add_option('-v', '--verbose', default=0, action='count',
        help=_('Be more verbose about errors'))
    return parser

def main(args=sys.argv):
    parser = option_parser()
    opts, args = parser.parse_args(args)
    try:
        results = search(opts.title, opts.author, publisher=opts.publisher,
            keywords=opts.keywords, verbose=opts.verbose, max_results=opts.max_results)
    except AssertionError:
        report(True)
        parser.print_help()
        return 1
    if results is None or len(results) == 0:
        print _('No result found for this search!')
        return 0
    for result in results:
        print unicode(result).encode(preferred_encoding, 'replace')
        print

if __name__ == '__main__':
    sys.exit(main())
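Fictionwise renders reader ratings as a bar chart: `COLOR_VALUES` maps each bar colour to a score and the bar image's pixel height encodes how many readers voted that way; `get_rating` takes the height-weighted mean and scales it by 1.25 so a maximum score of 4 maps to 5 stars. The arithmetic in isolation (a standalone sketch, with hypothetical sample heights):

COLOR_VALUES = {'BLUE': 4, 'GREEN': 3, 'YELLOW': 2, 'RED': 1, 'NA': 0}

def rating_from_bars(bars):
    # bars: list of (colour, pixel height) pairs scraped from the chart images.
    hval = dict((COLOR_VALUES[c], float(h)) for (c, h) in bars)
    return 1.25 * sum(k * v for (k, v) in hval.items()) / sum(hval.values())

print(rating_from_bars([('BLUE', 60), ('GREEN', 30), ('RED', 10)]))
# -> 4.25, i.e. (4*60 + 3*30 + 1*10) / 100 * 1.25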
@@ -10,7 +10,8 @@ from copy import deepcopy

from lxml.html import soupparser

-from calibre.utils.date import parse_date, utcnow
+from calibre.utils.date import parse_date, utcnow, replace_months
+from calibre.utils.cleantext import clean_ascii_chars
from calibre import browser, preferred_encoding
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.metadata import MetaInformation, check_isbn, \
@@ -71,31 +72,16 @@ class NiceBooksCovers(CoverDownload):
                    traceback.format_exc(), self.name))

+class NiceBooksError(Exception):
+    pass
+
+class ISBNNotFound(NiceBooksError):
+    pass
+
def report(verbose):
    if verbose:
-        import traceback
        traceback.print_exc()

-def replace_monthsfr(datefr):
-    # Replace french months by english equivalent for parse_date
-    frtoen = {
-        u'[jJ]anvier': u'jan',
-        u'[fF].vrier': u'feb',
-        u'[mM]ars': u'mar',
-        u'[aA]vril': u'apr',
-        u'[mM]ai': u'may',
-        u'[jJ]uin': u'jun',
-        u'[jJ]uillet': u'jul',
-        u'[aA]o.t': u'aug',
-        u'[sS]eptembre': u'sep',
-        u'[Oo]ctobre': u'oct',
-        u'[nN]ovembre': u'nov',
-        u'[dD].cembre': u'dec' }
-    for k in frtoen.iterkeys():
-        tmp = re.sub(k, frtoen[k], datefr)
-        if tmp <> datefr: break
-    return tmp
-
class Query(object):

    BASE_URL = 'http://fr.nicebooks.com/'
@@ -119,7 +105,7 @@ class Query(object):

    def __call__(self, browser, verbose, timeout = 5.):
        if verbose:
-            print 'Query:', self.BASE_URL+self.urldata
+            print _('Query: %s') % self.BASE_URL+self.urldata

        try:
            raw = browser.open_novisit(self.BASE_URL+self.urldata, timeout=timeout).read()
@@ -128,7 +114,9 @@ class Query(object):
            if callable(getattr(e, 'getcode', None)) and \
                    e.getcode() == 404:
                return
-            raise
+            if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
+                raise NiceBooksError(_('Nicebooks timed out. Try again later.'))
+            raise NiceBooksError(_('Nicebooks encountered an error.'))
        if '<title>404 - ' in raw:
            return
        raw = xml_to_unicode(raw, strip_encoding_pats=True,
@@ -136,7 +124,11 @@ class Query(object):
        try:
            feed = soupparser.fromstring(raw)
        except:
-            return
+            try:
+                #remove ASCII invalid chars
+                feed = soupparser.fromstring(clean_ascii_chars(raw))
+            except:
+                return None

        #nb of page to call
        try:
@@ -160,6 +152,10 @@ class Query(object):
                    resolve_entities=True)[0]
            try:
                feed = soupparser.fromstring(raw)
+            except:
+                try:
+                    #remove ASCII invalid chars
+                    feed = soupparser.fromstring(clean_ascii_chars(raw))
            except:
                continue
            pages.append(feed)
@@ -180,14 +176,12 @@ class ResultList(list):
        self.reautclean = re.compile(u'\s*\(.*\)\s*')

    def get_title(self, entry):
-        # title = deepcopy(entry.find("div[@id='book-info']"))
        title = deepcopy(entry)
        title.remove(title.find("dl[@title='Informations sur le livre']"))
        title = ' '.join([i.text_content() for i in title.iterchildren()])
        return unicode(title.replace('\n', ''))

    def get_authors(self, entry):
-        # author = entry.find("div[@id='book-info']/dl[@title='Informations sur le livre']")
        author = entry.find("dl[@title='Informations sur le livre']")
        authortext = []
        for x in author.getiterator('dt'):
@@ -223,7 +217,7 @@ class ResultList(list):
            d = x.getnext().text_content()
            try:
                default = utcnow().replace(day=15)
-                d = replace_monthsfr(d)
+                d = replace_months(d, 'fr')
                d = parse_date(d, assume_utc=True, default=default)
                mi.pubdate = d
            except:
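`replace_months` in `calibre.utils.date` generalizes the deleted `replace_monthsfr` helper: localized month names are rewritten to English abbreviations so `parse_date` can parse them, with the table selected by language code. A standalone re-creation of the idea, using the French table removed above:

import re

FR_TO_EN = {
    u'[jJ]anvier': u'jan', u'[fF].vrier': u'feb', u'[mM]ars': u'mar',
    u'[aA]vril': u'apr', u'[mM]ai': u'may', u'[jJ]uin': u'jun',
    u'[jJ]uillet': u'jul', u'[aA]o.t': u'aug', u'[sS]eptembre': u'sep',
    u'[Oo]ctobre': u'oct', u'[nN]ovembre': u'nov', u'[dD].cembre': u'dec',
}

def replace_months_sketch(date, lang='fr'):
    # Sketch only: calibre's version picks the mapping table based on lang.
    for pat, en in FR_TO_EN.items():
        date = re.sub(pat, en, date)
    return date

print(replace_months_sketch(u'12 f\xe9vrier 2010'))  # -> 12 feb 2010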
@@ -234,11 +228,6 @@ class ResultList(list):
        mi = MetaInformation(title, authors)
        mi.author_sort = authors_to_sort_string(authors)
        mi.comments = self.get_description(entry, verbose)
-        # entry = entry.find("dl[@title='Informations sur le livre']")
-        # mi.publisher = self.get_publisher(entry)
-        # mi.pubdate = self.get_date(entry, verbose)
-        # mi.isbn = self.get_ISBN(entry)
-        # mi.language = self.get_language(entry)
        return self.get_book_info(entry, mi, verbose)

    def get_individual_metadata(self, browser, linkdata, verbose):
@@ -249,7 +238,9 @@ class ResultList(list):
            if callable(getattr(e, 'getcode', None)) and \
                    e.getcode() == 404:
                return
-            raise
+            if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
+                raise NiceBooksError(_('Nicebooks timed out. Try again later.'))
+            raise NiceBooksError(_('Nicebooks encountered an error.'))
        if '<title>404 - ' in raw:
            report(verbose)
            return
@@ -258,7 +249,11 @@ class ResultList(list):
        try:
            feed = soupparser.fromstring(raw)
        except:
-            return
+            try:
+                #remove ASCII invalid chars
+                feed = soupparser.fromstring(clean_ascii_chars(raw))
+            except:
+                return None

        # get results
        return feed.xpath("//div[@id='container']")[0]
@@ -292,13 +287,6 @@ class ResultList(list):
                continue
            self.append(self.fill_MI(entry, title, authors, verbose))

-class NiceBooksError(Exception):
-    pass
-
-class ISBNNotFound(NiceBooksError):
-    pass
-
class Covers(object):

    def __init__(self, isbn = None):
@@ -329,11 +317,10 @@ class Covers(object):
                return cover, ext if ext else 'jpg'
            except Exception, err:
                if isinstance(getattr(err, 'args', [None])[0], socket.timeout):
-                    err = NiceBooksError(_('Nicebooks timed out. Try again later.'))
-                raise err
+                    raise NiceBooksError(_('Nicebooks timed out. Try again later.'))
        if not len(self.urlimg):
            if not self.isbnf:
-                raise ISBNNotFound('ISBN: '+self.isbn+_(' not found.'))
+                raise ISBNNotFound(_('ISBN: %s not found.') % self.isbn)
            raise NiceBooksError(_('An error occurred with the Nicebooks cover fetcher'))
@@ -341,10 +328,10 @@ def search(title=None, author=None, publisher=None, isbn=None,
            max_results=5, verbose=False, keywords=None):
    br = browser()
    entries = Query(title=title, author=author, isbn=isbn, publisher=publisher,
-        keywords=keywords, max_results=max_results)(br, verbose)
+        keywords=keywords, max_results=max_results)(br, verbose,timeout = 10.)

    if entries is None or len(entries) == 0:
-        return
+        return None

    #List of entry
    ans = ResultList()
@@ -364,28 +351,28 @@ def cover_from_isbn(isbn, timeout = 5.):

def option_parser():
    parser = OptionParser(textwrap.dedent(\
-        '''\
+        _('''\
        %prog [options]

        Fetch book metadata from Nicebooks. You must specify one of title, author,
        ISBN, publisher or keywords. Will fetch a maximum of 20 matches,
        so you should make your query as specific as possible.
        It can also get covers if the option is activated.
-        '''
+        ''')
    ))
-    parser.add_option('-t', '--title', help='Book title')
-    parser.add_option('-a', '--author', help='Book author(s)')
-    parser.add_option('-p', '--publisher', help='Book publisher')
-    parser.add_option('-i', '--isbn', help='Book ISBN')
-    parser.add_option('-k', '--keywords', help='Keywords')
+    parser.add_option('-t', '--title', help=_('Book title'))
+    parser.add_option('-a', '--author', help=_('Book author(s)'))
+    parser.add_option('-p', '--publisher', help=_('Book publisher'))
+    parser.add_option('-i', '--isbn', help=_('Book ISBN'))
+    parser.add_option('-k', '--keywords', help=_('Keywords'))
    parser.add_option('-c', '--covers', default=0,
-        help='Covers: 1-Check/ 2-Download')
+        help=_('Covers: 1-Check/ 2-Download'))
    parser.add_option('-p', '--coverspath', default='',
-        help='Covers files path')
+        help=_('Covers files path'))
    parser.add_option('-m', '--max-results', default=20,
-        help='Maximum number of results to fetch')
+        help=_('Maximum number of results to fetch'))
    parser.add_option('-v', '--verbose', default=0, action='count',
-        help='Be more verbose about errors')
+        help=_('Be more verbose about errors'))
    return parser

def main(args=sys.argv):
@ -400,15 +387,15 @@ def main(args=sys.argv):
|
|||||||
parser.print_help()
|
parser.print_help()
|
||||||
return 1
|
return 1
|
||||||
if results is None or len(results) == 0:
|
if results is None or len(results) == 0:
|
||||||
print 'No result found for this search!'
|
print _('No result found for this search!')
|
||||||
return 0
|
return 0
|
||||||
for result in results:
|
for result in results:
|
||||||
print unicode(result).encode(preferred_encoding, 'replace')
|
print unicode(result).encode(preferred_encoding, 'replace')
|
||||||
covact = int(opts.covers)
|
covact = int(opts.covers)
|
||||||
if covact == 1:
|
if covact == 1:
|
||||||
textcover = 'No cover found!'
|
textcover = _('No cover found!')
|
||||||
if check_for_cover(result.isbn):
|
if check_for_cover(result.isbn):
|
||||||
textcover = 'A cover was found for this book'
|
textcover = _('A cover was found for this book')
|
||||||
print textcover
|
print textcover
|
||||||
elif covact == 2:
|
elif covact == 2:
|
||||||
cover_data, ext = cover_from_isbn(result.isbn)
|
cover_data, ext = cover_from_isbn(result.isbn)
|
||||||
@ -417,7 +404,7 @@ def main(args=sys.argv):
|
|||||||
cpath = os.path.normpath(opts.coverspath + '/' + result.isbn)
|
cpath = os.path.normpath(opts.coverspath + '/' + result.isbn)
|
||||||
oname = os.path.abspath(cpath+'.'+ext)
|
oname = os.path.abspath(cpath+'.'+ext)
|
||||||
open(oname, 'wb').write(cover_data)
|
open(oname, 'wb').write(cover_data)
|
||||||
print 'Cover saved to file ', oname
|
print _('Cover saved to file '), oname
|
||||||
print
|
print
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
@@ -8,12 +8,12 @@ __docformat__ = 'restructuredtext en'
 
 from threading import Thread
 from Queue import Empty
-import os, time, sys, shutil
+import os, time, sys, shutil, json
 
 from calibre.utils.ipc.job import ParallelJob
 from calibre.utils.ipc.server import Server
 from calibre.ptempfile import PersistentTemporaryDirectory, TemporaryDirectory
-from calibre import prints
+from calibre import prints, isbytestring
 from calibre.constants import filesystem_encoding
 
 
@@ -194,14 +194,42 @@ class SaveWorker(Thread):
         self.daemon = True
         self.path, self.opts = path, opts
         self.ids = ids
-        self.library_path = db.library_path
+        self.db = db
         self.canceled = False
         self.result_queue = result_queue
         self.error = None
         self.spare_server = spare_server
         self.start()
 
+    def collect_data(self, ids):
+        from calibre.ebooks.metadata.opf2 import metadata_to_opf
+        data = {}
+        for i in set(ids):
+            mi = self.db.get_metadata(i, index_is_id=True, get_cover=True)
+            opf = metadata_to_opf(mi)
+            if isbytestring(opf):
+                opf = opf.decode('utf-8')
+            cpath = None
+            if mi.cover:
+                cpath = mi.cover
+                if isbytestring(cpath):
+                    cpath = cpath.decode(filesystem_encoding)
+            formats = {}
+            if mi.formats:
+                for fmt in mi.formats:
+                    fpath = self.db.format_abspath(i, fmt, index_is_id=True)
+                    if fpath is not None:
+                        if isbytestring(fpath):
+                            fpath = fpath.decode(filesystem_encoding)
+                        formats[fmt.lower()] = fpath
+            data[i] = [opf, cpath, formats]
+        return data
+
     def run(self):
+        with TemporaryDirectory('save_to_disk_data') as tdir:
+            self._run(tdir)
+
+    def _run(self, tdir):
         from calibre.library.save_to_disk import config
         server = Server() if self.spare_server is None else self.spare_server
         ids = set(self.ids)
@@ -212,12 +240,19 @@ class SaveWorker(Thread):
         for pref in c.preferences:
             recs[pref.name] = getattr(self.opts, pref.name)
 
+        plugboards = self.db.prefs.get('plugboards', {})
+
         for i, task in enumerate(tasks):
             tids = [x[-1] for x in task]
+            data = self.collect_data(tids)
+            dpath = os.path.join(tdir, '%d.json'%i)
+            with open(dpath, 'wb') as f:
+                f.write(json.dumps(data, ensure_ascii=False).encode('utf-8'))
+
             job = ParallelJob('save_book',
                     'Save books (%d of %d)'%(i, len(tasks)),
                     lambda x,y:x,
-                    args=[tids, self.library_path, self.path, recs])
+                    args=[tids, dpath, plugboards, self.path, recs])
             jobs.add(job)
             server.add_job(job)
 
@@ -226,21 +261,21 @@ class SaveWorker(Thread):
             time.sleep(0.2)
             running = False
             for job in jobs:
-                job.update(consume_notifications=False)
-                while True:
-                    try:
-                        id, title, ok, tb = job.notifications.get_nowait()[0]
-                        if id in ids:
-                            self.result_queue.put((id, title, ok, tb))
-                            ids.remove(id)
-                    except Empty:
-                        break
+                self.get_notifications(job, ids)
                 if not job.is_finished:
                     running = True
 
             if not running:
                 break
 
+        for job in jobs:
+            if not job.result:
+                continue
+            for id_, title, ok, tb in job.result:
+                if id_ in ids:
+                    self.result_queue.put((id_, title, ok, tb))
+                    ids.remove(id_)
+
         server.close()
         time.sleep(1)
 
@@ -257,21 +292,39 @@ class SaveWorker(Thread):
         except:
             pass
 
+    def get_notifications(self, job, ids):
+        job.update(consume_notifications=False)
+        while True:
+            try:
+                id, title, ok, tb = job.notifications.get_nowait()[0]
+                if id in ids:
+                    self.result_queue.put((id, title, ok, tb))
+                    ids.remove(id)
+            except Empty:
+                break
 
-def save_book(task, library_path, path, recs, notification=lambda x,y:x):
-    from calibre.library.database2 import LibraryDatabase2
-    db = LibraryDatabase2(library_path)
-    from calibre.library.save_to_disk import config, save_to_disk
+
+def save_book(ids, dpath, plugboards, path, recs, notification=lambda x,y:x):
+    from calibre.library.save_to_disk import config, save_serialized_to_disk
     from calibre.customize.ui import apply_null_metadata
     opts = config().parse()
     for name in recs:
         setattr(opts, name, recs[name])
 
+    results = []
+
     def callback(id, title, failed, tb):
+        results.append((id, title, not failed, tb))
         notification((id, title, not failed, tb))
         return True
 
-    with apply_null_metadata:
-        save_to_disk(db, task, path, opts, callback)
+    data_ = json.loads(open(dpath, 'rb').read().decode('utf-8'))
+    data = {}
+    for k, v in data_.iteritems():
+        data[int(k)] = v
+
+    with apply_null_metadata:
+        save_serialized_to_disk(ids, data, plugboards, path, opts, callback)
+
+    return results
 
@@ -10,9 +10,10 @@ import copy
 import re
 from lxml import etree
 from calibre.ebooks.oeb.base import namespace, barename
-from calibre.ebooks.oeb.base import XHTML, XHTML_NS, OEB_DOCS
+from calibre.ebooks.oeb.base import XHTML, XHTML_NS, OEB_DOCS, urlnormalize
 from calibre.ebooks.oeb.stylizer import Stylizer
 from calibre.ebooks.oeb.transforms.flatcss import KeyMapper
+from calibre.utils.magick.draw import identify_data
 
 MBP_NS = 'http://mobipocket.com/ns/mbp'
 def MBP(name): return '{%s}%s' % (MBP_NS, name)
@@ -121,6 +122,7 @@ class MobiMLizer(object):
         body = item.data.find(XHTML('body'))
         nroot = etree.Element(XHTML('html'), nsmap=MOBI_NSMAP)
         nbody = etree.SubElement(nroot, XHTML('body'))
+        self.current_spine_item = item
         self.mobimlize_elem(body, stylizer, BlockState(nbody),
                 [FormatState()])
         item.data = nroot
@@ -357,8 +359,9 @@ class MobiMLizer(object):
         if tag == 'img' and 'src' in elem.attrib:
             istate.attrib['src'] = elem.attrib['src']
             istate.attrib['align'] = 'baseline'
+            cssdict = style.cssdict()
             for prop in ('width', 'height'):
-                if style[prop] != 'auto':
+                if cssdict[prop] != 'auto':
                     value = style[prop]
                     if value == getattr(self.profile, prop):
                         result = '100%'
@@ -371,8 +374,40 @@ class MobiMLizer(object):
                             (72./self.profile.dpi)))
                     except:
                         continue
-                    result = "%d"%pixs
+                    result = str(pixs)
                 istate.attrib[prop] = result
+            if 'width' not in istate.attrib or 'height' not in istate.attrib:
+                href = self.current_spine_item.abshref(elem.attrib['src'])
+                try:
+                    item = self.oeb.manifest.hrefs[urlnormalize(href)]
+                except:
+                    self.oeb.logger.warn('Failed to find image:',
+                            href)
+                else:
+                    try:
+                        width, height = identify_data(item.data)[:2]
+                    except:
+                        self.oeb.logger.warn('Invalid image:', href)
+                    else:
+                        if 'width' not in istate.attrib and 'height' not in \
+                                    istate.attrib:
+                            istate.attrib['width'] = str(width)
+                            istate.attrib['height'] = str(height)
+                        else:
+                            ar = float(width)/float(height)
+                            if 'width' not in istate.attrib:
+                                try:
+                                    width = int(istate.attrib['height'])*ar
+                                except:
+                                    pass
+                                istate.attrib['width'] = str(int(width))
+                            else:
+                                try:
+                                    height = int(istate.attrib['width'])/ar
+                                except:
+                                    pass
+                                istate.attrib['height'] = str(int(height))
+                        item.unload_data_from_memory()
         elif tag == 'hr' and asfloat(style['width']) > 0:
             prop = style['width'] / self.profile.width
             istate.attrib['width'] = "%d%%" % int(round(prop * 100))
@@ -504,6 +504,9 @@ class MobiReader(object):
             'x-large': '5',
             'xx-large': '6',
             }
+        def barename(x):
+            return x.rpartition(':')[-1]
+
         mobi_version = self.book_header.mobi_version
         for x in root.xpath('//ncx'):
             x.getparent().remove(x)
@@ -512,7 +515,8 @@ class MobiReader(object):
             for x in tag.attrib:
                 if ':' in x:
                     del tag.attrib[x]
-            if tag.tag in ('country-region', 'place', 'placetype', 'placename',
+            if tag.tag and barename(tag.tag.lower()) in \
+                ('country-region', 'place', 'placetype', 'placename',
                     'state', 'city', 'street', 'address', 'content', 'form'):
                 tag.tag = 'div' if tag.tag in ('content', 'form') else 'span'
                 for key in tag.attrib.keys():
@@ -607,7 +607,7 @@ class Metadata(object):
                     key = barename(key)
                 attrib[key] = prefixname(value, nsrmap)
             if namespace(self.term) == DC11_NS:
-                name = DC(barename(self.term).title())
+                name = DC(icu_title(barename(self.term)))
                 elem = element(dcmeta, name, attrib=attrib)
                 elem.text = self.value
             else:
@@ -775,6 +775,7 @@ class Manifest(object):
             return u'Item(id=%r, href=%r, media_type=%r)' \
                 % (self.id, self.href, self.media_type)
 
+        # Parsing {{{
         def _parse_xml(self, data):
             data = xml_to_unicode(data, strip_encoding_pats=True,
                     assume_utf8=True, resolve_entities=True)[0]
@@ -1035,6 +1036,8 @@ class Manifest(object):
                 data = item.data.cssText
             return ('utf-8', data)
 
+        # }}}
+
         @dynamic_property
         def data(self):
             doc = """Provides MIME type sensitive access to the manifest
@@ -96,7 +96,10 @@ class EbookIterator(object):
 
     def search(self, text, index, backwards=False):
         text = text.lower()
-        for i, path in enumerate(self.spine):
+        pmap = [(i, path) for i, path in enumerate(self.spine)]
+        if backwards:
+            pmap.reverse()
+        for i, path in pmap:
             if (backwards and i < index) or (not backwards and i > index):
                 if text in open(path, 'rb').read().decode(path.encoding).lower():
                     return i
@@ -544,7 +544,7 @@ class OEBReader(object):
         data = render_html_svg_workaround(path, self.logger)
         if not data:
             data = ''
-        id, href = self.oeb.manifest.generate('cover', 'cover.jpeg')
+        id, href = self.oeb.manifest.generate('cover', 'cover.jpg')
         item = self.oeb.manifest.add(id, href, JPEG_MIME, data=data)
         return item
 
@@ -240,18 +240,29 @@ class Stylizer(object):
             else:
                 for elem in matches:
                     self.style(elem)._update_cssdict(cssdict)
-        for elem in xpath(tree, '//h:img[@width or @height]'):
-            base = elem.get('style', '').strip()
-            if base:
-                base += ';'
-            for prop in ('width', 'height'):
-                val = elem.get(prop, False)
-                if val:
-                    base += '%s: %s;'%(prop, val)
-                    del elem.attrib[prop]
-            elem.set('style', base)
         for elem in xpath(tree, '//h:*[@style]'):
             self.style(elem)._apply_style_attr()
+        num_pat = re.compile(r'\d+$')
+        for elem in xpath(tree, '//h:img[@width or @height]'):
+            style = self.style(elem)
+            # Check if either height or width is not default
+            is_styled = style._style.get('width', 'auto') != 'auto' or \
+                    style._style.get('height', 'auto') != 'auto'
+            if not is_styled:
+                # Update img style dimension using width and height
+                upd = {}
+                for prop in ('width', 'height'):
+                    val = elem.get(prop, '').strip()
+                    try:
+                        del elem.attrib[prop]
+                    except:
+                        pass
+                    if val:
+                        if num_pat.match(val) is not None:
+                            val += 'px'
+                        upd[prop] = val
+                if upd:
+                    style._update_cssdict(upd)
 
     def _fetch_css_file(self, path):
         hrefs = self.oeb.manifest.hrefs
@@ -564,7 +575,7 @@ class Style(object):
         if parent is not None:
             base = parent.width
         else:
-            base = self._profile.width
+            base = self._profile.width_pts
         if 'width' in self._element.attrib:
             width = self._element.attrib['width']
         elif 'width' in self._style:
@@ -576,6 +587,13 @@ class Style(object):
             if isinstance(result, (unicode, str, bytes)):
                 result = self._profile.width
             self._width = result
+            if 'max-width' in self._style:
+                result = self._unit_convert(self._style['max-width'], base=base)
+                if isinstance(result, (unicode, str, bytes)):
+                    result = self._width
+                if result < self._width:
+                    self._width = result
+
         return self._width
 
     @property
@@ -587,7 +605,7 @@ class Style(object):
         if parent is not None:
             base = parent.height
         else:
-            base = self._profile.height
+            base = self._profile.height_pts
         if 'height' in self._element.attrib:
             height = self._element.attrib['height']
         elif 'height' in self._style:
@@ -599,6 +617,13 @@ class Style(object):
             if isinstance(result, (unicode, str, bytes)):
                 result = self._profile.height
             self._height = result
+            if 'max-height' in self._style:
+                result = self._unit_convert(self._style['max-height'], base=base)
+                if isinstance(result, (unicode, str, bytes)):
+                    result = self._height
+                if result < self._height:
+                    self._height = result
+
         return self._height
 
     @property
 130  src/calibre/ebooks/oeb/transforms/filenames.py  Normal file
@@ -0,0 +1,130 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+
+__license__ = 'GPL v3'
+__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import posixpath
+from urlparse import urldefrag
+
+from lxml import etree
+import cssutils
+
+from calibre.ebooks.oeb.base import rewrite_links, urlnormalize
+
+class RenameFiles(object):
+
+    '''
+    Rename files and adjust all links pointing to them. Note that the spine
+    and manifest are not touched by this transform.
+    '''
+
+    def __init__(self, rename_map):
+        self.rename_map = rename_map
+
+    def __call__(self, oeb, opts):
+        self.log = oeb.logger
+        self.opts = opts
+        self.oeb = oeb
+
+        for item in oeb.manifest.items:
+            self.current_item = item
+            if etree.iselement(item.data):
+                rewrite_links(self.current_item.data, self.url_replacer)
+            elif hasattr(item.data, 'cssText'):
+                cssutils.replaceUrls(item.data, self.url_replacer)
+
+        if self.oeb.guide:
+            for ref in self.oeb.guide.values():
+                href = urlnormalize(ref.href)
+                href, frag = urldefrag(href)
+                replacement = self.rename_map.get(href, None)
+                if replacement is not None:
+                    nhref = replacement
+                    if frag:
+                        nhref += '#' + frag
+                    ref.href = nhref
+
+        if self.oeb.toc:
+            self.fix_toc_entry(self.oeb.toc)
+
+
+    def fix_toc_entry(self, toc):
+        if toc.href:
+            href = urlnormalize(toc.href)
+            href, frag = urldefrag(href)
+            replacement = self.rename_map.get(href, None)
+
+            if replacement is not None:
+                nhref = replacement
+                if frag:
+                    nhref = '#'.join((nhref, frag))
+                toc.href = nhref
+
+        for x in toc:
+            self.fix_toc_entry(x)
+
+    def url_replacer(self, orig_url):
+        url = urlnormalize(orig_url)
+        path, frag = urldefrag(url)
+        href = self.current_item.abshref(path)
+        replacement = self.rename_map.get(href, None)
+        if replacement is None:
+            return orig_url
+        replacement = self.current_item.relhref(replacement)
+        if frag:
+            replacement += '#' + frag
+        return replacement
+
+class UniqueFilenames(object):
+
+    'Ensure that every item in the manifest has a unique filename'
+
+    def __call__(self, oeb, opts):
+        self.log = oeb.logger
+        self.opts = opts
+        self.oeb = oeb
+
+        self.seen_filenames = set([])
+        self.rename_map = {}
+
+        for item in list(oeb.manifest.items):
+            fname = posixpath.basename(item.href)
+            if fname in self.seen_filenames:
+                suffix = self.unique_suffix(fname)
+                data = item.data
+                base, ext = posixpath.splitext(item.href)
+                nhref = base + suffix + ext
+                nhref = oeb.manifest.generate(href=nhref)[1]
+                nitem = oeb.manifest.add(item.id, nhref, item.media_type, data=data,
+                        fallback=item.fallback)
+                self.seen_filenames.add(posixpath.basename(nhref))
+                self.rename_map[item.href] = nhref
+                if item.spine_position is not None:
+                    oeb.spine.insert(item.spine_position, nitem, item.linear)
+                    oeb.spine.remove(item)
+                oeb.manifest.remove(item)
+            else:
+                self.seen_filenames.add(fname)
+
+        if self.rename_map:
+            self.log('Found non-unique filenames, renaming to support broken'
+                    ' EPUB readers like FBReader, Aldiko and Stanza...')
+            from pprint import pformat
+            self.log.debug(pformat(self.rename_map))
+
+            renamer = RenameFiles(self.rename_map)
+            renamer(oeb, opts)
+
+
+    def unique_suffix(self, fname):
+        base, ext = posixpath.splitext(fname)
+        c = 0
+        while True:
+            c += 1
+            suffix = '_u%d'%c
+            candidate = base + suffix + ext
+            if candidate not in self.seen_filenames:
+                return suffix
@@ -93,7 +93,7 @@ class Jacket(object):
 
 # Render Jacket {{{
 
-def get_rating(rating, rchar):
+def get_rating(rating, rchar, e_rchar):
     ans = ''
     try:
         num = float(rating)/2
@@ -104,12 +104,12 @@ def get_rating(rating, rchar):
     if num < 1:
         return ans
 
-    ans = rchar * int(num)
+    ans = ("%s%s") % (rchar * int(num), e_rchar * (5 - int(num)))
     return ans
 
 
 def render_jacket(mi, output_profile,
-        alt_title=_('Unknown'), alt_tags=[], alt_comments=''):
+        alt_title=_('Unknown'), alt_tags=[], alt_comments='',
+        alt_publisher=('Unknown publisher')):
     css = P('jacket/stylesheet.css', data=True).decode('utf-8')
 
     try:
@@ -124,12 +124,17 @@ def render_jacket(mi, output_profile,
     if not mi.series:
         series = ''
 
+    try:
+        publisher = mi.publisher if mi.publisher else alt_publisher
+    except:
+        publisher = _('Unknown publisher')
+
     try:
         pubdate = strftime(u'%Y', mi.pubdate.timetuple())
     except:
         pubdate = ''
 
-    rating = get_rating(mi.rating, output_profile.ratings_char)
+    rating = get_rating(mi.rating, output_profile.ratings_char, output_profile.empty_ratings_char)
 
     tags = mi.tags if mi.tags else alt_tags
     if tags:
@@ -154,6 +159,7 @@ def render_jacket(mi, output_profile,
             css=css,
             title=title,
             author=author,
+            publisher=publisher,
             pubdate_label=_('Published'), pubdate=pubdate,
             series_label=_('Series'), series=series,
             rating_label=_('Rating'), rating=rating,
@@ -168,16 +174,16 @@ def render_jacket(mi, output_profile,
     # Post-process the generated html to strip out empty header items
     soup = BeautifulSoup(generated_html)
     if not series:
-        series_tag = soup.find('tr', attrs={'class':'cbj_series'})
+        series_tag = soup.find(attrs={'class':'cbj_series'})
         series_tag.extract()
     if not rating:
-        rating_tag = soup.find('tr', attrs={'class':'cbj_rating'})
+        rating_tag = soup.find(attrs={'class':'cbj_rating'})
         rating_tag.extract()
     if not tags:
-        tags_tag = soup.find('tr', attrs={'class':'cbj_tags'})
+        tags_tag = soup.find(attrs={'class':'cbj_tags'})
         tags_tag.extract()
     if not pubdate:
-        pubdate_tag = soup.find('tr', attrs={'class':'cbj_pubdate'})
+        pubdate_tag = soup.find(attrs={'class':'cbj_pubdate'})
         pubdate_tag.extract()
     if output_profile.short_name != 'kindle':
         hr_tag = soup.find('hr', attrs={'class':'cbj_kindle_banner_hr'})
@@ -50,11 +50,11 @@ class CaseMangler(object):
 
     def text_transform(self, transform, text):
         if transform == 'capitalize':
-            return text.title()
+            return icu_title(text)
         elif transform == 'uppercase':
-            return text.upper()
+            return icu_upper(text)
         elif transform == 'lowercase':
-            return text.lower()
+            return icu_lower(text)
         return text
 
     def split_text(self, text):
@@ -35,6 +35,12 @@ class PMLOutput(OutputFormatPlugin):
         OptionRecommendation(name='inline_toc',
             recommended_value=False, level=OptionRecommendation.LOW,
             help=_('Add Table of Contents to beginning of the book.')),
+        OptionRecommendation(name='full_image_depth',
+            recommended_value=False, level=OptionRecommendation.LOW,
+            help=_('Do not reduce the size or bit depth of images. Images ' \
+                   'have their size and depth reduced by default to accommodate ' \
+                   'applications that can not convert images on their ' \
+                   'own such as Dropbook.')),
     ])
 
     def convert(self, oeb_book, output_path, input_plugin, opts, log):
@@ -44,16 +50,20 @@ class PMLOutput(OutputFormatPlugin):
             with open(os.path.join(tdir, 'index.pml'), 'wb') as out:
                 out.write(pml.encode(opts.output_encoding, 'replace'))
 
-            self.write_images(oeb_book.manifest, pmlmlizer.image_hrefs, tdir)
+            self.write_images(oeb_book.manifest, pmlmlizer.image_hrefs, tdir, opts)
 
             log.debug('Compressing output...')
             pmlz = ZipFile(output_path, 'w')
             pmlz.add_dir(tdir)
 
-    def write_images(self, manifest, image_hrefs, out_dir):
+    def write_images(self, manifest, image_hrefs, out_dir, opts):
         for item in manifest:
             if item.media_type in OEB_RASTER_IMAGES and item.href in image_hrefs.keys():
-                im = Image.open(cStringIO.StringIO(item.data))
+                if opts.full_image_depth:
+                    im = Image.open(cStringIO.StringIO(item.data))
+                else:
+                    im = Image.open(cStringIO.StringIO(item.data)).convert('P')
+                    im.thumbnail((300,300), Image.ANTIALIAS)
 
                 data = cStringIO.StringIO()
                 im.save(data, 'PNG')
@@ -216,7 +216,9 @@ class PMLMLizer(object):
         w = '\\w'
         width = elem.get('width')
         if width:
-            w += '="%s%%"' % width
+            if not width.endswith('%'):
+                width += '%'
+            w += '="%s"' % width
         else:
             w += '="50%"'
         text.append(w)
 63  src/calibre/ebooks/txt/markdownml.py  Normal file
@@ -0,0 +1,63 @@
+# -*- coding: utf-8 -*-
+
+__license__ = 'GPL 3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+'''
+Transform OEB content into Markdown formatted plain text
+'''
+
+import re
+
+from lxml import etree
+
+from calibre.utils.html2text import html2text
+
+class MarkdownMLizer(object):
+
+    def __init__(self, log):
+        self.log = log
+
+    def extract_content(self, oeb_book, opts):
+        self.log.info('Converting XHTML to Markdown formatted TXT...')
+        self.oeb_book = oeb_book
+        self.opts = opts
+
+        return self.mlize_spine()
+
+    def mlize_spine(self):
+        output = [u'']
+
+        for item in self.oeb_book.spine:
+            self.log.debug('Converting %s to Markdown formatted TXT...' % item.href)
+
+            html = unicode(etree.tostring(item.data, encoding=unicode))
+
+            if not self.opts.keep_links:
+                html = re.sub(r'<\s*a[^>]*>', '', html)
+                html = re.sub(r'<\s*/\s*a\s*>', '', html)
+            if not self.opts.keep_image_references:
+                html = re.sub(r'<\s*img[^>]*>', '', html)
+                html = re.sub(r'<\s*img\s*>', '', html)
+
+            text = html2text(html)
+
+            # Ensure the section ends with at least two new line characters.
+            # This is to prevent the last paragraph from a section being
+            # combined into the fist paragraph of the next.
+            end_chars = text[-4:]
+            # Convert all newlines to \n
+            end_chars = end_chars.replace('\r\n', '\n')
+            end_chars = end_chars.replace('\r', '\n')
+            end_chars = end_chars[-2:]
+            if not end_chars[1] == '\n':
+                text += '\n\n'
+            if end_chars[1] == '\n' and not end_chars[0] == '\n':
+                text += '\n'
+
+            output += text
+
+        output = u''.join(output)
+
+        return output
@@ -8,6 +8,7 @@ import os
 
 from calibre.customize.conversion import OutputFormatPlugin, \
     OptionRecommendation
+from calibre.ebooks.txt.markdownml import MarkdownMLizer
 from calibre.ebooks.txt.txtml import TXTMLizer
 from calibre.ebooks.txt.newlines import TxtNewlines, specified_newlines
 
@@ -44,10 +45,27 @@ class TXTOutput(OutputFormatPlugin):
             recommended_value=False, level=OptionRecommendation.LOW,
             help=_('Force splitting on the max-line-length value when no space '
             'is present. Also allows max-line-length to be below the minimum')),
+        OptionRecommendation(name='markdown_format',
+            recommended_value=False, level=OptionRecommendation.LOW,
+            help=_('Produce Markdown formatted text.')),
+        OptionRecommendation(name='keep_links',
+            recommended_value=False, level=OptionRecommendation.LOW,
+            help=_('Do not remove links within the document. This is only ' \
+            'useful when paired with the markdown-format option because' \
+            'links are always removed with plain text output.')),
+        OptionRecommendation(name='keep_image_references',
+            recommended_value=False, level=OptionRecommendation.LOW,
+            help=_('Do not remove image references within the document. This is only ' \
+            'useful when paired with the markdown-format option because' \
+            'image references are always removed with plain text output.')),
     ])
 
     def convert(self, oeb_book, output_path, input_plugin, opts, log):
-        writer = TXTMLizer(log)
+        if opts.markdown_format:
+            writer = MarkdownMLizer(log)
+        else:
+            writer = TXTMLizer(log)
 
         txt = writer.extract_content(oeb_book, opts)
 
         log.debug('\tReplacing newlines with selected type...')
@@ -35,6 +35,7 @@ BLOCK_STYLES = [
 
 SPACE_TAGS = [
     'td',
+    'br',
 ]
 
 class TXTMLizer(object):
@@ -178,7 +179,6 @@ class TXTMLizer(object):
             text.append(u'\n\n')
 
         if tag in SPACE_TAGS:
-            if not end.endswith('u ') and hasattr(elem, 'text') and elem.text:
             text.append(u' ')
 
         # Process tags that contain text.
@@ -123,6 +123,8 @@ def _config():
               help=_('Download social metadata (tags/rating/etc.)'))
     c.add_opt('overwrite_author_title_metadata', default=True,
               help=_('Overwrite author and title with new metadata'))
+    c.add_opt('auto_download_cover', default=False,
+              help=_('Automatically download the cover, if available'))
     c.add_opt('enforce_cpu_limit', default=True,
               help=_('Limit max simultaneous jobs to number of CPUs'))
     c.add_opt('tag_browser_hidden_categories', default=set(),
@@ -18,6 +18,7 @@ from calibre.ebooks import BOOK_EXTENSIONS
 from calibre.utils.filenames import ascii_filename
 from calibre.constants import preferred_encoding, filesystem_encoding
 from calibre.gui2.actions import InterfaceAction
+from calibre.gui2 import config
 
 class AddAction(InterfaceAction):
 
@@ -60,6 +61,7 @@ class AddAction(InterfaceAction):
         self._adder = Adder(self.gui,
                 self.gui.library_view.model().db,
                 self.Dispatcher(self._files_added), spare_server=self.gui.spare_server)
+        self.gui.tags_view.disable_recounting = True
         self._adder.add_recursive(root, single)
 
     def add_recursive_single(self, *args):
@@ -101,7 +103,12 @@ class AddAction(InterfaceAction):
             else:
                 ids.add(db.import_book(mi, []))
         self.gui.library_view.model().books_added(len(books))
-        self.gui.iactions['Edit Metadata'].do_download_metadata(ids)
+        orig = config['overwrite_author_title_metadata']
+        config['overwrite_author_title_metadata'] = True
+        try:
+            self.gui.iactions['Edit Metadata'].do_download_metadata(ids)
+        finally:
+            config['overwrite_author_title_metadata'] = orig
 
 
     def files_dropped(self, paths):
@@ -195,9 +202,11 @@ class AddAction(InterfaceAction):
         self._adder = Adder(self.gui,
                 None if to_device else self.gui.library_view.model().db,
                 self.Dispatcher(self.__adder_func), spare_server=self.gui.spare_server)
+        self.gui.tags_view.disable_recounting = True
         self._adder.add(paths)
 
     def _files_added(self, paths=[], names=[], infos=[], on_card=None):
+        self.gui.tags_view.disable_recounting = False
         if paths:
             self.gui.upload_books(paths,
                 list(map(ascii_filename, names)),
@@ -208,6 +217,7 @@ class AddAction(InterfaceAction):
         self.gui.library_view.model().books_added(self._adder.number_of_books_added)
         if hasattr(self.gui, 'db_images'):
             self.gui.db_images.reset()
+        self.gui.tags_view.recount()
         if getattr(self._adder, 'merged_books', False):
             books = u'\n'.join([x if isinstance(x, unicode) else
                     x.decode(preferred_encoding, 'replace') for x in
@@ -37,7 +37,8 @@ class GenerateCatalogAction(InterfaceAction):
             dbspec[id] = {'ondevice': db.ondevice(id, index_is_id=True)}
 
         # Calling gui2.tools:generate_catalog()
-        ret = generate_catalog(self.gui, dbspec, ids, self.gui.device_manager)
+        ret = generate_catalog(self.gui, dbspec, ids, self.gui.device_manager,
+                db)
         if ret is None:
             return
 
@@ -5,13 +5,67 @@ __license__ = 'GPL v3'
 __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 
-from PyQt4.Qt import QMenu
+from functools import partial
+
+from PyQt4.Qt import QMenu, QObject, QTimer
+
 from calibre.gui2 import error_dialog
 from calibre.gui2.dialogs.delete_matching_from_device import DeleteMatchingFromDeviceDialog
 from calibre.gui2.dialogs.confirm_delete import confirm
 from calibre.gui2.actions import InterfaceAction
 
+single_shot = partial(QTimer.singleShot, 10)
+
+class MultiDeleter(QObject):
+
+    def __init__(self, gui, rows, callback):
+        from calibre.gui2.dialogs.progress import ProgressDialog
+        QObject.__init__(self, gui)
+        self.model = gui.library_view.model()
+        self.ids = list(map(self.model.id, rows))
+        self.gui = gui
+        self.failures = []
+        self.deleted_ids = []
+        self.callback = callback
+        single_shot(self.delete_one)
+        self.pd = ProgressDialog(_('Deleting...'), parent=gui,
+                cancelable=False, min=0, max=len(self.ids))
+        self.pd.setModal(True)
+        self.pd.show()
+
+    def delete_one(self):
+        if not self.ids:
+            self.cleanup()
+            return
+        id_ = self.ids.pop()
+        title = 'id:%d'%id_
+        try:
+            title_ = self.model.db.title(id_, index_is_id=True)
+            if title_:
+                title = title_
+            self.model.db.delete_book(id_, notify=False, commit=False)
+            self.deleted_ids.append(id_)
+        except:
+            import traceback
+            self.failures.append((id_, title, traceback.format_exc()))
+        single_shot(self.delete_one)
+        self.pd.value += 1
+        self.pd.set_msg(_('Deleted') + ' ' + title)
+
+    def cleanup(self):
+        self.pd.hide()
+        self.pd = None
+        self.model.db.commit()
+        self.model.db.clean()
+        self.model.books_deleted()
+        self.gui.tags_view.recount()
+        self.callback(self.deleted_ids)
+        if self.failures:
+            msg = ['==> '+x[1]+'\n'+x[2] for x in self.failures]
+            error_dialog(self.gui, _('Failed to delete'),
+                    _('Failed to delete some books, click the Show Details button'
+                    ' for details.'), det_msg='\n\n'.join(msg), show=True)
+
 class DeleteAction(InterfaceAction):
 
     name = 'Remove Books'
@@ -179,8 +233,13 @@ class DeleteAction(InterfaceAction):
             row = None
             if ci.isValid():
                 row = ci.row()
-            ids_deleted = view.model().delete_books(rows)
-            self.library_ids_deleted(ids_deleted, row)
+            if len(rows) < 5:
+                ids_deleted = view.model().delete_books(rows)
+                self.library_ids_deleted(ids_deleted, row)
+            else:
+                self.__md = MultiDeleter(self.gui, rows,
+                        partial(self.library_ids_deleted, current_row=row))
+
         else:
             if not confirm('<p>'+_('The selected books will be '
                            '<b>permanently deleted</b> '
@ -8,7 +8,7 @@ __docformat__ = 'restructuredtext en'
|
|||||||
import os
|
import os
|
||||||
from functools import partial
|
from functools import partial
|
||||||
|
|
||||||
from PyQt4.Qt import Qt, QMenu
|
from PyQt4.Qt import Qt, QMenu, QModelIndex
|
||||||
|
|
||||||
from calibre.gui2 import error_dialog, config
|
from calibre.gui2 import error_dialog, config
|
||||||
from calibre.gui2.dialogs.metadata_single import MetadataSingleDialog
|
from calibre.gui2.dialogs.metadata_single import MetadataSingleDialog
|
||||||
@ -16,6 +16,7 @@ from calibre.gui2.dialogs.metadata_bulk import MetadataBulkDialog
|
|||||||
from calibre.gui2.dialogs.confirm_delete import confirm
|
from calibre.gui2.dialogs.confirm_delete import confirm
|
||||||
from calibre.gui2.dialogs.tag_list_editor import TagListEditor
|
from calibre.gui2.dialogs.tag_list_editor import TagListEditor
|
||||||
from calibre.gui2.actions import InterfaceAction
|
from calibre.gui2.actions import InterfaceAction
|
||||||
|
from calibre.utils.icu import sort_key
|
||||||
|
|
||||||
class EditMetadataAction(InterfaceAction):
|
class EditMetadataAction(InterfaceAction):
|
||||||
|
|
||||||
@ -53,6 +54,10 @@ class EditMetadataAction(InterfaceAction):
|
|||||||
mb.addAction(_('Merge into first selected book - keep others'),
|
mb.addAction(_('Merge into first selected book - keep others'),
|
||||||
partial(self.merge_books, safe_merge=True),
|
partial(self.merge_books, safe_merge=True),
|
||||||
Qt.AltModifier+Qt.Key_M)
|
Qt.AltModifier+Qt.Key_M)
|
||||||
|
mb.addSeparator()
|
||||||
|
mb.addAction(_('Merge only formats into first selected book - delete others'),
|
||||||
|
partial(self.merge_books, merge_only_formats=True),
|
||||||
|
Qt.AltModifier+Qt.ShiftModifier+Qt.Key_M)
|
||||||
self.merge_menu = mb
|
self.merge_menu = mb
|
||||||
self.action_merge.setMenu(mb)
|
self.action_merge.setMenu(mb)
|
||||||
md.addSeparator()
|
md.addSeparator()
|
||||||
@ -126,20 +131,40 @@ class EditMetadataAction(InterfaceAction):
|
|||||||
if bulk or (bulk is None and len(rows) > 1):
|
if bulk or (bulk is None and len(rows) > 1):
|
||||||
return self.edit_bulk_metadata(checked)
|
return self.edit_bulk_metadata(checked)
|
||||||
|
|
||||||
def accepted(id):
|
row_list = [r.row() for r in rows]
|
||||||
self.gui.library_view.model().refresh_ids([id])
|
current_row = 0
|
||||||
|
changed = set([])
|
||||||
|
db = self.gui.library_view.model().db
|
||||||
|
|
||||||
for row in rows:
|
if len(row_list) == 1:
|
||||||
self.gui.iactions['View'].metadata_view_id = self.gui.library_view.model().db.id(row.row())
|
cr = row_list[0]
|
||||||
d = MetadataSingleDialog(self.gui, row.row(),
|
row_list = \
|
||||||
self.gui.library_view.model().db,
|
list(range(self.gui.library_view.model().rowCount(QModelIndex())))
|
||||||
accepted_callback=accepted,
|
current_row = row_list.index(cr)
|
||||||
cancel_all=rows.index(row) < len(rows)-1)
|
|
||||||
d.view_format.connect(self.gui.iactions['View'].metadata_view_format)
|
while True:
|
||||||
d.exec_()
|
prev = next_ = None
|
||||||
if d.cancel_all:
|
if current_row > 0:
|
||||||
|
prev = db.title(row_list[current_row-1])
|
||||||
|
if current_row < len(row_list) - 1:
|
||||||
|
next_ = db.title(row_list[current_row+1])
|
||||||
|
|
||||||
|
d = MetadataSingleDialog(self.gui, row_list[current_row], db,
|
||||||
|
prev=prev, next_=next_)
|
||||||
|
d.view_format.connect(lambda
|
||||||
|
fmt:self.gui.iactions['View'].view_format(row_list[current_row],
|
||||||
|
fmt))
|
||||||
|
if d.exec_() != d.Accepted:
|
||||||
|
d.view_format.disconnect()
|
||||||
break
|
break
|
||||||
if rows:
|
d.view_format.disconnect()
|
||||||
|
changed.add(d.id)
|
||||||
|
if d.row_delta == 0:
|
||||||
|
break
|
||||||
|
current_row += d.row_delta
|
||||||
|
|
||||||
|
if changed:
|
||||||
|
self.gui.library_view.model().refresh_ids(list(changed))
|
||||||
current = self.gui.library_view.currentIndex()
|
current = self.gui.library_view.currentIndex()
|
||||||
m = self.gui.library_view.model()
|
m = self.gui.library_view.model()
|
||||||
if self.gui.cover_flow:
|
if self.gui.cover_flow:
|
||||||
@ -162,9 +187,17 @@ class EditMetadataAction(InterfaceAction):
|
|||||||
            return
        # Prevent the TagView from updating due to signals from the database
        self.gui.tags_view.blockSignals(True)
+       changed = False
        try:
-           changed = MetadataBulkDialog(self.gui, rows,
-                   self.gui.library_view.model()).changed
+           current_tab = 0
+           while True:
+               dialog = MetadataBulkDialog(self.gui, rows,
+                       self.gui.library_view.model(), current_tab)
+               if dialog.changed:
+                   changed = True
+               if not dialog.do_again:
+                   break
+               current_tab = dialog.central_widget.currentIndex()
        finally:
            self.gui.tags_view.blockSignals(False)
        if changed:
@@ -177,7 +210,7 @@ class EditMetadataAction(InterfaceAction):
        self.gui.library_view.select_rows(ids)

    # Merge books {{{
-   def merge_books(self, safe_merge=False):
+   def merge_books(self, safe_merge=False, merge_only_formats=False):
        '''
        Merge selected books in library.
        '''
@@ -191,6 +224,12 @@ class EditMetadataAction(InterfaceAction):
            return error_dialog(self.gui, _('Cannot merge books'),
                                _('At least two books must be selected for merging'),
                                show=True)
+       if len(rows) > 5:
+           if not confirm('<p>'+_('You are about to merge more than 5 books. '
+               'Are you <b>sure</b> you want to proceed?')
+               +'</p>', 'merge_too_many_books', self.gui):
+               return
+
        dest_id, src_books, src_ids = self.books_to_merge(rows)
        title = self.gui.library_view.model().db.title(dest_id, index_is_id=True)
        if safe_merge:
@@ -205,6 +244,22 @@ class EditMetadataAction(InterfaceAction):
                return
            self.add_formats(dest_id, src_books)
            self.merge_metadata(dest_id, src_ids)
+       elif merge_only_formats:
+           if not confirm('<p>'+_(
+               'Book formats from the selected books will be merged '
+               'into the <b>first selected book</b> (%s). '
+               'Metadata in the first selected book will not be changed.'
+               'Author, Title, ISBN and all other metadata will <i>not</i> be merged.<br><br>'
+               'After merger the second and subsequently '
+               'selected books, with any metadata they have will be <b>deleted</b>. <br><br>'
+               'All book formats of the first selected book will be kept '
+               'and any duplicate formats in the second and subsequently selected books '
+               'will be permanently <b>deleted</b> from your calibre library.<br><br> '
+               'Are you <b>sure</b> you want to proceed?')%title
+               +'</p>', 'merge_only_formats', self.gui):
+               return
+           self.add_formats(dest_id, src_books)
+           self.delete_books_after_merge(src_ids)
        else:
            if not confirm('<p>'+_(
                'Book formats and metadata from the selected books will be merged '
@@ -214,15 +269,10 @@ class EditMetadataAction(InterfaceAction):
                'subsequently selected books will be <b>deleted</b>. <br><br>'
                'All book formats of the first selected book will be kept '
                'and any duplicate formats in the second and subsequently selected books '
-               'will be permanently <b>deleted</b> from your computer.<br><br> '
+               'will be permanently <b>deleted</b> from your calibre library.<br><br> '
                'Are you <b>sure</b> you want to proceed?')%title
                +'</p>', 'merge_books', self.gui):
                return
-           if len(rows)>5:
-               if not confirm('<p>'+_('You are about to merge more than 5 books. '
-                   'Are you <b>sure</b> you want to proceed?')
-                   +'</p>', 'merge_too_many_books', self.gui):
-                   return
            self.add_formats(dest_id, src_books)
            self.merge_metadata(dest_id, src_ids)
            self.delete_books_after_merge(src_ids)
@@ -340,8 +390,7 @@ class EditMetadataAction(InterfaceAction):
    def edit_device_collections(self, view, oncard=None):
        model = view.model()
        result = model.get_collections_with_ids()
-       compare = (lambda x,y:cmp(x.lower(), y.lower()))
-       d = TagListEditor(self.gui, tag_to_match=None, data=result, compare=compare)
+       d = TagListEditor(self.gui, tag_to_match=None, data=result, key=sort_key)
        d.exec_()
        if d.result() == d.Accepted:
            to_rename = d.to_rename # dict of new text to old ids
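The bulk-edit hunk above replaces a one-shot dialog with a loop that re-opens the dialog until the user is done, remembering the last active tab between passes. A minimal sketch of that pattern; make_dialog is a hypothetical factory standing in for constructing MetadataBulkDialog, whose changed, do_again and central_widget attributes are used exactly as in the hunk:

def run_until_done(make_dialog):
    # Re-open the dialog until the user stops requesting another pass.
    changed = False
    current_tab = 0
    while True:
        dialog = make_dialog(current_tab)   # re-open on the last-used tab
        if dialog.changed:
            changed = True
        if not dialog.do_again:             # user is done
            break
        current_tab = dialog.central_widget.currentIndex()
    return changed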
@@ -29,5 +29,6 @@ class ShowBookDetailsAction(InterfaceAction):
            return
        index = self.gui.library_view.currentIndex()
        if index.isValid():
-           BookInfo(self.gui, self.gui.library_view, index).show()
+           BookInfo(self.gui, self.gui.library_view, index,
+                   self.gui.iactions['View'].view_format_by_id).show()
@@ -58,6 +58,7 @@ class SimilarBooksAction(InterfaceAction):
                for a in authors.split(',')]
            join = ' or '
        if search:
-           self.gui.search.set_search_string(join.join(search))
+           self.gui.search.set_search_string(join.join(search),
+                   store_in_history=True)
@@ -12,7 +12,7 @@ from PyQt4.Qt import Qt, QMenu

 from calibre.constants import isosx
 from calibre.gui2 import error_dialog, Dispatcher, question_dialog, config, \
-    open_local_file
+    open_local_file, info_dialog
 from calibre.gui2.dialogs.choose_format import ChooseFormatDialog
 from calibre.utils.config import prefs
 from calibre.ptempfile import PersistentTemporaryFile
@@ -26,7 +26,6 @@ class ViewAction(InterfaceAction):

    def genesis(self):
        self.persistent_files = []
-       self.metadata_view_id = None
        self.qaction.triggered.connect(self.view_book)
        self.view_menu = QMenu()
        self.view_menu.addAction(_('View'), partial(self.view_book, False))
@@ -51,14 +50,6 @@ class ViewAction(InterfaceAction):
        if fmt_path:
            self._view_file(fmt_path)

-   def metadata_view_format(self, fmt):
-       fmt_path = self.gui.library_view.model().db.\
-               format_abspath(self.metadata_view_id,
-                       fmt, index_is_id=True)
-       if fmt_path:
-           self._view_file(fmt_path)
-
    def book_downloaded_for_viewing(self, job):
        if job.failed:
            self.gui.device_job_exception(job)
@@ -89,18 +80,34 @@ class ViewAction(InterfaceAction):
        self._launch_viewer(name, viewer, internal)

    def view_specific_format(self, triggered):
-       rows = self.gui.library_view.selectionModel().selectedRows()
+       rows = list(self.gui.library_view.selectionModel().selectedRows())
        if not rows or len(rows) == 0:
            d = error_dialog(self.gui, _('Cannot view'), _('No book selected'))
            d.exec_()
            return

-       row = rows[0].row()
-       formats = self.gui.library_view.model().db.formats(row).upper().split(',')
-       d = ChooseFormatDialog(self.gui, _('Choose the format to view'), formats)
+       db = self.gui.library_view.model().db
+       rows = [r.row() for r in rows]
+       formats = [db.formats(row) for row in rows]
+       formats = [list(f.upper().split(',')) if f else None for f in formats]
+       all_fmts = set([])
+       for x in formats:
+           for f in x: all_fmts.add(f)
+       d = ChooseFormatDialog(self.gui, _('Choose the format to view'),
+               list(sorted(all_fmts)))
        if d.exec_() == d.Accepted:
-           format = d.format()
-           self.view_format(row, format)
+           fmt = d.format()
+           orig_num = len(rows)
+           rows = [rows[i] for i in range(len(rows)) if formats[i] and fmt in
+                   formats[i]]
+           if self._view_check(len(rows)):
+               for row in rows:
+                   self.view_format(row, fmt)
+               if len(rows) < orig_num:
+                   info_dialog(self.gui, _('Format unavailable'),
+                           _('Not all the selected books were available in'
+                               ' the %s format. You should convert'
+                               ' them first.')%fmt, show=True)

    def _view_check(self, num, max_=3):
        if num <= max_:
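view_specific_format now offers only the formats that actually exist somewhere in the selection. The format-gathering step in isolation, with plain strings standing in for db.formats() results; unlike the hunk, this sketch adds a guard for books with no formats at all, whose entry is None:

raw = ['EPUB,MOBI', None, 'epub']           # one entry per selected book
formats = [f.upper().split(',') if f else None for f in raw]
all_fmts = set()
for x in formats:
    if x:                                   # skip books with no formats
        all_fmts.update(x)
print(sorted(all_fmts))                     # ['EPUB', 'MOBI']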
@@ -3,41 +3,55 @@ UI for adding books to the database and saving books to disk
 '''
 import os, shutil, time
 from Queue import Queue, Empty
-from threading import Thread
+from functools import partial

-from PyQt4.Qt import QThread, SIGNAL, QObject, QTimer, Qt, \
-    QProgressDialog
+from PyQt4.Qt import QThread, QObject, Qt, QProgressDialog, pyqtSignal, QTimer

 from calibre.gui2.dialogs.progress import ProgressDialog
 from calibre.gui2 import question_dialog, error_dialog, info_dialog
 from calibre.ebooks.metadata.opf2 import OPF
 from calibre.ebooks.metadata import MetaInformation
-from calibre.constants import preferred_encoding, filesystem_encoding
+from calibre.constants import preferred_encoding, filesystem_encoding, DEBUG
 from calibre.utils.config import prefs
+from calibre import prints

+single_shot = partial(QTimer.singleShot, 75)

-class DuplicatesAdder(QThread): # {{{
-    # Add duplicate books
+class DuplicatesAdder(QObject): # {{{
+
+    added = pyqtSignal(object)
+    adding_done = pyqtSignal()
+
     def __init__(self, parent, db, duplicates, db_adder):
-        QThread.__init__(self, parent)
+        QObject.__init__(self, parent)
         self.db, self.db_adder = db, db_adder
-        self.duplicates = duplicates
+        self.duplicates = list(duplicates)
+        self.count = 0
+        single_shot(self.add_one)

-    def run(self):
-        count = 1
-        for mi, cover, formats in self.duplicates:
+    def add_one(self):
+        if not self.duplicates:
+            self.adding_done.emit()
+            return
+
+        mi, cover, formats = self.duplicates.pop()
         formats = [f for f in formats if not f.lower().endswith('.opf')]
         id = self.db.create_book_entry(mi, cover=cover,
                 add_duplicates=True)
         # here we add all the formats for dupe book record created above
         self.db_adder.add_formats(id, formats)
         self.db_adder.number_of_books_added += 1
-        self.emit(SIGNAL('added(PyQt_PyObject)'), count)
-        count += 1
-        self.emit(SIGNAL('adding_done()'))
+        self.count += 1
+        self.added.emit(self.count)
+        single_shot(self.add_one)

 # }}}
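The hunk above converts DuplicatesAdder from a QThread into a QObject that does one unit of work per QTimer.singleShot tick, so the database is touched only from the GUI thread while the event loop keeps the UI responsive. A minimal sketch of that cooperative loop, assuming PyQt4 and a running QApplication event loop; ChunkedWorker and its names are illustrative, not calibre API:

from functools import partial
from PyQt4.Qt import QTimer

single_shot = partial(QTimer.singleShot, 75)

class ChunkedWorker(object):
    def __init__(self, items, on_done):
        self.items, self.on_done = list(items), on_done
        single_shot(self.step)              # schedule the first unit of work

    def step(self):
        if not self.items:
            self.on_done()                  # finished: report and stop
            return
        self.items.pop()                    # ... process one item here ...
        single_shot(self.step)              # yield to the event loop, repeat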
 class RecursiveFind(QThread): # {{{

+    update = pyqtSignal(object)
+    found = pyqtSignal(object)
+
     def __init__(self, parent, db, root, single):
         QThread.__init__(self, parent)
         self.db = db
@@ -50,7 +64,7 @@ class RecursiveFind(QThread): # {{{
            for dirpath in os.walk(root):
                if self.canceled:
                    return
-               self.emit(SIGNAL('update(PyQt_PyObject)'),
+               self.update.emit(
                        _('Searching in')+' '+dirpath[0])
                self.books += list(self.db.find_books_in_directory(dirpath[0],
                                            self.single_book_per_directory))
@@ -71,39 +85,45 @@ class RecursiveFind(QThread): # {{{
                msg = unicode(err)
            except:
                msg = repr(err)
-           self.emit(SIGNAL('found(PyQt_PyObject)'), msg)
+           self.found.emit(msg)
            return

        self.books = [formats for formats in self.books if formats]

        if not self.canceled:
-           self.emit(SIGNAL('found(PyQt_PyObject)'), self.books)
+           self.found.emit(self.books)

 # }}}

-class DBAdder(Thread): # {{{
+class DBAdder(QObject): # {{{

-    def __init__(self, db, ids, nmap):
+    def __init__(self, parent, db, ids, nmap):
+        QObject.__init__(self, parent)
         self.db, self.ids, self.nmap = db, dict(**ids), dict(**nmap)
-        self.end = False
         self.critical = {}
         self.number_of_books_added = 0
         self.duplicates = []
         self.names, self.paths, self.infos = [], [], []
-        Thread.__init__(self)
-        self.daemon = True
         self.input_queue = Queue()
         self.output_queue = Queue()
         self.merged_books = set([])

-    def run(self):
-        while not self.end:
-            try:
-                id, opf, cover = self.input_queue.get(True, 0.2)
-            except Empty:
-                continue
+    def end(self):
+        self.input_queue.put((None, None, None))
+
+    def start(self):
+        try:
+            id, opf, cover = self.input_queue.get_nowait()
+        except Empty:
+            single_shot(self.start)
+            return
+        if id is None and opf is None and cover is None:
+            return
         name = self.nmap.pop(id)
         title = None
+        if DEBUG:
+            st = time.time()
         try:
             title = self.add(id, opf, cover, name)
         except:
@@ -111,6 +131,9 @@ class DBAdder(Thread): # {{{
             self.critical[name] = traceback.format_exc()
             title = name
         self.output_queue.put(title)
+        if DEBUG:
+            prints('Added', title, 'to db in:', time.time() - st, 'seconds')
+        single_shot(self.start)

     def process_formats(self, opf, formats):
         imp = opf[:-4]+'.import'
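The RecursiveFind and DBAdder hunks migrate from the old-style self.emit(SIGNAL('...')) API to new-style pyqtSignal class attributes. A self-contained sketch of the new-style syntax; Finder is illustrative, not calibre code:

from PyQt4.Qt import QObject, pyqtSignal

class Finder(QObject):
    update = pyqtSignal(object)             # signals are class attributes
    found = pyqtSignal(object)

    def search(self, paths):
        for p in paths:
            self.update.emit('Searching in ' + p)
        self.found.emit(list(paths))

f = Finder()
f.update.connect(lambda msg: None)          # any Python callable is a slot
f.search(['/tmp'])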
@@ -201,10 +224,10 @@ class Adder(QObject): # {{{
        self.pd.setModal(True)
        self.pd.show()
        self._parent = parent
-       self.rfind = self.worker = self.timer = None
+       self.rfind = self.worker = None
        self.callback = callback
        self.callback_called = False
-       self.connect(self.pd, SIGNAL('canceled()'), self.canceled)
+       self.pd.canceled_signal.connect(self.canceled)

    def add_recursive(self, root, single=True):
        self.path = root
@@ -213,10 +236,8 @@ class Adder(QObject): # {{{
        self.pd.set_max(0)
        self.pd.value = 0
        self.rfind = RecursiveFind(self, self.db, root, single)
-       self.connect(self.rfind, SIGNAL('update(PyQt_PyObject)'),
-               self.pd.set_msg, Qt.QueuedConnection)
-       self.connect(self.rfind, SIGNAL('found(PyQt_PyObject)'),
-               self.add, Qt.QueuedConnection)
+       self.rfind.update.connect(self.pd.set_msg, type=Qt.QueuedConnection)
+       self.rfind.found.connect(self.add, type=Qt.QueuedConnection)
        self.rfind.start()

    def add(self, books):
@@ -246,12 +267,12 @@ class Adder(QObject): # {{{
        self.pd.set_min(0)
        self.pd.set_max(len(self.ids))
        self.pd.value = 0
-       self.db_adder = DBAdder(self.db, self.ids, self.nmap)
+       self.db_adder = DBAdder(self, self.db, self.ids, self.nmap)
        self.db_adder.start()
        self.last_added_at = time.time()
        self.entry_count = len(self.ids)
        self.continue_updating = True
-       QTimer.singleShot(200, self.update)
+       single_shot(self.update)

    def canceled(self):
        self.continue_updating = False
@@ -260,14 +281,14 @@ class Adder(QObject): # {{{
        if self.worker is not None:
            self.worker.canceled = True
        if hasattr(self, 'db_adder'):
-           self.db_adder.end = True
+           self.db_adder.end()
        self.pd.hide()
        if not self.callback_called:
            self.callback(self.paths, self.names, self.infos)
            self.callback_called = True

    def duplicates_processed(self):
-       self.db_adder.end = True
+       self.db_adder.end()
        if not self.callback_called:
            self.callback(self.paths, self.names, self.infos)
            self.callback_called = True
@@ -300,7 +321,7 @@ class Adder(QObject): # {{{
        if (time.time() - self.last_added_at) > self.ADD_TIMEOUT:
            self.continue_updating = False
            self.pd.hide()
-           self.db_adder.end = True
+           self.db_adder.end()
            if not self.callback_called:
                self.callback([], [], [])
                self.callback_called = True
@@ -311,7 +332,7 @@ class Adder(QObject): # {{{
                    'find the problem book.'), show=True)

        if self.continue_updating:
-           QTimer.singleShot(200, self.update)
+           single_shot(self.update)

    def process_duplicates(self):
@@ -332,11 +353,8 @@ class Adder(QObject): # {{{
            self.__p_d = pd
            self.__d_a = DuplicatesAdder(self._parent, self.db, duplicates,
                    self.db_adder)
-           self.connect(self.__d_a, SIGNAL('added(PyQt_PyObject)'),
-                   pd.setValue)
-           self.connect(self.__d_a, SIGNAL('adding_done()'),
-                   self.duplicates_processed)
-           self.__d_a.start()
+           self.__d_a.added.connect(pd.setValue)
+           self.__d_a.adding_done.connect(self.duplicates_processed)
        else:
            return self.duplicates_processed()
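With DBAdder now living on the GUI thread, end() shuts the polling loop down by pushing a (None, None, None) sentinel through the same queue the work arrives on, instead of flipping a flag watched by a worker thread. The idea in isolation, using the Python 2 Queue module to match the source:

from Queue import Queue, Empty

q = Queue()
q.put((None, None, None))                   # sentinel: no more work

try:
    id, opf, cover = q.get_nowait()
except Empty:
    id = opf = cover = 'pending'            # nothing yet: poll again later
if id is None and opf is None and cover is None:
    pass                                    # sentinel seen: stop rescheduling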
@@ -407,14 +425,12 @@ class Saver(QObject): # {{{
        self.worker = SaveWorker(self.rq, db, self.ids, path, self.opts,
                spare_server=self.spare_server)
        self.pd.canceled_signal.connect(self.canceled)
-       self.timer = QTimer(self)
-       self.connect(self.timer, SIGNAL('timeout()'), self.update)
-       self.timer.start(200)
+       self.continue_updating = True
+       single_shot(self.update)

    def canceled(self):
-       if self.timer is not None:
-           self.timer.stop()
+       self.continue_updating = False
        if self.worker is not None:
            self.worker.canceled = True
        self.pd.hide()
@@ -424,14 +440,38 @@ class Saver(QObject): # {{{

    def update(self):
-       if not self.ids or not self.worker.is_alive():
-           self.timer.stop()
+       if not self.continue_updating:
+           return
+       if not self.worker.is_alive():
+           # Check that all ids were processed
+           while self.ids:
+               # Get all queued results since worker is dead
+               before = len(self.ids)
+               self.get_result()
+               if before == len(self.ids):
+                   # No results available => worker died unexpectedly
+                   for i in list(self.ids):
+                       self.failures.add(('id:%d'%i, 'Unknown error'))
+                       self.ids.remove(i)
+
+       if not self.ids:
+           self.continue_updating = False
            self.pd.hide()
            if not self.callback_called:
-               self.callback(self.worker.path, self.failures, self.worker.error)
+               try:
+                   # Give the worker time to clean up and set worker.error
+                   self.worker.join(2)
+               except:
+                   pass # The worker was not yet started
                self.callback_called = True
-           return
+               self.callback(self.worker.path, self.failures, self.worker.error)

+       if self.continue_updating:
+           self.get_result()
+           single_shot(self.update)

+   def get_result(self):
        try:
            id, title, ok, tb = self.rq.get_nowait()
        except Empty:
@@ -441,6 +481,7 @@ class Saver(QObject): # {{{
        if not isinstance(title, unicode):
            title = str(title).decode(preferred_encoding, 'replace')
        self.pd.set_msg(_('Saved')+' '+title)
+
        if not ok:
            self.failures.add((title, tb))
 # }}}
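Saver.update() now drains the result queue once the worker dies and records any ids that never produced a result as failures instead of silently dropping them. The drain-and-recover logic in isolation; get_result here is a stand-in for the method that pops one queued result and removes its id from ids:

def drain(ids, get_result, failures):
    while ids:
        before = len(ids)
        get_result()                        # consumes ids it finds results for
        if before == len(ids):              # queue empty but ids remain:
            for i in list(ids):             # the worker died unexpectedly
                failures.add(('id:%d' % i, 'Unknown error'))
                ids.remove(i)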
@@ -19,6 +19,7 @@ from calibre.ebooks import BOOK_EXTENSIONS
 from calibre.constants import preferred_encoding
 from calibre.library.comments import comments_to_html
 from calibre.gui2 import config, open_local_file
+from calibre.utils.icu import sort_key

 # render_rows(data) {{{
 WEIGHTS = collections.defaultdict(lambda : 100)
@@ -31,8 +32,8 @@ WEIGHTS[_('Tags')] = 4
 def render_rows(data):
     keys = data.keys()
     # First sort by name. The WEIGHTS sort will preserve this sub-order
-    keys.sort(cmp=lambda x, y: cmp(x.lower(), y.lower()))
-    keys.sort(cmp=lambda x, y: cmp(WEIGHTS[x], WEIGHTS[y]))
+    keys.sort(key=sort_key)
+    keys.sort(key=lambda x: WEIGHTS[x])
     rows = []
     for key in keys:
         txt = data[key]
@@ -208,7 +209,8 @@ class BookInfo(QWebView):
        rows = u'\n'.join([u'<tr><td valign="top"><b>%s:</b></td><td valign="top">%s</td></tr>'%(k,t) for
            k, t in rows])
        comments = data.get(_('Comments'), '')
-       if comments and comments != u'None':
+       if not comments or comments == u'None':
+           comments = ''
        self.renderer.queue.put((rows, comments))
        self._show_data(rows, '')
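render_rows keeps its two-pass sort, now key-based: a stable sort by name followed by a stable sort by weight preserves the name order within equal weights. Demonstrated with plain lower() standing in for calibre's ICU-backed sort_key:

import collections

WEIGHTS = collections.defaultdict(lambda: 100)  # stand-in weight table
WEIGHTS['Title'] = 0
WEIGHTS['Tags'] = 4

keys = ['Tags', 'Series', 'Title']
keys.sort(key=lambda x: x.lower())          # sort_key in the real code
keys.sort(key=lambda x: WEIGHTS[x])         # stable: keeps the name order
print(keys)                                 # ['Title', 'Tags', 'Series']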
@@ -34,7 +34,7 @@ class PluginWidget(QWidget, Ui_Form):
            self.all_fields.append(x)
            QListWidgetItem(x, self.db_fields)

-   def initialize(self, name): #not working properly to update
+   def initialize(self, name, db): #not working properly to update
        self.name = name
        fields = gprefs.get(name+'_db_fields', self.all_fields)
        # Restore the activated db_fields from last use
@@ -28,7 +28,7 @@ class PluginWidget(QWidget, Ui_Form):
            self.all_fields.append(x)
            QListWidgetItem(x, self.db_fields)

-   def initialize(self, name):
+   def initialize(self, name, db):
        self.name = name
        fields = gprefs.get(name+'_db_fields', self.all_fields)
        # Restore the activated fields from last use
@@ -7,10 +7,11 @@ __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-from calibre.gui2 import gprefs
-from catalog_epub_mobi_ui import Ui_Form
 from calibre.ebooks.conversion.config import load_defaults
-from PyQt4.Qt import QWidget
+from calibre.gui2 import gprefs

+from catalog_epub_mobi_ui import Ui_Form
+from PyQt4.Qt import QWidget, QLineEdit

 class PluginWidget(QWidget,Ui_Form):

@@ -23,7 +24,8 @@ class PluginWidget(QWidget,Ui_Form):
            ('generate_recently_added', True),
            ('note_tag','*'),
            ('numbers_as_text', False),
-           ('read_tag','+'),
+           ('read_pattern','+'),
+           ('read_source_field_cb','Tag'),
            ('wishlist_tag','Wishlist'),
           ]

@@ -38,16 +40,54 @@ class PluginWidget(QWidget,Ui_Form):
        QWidget.__init__(self, parent)
        self.setupUi(self)

-   def initialize(self, name):
+   def initialize(self, name, db):
        self.name = name
+
+       # Populate the 'Read book' source fields
+       all_custom_fields = db.custom_field_keys()
+       custom_fields = {}
+       custom_fields['Tag'] = {'field':'tag', 'datatype':u'text'}
+       for custom_field in all_custom_fields:
+           field_md = db.metadata_for_field(custom_field)
+           if field_md['datatype'] in ['bool','composite','datetime','text']:
+               custom_fields[field_md['name']] = {'field':custom_field,
+                                                  'datatype':field_md['datatype']}
+
+       # Add the sorted eligible fields to the combo box
+       for cf in sorted(custom_fields):
+           self.read_source_field_cb.addItem(cf)
+
+       self.read_source_fields = custom_fields
+       self.read_source_field_cb.currentIndexChanged.connect(self.read_source_field_changed)
+
        # Update dialog fields from stored options
        for opt in self.OPTION_FIELDS:
            opt_value = gprefs.get(self.name + '_' + opt[0], opt[1])
-           if opt[0] in ['numbers_as_text','generate_titles','generate_series','generate_recently_added']:
+           if opt[0] in [
+               'generate_recently_added',
+               'generate_series',
+               'generate_titles',
+               'numbers_as_text',
+               ]:
                getattr(self, opt[0]).setChecked(opt_value)

+           # Combo box
+           elif opt[0] in ['read_source_field_cb']:
+               # Look for last-stored combo box value
+               index = self.read_source_field_cb.findText(opt_value)
+               if index == -1:
+                   index = self.read_source_field_cb.findText('Tag')
+               self.read_source_field_cb.setCurrentIndex(index)
+
+           # Text fields
            else:
                getattr(self, opt[0]).setText(opt_value)

+       # Init self.read_source_field
+       cs = unicode(self.read_source_field_cb.currentText())
+       read_source_spec = self.read_source_fields[cs]
+       self.read_source_field = read_source_spec['field']

    def options(self):
        # Save/return the current options
        # exclude_genre stores literally
@@ -55,16 +95,60 @@ class PluginWidget(QWidget,Ui_Form):
        # others store as lists
        opts_dict = {}
        for opt in self.OPTION_FIELDS:
-           if opt[0] in ['numbers_as_text','generate_titles','generate_series','generate_recently_added']:
+           # Save values to gprefs
+           if opt[0] in [
+               'generate_recently_added',
+               'generate_series',
+               'generate_titles',
+               'numbers_as_text',
+               ]:
                opt_value = getattr(self,opt[0]).isChecked()

+           # Combo box uses .currentText()
+           elif opt[0] in ['read_source_field_cb']:
+               opt_value = unicode(getattr(self, opt[0]).currentText())

+           # text fields use .text()
            else:
                opt_value = unicode(getattr(self, opt[0]).text())
            gprefs.set(self.name + '_' + opt[0], opt_value)

-           if opt[0] in ['exclude_genre','numbers_as_text','generate_titles','generate_series','generate_recently_added']:
+           # Construct opts
+           if opt[0] in [
+               'exclude_genre',
+               'generate_recently_added',
+               'generate_series',
+               'generate_titles',
+               'numbers_as_text',
+               ]:
                opts_dict[opt[0]] = opt_value
            else:
                opts_dict[opt[0]] = opt_value.split(',')
-       opts_dict['output_profile'] = [load_defaults('page_setup')['output_profile']]

+       # Generate read_book_marker
+       opts_dict['read_book_marker'] = "%s:%s" % (self.read_source_field, self.read_pattern.text())

+       # Append the output profile
+       opts_dict['output_profile'] = [load_defaults('page_setup')['output_profile']]
        return opts_dict

+   def read_source_field_changed(self,new_index):
+       '''
+       Process changes in the read_source_field combo box
+       Currently using QLineEdit for all field types
+       Possible to modify to switch QWidget type
+       '''
+       new_source = str(self.read_source_field_cb.currentText())
+       read_source_spec = self.read_source_fields[str(new_source)]
+       self.read_source_field = read_source_spec['field']
+
+       # Change pattern input widget to match the source field datatype
+       if read_source_spec['datatype'] in ['bool','composite','datetime','text']:
+           if not isinstance(self.read_pattern, QLineEdit):
+               self.read_spec_hl.removeWidget(self.read_pattern)
+               dw = QLineEdit(self)
+               dw.setObjectName('read_pattern')
+               dw.setToolTip('Pattern for read book')
+               self.read_pattern = dw
+               self.read_spec_hl.addWidget(dw)
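options() now derives a read_book_marker of the form "field:pattern" from the combo-box selection and the pattern line edit. The composition in isolation; the values are hypothetical examples, on the assumption that self.read_source_field holds a custom column key such as '#read' when a custom field is selected:

read_source_field = '#read'                 # e.g. a custom bool column
read_pattern = 'True'
read_book_marker = "%s:%s" % (read_source_field, read_pattern)
print(read_book_marker)                     # '#read:True'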
@@ -6,8 +6,8 @@
    <rect>
     <x>0</x>
     <y>0</y>
-    <width>579</width>
-    <height>411</height>
+    <width>627</width>
+    <height>549</height>
    </rect>
   </property>
   <property name="windowTitle">
@@ -28,42 +28,28 @@
     </property>
    </widget>
   </item>
-  <item row="1" column="0">
-   <widget class="QLabel" name="label_3">
-    <property name="text">
-     <string>'Mark this book as read' tag:</string>
-    </property>
-   </widget>
-  </item>
-  <item row="1" column="1">
-   <widget class="QLineEdit" name="read_tag">
-    <property name="toolTip">
-     <string extracomment="Default: +"/>
-    </property>
-   </widget>
-  </item>
-  <item row="3" column="0">
+  <item row="4" column="0">
   <widget class="QLabel" name="label_4">
    <property name="text">
     <string>Additional note tag prefix:</string>
    </property>
   </widget>
  </item>
- <item row="3" column="1">
+ <item row="4" column="1">
  <widget class="QLineEdit" name="note_tag">
   <property name="toolTip">
    <string extracomment="Default: *"/>
   </property>
  </widget>
 </item>
- <item row="5" column="1">
+ <item row="6" column="1">
  <widget class="QLineEdit" name="exclude_genre">
   <property name="toolTip">
    <string extracomment="Default: \[[\w]*\]"/>
   </property>
  </widget>
 </item>
- <item row="5" column="0">
+ <item row="6" column="0">
  <widget class="QLabel" name="label">
   <property name="text">
    <string>Regex pattern describing tags to exclude as genres:</string>
@@ -76,7 +62,7 @@
   </property>
  </widget>
 </item>
- <item row="6" column="1">
+ <item row="7" column="1">
  <widget class="QLabel" name="label_6">
   <property name="text">
    <string>Regex tips:
@@ -88,7 +74,7 @@
   </property>
  </widget>
 </item>
- <item row="7" column="0">
+ <item row="8" column="0">
  <spacer name="verticalSpacer">
   <property name="orientation">
    <enum>Qt::Vertical</enum>
@@ -101,44 +87,84 @@
   </property>
  </spacer>
 </item>
- <item row="9" column="0">
+ <item row="10" column="0">
  <widget class="QCheckBox" name="generate_titles">
   <property name="text">
    <string>Include 'Titles' Section</string>
   </property>
  </widget>
 </item>
- <item row="11" column="0">
+ <item row="12" column="0">
  <widget class="QCheckBox" name="generate_recently_added">
   <property name="text">
    <string>Include 'Recently Added' Section</string>
   </property>
  </widget>
 </item>
- <item row="12" column="0">
+ <item row="13" column="0">
  <widget class="QCheckBox" name="numbers_as_text">
   <property name="text">
    <string>Sort numbers as text</string>
   </property>
  </widget>
 </item>
- <item row="10" column="0">
+ <item row="11" column="0">
  <widget class="QCheckBox" name="generate_series">
   <property name="text">
    <string>Include 'Series' Section</string>
   </property>
  </widget>
 </item>
- <item row="2" column="1">
+ <item row="3" column="1">
  <widget class="QLineEdit" name="wishlist_tag"/>
 </item>
- <item row="2" column="0">
+ <item row="3" column="0">
  <widget class="QLabel" name="label_5">
   <property name="text">
    <string>Wishlist tag:</string>
   </property>
  </widget>
 </item>
+ <item row="2" column="1">
+  <layout class="QHBoxLayout" name="read_spec_hl">
+   <property name="sizeConstraint">
+    <enum>QLayout::SetMinimumSize</enum>
+   </property>
+   <item>
+    <widget class="QComboBox" name="read_source_field_cb">
+     <property name="sizePolicy">
+      <sizepolicy hsizetype="MinimumExpanding" vsizetype="Fixed">
+       <horstretch>0</horstretch>
+       <verstretch>0</verstretch>
+      </sizepolicy>
+     </property>
+     <property name="toolTip">
+      <string>Source column for read book</string>
+     </property>
+     <property name="statusTip">
+      <string/>
+     </property>
+    </widget>
+   </item>
+   <item>
+    <widget class="QLineEdit" name="read_pattern">
+     <property name="toolTip">
+      <string>Pattern for read book</string>
+     </property>
+     <property name="statusTip">
+      <string/>
+     </property>
+    </widget>
+   </item>
+  </layout>
+ </item>
+ <item row="2" column="0">
+  <widget class="QLabel" name="label_3">
+   <property name="text">
+    <string>Books marked as read:</string>
+   </property>
+  </widget>
+ </item>
 </layout>
 </widget>
 <resources/>
@@ -17,6 +17,6 @@ class PluginWidget(Widget, Ui_Form):
    ICON = I('mimetypes/fb2.png')

    def __init__(self, parent, get_option, get_help, db=None, book_id=None):
-       Widget.__init__(self, parent, ['inline_toc', 'sectionize_chapters'])
+       Widget.__init__(self, parent, ['h1_to_title', 'h2_to_title', 'h3_to_title'])
        self.db, self.book_id = db, book_id
        self.initialize_options(get_option, get_help, db, book_id)
@@ -14,7 +14,7 @@
   <string>Form</string>
  </property>
  <layout class="QGridLayout" name="gridLayout">
- <item row="2" column="0">
+ <item row="3" column="0">
  <spacer name="verticalSpacer">
   <property name="orientation">
    <enum>Qt::Vertical</enum>
@@ -28,16 +28,23 @@
  </spacer>
 </item>
 <item row="0" column="0">
- <widget class="QCheckBox" name="opt_inline_toc">
+ <widget class="QCheckBox" name="opt_h1_to_title">
   <property name="text">
-   <string>&amp;Inline TOC</string>
+   <string>Wrap h1 tags with &lt;title&gt; elements</string>
   </property>
  </widget>
 </item>
 <item row="1" column="0">
- <widget class="QCheckBox" name="opt_sectionize_chapters">
+ <widget class="QCheckBox" name="opt_h2_to_title">
   <property name="text">
-   <string>Sectionize Chapters (Use with care!)</string>
+   <string>Wrap h2 tags with &lt;title&gt; elements</string>
+  </property>
+ </widget>
+</item>
+<item row="2" column="0">
+ <widget class="QCheckBox" name="opt_h3_to_title">
+  <property name="text">
+   <string>Wrap h3 tags with &lt;title&gt; elements</string>
   </property>
  </widget>
 </item>
@@ -17,6 +17,7 @@ from calibre.ebooks.metadata import authors_to_string, string_to_authors, \
 from calibre.ebooks.metadata.opf2 import metadata_to_opf
 from calibre.ptempfile import PersistentTemporaryFile
 from calibre.gui2.convert import Widget
+from calibre.utils.icu import sort_key

 def create_opf_file(db, book_id):
     mi = db.get_metadata(book_id, index_is_id=True)
@@ -102,7 +103,7 @@ class MetadataWidget(Widget, Ui_Form):

    def initalize_authors(self):
        all_authors = self.db.all_authors()
-       all_authors.sort(cmp=lambda x, y : cmp(x[1], y[1]))
+       all_authors.sort(key=lambda x : sort_key(x[1]))

        for i in all_authors:
            id, name = i
@@ -117,7 +118,7 @@ class MetadataWidget(Widget, Ui_Form):

    def initialize_series(self):
        all_series = self.db.all_series()
-       all_series.sort(cmp=lambda x, y : cmp(x[1], y[1]))
+       all_series.sort(key=lambda x : sort_key(x[1]))

        for i in all_series:
            id, name = i
@@ -126,7 +127,7 @@ class MetadataWidget(Widget, Ui_Form):

    def initialize_publisher(self):
        all_publishers = self.db.all_publishers()
-       all_publishers.sort(cmp=lambda x, y : cmp(x[1], y[1]))
+       all_publishers.sort(key=lambda x : sort_key(x[1]))

        for i in all_publishers:
            id, name = i
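The three sort changes above are the standard Python cmp-to-key migration: instead of comparing pairs of elements with cmp(), sort on a key extracted from each element. Here lower() stands in for calibre's ICU-backed sort_key:

authors = [(3, 'Zola'), (1, 'austen'), (2, 'Borges')]
authors.sort(key=lambda x: x[1].lower())    # was: cmp=lambda x,y: cmp(x[1], y[1])
print([name for _, name in authors])        # ['austen', 'Borges', 'Zola']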
22	src/calibre/gui2/convert/pml_output.py	Normal file
@@ -0,0 +1,22 @@
+# -*- coding: utf-8 -*-
+
+__license__ = 'GPL 3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+from calibre.gui2.convert.pmlz_output_ui import Ui_Form
+from calibre.gui2.convert import Widget
+
+format_model = None
+
+class PluginWidget(Widget, Ui_Form):
+
+    TITLE = _('PMLZ Output')
+    HELP = _('Options specific to')+' PMLZ '+_('output')
+    COMMIT_NAME = 'pmlz_output'
+    ICON = I('mimetypes/unknown.png')
+
+    def __init__(self, parent, get_option, get_help, db=None, book_id=None):
+        Widget.__init__(self, parent, ['inline_toc', 'full_image_depth'])
+        self.db, self.book_id = db, book_id
+        self.initialize_options(get_option, get_help, db, book_id)
Some files were not shown because too many files have changed in this diff.