Merge from trunk

Changelog.yaml (263 lines changed)
@@ -19,6 +19,269 @@
# new recipes:
#  - title:

- version: 0.8.9
  date: 2011-07-08

  new features:
    - title: "Kobo Touch: Display Preview Tag for book previews on the device"

    - title: "Improved display of grouped search terms in the Tag Browser"

    - title: "When adding HTML files to calibre, add an option to process links in breadth first rather than depth first order. Access it via Preferences->Plugins and customize the HTML to ZIP plugin"

    - title: "Conversion pipeline: Add an option to control whether duplicate entries are allowed when generating the Table of Contents from links."
      tickets: [806095]

    - title: "Metadata download: When merging results, if the query to the xisbn service hangs, wait no more than 10 seconds. Also try harder to preserve the month when downloading the published date. Do not throw away ISBN-less results if some sources return ISBNs and some do not."
      tickets: [798309]

    - title: "Get Books: Remove OpenLibrary since it has the same files as archive.org. Allow direct downloading from Project Gutenberg."

    - title: "Add functions to the template language that return the last modified time and size of the individual format files for a book. Also add a has_cover() function."

  bug fixes:
    - title: "Fix true/false searches not working in device views"
      tickets: [807262]

    - title: "Fix renaming of collections in device views"
      tickets: [807256]

    - title: "Fix regression that broke the use of the device_db plugboard"
      tickets: [806483]

    - title: "Kobo driver: Hide Expired Book Status for deleted books. Also fix a regression that broke connecting to Kobo devices running very old firmware."
      tickets: [802083]

    - title: "Fix a bug in 0.8.8 that could cause metadata.db to be left in an unusable state if calibre is interrupted at just the wrong moment or if the db is stored in Dropbox"

    - title: "Fix sorting of composite custom columns that display numbers."

  improved recipes:
    - "Computer Act!ve"
    - Metro News NL
    - Spiegel Online International
    - cracked.com
    - Engadget
    - Independent
    - Telegraph UK

  new recipes:
    - title: "Blog da Cidadania and Noticias UnB"
      author: Diniz Bortolotto

    - title: "Galicia Confidential"
      author: Susana Sotelo Docio

    - title: "South China Morning Post"
      author: llam

    - title: "Szinti Derigisi"
      author: thomass

- version: 0.8.8
  date: 2011-07-01

  new features:
    - title: "Make author names in the Book Details panel clickable. Clicking them takes you to the Wikipedia page for the author by default. You may have to tell calibre to display author names in the Book details panel first via Preferences->Look & Feel->Book details. You can change the link for individual authors by right clicking on the author's name in the Tag Browser and selecting Manage Authors."

    - title: "Get Books: Add 'Open Books' as an available book source"

    - title: "Get Books: When a free download is available for a search result, for example, for public domain books, allow direct download of the book into your calibre library."

    - title: "Support for detecting and mounting reader devices on FreeBSD."
      tickets: [802708]

    - title: "When creating a composite custom column, allow the use of HTML to create links and other markup that display in the Book details panel"

    - title: "Add the swap_around_comma function to the template language."

    - title: "Drivers for HTC G2, Advent Vega, iRiver Story HD, Lark FreeMe and Moovyman mp7"

    - title: "Quick View: Survives changing libraries. Also allow sorting by series index as well as name."

    - title: "Connect to iTunes: Add an option to control how the driver works depending on whether you have iTunes set up to copy files to its media directory or not. Set this option by customizing the Apple driver in Preferences->Plugins. Having iTunes copy media to its storage folder is no longer necessary. See http://www.mobileread.com/forums/showthread.php?t=118559 for details"

    - title: "Remove the delete library functionality from calibre. Instead you can now remove a library, so calibre will forget about it, but you have to delete the files manually"

  bug fixes:
    - title: "Fix a regression introduced in 0.8.7 in the Tag Browser that could cause calibre to crash after performing various actions"

    - title: "Fix an unhandled error when deleting all saved searches"
      tickets: [804383]

    - title: "Fix row numbers in a previous selection being incorrect after a sort operation."

    - title: "Fix the ISBN identifier type not being recognized if it is in upper case"
      tickets: [802288]

    - title: "Fix a regression in 0.8.7 that broke reading metadata from MOBI files in the Edit metadata dialog."
      tickets: [801981]

    - title: "Fix handling of filenames that have an even number of periods before the file extension."
      tickets: [801939]

    - title: "Fix lack of thread safety in the template format system, which could lead to incorrect template evaluation in some cases."
      tickets: [801944]

    - title: "Fix conversion to PDB when the input document has no text"
      tickets: [801888]

    - title: "Fix clicking on the first letter of author names generating an incorrect search."

    - title: "Fix updating bulk metadata in custom columns causing unnecessary Tag Browser refreshes."

    - title: "Fix a regression in 0.8.7 that broke renaming items via the Tag Browser"

    - title: "Fix a regression in 0.8.7 that caused the regex builder wizard to fail with LIT files as the input"

  improved recipes:
    - Zaman Gazetesi
    - Infobae
    - El Cronista
    - Critica de la Argentina
    - Buenos Aires Economico
    - El Universal (Venezuela)
    - wprost
    - Financial Times UK

  new recipes:
    - title: "Today's Zaman by thomass"

    - title: "Athens News by Darko Miletic"

    - title: "Catholic News Agency"
      author: Jetkey

    - title: "Arizona Republic"
      author: Jim Olo

    - title: "Add Ming Pao Vancouver and Toronto"
      author: Eddie Lau

- version: 0.8.7
  date: 2011-06-24

  new features:
    - title: "Connect to iTunes: You now need to tell iTunes to keep its own copy of every ebook. Do this in iTunes by going to Preferences->Advanced and setting the 'Copy files to iTunes Media folder when adding to library' option. To learn about why this is necessary, see: http://www.mobileread.com/forums/showthread.php?t=140260"

    - title: "Add a couple of date related functions to the calibre template language to get today's date and create text based on the value of a date type field"

    - title: "Improved reading of metadata from FB2 files, with support for reading ISBNs, tags, published date, etc."

    - title: "Driver for the Imagine IMEB5"
      tickets: [800642]

    - title: "Show the currently used network proxies in Preferences->Miscellaneous"

    - title: "Kobo Touch driver: Show Favorites as a device collection. Various other minor fixes."

    - title: "Content server now sends the Content-Disposition header when sending ebook files."

    - title: "Allow search and replace on comments custom columns."

    - title: "Add a new action 'Quick View' to show the books in your library by the author/tags/series/etc. of the currently selected book, in a separate window. You can add it to your toolbar or right click menu by going to Preferences->Toolbars."

    - title: "Get Books: Add libri.de as a book source. Fix a bug that caused some book downloads to fail. Fixes to the Legimi and beam-ebooks.de stores"
      tickets: [799367]

  bug fixes:
    - title: "Fix a memory leak that could result in the leaking of several MB of memory with large libraries"
      tickets: [800952]

    - title: "Fix the read metadata from format button in the edit metadata dialog using the wrong timezone when setting published date"
      tickets: [799777]

    - title: "Generating catalog: Fix occasional file in use errors when generating catalogs on Windows"

    - title: "Fix clicking on News in Tag Browser not working in non-English locales."
      tickets: [799471]

    - title: "HTML Input: Fix a regression in 0.8.6 that caused CSS stylesheets to be ignored"
      tickets: [799171]

    - title: "Fix a regression that caused restore database to stop working on some Windows systems"

    - title: "EPUB Output: Convert <br> tags with text in them into <div>s as ADE cannot handle them."
      tickets: [794427]

  improved recipes:
    - Le Temps
    - Perfil
    - Financial Times UK

  new recipes:
    - title: "Daytona Beach Journal"
      author: BRGriff

    - title: "El club del ebook and Frontline"
      author: Darko Miletic

- version: 0.8.6
  date: 2011-06-17

  new features:
    - title: "Builtin support for downloading and installing/updating calibre plugins. Go to Preferences->Plugins and click 'Get new plugins'"
      description: "When updates for installed plugins are available, calibre will automatically (unobtrusively) notify you"
      type: major

    - title: "Metadata download configuration: Allow defining a set of 'default' fields for metadata download and quickly switching to/from them"

    - title: "Allow clicking on the news category in the Tag Browser to display all downloaded periodicals"

    - title: "Driver for the Libre Air"

    - title: "Email sending: Allow user to stop email jobs (note that stopping may not actually prevent the email from being sent, depending on when the stop happens). Also automatically abort email sending if it takes longer than 15 minutes."
      tickets: [795960]

  bug fixes:
    - title: "MOBI Output: Allow setting of background color on tables. Also set the border attribute on the table if the table has any border related CSS defined."
      tickets: [797580]

    - title: "Nook TSR: Put news sent to the device in My Files/Newspapers instead of My Files/Books."
      tickets: [796674]

    - title: "MOBI Output: Fix a bug where linking to the very first element in an HTML file could sometimes result in the link pointing to the last element in the previous file."
      tickets: [797214]

    - title: "CSV catalog: Convert HTML comments to plain text"

    - title: "HTML Input: Ignore links to text files."
      tickets: [791568]

    - title: "EPUB Output: Change orphaned <td> tags to <div> as they cause ADE to crash."

    - title: "Fix 'Stop selected jobs' button trying to stop the same job multiple times"

    - title: "Database: Explicitly test for case sensitivity on OS X instead of assuming a case insensitive filesystem."
      tickets: [796258]

    - title: "Get Books: More fixes to the Amazon store plugin"

    - title: "FB2 Input: Do not specify font families/background colors"

  improved recipes:
    - Philadelphia Inquirer
    - Macleans Magazine
    - Metro UK

  new recipes:
    - title: "Christian Post, Down To Earth and Words Without Borders"
      author: sexymax15

    - title: "Noticias R7"
      author: Diniz Bortolotto

    - title: "UK Daily Mirror"
      author: Dave Asbury

    - title: "New Musical Express Magazine"
      author: scissors

- version: 0.8.5
  date: 2011-06-10
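
The template-language entries above (the format-file time/size functions and has_cover() in 0.8.9, swap_around_comma() in 0.8.8) are used like any other single-function or program-mode template call. A minimal sketch, assuming only the function names given in the entries (exact signatures may vary by release):

    {authors:swap_around_comma()}
    program: test(has_cover(), 'Has cover', 'No cover')

The first form applies swap_around_comma to the authors field, so "Miletic, Darko" becomes "Darko Miletic"; the second uses template program mode, where test() picks one of two strings depending on whether has_cover() returned a non-empty value.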
recipes/arizona_republic.recipe (new file, 68 lines)
@@ -0,0 +1,68 @@
__license__ = 'GPL v3'
__copyright__ = '2010, jolo'
'''
azrepublic.com
'''
from calibre.web.feeds.recipes import BasicNewsRecipe

class AdvancedUserRecipe1307301031(BasicNewsRecipe):
    title = u'AZRepublic'
    __author__ = 'Jim Olo'
    language = 'en'
    description = "The Arizona Republic is Arizona's leading provider of news and information, and has published a daily newspaper in Phoenix for more than 110 years"
    publisher = 'AZRepublic/AZCentral'
    masthead_url = 'http://freedom2t.com/wp-content/uploads/press_az_republic_v2.gif'
    cover_url = 'http://www.valleyleadership.org/Common/Img/2line4c_AZRepublic%20with%20azcentral%20logo.jpg'
    category = 'news, politics, USA, AZ, Arizona'

    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds = True
    no_stylesheets = True
    remove_javascript = True
    # extra_css = '.headline {font-size: medium;} \n .fact { padding-top: 10pt }'
    extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .headline {font-size: medium} .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '

    remove_attributes = ['width','height','h2','subHeadline','style']
    remove_tags = [
        dict(name='div', attrs={'id':['slidingBillboard', 'top728x90', 'subindex-header', 'topSearch']}),
        dict(name='div', attrs={'id':['simplesearch', 'azcLoginBox', 'azcLoginBoxInner', 'topNav']}),
        dict(name='div', attrs={'id':['carsDrop', 'homesDrop', 'rentalsDrop', 'classifiedDrop']}),
        dict(name='div', attrs={'id':['nav', 'mp', 'subnav', 'jobsDrop']}),
        dict(name='h6', attrs={'class':['section-header']}),
        dict(name='a', attrs={'href':['#comments']}),
        dict(name='div', attrs={'class':['articletools clearfix', 'floatRight']}),
        dict(name='div', attrs={'id':['fbFrame', 'ob', 'storyComments', 'storyGoogleAdBox']}),
        dict(name='div', attrs={'id':['storyTopHomes', 'openRight', 'footerwrap', 'copyright']}),
        dict(name='div', attrs={'id':['blogsHed', 'blog_comments', 'blogByline','blogTopics']}),
        dict(name='div', attrs={'id':['membersRightMain', 'dealsfooter', 'azrTopHed', 'azrRightCol']}),
        dict(name='div', attrs={'id':['ttdHeader', 'ttdTimeWeather']}),
        dict(name='div', attrs={'id':['membersRightMain', 'deals-header-wrap']}),
        dict(name='div', attrs={'id':['todoTopSearchBar', 'byline clearfix', 'subdex-topnav']}),
        dict(name='h1', attrs={'id':['SEOtext']}),
        dict(name='table', attrs={'class':['ap-mediabox-table']}),
        dict(name='p', attrs={'class':['ap_para']}),
        dict(name='span', attrs={'class':['source-org vcard', 'org fn']}),
        dict(name='a', attrs={'href':['http://hosted2.ap.org/APDEFAULT/privacy']}),
        dict(name='a', attrs={'href':['http://hosted2.ap.org/APDEFAULT/terms']}),
        dict(name='div', attrs={'id':['onespot_nextclick']}),
    ]

    feeds = [
        (u'FrontPage', u'http://www.azcentral.com/rss/feeds/republicfront.xml'),
        (u'TopUS-News', u'http://hosted.ap.org/lineups/USHEADS.rss?SITE=AZPHG&SECTION=HOME'),
        (u'WorldNews', u'http://hosted.ap.org/lineups/WORLDHEADS.rss?SITE=AZPHG&SECTION=HOME'),
        (u'TopBusiness', u'http://hosted.ap.org/lineups/BUSINESSHEADS.rss?SITE=AZPHG&SECTION=HOME'),
        (u'Entertainment', u'http://hosted.ap.org/lineups/ENTERTAINMENT.rss?SITE=AZPHG&SECTION=HOME'),
        (u'ArizonaNews', u'http://www.azcentral.com/rss/feeds/news.xml'),
        (u'Gilbert', u'http://www.azcentral.com/rss/feeds/gilbert.xml'),
        (u'Chandler', u'http://www.azcentral.com/rss/feeds/chandler.xml'),
        (u'DiningReviews', u'http://www.azcentral.com/rss/feeds/diningreviews.xml'),
        (u'AZBusiness', u'http://www.azcentral.com/rss/feeds/business.xml'),
        (u'ArizonaDeals', u'http://www.azcentral.com/members/Blog%7E/RealDealsblog'),
        (u'GroceryDeals', u'http://www.azcentral.com/members/Blog%7E/RealDealsblog/tag/2646')
    ]
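
A recipe file like the one above can be exercised from the command line before it is added through the GUI; calibre's ebook-convert accepts a .recipe file directly (the output file name here is arbitrary):

    ebook-convert arizona_republic.recipe output.epub --test -vv

The --test flag downloads only a couple of articles per feed, which keeps the edit-run cycle short while debugging the feeds and remove_tags lists.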
recipes/athens_news.recipe (new file, 70 lines)
@@ -0,0 +1,70 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
www.athensnews.gr
'''

from calibre.web.feeds.news import BasicNewsRecipe

class AthensNews(BasicNewsRecipe):
    title = 'Athens News'
    __author__ = 'Darko Miletic'
    description = 'Greece in English since 1952'
    publisher = 'NEP Publishing Company SA'
    category = 'news, politics, Greece, Athens'
    oldest_article = 1
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False
    language = 'en_GR'
    remove_empty_feeds = True
    publication_type = 'newspaper'
    masthead_url = 'http://www.athensnews.gr/sites/athensnews/themes/athensnewsv3/images/logo.jpg'
    extra_css = """
        body{font-family: Arial,Helvetica,sans-serif }
        img{margin-bottom: 0.4em; display:block}
        .big{font-size: xx-large; font-family: Georgia,serif}
        .articlepubdate{font-size: small; color: gray; font-family: Georgia,serif}
        .lezanta{font-size: x-small; font-weight: bold; text-align: left; margin-bottom: 1em; display: block}
    """

    conversion_options = {
        'comment'            : description
        , 'tags'             : category
        , 'publisher'        : publisher
        , 'language'         : language
        , 'linearize_tables' : True
    }

    remove_tags = [
        dict(name=['meta','link'])
    ]
    keep_only_tags = [
        dict(name='span',attrs={'class':'big'})
        ,dict(name='td', attrs={'class':['articlepubdate','text']})
    ]
    remove_attributes = ['lang']

    feeds = [
        (u'News'             , u'http://www.athensnews.gr/category/1/feed' )
        ,(u'Politics'        , u'http://www.athensnews.gr/category/8/feed' )
        ,(u'Business'        , u'http://www.athensnews.gr/category/2/feed' )
        ,(u'Economy'         , u'http://www.athensnews.gr/category/11/feed')
        ,(u'Community'       , u'http://www.athensnews.gr/category/5/feed' )
        ,(u'Arts'            , u'http://www.athensnews.gr/category/3/feed' )
        ,(u'Living in Athens', u'http://www.athensnews.gr/category/7/feed' )
        ,(u'Sports'          , u'http://www.athensnews.gr/category/4/feed' )
        ,(u'Travel'          , u'http://www.athensnews.gr/category/6/feed' )
        ,(u'Letters'         , u'http://www.athensnews.gr/category/44/feed')
        ,(u'Media'           , u'http://www.athensnews.gr/multimedia/feed' )
    ]

    def print_version(self, url):
        return url + '?action=print'

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return soup
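
The print_version hook above is called once per article URL and returns the address that is actually downloaded; here it simply asks the site for its printer-friendly rendering. A sketch with a hypothetical article URL, for illustration only:

    # hypothetical URL, not taken from the site
    url = 'http://www.athensnews.gr/portal/1/12345'
    url + '?action=print'   # -> 'http://www.athensnews.gr/portal/1/12345?action=print'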
recipes/automatiseringgids.recipe (new file, 39 lines)
@@ -0,0 +1,39 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe

class autogids(BasicNewsRecipe):
    title = u'Automatiseringgids IT'
    oldest_article = 7
    __author__ = 'DrMerry'
    description = 'IT-nieuws van Automatiseringgids'
    language = 'nl'
    publisher = 'AutomatiseringGids'
    category = 'Nieuws, IT, Nederlandstalig'
    simultaneous_downloads = 5
    #delay = 1
    timefmt = ' [%A, %d %B, %Y]'
    #timefmt = ''
    no_stylesheets = True
    remove_javascript = True
    remove_empty_feeds = True
    publication_type = 'newspaper'
    encoding = 'utf-8'
    cover_url = 'http://www.automatiseringgids.nl/siteimg/header_logo.gif'
    keep_only_tags = [dict(id=['content'])]
    extra_css = '.artikelheader {font-size:0.8em; color: #666;} .artikelintro {font-weight:bold} div.imgArticle {float: right; margin: 0 0em 1em 1em; display: block; position: relative; } \
                 h2 { margin: 0 0 0.5em; min-height: 30px; font-size: 1.5em; letter-spacing: -0.2px; margin: 0 0 0.5em; color: black; font-weight: bold; line-height: 1.2em; padding: 4px 3px 0; }'

    remove_tags = [dict(name='div', attrs={'id':['loginbox','reactiecollapsible','reactiebox']}),
                   dict(name='div', attrs={'class':['column_a','column_c','bannerfullsize','reactieheader','reactiecollapsible','formulier','artikel_headeroptions']}),
                   dict(name='ul', attrs={'class':['highlightlist']}),
                   dict(name='input', attrs={'type':['button']}),
                   dict(name='div', attrs={'style':['display:block; width:428px; height:30px; float:left;']}),
                  ]
    preprocess_regexps = [
        (re.compile(r'(<h3>Reacties</h3>|<h2>Zie ook:</h2>|<div style=".*</div>|<a[^>]*>|</a>)', re.DOTALL|re.IGNORECASE),
         lambda match: ''),
    ]

    feeds = [(u'Actueel', u'http://www.automatiseringgids.nl/rss.aspx')]
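
The preprocess_regexps list above is applied by BasicNewsRecipe to the raw HTML of every article before parsing: each entry is a (compiled pattern, callable) pair, and the callable's return value replaces each match. Conceptually it behaves like the following sketch of the documented behaviour (not calibre's actual code):

    import re

    def run_preprocess_regexps(html, rules):
        # rules is a list of (compiled_regex, match -> replacement string) pairs
        for pattern, func in rules:
            html = pattern.sub(func, html)
        return html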
recipes/blog_da_cidadania.recipe (new file, 20 lines)
@@ -0,0 +1,20 @@
# -*- coding: utf-8 -*-

from calibre.web.feeds.news import BasicNewsRecipe

class BlogdaCidadania(BasicNewsRecipe):
    title = 'Blog da Cidadania'
    __author__ = 'Diniz Bortolotto'
    description = 'Posts do Blog da Cidadania'
    oldest_article = 7
    max_articles_per_feed = 50
    encoding = 'utf8'
    publisher = 'Eduardo Guimaraes'
    category = 'politics, Brazil'
    language = 'pt_BR'
    publication_type = 'politics portal'

    feeds = [(u'Blog da Cidadania', u'http://www.blogcidadania.com.br/feed/')]

    reverse_article_order = True
@@ -1,19 +1,16 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
elargentino.com
www.diariobae.com
'''

from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag

class BsAsEconomico(BasicNewsRecipe):
    title = 'Buenos Aires Economico'
    __author__ = 'Darko Miletic'
    description = 'Revista Argentina'
    publisher = 'ElArgentino.com'
    description = 'Diario BAE es el diario economico-politico con mas influencia en la Argentina. Fuente de empresarios y politicos del pais y el exterior. El pozo estaria aportando en periodos breves un volumen equivalente a 800m3 diarios. Pero todavia deben efectuarse otras perforaciones adicionales.'
    publisher = 'Diario BAE'
    category = 'news, politics, economy, Argentina'
    oldest_article = 2
    max_articles_per_feed = 100
@@ -21,52 +18,42 @@ class BsAsEconomico(BasicNewsRecipe):
    use_embedded_content = False
    encoding = 'utf-8'
    language = 'es_AR'
    cover_url = strftime('http://www.diariobae.com/imgs_portadas/%Y%m%d_portadasBAE.jpg')
    masthead_url = 'http://www.diariobae.com/img/logo_bae.png'
    remove_empty_feeds = True
    publication_type = 'newspaper'
    extra_css = """
        body{font-family: Georgia,"Times New Roman",Times,serif}
        #titulo{font-size: x-large}
        #epi{font-size: small; font-style: italic; font-weight: bold}
        img{display: block; margin-top: 1em}
    """
    conversion_options = {
        'comment'     : description
        , 'tags'      : category
        , 'publisher' : publisher
        , 'language'  : language
    }

    lang = 'es-AR'
    direction = 'ltr'
    INDEX = 'http://www.elargentino.com/medios/121/Buenos-Aires-Economico.html'
    extra_css = ' .titulo{font-size: x-large; font-weight: bold} .volantaImp{font-size: small; font-weight: bold} '

    html2lrf_options = [
        '--comment'    , description
        , '--category' , category
        , '--publisher', publisher
    remove_tags_before = dict(attrs={'id':'titulo'})
    remove_tags_after  = dict(attrs={'id':'autor' })
    remove_tags = [
        dict(name=['meta','base','iframe','link','lang'])
        ,dict(attrs={'id':'barra_tw'})
    ]
    remove_attributes = ['data-count','data-via']

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'

    keep_only_tags = [dict(name='div', attrs={'class':'ContainerPop'})]

    remove_tags = [dict(name='link')]

    feeds = [(u'Articulos', u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=121&Content-Type=text/xml&ChannelDesc=Buenos%20Aires%20Econ%C3%B3mico')]

    def print_version(self, url):
        main, sep, article_part = url.partition('/nota-')
        article_id, rsep, rrest = article_part.partition('-')
        return u'http://www.elargentino.com/Impresion.aspx?Id=' + article_id
    feeds = [
        (u'Argentina'    , u'http://www.diariobae.com/rss/argentina.xml'   )
        ,(u'Valores'     , u'http://www.diariobae.com/rss/valores.xml'     )
        ,(u'Finanzas'    , u'http://www.diariobae.com/rss/finanzas.xml'    )
        ,(u'Negocios'    , u'http://www.diariobae.com/rss/negocios.xml'    )
        ,(u'Mundo'       , u'http://www.diariobae.com/rss/mundo.xml'       )
        ,(u'5 dias'      , u'http://www.diariobae.com/rss/5dias.xml'       )
        ,(u'Espectaculos', u'http://www.diariobae.com/rss/espectaculos.xml')
    ]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        soup.html['lang'] = self.lang
        soup.html['dir' ] = self.direction
        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
        soup.head.insert(0,mlang)
        soup.head.insert(1,mcharset)
        return soup

    def get_cover_url(self):
        cover_url = None
        soup = self.index_to_soup(self.INDEX)
        cover_item = soup.find('div',attrs={'class':'colder'})
        if cover_item:
            clean_url = self.image_url_processor(None,cover_item.div.img['src'])
            cover_url = 'http://www.elargentino.com' + clean_url + '&height=600'
        return cover_url

    def image_url_processor(self, baseurl, url):
        base, sep, rest = url.rpartition('?Id=')
        img, sep2, rrest = rest.partition('&')
        return base + sep + img
recipes/catholic_news_agency.recipe (new file, 13 lines)
@@ -0,0 +1,13 @@
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1301972345(BasicNewsRecipe):
    title = u'Catholic News Agency'
    language = 'en'
    __author__ = 'Jetkey'
    oldest_article = 5
    max_articles_per_feed = 20

    feeds = [(u'U.S. News', u'http://feeds.feedburner.com/catholicnewsagency/dailynews-us'),
             (u'Vatican', u'http://feeds.feedburner.com/catholicnewsagency/dailynews-vatican'),
             (u'Bishops Corner', u'http://feeds.feedburner.com/catholicnewsagency/columns/bishopscorner'),
             (u'Saint of the Day', u'http://feeds.feedburner.com/catholicnewsagency/saintoftheday')]
recipes/christian_post.recipe (new file, 37 lines)
@@ -0,0 +1,37 @@
# created by sexymax15 ... sexymax15@gmail.com
# Christian Post recipe
from calibre.web.feeds.news import BasicNewsRecipe

class ChristianPost(BasicNewsRecipe):

    title = 'The Christian Post'
    __author__ = 'sexymax15'
    description = 'Homepage'
    language = 'en'
    no_stylesheets = True
    use_embedded_content = False
    oldest_article = 30
    max_articles_per_feed = 15

    remove_empty_feeds = True
    remove_javascript = True

    extra_css = '''
        h1 {color:#008852;font-family:Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:18px;}
        h2 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:16px; } '''

    feeds = [
        ('Homepage', 'http://www.christianpost.com/services/rss/feed/'),
        ('Most Popular', 'http://www.christianpost.com/services/rss/feed/most-popular'),
        ('Entertainment', 'http://www.christianpost.com/services/rss/feed/entertainment/'),
        ('Politics', 'http://www.christianpost.com/services/rss/feed/politics/'),
        ('Living', 'http://www.christianpost.com/services/rss/feed/living/'),
        ('Business', 'http://www.christianpost.com/services/rss/feed/business/'),
        ('Opinion', 'http://www.christianpost.com/services/rss/feed/opinion/')
    ]

    def print_version(self, url):
        return url + 'print.html'
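
The print_version above builds the printable address by plain concatenation, so it relies on christianpost.com article links ending in a trailing slash. A sketch with a hypothetical URL, for illustration only:

    # hypothetical URL, not taken from the site
    url = 'http://www.christianpost.com/news/example-article-12345/'
    url + 'print.html'   # -> '.../example-article-12345/print.html'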
@@ -1,19 +1,20 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'Lorenzo Vigentini'
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
__version__ = 'v1.01'
__date__ = '14, January 2010'
__description__ = 'Computeractive publishes new downloads, reviews, news stories, step-by-step guides and answers to PC problems every day.'
__author__ = 'DrMerry Based on v1.01 by Lorenzo Vigentini'
__copyright__ = 'For version 1.02, 1.03: DrMerry'
__version__ = 'v1.03'
__date__ = '11, July 2011'
__description__ = 'Computeractive publishes new downloads, reviews, news stories, step-by-step guides and answers to PC problems every day. Original version (c): 2009, Lorenzo Vigentini <l.vigentini at gmail.com>'

'''
http://www.computeractive.co.uk/
'''

from calibre.web.feeds.news import BasicNewsRecipe
import re

class computeractive(BasicNewsRecipe):
    __author__ = 'Lorenzo Vigentini'
    __author__ = 'DrMerry'
    description = 'Computeractive publishes new downloads, reviews, news stories, step-by-step guides and answers to PC problems every day.'
    cover_url = 'http://images.pcworld.com/images/common/header/header-logo.gif'

@@ -31,24 +32,27 @@ class computeractive(BasicNewsRecipe):

    remove_javascript = True
    no_stylesheets = True
    remove_empty_feeds = True
    remove_tags_after = dict(name='div', attrs={'class':'article_tags_block'})

    keep_only_tags = [
        dict(name='div', attrs={'id':'main'})
        dict(name='div', attrs={'id':'container_left'})
    ]

    remove_tags = [
        dict(name='div', attrs={'id':['seeAlsoTags','commentsModule','relatedArticles','mainLeft','mainRight']}),
        dict(name='div', attrs={'class':['buyIt','detailMpu']}),
        dict(name='div', attrs={'id':['seeAlsoTags','commentsModule','relatedArticles','mainLeft','mainRight','recent_comment_block_parent','reviewDetails']}),
        dict(name='div', attrs={'class':['buyIt','detailMpu','small_section','recent_comment_block_parent','title_right_button_fix','section_title.title_right_button_fix','common_button']}),
        dict(name='a', attrs={'class':'largerImage'})
    ]

    preprocess_regexps = [
        (re.compile(r'(<a [^>]*>|</a>)', re.DOTALL|re.IGNORECASE),
         lambda match: ''),
    ]

    feeds = [
        (u'General content', u'http://feeds.computeractive.co.uk/rss/latest/computeractive/all'),
        (u'News', u'http://feeds.computeractive.co.uk/rss/latest/computeractive/news'),
        (u'Downloads', u'http://feeds.computeractive.co.uk/rss/latest/computeractive/downloads'),
        (u'Hardware', u'http://feeds.computeractive.co.uk/rss/latest/computeractive/hardware'),
        (u'Software', u'http://feeds.computeractive.co.uk/rss/latest/computeractive/software'),
        (u'Competitions', u'http://www.v3.co.uk/feeds/rss20/personal-technology/competitions')
    ]
@@ -1,83 +1,63 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re

class Cracked(BasicNewsRecipe):
    title = u'Cracked.com'
    __author__ = u'Nudgenudge'
    __author__ = 'UnWeave'
    language = 'en'
    description = 'America''s Only Humor and Video Site, since 1958'
    description = "America's Only HumorSite since 1958"
    publisher = 'Cracked'
    category = 'comedy, lists'
    oldest_article = 2
    delay = 10
    max_articles_per_feed = 2
    oldest_article = 3 # days
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'cp1252'
    encoding = 'ascii'
    remove_javascript = True
    use_embedded_content = False
    INDEX = u'http://www.cracked.com'
    extra_css = """
        .pageheader_type{font-size: x-large; font-weight: bold; color: #828D74}
        .pageheader_title{font-size: xx-large; color: #394128}
        .pageheader_byline{font-size: small; font-weight: bold; color: #394128}
        .score_bg {display: inline; width: 100%; margin-bottom: 2em}
        .score_column_1{ padding-left: 10px; font-size: small; width: 50%}
        .score_column_2{ padding-left: 10px; font-size: small; width: 50%}
        .score_column_3{ padding-left: 10px; font-size: small; width: 50%}
        .score_header{font-size: large; color: #50544A}
        .bodytext{display: block}
        body{font-family: Helvetica,Arial,sans-serif}
    """

    feeds = [ (u'Articles', u'http://feeds.feedburner.com/CrackedRSS/') ]

    conversion_options = {
        'comment'            : description
        , 'tags'             : category
        , 'publisher'        : publisher
        , 'language'         : language
        , 'linearize_tables' : True
    }

    keep_only_tags = [
        dict(name='div', attrs={'class':['Column1']})
    remove_tags_before = dict(id='PrimaryContent')

    remove_tags_after = dict(name='div', attrs={'class':'shareBar'})

    remove_tags = [ dict(name='div', attrs={'class':['social',
                                                     'FacebookLike',
                                                     'shareBar'
                                                    ]}),

                    dict(name='div', attrs={'id':['inline-share-buttons',
                                                 ]}),

                    dict(name='span', attrs={'class':['views',
                                                      'KonaFilter'
                                                     ]}),
                    #dict(name='img'),
                  ]

    feeds = [(u'Articles', u'http://feeds.feedburner.com/CrackedRSS')]

    def get_article_url(self, article):
        return article.get('guid', None)

    def cleanup_page(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        for alink in soup.findAll('a'):
            if alink.string is not None:
                tstr = alink.string
                alink.replaceWith(tstr)
        for div_to_remove in soup.findAll('div', attrs={'id':['googlead_1','fb-like-article','comments_section']}):
            div_to_remove.extract()
        for div_to_remove in soup.findAll('div', attrs={'class':['share_buttons_col_1','GenericModule1']}):
            div_to_remove.extract()
        for div_to_remove in soup.findAll('div', attrs={'class':re.compile("prev_next")}):
            div_to_remove.extract()
        for ul_to_remove in soup.findAll('ul', attrs={'class':['Nav6']}):
            ul_to_remove.extract()
        for image in soup.findAll('img', attrs={'alt': 'article image'}):
            image.extract()

    def append_page(self, soup, appendtag, position):
        pager = soup.find('a',attrs={'class':'next_arrow_active'})
        if pager:
            nexturl = self.INDEX + pager['href']
            soup2 = self.index_to_soup(nexturl)
            texttag = soup2.find('div', attrs={'class':re.compile("userStyled")})
            newpos = len(texttag.contents)
            self.append_page(soup2,texttag,newpos)
            texttag.extract()
            self.cleanup_page(appendtag)
            appendtag.insert(position,texttag)
        else:
            self.cleanup_page(appendtag)

    def appendPage(self, soup, appendTag, position):
        # Check if article has multiple pages
        pageNav = soup.find('nav', attrs={'class':'PaginationContent'})
        if pageNav:
            # Check not at last page
            nextPage = pageNav.find('a', attrs={'class':'next'})
            if nextPage:
                nextPageURL = nextPage['href']
                nextPageSoup = self.index_to_soup(nextPageURL)
                # 8th <section> tag contains article content
                nextPageContent = nextPageSoup.findAll('section')[7]
                newPosition = len(nextPageContent.contents)
                self.appendPage(nextPageSoup,nextPageContent,newPosition)
                nextPageContent.extract()
                pageNav.extract()
                appendTag.insert(position,nextPageContent)

    def preprocess_html(self, soup):
        self.append_page(soup, soup.body, 3)
        return self.adeify_images(soup)
        self.appendPage(soup, soup.body, 3)
        return soup
@@ -1,69 +0,0 @@ (file deleted)
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
'''
criticadigital.com
'''

from calibre.web.feeds.news import BasicNewsRecipe

class CriticaDigital(BasicNewsRecipe):
    title = 'Critica de la Argentina'
    __author__ = 'Darko Miletic and Sujata Raman'
    description = 'Noticias de Argentina'
    oldest_article = 2
    max_articles_per_feed = 100
    language = 'es_AR'

    no_stylesheets = True
    use_embedded_content = False
    encoding = 'cp1252'

    extra_css = '''
        h1{font-family:"Trebuchet MS";}
        h3{color:#9A0000; font-family:Tahoma; font-size:x-small;}
        h2{color:#504E53; font-family:Arial,Helvetica,sans-serif ;font-size:small;}
        #epigrafe{font-family:Arial,Helvetica,sans-serif ;color:#666666 ; font-size:x-small;}
        p {font-family:Arial,Helvetica,sans-serif;}
        #fecha{color:#858585; font-family:Tahoma; font-size:x-small;}
        #autor{color:#858585; font-family:Tahoma; font-size:x-small;}
        #hora{color:#F00000;font-family:Tahoma; font-size:x-small;}
    '''
    keep_only_tags = [
        dict(name='div', attrs={'class':['bloqueTitulosNoticia','cfotonota']})
        ,dict(name='div', attrs={'id':'boxautor'})
        ,dict(name='p', attrs={'id':'textoNota'})
    ]

    remove_tags = [
        dict(name='div', attrs={'class':'box300' })
        ,dict(name='div', style=True )
        ,dict(name='div', attrs={'class':'titcomentario'})
        ,dict(name='div', attrs={'class':'comentario' })
        ,dict(name='div', attrs={'class':'paginador' })
    ]

    feeds = [
        (u'Politica', u'http://www.criticadigital.com/herramientas/rss.php?ch=politica' )
        ,(u'Economia', u'http://www.criticadigital.com/herramientas/rss.php?ch=economia' )
        ,(u'Deportes', u'http://www.criticadigital.com/herramientas/rss.php?ch=deportes' )
        ,(u'Espectaculos', u'http://www.criticadigital.com/herramientas/rss.php?ch=espectaculos')
        ,(u'Mundo', u'http://www.criticadigital.com/herramientas/rss.php?ch=mundo' )
        ,(u'Policiales', u'http://www.criticadigital.com/herramientas/rss.php?ch=policiales' )
        ,(u'Sociedad', u'http://www.criticadigital.com/herramientas/rss.php?ch=sociedad' )
        ,(u'Salud', u'http://www.criticadigital.com/herramientas/rss.php?ch=salud' )
        ,(u'Tecnologia', u'http://www.criticadigital.com/herramientas/rss.php?ch=tecnologia' )
        ,(u'Santa Fe', u'http://www.criticadigital.com/herramientas/rss.php?ch=santa_fe' )
    ]

    def get_cover_url(self):
        cover_url = None
        index = 'http://www.criticadigital.com/impresa/'
        soup = self.index_to_soup(index)
        link_item = soup.find('div',attrs={'class':'tapa'})
        if link_item:
            cover_url = index + link_item.img['src']
        return cover_url
recipes/daytona_beach.recipe (new file, 78 lines)
@@ -0,0 +1,78 @@
from calibre.web.feeds.news import BasicNewsRecipe

class DaytonBeachNewsJournal(BasicNewsRecipe):
    title = 'Daytona Beach News Journal'
    __author__ = 'BRGriff'
    publisher = 'News-JournalOnline.com'
    description = 'Daytona Beach, Florida, Newspaper'
    category = 'News, Daytona Beach, Florida'
    oldest_article = 1
    max_articles_per_feed = 100
    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    language = 'en'
    filterDuplicates = True
    remove_attributes = ['style']

    keep_only_tags = [dict(name='div', attrs={'class':'page-header'}),
                      dict(name='div', attrs={'class':'asset-body'})
                     ]
    remove_tags = [dict(name='div', attrs={'class':['byline-section', 'asset-meta']})
                  ]

    feeds = [
        #####NEWS#####
        (u"News", u"http://www.news-journalonline.com/rss.xml"),
        (u"Breaking News", u"http://www.news-journalonline.com/breakingnews/rss.xml"),
        (u"Local - East Volusia", u"http://www.news-journalonline.com/news/local/east-volusia/rss.xml"),
        (u"Local - West Volusia", u"http://www.news-journalonline.com/news/local/west-volusia/rss.xml"),
        (u"Local - Southeast", u"http://www.news-journalonline.com/news/local/southeast-volusia/rss.xml"),
        (u"Local - Flagler", u"http://www.news-journalonline.com/news/local/flagler/rss.xml"),
        (u"Florida", u"http://www.news-journalonline.com/news/florida/rss.xml"),
        (u"National/World", u"http://www.news-journalonline.com/news/nationworld/rss.xml"),
        (u"Politics", u"http://www.news-journalonline.com/news/politics/rss.xml"),
        (u"News of Record", u"http://www.news-journalonline.com/news/news-of-record/rss.xml"),
        ####BUSINESS####
        (u"Business", u"http://www.news-journalonline.com/business/rss.xml"),
        #(u"Jobs", u"http://www.news-journalonline.com/business/jobs/rss.xml"),
        #(u"Markets", u"http://www.news-journalonline.com/business/markets/rss.xml"),
        #(u"Real Estate", u"http://www.news-journalonline.com/business/real-estate/rss.xml"),
        #(u"Technology", u"http://www.news-journalonline.com/business/technology/rss.xml"),
        ####SPORTS####
        (u"Sports", u"http://www.news-journalonline.com/sports/rss.xml"),
        (u"Racing", u"http://www.news-journalonline.com/racing/rss.xml"),
        (u"Highschool", u"http://www.news-journalonline.com/sports/highschool/rss.xml"),
        (u"College", u"http://www.news-journalonline.com/sports/college/rss.xml"),
        (u"Basketball", u"http://www.news-journalonline.com/sports/basketball/rss.xml"),
        (u"Football", u"http://www.news-journalonline.com/sports/football/rss.xml"),
        (u"Golf", u"http://www.news-journalonline.com/sports/golf/rss.xml"),
        (u"Other Sports", u"http://www.news-journalonline.com/sports/other/rss.xml"),
        ####LIFESTYLE####
        (u"Lifestyle", u"http://www.news-journalonline.com/lifestyle/rss.xml"),
        #(u"Fashion", u"http://www.news-journalonline.com/lifestyle/fashion/rss.xml"),
        (u"Food", u"http://www.news-journalonline.com/lifestyle/food/rss.xml"),
        #(u"Health", u"http://www.news-journalonline.com/lifestyle/health/rss.xml"),
        (u"Home and Garden", u"http://www.news-journalonline.com/lifestyle/home-and-garden/rss.xml"),
        (u"Living", u"http://www.news-journalonline.com/lifestyle/living/rss.xml"),
        (u"Religion", u"http://www.news-journalonline.com/lifestyle/religion/rss.xml"),
        #(u"Travel", u"http://www.news-journalonline.com/lifestyle/travel/rss.xml"),
        ####OPINION####
        #(u"Opinion", u"http://www.news-journalonline.com/opinion/rss.xml"),
        #(u"Letters to Editor", u"http://www.news-journalonline.com/opinion/letters-to-the-editor/rss.xml"),
        #(u"Columns", u"http://www.news-journalonline.com/columns/rss.xml"),
        #(u"Podcasts", u"http://www.news-journalonline.com/podcasts/rss.xml"),
        ####ENTERTAINMENT#### ##Weekly Feature##
        (u"Entertainment", u"http://www.go386.com/rss.xml"),
        (u"Go Out", u"http://www.go386.com/go/rss.xml"),
        (u"Music", u"http://www.go386.com/music/rss.xml"),
        (u"Movies", u"http://www.go386.com/movies/rss.xml"),
        #(u"Culture", u"http://www.go386.com/culture/rss.xml"),
    ]

    extra_css = '''
        .page-header{font-family:Arial,Helvetica,sans-serif; font-style:bold;font-size:22pt;}
        .asset-body{font-family:Helvetica,Arial,sans-serif; font-size:16pt;}
    '''
recipes/down_to_earth.recipe (new file, 18 lines)
@@ -0,0 +1,18 @@
from calibre.web.feeds.recipes import BasicNewsRecipe

class AdvancedUserRecipe1307834113(BasicNewsRecipe):

    title = u'Down To Earth'
    oldest_article = 300
    __author__ = 'sexymax15'
    max_articles_per_feed = 30
    no_stylesheets = True
    remove_javascript = True
    remove_attributes = ['width','height']
    use_embedded_content = False
    language = 'en_IN'
    remove_empty_feeds = True
    remove_tags_before = dict(name='div', id='PageContent')
    remove_tags_after = [dict(name='div'),{'class':'box'}]
    remove_tags = [{'class':'box'}]
    feeds = [(u'editor', u'http://www.downtoearth.org.in/taxonomy/term/20348/0/feed'),
             (u'cover story', u'http://www.downtoearth.org.in/taxonomy/term/20345/0/feed'),
             (u'special report', u'http://www.downtoearth.org.in/taxonomy/term/20384/0/feed'),
             (u'features', u'http://www.downtoearth.org.in/taxonomy/term/20350/0/feed'),
             (u'news', u'http://www.downtoearth.org.in/taxonomy/term/20366/0/feed'),
             (u'debate', u'http://www.downtoearth.org.in/taxonomy/term/20347/0/feed'),
             (u'natural disasters', u'http://www.downtoearth.org.in/taxonomy/term/20822/0/feed')]
recipes/elclubdelebook.recipe (new file, 61 lines)
@@ -0,0 +1,61 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
www.clubdelebook.com
'''

from calibre.web.feeds.news import BasicNewsRecipe

class ElClubDelEbook(BasicNewsRecipe):
    title = 'El club del ebook'
    __author__ = 'Darko Miletic'
    description = 'El Club del eBook, es la primera fuente de informacion sobre ebooks de Argentina. Aca vas a encontrar noticias, tips, tutoriales, recursos y opiniones sobre el mundo de los libros electronicos.'
    tags = 'ebook, libro electronico, e-book, ebooks, libros electronicos, e-books'
    oldest_article = 7
    max_articles_per_feed = 100
    language = 'es_AR'
    encoding = 'utf-8'
    no_stylesheets = True
    use_embedded_content = True
    publication_type = 'blog'
    masthead_url = 'http://dl.dropbox.com/u/2845131/elclubdelebook.png'
    extra_css = """
        body{font-family: Arial,Helvetica,sans-serif}
        img{ margin-bottom: 0.8em;
             border: 1px solid #333333;
             padding: 4px; display: block
           }
    """

    conversion_options = {
        'comment'    : description
        , 'tags'     : tags
        , 'publisher': title
        , 'language' : language
    }

    remove_tags = [dict(attrs={'id':'crp_related'})]
    remove_tags_after = dict(attrs={'id':'crp_related'})

    feeds = [(u'Articulos', u'http://feeds.feedburner.com/ElClubDelEbook')]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll('a'):
            limg = item.find('img')
            if item.string is not None:
                str = item.string
                item.replaceWith(str)
            else:
                if limg:
                    item.name = 'div'
                    item.attrs = []
                else:
                    str = self.tag_to_string(item)
                    item.replaceWith(str)
        for item in soup.findAll('img'):
            if not item.has_key('alt'):
                item['alt'] = 'image'
        return soup
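
The item.has_key('alt') test in the recipe above is the BeautifulSoup 3 / Python 2 idiom that calibre bundled at the time. Under BeautifulSoup 4 the same guard would be written as a membership test (a sketch, not part of the original recipe):

    # BeautifulSoup 4 equivalent of the has_key() check above
    if 'alt' not in item.attrs:
        item['alt'] = 'image'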
@@ -1,72 +1,59 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
cronista.com
www.cronista.com
'''

from calibre.web.feeds.news import BasicNewsRecipe

class ElCronista(BasicNewsRecipe):
    title = 'El Cronista'
class Pagina12(BasicNewsRecipe):
    title = 'El Cronista Comercial'
    __author__ = 'Darko Miletic'
    description = 'Noticias de Argentina'
    description = 'El Cronista Comercial es el Diario economico-politico mas valorado. Es la fuente mas confiable de informacion en temas de economia, finanzas y negocios enmarcados politicamente.'
    publisher = 'Cronista.com'
    category = 'news, politics, economy, finances, Argentina'
    oldest_article = 2
    language = 'es_AR'

    max_articles_per_feed = 100
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False
    encoding = 'cp1252'
    language = 'es_AR'
    remove_empty_feeds = True
    publication_type = 'newspaper'
    masthead_url = 'http://www.cronista.com/export/sites/diarioelcronista/arte/header-logo.gif'
    extra_css = """
        body{font-family: Arial,Helvetica,sans-serif }
        h2{font-family: Georgia,"Times New Roman",Times,serif }
        img{margin-bottom: 0.4em; display:block}
        .nom{font-weight: bold; vertical-align: baseline}
        .autor-cfoto{border-bottom: 1px solid #D2D2D2;
                     border-top: 1px solid #D2D2D2;
                     display: inline-block;
                     margin: 0 10px 10px 0;
                     padding: 10px;
                     width: 210px}
        .under{font-weight: bold}
        .time{font-size: small}
    """

    html2lrf_options = [
        '--comment'     , description
        , '--category'  , 'news, Argentina'
        , '--publisher' , title
    conversion_options = {
        'comment'     : description
        , 'tags'      : category
        , 'publisher' : publisher
        , 'language'  : language
    }

    remove_tags = [
        dict(name=['meta','link','base','iframe','object','embed'])
        ,dict(attrs={'class':['user-tools','tabsmedia']})
    ]
    remove_attributes = ['lang']
    remove_tags_before = dict(attrs={'class':'top'})
    remove_tags_after = dict(attrs={'class':'content-nota'})
    feeds = [(u'Ultimas noticias', u'http://www.cronista.com/rss.html')]

    keep_only_tags = [
        dict(name='table', attrs={'width':'100%' })
        ,dict(name='h1' , attrs={'class':'Arialgris16normal'})
    ]

    remove_tags = [dict(name='a', attrs={'class':'Arialazul12'})]

    feeds = [
        (u'Economia' , u'http://www.cronista.com/adjuntos/8/rss/Economia_EI.xml' )
        ,(u'Negocios' , u'http://www.cronista.com/adjuntos/8/rss/negocios_EI.xml' )
        ,(u'Ultimo momento' , u'http://www.cronista.com/adjuntos/8/rss/ultimo_momento.xml' )
        ,(u'Finanzas y Mercados' , u'http://www.cronista.com/adjuntos/8/rss/Finanzas_Mercados_EI.xml' )
        ,(u'Financial Times' , u'http://www.cronista.com/adjuntos/8/rss/FT_EI.xml' )
        ,(u'Opinion edicion impresa' , u'http://www.cronista.com/adjuntos/8/rss/opinion_edicion_impresa.xml' )
        ,(u'Socialmente Responsables', u'http://www.cronista.com/adjuntos/8/rss/Socialmente_Responsables.xml')
        ,(u'Asuntos Legales' , u'http://www.cronista.com/adjuntos/8/rss/asuntoslegales.xml' )
        ,(u'IT Business' , u'http://www.cronista.com/adjuntos/8/rss/itbusiness.xml' )
        ,(u'Management y RR.HH.' , u'http://www.cronista.com/adjuntos/8/rss/management.xml' )
        ,(u'Inversiones Personales' , u'http://www.cronista.com/adjuntos/8/rss/inversionespersonales.xml' )
    ]

    def print_version(self, url):
        main, sep, rest = url.partition('.com/notas/')
        article_id, lsep, rrest = rest.partition('-')
        return 'http://www.cronista.com/interior/index.php?p=imprimir_nota&idNota=' + article_id

    def preprocess_html(self, soup):
        mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
        soup.head.insert(0,mtag)
        soup.head.base.extract()
        htext = soup.find('h1',attrs={'class':'Arialgris16normal'})
        htext.name = 'p'
        soup.prettify()
        for item in soup.findAll(style=True):
            del item['style']
        return soup

    def get_cover_url(self):
        cover_url = None
        index = 'http://www.cronista.com/contenidos/'
        soup = self.index_to_soup(index + 'ee.html')
        link_item = soup.find('a',attrs={'href':"javascript:Close()"})
        if link_item:
            cover_url = index + link_item.img['src']
        return cover_url
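
The print_version in the diff above extracts the numeric article id with two str.partition calls; an illustration with a hypothetical article URL:

    url = 'http://www.cronista.com/notas/123456-titulo-de-la-nota'  # hypothetical
    main, sep, rest = url.partition('.com/notas/')  # rest == '123456-titulo-de-la-nota'
    article_id, lsep, rrest = rest.partition('-')   # article_id == '123456'
    # result: http://www.cronista.com/interior/index.php?p=imprimir_nota&idNota=123456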
@@ -1,5 +1,5 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
www.eluniversal.com
'''
@@ -15,12 +15,20 @@ class ElUniversal(BasicNewsRecipe):
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True
    encoding = 'cp1252'
    publisher = 'El Universal'
    category = 'news, Caracas, Venezuela, world'
    language = 'es_VE'
    publication_type = 'newspaper'
    cover_url = strftime('http://static.eluniversal.com/%Y/%m/%d/portada.jpg')

    extra_css = """
        .txt60{font-family: Tahoma,Geneva,sans-serif; font-size: small}
        .txt29{font-family: Tahoma,Geneva,sans-serif; font-size: small; color: gray}
        .txt38{font-family: Georgia,"Times New Roman",Times,serif; font-size: xx-large}
        .txt35{font-family: Georgia,"Times New Roman",Times,serif; font-size: large}
        body{font-family: Verdana,Arial,Helvetica,sans-serif}
    """
    conversion_options = {
        'comments' : description
        ,'tags'    : category
@@ -28,10 +36,11 @@ class ElUniversal(BasicNewsRecipe):
        ,'publisher' : publisher
    }

    keep_only_tags = [dict(name='div', attrs={'class':'Nota'})]
    remove_tags_before = dict(attrs={'class':'header-print MB10'})
    remove_tags_after = dict(attrs={'id':'SizeText'})
    remove_tags = [
        dict(name=['object','link','script','iframe'])
        ,dict(name='div',attrs={'class':'Herramientas'})
        dict(name=['object','link','script','iframe','meta'])
        ,dict(attrs={'class':'header-print MB10'})
    ]

    feeds = [
@@ -1,7 +1,7 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2008 - 2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = 'Copyright 2011 Starson17'
'''
engadget.com
'''
@@ -10,13 +10,28 @@ from calibre.web.feeds.news import BasicNewsRecipe

class Engadget(BasicNewsRecipe):
    title = u'Engadget'
    __author__ = 'Darko Miletic'
    __author__ = 'Starson17'
    __version__ = 'v1.00'
    __date__ = '02, July 2011'
    description = 'Tech news'
    language = 'en'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = True
    use_embedded_content = False
    remove_javascript = True
    remove_empty_feeds = True

    feeds = [ (u'Posts', u'http://www.engadget.com/rss.xml')]
    keep_only_tags = [dict(name='div', attrs={'class':['post_content permalink ','post_content permalink alt-post-full']})]
    remove_tags = [dict(name='div', attrs={'class':['filed_under','post_footer']})]
    remove_tags_after = [dict(name='div', attrs={'class':['post_footer']})]

    feeds = [(u'Posts', u'http://www.engadget.com/rss.xml')]

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''
@@ -1,32 +1,41 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
ft.com
www.ft.com
'''

import datetime
from calibre.web.feeds.news import BasicNewsRecipe

class FinancialTimes(BasicNewsRecipe):
    title = u'Financial Times'
    __author__ = 'Darko Miletic and Sujata Raman'
    description = ('Financial world news. Available after 5AM '
                   'GMT, daily.')
class FinancialTimes_rss(BasicNewsRecipe):
    title = 'Financial Times'
    __author__ = 'Darko Miletic'
    description = "The Financial Times (FT) is one of the world's leading business news and information organisations, recognised internationally for its authority, integrity and accuracy."
    publisher = 'The Financial Times Ltd.'
    category = 'news, finances, politics, World'
    oldest_article = 2
    language = 'en'

    max_articles_per_feed = 100
    max_articles_per_feed = 250
    no_stylesheets = True
    use_embedded_content = False
    needs_subscription = True
    simultaneous_downloads = 1
    delay = 1

    encoding = 'utf8'
    publication_type = 'newspaper'
    masthead_url = 'http://im.media.ft.com/m/img/masthead_main.jpg'
    LOGIN = 'https://registration.ft.com/registration/barrier/login'
    INDEX = 'http://www.ft.com'

    conversion_options = {
        'comment' : description
        , 'tags' : category
        , 'publisher' : publisher
        , 'language' : language
        , 'linearize_tables' : True
    }

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
        br.open(self.INDEX)
        if self.username is not None and self.password is not None:
            br.open(self.LOGIN)
            br.select_form(name='loginForm')
@@ -35,31 +44,63 @@ class FinancialTimes(BasicNewsRecipe):
            br.submit()
        return br

    keep_only_tags = [ dict(name='div', attrs={'id':'cont'}) ]
    remove_tags_after = dict(name='p', attrs={'class':'copyright'})
    keep_only_tags = [dict(name='div', attrs={'class':['fullstory fullstoryHeader','fullstory fullstoryBody','ft-story-header','ft-story-body','index-detail']})]
    remove_tags = [
        dict(name='div', attrs={'id':'floating-con'})
        ,dict(name=['meta','iframe','base','object','embed','link'])
        ,dict(attrs={'class':['storyTools','story-package','screen-copy','story-package separator','expandable-image']})
    ]
    remove_attributes = ['width','height','lang']

    extra_css = '''
        body{font-family:Arial,Helvetica,sans-serif;}
        h2(font-size:large;}
        .ft-story-header(font-size:xx-small;}
        .ft-story-body(font-size:small;}
        a{color:#003399;}
    extra_css = """
        body{font-family: Georgia,Times,"Times New Roman",serif}
        h2{font-size:large}
        .ft-story-header{font-size: x-small}
        .container{font-size:x-small;}
        h3{font-size:x-small;color:#003399;}
        '''
        .copyright{font-size: x-small}
        img{margin-top: 0.8em; display: block}
        .lastUpdated{font-family: Arial,Helvetica,sans-serif; font-size: x-small}
        .byline,.ft-story-body,.ft-story-header{font-family: Arial,Helvetica,sans-serif}
        """

    feeds = [
        (u'UK' , u'http://www.ft.com/rss/home/uk' )
        ,(u'US' , u'http://www.ft.com/rss/home/us' )
        ,(u'Europe' , u'http://www.ft.com/rss/home/europe' )
        ,(u'Asia' , u'http://www.ft.com/rss/home/asia' )
        ,(u'Middle East', u'http://www.ft.com/rss/home/middleeast')
    ]

    def preprocess_html(self, soup):
        content_type = soup.find('meta', {'http-equiv':'Content-Type'})
        if content_type:
            content_type['content'] = 'text/html; charset=utf-8'
        items = ['promo-box','promo-title',
                 'promo-headline','promo-image',
                 'promo-intro','promo-link','subhead']
        for item in items:
            for it in soup.findAll(item):
                it.name = 'div'
                it.attrs = []
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll('a'):
            limg = item.find('img')
            if item.string is not None:
                str = item.string
                item.replaceWith(str)
            else:
                if limg:
                    item.name = 'div'
                    item.attrs = []
                else:
                    str = self.tag_to_string(item)
                    item.replaceWith(str)
        for item in soup.findAll('img'):
            if not item.has_key('alt'):
                item['alt'] = 'image'
        return soup

    def get_cover_url(self):
        cdate = datetime.date.today()
        if cdate.isoweekday() == 7:
            cdate -= datetime.timedelta(days=1)
        return cdate.strftime('http://specials.ft.com/vtf_pdf/%d%m%y_FRONT1_USA.pdf')
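The new get_cover_url encodes one business rule: the FT prints no Sunday edition, so on Sundays (isoweekday 7) the recipe backs up one day and fetches Saturday's front page PDF. Standalone, the same logic is:

    import datetime

    def ft_us_cover_url():
        cdate = datetime.date.today()
        if cdate.isoweekday() == 7:      # Sunday -> use Saturday's paper
            cdate -= datetime.timedelta(days=1)
        return cdate.strftime('http://specials.ft.com/vtf_pdf/%d%m%y_FRONT1_USA.pdf')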
@@ -1,15 +1,19 @@
__license__ = 'GPL v3'
__copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
ft.com
www.ft.com/uk-edition
'''

import datetime
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe

class FinancialTimes(BasicNewsRecipe):
    title = u'Financial Times - UK printed edition'
    title = 'Financial Times - UK printed edition'
    __author__ = 'Darko Miletic'
    description = 'Financial world news'
    description = "The Financial Times (FT) is one of the world's leading business news and information organisations, recognised internationally for its authority, integrity and accuracy."
    publisher = 'The Financial Times Ltd.'
    category = 'news, finances, politics, UK, World'
    oldest_article = 2
    language = 'en_GB'
    max_articles_per_feed = 250
@@ -17,14 +21,23 @@ class FinancialTimes(BasicNewsRecipe):
    use_embedded_content = False
    needs_subscription = True
    encoding = 'utf8'
    simultaneous_downloads = 1
    delay = 1
    publication_type = 'newspaper'
    masthead_url = 'http://im.media.ft.com/m/img/masthead_main.jpg'
    LOGIN = 'https://registration.ft.com/registration/barrier/login'
    INDEX = 'http://www.ft.com/uk-edition'
    PREFIX = 'http://www.ft.com'

    conversion_options = {
        'comment' : description
        , 'tags' : category
        , 'publisher' : publisher
        , 'language' : language
        , 'linearize_tables' : True
    }

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
        br.open(self.INDEX)
        if self.username is not None and self.password is not None:
            br.open(self.LOGIN)
            br.select_form(name='loginForm')
@@ -33,29 +46,34 @@ class FinancialTimes(BasicNewsRecipe):
            br.submit()
        return br

    keep_only_tags = [ dict(name='div', attrs={'id':'cont'}) ]
    remove_tags_after = dict(name='p', attrs={'class':'copyright'})
    keep_only_tags = [dict(name='div', attrs={'class':['fullstory fullstoryHeader','fullstory fullstoryBody','ft-story-header','ft-story-body','index-detail']})]
    remove_tags = [
        dict(name='div', attrs={'id':'floating-con'})
        ,dict(name=['meta','iframe','base','object','embed','link'])
        ,dict(attrs={'class':['storyTools','story-package','screen-copy','story-package separator','expandable-image']})
    ]
    remove_attributes = ['width','height','lang']

    extra_css = """
        body{font-family:Arial,Helvetica,sans-serif;}
        h2{font-size:large;}
        .ft-story-header{font-size:xx-small;}
        .ft-story-body{font-size:small;}
        a{color:#003399;}
        body{font-family: Georgia,Times,"Times New Roman",serif}
        h2{font-size:large}
        .ft-story-header{font-size: x-small}
        .container{font-size:x-small;}
        h3{font-size:x-small;color:#003399;}
        .copyright{font-size: x-small}
        img{margin-top: 0.8em; display: block}
        .lastUpdated{font-family: Arial,Helvetica,sans-serif; font-size: x-small}
        .byline,.ft-story-body,.ft-story-header{font-family: Arial,Helvetica,sans-serif}
        """

    def get_artlinks(self, elem):
        articles = []
        for item in elem.findAll('a',href=True):
            url = self.PREFIX + item['href']
            rawlink = item['href']
            if rawlink.startswith('http://'):
                url = rawlink
            else:
                url = self.PREFIX + rawlink
            title = self.tag_to_string(item)
            date = strftime(self.timefmt)
            articles.append({
@@ -86,5 +104,35 @@ class FinancialTimes(BasicNewsRecipe):
        return feeds

    def preprocess_html(self, soup):
        return self.adeify_images(soup)
        items = ['promo-box','promo-title',
                 'promo-headline','promo-image',
                 'promo-intro','promo-link','subhead']
        for item in items:
            for it in soup.findAll(item):
                it.name = 'div'
                it.attrs = []
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll('a'):
            limg = item.find('img')
            if item.string is not None:
                str = item.string
                item.replaceWith(str)
            else:
                if limg:
                    item.name = 'div'
                    item.attrs = []
                else:
                    str = self.tag_to_string(item)
                    item.replaceWith(str)
        for item in soup.findAll('img'):
            if not item.has_key('alt'):
                item['alt'] = 'image'
        return soup

    def get_cover_url(self):
        cdate = datetime.date.today()
        if cdate.isoweekday() == 7:
            cdate -= datetime.timedelta(days=1)
        return cdate.strftime('http://specials.ft.com/vtf_pdf/%d%m%y_FRONT1_LON.pdf')
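The get_artlinks change above guards against prefixing links that are already absolute: index pages mix absolute and site-relative hrefs, and the old code prepended PREFIX unconditionally. Isolated (sample path is illustrative):

    def make_absolute(prefix, rawlink):
        # already-absolute links pass through; relative ones get the site prefix
        if rawlink.startswith('http://'):
            return rawlink
        return prefix + rawlink

    make_absolute('http://www.ft.com', '/cms/s/0/abcd.html')  # 'http://www.ft.com/cms/s/0/abcd.html'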
81
recipes/frontlineonnet.recipe
Normal file
@@ -0,0 +1,81 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
frontlineonnet.com
'''

import re
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe

class Frontlineonnet(BasicNewsRecipe):
    title = 'Frontline'
    __author__ = 'Darko Miletic'
    description = "India's national magazine"
    publisher = 'Frontline'
    category = 'news, politics, India'
    no_stylesheets = True
    delay = 1
    INDEX = 'http://frontlineonnet.com/'
    use_embedded_content = False
    encoding = 'cp1252'
    language = 'en_IN'
    publication_type = 'magazine'
    masthead_url = 'http://frontlineonnet.com/images/newfline.jpg'
    extra_css = """
        body{font-family: Verdana,Arial,Helvetica,sans-serif}
        img{margin-top:0.5em; margin-bottom: 0.7em; display: block}
        """

    conversion_options = {
        'comment' : description
        , 'tags' : category
        , 'publisher' : publisher
        , 'language' : language
        , 'linearize_tables' : True
    }

    preprocess_regexps = [
        (re.compile(r'.*?<base', re.DOTALL|re.IGNORECASE),lambda match: '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"><html dir="ltr" xml:lang="en-IN"><head><title>title</title><base')
        ,(re.compile(r'<base .*?>', re.DOTALL|re.IGNORECASE),lambda match: '</head><body>')
        ,(re.compile(r'<byline>', re.DOTALL|re.IGNORECASE),lambda match: '<div class="byline">')
        ,(re.compile(r'</byline>', re.DOTALL|re.IGNORECASE),lambda match: '</div>')
        ,(re.compile(r'<center>', re.DOTALL|re.IGNORECASE),lambda match: '<div class="ctr">')
        ,(re.compile(r'</center>', re.DOTALL|re.IGNORECASE),lambda match: '</div>')
    ]

    keep_only_tags = [
        dict(name='font', attrs={'class':'storyhead'})
        ,dict(attrs={'class':'byline'})
    ]
    remove_attributes = ['size','noshade','border']

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll('img'):
            if not item.has_key('alt'):
                item['alt'] = 'image'
        return soup

    def parse_index(self):
        articles = []
        soup = self.index_to_soup(self.INDEX)
        for feed_link in soup.findAll('a',href=True):
            if feed_link['href'].startswith('stories/'):
                url = self.INDEX + feed_link['href']
                title = self.tag_to_string(feed_link)
                date = strftime(self.timefmt)
                articles.append({
                    'title' :title
                    ,'date' :date
                    ,'url' :url
                    ,'description':''
                })
        return [('Frontline', articles)]

    def print_version(self, url):
        return "http://www.hinduonnet.com/thehindu/thscrip/print.pl?prd=fline&file=" + url.rpartition('/')[2]

    def image_url_processor(self, baseurl, url):
        return url.replace('../images/', self.INDEX + 'images/').strip()
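print_version above maps an article onto The Hindu's shared print CGI by keeping only the trailing path component. For instance (the article filename is illustrative):

    url = 'http://frontlineonnet.com/stories/20110715281508400.htm'
    # rpartition('/') splits at the last slash; [2] is the filename
    print_url = ('http://www.hinduonnet.com/thehindu/thscrip/print.pl?prd=fline&file='
                 + url.rpartition('/')[2])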
49
recipes/galicia_confidential.recipe
Normal file
@@ -0,0 +1,49 @@
# -*- coding: utf-8 -*-

from calibre.web.feeds.news import BasicNewsRecipe
from calibre.web.feeds import Feed

class GC_gl(BasicNewsRecipe):
    title = u'Galicia Confidencial (RSS)'
    __author__ = u'Susana Sotelo Docío'
    description = u'Unha fiestra de información aberta a todos'
    publisher = u'Galicia Confidencial'
    category = u'news, society, politics, Galicia'
    encoding = 'utf-8'
    language = 'gl'
    direction = 'ltr'
    cover_url = 'http://galiciaconfidencial.com/imagenes/header/logo_gc.gif'
    oldest_article = 5
    max_articles_per_feed = 100
    center_navbar = False

    feeds = [(u'Novas no RSS', u'http://galiciaconfidencial.com/rss2/xeral.rss')]

    extra_css = u' p{text-align:left} '

    def print_version(self, url):
        return url.replace('http://galiciaconfidencial.com/nova/', 'http://galiciaconfidencial.com/imprimir/')

    def parse_index(self):
        feeds = []
        self.gc_parse_feeds(feeds)
        return feeds

    def gc_parse_feeds(self, feeds):
        rssFeeds = Feed()
        rssFeeds = BasicNewsRecipe.parse_feeds(self)
        self.feed_to_index_append(rssFeeds[:], feeds)

    def feed_to_index_append(self, feedObject, masterFeed):
        for feed in feedObject:
            newArticles = []
            for article in feed.articles:
                newArt = {
                    'title' : article.title,
                    'url' : article.url,
                    'date' : article.date
                }
                newArticles.append(newArt)
            masterFeed.append((feed.title,newArticles))
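gc_parse_feeds piggybacks on calibre's normal RSS parsing, and feed_to_index_append then re-packs each Feed into the plain (title, list-of-dicts) shape that parse_index must return, so every article URL still passes through print_version. The rewrite itself is a straight string replace (article id is illustrative):

    url = 'http://galiciaconfidencial.com/nova/12345'
    print_url = url.replace('http://galiciaconfidencial.com/nova/',
                            'http://galiciaconfidencial.com/imprimir/')
    # -> 'http://galiciaconfidencial.com/imprimir/12345'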
35
recipes/geek_poke.recipe
Normal file
@@ -0,0 +1,35 @@
#!/usr/bin/python

from calibre.web.feeds.news import BasicNewsRecipe
import re

class AdvancedUserRecipe1307556816(BasicNewsRecipe):
    title = u'Geek and Poke'
    __author__ = u'DrMerry'
    description = u'Geek and Poke Cartoons'
    oldest_article = 31
    max_articles_per_feed = 100
    language = u'en'
    simultaneous_downloads = 5
    #delay = 1
    timefmt = ' [%A, %d %B, %Y]'
    summary_length = -1
    no_stylesheets = True
    cover_url = 'http://geekandpoke.typepad.com/aboutcoders.jpeg'
    remove_javascript = True
    remove_empty_feeds = True
    publication_type = 'blog'

    preprocess_regexps = [ (re.compile(r'(<p>&nbsp;</p>|<iframe.*</iframe>|<a[^>]*>Tweet</a>|<a[^>]*>|</a>)', re.DOTALL|re.IGNORECASE),lambda match: ''),
                           (re.compile(r'(&nbsp;| )', re.DOTALL|re.IGNORECASE),lambda match: ' '),
                           (re.compile(r'<br( /)?>(<br( /)?>)+', re.DOTALL|re.IGNORECASE),lambda match: '<br>')
    ]

    extra_css = 'body, h3, p, h2, h1, div, span{margin:0px} h2.date-header {font-size: 0.7em; color:#eee;} h3.entry-header{font-size: 1.0em} div.entry-body{font-size: 0.9em}'

    remove_tags_before = dict(name='h2', attrs={'class':'date-header'})
    remove_tags_after = dict(name='div', attrs={'class':'entry-body'})

    feeds = [(u'Geek and Poke', u'http://feeds.feedburner.com/GeekAndPoke?format=xml')]
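The third preprocess_regexps entry collapses runs of <br> variants into a single break. In isolation:

    import re

    collapse_br = re.compile(r'<br( /)?>(<br( /)?>)+', re.DOTALL | re.IGNORECASE)
    print(collapse_br.sub('<br>', 'a<br><br /><br>b'))   # prints: a<br>b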
@@ -1,5 +1,6 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
from datetime import date, timedelta

class HBR(BasicNewsRecipe):

@@ -12,13 +13,14 @@ class HBR(BasicNewsRecipe):
    no_stylesheets = True

    LOGIN_URL = 'http://hbr.org/login?request_url=/'
    INDEX = 'http://hbr.org/current'
    INDEX = 'http://hbr.org/archive-toc/BR'

    keep_only_tags = [dict(name='div', id='pageContainer')]
    remove_tags = [dict(id=['mastheadContainer', 'magazineHeadline',
        'articleToolbarTopRD', 'pageRightSubColumn', 'pageRightColumn',
        'todayOnHBRListWidget', 'mostWidget', 'keepUpWithHBR',
        'mailingListTout', 'partnerCenter', 'pageFooter',
        'superNavHeadContainer', 'hbrDisqus',
        'articleToolbarTop', 'articleToolbarBottom', 'articleToolbarRD']),
        dict(name='iframe')]
    extra_css = '''
@@ -55,9 +57,14 @@ class HBR(BasicNewsRecipe):

    def hbr_get_toc(self):
        soup = self.index_to_soup(self.INDEX)
        url = soup.find('a', text=lambda t:'Full Table of Contents' in t).parent.get('href')
        return self.index_to_soup('http://hbr.org'+url)
        today = date.today()
        future = today + timedelta(days=30)
        for x in [x.strftime('%y%m') for x in (future, today)]:
            url = self.INDEX + x
            soup = self.index_to_soup(url)
            if not soup.find(text='Issue Not Found'):
                return soup
        raise Exception('Could not find current issue')

    def hbr_parse_section(self, container, feeds):
        current_section = None
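The rewritten hbr_get_toc stops scraping the 'current issue' page and instead probes archive-toc URLs by issue code (two-digit year plus month), trying next month's issue before the current one and bailing out on the 'Issue Not Found' marker. The probing order is just:

    from datetime import date, timedelta

    today = date.today()
    future = today + timedelta(days=30)
    codes = [d.strftime('%y%m') for d in (future, today)]     # e.g. ['1108', '1107']
    urls = ['http://hbr.org/archive-toc/BR' + c for c in codes]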
BIN
recipes/icons/athens_news.png
Normal file (514 B)
BIN
recipes/icons/buenosaireseconomico.png
Normal file (400 B)
BIN
recipes/icons/elclubdelebook.png
Normal file (5.3 KiB)
BIN (modified icon: 770 B before, 1.1 KiB after)
BIN
recipes/icons/financial_times.png
Normal file (1.4 KiB)
BIN
recipes/icons/financial_times_uk.png
Normal file (1.4 KiB)
BIN
recipes/icons/pecat.png
Normal file (383 B)
43
recipes/idg_now.recipe
Normal file
@@ -0,0 +1,43 @@
from calibre.web.feeds.news import BasicNewsRecipe

class IDGNow(BasicNewsRecipe):
    title = 'IDG Now!'
    __author__ = 'Diniz Bortolotto'
    description = 'Posts do IDG Now!'
    oldest_article = 7
    max_articles_per_feed = 20
    encoding = 'utf8'
    publisher = 'Now!Digital Business Ltda.'
    category = 'technology, telecom, IT, Brazil'
    language = 'pt_BR'
    publication_type = 'technology portal'
    use_embedded_content = False
    extra_css = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt }'

    def get_article_url(self, article):
        link = article.get('link', None)
        if link is None:
            return article
        if link.split('/')[-1]=="story01.htm":
            link=link.split('/')[-2]
            a=['0B','0C','0D','0E','0F','0G','0I','0N' ,'0L0S','0A','0J3A']
            b=['.' ,'/' ,'?' ,'-' ,'=' ,'&' ,'_','.com','www.','0',':']
            for i in range(0,len(a)):
                link=link.replace(a[i],b[i])
            link=link.split('&')[-3]
            link=link.split('=')[1]
            link=link + "/IDGNoticiaPrint_view"
        return link

    feeds = [
        (u'Ultimas noticias', u'http://rss.idgnow.com.br/c/32184/f/499640/index.rss'),
        (u'Computa\xe7\xe3o Corporativa', u'http://rss.idgnow.com.br/c/32184/f/499643/index.rss'),
        (u'Carreira', u'http://rss.idgnow.com.br/c/32184/f/499644/index.rss'),
        (u'Computa\xe7\xe3o Pessoal', u'http://rss.idgnow.com.br/c/32184/f/499645/index.rss'),
        (u'Internet', u'http://rss.idgnow.com.br/c/32184/f/499646/index.rss'),
        (u'Mercado', u'http://rss.idgnow.com.br/c/32184/f/419982/index.rss'),
        (u'Seguran\xe7a', u'http://rss.idgnow.com.br/c/32184/f/499647/index.rss'),
        (u'Telecom e Redes', u'http://rss.idgnow.com.br/c/32184/f/499648/index.rss')
    ]

    reverse_article_order = True
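get_article_url above undoes the feed proxy's story01.htm escaping, in which URL punctuation is encoded as two-character '0X' tokens. Applying the same table to a hypothetical escaped fragment:

    a = ['0B','0C','0D','0E','0F','0G','0I','0N','0L0S','0A','0J3A']
    b = ['.' ,'/' ,'?' ,'-' ,'=' ,'&' ,'_','.com','www.','0',':']
    link = '0L0Sidgnow0N0Bbr0C'          # hypothetical escaped fragment
    for i in range(len(a)):
        link = link.replace(a[i], b[i])
    print(link)                          # www.idgnow.com.br/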
@@ -6,7 +6,7 @@ class TheIndependent(BasicNewsRecipe):
    language = 'en_GB'
    __author__ = 'Krittika Goyal'
    oldest_article = 1 #days
    max_articles_per_feed = 25
    max_articles_per_feed = 30
    encoding = 'latin1'

    no_stylesheets = True
@@ -25,24 +25,39 @@ class TheIndependent(BasicNewsRecipe):
            'http://www.independent.co.uk/news/uk/rss'),
        ('World',
            'http://www.independent.co.uk/news/world/rss'),
        ('Sport',
            'http://www.independent.co.uk/sport/rss'),
        ('Arts and Entertainment',
            'http://www.independent.co.uk/arts-entertainment/rss'),
        ('Business',
            'http://www.independent.co.uk/news/business/rss'),
        ('Life and Style',
            'http://www.independent.co.uk/life-style/gadgets-and-tech/news/rss'),
        ('Science',
            'http://www.independent.co.uk/news/science/rss'),
        ('People',
            'http://www.independent.co.uk/news/people/rss'),
        ('Science',
            'http://www.independent.co.uk/news/science/rss'),
        ('Media',
            'http://www.independent.co.uk/news/media/rss'),
        ('Health and Families',
            'http://www.independent.co.uk/life-style/health-and-families/rss'),
        ('Education',
            'http://www.independent.co.uk/news/education/rss'),
        ('Obituaries',
            'http://www.independent.co.uk/news/obituaries/rss'),

        ('Opinion',
            'http://www.independent.co.uk/opinion/rss'),

        ('Environment',
            'http://www.independent.co.uk/environment/rss'),

        ('Sport',
            'http://www.independent.co.uk/sport/rss'),

        ('Life and Style',
            'http://www.independent.co.uk/life-style/rss'),

        ('Arts and Entertainment',
            'http://www.independent.co.uk/arts-entertainment/rss'),

        ('Travel',
            'http://www.independent.co.uk/travel/rss'),

        ('Money',
            'http://www.independent.co.uk/money/rss'),
    ]

    def preprocess_html(self, soup):
@@ -1,5 +1,5 @@
__license__ = 'GPL v3'
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
infobae.com
'''
@@ -9,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class Infobae(BasicNewsRecipe):
    title = 'Infobae.com'
    __author__ = 'Darko Miletic and Sujata Raman'
    description = 'Informacion Libre las 24 horas'
    description = 'Infobae.com es el sitio de noticias con mayor actualizacion de Latinoamérica. Noticias actualizadas las 24 horas, los 365 días del año.'
    publisher = 'Infobae.com'
    category = 'news, politics, Argentina'
    oldest_article = 1
@@ -17,13 +17,13 @@ class Infobae(BasicNewsRecipe):
    no_stylesheets = True
    use_embedded_content = False
    language = 'es_AR'
    encoding = 'cp1252'
    masthead_url = 'http://www.infobae.com/imgs/header/header.gif'
    remove_javascript = True
    encoding = 'utf8'
    masthead_url = 'http://www.infobae.com/media/img/static/logo-infobae.gif'
    remove_empty_feeds = True
    extra_css = '''
        body{font-family:Arial,Helvetica,sans-serif;}
        .popUpTitulo{color:#0D4261; font-size: xx-large}
        body{font-family: Arial,Helvetica,sans-serif}
        img{display: block}
        .categoria{font-size: small; text-transform: uppercase}
        '''

    conversion_options = {
@@ -31,26 +31,44 @@ class Infobae(BasicNewsRecipe):
        , 'tags' : category
        , 'publisher' : publisher
        , 'language' : language
        , 'linearize_tables' : True
    }

    feeds = [
        (u'Noticias' , u'http://www.infobae.com/adjuntos/html/RSS/hoy.xml' )
        ,(u'Salud' , u'http://www.infobae.com/adjuntos/html/RSS/salud.xml' )
        ,(u'Tecnologia', u'http://www.infobae.com/adjuntos/html/RSS/tecnologia.xml')
        ,(u'Deportes' , u'http://www.infobae.com/adjuntos/html/RSS/deportes.xml' )
    keep_only_tags = [dict(attrs={'class':['titularnota','nota','post-title','post-entry','entry-title','entry-info','entry-content']})]
    remove_tags_after = dict(attrs={'class':['interior-noticia','nota-desc','tags']})
    remove_tags = [
        dict(name=['base','meta','link','iframe','object','embed','ins'])
        ,dict(attrs={'class':['barranota','tags']})
    ]

    def print_version(self, url):
        article_part = url.rpartition('/')[2]
        article_id = article_part.partition('-')[0]
        return 'http://www.infobae.com/notas/nota_imprimir.php?Idx=' + article_id
    feeds = [
        (u'Saludable' , u'http://www.infobae.com/rss/saludable.xml')
        ,(u'Economia' , u'http://www.infobae.com/rss/economia.xml' )
        ,(u'En Numeros', u'http://www.infobae.com/rss/rating.xml' )
        ,(u'Finanzas' , u'http://www.infobae.com/rss/finanzas.xml' )
        ,(u'Mundo' , u'http://www.infobae.com/rss/mundo.xml' )
        ,(u'Sociedad' , u'http://www.infobae.com/rss/sociedad.xml' )
        ,(u'Politica' , u'http://www.infobae.com/rss/politica.xml' )
        ,(u'Deportes' , u'http://www.infobae.com/rss/deportes.xml' )
    ]

    def postprocess_html(self, soup, first):
        for tag in soup.findAll(name='strong'):
            tag.name = 'b'
    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll('a'):
            limg = item.find('img')
            if item.string is not None:
                str = item.string
                item.replaceWith(str)
            else:
                if limg:
                    item.name = 'div'
                    item.attrs = []
                else:
                    str = self.tag_to_string(item)
                    item.replaceWith(str)
        for item in soup.findAll('img'):
            if not item.has_key('alt'):
                item['alt'] = 'image'
        return soup
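print_version above derives the numeric article id from the URL slug (the URL shape here is illustrative):

    url = 'http://www.infobae.com/notas/595018-Un-titulo.html'
    article_part = url.rpartition('/')[2]         # '595018-Un-titulo.html'
    article_id = article_part.partition('-')[0]   # '595018'
    print_url = 'http://www.infobae.com/notas/nota_imprimir.php?Idx=' + article_id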
@@ -99,7 +99,7 @@ class LeMonde(BasicNewsRecipe):
    keep_only_tags = [
        dict(name='div', attrs={'class':['contenu']})
    ]

    remove_tags = [dict(name='div', attrs={'class':['LM_atome']})]
    remove_tags_after = [dict(id='appel_temoignage')]

    def get_article_url(self, article):
@@ -14,7 +14,7 @@ class LeTemps(BasicNewsRecipe):
    title = u'Le Temps'
    oldest_article = 7
    max_articles_per_feed = 100
    __author__ = 'Sujata Raman'
    __author__ = 'Kovid Goyal'
    description = 'French news. Needs a subscription from http://www.letemps.ch'
    no_stylesheets = True
    remove_javascript = True
@@ -27,6 +27,7 @@ class LeTemps(BasicNewsRecipe):
    def get_browser(self):
        br = BasicNewsRecipe.get_browser(self)
        br.open('http://www.letemps.ch/login')
        br.select_form(nr=1)
        br['username'] = self.username
        br['password'] = self.password
        raw = br.submit().read()
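The one-line fix above matters because mechanize needs a form selected before fields can be assigned; Le Temps' login form is unnamed, so it is picked by position (nr=1, the page's second form), whereas the FT recipes earlier in this merge can use select_form(name='loginForm'). A self-contained sketch of the flow:

    from calibre.web.feeds.news import BasicNewsRecipe

    def letemps_browser(username, password):
        br = BasicNewsRecipe.get_browser()
        br.open('http://www.letemps.ch/login')
        br.select_form(nr=1)              # unnamed form, selected by index
        br['username'] = username
        br['password'] = password
        br.submit()
        return br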
@@ -1,239 +1,28 @@
#!/usr/bin/env python
from calibre.web.feeds.news import BasicNewsRecipe

__license__ = 'GPL v3'

'''
macleans.ca
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
from datetime import timedelta, date

class Macleans(BasicNewsRecipe):
class AdvancedUserRecipe1308306308(BasicNewsRecipe):
    title = u'Macleans Magazine'
    __author__ = 'Nick Redding'
    language = 'en_CA'
    description = ('Macleans Magazine')
    __author__ = 'sexymax15'
    oldest_article = 30
    max_articles_per_feed = 12

    use_embedded_content = False

    remove_empty_feeds = True
    no_stylesheets = True
    timefmt = ' [%b %d]'
    remove_javascript = True
    remove_tags = [dict(name ='img'),dict (id='header'),{'class':'postmetadata'}]
    remove_tags_after = {'class':'postmetadata'}

    # customization notes: delete sections you are not interested in
    # set oldest_article to the maximum number of days back from today to include articles
    sectionlist = [
        ['http://www2.macleans.ca/','Front Page'],
        ['http://www2.macleans.ca/category/canada/','Canada'],
        ['http://www2.macleans.ca/category/world-from-the-magazine/','World'],
        ['http://www2.macleans.ca/category/business','Business'],
        ['http://www2.macleans.ca/category/arts-culture/','Culture'],
        ['http://www2.macleans.ca/category/opinion','Opinion'],
        ['http://www2.macleans.ca/category/health-from-the-magazine/','Health'],
        ['http://www2.macleans.ca/category/environment-from-the-magazine/','Environment'],
        ['http://www2.macleans.ca/category/education/','On Campus'],
        ['http://www2.macleans.ca/category/travel-from-the-magazine/','Travel']
    ]
    oldest_article = 7

    # formatting for print version of articles
    extra_css = '''h2{font-family:Times,serif; font-size:large;}
                   small {font-family:Times,serif; font-size:xx-small; list-style-type: none;}
                '''

    # tag handling for print version of articles
    keep_only_tags = [dict(id='tw-print')]
    remove_tags = [dict({'class':'postmetadata'})]

    def preprocess_html(self,soup):
        for img_tag in soup.findAll('img'):
            parent_tag = img_tag.parent
            if parent_tag.name == 'a':
                new_tag = Tag(soup,'p')
                new_tag.insert(0,img_tag)
                parent_tag.replaceWith(new_tag)
            elif parent_tag.name == 'p':
                if not self.tag_to_string(parent_tag) == '':
                    new_div = Tag(soup,'div')
                    new_tag = Tag(soup,'p')
                    new_tag.insert(0,img_tag)
                    parent_tag.replaceWith(new_div)
                    new_div.insert(0,new_tag)
                    new_div.insert(1,parent_tag)
        return soup

    def parse_index(self):

        articles = {}
        key = None
        ans = []

        def parse_index_page(page_url,page_title):

            def decode_date(datestr):
                dmysplit = datestr.strip().lower().split(',')
                mdsplit = dmysplit[1].split()
                m = ['january','february','march','april','may','june','july','august','september','october','november','december'].index(mdsplit[0])+1
                d = int(mdsplit[1])
                y = int(dmysplit[2].split()[0])
                return date(y,m,d)

            def article_title(tag):
                atag = tag.find('a',href=True)
                if not atag:
                    return ''
                return self.tag_to_string(atag)

            def article_url(tag):
                atag = tag.find('a',href=True)
                if not atag:
                    return ''
                return atag['href']+'print/'

            def article_description(tag):
                for p_tag in tag.findAll('p'):
                    d = self.tag_to_string(p_tag,False)
                    if not d == '':
                        return d
                return ''

            def compound_h4_h3_title(tag):
                if tag.h4:
                    if tag.h3:
                        return self.tag_to_string(tag.h4,False)+u'\u2014'+self.tag_to_string(tag.h3,False)
                    else:
                        return self.tag_to_string(tag.h4,False)
                elif tag.h3:
                    return self.tag_to_string(tag.h3,False)
                else:
                    return ''

            def compound_h2_h4_title(tag):
                if tag.h2:
                    if tag.h4:
                        return self.tag_to_string(tag.h2,False)+u'\u2014'+self.tag_to_string(tag.h4,False)
                    else:
                        return self.tag_to_string(tag.h2,False)
                elif tag.h4:
                    return self.tag_to_string(tag.h4,False)
                else:
                    return ''

            def handle_article(header_tag, outer_tag):
                if header_tag:
                    url = article_url(header_tag)
                    title = article_title(header_tag)
                    author_date_tag = outer_tag.h4
                    if author_date_tag:
                        author_date = self.tag_to_string(author_date_tag,False).split(' - ')
                        author = author_date[0].strip()
                        article_date = decode_date(author_date[1])
                        earliest_date = date.today() - timedelta(days=self.oldest_article)
                        if article_date < earliest_date:
                            self.log("Skipping article dated %s" % author_date[1])
                        else:
                            excerpt_div = outer_tag.find('div','excerpt')
                            if excerpt_div:
                                description = article_description(excerpt_div)
                            else:
                                description = ''
                            if not articles.has_key(page_title):
                                articles[page_title] = []
                            articles[page_title].append(dict(title=title,url=url,date=author_date[1],description=description,author=author,content=''))

            def handle_category_article(cat, header_tag, outer_tag):
                url = article_url(header_tag)
                title = article_title(header_tag)
                if not title == '':
                    title = cat+u'\u2014'+title
                a_tag = outer_tag.find('span','authorLink')
                if a_tag:
                    author = self.tag_to_string(a_tag,False)
                    a_tag.parent.extract()
                else:
                    author = ''
                description = article_description(outer_tag)
                if not articles.has_key(page_title):
                    articles[page_title] = []
                articles[page_title].append(dict(title=title,url=url,date='',description=description,author=author,content=''))

            soup = self.index_to_soup(page_url)

            if page_title == 'Front Page':
                # special processing for the front page
                top_stories = soup.find('div',{ "id" : "macleansFeatured" })
                if top_stories:
                    for div_slide in top_stories.findAll('div','slide'):
                        url = article_url(div_slide)
                        div_title = div_slide.find('div','header')
                        if div_title:
                            title = self.tag_to_string(div_title,False)
                        else:
                            title = ''
                        description = article_description(div_slide)
                        if not articles.has_key(page_title):
                            articles[page_title] = []
                        articles[page_title].append(dict(title=title,url=url,date='',description=description,author='',content=''))

                from_macleans = soup.find('div',{ "id" : "fromMacleans" })
                if from_macleans:
                    for li_tag in from_macleans.findAll('li','fromMacleansArticle'):
                        title = compound_h4_h3_title(li_tag)
                        url = article_url(li_tag)
                        description = article_description(li_tag)
                        if not articles.has_key(page_title):
                            articles[page_title] = []
                        articles[page_title].append(dict(title=title,url=url,date='',description=description,author='',content=''))

                blog_central = soup.find('div',{ "id" : "bloglist" })
                if blog_central:
                    for li_tag in blog_central.findAll('li'):
                        title = compound_h2_h4_title(li_tag)
                        if li_tag.h4:
                            url = article_url(li_tag.h4)
                            if not articles.has_key(page_title):
                                articles[page_title] = []
                            articles[page_title].append(dict(title=title,url=url,date='',description='',author='',content=''))

                # need_to_know = soup.find('div',{ "id" : "needToKnow" })
                # if need_to_know:
                #     for div_tag in need_to_know('div',attrs={'class' : re.compile("^needToKnowArticle")}):
                #         title = compound_h4_h3_title(div_tag)
                #         url = article_url(div_tag)
                #         description = article_description(div_tag)
                #         if not articles.has_key(page_title):
                #             articles[page_title] = []
                #         articles[page_title].append(dict(title=title,url=url,date='',description=description,author='',content=''))

                for news_category in soup.findAll('div','newsCategory'):
                    news_cat = self.tag_to_string(news_category.h4,False)
                    handle_category_article(news_cat, news_category.find('h2'), news_category.find('div'))
                    for news_item in news_category.findAll('li'):
                        handle_category_article(news_cat,news_item.h3,news_item)

                return

            # find the div containing the highlight article
            div_post = soup.find('div','post')
            if div_post:
                h1_tag = div_post.h1
                handle_article(h1_tag,div_post)

            # find the divs containing the rest of the articles
            div_other = div_post.find('div', { "id" : "categoryOtherPosts" })
            if div_other:
                for div_entry in div_other.findAll('div','entry'):
                    h2_tag = div_entry.h2
                    handle_article(h2_tag,div_entry)

        for page_name,page_title in self.sectionlist:
            parse_index_page(page_name,page_title)
            ans.append(page_title)

        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
        return ans
    feeds = [(u'Blog Central', u'http://www2.macleans.ca/category/blog-central/feed/'),
             (u'Canada', u'http://www2.macleans.ca/category/canada/feed/'),
             (u'World', u'http://www2.macleans.ca/category/world-from-the-magazine/feed/'),
             (u'Business', u'http://www2.macleans.ca/category/business/feed/'),
             (u'Arts & Culture', u'http://www2.macleans.ca/category/arts-culture/feed/'),
             (u'Opinion', u'http://www2.macleans.ca/category/opinion/feed/'),
             (u'Health', u'http://www2.macleans.ca/category/health-from-the-magazine/feed/'),
             (u'Environment', u'http://www2.macleans.ca/category/environment-from-the-magazine/feed/')]
    def print_version(self, url):
        return url + 'print/'
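The replaced recipe's hand-rolled index parser is gone, but its date decoder is a compact illustration of the byline format it consumed, strings like 'Friday, July 8, 2011' (the sample input is illustrative):

    from datetime import date

    def decode_date(datestr):
        dmysplit = datestr.strip().lower().split(',')
        mdsplit = dmysplit[1].split()
        months = ['january','february','march','april','may','june','july',
                  'august','september','october','november','december']
        return date(int(dmysplit[2].split()[0]),
                    months.index(mdsplit[0]) + 1, int(mdsplit[1]))

    decode_date('Friday, July 8, 2011')   # -> datetime.date(2011, 7, 8)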
@@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1295081935(BasicNewsRecipe):
    title = u'Mail & Guardian ZA News'
    __author__ = '77ja65'
    language = 'en'
    language = 'en_ZA'
    oldest_article = 7
    max_articles_per_feed = 30
    no_stylesheets = True
138
recipes/menorca.recipe
Normal file
@@ -0,0 +1,138 @@
# -*- coding: utf-8 -*-

import re
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.web.feeds import Feed

class Menorca(BasicNewsRecipe):

    title = 'Menorca'
    publisher = 'Editorial Menorca S.A. '
    __author__ = 'M. Sintes'
    description = u'Peri\xf3dico con informaci\xf3n de Menorca, Espa\xf1a'
    category = 'news, politics, economy, culture, Menorca, Spain '
    language = 'es'
    encoding = 'cp1252'

    no_stylesheets = True
    oldest_article = 5
    max_articles_per_feed = 25

    feeds = [ (u'Principal',u'http://www.menorca.info/rss'),
              (u'Opini\xf3n',u'http://www.menorca.info/rss?seccion=opinion'),
              (u'Menorca',u'http://www.menorca.info/rss?seccion=menorca'),
              (u'Alaior',u'http://www.menorca.info/rss?seccion=pueblos/alaior'),
              (u'Ciutadella', u'http://www.menorca.info/rss?seccion=pueblos/ciutadella'),
              (u'Es Castell', u'http://www.menorca.info/rss?seccion=pueblos/escastell'),
              (u'Es Mercadal', u'http://www.menorca.info/rss?seccion=pueblos/esmercadal'),
              (u'Es Migjorn', u'http://www.menorca.info/rss?seccion=pueblos/esmigjorn'),
              (u'Ferreries', u'http://www.menorca.info/rss?seccion=pueblos/ferreries'),
              (u'Fornells', u'http://www.menorca.info/rss?seccion=pueblos/fornells'),
              (u'Llucma\xe7anes', u'http://www.menorca.info/rss?seccion=pueblos/llucmaanes'),
              (u'Ma\xf3', u'http://www.menorca.info/rss?seccion=pueblos/mao'),
              (u'Sant Climent', u'http://www.menorca.info/rss?seccion=pueblos/santcliment'),
              (u'Sant Llu\xeds', u'http://www.menorca.info/rss?seccion=pueblos/santlluis'),
              (u'Deportes',u'http://www.menorca.info/rss?seccion=deportes'),
              (u'Balears', u'http://www.menorca.info/rss?seccion=balears')]

    # Sections whose RSS link is broken; they are scraped directly from the web page
    seccions_web = [(u'Mundo',u'http://www.menorca.info/actualidad/mundo'),
                    (u'Econom\xeda',u'http://www.menorca.info/actualidad/economia'),
                    (u'Espa\xf1a',u'http://www.menorca.info/actualidad/espana')]

    remove_tags_before = dict(name='div', attrs={'class':'bloqueTitulosNoticia'})
    remove_tags_after = dict(name='div', attrs={'class':'compartir'})
    remove_tags = [dict(id = 'utilidades'),
                   dict(name='div', attrs={'class': 'totalComentarios'}),
                   dict(name='div', attrs={'class': 'compartir'}),
                   dict(name='div', attrs={'class': re.compile("img_noticia*")})
    ]

    def print_version(self, url):
        url_imprimir = url + '?d=print'
        return url.replace(url, url_imprimir)

    def feed_to_index_append(self, feedObject, masterFeed):
        # Loop through the feed object and build the correct type of article list
        for feed in feedObject:
            newArticles = []
            for article in feed.articles:
                newArt = {
                    'title' : article.title,
                    'url' : article.url,
                    'date' : article.date,
                    'description' : article.text_summary
                }
                newArticles.append(newArt)

            # append the newly-built list object to the index object passed in as masterFeed
            masterFeed.append((feed.title,newArticles))

    def parse_index(self):
        rssFeeds = Feed()
        rssFeeds = BasicNewsRecipe.parse_feeds(self)

        articles = []
        feeds = []

        self.feed_to_index_append(rssFeeds,feeds)

        for (nom_seccio, url_seccio) in self.seccions_web:

            articles = []

            soup = self.index_to_soup(url_seccio)
            for article in soup.findAll('div', attrs={'class':re.compile("articulo noticia|cajaNoticiaPortada")}):
                h = article.find(['h2','h3'])
                titol = self.tag_to_string(h)
                a = article.find('a', href=True)
                url = 'http://www.menorca.info' + a['href']

                desc = None
                autor = ''
                dt = ''

                soup_art = self.index_to_soup(url)
                aut = soup_art.find('div', attrs={'class':'autor'})
                tx = self.tag_to_string(aut)
                ls = re.split('[,;]',tx)

                t = len(ls)
                if t >= 1:
                    autor = ls[0]

                    if t > 1:
                        d = ls[t-1]

                        if len(d) >= 10:
                            lt = len(d) - 10
                            dt = d[lt:]

                self.log('\tTrobat article: ', titol, 'a', url, 'Seccio: ', nom_seccio, 'Autor: ', autor, 'Data: ', dt)

                articles.append({'title': titol, 'url': url, 'description': desc, 'date':dt, 'author': autor})

            if articles:
                feeds.append((nom_seccio, articles))

        return feeds
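The author/date extraction in parse_index splits the autor div on commas and semicolons, takes the first field as the author, and reads the date as the final 10 characters of the last field. On a hypothetical byline:

    import re

    tx = 'M. Sintes; Ciutadella; 08-07-2011'       # hypothetical autor-div text
    ls = re.split('[,;]', tx)
    autor = ls[0]                                  # 'M. Sintes'
    d = ls[len(ls) - 1]
    dt = d[len(d) - 10:] if len(d) >= 10 else ''   # '08-07-2011'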
@@ -20,7 +20,7 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
    remove_tags_before = dict(name='div', attrs={'id':'date'})
    remove_tags_after = dict(name='div', attrs={'id':'column-1-3'})
    encoding = 'utf-8'
    extra_css = '#date {font-size: 10px} .article-image-caption {font-size: 8px}'
    extra_css = 'body{font-size:12px} #date, .article-image-caption {font-size: 0.583em} h2 {font-size: 0.917em} p.small, span, li, li span span, p, b, i, u, p.small.article-paragraph, p.small.article-paragraph p, p.small.article-paragraph span, p span, span {font-size: 0.833em} h1 {font-size: 1em}'

    remove_tags = [dict(name='div', attrs={'class':[ 'metroCommentFormWrap',
        'commentForm', 'metroCommentInnerWrap', 'article-slideshow-counter-container', 'article-slideshow-control', 'ad', 'header-links',
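The new stylesheet pins the body at 12px and restates every other size in em, so at that base 0.583em is about 7px (the old 10px date and 8px captions converge there), 0.917em about 11px, and 0.833em about 10px:

    base = 12                              # body{font-size:12px}
    sizes = [int(round(em * base)) for em in (0.583, 0.917, 0.833, 1.0)]
    # -> [7, 11, 10, 12] px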
@@ -1,29 +1,34 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
    title = u'Metro UK'

    no_stylesheets = True
    oldest_article = 1
    max_articles_per_feed = 200
    description = 'News as provide by The Metro -UK'

    __author__ = 'Dave Asbury'
    no_stylesheets = True
    oldest_article = 1
    max_articles_per_feed = 25
    remove_empty_feeds = True
    remove_javascript = True

    preprocess_regexps = [(re.compile(r'Tweet'), lambda a : '')]

    language = 'en_GB'
    simultaneous_downloads = 3

    masthead_url = 'http://e-edition.metro.co.uk/images/metro_logo.gif'

    extra_css = 'h2 {font: sans-serif medium;}'
    keep_only_tags = [
        dict(name='h1'),dict(name='h2', attrs={'class':'h2'}),
        dict(attrs={'class':['img-cnt figure']}),
        dict(attrs={'class':['art-img']}),
        dict(name='h1'),
        dict(name='h2', attrs={'class':'h2'}),

        dict(name='div', attrs={'class':'art-lft'})
    ]
    remove_tags = [dict(name='div', attrs={'class':[ 'metroCommentFormWrap',
        'commentForm', 'metroCommentInnerWrap',
        'art-rgt','pluck-app pluck-comm','news m12 clrd clr-l p5t', 'flt-r' ]})]

    remove_tags = [dict(name='div', attrs={'class':[ 'news m12 clrd clr-b p5t shareBtm', 'commentForm', 'metroCommentInnerWrap',
        'art-rgt','pluck-app pluck-comm','news m12 clrd clr-l p5t', 'flt-r' ]}),
        dict(attrs={'class':[ 'metroCommentFormWrap','commentText','commentsNav','avatar','submDateAndTime']})
    ]
    feeds = [
        (u'News', u'http://www.metro.co.uk/rss/news/'), (u'Money', u'http://www.metro.co.uk/rss/money/'), (u'Sport', u'http://www.metro.co.uk/rss/sport/'), (u'Film', u'http://www.metro.co.uk/rss/metrolife/film/'), (u'Music', u'http://www.metro.co.uk/rss/metrolife/music/'), (u'TV', u'http://www.metro.co.uk/rss/tv/'), (u'Showbiz', u'http://www.metro.co.uk/rss/showbiz/'), (u'Weird News', u'http://www.metro.co.uk/rss/weird/'), (u'Travel', u'http://www.metro.co.uk/rss/travel/'), (u'Lifestyle', u'http://www.metro.co.uk/rss/lifestyle/'), (u'Books', u'http://www.metro.co.uk/rss/lifestyle/books/'), (u'Food', u'http://www.metro.co.uk/rss/lifestyle/restaurants/')]
@ -1,17 +1,23 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010-2011, Eddie Lau'
|
||||
|
||||
# Region - Hong Kong, Vancouver, Toronto
|
||||
__Region__ = 'Hong Kong'
|
||||
# Users of Kindle 3 with limited system-level CJK support
|
||||
# please replace the following "True" with "False".
|
||||
__MakePeriodical__ = True
|
||||
# Turn below to true if your device supports display of CJK titles
|
||||
__UseChineseTitle__ = False
|
||||
# Trun below to true if you wish to use life.mingpao.com as the main article source
|
||||
# Set it to False if you want to skip images
|
||||
__KeepImages__ = True
|
||||
# (HK only) Turn below to true if you wish to use life.mingpao.com as the main article source
|
||||
__UseLife__ = True
|
||||
|
||||
|
||||
'''
|
||||
Change Log:
|
||||
2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
|
||||
provide options to remove all images in the file
|
||||
2011/05/12: switch the main parse source to life.mingpao.com, which has more photos on the article pages
|
||||
2011/03/06: add new articles for finance section, also a new section "Columns"
|
||||
2011/02/28: rearrange the sections
|
||||
@ -34,29 +40,17 @@ Change Log:
|
||||
import os, datetime, re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from contextlib import nested
|
||||
|
||||
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||
from calibre.ebooks.metadata.toc import TOC
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
|
||||
class MPHKRecipe(BasicNewsRecipe):
|
||||
# MAIN CLASS
|
||||
class MPRecipe(BasicNewsRecipe):
|
||||
if __Region__ == 'Hong Kong':
|
||||
title = 'Ming Pao - Hong Kong'
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 100
|
||||
__author__ = 'Eddie Lau'
|
||||
description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
|
||||
publisher = 'MingPao'
|
||||
category = 'Chinese, News, Hong Kong'
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
no_stylesheets = True
|
||||
language = 'zh'
|
||||
encoding = 'Big5-HKSCS'
|
||||
recursions = 0
|
||||
conversion_options = {'linearize_tables':True}
|
||||
timefmt = ''
|
||||
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
|
||||
masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
|
||||
keep_only_tags = [dict(name='h1'),
|
||||
@ -65,11 +59,22 @@ class MPHKRecipe(BasicNewsRecipe):
|
||||
dict(attrs={'id':['newscontent']}), # entertainment and column page content
|
||||
dict(attrs={'id':['newscontent01','newscontent02']}),
|
||||
dict(attrs={'class':['photo']}),
|
||||
dict(name='table', attrs={'width':['100%'], 'border':['0'], 'cellspacing':['5'], 'cellpadding':['0']}), # content in printed version of life.mingpao.com
|
||||
dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}) # images for source from life.mingpao.com
|
||||
]
|
||||
if __KeepImages__:
|
||||
remove_tags = [dict(name='style'),
|
||||
dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com
|
||||
dict(name='table')] # for content fetched from life.mingpao.com
|
||||
dict(name='font', attrs={'size':['2'], 'color':['666666']}), # article date in life.mingpao.com article
|
||||
#dict(name='table') # for content fetched from life.mingpao.com
|
||||
]
|
||||
else:
|
||||
remove_tags = [dict(name='style'),
|
||||
dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com
|
||||
dict(name='font', attrs={'size':['2'], 'color':['666666']}), # article date in life.mingpao.com article
|
||||
dict(name='img'),
|
||||
#dict(name='table') # for content fetched from life.mingpao.com
|
||||
]
|
||||
remove_attributes = ['width']
|
||||
preprocess_regexps = [
|
||||
(re.compile(r'<h5>', re.DOTALL|re.IGNORECASE),
|
||||
@ -84,6 +89,55 @@ class MPHKRecipe(BasicNewsRecipe):
|
||||
(re.compile(r"<br><br></b>", re.DOTALL|re.IGNORECASE),
|
||||
lambda match: "</b>")
|
||||
]
|
||||
elif __Region__ == 'Vancouver':
|
||||
title = 'Ming Pao - Vancouver'
|
||||
description = 'Vancouver Chinese Newspaper (http://www.mingpaovan.com)'
|
||||
category = 'Chinese, News, Vancouver'
|
||||
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
|
||||
masthead_url = 'http://www.mingpaovan.com/image/mainlogo2_VAN2.gif'
|
||||
keep_only_tags = [dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['1']}),
|
||||
dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['3'], 'cellpadding':['3'], 'id':['tblContent3']}),
|
||||
dict(name='table', attrs={'width':['180'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['0'], 'bgcolor':['F0F0F0']}),
|
||||
]
|
||||
if __KeepImages__:
|
||||
remove_tags = [dict(name='img', attrs={'src':['../../../image/magnifier.gif']})] # the magnifier icon
|
||||
else:
|
||||
remove_tags = [dict(name='img')]
|
||||
remove_attributes = ['width']
|
||||
preprocess_regexps = [(re.compile(r' ', re.DOTALL|re.IGNORECASE),
|
||||
lambda match: ''),
|
||||
]
|
||||
elif __Region__ == 'Toronto':
|
||||
title = 'Ming Pao - Toronto'
|
||||
description = 'Toronto Chinese Newspaper (http://www.mingpaotor.com)'
|
||||
category = 'Chinese, News, Toronto'
|
||||
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
|
||||
masthead_url = 'http://www.mingpaotor.com/image/mainlogo2_TOR2.gif'
|
||||
keep_only_tags = [dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['1']}),
|
||||
dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['3'], 'cellpadding':['3'], 'id':['tblContent3']}),
|
||||
dict(name='table', attrs={'width':['180'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['0'], 'bgcolor':['F0F0F0']}),
|
||||
]
|
||||
if __KeepImages__:
|
||||
remove_tags = [dict(name='img', attrs={'src':['../../../image/magnifier.gif']})] # the magnifier icon
|
||||
else:
|
||||
remove_tags = [dict(name='img')]
|
||||
remove_attributes = ['width']
|
||||
preprocess_regexps = [(re.compile(r' ', re.DOTALL|re.IGNORECASE),
|
||||
lambda match: ''),
|
||||
]
|
||||
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 100
|
||||
__author__ = 'Eddie Lau'
|
||||
publisher = 'MingPao'
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
no_stylesheets = True
|
||||
language = 'zh'
|
||||
encoding = 'Big5-HKSCS'
|
||||
recursions = 0
|
||||
conversion_options = {'linearize_tables':True}
|
||||
timefmt = ''
|
||||
|
||||
def image_url_processor(cls, baseurl, url):
|
||||
# trick: break the url at the first occurance of digit, add an additional
|
||||
@ -124,8 +178,18 @@ class MPHKRecipe(BasicNewsRecipe):
|
||||
|
||||
def get_dtlocal(self):
|
||||
dt_utc = datetime.datetime.utcnow()
|
||||
# convert UTC to local hk time - at around HKT 6.00am, all news are available
|
||||
dt_local = dt_utc - datetime.timedelta(-2.0/24)
|
||||
if __Region__ == 'Hong Kong':
|
||||
# convert UTC to local hk time - at HKT 5.30am, all news are available
|
||||
dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(5.5/24)
|
||||
# dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(5.5/24)
|
||||
elif __Region__ == 'Vancouver':
|
||||
# convert UTC to local Vancouver time - at PST time 5.30am, all news are available
|
||||
dt_local = dt_utc + datetime.timedelta(-8.0/24) - datetime.timedelta(5.5/24)
|
||||
#dt_local = dt_utc.astimezone(pytz.timezone('America/Vancouver')) - datetime.timedelta(5.5/24)
|
||||
elif __Region__ == 'Toronto':
|
||||
# convert UTC to local Toronto time - at EST time 8.30am, all news are available
|
||||
dt_local = dt_utc + datetime.timedelta(-5.0/24) - datetime.timedelta(8.5/24)
|
||||
#dt_local = dt_utc.astimezone(pytz.timezone('America/Toronto')) - datetime.timedelta(8.5/24)
|
||||
return dt_local
|
||||
|
||||
    def get_fetchdate(self):

@ -135,13 +199,15 @@ class MPHKRecipe(BasicNewsRecipe):
        return self.get_dtlocal().strftime("%Y-%m-%d")

    def get_fetchday(self):
        # dt_utc = datetime.datetime.utcnow()
        # convert UTC to local hk time - at around HKT 6.00am, all news are available
        # dt_local = dt_utc - datetime.timedelta(-2.0/24)
        return self.get_dtlocal().strftime("%d")

    def get_cover_url(self):
        if __Region__ == 'Hong Kong':
            cover = 'http://news.mingpao.com/' + self.get_fetchdate() + '/' + self.get_fetchdate() + '_' + self.get_fetchday() + 'gacov.jpg'
        elif __Region__ == 'Vancouver':
            cover = 'http://www.mingpaovan.com/ftp/News/' + self.get_fetchdate() + '/' + self.get_fetchday() + 'pgva1s.jpg'
        elif __Region__ == 'Toronto':
            cover = 'http://www.mingpaotor.com/ftp/News/' + self.get_fetchdate() + '/' + self.get_fetchday() + 'pgtas.jpg'
        br = BasicNewsRecipe.get_browser()
        try:
            br.open(cover)

@ -153,6 +219,7 @@ class MPHKRecipe(BasicNewsRecipe):
        feeds = []
        dateStr = self.get_fetchdate()

        if __Region__ == 'Hong Kong':
            if __UseLife__:
                for title, url, keystr in [(u'\u8981\u805e Headline', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalga', 'nal'),
                                           (u'\u6e2f\u805e Local', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgb', 'nal'),

@ -222,7 +289,34 @@ class MPHKRecipe(BasicNewsRecipe):
                col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn')
                if col_articles:
                    feeds.append((u'\u5c08\u6b04 Columns', col_articles))

        elif __Region__ == 'Vancouver':
            for title, url in [(u'\u8981\u805e Headline', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VAindex.htm'),
                               (u'\u52a0\u570b Canada', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VBindex.htm'),
                               (u'\u793e\u5340 Local', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VDindex.htm'),
                               (u'\u6e2f\u805e Hong Kong', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/HK-VGindex.htm'),
                               (u'\u570b\u969b World', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VTindex.htm'),
                               (u'\u4e2d\u570b China', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VCindex.htm'),
                               (u'\u7d93\u6fdf Economics', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VEindex.htm'),
                               (u'\u9ad4\u80b2 Sports', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VSindex.htm'),
                               (u'\u5f71\u8996 Film/TV', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/HK-MAindex.htm'),
                               (u'\u526f\u520a Supplements', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/WWindex.htm'),]:
                articles = self.parse_section3(url, 'http://www.mingpaovan.com/')
                if articles:
                    feeds.append((title, articles))
        elif __Region__ == 'Toronto':
            for title, url in [(u'\u8981\u805e Headline', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TAindex.htm'),
                               (u'\u52a0\u570b Canada', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TDindex.htm'),
                               (u'\u793e\u5340 Local', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TFindex.htm'),
                               (u'\u4e2d\u570b China', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TCAindex.htm'),
                               (u'\u570b\u969b World', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TTAindex.htm'),
                               (u'\u6e2f\u805e Hong Kong', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/HK-GAindex.htm'),
                               (u'\u7d93\u6fdf Economics', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/THindex.htm'),
                               (u'\u9ad4\u80b2 Sports', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TSindex.htm'),
                               (u'\u5f71\u8996 Film/TV', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/HK-MAindex.htm'),
                               (u'\u526f\u520a Supplements', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/WWindex.htm'),]:
                articles = self.parse_section3(url, 'http://www.mingpaotor.com/')
                if articles:
                    feeds.append((title, articles))
        return feeds

    # parse from news.mingpao.com

@ -256,11 +350,30 @@ class MPHKRecipe(BasicNewsRecipe):
            title = self.tag_to_string(i)
            url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
            if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
                url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article
                current_articles.append({'title': title, 'url': url, 'description': ''})
                included_urls.append(url)
        current_articles.reverse()
        return current_articles
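The rfind() comparisons in the filter above are an indirect substring test: "not url.rfind('.txt') == -1" holds exactly when '.txt' occurs somewhere in the url. An equivalent, more direct form of the same condition (a sketch, not the committed code):

    # same filter written with plain substring tests
    if url not in included_urls and '.txt' in url and keystr in url:
        url = url.replace('dailynews3.cfm', 'dailynews3a.cfm')  # printed version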

    # parse from www.mingpaovan.com
    def parse_section3(self, url, baseUrl):
        self.get_fetchdate()
        soup = self.index_to_soup(url)
        divs = soup.findAll(attrs={'class': ['ListContentLargeLink']})
        current_articles = []
        included_urls = []
        divs.reverse()
        for i in divs:
            title = self.tag_to_string(i)
            urlstr = i.get('href', False)
            urlstr = baseUrl + '/' + urlstr.replace('../../../', '')
            if urlstr not in included_urls:
                current_articles.append({'title': title, 'url': urlstr, 'description': '', 'date': ''})
                included_urls.append(urlstr)
        current_articles.reverse()
        return current_articles

    def parse_ed_section(self, url):
        self.get_fetchdate()
        soup = self.index_to_soup(url)

@ -338,7 +451,12 @@ class MPHKRecipe(BasicNewsRecipe):
        if dir is None:
            dir = self.output_dir
        if __UseChineseTitle__ == True:
            if __Region__ == 'Hong Kong':
                title = u'\u660e\u5831 (\u9999\u6e2f)'
            elif __Region__ == 'Vancouver':
                title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)'
            elif __Region__ == 'Toronto':
                title = u'\u660e\u5831 (\u591a\u502b\u591a)'
        else:
            title = self.short_title()
        # if not generating a periodical, force date to apply in title
594
recipes/ming_pao_toronto.recipe
@ -0,0 +1,594 @@
__license__ = 'GPL v3'
__copyright__ = '2010-2011, Eddie Lau'

# Region - Hong Kong, Vancouver, Toronto
__Region__ = 'Toronto'
# Users of Kindle 3 with limited system-level CJK support
# please replace the following "True" with "False".
__MakePeriodical__ = True
# Turn below to true if your device supports display of CJK titles
__UseChineseTitle__ = False
# Set it to False if you want to skip images
__KeepImages__ = True
# (HK only) Turn below to true if you wish to use life.mingpao.com as the main article source
__UseLife__ = True


'''
Change Log:
2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
            provide options to remove all images in the file
2011/05/12: switch the main parse source to life.mingpao.com, which has more photos on the article pages
2011/03/06: add new articles for finance section, also a new section "Columns"
2011/02/28: rearrange the sections
            [Disabled until Kindle has better CJK support and can remember last (section,article) read in Sections & Articles
            View] make it the same title if generating a periodical, so past issue will be automatically put into "Past Issues"
            folder in Kindle 3
2011/02/20: skip duplicated links in finance section, put photos which may extend a whole page to the back of the articles
            clean up the indentation
2010/12/07: add entertainment section, use newspaper front page as ebook cover, suppress date display in section list
            (to avoid wrong date display in case the user generates the ebook in a time zone different from HKT)
2010/11/22: add English section, remove eco-news section which is not updated daily, correct
            ordering of articles
2010/11/12: add news image and eco-news section
2010/11/08: add parsing of finance section
2010/11/06: temporary work-around for Kindle device having no capability to display unicode
            in section/article list.
2010/10/31: skip repeated articles in section pages
'''

import os, datetime, re
from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation

# MAIN CLASS
class MPRecipe(BasicNewsRecipe):
    if __Region__ == 'Hong Kong':
        title = 'Ming Pao - Hong Kong'
        description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
        category = 'Chinese, News, Hong Kong'
        extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
        masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
        keep_only_tags = [dict(name='h1'),
                          dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
                          dict(name='font', attrs={'color':['AA0000']}), # for column articles title
                          dict(attrs={'id':['newscontent']}), # entertainment and column page content
                          dict(attrs={'id':['newscontent01','newscontent02']}),
                          dict(attrs={'class':['photo']}),
                          dict(name='table', attrs={'width':['100%'], 'border':['0'], 'cellspacing':['5'], 'cellpadding':['0']}), # content in printed version of life.mingpao.com
                          dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}) # images for source from life.mingpao.com
                          ]
        if __KeepImages__:
            remove_tags = [dict(name='style'),
                           dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com
                           dict(name='font', attrs={'size':['2'], 'color':['666666']}), # article date in life.mingpao.com article
                           #dict(name='table') # for content fetched from life.mingpao.com
                           ]
        else:
            remove_tags = [dict(name='style'),
                           dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com
                           dict(name='font', attrs={'size':['2'], 'color':['666666']}), # article date in life.mingpao.com article
                           dict(name='img'),
                           #dict(name='table') # for content fetched from life.mingpao.com
                           ]
        remove_attributes = ['width']
        preprocess_regexps = [
            (re.compile(r'<h5>', re.DOTALL|re.IGNORECASE),
             lambda match: '<h1>'),
            (re.compile(r'</h5>', re.DOTALL|re.IGNORECASE),
             lambda match: '</h1>'),
            (re.compile(r'<p><a href=.+?</a></p>', re.DOTALL|re.IGNORECASE), # for entertainment page
             lambda match: ''),
            # skip <br> after title in life.mingpao.com fetched article
            (re.compile(r"<div id='newscontent'><br>", re.DOTALL|re.IGNORECASE),
             lambda match: "<div id='newscontent'>"),
            (re.compile(r"<br><br></b>", re.DOTALL|re.IGNORECASE),
             lambda match: "</b>")
            ]
    elif __Region__ == 'Vancouver':
        title = 'Ming Pao - Vancouver'
        description = 'Vancouver Chinese Newspaper (http://www.mingpaovan.com)'
        category = 'Chinese, News, Vancouver'
        extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
        masthead_url = 'http://www.mingpaovan.com/image/mainlogo2_VAN2.gif'
        keep_only_tags = [dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['1']}),
                          dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['3'], 'cellpadding':['3'], 'id':['tblContent3']}),
                          dict(name='table', attrs={'width':['180'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['0'], 'bgcolor':['F0F0F0']}),
                          ]
        if __KeepImages__:
            remove_tags = [dict(name='img', attrs={'src':['../../../image/magnifier.gif']})] # the magnifier icon
        else:
            remove_tags = [dict(name='img')]
        remove_attributes = ['width']
        preprocess_regexps = [(re.compile(r'&nbsp;', re.DOTALL|re.IGNORECASE),
                               lambda match: ''),
                              ]
    elif __Region__ == 'Toronto':
        title = 'Ming Pao - Toronto'
        description = 'Toronto Chinese Newspaper (http://www.mingpaotor.com)'
        category = 'Chinese, News, Toronto'
        extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
        masthead_url = 'http://www.mingpaotor.com/image/mainlogo2_TOR2.gif'
        keep_only_tags = [dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['1']}),
                          dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['3'], 'cellpadding':['3'], 'id':['tblContent3']}),
                          dict(name='table', attrs={'width':['180'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['0'], 'bgcolor':['F0F0F0']}),
                          ]
        if __KeepImages__:
            remove_tags = [dict(name='img', attrs={'src':['../../../image/magnifier.gif']})] # the magnifier icon
        else:
            remove_tags = [dict(name='img')]
        remove_attributes = ['width']
        preprocess_regexps = [(re.compile(r'&nbsp;', re.DOTALL|re.IGNORECASE),
                               lambda match: ''),
                              ]

    oldest_article = 1
    max_articles_per_feed = 100
    __author__ = 'Eddie Lau'
    publisher = 'MingPao'
    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    language = 'zh'
    encoding = 'Big5-HKSCS'
    recursions = 0
    conversion_options = {'linearize_tables':True}
    timefmt = ''

    def image_url_processor(cls, baseurl, url):
        # trick: break the url at the first occurrence of a digit, add an additional
        # '_' at the front
        # not working, may need to move this to preprocess_html() method
        # minIdx = 10000
        # i0 = url.find('0')
        # if i0 >= 0 and i0 < minIdx:
        #     minIdx = i0
        # i1 = url.find('1')
        # if i1 >= 0 and i1 < minIdx:
        #     minIdx = i1
        # i2 = url.find('2')
        # if i2 >= 0 and i2 < minIdx:
        #     minIdx = i2
        # i3 = url.find('3')
        # if i3 >= 0 and i3 < minIdx:
        #     minIdx = i3
        # i4 = url.find('4')
        # if i4 >= 0 and i4 < minIdx:
        #     minIdx = i4
        # i5 = url.find('5')
        # if i5 >= 0 and i5 < minIdx:
        #     minIdx = i5
        # i6 = url.find('6')
        # if i6 >= 0 and i6 < minIdx:
        #     minIdx = i6
        # i7 = url.find('7')
        # if i7 >= 0 and i7 < minIdx:
        #     minIdx = i7
        # i8 = url.find('8')
        # if i8 >= 0 and i8 < minIdx:
        #     minIdx = i8
        # i9 = url.find('9')
        # if i9 >= 0 and i9 < minIdx:
        #     minIdx = i9
        return url
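The commented-out block above hunts for the earliest digit one character at a time; the same "insert '_' before the first digit" trick can be sketched far more compactly with a regex (hypothetical and untested against the site, hence not part of the recipe):

    import re

    def underscore_before_first_digit(url):
        # equivalent of the minIdx search above: find the first digit
        # and splice an underscore in front of it
        m = re.search(r'\d', url)
        if m is None:
            return url
        return url[:m.start()] + '_' + url[m.start():]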

    def get_dtlocal(self):
        dt_utc = datetime.datetime.utcnow()
        if __Region__ == 'Hong Kong':
            # convert UTC to local hk time - at HKT 5.30am, all news are available
            dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(5.5/24)
            # dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(5.5/24)
        elif __Region__ == 'Vancouver':
            # convert UTC to local Vancouver time - at PST time 5.30am, all news are available
            dt_local = dt_utc + datetime.timedelta(-8.0/24) - datetime.timedelta(5.5/24)
            #dt_local = dt_utc.astimezone(pytz.timezone('America/Vancouver')) - datetime.timedelta(5.5/24)
        elif __Region__ == 'Toronto':
            # convert UTC to local Toronto time - at EST time 8.30am, all news are available
            dt_local = dt_utc + datetime.timedelta(-5.0/24) - datetime.timedelta(8.5/24)
            #dt_local = dt_utc.astimezone(pytz.timezone('America/Toronto')) - datetime.timedelta(8.5/24)
        return dt_local

    def get_fetchdate(self):
        return self.get_dtlocal().strftime("%Y%m%d")

    def get_fetchformatteddate(self):
        return self.get_dtlocal().strftime("%Y-%m-%d")

    def get_fetchday(self):
        return self.get_dtlocal().strftime("%d")

    def get_cover_url(self):
        if __Region__ == 'Hong Kong':
            cover = 'http://news.mingpao.com/' + self.get_fetchdate() + '/' + self.get_fetchdate() + '_' + self.get_fetchday() + 'gacov.jpg'
        elif __Region__ == 'Vancouver':
            cover = 'http://www.mingpaovan.com/ftp/News/' + self.get_fetchdate() + '/' + self.get_fetchday() + 'pgva1s.jpg'
        elif __Region__ == 'Toronto':
            cover = 'http://www.mingpaotor.com/ftp/News/' + self.get_fetchdate() + '/' + self.get_fetchday() + 'pgtas.jpg'
        br = BasicNewsRecipe.get_browser()
        try:
            br.open(cover)
        except:
            cover = None
        return cover
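As a concrete illustration (date assumed for the example), a fetch on 2011-07-08 would probe one of these cover URLs and fall back to None if the request fails:

    # Hong Kong: http://news.mingpao.com/20110708/20110708_08gacov.jpg
    # Vancouver: http://www.mingpaovan.com/ftp/News/20110708/08pgva1s.jpg
    # Toronto:   http://www.mingpaotor.com/ftp/News/20110708/08pgtas.jpg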

    def parse_index(self):
        feeds = []
        dateStr = self.get_fetchdate()

        if __Region__ == 'Hong Kong':
            if __UseLife__:
                for title, url, keystr in [(u'\u8981\u805e Headline', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalga', 'nal'),
                                           (u'\u6e2f\u805e Local', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgb', 'nal'),
                                           (u'\u6559\u80b2 Education', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgf', 'nal'),
                                           (u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr', 'nal'),
                                           (u'\u8ad6\u58c7 Forum', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalfa', 'nal'),
                                           (u'\u4e2d\u570b China', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalca', 'nal'),
                                           (u'\u570b\u969b World', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalta', 'nal'),
                                           (u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal'),
                                           (u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'),
                                           (u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal'),
                                           (u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')]:
                    articles = self.parse_section2(url, keystr)
                    if articles:
                        feeds.append((title, articles))

                for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
                                   (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
                    articles = self.parse_section(url)
                    if articles:
                        feeds.append((title, articles))
            else:
                for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
                                   (u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
                                   (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]:
                    articles = self.parse_section(url)
                    if articles:
                        feeds.append((title, articles))

                # special- editorial
                ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
                if ed_articles:
                    feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))

                for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
                                   (u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
                                   (u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm')]:
                    articles = self.parse_section(url)
                    if articles:
                        feeds.append((title, articles))

                # special - finance
                #fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
                fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
                if fin_articles:
                    feeds.append((u'\u7d93\u6fdf Finance', fin_articles))

                for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
                                   (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
                    articles = self.parse_section(url)
                    if articles:
                        feeds.append((title, articles))

                # special - entertainment
                ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
                if ent_articles:
                    feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))

                for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
                                   (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
                    articles = self.parse_section(url)
                    if articles:
                        feeds.append((title, articles))


                # special- columns
                col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn')
                if col_articles:
                    feeds.append((u'\u5c08\u6b04 Columns', col_articles))
        elif __Region__ == 'Vancouver':
            for title, url in [(u'\u8981\u805e Headline', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VAindex.htm'),
                               (u'\u52a0\u570b Canada', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VBindex.htm'),
                               (u'\u793e\u5340 Local', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VDindex.htm'),
                               (u'\u6e2f\u805e Hong Kong', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/HK-VGindex.htm'),
                               (u'\u570b\u969b World', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VTindex.htm'),
                               (u'\u4e2d\u570b China', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VCindex.htm'),
                               (u'\u7d93\u6fdf Economics', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VEindex.htm'),
                               (u'\u9ad4\u80b2 Sports', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VSindex.htm'),
                               (u'\u5f71\u8996 Film/TV', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/HK-MAindex.htm'),
                               (u'\u526f\u520a Supplements', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/WWindex.htm'),]:
                articles = self.parse_section3(url, 'http://www.mingpaovan.com/')
                if articles:
                    feeds.append((title, articles))
        elif __Region__ == 'Toronto':
            for title, url in [(u'\u8981\u805e Headline', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TAindex.htm'),
                               (u'\u52a0\u570b Canada', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TDindex.htm'),
                               (u'\u793e\u5340 Local', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TFindex.htm'),
                               (u'\u4e2d\u570b China', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TCAindex.htm'),
                               (u'\u570b\u969b World', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TTAindex.htm'),
                               (u'\u6e2f\u805e Hong Kong', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/HK-GAindex.htm'),
                               (u'\u7d93\u6fdf Economics', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/THindex.htm'),
                               (u'\u9ad4\u80b2 Sports', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TSindex.htm'),
                               (u'\u5f71\u8996 Film/TV', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/HK-MAindex.htm'),
                               (u'\u526f\u520a Supplements', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/WWindex.htm'),]:
                articles = self.parse_section3(url, 'http://www.mingpaotor.com/')
                if articles:
                    feeds.append((title, articles))
        return feeds
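Whichever branch runs, parse_index() hands calibre a list of (section title, article list) pairs, each article a small dict; the expected shape, with placeholder data:

    feeds = [(u'\u8981\u805e Headline',
              [{'title': 'Some headline', 'url': 'http://...',
                'description': '', 'date': ''}]),
             ]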

    # parse from news.mingpao.com
    def parse_section(self, url):
        dateStr = self.get_fetchdate()
        soup = self.index_to_soup(url)
        divs = soup.findAll(attrs={'class': ['bullet','bullet_grey']})
        current_articles = []
        included_urls = []
        divs.reverse()
        for i in divs:
            a = i.find('a', href = True)
            title = self.tag_to_string(a)
            url = a.get('href', False)
            url = 'http://news.mingpao.com/' + dateStr + '/' +url
            if url not in included_urls and url.rfind('Redirect') == -1:
                current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
                included_urls.append(url)
        current_articles.reverse()
        return current_articles

    # parse from life.mingpao.com
    def parse_section2(self, url, keystr):
        self.get_fetchdate()
        soup = self.index_to_soup(url)
        a = soup.findAll('a', href=True)
        a.reverse()
        current_articles = []
        included_urls = []
        for i in a:
            title = self.tag_to_string(i)
            url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
            if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
                url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article
                current_articles.append({'title': title, 'url': url, 'description': ''})
                included_urls.append(url)
        current_articles.reverse()
        return current_articles

    # parse from www.mingpaovan.com
    def parse_section3(self, url, baseUrl):
        self.get_fetchdate()
        soup = self.index_to_soup(url)
        divs = soup.findAll(attrs={'class': ['ListContentLargeLink']})
        current_articles = []
        included_urls = []
        divs.reverse()
        for i in divs:
            title = self.tag_to_string(i)
            urlstr = i.get('href', False)
            urlstr = baseUrl + '/' + urlstr.replace('../../../', '')
            if urlstr not in included_urls:
                current_articles.append({'title': title, 'url': urlstr, 'description': '', 'date': ''})
                included_urls.append(urlstr)
        current_articles.reverse()
        return current_articles

    def parse_ed_section(self, url):
        self.get_fetchdate()
        soup = self.index_to_soup(url)
        a = soup.findAll('a', href=True)
        a.reverse()
        current_articles = []
        included_urls = []
        for i in a:
            title = self.tag_to_string(i)
            url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
            if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('nal') == -1):
                current_articles.append({'title': title, 'url': url, 'description': ''})
                included_urls.append(url)
        current_articles.reverse()
        return current_articles

    def parse_fin_section(self, url):
        self.get_fetchdate()
        soup = self.index_to_soup(url)
        a = soup.findAll('a', href= True)
        current_articles = []
        included_urls = []
        for i in a:
            #url = 'http://www.mpfinance.com/cfm/' + i.get('href', False)
            url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
            #if url not in included_urls and not url.rfind(dateStr) == -1 and url.rfind('index') == -1:
            if url not in included_urls and (not url.rfind('txt') == -1) and (not url.rfind('nal') == -1):
                title = self.tag_to_string(i)
                current_articles.append({'title': title, 'url': url, 'description':''})
                included_urls.append(url)
        return current_articles

    def parse_ent_section(self, url):
        self.get_fetchdate()
        soup = self.index_to_soup(url)
        a = soup.findAll('a', href=True)
        a.reverse()
        current_articles = []
        included_urls = []
        for i in a:
            title = self.tag_to_string(i)
            url = 'http://ol.mingpao.com/cfm/' + i.get('href', False)
            if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('star') == -1):
                current_articles.append({'title': title, 'url': url, 'description': ''})
                included_urls.append(url)
        current_articles.reverse()
        return current_articles

    def parse_col_section(self, url):
        self.get_fetchdate()
        soup = self.index_to_soup(url)
        a = soup.findAll('a', href=True)
        a.reverse()
        current_articles = []
        included_urls = []
        for i in a:
            title = self.tag_to_string(i)
            url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
            if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('ncl') == -1):
                current_articles.append({'title': title, 'url': url, 'description': ''})
                included_urls.append(url)
        current_articles.reverse()
        return current_articles

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        # strip leftover width and alignment attributes from the table layout
        for item in soup.findAll(width=True):
            del item['width']
        for item in soup.findAll(align=True):
            del item['align']
        return soup

    def create_opf(self, feeds, dir=None):
        if dir is None:
            dir = self.output_dir
        if __UseChineseTitle__ == True:
            if __Region__ == 'Hong Kong':
                title = u'\u660e\u5831 (\u9999\u6e2f)'
            elif __Region__ == 'Vancouver':
                title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)'
            elif __Region__ == 'Toronto':
                title = u'\u660e\u5831 (\u591a\u502b\u591a)'
        else:
            title = self.short_title()
        # if not generating a periodical, force date to apply in title
        if __MakePeriodical__ == False:
            title = title + ' ' + self.get_fetchformatteddate()
        if True:
            mi = MetaInformation(title, [self.publisher])
            mi.publisher = self.publisher
            mi.author_sort = self.publisher
            if __MakePeriodical__ == True:
                mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
            else:
                mi.publication_type = self.publication_type+':'+self.short_title()
            #mi.timestamp = nowf()
            mi.timestamp = self.get_dtlocal()
            mi.comments = self.description
            if not isinstance(mi.comments, unicode):
                mi.comments = mi.comments.decode('utf-8', 'replace')
            #mi.pubdate = nowf()
            mi.pubdate = self.get_dtlocal()
            opf_path = os.path.join(dir, 'index.opf')
            ncx_path = os.path.join(dir, 'index.ncx')
            opf = OPFCreator(dir, mi)
            # Add mastheadImage entry to <guide> section
            mp = getattr(self, 'masthead_path', None)
            if mp is not None and os.access(mp, os.R_OK):
                from calibre.ebooks.metadata.opf2 import Guide
                ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
                ref.type = 'masthead'
                ref.title = 'Masthead Image'
                opf.guide.append(ref)

            manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
            manifest.append(os.path.join(dir, 'index.html'))
            manifest.append(os.path.join(dir, 'index.ncx'))

            # Get cover
            cpath = getattr(self, 'cover_path', None)
            if cpath is None:
                pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
                if self.default_cover(pf):
                    cpath = pf.name
            if cpath is not None and os.access(cpath, os.R_OK):
                opf.cover = cpath
                manifest.append(cpath)

            # Get masthead
            mpath = getattr(self, 'masthead_path', None)
            if mpath is not None and os.access(mpath, os.R_OK):
                manifest.append(mpath)

            opf.create_manifest_from_files_in(manifest)
            for mani in opf.manifest:
                if mani.path.endswith('.ncx'):
                    mani.id = 'ncx'
                if mani.path.endswith('mastheadImage.jpg'):
                    mani.id = 'masthead-image'
            entries = ['index.html']
            toc = TOC(base_path=dir)
            self.play_order_counter = 0
            self.play_order_map = {}

        def feed_index(num, parent):
            f = feeds[num]
            for j, a in enumerate(f):
                if getattr(a, 'downloaded', False):
                    adir = 'feed_%d/article_%d/'%(num, j)
                    auth = a.author
                    if not auth:
                        auth = None
                    desc = a.text_summary
                    if not desc:
                        desc = None
                    else:
                        desc = self.description_limiter(desc)
                    entries.append('%sindex.html'%adir)
                    po = self.play_order_map.get(entries[-1], None)
                    if po is None:
                        self.play_order_counter += 1
                        po = self.play_order_counter
                    parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
                                    play_order=po, author=auth, description=desc)
                    last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
                    for sp in a.sub_pages:
                        prefix = os.path.commonprefix([opf_path, sp])
                        relp = sp[len(prefix):]
                        entries.append(relp.replace(os.sep, '/'))
                        last = sp

                    if os.path.exists(last):
                        with open(last, 'rb') as fi:
                            src = fi.read().decode('utf-8')
                        soup = BeautifulSoup(src)
                        body = soup.find('body')
                        if body is not None:
                            prefix = '/'.join('..' for i in range(2*len(re.findall(r'link\d+', last))))
                            templ = self.navbar.generate(True, num, j, len(f),
                                                         not self.has_single_feed,
                                                         a.orig_url, self.publisher, prefix=prefix,
                                                         center=self.center_navbar)
                            elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
                            body.insert(len(body.contents), elem)
                            with open(last, 'wb') as fi:
                                fi.write(unicode(soup).encode('utf-8'))
        if len(feeds) == 0:
            raise Exception('All feeds are empty, aborting.')

        if len(feeds) > 1:
            for i, f in enumerate(feeds):
                entries.append('feed_%d/index.html'%i)
                po = self.play_order_map.get(entries[-1], None)
                if po is None:
                    self.play_order_counter += 1
                    po = self.play_order_counter
                auth = getattr(f, 'author', None)
                if not auth:
                    auth = None
                desc = getattr(f, 'description', None)
                if not desc:
                    desc = None
                feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
                                           f.title, play_order=po, description=desc, author=auth))

        else:
            entries.append('feed_%d/index.html'%0)
            feed_index(0, toc)

        for i, p in enumerate(entries):
            entries[i] = os.path.join(dir, p.replace('/', os.sep))
        opf.create_spine(entries)
        opf.set_toc(toc)

        with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
            opf.render(opf_file, ncx_file)
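contextlib.nested, used just above, was deprecated in Python 2.7 and removed in Python 3; on newer interpreters the same two-file write would read (a sketch, not the committed code):

    with open(opf_path, 'wb') as opf_file, open(ncx_path, 'wb') as ncx_file:
        opf.render(opf_file, ncx_file)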
594
recipes/ming_pao_vancouver.recipe
@ -0,0 +1,594 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010-2011, Eddie Lau'
|
||||
|
||||
# Region - Hong Kong, Vancouver, Toronto
|
||||
__Region__ = 'Vancouver'
|
||||
# Users of Kindle 3 with limited system-level CJK support
|
||||
# please replace the following "True" with "False".
|
||||
__MakePeriodical__ = True
|
||||
# Turn below to true if your device supports display of CJK titles
|
||||
__UseChineseTitle__ = False
|
||||
# Set it to False if you want to skip images
|
||||
__KeepImages__ = True
|
||||
# (HK only) Turn below to true if you wish to use life.mingpao.com as the main article source
|
||||
__UseLife__ = True
|
||||
|
||||
|
||||
'''
|
||||
Change Log:
|
||||
2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
|
||||
provide options to remove all images in the file
|
||||
2011/05/12: switch the main parse source to life.mingpao.com, which has more photos on the article pages
|
||||
2011/03/06: add new articles for finance section, also a new section "Columns"
|
||||
2011/02/28: rearrange the sections
|
||||
[Disabled until Kindle has better CJK support and can remember last (section,article) read in Sections & Articles
|
||||
View] make it the same title if generating a periodical, so past issue will be automatically put into "Past Issues"
|
||||
folder in Kindle 3
|
||||
2011/02/20: skip duplicated links in finance section, put photos which may extend a whole page to the back of the articles
|
||||
clean up the indentation
|
||||
2010/12/07: add entertainment section, use newspaper front page as ebook cover, suppress date display in section list
|
||||
(to avoid wrong date display in case the user generates the ebook in a time zone different from HKT)
|
||||
2010/11/22: add English section, remove eco-news section which is not updated daily, correct
|
||||
ordering of articles
|
||||
2010/11/12: add news image and eco-news section
|
||||
2010/11/08: add parsing of finance section
|
||||
2010/11/06: temporary work-around for Kindle device having no capability to display unicode
|
||||
in section/article list.
|
||||
2010/10/31: skip repeated articles in section pages
|
||||
'''
|
||||
|
||||
import os, datetime, re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from contextlib import nested
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||
from calibre.ebooks.metadata.toc import TOC
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
|
||||
# MAIN CLASS
|
||||
class MPRecipe(BasicNewsRecipe):
|
||||
if __Region__ == 'Hong Kong':
|
||||
title = 'Ming Pao - Hong Kong'
|
||||
description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
|
||||
category = 'Chinese, News, Hong Kong'
|
||||
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
|
||||
masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
|
||||
keep_only_tags = [dict(name='h1'),
|
||||
dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
|
||||
dict(name='font', attrs={'color':['AA0000']}), # for column articles title
|
||||
dict(attrs={'id':['newscontent']}), # entertainment and column page content
|
||||
dict(attrs={'id':['newscontent01','newscontent02']}),
|
||||
dict(attrs={'class':['photo']}),
|
||||
dict(name='table', attrs={'width':['100%'], 'border':['0'], 'cellspacing':['5'], 'cellpadding':['0']}), # content in printed version of life.mingpao.com
|
||||
dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}) # images for source from life.mingpao.com
|
||||
]
|
||||
if __KeepImages__:
|
||||
remove_tags = [dict(name='style'),
|
||||
dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com
|
||||
dict(name='font', attrs={'size':['2'], 'color':['666666']}), # article date in life.mingpao.com article
|
||||
#dict(name='table') # for content fetched from life.mingpao.com
|
||||
]
|
||||
else:
|
||||
remove_tags = [dict(name='style'),
|
||||
dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com
|
||||
dict(name='font', attrs={'size':['2'], 'color':['666666']}), # article date in life.mingpao.com article
|
||||
dict(name='img'),
|
||||
#dict(name='table') # for content fetched from life.mingpao.com
|
||||
]
|
||||
remove_attributes = ['width']
|
||||
preprocess_regexps = [
|
||||
(re.compile(r'<h5>', re.DOTALL|re.IGNORECASE),
|
||||
lambda match: '<h1>'),
|
||||
(re.compile(r'</h5>', re.DOTALL|re.IGNORECASE),
|
||||
lambda match: '</h1>'),
|
||||
(re.compile(r'<p><a href=.+?</a></p>', re.DOTALL|re.IGNORECASE), # for entertainment page
|
||||
lambda match: ''),
|
||||
# skip <br> after title in life.mingpao.com fetched article
|
||||
(re.compile(r"<div id='newscontent'><br>", re.DOTALL|re.IGNORECASE),
|
||||
lambda match: "<div id='newscontent'>"),
|
||||
(re.compile(r"<br><br></b>", re.DOTALL|re.IGNORECASE),
|
||||
lambda match: "</b>")
|
||||
]
|
||||
elif __Region__ == 'Vancouver':
|
||||
title = 'Ming Pao - Vancouver'
|
||||
description = 'Vancouver Chinese Newspaper (http://www.mingpaovan.com)'
|
||||
category = 'Chinese, News, Vancouver'
|
||||
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
|
||||
masthead_url = 'http://www.mingpaovan.com/image/mainlogo2_VAN2.gif'
|
||||
keep_only_tags = [dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['1']}),
|
||||
dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['3'], 'cellpadding':['3'], 'id':['tblContent3']}),
|
||||
dict(name='table', attrs={'width':['180'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['0'], 'bgcolor':['F0F0F0']}),
|
||||
]
|
||||
if __KeepImages__:
|
||||
remove_tags = [dict(name='img', attrs={'src':['../../../image/magnifier.gif']})] # the magnifier icon
|
||||
else:
|
||||
remove_tags = [dict(name='img')]
|
||||
remove_attributes = ['width']
|
||||
preprocess_regexps = [(re.compile(r' ', re.DOTALL|re.IGNORECASE),
|
||||
lambda match: ''),
|
||||
]
|
||||
elif __Region__ == 'Toronto':
|
||||
title = 'Ming Pao - Toronto'
|
||||
description = 'Toronto Chinese Newspaper (http://www.mingpaotor.com)'
|
||||
category = 'Chinese, News, Toronto'
|
||||
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
|
||||
masthead_url = 'http://www.mingpaotor.com/image/mainlogo2_TOR2.gif'
|
||||
keep_only_tags = [dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['1']}),
|
||||
dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['3'], 'cellpadding':['3'], 'id':['tblContent3']}),
|
||||
dict(name='table', attrs={'width':['180'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['0'], 'bgcolor':['F0F0F0']}),
|
||||
]
|
||||
if __KeepImages__:
|
||||
remove_tags = [dict(name='img', attrs={'src':['../../../image/magnifier.gif']})] # the magnifier icon
|
||||
else:
|
||||
remove_tags = [dict(name='img')]
|
||||
remove_attributes = ['width']
|
||||
preprocess_regexps = [(re.compile(r' ', re.DOTALL|re.IGNORECASE),
|
||||
lambda match: ''),
|
||||
]
|
||||
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 100
|
||||
__author__ = 'Eddie Lau'
|
||||
publisher = 'MingPao'
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
no_stylesheets = True
|
||||
language = 'zh'
|
||||
encoding = 'Big5-HKSCS'
|
||||
recursions = 0
|
||||
conversion_options = {'linearize_tables':True}
|
||||
timefmt = ''
|
||||
|
||||
def image_url_processor(cls, baseurl, url):
|
||||
# trick: break the url at the first occurance of digit, add an additional
|
||||
# '_' at the front
|
||||
# not working, may need to move this to preprocess_html() method
|
||||
# minIdx = 10000
|
||||
# i0 = url.find('0')
|
||||
# if i0 >= 0 and i0 < minIdx:
|
||||
# minIdx = i0
|
||||
# i1 = url.find('1')
|
||||
# if i1 >= 0 and i1 < minIdx:
|
||||
# minIdx = i1
|
||||
# i2 = url.find('2')
|
||||
# if i2 >= 0 and i2 < minIdx:
|
||||
# minIdx = i2
|
||||
# i3 = url.find('3')
|
||||
# if i3 >= 0 and i0 < minIdx:
|
||||
# minIdx = i3
|
||||
# i4 = url.find('4')
|
||||
# if i4 >= 0 and i4 < minIdx:
|
||||
# minIdx = i4
|
||||
# i5 = url.find('5')
|
||||
# if i5 >= 0 and i5 < minIdx:
|
||||
# minIdx = i5
|
||||
# i6 = url.find('6')
|
||||
# if i6 >= 0 and i6 < minIdx:
|
||||
# minIdx = i6
|
||||
# i7 = url.find('7')
|
||||
# if i7 >= 0 and i7 < minIdx:
|
||||
# minIdx = i7
|
||||
# i8 = url.find('8')
|
||||
# if i8 >= 0 and i8 < minIdx:
|
||||
# minIdx = i8
|
||||
# i9 = url.find('9')
|
||||
# if i9 >= 0 and i9 < minIdx:
|
||||
# minIdx = i9
|
||||
return url
|
||||
|
||||
def get_dtlocal(self):
|
||||
dt_utc = datetime.datetime.utcnow()
|
||||
if __Region__ == 'Hong Kong':
|
||||
# convert UTC to local hk time - at HKT 5.30am, all news are available
|
||||
dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(5.5/24)
|
||||
# dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(5.5/24)
|
||||
elif __Region__ == 'Vancouver':
|
||||
# convert UTC to local Vancouver time - at PST time 5.30am, all news are available
|
||||
dt_local = dt_utc + datetime.timedelta(-8.0/24) - datetime.timedelta(5.5/24)
|
||||
#dt_local = dt_utc.astimezone(pytz.timezone('America/Vancouver')) - datetime.timedelta(5.5/24)
|
||||
elif __Region__ == 'Toronto':
|
||||
# convert UTC to local Toronto time - at EST time 8.30am, all news are available
|
||||
dt_local = dt_utc + datetime.timedelta(-5.0/24) - datetime.timedelta(8.5/24)
|
||||
#dt_local = dt_utc.astimezone(pytz.timezone('America/Toronto')) - datetime.timedelta(8.5/24)
|
||||
return dt_local
|
||||
|
||||
def get_fetchdate(self):
|
||||
return self.get_dtlocal().strftime("%Y%m%d")
|
||||
|
||||
def get_fetchformatteddate(self):
|
||||
return self.get_dtlocal().strftime("%Y-%m-%d")
|
||||
|
||||
def get_fetchday(self):
|
||||
return self.get_dtlocal().strftime("%d")
|
||||
|
||||
def get_cover_url(self):
|
||||
if __Region__ == 'Hong Kong':
|
||||
cover = 'http://news.mingpao.com/' + self.get_fetchdate() + '/' + self.get_fetchdate() + '_' + self.get_fetchday() + 'gacov.jpg'
|
||||
elif __Region__ == 'Vancouver':
|
||||
cover = 'http://www.mingpaovan.com/ftp/News/' + self.get_fetchdate() + '/' + self.get_fetchday() + 'pgva1s.jpg'
|
||||
elif __Region__ == 'Toronto':
|
||||
cover = 'http://www.mingpaotor.com/ftp/News/' + self.get_fetchdate() + '/' + self.get_fetchday() + 'pgtas.jpg'
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
try:
|
||||
br.open(cover)
|
||||
except:
|
||||
cover = None
|
||||
return cover
|
||||
|
||||
def parse_index(self):
|
||||
feeds = []
|
||||
dateStr = self.get_fetchdate()
|
||||
|
||||
if __Region__ == 'Hong Kong':
|
||||
if __UseLife__:
|
||||
for title, url, keystr in [(u'\u8981\u805e Headline', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalga', 'nal'),
|
||||
(u'\u6e2f\u805e Local', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgb', 'nal'),
|
||||
(u'\u6559\u80b2 Education', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgf', 'nal'),
|
||||
(u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr', 'nal'),
|
||||
(u'\u8ad6\u58c7 Forum', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalfa', 'nal'),
|
||||
(u'\u4e2d\u570b China', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalca', 'nal'),
|
||||
(u'\u570b\u969b World', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalta', 'nal'),
|
||||
(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal'),
|
||||
(u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'),
|
||||
(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal'),
|
||||
(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')]:
|
||||
articles = self.parse_section2(url, keystr)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
|
||||
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
|
||||
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
|
||||
articles = self.parse_section(url)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
else:
|
||||
for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
|
||||
(u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
|
||||
(u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]:
|
||||
articles = self.parse_section(url)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
|
||||
# special- editorial
|
||||
ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
|
||||
if ed_articles:
|
||||
feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
|
||||
|
||||
for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
|
||||
(u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
|
||||
(u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm')]:
|
||||
articles = self.parse_section(url)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
|
||||
# special - finance
|
||||
#fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
|
||||
fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
|
||||
if fin_articles:
|
||||
feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
|
||||
|
||||
for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
|
||||
(u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
|
||||
articles = self.parse_section(url)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
|
||||
# special - entertainment
|
||||
ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
|
||||
if ent_articles:
|
||||
feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
|
||||
|
||||
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
|
||||
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
|
||||
articles = self.parse_section(url)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
|
||||
|
||||
# special- columns
|
||||
col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn')
|
||||
if col_articles:
|
||||
feeds.append((u'\u5c08\u6b04 Columns', col_articles))
|
||||
elif __Region__ == 'Vancouver':
|
||||
for title, url in [(u'\u8981\u805e Headline', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VAindex.htm'),
|
||||
(u'\u52a0\u570b Canada', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VBindex.htm'),
|
||||
(u'\u793e\u5340 Local', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VDindex.htm'),
|
||||
(u'\u6e2f\u805e Hong Kong', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/HK-VGindex.htm'),
|
||||
(u'\u570b\u969b World', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VTindex.htm'),
|
||||
(u'\u4e2d\u570b China', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VCindex.htm'),
|
||||
(u'\u7d93\u6fdf Economics', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VEindex.htm'),
|
||||
(u'\u9ad4\u80b2 Sports', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VSindex.htm'),
|
||||
(u'\u5f71\u8996 Film/TV', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/HK-MAindex.htm'),
|
||||
(u'\u526f\u520a Supplements', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/WWindex.htm'),]:
|
||||
articles = self.parse_section3(url, 'http://www.mingpaovan.com/')
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
elif __Region__ == 'Toronto':
|
||||
for title, url in [(u'\u8981\u805e Headline', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TAindex.htm'),
|
||||
(u'\u52a0\u570b Canada', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TDindex.htm'),
|
||||
(u'\u793e\u5340 Local', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TFindex.htm'),
|
||||
(u'\u4e2d\u570b China', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TCAindex.htm'),
|
||||
(u'\u570b\u969b World', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TTAindex.htm'),
|
||||
(u'\u6e2f\u805e Hong Kong', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/HK-GAindex.htm'),
|
||||
(u'\u7d93\u6fdf Economics', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/THindex.htm'),
|
||||
(u'\u9ad4\u80b2 Sports', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TSindex.htm'),
|
||||
(u'\u5f71\u8996 Film/TV', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/HK-MAindex.htm'),
|
||||
(u'\u526f\u520a Supplements', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/WWindex.htm'),]:
|
||||
articles = self.parse_section3(url, 'http://www.mingpaotor.com/')
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
return feeds
|
||||
|
||||
# parse from news.mingpao.com
|
||||
def parse_section(self, url):
|
||||
dateStr = self.get_fetchdate()
|
||||
soup = self.index_to_soup(url)
|
||||
divs = soup.findAll(attrs={'class': ['bullet','bullet_grey']})
|
||||
current_articles = []
|
||||
included_urls = []
|
||||
divs.reverse()
|
||||
for i in divs:
|
||||
a = i.find('a', href = True)
|
||||
title = self.tag_to_string(a)
|
||||
url = a.get('href', False)
|
||||
url = 'http://news.mingpao.com/' + dateStr + '/' +url
|
||||
if url not in included_urls and url.rfind('Redirect') == -1:
|
||||
current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
|
||||
included_urls.append(url)
|
||||
current_articles.reverse()
|
||||
return current_articles
|
||||
|
||||
# parse from life.mingpao.com
|
||||
def parse_section2(self, url, keystr):
|
||||
self.get_fetchdate()
|
||||
soup = self.index_to_soup(url)
|
||||
a = soup.findAll('a', href=True)
|
||||
a.reverse()
|
||||
current_articles = []
|
||||
included_urls = []
|
||||
for i in a:
|
||||
title = self.tag_to_string(i)
|
||||
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
|
||||
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
|
||||
url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article
|
||||
current_articles.append({'title': title, 'url': url, 'description': ''})
|
||||
included_urls.append(url)
|
||||
current_articles.reverse()
|
||||
return current_articles
|
||||
|
||||
# parse from www.mingpaovan.com
|
||||
def parse_section3(self, url, baseUrl):
|
||||
self.get_fetchdate()
|
||||
soup = self.index_to_soup(url)
|
||||
divs = soup.findAll(attrs={'class': ['ListContentLargeLink']})
|
||||
current_articles = []
|
||||
included_urls = []
|
||||
divs.reverse()
|
||||
for i in divs:
|
||||
title = self.tag_to_string(i)
|
||||
urlstr = i.get('href', False)
|
||||
urlstr = baseUrl + '/' + urlstr.replace('../../../', '')
|
||||
if urlstr not in included_urls:
|
||||
current_articles.append({'title': title, 'url': urlstr, 'description': '', 'date': ''})
|
||||
included_urls.append(urlstr)
|
||||
current_articles.reverse()
|
||||
return current_articles
|
||||
|
||||
def parse_ed_section(self, url):
|
||||
self.get_fetchdate()
|
||||
soup = self.index_to_soup(url)
|
||||
a = soup.findAll('a', href=True)
|
||||
a.reverse()
|
||||
current_articles = []
|
||||
included_urls = []
|
||||
for i in a:
|
||||
title = self.tag_to_string(i)
|
||||
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
|
||||
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('nal') == -1):
|
||||
current_articles.append({'title': title, 'url': url, 'description': ''})
|
||||
included_urls.append(url)
|
||||
current_articles.reverse()
|
||||
return current_articles
|
||||
|
||||
def parse_fin_section(self, url):
|
||||
self.get_fetchdate()
|
||||
soup = self.index_to_soup(url)
|
||||
a = soup.findAll('a', href= True)
|
||||
current_articles = []
|
||||
included_urls = []
|
||||
for i in a:
|
||||
#url = 'http://www.mpfinance.com/cfm/' + i.get('href', False)
|
||||
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
|
||||
#if url not in included_urls and not url.rfind(dateStr) == -1 and url.rfind('index') == -1:
|
||||
if url not in included_urls and (not url.rfind('txt') == -1) and (not url.rfind('nal') == -1):
|
||||
title = self.tag_to_string(i)
|
||||
current_articles.append({'title': title, 'url': url, 'description':''})
|
||||
included_urls.append(url)
|
||||
return current_articles
|
||||
|
||||
def parse_ent_section(self, url):
|
||||
        self.get_fetchdate()
        soup = self.index_to_soup(url)
        a = soup.findAll('a', href=True)
        a.reverse()
        current_articles = []
        included_urls = []
        for i in a:
            title = self.tag_to_string(i)
            url = 'http://ol.mingpao.com/cfm/' + i.get('href', False)
            if (url not in included_urls) and (url.rfind('.txt') != -1) and (url.rfind('star') != -1):
                current_articles.append({'title': title, 'url': url, 'description': ''})
                included_urls.append(url)
        current_articles.reverse()
        return current_articles

    def parse_col_section(self, url):
        self.get_fetchdate()
        soup = self.index_to_soup(url)
        a = soup.findAll('a', href=True)
        a.reverse()
        current_articles = []
        included_urls = []
        for i in a:
            title = self.tag_to_string(i)
            url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
            if (url not in included_urls) and (url.rfind('.txt') != -1) and (url.rfind('ncl') != -1):
                current_articles.append({'title': title, 'url': url, 'description': ''})
                included_urls.append(url)
        current_articles.reverse()
        return current_articles

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        # strip explicit widths and legacy align="absmiddle" table attributes
        for item in soup.findAll(width=True):
            del item['width']
        for item in soup.findAll(align='absmiddle'):
            del item['align']
        return soup

    def create_opf(self, feeds, dir=None):
        if dir is None:
            dir = self.output_dir
        if __UseChineseTitle__:
            if __Region__ == 'Hong Kong':
                title = u'\u660e\u5831 (\u9999\u6e2f)'
            elif __Region__ == 'Vancouver':
                title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)'
            elif __Region__ == 'Toronto':
                title = u'\u660e\u5831 (\u591a\u502b\u591a)'
        else:
            title = self.short_title()
        # if not generating a periodical, force date to apply in title
        if not __MakePeriodical__:
            title = title + ' ' + self.get_fetchformatteddate()
        mi = MetaInformation(title, [self.publisher])
        mi.publisher = self.publisher
        mi.author_sort = self.publisher
        if __MakePeriodical__:
            mi.publication_type = 'periodical:' + self.publication_type + ':' + self.short_title()
        else:
            mi.publication_type = self.publication_type + ':' + self.short_title()
        #mi.timestamp = nowf()
        mi.timestamp = self.get_dtlocal()
        mi.comments = self.description
        if not isinstance(mi.comments, unicode):
            mi.comments = mi.comments.decode('utf-8', 'replace')
        #mi.pubdate = nowf()
        mi.pubdate = self.get_dtlocal()
        opf_path = os.path.join(dir, 'index.opf')
        ncx_path = os.path.join(dir, 'index.ncx')
        opf = OPFCreator(dir, mi)
        # Add mastheadImage entry to <guide> section
        mp = getattr(self, 'masthead_path', None)
        if mp is not None and os.access(mp, os.R_OK):
            from calibre.ebooks.metadata.opf2 import Guide
            ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
            ref.type = 'masthead'
            ref.title = 'Masthead Image'
            opf.guide.append(ref)

        manifest = [os.path.join(dir, 'feed_%d' % i) for i in range(len(feeds))]
        manifest.append(os.path.join(dir, 'index.html'))
        manifest.append(os.path.join(dir, 'index.ncx'))

        # Get cover
        cpath = getattr(self, 'cover_path', None)
        if cpath is None:
            pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
            if self.default_cover(pf):
                cpath = pf.name
        if cpath is not None and os.access(cpath, os.R_OK):
            opf.cover = cpath
            manifest.append(cpath)

        # Get masthead
        mpath = getattr(self, 'masthead_path', None)
        if mpath is not None and os.access(mpath, os.R_OK):
            manifest.append(mpath)

        opf.create_manifest_from_files_in(manifest)
        for mani in opf.manifest:
            if mani.path.endswith('.ncx'):
                mani.id = 'ncx'
            if mani.path.endswith('mastheadImage.jpg'):
                mani.id = 'masthead-image'
        entries = ['index.html']
        toc = TOC(base_path=dir)
        self.play_order_counter = 0
        self.play_order_map = {}

        def feed_index(num, parent):
            f = feeds[num]
            for j, a in enumerate(f):
                if getattr(a, 'downloaded', False):
                    adir = 'feed_%d/article_%d/' % (num, j)
                    auth = a.author
                    if not auth:
                        auth = None
                    desc = a.text_summary
                    if not desc:
                        desc = None
                    else:
                        desc = self.description_limiter(desc)
                    entries.append('%sindex.html' % adir)
                    po = self.play_order_map.get(entries[-1], None)
                    if po is None:
                        self.play_order_counter += 1
                        po = self.play_order_counter
                    parent.add_item('%sindex.html' % adir, None,
                            a.title if a.title else _('Untitled Article'),
                            play_order=po, author=auth, description=desc)
                    last = os.path.join(self.output_dir, ('%sindex.html' % adir).replace('/', os.sep))
                    for sp in a.sub_pages:
                        prefix = os.path.commonprefix([opf_path, sp])
                        relp = sp[len(prefix):]
                        entries.append(relp.replace(os.sep, '/'))
                        last = sp

                    if os.path.exists(last):
                        with open(last, 'rb') as fi:
                            src = fi.read().decode('utf-8')
                        soup = BeautifulSoup(src)
                        body = soup.find('body')
                        if body is not None:
                            prefix = '/'.join('..' for i in range(2 * len(re.findall(r'link\d+', last))))
                            templ = self.navbar.generate(True, num, j, len(f),
                                    not self.has_single_feed,
                                    a.orig_url, self.publisher, prefix=prefix,
                                    center=self.center_navbar)
                            elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
                            body.insert(len(body.contents), elem)
                            with open(last, 'wb') as fi:
                                fi.write(unicode(soup).encode('utf-8'))

        if len(feeds) == 0:
            raise Exception('All feeds are empty, aborting.')

        if len(feeds) > 1:
            for i, f in enumerate(feeds):
                entries.append('feed_%d/index.html' % i)
                po = self.play_order_map.get(entries[-1], None)
                if po is None:
                    self.play_order_counter += 1
                    po = self.play_order_counter
                auth = getattr(f, 'author', None)
                if not auth:
                    auth = None
                desc = getattr(f, 'description', None)
                if not desc:
                    desc = None
                feed_index(i, toc.add_item('feed_%d/index.html' % i, None,
                        f.title, play_order=po, description=desc, author=auth))
        else:
            entries.append('feed_%d/index.html' % 0)
            feed_index(0, toc)

        for i, p in enumerate(entries):
            entries[i] = os.path.join(dir, p.replace('/', os.sep))
        opf.create_spine(entries)
        opf.set_toc(toc)

        with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
            opf.render(opf_file, ncx_file)
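The play_order bookkeeping in create_opf above gives every spine entry a monotonically increasing NCX play order, reusing a previously assigned number when the same href comes around again. A minimal standalone sketch of that pattern, with the map population made explicit (the original never fills the map, so every entry simply receives the next counter value):

    play_order_map = {}
    play_order_counter = 0

    def play_order_for(href):
        # Reuse the number assigned to a repeated href, otherwise hand out
        # the next one in document order.
        global play_order_counter
        po = play_order_map.get(href)
        if po is None:
            play_order_counter += 1
            po = play_order_map[href] = play_order_counter
        return po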
40
recipes/noticias_r7.recipe
Normal file
@ -0,0 +1,40 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe

class PortalR7(BasicNewsRecipe):
    title = 'Noticias R7'
    __author__ = 'Diniz Bortolotto'
    description = 'Noticias Portal R7'
    oldest_article = 2
    max_articles_per_feed = 20
    encoding = 'utf8'
    publisher = 'Rede Record'
    category = 'news, Brazil'
    language = 'pt_BR'
    publication_type = 'newsportal'
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True
    remove_attributes = ['style']

    feeds = [
        (u'Brasil', u'http://www.r7.com/data/rss/brasil.xml'),
        (u'Economia', u'http://www.r7.com/data/rss/economia.xml'),
        (u'Internacional', u'http://www.r7.com/data/rss/internacional.xml'),
        (u'Tecnologia e Ci\xeancia', u'http://www.r7.com/data/rss/tecnologiaCiencia.xml')
    ]
    reverse_article_order = True

    keep_only_tags = [dict(name='div', attrs={'class':'materia'})]
    remove_tags = [
        dict(id=['espalhe', 'report-erro']),
        dict(name='ul', attrs={'class':'controles'}),
        dict(name='ul', attrs={'class':'relacionados'}),
        dict(name='div', attrs={'class':'materia_banner'}),
        dict(name='div', attrs={'class':'materia_controles'})
    ]

    preprocess_regexps = [
        (re.compile(r'<div class="materia">.*<div class="materia_cabecalho">', re.DOTALL|re.IGNORECASE),
         lambda match: '<div class="materia"><div class="materia_cabecalho">')
    ]
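The preprocess_regexps entry above collapses everything between the opening of the materia div and its materia_cabecalho child in a single DOTALL match; a quick illustration on a hypothetical snippet:

    import re
    rx = re.compile(r'<div class="materia">.*<div class="materia_cabecalho">',
                    re.DOTALL | re.IGNORECASE)
    html = ('<div class="materia"><script>ads()</script>'
            '<div class="materia_cabecalho">Titulo</div></div>')
    print(rx.sub('<div class="materia"><div class="materia_cabecalho">', html))
    # -> <div class="materia"><div class="materia_cabecalho">Titulo</div></div>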
24
recipes/noticias_unb.recipe
Normal file
@ -0,0 +1,24 @@
# -*- coding: utf-8 -*-

from calibre.web.feeds.news import BasicNewsRecipe

class NoticiasUnB(BasicNewsRecipe):
    title = 'Noticias UnB'
    __author__ = 'Diniz Bortolotto'
    description = 'Noticias da UnB'
    oldest_article = 5
    max_articles_per_feed = 20
    category = 'news, educational, Brazil'
    language = 'pt_BR'
    publication_type = 'newsportal'
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True

    feeds = [(u'UnB Agência', u'http://www.unb.br/noticias/rss/noticias.rss')]

    reverse_article_order = True

    def print_version(self, url):
        return url.replace('http://', 'http://www.unb.br/noticias/print_email/imprimir.php?u=http://')
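The print_version hook above is a plain string substitution; a minimal sketch of its effect (the article path and id are hypothetical):

    url = 'http://www.unb.br/noticias/unbagencia/unbagencia.php?id=1234'
    print(url.replace('http://',
        'http://www.unb.br/noticias/print_email/imprimir.php?u=http://'))
    # -> http://www.unb.br/noticias/print_email/imprimir.php?u=http://www.unb.br/noticias/unbagencia/unbagencia.php?id=1234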
72
recipes/pecat.recipe
Normal file
@ -0,0 +1,72 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
www.pecat.co.rs
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe

class Pecat_rs(BasicNewsRecipe):
    title = 'Pecat'
    __author__ = 'Darko Miletic'
    description = 'Internet portal slobodne Srbije'
    oldest_article = 15
    max_articles_per_feed = 100
    language = 'sr'
    encoding = 'utf-8'
    no_stylesheets = True
    use_embedded_content = True
    masthead_url = 'http://www.pecat.co.rs/wp-content/themes/zenko-v1/images/logo.jpg'
    publication_type = 'magazine'
    extra_css = """
        @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
        @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
        body{font-family: Arial,Helvetica,sans1,sans-serif}
        img{display: block; margin-bottom: 1em; margin-top: 1em}
        p{display: block; margin-bottom: 1em; margin-top: 1em}
    """

    conversion_options = {
          'comment'  : description
        , 'tags'     : 'politika, Srbija'
        , 'publisher': 'Pecat'
        , 'language' : language
    }

    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    feeds = [(u'Clanci', u'http://www.pecat.co.rs/feed/')]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll('a'):
            limg = item.find('img')
            if item.string is not None:
                str = item.string
                item.replaceWith(str)
            else:
                if limg:
                    limg.extract()
                    item.replaceWith(limg)
                else:
                    str = self.tag_to_string(item)
                    item.replaceWith(str)
        for item in soup.findAll('img'):
            dad = item.findParent('p')
            if dad:
                mydad = dad.parent
                myIndex = mydad.contents.index(dad)
                item.extract()
                mydad.insert(myIndex, item)
        for item in soup.findAll('strong'):
            dad = item.findParent('p')
            if dad:
                mydad = dad.parent
                myIndex = mydad.contents.index(dad)
                item.extract()
                item.name = 'h4'
                mydad.insert(myIndex, item)
        return soup
@ -26,6 +26,7 @@ class Perfil(BasicNewsRecipe):
        .foto1 h1{font-size: x-small}
        h1{font-family: Georgia,"Times New Roman",serif}
        img{margin-bottom: 0.4em}
        .hora{font-size: x-small; color: red}
        """

    conversion_options = {
@ -60,7 +61,26 @@ class Perfil(BasicNewsRecipe):
        ,(u'Tecnologia' , u'http://www.perfil.com/rss/tecnologia.xml' )
    ]

    def get_article_url(self, article):
        return article.get('guid', None)

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll('a'):
            limg = item.find('img')
            if item.string is not None:
                str = item.string
                item.replaceWith(str)
            else:
                if limg:
                    item.name = 'div'
                    item.attrs = []
                else:
                    str = self.tag_to_string(item)
                    item.replaceWith(str)
        for item in soup.findAll('img'):
            if not item.has_key('alt'):
                item['alt'] = 'image'
        return soup
@ -1,85 +1,45 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
'''
philly.com/inquirer/
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.web.feeds.news import BasicNewsRecipe

class Philly(BasicNewsRecipe):

    title = 'Philadelphia Inquirer'
    __author__ = 'RadikalDissent and Sujata Raman'
class AdvancedUserRecipe1308312288(BasicNewsRecipe):
    title = u'Philadelphia Inquirer'
    __author__ = 'sexymax15'
    language = 'en'
    description = 'Daily news from the Philadelphia Inquirer'
    no_stylesheets = True
    oldest_article = 15
    max_articles_per_feed = 20
    use_embedded_content = False
    oldest_article = 1
    max_articles_per_feed = 25
    remove_empty_feeds = True
    no_stylesheets = True
    remove_javascript = True

    extra_css = '''
        h1{font-family:verdana,arial,helvetica,sans-serif; font-size: large;}
        h2{font-family:verdana,arial,helvetica,sans-serif; font-size: small;}
        .body-content{font-family:verdana,arial,helvetica,sans-serif; font-size: small;}
        .byline {font-size: small; color: #666666; font-style:italic; }
        .lastline {font-size: small; color: #666666; font-style:italic;}
        .contact {font-size: small; color: #666666;}
        .contact p {font-size: small; color: #666666;}
        #photoCaption { font-family:verdana,arial,helvetica,sans-serif; font-size:x-small;}
        .photoCaption { font-family:verdana,arial,helvetica,sans-serif; font-size:x-small;}
        #photoCredit{ font-family:verdana,arial,helvetica,sans-serif; font-size:x-small; color:#666666;}
        .photoCredit{ font-family:verdana,arial,helvetica,sans-serif; font-size:x-small; color:#666666;}
        .article_timestamp{font-size:x-small; color:#666666;}
        a {font-family:verdana,arial,helvetica,sans-serif; font-size: x-small;}
    '''
    # remove_tags_before = {'class':'article_timestamp'}
    #remove_tags_after = {'class':'graylabel'}
    keep_only_tags = [dict(name=['h1','p'])]
    remove_tags = [dict(name=['hr','dl','dt','img','meta','iframe','link','script','form','input','label']),
        dict(id=['toggleConfirmEmailDiv','toggleTOS','toggleUsernameMsgDiv','toggleConfirmYear','navT1_philly','secondaryNav','navPlacement','globalPrimaryNav'
            ,'ugc-footer-philly','bv_footer_include','footer','header',
            'container_rag_bottom','section_rectangle','contentrightside'])
        ,{'class':['megamenu3 megamenu','container misc','container_inner misc_inner'
            ,'misccontainer_left_32','headlineonly','misccontainer_middle_32'
            ,'misccontainer_right_32','headline formBegin',
            'post_balloon','relatedlist','linkssubhead','b_sq','dotted-rule-above'
            ,'container','headlines-digest','graylabel','container_inner'
            ,'rlinks_colorbar1','rlinks_colorbar2','supercontainer','container_5col_left','container_image_left',
            'digest-headline2','digest-lead','container_5col_leftmiddle',
            'container_5col_middlemiddle','container_5col_rightmiddle'
            ,'container_5col_right','divclear','supercontainer_outer force-width',
            'supercontainer','containertitle kicker-title',
            'pollquestion','pollchoice','photomore','pollbutton','container rssbox','containertitle video ',
            'containertitle_image ','container_tabtwo','selected'
            ,'shadetabs','selected','tabcontentstyle','tabcontent','inner_container'
            ,'arrow','container_ad','containertitlespacer','adUnit','tracking','sitemsg_911 clearfix']}]

    keep_only_tags = [
        dict(name='div', attrs={'class':'story-content'}),
        dict(name='div', attrs={'id': 'contentinside'})
    ]
    extra_css = """
        h1{font-family: Georgia,serif; font-size: xx-large}

    remove_tags = [
        dict(name='div', attrs={'class':['linkssubhead','post_balloon','relatedlist','pollquestion','b_sq']}),
        dict(name='dl', attrs={'class':'relatedlist'}),
        dict(name='div', attrs={'id':['photoNav','sidebar_adholder']}),
        dict(name='a', attrs={'class': ['headlineonly','bl']}),
        dict(name='img', attrs={'class':'img_noborder'})
    ]
    # def print_version(self, url):
    #     return url + '?viewAll=y'
    """

    feeds = [
        ('Front Page', 'http://www.philly.com/inquirer_front_page.rss'),
        ('Business', 'http://www.philly.com/inq_business.rss'),
        #('News', 'http://www.philly.com/inquirer/news/index.rss'),
        ('Nation', 'http://www.philly.com/inq_news_world_us.rss'),
        ('Local', 'http://www.philly.com/inquirer_local.rss'),
        ('Health', 'http://www.philly.com/inquirer_health_science.rss'),
        ('Education', 'http://www.philly.com/inquirer_education.rss'),
        ('Editorial and opinion', 'http://www.philly.com/inq_news_editorial.rss'),
        ('Sports', 'http://www.philly.com/inquirer_sports.rss')
    ]
    feeds = [(u'News', u'http://www.philly.com/philly_news.rss')]

    def get_article_url(self, article):
        ans = article.link

        try:
            self.log('Looking for full story link in', ans)
            soup = self.index_to_soup(ans)
            x = soup.find(text="View All")

            if x is not None:
                ans = ans + '?viewAll=y'
                self.log('Found full story link', ans)
        except:
            pass
        return ans

    def postprocess_html(self, soup, first):

        for tag in soup.findAll(name='div', attrs={'class':"container_ate_qandatitle"}):
            tag.extract()
        for tag in soup.findAll(name='br'):
            tag.extract()

        return soup
80
recipes/scmp.recipe
Normal file
@ -0,0 +1,80 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
scmp.com
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe

class SCMP(BasicNewsRecipe):
    title = 'South China Morning Post'
    __author__ = 'llam'
    description = "SCMP.com, Hong Kong's premier online English daily provides exclusive up-to-date news, audio video news, podcasts, RSS Feeds, Blogs, breaking news, top stories, award winning news and analysis on Hong Kong and China."
    publisher = 'South China Morning Post Publishers Ltd.'
    category = 'SCMP, Online news, Hong Kong News, China news, Business news, English newspaper, daily newspaper, Lifestyle news, Sport news, Audio Video news, Asia news, World news, economy news, investor relations news, RSS Feeds'
    oldest_article = 2
    delay = 1
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    language = 'en_CN'
    remove_empty_feeds = True
    needs_subscription = True
    publication_type = 'newspaper'
    masthead_url = 'http://www.scmp.com/images/logo_scmp_home.gif'
    extra_css = ' body{font-family: Arial,Helvetica,sans-serif } '

    conversion_options = {
          'comment'   : description
        , 'tags'      : category
        , 'publisher' : publisher
        , 'language'  : language
    }

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
        #br.set_debug_http(True)
        #br.set_debug_responses(True)
        #br.set_debug_redirects(True)
        if self.username is not None and self.password is not None:
            br.open('http://www.scmp.com/portal/site/SCMP/')
            br.select_form(name='loginForm')
            br['Login'] = self.username
            br['Password'] = self.password
            br.submit()
        return br

    remove_attributes = ['width','height','border']

    keep_only_tags = [
        dict(attrs={'id':['ART','photoBox']})
        ,dict(attrs={'class':['article_label','article_byline','article_body']})
    ]

    preprocess_regexps = [
        (re.compile(r'<P><table((?!<table).)*class="embscreen"((?!</table>).)*</table>', re.DOTALL|re.IGNORECASE),
         lambda match: ''),
    ]

    feeds = [
        (u'Business' , u'http://www.scmp.com/rss/business.xml' )
        ,(u'Hong Kong' , u'http://www.scmp.com/rss/hong_kong.xml' )
        ,(u'China' , u'http://www.scmp.com/rss/china.xml' )
        ,(u'Asia & World' , u'http://www.scmp.com/rss/news_asia_world.xml')
        ,(u'Opinion' , u'http://www.scmp.com/rss/opinion.xml' )
        ,(u'LifeSTYLE' , u'http://www.scmp.com/rss/lifestyle.xml' )
        ,(u'Sport' , u'http://www.scmp.com/rss/sport.xml' )
    ]

    def print_version(self, url):
        rpart, sep, rest = url.rpartition('&')
        return rpart #+ sep + urllib.quote_plus(rest)

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        items = soup.findAll(src="/images/label_icon.gif")
        [item.extract() for item in items]
        return self.adeify_images(soup)
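print_version above truncates the article URL at its last '&', dropping the final query parameter (the commented-out tail would have re-appended it quoted); its effect on a hypothetical query string:

    url = 'http://www.scmp.com/vgn-ext-templating/v/index.jsp?vgnextoid=abc123&ss=Hong+Kong&s=News'
    rpart, sep, rest = url.rpartition('&')
    print(rpart)
    # -> http://www.scmp.com/vgn-ext-templating/v/index.jsp?vgnextoid=abc123&ss=Hong+Kong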
40
recipes/sizinti_derigisi.recipe
Normal file
@ -0,0 +1,40 @@
# -*- coding: utf-8 -*-

from calibre.web.feeds.news import BasicNewsRecipe

class Sizinti(BasicNewsRecipe):
    title = u'Sızıntı Dergisi'
    __author__ = u'thomass'
    description = 'a Turkey based daily for national and international news in the fields of business, diplomacy, politics, culture, arts, sports and economics, in addition to commentaries, specials and features'
    oldest_article = 30
    max_articles_per_feed = 80
    no_stylesheets = True
    #delay = 1
    #use_embedded_content = False
    encoding = 'utf-8'
    #publisher = ' '
    category = 'dergi, ilim, kültür, bilim,Türkçe'
    language = 'tr'
    publication_type = 'magazine'
    #extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
    #keep_only_tags = [dict(name='h1', attrs={'class':['georgia_30']})]

    #remove_attributes = ['aria-describedby']
    #remove_tags = [dict(name='div', attrs={'id':['renk10']}) ]
    cover_img_url = 'http://www.sizinti.com.tr/images/sizintiprint.jpg'
    masthead_url = 'http://www.sizinti.com.tr/images/sizintiprint.jpg'
    remove_tags_before = dict(id='content-right')

    #remove_empty_feeds = True
    #remove_attributes = ['width','height']

    feeds = [
        (u'Sızıntı', u'http://www.sizinti.com.tr/rss'),
    ]

    #def preprocess_html(self, soup):
    #    return self.adeify_images(soup)
    #def print_version(self, url): # there is a problem caused by table format
    #    return url.replace('http://www.todayszaman.com/newsDetail_getNewsById.action?load=detay&', 'http://www.todayszaman.com/newsDetail_openPrintPage.action?')
@ -1,94 +1,67 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
spiegel.de
'''

from calibre.web.feeds.news import BasicNewsRecipe


class Spiegel_int(BasicNewsRecipe):
    title = 'Spiegel Online International'
    __author__ = 'Darko Miletic and Sujata Raman'
    description = "News and POV from Europe's largest newsmagazine"
    description = "Daily news, analysis and opinion from Europe's leading newsmagazine and Germany's top news Web site"
    oldest_article = 7
    max_articles_per_feed = 100
    language = 'en'

    language = 'en_DE'
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'cp1252'
    publisher = 'SPIEGEL ONLINE GmbH'
    category = 'news, politics, Germany'
    lang = 'en'
    recursions = 1
    match_regexps = [r'http://www.spiegel.de/.*-[1-9],00.html']
    masthead_url = 'http://www.spiegel.de/static/sys/v9/spiegelonline_logo.png'
    publication_type = 'magazine'

    conversion_options = {
        'comments'     : description
        ,'tags'        : category
        ,'language'    : lang
        ,'publisher'   : publisher
        ,'pretty_print': True
        ,'language'    : language
        ,'publisher'   : publisher
    }

    extra_css = '''
        #spArticleColumn{font-family:verdana,arial,helvetica,geneva,sans-serif ; }
        #spArticleContent{font-family: Verdana,Arial,Helvetica,Geneva,sans-serif}
        h1{color:#666666; font-weight:bold;}
        h2{color:#990000;}
        h3{color:#990000;}
        h4 {color:#990000;}
        a{color:#990000;}
        .spAuthor{font-style:italic;}
        #spIntroTeaser{font-weight:bold;}
        #spIntroTeaser{font-weight:bold}
        .spCredit{color:#666666; font-size:x-small;}
        .spShortDate{font-size:x-small;}
        .spArticleImageBox {font-size:x-small;}
        .spPhotoGallery{font-size:x-small; color:#990000 ;}
    '''

    keep_only_tags = [
        dict(name='div', attrs={'id': ['spArticleImageBox spAssetAlignleft','spArticleColumn']}),
    ]

    remove_tags = [
        dict(name='div', attrs={'id':['spSocialBookmark','spArticleFunctions','spMultiPagerHeadlines',]}),
        dict(name='div', attrs={'class':['spCommercial spM520','spArticleCredit','spPicZoom']}),
    ]

    feeds = [(u'Spiegel Online', u'http://www.spiegel.de/schlagzeilen/rss/0,5291,676,00.xml')]

    def postprocess_html(self, soup, first):

        for tag in soup.findAll(name='div', attrs={'id':"spMultiPagerControl"}):
            tag.extract()

        p = soup.find(name='p', attrs={'id':'spIntroTeaser'})

        if p.string is not None:
            t = p.string.rpartition(':')[0]

            if 'Part' in t:
                if soup.h1 is not None:
                    soup.h1.extract()
                if soup.h2 is not None:
                    soup.h2.extract()
                functag = soup.find(name='div', attrs={'id':"spArticleFunctions"})
                if functag is not None:
                    functag.extract()
                auttag = soup.find(name='p', attrs={'class':"spAuthor"})
                if auttag is not None:
                    auttag.extract()

        pictag = soup.find(name='div', attrs={'id':"spArticleTopAsset"})
        if pictag is not None:
            pictag.extract()
    keep_only_tags = [dict(attrs={'id':'spArticleContent'})]
    remove_tags_after = dict(attrs={'id':'spArticleBody'})
    remove_tags = [dict(name=['meta','base','iframe','embed','object'])]
    remove_attributes = ['clear']
    feeds = [(u'Spiegel Online', u'http://www.spiegel.de/international/index.rss')]

    def print_version(self, url):
        main, sep, rest = url.rpartition(',')
        rmain, rsep, rrest = main.rpartition(',')
        return rmain + ',druck-' + rrest + ',' + rest

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll('a'):
            if item.string is not None:
                str = item.string
                item.replaceWith(str)
            else:
                str = self.tag_to_string(item)
                item.replaceWith(str)
        return soup

    # def print_version(self, url):
    #     main, sep, rest = url.rpartition(',')
    #     rmain, rsep, rrest = main.rpartition(',')
    #     return rmain + ',druck-' + rrest + ',' + rest
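The new Spiegel print_version splits on the last two commas of the ',<id>,00.html' article URLs and re-inserts 'druck-' before the id; traced on a hypothetical article URL:

    url = 'http://www.spiegel.de/international/europe/0,1518,772699,00.html'
    main, sep, rest = url.rpartition(',')      # main ends in ',772699', rest = '00.html'
    rmain, rsep, rrest = main.rpartition(',')  # rrest = '772699'
    print(rmain + ',druck-' + rrest + ',' + rest)
    # -> http://www.spiegel.de/international/europe/0,1518,druck-772699,00.html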
@ -56,6 +56,7 @@ class TelegraphUK(BasicNewsRecipe):
        ,(u'Sport' , u'http://www.telegraph.co.uk/sport/rss' )
        ,(u'Earth News' , u'http://www.telegraph.co.uk/earth/earthnews/rss' )
        ,(u'Comment' , u'http://www.telegraph.co.uk/comment/rss' )
        ,(u'Travel' , u'http://www.telegraph.co.uk/travel/rss' )
        ,(u'How about that?', u'http://www.telegraph.co.uk/news/newstopics/howaboutthat/rss' )
    ]
53
recipes/todays_zaman.recipe
Normal file
@ -0,0 +1,53 @@
from calibre.web.feeds.news import BasicNewsRecipe

class TodaysZaman_en(BasicNewsRecipe):
    title = u'Todays Zaman'
    __author__ = u'thomass'
    description = 'a Turkey based daily for national and international news in the fields of business, diplomacy, politics, culture, arts, sports and economics, in addition to commentaries, specials and features'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    #delay = 1
    #use_embedded_content = False
    encoding = 'utf-8'
    #publisher = ' '
    category = 'news, haberler,TR,gazete'
    language = 'en_TR'
    publication_type = 'newspaper'
    #extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
    #keep_only_tags = [dict(name='font', attrs={'class':['newsDetail','agenda2NewsSpot']}),dict(name='span', attrs={'class':['agenda2Title']}),dict(name='div', attrs={'id':['gallery']})]
    keep_only_tags = [
        dict(name='h1', attrs={'class':['georgia_30']}),
        dict(name='span', attrs={'class':['left-date','detailDate','detailCName']}),
        dict(name='td', attrs={'id':['newsSpot','newsText']})
    ]  # to include images: ,dict(name='div', attrs={'id':['gallery','detailDate',]})

    remove_attributes = ['aria-describedby']
    remove_tags = [
        dict(name='img', attrs={'src':['/images/icon_print.gif',
            'http://gmodules.com/ig/images/plus_google.gif',
            '/images/template/jazz/agenda/i1.jpg',
            'http://medya.todayszaman.com/todayszaman/images/logo/logo.bmp']}),
        dict(name='hr', attrs={'class':['interactive-hr']}),
        dict(name='div', attrs={'class':['empty_height_18','empty_height_9']}),
        dict(name='td', attrs={'id':['superTitle']}),
        dict(name='span', attrs={'class':['t-count enabled t-count-focus']}),
        dict(name='a', attrs={'id':['count']}),
        dict(name='td', attrs={'class':['left-date']})
    ]
    cover_img_url = 'http://medya.todayszaman.com/todayszaman/images/logo/logo.bmp'
    masthead_url = 'http://medya.todayszaman.com/todayszaman/images/logo/logo.bmp'
    remove_empty_feeds = True
    # remove_attributes = ['width','height']

    feeds = [
        (u'Home', u'http://www.todayszaman.com/rss?sectionId=0'),
        (u'News', u'http://www.todayszaman.com/rss?sectionId=100'),
        (u'Business', u'http://www.todayszaman.com/rss?sectionId=105'),
        (u'Interviews', u'http://www.todayszaman.com/rss?sectionId=8'),
        (u'Columnists', u'http://www.todayszaman.com/rss?sectionId=6'),
        (u'Op-Ed', u'http://www.todayszaman.com/rss?sectionId=109'),
        (u'Arts & Culture', u'http://www.todayszaman.com/rss?sectionId=110'),
        (u'Expat Zone', u'http://www.todayszaman.com/rss?sectionId=132'),
        (u'Sports', u'http://www.todayszaman.com/rss?sectionId=5'),
        (u'Features', u'http://www.todayszaman.com/rss?sectionId=116'),
        (u'Travel', u'http://www.todayszaman.com/rss?sectionId=117'),
        (u'Leisure', u'http://www.todayszaman.com/rss?sectionId=118'),
        (u'Weird But True', u'http://www.todayszaman.com/rss?sectionId=134'),
        (u'Life', u'http://www.todayszaman.com/rss?sectionId=133'),
        (u'Health', u'http://www.todayszaman.com/rss?sectionId=126'),
        (u'Press Review', u'http://www.todayszaman.com/rss?sectionId=130'),
        (u'Todays think tanks', u'http://www.todayszaman.com/rss?sectionId=159'),
    ]

    #def preprocess_html(self, soup):
    #    return self.adeify_images(soup)
    #def print_version(self, url): # there is a problem caused by table format
    #    return url.replace('http://www.todayszaman.com/newsDetail_getNewsById.action?load=detay&', 'http://www.todayszaman.com/newsDetail_openPrintPage.action?')
25
recipes/words_without_borders.recipe
Normal file
@ -0,0 +1,25 @@
#recipe created by sexymax15.....sexymax15@gmail.com
#Words without Borders recipe

from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1308302002(BasicNewsRecipe):
    title = u'Words Without Borders'
    language = 'en'
    __author__ = 'sexymax15'
    oldest_article = 90
    max_articles_per_feed = 30
    use_embedded_content = False

    remove_empty_feeds = True
    no_stylesheets = True
    remove_javascript = True
    keep_only_tags = [{'class':'span-14 article'}]
    remove_tags_after = [{'class':'addthis_toolbox addthis_default_style no_print'}]
    remove_tags = [{'class':['posterous_quote_citation','button']}]
    extra_css = """
        h1{font-family: Georgia,serif; font-size: large}
        h2{font-family: Georgia,serif; font-size: large}
    """

    feeds = [(u'wwb', u'http://feeds.feedburner.com/wwborders?format=xml')]
@ -2,6 +2,7 @@
__license__ = 'GPL v3'
__copyright__ = '2010, matek09, matek09@gmail.com'
__copyright__ = 'Modified 2011, Mariusz Wolek <mariusz_dot_wolek @ gmail dot com>'

from calibre.web.feeds.news import BasicNewsRecipe
import re
@ -30,15 +31,17 @@ class Wprost(BasicNewsRecipe):
    keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'def element-autor'}))'''

    preprocess_regexps = [(re.compile(r'style="display: none;"'), lambda match: ''),
        (re.compile(r'display: block;'), lambda match: '')]
        (re.compile(r'display: block;'), lambda match: ''),
        (re.compile(r'\<td\>\<tr\>\<\/table\>'), lambda match: ''),
        (re.compile(r'\<table .*?\>'), lambda match: ''),
        (re.compile(r'\<tr>'), lambda match: ''),
        (re.compile(r'\<td .*?\>'), lambda match: '')]

    remove_tags = []
    remove_tags.append(dict(name = 'div', attrs = {'class' : 'def element-date'}))
    remove_tags.append(dict(name = 'div', attrs = {'class' : 'def silver'}))
    remove_tags.append(dict(name = 'div', attrs = {'id' : 'content-main-column-right'}))

    extra_css = '''
        .div-header {font-size: x-small; font-weight: bold}
    '''
@ -88,4 +91,3 @@ class Wprost(BasicNewsRecipe):
            'description' : ''
        }
@ -51,7 +51,7 @@ class WallStreetJournal(BasicNewsRecipe):
            br['password'] = self.password
            res = br.submit()
            raw = res.read()
            if 'Welcome,' not in raw:
            if 'Welcome,' not in raw and '>Logout<' not in raw:
                raise ValueError('Failed to log in to wsj.com, check your '
                        'username and password')
        return br
@ -1,20 +1,55 @@
# -*- coding: utf-8 -*-

from calibre.web.feeds.news import BasicNewsRecipe

class ZamanRecipe(BasicNewsRecipe):
    title = u'Zaman'
    __author__ = u'Deniz Og\xfcz'
class Zaman(BasicNewsRecipe):

    title = u'ZAMAN Gazetesi'
    __author__ = u'thomass'
    oldest_article = 2
    max_articles_per_feed = 100
    # no_stylesheets = True
    #delay = 1
    #use_embedded_content = False
    encoding = 'ISO 8859-9'
    publisher = 'Zaman'
    category = 'news, haberler,TR,gazete'
    language = 'tr'
    oldest_article = 1
    max_articles_per_feed = 10
    publication_type = 'newspaper'
    extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
    conversion_options = {
        'tags'             : category
        ,'language'        : language
        ,'publisher'       : publisher
        ,'linearize_tables': False
    }
    cover_img_url = 'https://fbcdn-profile-a.akamaihd.net/hprofile-ak-snc4/188140_81722291869_2111820_n.jpg'
    masthead_url = 'http://medya.zaman.com.tr/extentions/zaman.com.tr/img/section/logo-section.png'

    cover_url = 'http://medya.zaman.com.tr/zamantryeni/pics/zamanonline.gif'
    feeds = [(u'Gundem', u'http://www.zaman.com.tr/gundem.rss'),
        (u'Son Dakika', u'http://www.zaman.com.tr/sondakika.rss'),
        (u'Spor', u'http://www.zaman.com.tr/spor.rss'),
        (u'Ekonomi', u'http://www.zaman.com.tr/ekonomi.rss'),
        (u'Politika', u'http://www.zaman.com.tr/politika.rss'),
        (u'D\u0131\u015f Haberler', u'http://www.zaman.com.tr/dishaberler.rss'),
        (u'Yazarlar', u'http://www.zaman.com.tr/yazarlar.rss'),]

    def print_version(self, url):
        return url.replace('www.zaman.com.tr/haber.do?', 'www.zaman.com.tr/yazdir.do?')
    keep_only_tags = [
        dict(name='div', attrs={'id':['news-detail-content']}),
        dict(name='td', attrs={'class':['columnist-detail','columnist_head']})
    ]
    remove_tags = [
        dict(name='div', attrs={'id':['news-detail-news-text-font-size','news-detail-gallery','news-detail-news-bottom-social']}),
        dict(name='div', attrs={'class':['radioEmbedBg','radyoProgramAdi']}),
        dict(name='a', attrs={'class':['webkit-html-attribute-value webkit-html-external-link']}),
        dict(name='table', attrs={'id':['yaziYorumTablosu']}),
        dict(name='img', attrs={'src':['http://medya.zaman.com.tr/pics/paylas.gif',
            'http://medya.zaman.com.tr/extentions/zaman.com.tr/img/columnist/ma-16.png']})
    ]

    #remove_attributes = ['width','height']
    remove_empty_feeds = True

    feeds = [
        (u'Anasayfa', u'http://www.zaman.com.tr/anasayfa.rss'),
        (u'Son Dakika', u'http://www.zaman.com.tr/sondakika.rss'),
        (u'En çok Okunanlar', u'http://www.zaman.com.tr/max_all.rss'),
        (u'Gündem', u'http://www.zaman.com.tr/gundem.rss'),
        (u'Yazarlar', u'http://www.zaman.com.tr/yazarlar.rss'),
        (u'Politika', u'http://www.zaman.com.tr/politika.rss'),
        (u'Ekonomi', u'http://www.zaman.com.tr/ekonomi.rss'),
        (u'Dış Haberler', u'http://www.zaman.com.tr/dishaberler.rss'),
        (u'Yorumlar', u'http://www.zaman.com.tr/yorumlar.rss'),
        (u'Röportaj', u'http://www.zaman.com.tr/roportaj.rss'),
        (u'Spor', u'http://www.zaman.com.tr/spor.rss'),
        (u'Kürsü', u'http://www.zaman.com.tr/kursu.rss'),
        (u'Kültür Sanat', u'http://www.zaman.com.tr/kultursanat.rss'),
        (u'Televizyon', u'http://www.zaman.com.tr/televizyon.rss'),
        (u'Manşet', u'http://www.zaman.com.tr/manset.rss'),
    ]
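The print_version of the older Zaman recipe (replaced above) mapped article URLs onto the print CGI with a plain substitution; its effect on a hypothetical article id:

    url = 'http://www.zaman.com.tr/haber.do?haberno=1160201'
    print(url.replace('www.zaman.com.tr/haber.do?', 'www.zaman.com.tr/yazdir.do?'))
    # -> http://www.zaman.com.tr/yazdir.do?haberno=1160201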
@ -1,5 +1,5 @@
Monocle = {
  VERSION: "1.0.0"
  VERSION: "2.0.0"
};


@ -170,7 +170,8 @@ Monocle.Browser.has.iframeTouchBug = Monocle.Browser.iOSVersionBelow("4.2");
Monocle.Browser.has.selectThruBug = Monocle.Browser.iOSVersionBelow("4.2");

Monocle.Browser.has.mustScrollSheaf = Monocle.Browser.is.MobileSafari;
Monocle.Browser.has.iframeDoubleWidthBug = Monocle.Browser.has.mustScrollSheaf;
Monocle.Browser.has.iframeDoubleWidthBug =
  Monocle.Browser.has.mustScrollSheaf || Monocle.Browser.on.Kindle3;

Monocle.Browser.has.floatColumnBug = Monocle.Browser.is.WebKit;

@ -181,6 +182,11 @@ Monocle.Browser.has.jumpFlickerBug =
  Monocle.Browser.on.MacOSX && Monocle.Browser.is.WebKit;


Monocle.Browser.has.columnOverflowPaintBug = Monocle.Browser.is.WebKit &&
  !Monocle.Browser.is.MobileSafari &&
  navigator.userAgent.indexOf("AppleWebKit/534") > 0;


if (typeof window.console == "undefined") {
  window.console = {
    messages: [],
@ -241,6 +247,7 @@ Monocle.Factory = function (element, label, index, reader) {


  function initialize() {
    if (!p.label) { return; }
    var node = p.reader.properties.graph;
    node[p.label] = node[p.label] || [];
    if (typeof p.index == 'undefined' && node[p.label][p.index]) {
@ -274,7 +281,11 @@ Monocle.Factory = function (element, label, index, reader) {

  function make(tagName, oLabel, index_or_options, or_options) {
    var oIndex, options;
    if (arguments.length == 2) {
    if (arguments.length == 1) {
      oLabel = null,
      oIndex = 0;
      options = {};
    } else if (arguments.length == 2) {
      oIndex = 0;
      options = {};
    } else if (arguments.length == 4) {
@ -376,6 +387,22 @@ Monocle.pieceLoaded('factory');
Monocle.Events = {}


Monocle.Events.dispatch = function (elem, evtType, data, cancelable) {
  if (!document.createEvent) {
    return true;
  }
  var evt = document.createEvent("Events");
  evt.initEvent(evtType, false, cancelable || false);
  evt.m = data;
  try {
    return elem.dispatchEvent(evt);
  } catch(e) {
    console.warn("Failed to dispatch event: "+evtType);
    return false;
  }
}


Monocle.Events.listen = function (elem, evtType, fn, useCapture) {
  if (elem.addEventListener) {
    return elem.addEventListener(evtType, fn, useCapture || false);
@ -405,7 +432,7 @@ Monocle.Events.listenForContact = function (elem, fns, options) {
        pageY: ci.pageY
      };

      var target = evt.target || window.srcElement;
      var target = evt.target || evt.srcElement;
      while (target.nodeType != 1 && target.parentNode) {
        target = target.parentNode;
      }
@ -527,13 +554,18 @@ Monocle.Events.deafenForContact = function (elem, listeners) {
}


Monocle.Events.listenForTap = function (elem, fn) {
Monocle.Events.listenForTap = function (elem, fn, activeClass) {
  var startPos;

  if (Monocle.Browser.on.Kindle3) {
    Monocle.Events.listen(elem, 'click', function () {});
  }

  var annul = function () {
    startPos = null;
    if (activeClass && elem.dom) { elem.dom.removeClass(activeClass); }
  }

  var annulIfOutOfBounds = function (evt) {
    if (evt.type.match(/^mouse/)) {
      return;
@ -545,7 +577,7 @@ Monocle.Events.listenForTap = function (elem, fn) {
      evt.m.registrantX < 0 || evt.m.registrantX > elem.offsetWidth ||
      evt.m.registrantY < 0 || evt.m.registrantY > elem.offsetHeight
    ) {
      startPos = null;
      annul();
    } else {
      evt.preventDefault();
    }
@ -557,6 +589,7 @@ Monocle.Events.listenForTap = function (elem, fn) {
      start: function (evt) {
        startPos = [evt.m.pageX, evt.m.pageY];
        evt.preventDefault();
        if (activeClass && elem.dom) { elem.dom.addClass(activeClass); }
      },
      move: annulIfOutOfBounds,
      end: function (evt) {
@ -565,10 +598,9 @@ Monocle.Events.listenForTap = function (elem, fn) {
          evt.m.startOffset = startPos;
          fn(evt);
        }
        annul();
      },
      cancel: function (evt) {
        startPos = null;
      }
      cancel: annul
    },
    {
      useCapture: false
@ -997,6 +1029,9 @@ Monocle.Reader = function (node, bookData, options, onLoadCallback) {
  createReaderElements();

  p.defaultStyles = addPageStyles(k.DEFAULT_STYLE_RULES, false);
  if (options.stylesheet) {
    p.initialStyles = addPageStyles(options.stylesheet, false);
  }

  primeFrames(options.primeURL, function () {
    applyStyles();
@ -1077,6 +1112,7 @@ Monocle.Reader = function (node, bookData, options, onLoadCallback) {
      if (Monocle.Browser.is.WebKit) {
        frame.contentDocument.documentElement.style.overflow = "hidden";
      }
      dispatchEvent('monocle:frameprimed', { frame: frame, pageIndex: pageCount });
      if ((pageCount += 1) == pageMax) {
        Monocle.defer(callback);
      }
@ -1131,6 +1167,7 @@ Monocle.Reader = function (node, bookData, options, onLoadCallback) {
    var pageCount = 0;
    if (typeof callback == 'function') {
      var watcher = function (evt) {
        dispatchEvent('monocle:firstcomponentchange', evt.m);
        if ((pageCount += 1) == p.flipper.pageCount) {
          deafen('monocle:componentchange', watcher);
          callback();
@ -1239,7 +1276,7 @@ Monocle.Reader = function (node, bookData, options, onLoadCallback) {
        page.appendChild(runner);
        ctrlData.elements.push(runner);
      }
    } else if (cType == "modal" || cType == "popover") {
    } else if (cType == "modal" || cType == "popover" || cType == "hud") {
      ctrlElem = ctrl.createControlElements(overlay);
      overlay.appendChild(ctrlElem);
      ctrlData.elements.push(ctrlElem);
@ -1312,24 +1349,33 @@ Monocle.Reader = function (node, bookData, options, onLoadCallback) {
    var controlData = dataForControl(ctrl);
    if (!controlData) {
      console.warn("No data for control: " + ctrl);
      return;
      return false;
    }
    if (controlData.hidden == false) {
      return;

    if (showingControl(ctrl)) {
      return false;
    }

    var overlay = dom.find('overlay');
    if (controlData.usesOverlay && controlData.controlType != "hud") {
      for (var i = 0, ii = p.controls.length; i < ii; ++i) {
        if (p.controls[i].usesOverlay && !p.controls[i].hidden) {
          return false;
        }
      }
      overlay.style.display = "block";
    }

    for (var i = 0; i < controlData.elements.length; ++i) {
      controlData.elements[i].style.display = "block";
    }
    var overlay = dom.find('overlay');
    if (controlData.usesOverlay) {
      overlay.style.display = "block";
    }

    if (controlData.controlType == "popover") {
      overlay.listeners = Monocle.Events.listenForContact(
        overlay,
        {
          start: function (evt) {
            obj = evt.target || window.event.srcElement;
            var obj = evt.target || window.event.srcElement;
            do {
              if (obj == controlData.elements[0]) { return true; }
            } while (obj && (obj = obj.parentNode));
@ -1346,22 +1392,18 @@ Monocle.Reader = function (node, bookData, options, onLoadCallback) {
      ctrl.properties.hidden = false;
    }
    dispatchEvent('controlshow', ctrl, false);
    return true;
  }


  function showingControl(ctrl) {
    var controlData = dataForControl(ctrl);
    return controlData.hidden == false;
  }


  function dispatchEvent(evtType, data, cancelable) {
    if (!document.createEvent) {
      return true;
    }
    var evt = document.createEvent("Events");
    evt.initEvent(evtType, false, cancelable || false);
    evt.m = data;
    try {
      return dom.find('box').dispatchEvent(evt);
    } catch(e) {
      console.warn("Failed to dispatch event: " + evtType);
      return false;
    }
    return Monocle.Events.dispatch(dom.find('box'), evtType, data, cancelable);
  }


@ -1502,6 +1544,7 @@ Monocle.Reader = function (node, bookData, options, onLoadCallback) {
  API.addControl = addControl;
  API.hideControl = hideControl;
  API.showControl = showControl;
  API.showingControl = showingControl;
  API.dispatchEvent = dispatchEvent;
  API.listen = listen;
  API.deafen = deafen;
@ -1527,22 +1570,32 @@ Monocle.Reader.DEFAULT_CLASS_PREFIX = 'monelem_'
Monocle.Reader.FLIPPER_DEFAULT_CLASS = "Slider";
Monocle.Reader.FLIPPER_LEGACY_CLASS = "Legacy";
Monocle.Reader.DEFAULT_STYLE_RULES = [
  "html * {" +
  "html#RS\\:monocle * {" +
    "-webkit-font-smoothing: subpixel-antialiased;" +
    "text-rendering: auto !important;" +
    "word-wrap: break-word !important;" +
    "overflow: visible !important;" +
    (Monocle.Browser.has.floatColumnBug ? "float: none !important;" : "") +
  "}" +
  "body {" +
  "}",
  "html#RS\\:monocle body {" +
    "margin: 0 !important;" +
    "padding: 0 !important;" +
    "-webkit-text-size-adjust: none;" +
  "}" +
  "table, img {" +
  "}",
  "html#RS\\:monocle body * {" +
    "max-width: 100% !important;" +
    "max-height: 90% !important;" +
  "}",
  "html#RS\\:monocle img, html#RS\\:monocle video, html#RS\\:monocle object {" +
    "max-height: 95% !important;" +
  "}"
]

if (Monocle.Browser.has.columnOverflowPaintBug) {
  Monocle.Reader.DEFAULT_STYLE_RULES.push(
    "::-webkit-scrollbar { width: 0; height: 0; }"
  )
}


Monocle.pieceLoaded('reader');
/* BOOK */
@ -1586,6 +1639,16 @@ Monocle.Book = function (dataSource) {
    locus.load = true;
    locus.componentId = p.componentIds[0];
    return locus;
  } else if (
    cIndex < 0 &&
    locus.componentId &&
    currComponent.properties.id != locus.componentId
  ) {
    pageDiv.m.reader.dispatchEvent(
      "monocle:notfound",
      { href: locus.componentId }
    );
    return null;
  } else if (cIndex < 0) {
    component = currComponent;
    locus.componentId = pageDiv.m.activeFrame.m.component.properties.id;
@ -1619,6 +1682,8 @@ Monocle.Book = function (dataSource) {
      result.page += locus.direction;
    } else if (typeof(locus.anchor) == "string") {
      result.page = component.pageForChapter(locus.anchor, pageDiv);
    } else if (typeof(locus.xpath) == "string") {
      result.page = component.pageForXPath(locus.xpath, pageDiv);
    } else if (typeof(locus.position) == "string") {
      if (locus.position == "start") {
        result.page = 1;
@ -1638,6 +1703,7 @@ Monocle.Book = function (dataSource) {
    if (result.page < 1) {
      if (cIndex == 0) {
        result.page = 1;
        result.boundarystart = true;
      } else {
        result.load = true;
        result.componentId = p.componentIds[cIndex - 1];
@ -1647,6 +1713,7 @@ Monocle.Book = function (dataSource) {
    } else if (result.page > lastPageNum['new']) {
      if (cIndex == p.lastCIndex) {
        result.page = lastPageNum['new'];
        result.boundaryend = true;
      } else {
        result.load = true;
        result.componentId = p.componentIds[cIndex + 1];
@ -1660,7 +1727,13 @@ Monocle.Book = function (dataSource) {

  function setPageAt(pageDiv, locus) {
    locus = pageNumberAt(pageDiv, locus);
    if (!locus.load) {
    if (locus && !locus.load) {
      var evtData = { locus: locus, page: pageDiv }
      if (locus.boundarystart) {
        pageDiv.m.reader.dispatchEvent('monocle:boundarystart', evtData);
      } else if (locus.boundaryend) {
        pageDiv.m.reader.dispatchEvent('monocle:boundaryend', evtData);
      } else {
        var component = p.components[p.componentIds.indexOf(locus.componentId)];
        pageDiv.m.place = pageDiv.m.place || new Monocle.Place();
        pageDiv.m.place.setPlace(component, locus.page);
@ -1673,6 +1746,7 @@ Monocle.Book = function (dataSource) {
      }
      pageDiv.m.reader.dispatchEvent("monocle:pagechange", evtData);
    }
  }
    return locus;
  }

@ -1683,6 +1757,10 @@ Monocle.Book = function (dataSource) {
      locus = pageNumberAt(pageDiv, locus);
    }

    if (!locus) {
      return;
    }

    if (!locus.load) {
      callback(locus);
      return;
@ -1690,7 +1768,9 @@ Monocle.Book = function (dataSource) {

    var findPageNumber = function () {
      locus = setPageAt(pageDiv, locus);
      if (locus.load) {
      if (!locus) {
        return;
      } else if (locus.load) {
        loadPageAt(pageDiv, locus, callback, progressCallback)
      } else {
        callback(locus);
@ -1715,10 +1795,12 @@ Monocle.Book = function (dataSource) {
  }


  function setOrLoadPageAt(pageDiv, locus, callback, progressCallback) {
  function setOrLoadPageAt(pageDiv, locus, callback, onProgress, onFail) {
    locus = setPageAt(pageDiv, locus);
    if (locus.load) {
      loadPageAt(pageDiv, locus, callback, progressCallback);
    if (!locus) {
      if (onFail) { onFail(); }
    } else if (locus.load) {
      loadPageAt(pageDiv, locus, callback, onProgress);
    } else {
      callback(locus);
    }
@ -1864,13 +1946,18 @@ Monocle.Place = function () {
  }


  function percentageThrough() {
  function percentAtTopOfPage() {
    return p.percent - 1.0 / p.component.lastPageNumber();
  }


  function percentAtBottomOfPage() {
    return p.percent;
  }


  function pageAtPercentageThrough(pc) {
    return Math.max(Math.round(p.component.lastPageNumber() * pc), 1);
  function pageAtPercentageThrough(percent) {
    return Math.max(Math.round(p.component.lastPageNumber() * percent), 1);
  }


@ -1911,6 +1998,8 @@ Monocle.Place = function () {
    }
    if (options.direction) {
      locus.page += options.direction;
    } else {
      locus.percent = percentAtBottomOfPage();
    }
    return locus;
  }
@ -1942,7 +2031,9 @@ Monocle.Place = function () {
  API.setPlace = setPlace;
  API.setPercentageThrough = setPercentageThrough;
  API.componentId = componentId;
  API.percentageThrough = percentageThrough;
  API.percentAtTopOfPage = percentAtTopOfPage;
  API.percentAtBottomOfPage = percentAtBottomOfPage;
  API.percentageThrough = percentAtBottomOfPage;
  API.pageAtPercentageThrough = pageAtPercentageThrough;
  API.pageNumber = pageNumber;
  API.chapterInfo = chapterInfo;
@ -2158,11 +2249,13 @@ Monocle.Component = function (book, id, index, chapters, source) {
    if (p.chapters[0] && typeof p.chapters[0].percent == "number") {
      return;
    }
    var doc = pageDiv.m.activeFrame.contentDocument;
    for (var i = 0; i < p.chapters.length; ++i) {
      var chp = p.chapters[i];
      chp.percent = 0;
      if (chp.fragment) {
        chp.percent = pageDiv.m.dimensions.percentageThroughOfId(chp.fragment);
        var node = doc.getElementById(chp.fragment);
        chp.percent = pageDiv.m.dimensions.percentageThroughOfNode(node);
      }
    }
    return p.chapters;
@ -2187,14 +2280,37 @@ Monocle.Component = function (book, id, index, chapters, source) {
    if (!fragment) {
      return 1;
    }
    var pc2pn = function (pc) { return Math.floor(pc * p.pageLength) + 1 }
    for (var i = 0; i < p.chapters.length; ++i) {
      if (p.chapters[i].fragment == fragment) {
        return pc2pn(p.chapters[i].percent);
        return percentToPageNumber(p.chapters[i].percent);
      }
    }
    var percent = pageDiv.m.dimensions.percentageThroughOfId(fragment);
    return pc2pn(percent);
    var doc = pageDiv.m.activeFrame.contentDocument;
    var node = doc.getElementById(fragment);
    var percent = pageDiv.m.dimensions.percentageThroughOfNode(node);
    return percentToPageNumber(percent);
  }


  function pageForXPath(xpath, pageDiv) {
    var doc = pageDiv.m.activeFrame.contentDocument;
    var percent = 0;
    if (typeof doc.evaluate == "function") {
      var node = doc.evaluate(
        xpath,
        doc,
        null,
        9,
        null
      ).singleNodeValue;
      var percent = pageDiv.m.dimensions.percentageThroughOfNode(node);
    }
    return percentToPageNumber(percent);
  }


  function percentToPageNumber(pc) {
    return Math.floor(pc * p.pageLength) + 1;
  }


@ -2207,6 +2323,7 @@ Monocle.Component = function (book, id, index, chapters, source) {
  API.updateDimensions = updateDimensions;
  API.chapterForPage = chapterForPage;
  API.pageForChapter = pageForChapter;
  API.pageForXPath = pageForXPath;
  API.lastPageNumber = lastPageNumber;

  return API;
@ -2415,9 +2532,11 @@ Monocle.Dimensions.Vert = function (pageDiv) {
|
||||
}
|
||||
|
||||
|
||||
function percentageThroughOfId(id) {
|
||||
function percentageThroughOfNode(target) {
|
||||
if (!target) {
|
||||
return 0;
|
||||
}
|
||||
var doc = p.page.m.activeFrame.contentDocument;
|
||||
var target = doc.getElementById(id);
|
||||
var offset = 0;
|
||||
if (target.getBoundingClientRect) {
|
||||
offset = target.getBoundingClientRect().top;
|
||||
@ -2456,7 +2575,7 @@ Monocle.Dimensions.Vert = function (pageDiv) {
|
||||
API.hasChanged = hasChanged;
|
||||
API.measure = measure;
|
||||
API.pages = pages;
|
||||
API.percentageThroughOfId = percentageThroughOfId;
|
||||
API.percentageThroughOfNode = percentageThroughOfNode;
|
||||
API.locusToOffset = locusToOffset;
|
||||
|
||||
initialize();
|
||||
@ -2713,8 +2832,7 @@ Monocle.Dimensions.Columns = function (pageDiv) {
|
||||
(!p.measurements) ||
|
||||
(p.measurements.width != newMeasurements.width) ||
|
||||
(p.measurements.height != newMeasurements.height) ||
|
||||
(p.measurements.scrollWidth != newMeasurements.scrollWidth) ||
|
||||
(p.measurements.fontSize != newMeasurements.fontSize)
|
||||
(p.measurements.scrollWidth != newMeasurements.scrollWidth)
|
||||
);
|
||||
}
|
||||
|
||||
@ -2736,12 +2854,18 @@ Monocle.Dimensions.Columns = function (pageDiv) {
|
||||
if (!lc || !lc.getBoundingClientRect) {
|
||||
console.warn('Empty document for page['+p.page.m.pageIndex+']');
|
||||
p.measurements.scrollWidth = p.measurements.width;
|
||||
} else if (lc.getBoundingClientRect().bottom > p.measurements.height) {
|
||||
} else {
|
||||
var bcr = lc.getBoundingClientRect();
|
||||
if (
|
||||
bcr.right > p.measurements.width ||
|
||||
bcr.bottom > p.measurements.height
|
||||
) {
|
||||
p.measurements.scrollWidth = p.measurements.width * 2;
|
||||
} else {
|
||||
p.measurements.scrollWidth = p.measurements.width;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
p.length = Math.ceil(p.measurements.scrollWidth / p.measurements.width);
|
||||
p.dirty = false;
|
||||
@ -2758,12 +2882,11 @@ Monocle.Dimensions.Columns = function (pageDiv) {
|
||||
}
|
||||
|
||||
|
||||
function percentageThroughOfId(id) {
|
||||
var doc = p.page.m.activeFrame.contentDocument;
|
||||
var target = doc.getElementById(id);
|
||||
function percentageThroughOfNode(target) {
|
||||
if (!target) {
|
||||
return 0;
|
||||
}
|
||||
var doc = p.page.m.activeFrame.contentDocument;
|
||||
var offset = 0;
|
||||
if (target.getBoundingClientRect) {
|
||||
offset = target.getBoundingClientRect().left;
|
||||
@@ -2785,20 +2908,30 @@ Monocle.Dimensions.Columns = function (pageDiv) {
function componentChanged(evt) {
  if (evt.m['page'] != p.page) { return; }
  var doc = evt.m['document'];
+ if (Monocle.Browser.has.columnOverflowPaintBug) {
+   var div = doc.createElement('div');
+   Monocle.Styles.applyRules(div, k.BODY_STYLES);
+   div.style.cssText += "overflow: scroll !important;";
+   while (doc.body.childNodes.length) {
+     div.appendChild(doc.body.firstChild);
+   }
+   doc.body.appendChild(div);
+ } else {
    Monocle.Styles.applyRules(doc.body, k.BODY_STYLES);

    if (Monocle.Browser.is.WebKit) {
      doc.documentElement.style.overflow = 'hidden';
    }
+ }

  p.dirty = true;
}


function setColumnWidth() {
  var cw = p.page.m.sheafDiv.clientWidth;
  var doc = p.page.m.activeFrame.contentDocument;
  if (currBodyStyleValue('column-width') != cw+"px") {
-   Monocle.Styles.affix(doc.body, 'column-width', cw+"px");
+   Monocle.Styles.affix(columnedElement(), 'column-width', cw+"px");
    p.dirty = true;
  }
}
@@ -2809,8 +2942,7 @@ Monocle.Dimensions.Columns = function (pageDiv) {
  return {
    width: sheaf.clientWidth,
    height: sheaf.clientHeight,
-   scrollWidth: scrollerWidth(),
-   fontSize: currBodyStyleValue('font-size')
+   scrollWidth: scrollerWidth()
  }
}

@@ -2819,16 +2951,24 @@ Monocle.Dimensions.Columns = function (pageDiv) {
  if (Monocle.Browser.has.mustScrollSheaf) {
    return p.page.m.sheafDiv;
  } else {
-   return p.page.m.activeFrame.contentDocument.body;
+   return columnedElement();
  }
}


+ function columnedElement() {
+   var elem = p.page.m.activeFrame.contentDocument.body;
+   return Monocle.Browser.has.columnOverflowPaintBug ? elem.firstChild : elem;
+ }
+
+
function scrollerWidth() {
  var bdy = p.page.m.activeFrame.contentDocument.body;
  if (Monocle.Browser.has.iframeDoubleWidthBug) {
-   if (Monocle.Browser.on.Android) {
-     return bdy.scrollWidth * 1.5; // I actually have no idea why 1.5.
+   if (Monocle.Browser.on.Kindle3) {
+     return scrollerElement().scrollWidth;
+   } else if (Monocle.Browser.on.Android) {
+     return bdy.scrollWidth;
    } else if (Monocle.Browser.iOSVersion < "4.1") {
      var hbw = bdy.scrollWidth / 2;
      var sew = scrollerElement().scrollWidth;
@@ -2838,15 +2978,18 @@ Monocle.Dimensions.Columns = function (pageDiv) {
      var hbw = bdy.scrollWidth / 2;
      return hbw;
    }
- } else if (Monocle.Browser.is.Gecko) {
-   var lc = bdy.lastChild;
-   while (lc && lc.nodeType != 1) {
-     lc = lc.previousSibling;
-   }
-   if (lc && lc.getBoundingClientRect) {
-     return lc.getBoundingClientRect().right;
+ } else if (bdy.getBoundingClientRect) {
+   var elems = bdy.getElementsByTagName('*');
+   var bdyRect = bdy.getBoundingClientRect();
+   var l = bdyRect.left, r = bdyRect.right;
+   for (var i = elems.length - 1; i >= 0; --i) {
+     var rect = elems[i].getBoundingClientRect();
+     l = Math.min(l, rect.left);
+     r = Math.max(r, rect.right);
+   }
+   return Math.abs(l) + Math.abs(r);
  }

  return scrollerElement().scrollWidth;
}
@@ -2867,8 +3010,14 @@ Monocle.Dimensions.Columns = function (pageDiv) {

function translateToLocus(locus) {
  var offset = locusToOffset(locus);
  p.page.m.offset = 0 - offset;
+ if (k.SETX && !Monocle.Browser.has.columnOverflowPaintBug) {
+   var bdy = p.page.m.activeFrame.contentDocument.body;
+   Monocle.Styles.affix(bdy, "transform", "translateX("+offset+"px)");
+ } else {
    var scrElem = scrollerElement();
    scrElem.scrollLeft = 0 - offset;
+ }
  return offset;
}
@@ -2876,7 +3025,7 @@ Monocle.Dimensions.Columns = function (pageDiv) {
  API.hasChanged = hasChanged;
  API.measure = measure;
  API.pages = pages;
- API.percentageThroughOfId = percentageThroughOfId;
+ API.percentageThroughOfNode = percentageThroughOfNode;

  API.locusToOffset = locusToOffset;
  API.translateToLocus = translateToLocus;
@@ -2898,6 +3047,8 @@ Monocle.Dimensions.Columns.BODY_STYLES = {
  "column-fill": "auto"
}

+ Monocle.Dimensions.Columns.SETX = true; // Set to false for scrollLeft.
+
if (Monocle.Browser.has.iframeDoubleWidthBug) {
  Monocle.Dimensions.Columns.BODY_STYLES["min-width"] = "200%";
} else {
@@ -2924,6 +3075,8 @@ Monocle.Flippers.Slider = function (reader) {

function addPage(pageDiv) {
  pageDiv.m.dimensions = new Monocle.Dimensions.Columns(pageDiv);
+
+ Monocle.Styles.setX(pageDiv, "0px");
}

@@ -2963,6 +3116,7 @@ Monocle.Flippers.Slider = function (reader) {


function interactiveMode(bState) {
+ p.reader.dispatchEvent('monocle:interactive:'+(bState ? 'on' : 'off'));
  if (!Monocle.Browser.has.selectThruBug) {
    return;
  }
@@ -2994,10 +3148,10 @@ Monocle.Flippers.Slider = function (reader) {

function moveTo(locus, callback) {
  var fn = function () {
-   prepareNextPage(announceTurn);
-   if (typeof callback == "function") {
-     callback();
-   }
+   prepareNextPage(function () {
+     if (typeof callback == "function") { callback(); }
+     announceTurn();
+   });
  }
  setPage(upperPage(), locus, fn);
}
@@ -3045,12 +3199,26 @@ Monocle.Flippers.Slider = function (reader) {

if (dir == k.FORWARDS) {
  if (getPlace().onLastPageOfBook()) {
    p.reader.dispatchEvent(
      'monocle:boundaryend',
      {
        locus: getPlace().getLocus({ direction : dir }),
        page: upperPage()
      }
    );
    resetTurnData();
    return;
  }
  onGoingForward(boxPointX);
} else if (dir == k.BACKWARDS) {
  if (getPlace().onFirstPageOfBook()) {
    p.reader.dispatchEvent(
      'monocle:boundarystart',
      {
        locus: getPlace().getLocus({ direction : dir }),
        page: upperPage()
      }
    );
    resetTurnData();
    return;
  }
@@ -3215,14 +3383,14 @@ Monocle.Flippers.Slider = function (reader) {


function announceTurn() {
- hideWaitControl(upperPage());
- hideWaitControl(lowerPage());
  p.reader.dispatchEvent('monocle:turn');
  resetTurnData();
}


function resetTurnData() {
+ hideWaitControl(upperPage());
+ hideWaitControl(lowerPage());
  p.turnData = {};
}

@@ -3268,7 +3436,7 @@ Monocle.Flippers.Slider = function (reader) {
    (new Date()).getTime() - stamp > duration ||
    Math.abs(currX - finalX) <= Math.abs((currX + step) - finalX)
  ) {
-   clearTimeout(elem.setXTransitionInterval)
+   clearTimeout(elem.setXTransitionInterval);
    Monocle.Styles.setX(elem, finalX);
    if (elem.setXTCB) {
      elem.setXTCB();
@@ -3366,13 +3534,17 @@ Monocle.Flippers.Slider = function (reader) {

function jumpIn(pageDiv, callback) {
  var dur = Monocle.Browser.has.jumpFlickerBug ? 1 : 0;
+ Monocle.defer(function () {
    setX(pageDiv, 0, { duration: dur }, callback);
+ });
}


function jumpOut(pageDiv, callback) {
  var dur = Monocle.Browser.has.jumpFlickerBug ? 1 : 0;
+ Monocle.defer(function () {
    setX(pageDiv, 0 - pageDiv.offsetWidth, { duration: dur }, callback);
+ });
}

@@ -3382,7 +3554,9 @@ Monocle.Flippers.Slider = function (reader) {
    duration: k.durations.SLIDE,
    timing: 'ease-in'
  };
+ Monocle.defer(function () {
    setX(upperPage(), 0, slideOpts, callback);
+ });
}

@@ -3391,7 +3565,9 @@ Monocle.Flippers.Slider = function (reader) {
    duration: k.durations.SLIDE,
    timing: 'ease-in'
  };
+ Monocle.defer(function () {
    setX(upperPage(), 0 - upperPage().offsetWidth, slideOpts, callback);
+ });
}

@@ -3418,13 +3594,13 @@ Monocle.Flippers.Slider = function (reader) {

function showWaitControl(page) {
  var ctrl = p.reader.dom.find('flippers_slider_wait', page.m.pageIndex);
- ctrl.style.opacity = 0.5;
+ ctrl.style.visibility = "visible";
}


function hideWaitControl(page) {
  var ctrl = p.reader.dom.find('flippers_slider_wait', page.m.pageIndex);
- ctrl.style.opacity = 0;
+ ctrl.style.visibility = "hidden";
}

API.pageCount = p.pageCount;
@@ -292,13 +292,17 @@ maximum_resort_levels = 5
generate_cover_title_font = None
generate_cover_foot_font = None

- #: Control behavior of double clicks on the book list
- # Behavior of doubleclick on the books list. Choices: open_viewer, do_nothing,
+ #: Control behavior of the book list
+ # You can control the behavior of doubleclicks on the books list.
+ # Choices: open_viewer, do_nothing,
# edit_cell, edit_metadata. Selecting edit_metadata has the side effect of
# disabling editing a field using a single click.
# Default: open_viewer.
# Example: doubleclick_on_library_view = 'do_nothing'
+ # You can also control whether the book list scrolls horizontally per column or
+ # per pixel. Default is per column.
doubleclick_on_library_view = 'open_viewer'
+ horizontal_scrolling_per_column = True

#: Language to use when sorting.
# Setting this tweak will force sorting to use the
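(A minimal sketch of how the doubleclick tweak above is typically consulted at runtime; the tweaks dict import follows calibre's convention, but 'view' and its methods are hypothetical illustrations, not calibre API.)

from calibre.utils.config import tweaks

def on_doubleclick(view):
    # Hypothetical handler: dispatch on the tweak value documented above.
    action = tweaks.get('doubleclick_on_library_view', 'open_viewer')
    if action == 'open_viewer':
        view.open_viewer()
    elif action == 'edit_metadata':
        view.edit_metadata()
    elif action == 'edit_cell':
        view.edit_current_cell()
    # 'do_nothing' deliberately falls through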
BIN (modified image: 25 KiB before, 25 KiB after)
BIN resources/images/plugins/mobileread.png (new file, 641 B)
BIN resources/images/plugins/plugin_deprecated.png (new file, 9.7 KiB)
BIN resources/images/plugins/plugin_disabled_invalid.png (new file, 12 KiB)
BIN resources/images/plugins/plugin_disabled_ok.png (new file, 7.6 KiB)
BIN resources/images/plugins/plugin_disabled_valid.png (new file, 10 KiB)
BIN resources/images/plugins/plugin_new.png (new file, 8.7 KiB)
BIN resources/images/plugins/plugin_new_invalid.png (new file, 13 KiB)
BIN resources/images/plugins/plugin_new_valid.png (new file, 11 KiB)
BIN resources/images/plugins/plugin_updater.png (new file, 18 KiB)
BIN resources/images/plugins/plugin_updater_updates.png (new file, 15 KiB)
BIN resources/images/plugins/plugin_upgrade_invalid.png (new file, 15 KiB)
BIN resources/images/plugins/plugin_upgrade_ok.png (new file, 12 KiB)
BIN resources/images/plugins/plugin_upgrade_valid.png (new file, 14 KiB)
@@ -1,6 +1,7 @@
CREATE TABLE authors ( id INTEGER PRIMARY KEY,
          name TEXT NOT NULL COLLATE NOCASE,
          sort TEXT COLLATE NOCASE,
+          link TEXT NOT NULL DEFAULT "",
          UNIQUE(name)
);
CREATE TABLE books ( id INTEGER PRIMARY KEY AUTOINCREMENT,
@@ -13,8 +14,10 @@ CREATE TABLE books ( id INTEGER PRIMARY KEY AUTOINCREMENT,
          isbn TEXT DEFAULT "" COLLATE NOCASE,
          lccn TEXT DEFAULT "" COLLATE NOCASE,
          path TEXT NOT NULL DEFAULT "",
-          flags INTEGER NOT NULL DEFAULT 1
-          , uuid TEXT, has_cover BOOL DEFAULT 0, last_modified TIMESTAMP NOT NULL DEFAULT "2000-01-01 00:00:00+00:00");
+          flags INTEGER NOT NULL DEFAULT 1,
+          uuid TEXT,
+          has_cover BOOL DEFAULT 0,
+          last_modified TIMESTAMP NOT NULL DEFAULT "2000-01-01 00:00:00+00:00");
CREATE TABLE books_authors_link ( id INTEGER PRIMARY KEY,
          book INTEGER NOT NULL,
          author INTEGER NOT NULL,
@@ -543,4 +546,4 @@ CREATE TRIGGER series_update_trg
    BEGIN
      UPDATE series SET sort=NEW.name WHERE id=NEW.id;
    END;
- pragma user_version=20;
+ pragma user_version=21;
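(The hunk above adds an authors.link column and bumps the schema version from 20 to 21. A minimal standalone sketch of the equivalent migration; this is not calibre's actual upgrade code, which lives in its database layer.)

import sqlite3

def upgrade_metadata_db_to_21(db_path):
    # Add the authors.link column introduced above and record the new
    # schema version; a no-op for databases already at version >= 21.
    conn = sqlite3.connect(db_path)
    try:
        (ver,) = conn.execute('pragma user_version').fetchone()
        if ver < 21:
            conn.execute(
                'ALTER TABLE authors ADD COLUMN link TEXT NOT NULL DEFAULT ""')
            conn.execute('pragma user_version = 21')
            conn.commit()
    finally:
        conn.close()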
@@ -1,5 +1,5 @@
" Project wide builtins
- let g:pyflakes_builtins += ["dynamic_property", "__", "P", "I", "lopen", "icu_lower", "icu_upper", "icu_title"]
+ let g:pyflakes_builtins += ["dynamic_property", "__", "P", "I", "lopen", "icu_lower", "icu_upper", "icu_title", "ngettext"]

python << EOFPY
import os
@@ -64,7 +64,7 @@ class Check(Command):
    description = 'Check for errors in the calibre source code'

    BUILTINS = ['_', '__', 'dynamic_property', 'I', 'P', 'lopen', 'icu_lower',
-               'icu_upper', 'icu_title']
+               'icu_upper', 'icu_title', 'ngettext']
    CACHE = '.check-cache.pickle'

    def get_files(self, cache):
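(ngettext is whitelisted here, and in the vim pyflakes config above, because plural-aware translated strings use it. A minimal illustration using the stdlib; with no catalog loaded, gettext simply picks the appropriate untranslated form.)

from gettext import ngettext

def books_message(n):
    # Selects the singular or plural msgid based on n.
    return ngettext('%d book', '%d books', n) % n

# books_message(1) -> '1 book'; books_message(3) -> '3 books'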
@@ -53,6 +53,13 @@ SQLite

Put sqlite3*.h from the sqlite windows amalgamation in ~/sw/include

+ APSW
+ -----
+
+ Download the source from http://code.google.com/p/apsw/downloads/list and run, in a Visual Studio prompt:
+
+ python setup.py fetch --all build --missing-checksum-ok --enable-all-extensions install test
+
OpenSSL
--------

@@ -95,6 +95,11 @@ void launch_calibre(LPCTSTR exe, LPCTSTR config_dir, LPCTSTR library_dir) {
    ExitProcess(1);
}

+ if (! SetEnvironmentVariable(TEXT("CALIBRE_PORTABLE_BUILD"), exe)) {
+     show_last_error(TEXT("Failed to set environment variables"));
+     ExitProcess(1);
+ }
+
dwFlags = CREATE_UNICODE_ENVIRONMENT | CREATE_NEW_PROCESS_GROUP;
_sntprintf_s(cmdline, BUFSIZE, _TRUNCATE, TEXT(" \"--with-library=%s\""), library_dir);

@@ -1,646 +0,0 @@
#! /usr/bin/env python
# Originally written by Barry Warsaw <barry@zope.com>
#
# Minimally patched to make it even more xgettext compatible
# by Peter Funk <pf@artcom-gmbh.de>
#
# 2002-11-22 Jürgen Hermann <jh@web.de>
# Added checks that _() only contains string literals, and
# command line args are resolved to module lists, i.e. you
# can now pass a filename, a module or package name, or a
# directory (including globbing chars, important for Win32).
# Made docstring fit in 80 chars wide displays using pydoc.
#

__doc__ = """pygettext -- Python equivalent of xgettext(1)

Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the
internationalization of C programs. Most of these tools are independent of
the programming language and can be used from within Python programs.
Martin von Loewis' work[1] helps considerably in this regard.

There's one problem though; xgettext is the program that scans source code
looking for message strings, but it groks only C (or C++). Python
introduces a few wrinkles, such as dual quoting characters, triple quoted
strings, and raw strings. xgettext understands none of this.

Enter pygettext, which uses Python's standard tokenize module to scan
Python source code, generating .pot files identical to what GNU xgettext[2]
generates for C and C++ code. From there, the standard GNU tools can be
used.

A word about marking Python strings as candidates for translation. GNU
xgettext recognizes the following keywords: gettext, dgettext, dcgettext,
and gettext_noop. But those can be a lot of text to include all over your
code. C and C++ have a trick: they use the C preprocessor. Most
internationalized C source includes a #define for gettext() to _() so that
what has to be written in the source is much less. Thus these are both
translatable strings:

    gettext("Translatable String")
    _("Translatable String")

Python of course has no preprocessor so this doesn't work so well. Thus,
pygettext searches only for _() by default, but see the -k/--keyword flag
below for how to augment this.

[1] http://www.python.org/workshops/1997-10/proceedings/loewis.html
[2] http://www.gnu.org/software/gettext/gettext.html

NOTE: pygettext attempts to be option and feature compatible with GNU
xgettext wherever possible. However some options are still missing or are
not fully implemented. Also, xgettext's use of command line switches with
option arguments is broken, and in these cases, pygettext just defines
additional switches.

Usage: pygettext [options] inputfile ...

Options:

    -a
    --extract-all
        Extract all strings.

    -d name
    --default-domain=name
        Rename the default output file from messages.pot to name.pot.

    -E
    --escape
        Replace non-ASCII characters with octal escape sequences.

    -D
    --docstrings
        Extract module, class, method, and function docstrings. These do
        not need to be wrapped in _() markers, and in fact cannot be for
        Python to consider them docstrings. (See also the -X option).

    -h
    --help
        Print this help message and exit.

    -k word
    --keyword=word
        Keywords to look for in addition to the default set, which are:
        %(DEFAULTKEYWORDS)s

        You can have multiple -k flags on the command line.

    -K
    --no-default-keywords
        Disable the default set of keywords (see above). Any keywords
        explicitly added with the -k/--keyword option are still recognized.

    --no-location
        Do not write filename/lineno location comments.

    -n
    --add-location
        Write filename/lineno location comments indicating where each
        extracted string is found in the source. These lines appear before
        each msgid. The style of comments is controlled by the -S/--style
        option. This is the default.

    -o filename
    --output=filename
        Rename the default output file from messages.pot to filename. If
        filename is `-' then the output is sent to standard out.

    -p dir
    --output-dir=dir
        Output files will be placed in directory dir.

    -S stylename
    --style stylename
        Specify which style to use for location comments. Two styles are
        supported:

        Solaris  # File: filename, line: line-number
        GNU      #: filename:line

        The style name is case insensitive. GNU style is the default.

    -v
    --verbose
        Print the names of the files being processed.

    -V
    --version
        Print the version of pygettext and exit.

    -w columns
    --width=columns
        Set width of output to columns.

    -x filename
    --exclude-file=filename
        Specify a file that contains a list of strings that are not to be
        extracted from the input files. Each string to be excluded must
        appear on a line by itself in the file.

    -X filename
    --no-docstrings=filename
        Specify a file that contains a list of files (one per line) that
        should not have their docstrings extracted. This is only useful in
        conjunction with the -D option above.

If `inputfile' is -, standard input is read.
"""

import os
import imp
import sys
import glob
import time
import getopt
import token
import tokenize
import operator

__version__ = '1.5'

default_keywords = ['_']
DEFAULTKEYWORDS = ', '.join(default_keywords)

EMPTYSTRING = ''

from setup import __appname__, __version__ as version

# The normal pot-file header. msgmerge and Emacs's po-mode work better if it's
# there.
pot_header = '''\
# Translation template file..
# Copyright (C) %(year)s Kovid Goyal
# Kovid Goyal <kovid@kovidgoyal.net>, %(year)s.
#
msgid ""
msgstr ""
"Project-Id-Version: %(appname)s %(version)s\\n"
"POT-Creation-Date: %%(time)s\\n"
"PO-Revision-Date: %%(time)s\\n"
"Last-Translator: Automatically generated\\n"
"Language-Team: LANGUAGE\\n"
"MIME-Version: 1.0\\n"
"Content-Type: text/plain; charset=UTF-8\\n"
"Content-Transfer-Encoding: 8bit\\n"
"Generated-By: pygettext.py %%(version)s\\n"

'''%dict(appname=__appname__, version=version, year=time.strftime('%Y'))

def usage(code, msg=''):
    print >> sys.stderr, __doc__ % globals()
    if msg:
        print >> sys.stderr, msg
    sys.exit(code)



escapes = []

def make_escapes(pass_iso8859):
    global escapes
    if pass_iso8859:
        # Allow iso-8859 characters to pass through so that e.g. 'msgid
        # "Höhe"' would not result in 'msgid "H\366he"'. Otherwise we
        # escape any character outside the 32..126 range.
        mod = 128
    else:
        mod = 256
    for i in range(256):
        if 32 <= (i % mod) <= 126:
            escapes.append(chr(i))
        else:
            escapes.append("\\%03o" % i)
    escapes[ord('\\')] = '\\\\'
    escapes[ord('\t')] = '\\t'
    escapes[ord('\r')] = '\\r'
    escapes[ord('\n')] = '\\n'
    escapes[ord('\"')] = '\\"'


def escape(s):
    global escapes
    s = list(s)
    for i in range(len(s)):
        s[i] = escapes[ord(s[i])]
    return EMPTYSTRING.join(s)


def safe_eval(s):
    # unwrap quotes, safely
    return eval(s, {'__builtins__':{}}, {})


def normalize(s):
    # This converts the various Python string types into a format that is
    # appropriate for .po files, namely much closer to C style.
    lines = s.split('\n')
    if len(lines) == 1:
        s = '"' + escape(s) + '"'
    else:
        if not lines[-1]:
            del lines[-1]
            lines[-1] = lines[-1] + '\n'
        for i in range(len(lines)):
            lines[i] = escape(lines[i])
        lineterm = '\\n"\n"'
        s = '""\n"' + lineterm.join(lines) + '"'
    return s


def containsAny(str, set):
    """Check whether 'str' contains ANY of the chars in 'set'"""
    return 1 in [c in str for c in set]


def _visit_pyfiles(list, dirname, names):
    """Helper for getFilesForName()."""
    # get extension for python source files
    if not globals().has_key('_py_ext'):
        global _py_ext
        _py_ext = [triple[0] for triple in imp.get_suffixes()
                   if triple[2] == imp.PY_SOURCE][0]

    # don't recurse into CVS directories
    if 'CVS' in names:
        names.remove('CVS')

    # add all *.py files to list
    list.extend(
        [os.path.join(dirname, file) for file in names
         if os.path.splitext(file)[1] == _py_ext]
        )


def _get_modpkg_path(dotted_name, pathlist=None):
    """Get the filesystem path for a module or a package.

    Return the file system path to a file for a module, and to a directory for
    a package. Return None if the name is not found, or is a builtin or
    extension module.
    """
    # split off top-most name
    parts = dotted_name.split('.', 1)

    if len(parts) > 1:
        # we have a dotted path, import top-level package
        try:
            file, pathname, description = imp.find_module(parts[0], pathlist)
            if file: file.close()
        except ImportError:
            return None

        # check if it's indeed a package
        if description[2] == imp.PKG_DIRECTORY:
            # recursively handle the remaining name parts
            pathname = _get_modpkg_path(parts[1], [pathname])
        else:
            pathname = None
    else:
        # plain name
        try:
            file, pathname, description = imp.find_module(
                dotted_name, pathlist)
            if file:
                file.close()
            if description[2] not in [imp.PY_SOURCE, imp.PKG_DIRECTORY]:
                pathname = None
        except ImportError:
            pathname = None

    return pathname


def getFilesForName(name):
    """Get a list of module files for a filename, a module or package name,
    or a directory.
    """
    if not os.path.exists(name):
        # check for glob chars
        if containsAny(name, "*?[]"):
            files = glob.glob(name)
            list = []
            for file in files:
                list.extend(getFilesForName(file))
            return list

        # try to find module or package
        name = _get_modpkg_path(name)
        if not name:
            return []

    if os.path.isdir(name):
        # find all python files in directory
        list = []
        os.path.walk(name, _visit_pyfiles, list)
        return list
    elif os.path.exists(name):
        # a single file
        return [name]

    return []


class TokenEater:
    def __init__(self, options):
        self.__options = options
        self.__messages = {}
        self.__state = self.__waiting
        self.__data = []
        self.__lineno = -1
        self.__freshmodule = 1
        self.__curfile = None

    def __call__(self, ttype, tstring, stup, etup, line):
        # dispatch
##        import token
##        print >> sys.stderr, 'ttype:', token.tok_name[ttype], \
##            'tstring:', tstring
        self.__state(ttype, tstring, stup[0])

    def __waiting(self, ttype, tstring, lineno):
        opts = self.__options
        # Do docstring extractions, if enabled
        if opts.docstrings and not opts.nodocstrings.get(self.__curfile):
            # module docstring?
            if self.__freshmodule:
                if ttype == tokenize.STRING:
                    self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
                    self.__freshmodule = 0
                elif ttype not in (tokenize.COMMENT, tokenize.NL):
                    self.__freshmodule = 0
                return
            # class docstring?
            if ttype == tokenize.NAME and tstring in ('class', 'def'):
                self.__state = self.__suiteseen
                return
        if ttype == tokenize.NAME and tstring in opts.keywords:
            self.__state = self.__keywordseen

    def __suiteseen(self, ttype, tstring, lineno):
        # ignore anything until we see the colon
        if ttype == tokenize.OP and tstring == ':':
            self.__state = self.__suitedocstring

    def __suitedocstring(self, ttype, tstring, lineno):
        # ignore any intervening noise
        if ttype == tokenize.STRING:
            self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
            self.__state = self.__waiting
        elif ttype not in (tokenize.NEWLINE, tokenize.INDENT,
                           tokenize.COMMENT):
            # there was no class docstring
            self.__state = self.__waiting

    def __keywordseen(self, ttype, tstring, lineno):
        if ttype == tokenize.OP and tstring == '(':
            self.__data = []
            self.__lineno = lineno
            self.__state = self.__openseen
        else:
            self.__state = self.__waiting

    def __openseen(self, ttype, tstring, lineno):
        if ttype == tokenize.OP and tstring == ')':
            # We've seen the last of the translatable strings. Record the
            # line number of the first line of the strings and update the list
            # of messages seen. Reset state for the next batch. If there
            # were no strings inside _(), then just ignore this entry.
            if self.__data:
                self.__addentry(EMPTYSTRING.join(self.__data))
            self.__state = self.__waiting
        elif ttype == tokenize.STRING:
            self.__data.append(safe_eval(tstring))
        elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
                           token.NEWLINE, tokenize.NL]:
            # warn if we see anything else than STRING or whitespace
            print >> sys.stderr, \
                '*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"'\
                % {
                'token': tstring,
                'file': self.__curfile,
                'lineno': self.__lineno
                }
            self.__state = self.__waiting

    def __addentry(self, msg, lineno=None, isdocstring=0):
        if lineno is None:
            lineno = self.__lineno
        if not msg in self.__options.toexclude:
            entry = (self.__curfile, lineno)
            self.__messages.setdefault(msg, {})[entry] = isdocstring

    def set_filename(self, filename):
        self.__curfile = filename
        self.__freshmodule = 1

    def write(self, fp):
        options = self.__options
        timestamp = time.strftime('%Y-%m-%d %H:%M+%Z')
        # The time stamp in the header doesn't have the same format as that
        # generated by xgettext...
        print >> fp, pot_header % {'time': timestamp, 'version': __version__}
        # Sort the entries. First sort each particular entry's keys, then
        # sort all the entries by their first item.
        reverse = {}
        for k, v in self.__messages.items():
            keys = v.keys()
            keys.sort()
            reverse.setdefault(tuple(keys), []).append((k, v))
        rkeys = reverse.keys()
        rkeys.sort()
        for rkey in rkeys:
            rentries = reverse[rkey]
            rentries.sort()
            for k, v in rentries:
                isdocstring = 0
                # If the entry was gleaned out of a docstring, then add a
                # comment stating so. This is to aid translators who may wish
                # to skip translating some unimportant docstrings.
                if reduce(operator.__add__, v.values()):
                    isdocstring = 1
                # k is the message string, v is a dictionary-set of (filename,
                # lineno) tuples. We want to sort the entries in v first by
                # file name and then by line number.
                v = v.keys()
                v.sort()
                if not options.writelocations:
                    pass
                # location comments are different b/w Solaris and GNU:
                elif options.locationstyle == options.SOLARIS:
                    for filename, lineno in v:
                        d = {'filename': filename, 'lineno': lineno}
                        print >>fp, \
                            '# File: %(filename)s, line: %(lineno)d' % d
                elif options.locationstyle == options.GNU:
                    # fit as many locations on one line, as long as the
                    # resulting line length doesn't exceed 'options.width'
                    locline = '#:'
                    for filename, lineno in v:
                        d = {'filename': filename, 'lineno': lineno}
                        s = ' %(filename)s:%(lineno)d' % d
                        if len(locline) + len(s) <= options.width:
                            locline = locline + s
                        else:
                            print >> fp, locline
                            locline = "#:" + s
                    if len(locline) > 2:
                        print >> fp, locline
                if isdocstring:
                    print >> fp, '#, docstring'
                print >> fp, 'msgid', normalize(k)
                print >> fp, 'msgstr ""\n'



def main(outfile, args=sys.argv[1:]):
    global default_keywords
    try:
        opts, args = getopt.getopt(
            args,
            'ad:DEhk:Kno:p:S:Vvw:x:X:',
            ['extract-all', 'default-domain=', 'escape', 'help',
             'keyword=', 'no-default-keywords',
             'add-location', 'no-location', 'output=', 'output-dir=',
             'style=', 'verbose', 'version', 'width=', 'exclude-file=',
             'docstrings', 'no-docstrings',
             ])
    except getopt.error, msg:
        usage(1, msg)

    # for holding option values
    class Options:
        # constants
        GNU = 1
        SOLARIS = 2
        # defaults
        extractall = 0 # FIXME: currently this option has no effect at all.
        escape = 0
        keywords = []
        outpath = ''
        outfile = 'messages.pot'
        writelocations = 1
        locationstyle = GNU
        verbose = 0
        width = 78
        excludefilename = ''
        docstrings = 0
        nodocstrings = {}

    options = Options()
    locations = {'gnu' : options.GNU,
                 'solaris' : options.SOLARIS,
                 }

    # parse options
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-a', '--extract-all'):
            options.extractall = 1
        elif opt in ('-d', '--default-domain'):
            options.outfile = arg + '.pot'
        elif opt in ('-E', '--escape'):
            options.escape = 1
        elif opt in ('-D', '--docstrings'):
            options.docstrings = 1
        elif opt in ('-k', '--keyword'):
            options.keywords.append(arg)
        elif opt in ('-K', '--no-default-keywords'):
            default_keywords = []
        elif opt in ('-n', '--add-location'):
            options.writelocations = 1
        elif opt in ('--no-location',):
            options.writelocations = 0
        elif opt in ('-S', '--style'):
            options.locationstyle = locations.get(arg.lower())
            if options.locationstyle is None:
                usage(1, ('Invalid value for --style: %s') % arg)
        elif opt in ('-o', '--output'):
            options.outfile = arg
        elif opt in ('-p', '--output-dir'):
            options.outpath = arg
        elif opt in ('-v', '--verbose'):
            options.verbose = 1
        elif opt in ('-V', '--version'):
            print ('pygettext.py (xgettext for Python) %s') % __version__
            sys.exit(0)
        elif opt in ('-w', '--width'):
            try:
                options.width = int(arg)
            except ValueError:
                usage(1, ('--width argument must be an integer: %s') % arg)
        elif opt in ('-x', '--exclude-file'):
            options.excludefilename = arg
        elif opt in ('-X', '--no-docstrings'):
            fp = open(arg)
            try:
                while 1:
                    line = fp.readline()
                    if not line:
                        break
                    options.nodocstrings[line[:-1]] = 1
            finally:
                fp.close()

    # calculate escapes
    make_escapes(options.escape)

    # calculate all keywords
    options.keywords.extend(default_keywords)

    # initialize list of strings to exclude
    if options.excludefilename:
        try:
            fp = open(options.excludefilename)
            options.toexclude = fp.readlines()
            fp.close()
        except IOError:
            print >> sys.stderr, (
                "Can't read --exclude-file: %s") % options.excludefilename
            sys.exit(1)
    else:
        options.toexclude = []

    # resolve args to module lists
    expanded = []
    for arg in args:
        if arg == '-':
            expanded.append(arg)
        else:
            expanded.extend(getFilesForName(arg))
    args = expanded

    # slurp through all the files
    eater = TokenEater(options)
    for filename in args:
        if filename == '-':
            if options.verbose:
                print ('Reading standard input')
            fp = sys.stdin
            closep = 0
        else:
            if options.verbose:
                print ('Working on %s') % filename
            fp = open(filename)
            closep = 1
        try:
            eater.set_filename(filename)
            try:
                tokenize.tokenize(fp.readline, eater)
            except tokenize.TokenError, e:
                print >> sys.stderr, '%s: %s, line %d, column %d' % (
                    e[0], filename, e[1][0], e[1][1])
            except IndentationError, e:
                print >> sys.stderr, '%s: %s, line %s, column %s' % (
                    e[0], filename, e.lineno, e[1][1])

        finally:
            if closep:
                fp.close()

    # write the output
    eater.write(outfile)

if __name__ == '__main__':
    main(sys.stdout)
@@ -6,11 +6,10 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

- import os, cStringIO, tempfile, shutil, atexit, subprocess, glob, re
+ import os, tempfile, shutil, subprocess, glob, re, time, textwrap
from distutils import sysconfig

- from setup import Command, __appname__
- from setup.pygettext import main as pygettext
+ from setup import Command, __appname__, __version__
from setup.build_environment import pyqt

class POT(Command):
@@ -60,13 +59,44 @@ class POT(Command):


    def run(self, opts):
+       pot_header = textwrap.dedent('''\
+       # Translation template file..
+       # Copyright (C) %(year)s Kovid Goyal
+       # Kovid Goyal <kovid@kovidgoyal.net>, %(year)s.
+       #
+       msgid ""
+       msgstr ""
+       "Project-Id-Version: %(appname)s %(version)s\\n"
+       "POT-Creation-Date: %(time)s\\n"
+       "PO-Revision-Date: %(time)s\\n"
+       "Last-Translator: Automatically generated\\n"
+       "Language-Team: LANGUAGE\\n"
+       "MIME-Version: 1.0\\n"
+       "Report-Msgid-Bugs-To: https://bugs.launchpad.net/calibre\\n"
+       "Plural-Forms: nplurals=INTEGER; plural=EXPRESSION;\\n"
+       "Content-Type: text/plain; charset=UTF-8\\n"
+       "Content-Transfer-Encoding: 8bit\\n"
+
+       ''')%dict(appname=__appname__, version=__version__,
+               year=time.strftime('%Y'),
+               time=time.strftime('%Y-%m-%d %H:%M+%Z'))
+
        files = self.source_files()
-       buf = cStringIO.StringIO()
+       with tempfile.NamedTemporaryFile() as fl:
+           fl.write('\n'.join(files))
+           fl.flush()
+           out = tempfile.NamedTemporaryFile(suffix='.pot', delete=False)
+           out.close()
            self.info('Creating translations template...')
-           tempdir = tempfile.mkdtemp()
-           atexit.register(shutil.rmtree, tempdir)
-           pygettext(buf, ['-k', '__', '-p', tempdir]+files)
-           src = buf.getvalue()
+           subprocess.check_call(['xgettext', '-f', fl.name,
+               '--default-domain=calibre', '-o', out.name, '-L', 'Python',
+               '--from-code=UTF-8', '--sort-by-file', '--omit-header',
+               '--no-wrap', '-k__',
+               ])
+           with open(out.name, 'rb') as f:
+               src = f.read()
+           os.remove(out.name)
+           src = pot_header + '\n' + src
            src += '\n\n' + self.get_tweaks_docs()
            pot = os.path.join(self.PATH, __appname__+'.pot')
            with open(pot, 'wb') as f:
@@ -106,10 +106,12 @@ def sanitize_file_name(name, substitute='_', as_unicode=False):
        name = name.encode(filesystem_encoding, 'ignore')
    one = _filename_sanitize.sub(substitute, name)
    one = re.sub(r'\s', ' ', one).strip()
-   one = re.sub(r'^\.+$', '_', one)
+   bname, ext = os.path.splitext(one)
+   one = re.sub(r'^\.+$', '_', bname)
    if as_unicode:
        one = one.decode(filesystem_encoding)
    one = one.replace('..', substitute)
+   one += ext
    # Windows doesn't like path components that end with a period
    if one and one[-1] in ('.', ' '):
        one = one[:-1]+'_'
@@ -132,8 +134,10 @@ def sanitize_file_name_unicode(name, substitute='_'):
                name]
    one = u''.join(chars)
    one = re.sub(r'\s', ' ', one).strip()
-   one = re.sub(r'^\.+$', '_', one)
+   bname, ext = os.path.splitext(one)
+   one = re.sub(r'^\.+$', '_', bname)
    one = one.replace('..', substitute)
+   one += ext
    # Windows doesn't like path components that end with a period or space
    if one and one[-1] in ('.', ' '):
        one = one[:-1]+'_'
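(A standalone sketch of the behaviour change in the two hunks above; an assumed simplification, not calibre's full function. Splitting the extension off first means the '..' substitution can no longer destroy the extension's own dot.)

import os, re

def sanitize(name, substitute='_'):
    one = re.sub(r'\s', ' ', name).strip()
    bname, ext = os.path.splitext(one)           # protect the extension first
    bname = re.sub(r'^\.+$', substitute, bname)  # basename made only of dots
    bname = bname.replace('..', substitute)
    one = bname + ext
    if one and one[-1] in ('.', ' '):            # Windows dislikes trailing '.' or ' '
        one = one[:-1] + substitute
    return one

# The old order ate the extension's dot: 'a..epub' -> 'a_epub'.
# The new order keeps the suffix intact: sanitize('a..epub') == 'a..epub'.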
@@ -578,6 +582,7 @@ def url_slash_cleaner(url):
def get_download_filename(url, cookie_file=None):
    '''
    Get a local filename for a URL using the content disposition header
+   Returns empty string if no content disposition header present
    '''
    from contextlib import closing
    from urllib2 import unquote as urllib2_unquote
@@ -591,8 +596,10 @@ def get_download_filename(url, cookie_file=None):
        cj.load(cookie_file)
        br.set_cookiejar(cj)

+   last_part_name = ''
    try:
        with closing(br.open(url)) as r:
+           last_part_name = r.geturl().split('/')[-1]
            disposition = r.info().get('Content-disposition', '')
            for p in disposition.split(';'):
                if 'filename' in p:
@@ -612,7 +619,7 @@ def get_download_filename(url, cookie_file=None):
            traceback.print_exc()

    if not filename:
-       filename = r.geturl().split('/')[-1]
+       filename = last_part_name

    return filename
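(The change above captures the final URL's last path component while the response is still open, instead of touching r after the closing() block has exited. A minimal sketch of the resulting selection logic, with assumed names, simplified from the function above.)

def pick_filename(disposition, last_part_name):
    # Prefer a filename from the Content-Disposition header; otherwise fall
    # back to the last path component of the final (post-redirect) URL.
    filename = ''
    for p in disposition.split(';'):
        if 'filename' in p:
            filename = p.partition('=')[2].strip().strip('"\'')
            break
    return filename or last_part_name

# pick_filename('attachment; filename="x.epub"', 'index.php') == 'x.epub'
# pick_filename('', 'book.mobi') == 'book.mobi'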
@@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = u'calibre'
- numeric_version = (0, 8, 5)
+ numeric_version = (0, 8, 9)
__version__ = u'.'.join(map(unicode, numeric_version))
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"
@@ -32,6 +32,7 @@ isbsd = isfreebsd or isnetbsd
islinux = not(iswindows or isosx or isbsd)
isfrozen = hasattr(sys, 'frozen')
isunix = isosx or islinux
+ isportable = os.environ.get('CALIBRE_PORTABLE_BUILD', None) is not None

try:
    preferred_encoding = locale.getpreferredencoding()
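(This line ties back to the Windows portable launcher hunk earlier in this merge: the C code exports CALIBRE_PORTABLE_BUILD, set to the launcher executable's own path, before spawning calibre, and constants.py only tests for the variable's presence. A hypothetical helper sketching the consuming side:)

import os

def portable_root():
    # Returns the portable install folder, or None for normal installs.
    exe = os.environ.get('CALIBRE_PORTABLE_BUILD')
    return os.path.dirname(exe) if exe else None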
@@ -3,57 +3,16 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'

- import textwrap, os, glob, functools, re
+ import os, glob, functools, re
from calibre import guess_type
from calibre.customize import FileTypePlugin, MetadataReaderPlugin, \
    MetadataWriterPlugin, PreferencesPlugin, InterfaceActionBase, StoreBase
from calibre.constants import numeric_version
from calibre.ebooks.metadata.archive import ArchiveExtract, get_cbz_metadata
from calibre.ebooks.metadata.opf2 import metadata_to_opf
+ from calibre.ebooks.html.to_zip import HTML2ZIP

# To archive plugins {{{
- class HTML2ZIP(FileTypePlugin):
-     name = 'HTML to ZIP'
-     author = 'Kovid Goyal'
-     description = textwrap.dedent(_('''\
-     Follow all local links in an HTML file and create a ZIP \
-     file containing all linked files. This plugin is run \
-     every time you add an HTML file to the library.\
-     '''))
-     version = numeric_version
-     file_types = set(['html', 'htm', 'xhtml', 'xhtm', 'shtm', 'shtml'])
-     supported_platforms = ['windows', 'osx', 'linux']
-     on_import = True
-
-     def run(self, htmlfile):
-         from calibre.ptempfile import TemporaryDirectory
-         from calibre.gui2.convert.gui_conversion import gui_convert
-         from calibre.customize.conversion import OptionRecommendation
-         from calibre.ebooks.epub import initialize_container
-
-         with TemporaryDirectory('_plugin_html2zip') as tdir:
-             recs =[('debug_pipeline', tdir, OptionRecommendation.HIGH)]
-             recs.append(['keep_ligatures', True, OptionRecommendation.HIGH])
-             if self.site_customization and self.site_customization.strip():
-                 recs.append(['input_encoding', self.site_customization.strip(),
-                     OptionRecommendation.HIGH])
-             gui_convert(htmlfile, tdir, recs, abort_after_input_dump=True)
-             of = self.temporary_file('_plugin_html2zip.zip')
-             tdir = os.path.join(tdir, 'input')
-             opf = glob.glob(os.path.join(tdir, '*.opf'))[0]
-             ncx = glob.glob(os.path.join(tdir, '*.ncx'))
-             if ncx:
-                 os.remove(ncx[0])
-             epub = initialize_container(of.name, os.path.basename(opf))
-             epub.add_dir(tdir)
-             epub.close()
-
-         return of.name
-
-     def customization_help(self, gui=False):
-         return _('Character encoding for the input HTML files. Common choices '
-             'include: cp1252, latin1, iso-8859-1 and utf-8.')
-
-
class PML2PMLZ(FileTypePlugin):
    name = 'PML to PMLZ'
@@ -594,7 +553,7 @@ from calibre.devices.iliad.driver import ILIAD
from calibre.devices.irexdr.driver import IREXDR1000, IREXDR800
from calibre.devices.jetbook.driver import JETBOOK, MIBUK, JETBOOK_MINI
from calibre.devices.kindle.driver import KINDLE, KINDLE2, KINDLE_DX
- from calibre.devices.nook.driver import NOOK, NOOK_COLOR, NOOK_TSR
+ from calibre.devices.nook.driver import NOOK, NOOK_COLOR
from calibre.devices.prs505.driver import PRS505
from calibre.devices.user_defined.driver import USER_DEFINED
from calibre.devices.android.driver import ANDROID, S60
@@ -611,7 +570,7 @@ from calibre.devices.teclast.driver import (TECLAST_K3, NEWSMY, IPAPYRUS,
from calibre.devices.sne.driver import SNE
from calibre.devices.misc import (PALMPRE, AVANT, SWEEX, PDNOVEL,
    GEMEI, VELOCITYMICRO, PDNOVEL_KOBO, LUMIREAD, ALURATEK_COLOR,
-     TREKSTOR, EEEREADER, NEXTBOOK, ADAM)
+     TREKSTOR, EEEREADER, NEXTBOOK, ADAM, MOOVYBOOK)
from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
from calibre.devices.kobo.driver import KOBO
from calibre.devices.bambook.driver import BAMBOOK
@@ -694,7 +653,7 @@ plugins += [
    KINDLE,
    KINDLE2,
    KINDLE_DX,
-     NOOK, NOOK_COLOR, NOOK_TSR,
+     NOOK, NOOK_COLOR,
    PRS505,
    ANDROID,
    S60,
@@ -746,6 +705,7 @@ plugins += [
    EEEREADER,
    NEXTBOOK,
    ADAM,
+     MOOVYBOOK,
    ITUNES,
    BOEYE_BEX,
    BOEYE_BDX,
@@ -762,99 +722,132 @@ plugins += input_profiles + output_profiles
class ActionAdd(InterfaceActionBase):
    name = 'Add Books'
    actual_plugin = 'calibre.gui2.actions.add:AddAction'
+     description = _('Add books to calibre or the connected device')

class ActionFetchAnnotations(InterfaceActionBase):
    name = 'Fetch Annotations'
    actual_plugin = 'calibre.gui2.actions.annotate:FetchAnnotationsAction'
+     description = _('Fetch annotations from a connected Kindle (experimental)')

class ActionGenerateCatalog(InterfaceActionBase):
    name = 'Generate Catalog'
    actual_plugin = 'calibre.gui2.actions.catalog:GenerateCatalogAction'
+     description = _('Generate a catalog of the books in your calibre library')

class ActionConvert(InterfaceActionBase):
    name = 'Convert Books'
    actual_plugin = 'calibre.gui2.actions.convert:ConvertAction'
+     description = _('Convert books to various ebook formats')

class ActionDelete(InterfaceActionBase):
    name = 'Remove Books'
    actual_plugin = 'calibre.gui2.actions.delete:DeleteAction'
+     description = _('Delete books from your calibre library or connected device')

class ActionEditMetadata(InterfaceActionBase):
    name = 'Edit Metadata'
    actual_plugin = 'calibre.gui2.actions.edit_metadata:EditMetadataAction'
+     description = _('Edit the metadata of books in your calibre library')

class ActionView(InterfaceActionBase):
    name = 'View'
    actual_plugin = 'calibre.gui2.actions.view:ViewAction'
+     description = _('Read books in your calibre library')

class ActionFetchNews(InterfaceActionBase):
    name = 'Fetch News'
    actual_plugin = 'calibre.gui2.actions.fetch_news:FetchNewsAction'
+     description = _('Download news from the internet in ebook form')

+ class ActionQuickview(InterfaceActionBase):
+     name = 'Show Quickview'
+     actual_plugin = 'calibre.gui2.actions.show_quickview:ShowQuickviewAction'
+     description = _('Show a list of related books quickly')

class ActionSaveToDisk(InterfaceActionBase):
    name = 'Save To Disk'
    actual_plugin = 'calibre.gui2.actions.save_to_disk:SaveToDiskAction'
+     description = _('Export books from your calibre library to the hard disk')

class ActionShowBookDetails(InterfaceActionBase):
    name = 'Show Book Details'
    actual_plugin = 'calibre.gui2.actions.show_book_details:ShowBookDetailsAction'
+     description = _('Show book details in a separate popup')

class ActionRestart(InterfaceActionBase):
    name = 'Restart'
    actual_plugin = 'calibre.gui2.actions.restart:RestartAction'
+     description = _('Restart calibre')

class ActionOpenFolder(InterfaceActionBase):
    name = 'Open Folder'
    actual_plugin = 'calibre.gui2.actions.open:OpenFolderAction'
+     description = _('Open the folder that contains the book files in your'
+             ' calibre library')

class ActionSendToDevice(InterfaceActionBase):
    name = 'Send To Device'
    actual_plugin = 'calibre.gui2.actions.device:SendToDeviceAction'
+     description = _('Send books to the connected device')

class ActionConnectShare(InterfaceActionBase):
    name = 'Connect Share'
    actual_plugin = 'calibre.gui2.actions.device:ConnectShareAction'
+     description = _('Send books via email or the web. Also connect to iTunes or'
+             ' folders on your computer as if they are devices')

class ActionHelp(InterfaceActionBase):
    name = 'Help'
    actual_plugin = 'calibre.gui2.actions.help:HelpAction'
+     description = _('Browse the calibre User Manual')

class ActionPreferences(InterfaceActionBase):
    name = 'Preferences'
    actual_plugin = 'calibre.gui2.actions.preferences:PreferencesAction'
+     description = _('Customize calibre')

class ActionSimilarBooks(InterfaceActionBase):
    name = 'Similar Books'
    actual_plugin = 'calibre.gui2.actions.similar_books:SimilarBooksAction'
+     description = _('Easily find books similar to the currently selected one')

class ActionChooseLibrary(InterfaceActionBase):
    name = 'Choose Library'
    actual_plugin = 'calibre.gui2.actions.choose_library:ChooseLibraryAction'
+     description = _('Switch between different calibre libraries and perform'
+             ' maintenance on them')

class ActionAddToLibrary(InterfaceActionBase):
    name = 'Add To Library'
    actual_plugin = 'calibre.gui2.actions.add_to_library:AddToLibraryAction'
+     description = _('Copy books from the device to your calibre library')

class ActionEditCollections(InterfaceActionBase):
    name = 'Edit Collections'
    actual_plugin = 'calibre.gui2.actions.edit_collections:EditCollectionsAction'
+     description = _('Edit the collections in which books are placed on your device')

class ActionCopyToLibrary(InterfaceActionBase):
    name = 'Copy To Library'
    actual_plugin = 'calibre.gui2.actions.copy_to_library:CopyToLibraryAction'
+     description = _('Copy a book from one calibre library to another')

class ActionTweakEpub(InterfaceActionBase):
    name = 'Tweak ePub'
    actual_plugin = 'calibre.gui2.actions.tweak_epub:TweakEpubAction'
+     description = _('Make small tweaks to epub files in your calibre library')

class ActionNextMatch(InterfaceActionBase):
    name = 'Next Match'
    actual_plugin = 'calibre.gui2.actions.next_match:NextMatchAction'
+     description = _('Find the next or previous match when searching in '
+             'your calibre library in highlight mode')

class ActionStore(InterfaceActionBase):
    name = 'Store'
    author = 'John Schember'
    actual_plugin = 'calibre.gui2.actions.store:StoreAction'
+     description = _('Search for books from different book sellers')

    def customization_help(self, gui=False):
        return 'Customize the behavior of the store search.'
@@ -867,13 +860,20 @@ class ActionStore(InterfaceActionBase):
        from calibre.gui2.store.config.store import save_settings as save
        save(config_widget)

+ class ActionPluginUpdater(InterfaceActionBase):
+     name = 'Plugin Updater'
+     author = 'Grant Drake'
+     description = _('Get new calibre plugins or update your existing ones')
+     actual_plugin = 'calibre.gui2.actions.plugin_updates:PluginUpdaterAction'
+
plugins += [ActionAdd, ActionFetchAnnotations, ActionGenerateCatalog,
    ActionConvert, ActionDelete, ActionEditMetadata, ActionView,
-     ActionFetchNews, ActionSaveToDisk, ActionShowBookDetails,
-     ActionRestart, ActionOpenFolder, ActionConnectShare,
+     ActionFetchNews, ActionSaveToDisk, ActionQuickview,
+     ActionShowBookDetails, ActionRestart, ActionOpenFolder, ActionConnectShare,
    ActionSendToDevice, ActionHelp, ActionPreferences, ActionSimilarBooks,
    ActionAddToLibrary, ActionEditCollections, ActionChooseLibrary,
-     ActionCopyToLibrary, ActionTweakEpub, ActionNextMatch, ActionStore]
+     ActionCopyToLibrary, ActionTweakEpub, ActionNextMatch, ActionStore,
+     ActionPluginUpdater]

# }}}

@ -1108,7 +1108,7 @@ plugins += [LookAndFeel, Behavior, Columns, Toolbar, Search, InputOptions,
|
||||
class StoreAmazonKindleStore(StoreBase):
|
||||
name = 'Amazon Kindle'
|
||||
description = u'Kindle books from Amazon.'
|
||||
actual_plugin = 'calibre.gui2.store.amazon_plugin:AmazonKindleStore'
|
||||
actual_plugin = 'calibre.gui2.store.stores.amazon_plugin:AmazonKindleStore'
|
||||
|
||||
headquarters = 'US'
|
||||
formats = ['KINDLE']
|
||||
@ -1118,7 +1118,7 @@ class StoreAmazonDEKindleStore(StoreBase):
|
||||
name = 'Amazon DE Kindle'
|
||||
author = 'Charles Haley'
|
||||
description = u'Kindle Bücher von Amazon.'
|
||||
actual_plugin = 'calibre.gui2.store.amazon_de_plugin:AmazonDEKindleStore'
|
||||
actual_plugin = 'calibre.gui2.store.stores.amazon_de_plugin:AmazonDEKindleStore'
|
||||
|
||||
headquarters = 'DE'
|
||||
formats = ['KINDLE']
|
||||
@ -1128,7 +1128,7 @@ class StoreAmazonUKKindleStore(StoreBase):
|
||||
name = 'Amazon UK Kindle'
|
||||
author = 'Charles Haley'
|
||||
description = u'Kindle books from Amazon\'s UK web site. Also, includes French language ebooks.'
|
||||
actual_plugin = 'calibre.gui2.store.amazon_uk_plugin:AmazonUKKindleStore'
|
||||
actual_plugin = 'calibre.gui2.store.stores.amazon_uk_plugin:AmazonUKKindleStore'
|
||||
    headquarters = 'UK'
    formats = ['KINDLE']

@@ -1137,7 +1137,7 @@ class StoreAmazonUKKindleStore(StoreBase):
class StoreArchiveOrgStore(StoreBase):
    name = 'Archive.org'
    description = u'An Internet library offering permanent access for researchers, historians, scholars, people with disabilities, and the general public to historical collections that exist in digital format.'
-    actual_plugin = 'calibre.gui2.store.archive_org_plugin:ArchiveOrgStore'
+    actual_plugin = 'calibre.gui2.store.stores.archive_org_plugin:ArchiveOrgStore'

    drm_free_only = True
    headquarters = 'US'

@@ -1146,7 +1146,7 @@ class StoreArchiveOrgStore(StoreBase):
class StoreBaenWebScriptionStore(StoreBase):
    name = 'Baen WebScription'
    description = u'Sci-Fi & Fantasy brought to you by Jim Baen.'
-    actual_plugin = 'calibre.gui2.store.baen_webscription_plugin:BaenWebScriptionStore'
+    actual_plugin = 'calibre.gui2.store.stores.baen_webscription_plugin:BaenWebScriptionStore'

    drm_free_only = True
    headquarters = 'US'

@@ -1155,7 +1155,7 @@ class StoreBaenWebScriptionStore(StoreBase):
class StoreBNStore(StoreBase):
    name = 'Barnes and Noble'
    description = u'The world\'s largest book seller. As the ultimate destination for book lovers, Barnes & Noble.com offers an incredible array of content.'
-    actual_plugin = 'calibre.gui2.store.bn_plugin:BNStore'
+    actual_plugin = 'calibre.gui2.store.stores.bn_plugin:BNStore'

    headquarters = 'US'
    formats = ['NOOK']

@@ -1165,7 +1165,7 @@ class StoreBeamEBooksDEStore(StoreBase):
    name = 'Beam EBooks DE'
    author = 'Charles Haley'
    description = u'Bei uns finden Sie: Tausende deutschsprachige eBooks; Alle eBooks ohne hartes DRM; PDF, ePub und Mobipocket Format; Sofortige Verfügbarkeit - 24 Stunden am Tag; Günstige Preise; eBooks für viele Lesegeräte, PC,Mac und Smartphones; Viele Gratis eBooks'
-    actual_plugin = 'calibre.gui2.store.beam_ebooks_de_plugin:BeamEBooksDEStore'
+    actual_plugin = 'calibre.gui2.store.stores.beam_ebooks_de_plugin:BeamEBooksDEStore'

    drm_free_only = True
    headquarters = 'DE'

@@ -1175,7 +1175,7 @@ class StoreBeamEBooksDEStore(StoreBase):
class StoreBeWriteStore(StoreBase):
    name = 'BeWrite Books'
    description = u'Publishers of fine books. Highly selective and editorially driven. Does not offer: books for children or exclusively YA, erotica, swords-and-sorcery fantasy and space-opera-style science fiction. All other genres are represented.'
-    actual_plugin = 'calibre.gui2.store.bewrite_plugin:BeWriteStore'
+    actual_plugin = 'calibre.gui2.store.stores.bewrite_plugin:BeWriteStore'

    drm_free_only = True
    headquarters = 'US'

@@ -1184,7 +1184,7 @@ class StoreBeWriteStore(StoreBase):
class StoreDieselEbooksStore(StoreBase):
    name = 'Diesel eBooks'
    description = u'Instant access to over 2.4 million titles from hundreds of publishers including Harlequin, HarperCollins, John Wiley & Sons, McGraw-Hill, Simon & Schuster and Random House.'
-    actual_plugin = 'calibre.gui2.store.diesel_ebooks_plugin:DieselEbooksStore'
+    actual_plugin = 'calibre.gui2.store.stores.diesel_ebooks_plugin:DieselEbooksStore'

    headquarters = 'US'
    formats = ['EPUB', 'PDF']

@@ -1193,7 +1193,7 @@ class StoreDieselEbooksStore(StoreBase):
class StoreEbookscomStore(StoreBase):
    name = 'eBooks.com'
    description = u'Sells books in multiple electronic formats in all categories. Technical infrastructure is cutting edge, robust and scalable, with servers in the US and Europe.'
-    actual_plugin = 'calibre.gui2.store.ebooks_com_plugin:EbookscomStore'
+    actual_plugin = 'calibre.gui2.store.stores.ebooks_com_plugin:EbookscomStore'

    headquarters = 'US'
    formats = ['EPUB', 'LIT', 'MOBI', 'PDF']

@@ -1203,7 +1203,7 @@ class StoreEPubBuyDEStore(StoreBase):
    name = 'EPUBBuy DE'
    author = 'Charles Haley'
    description = u'Bei EPUBBuy.com finden Sie ausschliesslich eBooks im weitverbreiteten EPUB-Format und ohne DRM. So haben Sie die freie Wahl, wo Sie Ihr eBook lesen: Tablet, eBook-Reader, Smartphone oder einfach auf Ihrem PC. So macht eBook-Lesen Spaß!'
-    actual_plugin = 'calibre.gui2.store.epubbuy_de_plugin:EPubBuyDEStore'
+    actual_plugin = 'calibre.gui2.store.stores.epubbuy_de_plugin:EPubBuyDEStore'

    drm_free_only = True
    headquarters = 'DE'

@@ -1214,7 +1214,7 @@ class StoreEBookShoppeUKStore(StoreBase):
    name = 'ebookShoppe UK'
    author = u'Charles Haley'
    description = u'We made this website in an attempt to offer the widest range of UK eBooks possible across and as many formats as we could manage.'
-    actual_plugin = 'calibre.gui2.store.ebookshoppe_uk_plugin:EBookShoppeUKStore'
+    actual_plugin = 'calibre.gui2.store.stores.ebookshoppe_uk_plugin:EBookShoppeUKStore'

    headquarters = 'UK'
    formats = ['EPUB', 'PDF']

@@ -1223,7 +1223,7 @@ class StoreEBookShoppeUKStore(StoreBase):
class StoreEHarlequinStore(StoreBase):
    name = 'eHarlequin'
    description = u'A global leader in series romance and one of the world\'s leading publishers of books for women. Offers women a broad range of reading from romance to bestseller fiction, from young adult novels to erotic literature, from nonfiction to fantasy, from African-American novels to inspirational romance, and more.'
-    actual_plugin = 'calibre.gui2.store.eharlequin_plugin:EHarlequinStore'
+    actual_plugin = 'calibre.gui2.store.stores.eharlequin_plugin:EHarlequinStore'

    headquarters = 'CA'
    formats = ['EPUB', 'PDF']

@@ -1232,7 +1232,7 @@ class StoreEHarlequinStore(StoreBase):
class StoreEpubBudStore(StoreBase):
    name = 'ePub Bud'
    description = 'Well, it\'s pretty much just "YouTube for Children\'s eBooks". A not-for-profit organization devoted to bringing self-published children\'s books to the world.'
-    actual_plugin = 'calibre.gui2.store.epubbud_plugin:EpubBudStore'
+    actual_plugin = 'calibre.gui2.store.stores.epubbud_plugin:EpubBudStore'

    drm_free_only = True
    headquarters = 'US'

@@ -1241,7 +1241,7 @@ class StoreEpubBudStore(StoreBase):
class StoreFeedbooksStore(StoreBase):
    name = 'Feedbooks'
    description = u'Feedbooks is a cloud publishing and distribution service, connected to a large ecosystem of reading systems and social networks. Provides a variety of genres from independent and classic books.'
-    actual_plugin = 'calibre.gui2.store.feedbooks_plugin:FeedbooksStore'
+    actual_plugin = 'calibre.gui2.store.stores.feedbooks_plugin:FeedbooksStore'

    headquarters = 'FR'
    formats = ['EPUB', 'MOBI', 'PDF']

@@ -1250,7 +1250,7 @@ class StoreFoylesUKStore(StoreBase):
    name = 'Foyles UK'
    author = 'Charles Haley'
    description = u'Foyles of London\'s ebook store. Provides an extensive range covering all subjects.'
-    actual_plugin = 'calibre.gui2.store.foyles_uk_plugin:FoylesUKStore'
+    actual_plugin = 'calibre.gui2.store.stores.foyles_uk_plugin:FoylesUKStore'

    headquarters = 'UK'
    formats = ['EPUB', 'PDF']

@@ -1260,7 +1260,7 @@ class StoreGandalfStore(StoreBase):
    name = 'Gandalf'
    author = u'Tomasz Długosz'
    description = u'Księgarnia internetowa Gandalf.'
-    actual_plugin = 'calibre.gui2.store.gandalf_plugin:GandalfStore'
+    actual_plugin = 'calibre.gui2.store.stores.gandalf_plugin:GandalfStore'

    headquarters = 'PL'
    formats = ['EPUB', 'PDF']

@@ -1268,7 +1268,7 @@ class StoreGandalfStore(StoreBase):
class StoreGoogleBooksStore(StoreBase):
    name = 'Google Books'
    description = u'Google Books'
-    actual_plugin = 'calibre.gui2.store.google_books_plugin:GoogleBooksStore'
+    actual_plugin = 'calibre.gui2.store.stores.google_books_plugin:GoogleBooksStore'

    headquarters = 'US'
    formats = ['EPUB', 'PDF', 'TXT']

@@ -1276,7 +1276,7 @@ class StoreGoogleBooksStore(StoreBase):
class StoreGutenbergStore(StoreBase):
    name = 'Project Gutenberg'
    description = u'The first producer of free ebooks. Free in the United States because their copyright has expired. They may not be free of copyright in other countries. Readers outside of the United States must check the copyright laws of their countries before downloading or redistributing our ebooks.'
-    actual_plugin = 'calibre.gui2.store.gutenberg_plugin:GutenbergStore'
+    actual_plugin = 'calibre.gui2.store.stores.gutenberg_plugin:GutenbergStore'

    drm_free_only = True
    headquarters = 'US'

@@ -1285,7 +1285,7 @@ class StoreGutenbergStore(StoreBase):
class StoreKoboStore(StoreBase):
    name = 'Kobo'
    description = u'With over 2.3 million eBooks to browse we have engaged readers in over 200 countries in Kobo eReading. Our eBook listings include New York Times Bestsellers, award winners, classics and more!'
-    actual_plugin = 'calibre.gui2.store.kobo_plugin:KoboStore'
+    actual_plugin = 'calibre.gui2.store.stores.kobo_plugin:KoboStore'

    headquarters = 'CA'
    formats = ['EPUB']

@@ -1295,7 +1295,7 @@ class StoreLegimiStore(StoreBase):
    name = 'Legimi'
    author = u'Tomasz Długosz'
    description = u'Tanie oraz darmowe ebooki, egazety i blogi w formacie EPUB, wprost na Twój e-czytnik, iPhone, iPad, Android i komputer'
-    actual_plugin = 'calibre.gui2.store.legimi_plugin:LegimiStore'
+    actual_plugin = 'calibre.gui2.store.stores.legimi_plugin:LegimiStore'

    headquarters = 'PL'
    formats = ['EPUB']

@@ -1304,7 +1304,7 @@ class StoreLibreDEStore(StoreBase):
    name = 'Libri DE'
    author = 'Charles Haley'
    description = u'Sicher Bücher, Hörbücher und Downloads online bestellen.'
-    actual_plugin = 'calibre.gui2.store.libri_de_plugin:LibreDEStore'
+    actual_plugin = 'calibre.gui2.store.stores.libri_de_plugin:LibreDEStore'

    headquarters = 'DE'
    formats = ['EPUB', 'PDF']

@@ -1313,7 +1313,7 @@ class StoreLibreDEStore(StoreBase):
class StoreManyBooksStore(StoreBase):
    name = 'ManyBooks'
    description = u'Public domain and creative commons works from many sources.'
-    actual_plugin = 'calibre.gui2.store.manybooks_plugin:ManyBooksStore'
+    actual_plugin = 'calibre.gui2.store.stores.manybooks_plugin:ManyBooksStore'

    drm_free_only = True
    headquarters = 'US'

@@ -1322,7 +1322,7 @@ class StoreManyBooksStore(StoreBase):
class StoreMobileReadStore(StoreBase):
    name = 'MobileRead'
    description = u'Ebooks handcrafted with the utmost care.'
-    actual_plugin = 'calibre.gui2.store.mobileread.mobileread_plugin:MobileReadStore'
+    actual_plugin = 'calibre.gui2.store.stores.mobileread.mobileread_plugin:MobileReadStore'

    drm_free_only = True
    headquarters = 'CH'

@@ -1332,25 +1332,24 @@ class StoreNextoStore(StoreBase):
    name = 'Nexto'
    author = u'Tomasz Długosz'
    description = u'Największy w Polsce sklep internetowy z audiobookami mp3, ebookami pdf oraz prasą do pobrania on-line.'
-    actual_plugin = 'calibre.gui2.store.nexto_plugin:NextoStore'
+    actual_plugin = 'calibre.gui2.store.stores.nexto_plugin:NextoStore'

    headquarters = 'PL'
    formats = ['EPUB', 'PDF']
    affiliate = True

-class StoreOpenLibraryStore(StoreBase):
-    name = 'Open Library'
-    description = u'One web page for every book ever published. The goal is to be a true online library. Over 20 million records from a variety of large catalogs as well as single contributions, with more on the way.'
-    actual_plugin = 'calibre.gui2.store.open_library_plugin:OpenLibraryStore'
+class StoreOpenBooksStore(StoreBase):
+    name = 'Open Books'
+    description = u'Comprehensive listing of DRM free ebooks from a variety of sources provided by users of calibre.'
+    actual_plugin = 'calibre.gui2.store.stores.open_books_plugin:OpenBooksStore'

    drm_free_only = True
    headquarters = 'US'
    formats = ['DAISY', 'DJVU', 'EPUB', 'MOBI', 'PDF', 'TXT']

class StoreOReillyStore(StoreBase):
    name = 'OReilly'
    description = u'Programming and tech ebooks from OReilly.'
-    actual_plugin = 'calibre.gui2.store.oreilly_plugin:OReillyStore'
+    actual_plugin = 'calibre.gui2.store.stores.oreilly_plugin:OReillyStore'

    drm_free_only = True
    headquarters = 'US'

@@ -1359,7 +1358,7 @@ class StoreOReillyStore(StoreBase):
class StorePragmaticBookshelfStore(StoreBase):
    name = 'Pragmatic Bookshelf'
    description = u'The Pragmatic Bookshelf\'s collection of programming and tech books available as ebooks.'
-    actual_plugin = 'calibre.gui2.store.pragmatic_bookshelf_plugin:PragmaticBookshelfStore'
+    actual_plugin = 'calibre.gui2.store.stores.pragmatic_bookshelf_plugin:PragmaticBookshelfStore'

    drm_free_only = True
    headquarters = 'US'

@@ -1368,7 +1367,7 @@ class StorePragmaticBookshelfStore(StoreBase):
class StoreSmashwordsStore(StoreBase):
    name = 'Smashwords'
    description = u'An ebook publishing and distribution platform for ebook authors, publishers and readers. Covers many genres and formats.'
-    actual_plugin = 'calibre.gui2.store.smashwords_plugin:SmashwordsStore'
+    actual_plugin = 'calibre.gui2.store.stores.smashwords_plugin:SmashwordsStore'

    drm_free_only = True
    headquarters = 'US'

@@ -1379,7 +1378,7 @@ class StoreVirtualoStore(StoreBase):
    name = 'Virtualo'
    author = u'Tomasz Długosz'
    description = u'Księgarnia internetowa, która oferuje bezpieczny i szeroki dostęp do książek w formie cyfrowej.'
-    actual_plugin = 'calibre.gui2.store.virtualo_plugin:VirtualoStore'
+    actual_plugin = 'calibre.gui2.store.stores.virtualo_plugin:VirtualoStore'

    headquarters = 'PL'
    formats = ['EPUB', 'PDF']

@@ -1388,7 +1387,7 @@ class StoreWaterstonesUKStore(StoreBase):
    name = 'Waterstones UK'
    author = 'Charles Haley'
    description = u'Waterstone\'s mission is to be the leading Bookseller on the High Street and online providing customers the widest choice, great value and expert advice from a team passionate about Bookselling.'
-    actual_plugin = 'calibre.gui2.store.waterstones_uk_plugin:WaterstonesUKStore'
+    actual_plugin = 'calibre.gui2.store.stores.waterstones_uk_plugin:WaterstonesUKStore'

    headquarters = 'UK'
    formats = ['EPUB', 'PDF']

@@ -1396,7 +1395,7 @@ class StoreWaterstonesUKStore(StoreBase):
class StoreWeightlessBooksStore(StoreBase):
    name = 'Weightless Books'
    description = u'An independent DRM-free ebook site devoted to ebooks of all sorts.'
-    actual_plugin = 'calibre.gui2.store.weightless_books_plugin:WeightlessBooksStore'
+    actual_plugin = 'calibre.gui2.store.stores.weightless_books_plugin:WeightlessBooksStore'

    drm_free_only = True
    headquarters = 'US'

@@ -1406,7 +1405,7 @@ class StoreWHSmithUKStore(StoreBase):
    name = 'WH Smith UK'
    author = 'Charles Haley'
    description = u"Shop for savings on Books, discounted Magazine subscriptions and great prices on Stationery, Toys & Games"
-    actual_plugin = 'calibre.gui2.store.whsmith_uk_plugin:WHSmithUKStore'
+    actual_plugin = 'calibre.gui2.store.stores.whsmith_uk_plugin:WHSmithUKStore'

    headquarters = 'UK'
    formats = ['EPUB', 'PDF']

@@ -1414,7 +1413,7 @@ class StoreWHSmithUKStore(StoreBase):
class StoreWizardsTowerBooksStore(StoreBase):
    name = 'Wizards Tower Books'
    description = u'A science fiction and fantasy publisher. Concentrates mainly on making out-of-print works available once more as e-books, and helping other small presses exploit the e-book market. Also publishes a small number of limited-print-run anthologies with a view to encouraging diversity in the science fiction and fantasy field.'
-    actual_plugin = 'calibre.gui2.store.wizards_tower_books_plugin:WizardsTowerBooksStore'
+    actual_plugin = 'calibre.gui2.store.stores.wizards_tower_books_plugin:WizardsTowerBooksStore'

    drm_free_only = True
    headquarters = 'UK'

@@ -1424,7 +1423,7 @@ class StoreWoblinkStore(StoreBase):
    name = 'Woblink'
    author = u'Tomasz Długosz'
    description = u'Czytanie zdarza się wszędzie!'
-    actual_plugin = 'calibre.gui2.store.woblink_plugin:WoblinkStore'
+    actual_plugin = 'calibre.gui2.store.stores.woblink_plugin:WoblinkStore'

    headquarters = 'PL'
    formats = ['EPUB']

@@ -1433,7 +1432,7 @@ class StoreZixoStore(StoreBase):
    name = 'Zixo'
    author = u'Tomasz Długosz'
    description = u'Księgarnia z ebookami oraz książkami audio. Aby otwierać książki w formacie Zixo należy zainstalować program dostępny na stronie księgarni. Umożliwia on m.in. dodawanie zakładek i dostosowywanie rozmiaru czcionki.'
-    actual_plugin = 'calibre.gui2.store.zixo_plugin:ZixoStore'
+    actual_plugin = 'calibre.gui2.store.stores.zixo_plugin:ZixoStore'

    headquarters = 'PL'
    formats = ['PDF', 'ZIXO']

@@ -1464,7 +1463,7 @@ plugins += [
    StoreManyBooksStore,
    StoreMobileReadStore,
    StoreNextoStore,
-    StoreOpenLibraryStore,
+    StoreOpenBooksStore,
    StoreOReillyStore,
    StorePragmaticBookshelfStore,
    StoreSmashwordsStore,
@@ -259,6 +259,10 @@ class OutputFormatPlugin(Plugin):
    #: (option_name, recommended_value, recommendation_level)
    recommendations = set([])

+    @property
+    def description(self):
+        return _('Convert ebooks to the %s format')%self.file_type
+
    def __init__(self, *args):
        Plugin.__init__(self, *args)
        self.report_progress = DummyReporter()

@@ -493,6 +493,8 @@ def initialize_plugin(plugin, path_to_zip_file):
        raise InvalidPlugin((_('Initialization of plugin %s failed with traceback:')
            %tb) + '\n'+tb)

+def has_external_plugins():
+    return bool(config['plugins'])

def initialize_plugins():
    global _initialized_plugins
src/calibre/db/__init__.py (new file, 66 lines)
@@ -0,0 +1,66 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
        print_function)

__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'


'''
Rewrite of the calibre database backend.

Broad Objectives:

    * Use the sqlite db only as a datastore. i.e. do not do
      sorting/searching/concatenation or anything else in sqlite. Instead
      mirror the sqlite tables in memory, create caches and lookup maps from
      them and create a set_* API that updates the memory caches and the sqlite
      correctly.

    * Move from keeping a list of books in memory as a cache to a per table
      cache. This allows much faster search and sort operations at the expense
      of slightly slower lookup operations. That slowdown can be mitigated by
      keeping lots of maps and updating them in the set_* API. Also
      get_categories becomes blazingly fast.

    * Separate the database layer from the cache layer more cleanly. Rather
      than having the db layer refer to the cache layer and vice versa, the
      cache layer will refer to the db layer only and the new API will be
      defined on the cache layer.

    * Get rid of index_is_id and other poor design decisions

    * Minimize the API as much as possible and define it cleanly

    * Do not change the on disk format of metadata.db at all (this is for
      backwards compatibility)

    * Get rid of the need for a separate db access thread by switching to apsw
      to access sqlite, which is thread safe

    * The new API will have methods to efficiently do bulk operations and will
      use shared/exclusive/pending locks to serialize access to the in-mem data
      structures. Use the same locking scheme as sqlite itself does.

How this will proceed:

    1. Create the new API
    2. Create a test suite for it
    3. Write a replacement for LibraryDatabase2 that uses the new API
       internally
    4. Lots of testing of calibre with the new LibraryDatabase2
    5. Gradually migrate code to use the (much faster) new api wherever
       possible (the new api will be exposed via db.new_api)

I plan to work on this slowly, in parallel to normal calibre development
work.

Various things that require other things before they can be migrated:
    1. From initialize_dynamic(): set_saved_searches,
       load_user_template_functions. Also add custom
       columns/categories/searches info into
       self.field_metadata. Finally, implement metadata dirtied
       functionality.
'''
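
To make the first objective concrete, a toy sketch of mirroring a sqlite table in memory and updating both sides from a single set_* call. All names here are hypothetical illustrations, not part of the planned API:

# Hypothetical sketch only: "mirror the table in memory, update memory
# and sqlite together in set_*". Not the real calibre API.
class MirroredTable(object):

    def __init__(self, conn, table, column):
        self.conn, self.table, self.column = conn, table, column
        # In-memory mirror: map of row id -> value, read once at startup
        self.id_map = dict(conn.cursor().execute(
            'SELECT id, %s FROM %s' % (column, table)))

    def set_value(self, id_, val):
        # Update sqlite and the cache together, so lookups never hit the db
        self.conn.cursor().execute(
            'UPDATE %s SET %s=? WHERE id=?' % (self.table, self.column),
            (val, id_))
        self.id_map[id_] = val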

src/calibre/db/backend.py (new file, 736 lines)
@@ -0,0 +1,736 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
        print_function)

__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

# Imports {{{
import os, shutil, uuid, json
from functools import partial

import apsw

from calibre import isbytestring, force_unicode, prints
from calibre.constants import (iswindows, filesystem_encoding,
        preferred_encoding)
from calibre.ptempfile import PersistentTemporaryFile
from calibre.db.schema_upgrades import SchemaUpgrade
from calibre.library.field_metadata import FieldMetadata
from calibre.ebooks.metadata import title_sort, author_to_author_sort
from calibre.utils.icu import strcmp
from calibre.utils.config import to_json, from_json, prefs, tweaks
from calibre.utils.date import utcfromtimestamp, parse_date
from calibre.utils.filenames import is_case_sensitive
from calibre.db.tables import (OneToOneTable, ManyToOneTable, ManyToManyTable,
        SizeTable, FormatsTable, AuthorsTable, IdentifiersTable)
# }}}

'''
Differences in semantics from pysqlite:

    1. execute/executemany operate in autocommit mode
    2. There is no fetchone() method on cursor objects, instead use next()
    3. There is no executescript

'''
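
A minimal sketch of those apsw semantics against an in-memory database, for illustration only:

import apsw

conn = apsw.Connection(':memory:')
cur = conn.cursor()
# 1. Autocommit: no commit() call is needed after these statements,
#    unless you open a transaction yourself with BEGIN.
cur.execute('CREATE TABLE t(x)')
cur.executemany('INSERT INTO t VALUES(?)', [(1,), (2,)])
# 2. No fetchone(): the cursor is an iterator, so use next() on it.
total = next(cur.execute('SELECT SUM(x) FROM t'))[0]
print(total)  # -> 3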

class DynamicFilter(object): # {{{

    'No longer used, present for legacy compatibility'

    def __init__(self, name):
        self.name = name
        self.ids = frozenset([])

    def __call__(self, id_):
        return int(id_ in self.ids)

    def change(self, ids):
        self.ids = frozenset(ids)
# }}}

class DBPrefs(dict): # {{{

    'Store preferences as key:value pairs in the db'

    def __init__(self, db):
        dict.__init__(self)
        self.db = db
        self.defaults = {}
        self.disable_setting = False
        for key, val in self.db.conn.get('SELECT key,val FROM preferences'):
            try:
                val = self.raw_to_object(val)
            except:
                prints('Failed to read value for:', key, 'from db')
                continue
            dict.__setitem__(self, key, val)

    def raw_to_object(self, raw):
        if not isinstance(raw, unicode):
            raw = raw.decode(preferred_encoding)
        return json.loads(raw, object_hook=from_json)

    def to_raw(self, val):
        return json.dumps(val, indent=2, default=to_json)

    def __getitem__(self, key):
        try:
            return dict.__getitem__(self, key)
        except KeyError:
            return self.defaults[key]

    def __delitem__(self, key):
        dict.__delitem__(self, key)
        self.db.conn.execute('DELETE FROM preferences WHERE key=?', (key,))

    def __setitem__(self, key, val):
        if self.disable_setting:
            return
        raw = self.to_raw(val)
        self.db.conn.execute('INSERT OR REPLACE INTO preferences (key,val) VALUES (?,?)', (key,
            raw))
        dict.__setitem__(self, key, val)

    def set(self, key, val):
        self.__setitem__(key, val)

# }}}
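
In use, DBPrefs behaves like a dict that falls back to defaults for missing keys and persists every write to the preferences table as JSON. A hypothetical sketch, assuming db is any object exposing a conn like the DB class below:

prefs = DBPrefs(db)                  # illustration only
prefs.defaults['gui_restriction'] = ''

prefs['gui_restriction']             # -> '' (no stored value, default used)
prefs['gui_restriction'] = 'kindle'  # also written to the preferences table
del prefs['gui_restriction']         # removed from both the dict and the table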

# Extra collators {{{
def pynocase(one, two, encoding='utf-8'):
    if isbytestring(one):
        try:
            one = one.decode(encoding, 'replace')
        except:
            pass
    if isbytestring(two):
        try:
            two = two.decode(encoding, 'replace')
        except:
            pass
    return cmp(one.lower(), two.lower())

def _author_to_author_sort(x):
    if not x: return ''
    return author_to_author_sort(x.replace('|', ','))

def icu_collator(s1, s2):
    return strcmp(force_unicode(s1, 'utf-8'), force_unicode(s2, 'utf-8'))
# }}}
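
A sketch of how a collator such as pynocase plugs into apsw (in-memory database, for illustration; the real registration happens in the Connection class below):

import apsw

conn = apsw.Connection(':memory:')
# A collation is just a two-argument comparison function
conn.createcollation('PYNOCASE', pynocase)
cur = conn.cursor()
cur.execute('CREATE TABLE t(name TEXT)')
cur.executemany('INSERT INTO t VALUES(?)', [('banana',), ('Apple',)])
rows = list(cur.execute('SELECT name FROM t ORDER BY name COLLATE PYNOCASE'))
print([r[0] for r in rows])  # -> ['Apple', 'banana'], case ignored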

# Unused aggregators {{{
def Concatenate(sep=','):
    '''String concatenation aggregator for sqlite'''

    def step(ctxt, value):
        if value is not None:
            ctxt.append(value)

    def finalize(ctxt):
        if not ctxt:
            return None
        return sep.join(ctxt)

    return ([], step, finalize)

def SortedConcatenate(sep=','):
    '''String concatenation aggregator for sqlite, sorted by supplied index'''

    def step(ctxt, ndx, value):
        if value is not None:
            ctxt[ndx] = value

    def finalize(ctxt):
        if len(ctxt) == 0:
            return None
        return sep.join(map(ctxt.get, sorted(ctxt.iterkeys())))

    return ({}, step, finalize)

def IdentifiersConcat():
    '''String concatenation aggregator for the identifiers map'''

    def step(ctxt, key, val):
        ctxt.append(u'%s:%s'%(key, val))

    def finalize(ctxt):
        return ','.join(ctxt)

    return ([], step, finalize)

def AumSortedConcatenate():
    '''String concatenation aggregator for the author sort map'''

    def step(ctxt, ndx, author, sort, link):
        if author is not None:
            ctxt[ndx] = ':::'.join((author, sort, link))

    def finalize(ctxt):
        keys = list(ctxt.iterkeys())
        l = len(keys)
        if l == 0:
            return None
        if l == 1:
            return ctxt[keys[0]]
        return ':#:'.join([ctxt[v] for v in sorted(keys)])

    return ({}, step, finalize)

# }}}
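
Each factory returns a (context, step, finalize) triple, which is the shape apsw's createaggregatefunction consumes: step is called once per row with the context as first argument, and finalize produces the result. A minimal illustration:

import apsw

conn = apsw.Connection(':memory:')
# apsw calls SortedConcatenate() to get (context, step, finalize),
# feeds each row to step, then asks finalize for the aggregate value.
conn.createaggregatefunction('sortconcat', SortedConcatenate, 2)
cur = conn.cursor()
cur.execute('CREATE TABLE vals(pos, name)')
cur.executemany('INSERT INTO vals VALUES(?,?)',
        [(2, 'b'), (1, 'a'), (3, 'c')])
print(next(cur.execute('SELECT sortconcat(pos, name) FROM vals'))[0])
# -> 'a,b,c' (concatenated in pos order, not insertion order)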

class Connection(apsw.Connection): # {{{

    BUSY_TIMEOUT = 2000 # milliseconds

    def __init__(self, path):
        apsw.Connection.__init__(self, path)

        self.setbusytimeout(self.BUSY_TIMEOUT)
        self.execute('pragma cache_size=5000')
        self.execute('pragma temp_store=2')

        encoding = self.execute('pragma encoding').next()[0]
        self.createcollation('PYNOCASE', partial(pynocase,
            encoding=encoding))

        self.createscalarfunction('title_sort', title_sort, 1)
        self.createscalarfunction('author_to_author_sort',
                _author_to_author_sort, 1)
        self.createscalarfunction('uuid4', lambda : str(uuid.uuid4()),
                0)

        # Dummy functions for dynamically created filters
        self.createscalarfunction('books_list_filter', lambda x: 1, 1)
        self.createcollation('icucollate', icu_collator)

        # Legacy aggregators (never used) but present for backwards compat
        self.createaggregatefunction('sortconcat', SortedConcatenate, 2)
        self.createaggregatefunction('sortconcat_bar',
                partial(SortedConcatenate, sep='|'), 2)
        self.createaggregatefunction('sortconcat_amper',
                partial(SortedConcatenate, sep='&'), 2)
        self.createaggregatefunction('identifiers_concat',
                IdentifiersConcat, 2)
        self.createaggregatefunction('concat', Concatenate, 1)
        self.createaggregatefunction('aum_sortconcat',
                AumSortedConcatenate, 4)

    def create_dynamic_filter(self, name):
        f = DynamicFilter(name)
        self.createscalarfunction(name, f, 1)

    def get(self, *args, **kw):
        ans = self.cursor().execute(*args)
        if kw.get('all', True):
            return ans.fetchall()
        try:
            return ans.next()[0]
        except (StopIteration, IndexError):
            return None

    def execute(self, sql, bindings=None):
        cursor = self.cursor()
        return cursor.execute(sql, bindings)

    def executemany(self, sql, sequence_of_bindings):
        return self.cursor().executemany(sql, sequence_of_bindings)

# }}}
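
The get() helper gives the rest of the backend a compact query interface; sketched usage (the library path is hypothetical):

conn = Connection('/tmp/library/metadata.db')  # hypothetical path

rows = conn.get('SELECT id, title FROM books')             # all=True: list of row tuples
count = conn.get('SELECT COUNT(*) FROM books', all=False)  # single scalar, or None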

class DB(object):

    PATH_LIMIT = 40 if iswindows else 100
    WINDOWS_LIBRARY_PATH_LIMIT = 75

    # Initialize database {{{

    def __init__(self, library_path, default_prefs=None, read_only=False):
        try:
            if isbytestring(library_path):
                library_path = library_path.decode(filesystem_encoding)
        except:
            import traceback
            traceback.print_exc()

        self.field_metadata = FieldMetadata()

        self.library_path = os.path.abspath(library_path)
        self.dbpath = os.path.join(library_path, 'metadata.db')
        self.dbpath = os.environ.get('CALIBRE_OVERRIDE_DATABASE_PATH',
                self.dbpath)

        if iswindows and len(self.library_path) + 4*self.PATH_LIMIT + 10 > 259:
            raise ValueError(_(
                'Path to library too long. Must be less than'
                ' %d characters.')%(259-4*self.PATH_LIMIT-10))
        exists = self._exists = os.path.exists(self.dbpath)
        if not exists:
            # Be more strict when creating new libraries as the old calculation
            # allowed for max path lengths of 265 chars.
            if (iswindows and len(self.library_path) >
                    self.WINDOWS_LIBRARY_PATH_LIMIT):
                raise ValueError(_(
                    'Path to library too long. Must be less than'
                    ' %d characters.')%self.WINDOWS_LIBRARY_PATH_LIMIT)

        if read_only and os.path.exists(self.dbpath):
            # Work on only a copy of metadata.db to ensure that
            # metadata.db is not changed
            pt = PersistentTemporaryFile('_metadata_ro.db')
            pt.close()
            shutil.copyfile(self.dbpath, pt.name)
            self.dbpath = pt.name

        if not os.path.exists(os.path.dirname(self.dbpath)):
            os.makedirs(os.path.dirname(self.dbpath))

        self._conn = None
        if self.user_version == 0:
            self.initialize_database()

        if not os.path.exists(self.library_path):
            os.makedirs(self.library_path)
        self.is_case_sensitive = is_case_sensitive(self.library_path)

        SchemaUpgrade(self.conn, self.library_path, self.field_metadata)

        # Guarantee that the library_id is set
        self.library_id

        # Fix legacy triggers and columns
        self.conn.execute('''
        DROP TRIGGER IF EXISTS author_insert_trg;
        CREATE TEMP TRIGGER author_insert_trg
            AFTER INSERT ON authors
            BEGIN
            UPDATE authors SET sort=author_to_author_sort(NEW.name) WHERE id=NEW.id;
            END;
        DROP TRIGGER IF EXISTS author_update_trg;
        CREATE TEMP TRIGGER author_update_trg
            BEFORE UPDATE ON authors
            BEGIN
            UPDATE authors SET sort=author_to_author_sort(NEW.name)
            WHERE id=NEW.id AND name <> NEW.name;
            END;
        UPDATE authors SET sort=author_to_author_sort(name) WHERE sort IS NULL;
        ''')

        self.initialize_prefs(default_prefs)
        self.initialize_custom_columns()
        self.initialize_tables()

    def initialize_prefs(self, default_prefs): # {{{
        self.prefs = DBPrefs(self)

        if default_prefs is not None and not self._exists:
            # Only apply default prefs to a new database
            for key in default_prefs:
                # be sure that prefs not to be copied are listed below
                if key not in frozenset(['news_to_be_synced']):
                    self.prefs[key] = default_prefs[key]
            if 'field_metadata' in default_prefs:
                fmvals = [f for f in default_prefs['field_metadata'].values()
                        if f['is_custom']]
                for f in fmvals:
                    self.create_custom_column(f['label'], f['name'],
                            f['datatype'], f['is_multiple'] is not None,
                            f['is_editable'], f['display'])

        defs = self.prefs.defaults
        defs['gui_restriction'] = defs['cs_restriction'] = ''
        defs['categories_using_hierarchy'] = []
        defs['column_color_rules'] = []

        # Migrate the bool tristate tweak
        defs['bools_are_tristate'] = \
                tweaks.get('bool_custom_columns_are_tristate', 'yes') == 'yes'
        if self.prefs.get('bools_are_tristate') is None:
            self.prefs.set('bools_are_tristate', defs['bools_are_tristate'])

        # Migrate column coloring rules
        if self.prefs.get('column_color_name_1', None) is not None:
            from calibre.library.coloring import migrate_old_rule
            old_rules = []
            for i in range(1, 6):
                col = self.prefs.get('column_color_name_'+str(i), None)
                templ = self.prefs.get('column_color_template_'+str(i), None)
                if col and templ:
                    try:
                        del self.prefs['column_color_name_'+str(i)]
                        rules = migrate_old_rule(self.field_metadata, templ)
                        for templ in rules:
                            old_rules.append((col, templ))
                    except:
                        pass
            if old_rules:
                self.prefs['column_color_rules'] += old_rules

        # Migrate saved search and user categories to db preference scheme
        def migrate_preference(key, default):
            oldval = prefs[key]
            if oldval != default:
                self.prefs[key] = oldval
                prefs[key] = default
            if key not in self.prefs:
                self.prefs[key] = default

        migrate_preference('user_categories', {})
        migrate_preference('saved_searches', {})

        # migrate grouped_search_terms
        if self.prefs.get('grouped_search_terms', None) is None:
            try:
                ogst = tweaks.get('grouped_search_terms', {})
                ngst = {}
                for t in ogst:
                    ngst[icu_lower(t)] = ogst[t]
                self.prefs.set('grouped_search_terms', ngst)
            except:
                pass

        # Rename any user categories with names that differ only in case
        user_cats = self.prefs.get('user_categories', [])
        catmap = {}
        for uc in user_cats:
            ucl = icu_lower(uc)
            if ucl not in catmap:
                catmap[ucl] = []
            catmap[ucl].append(uc)
        cats_changed = False
        for uc in catmap:
            if len(catmap[uc]) > 1:
                prints('found user category case overlap', catmap[uc])
                cat = catmap[uc][0]
                suffix = 1
                while icu_lower((cat + unicode(suffix))) in catmap:
                    suffix += 1
                prints('Renaming user category %s to %s'%(cat, cat+unicode(suffix)))
                user_cats[cat + unicode(suffix)] = user_cats[cat]
                del user_cats[cat]
                cats_changed = True
        if cats_changed:
            self.prefs.set('user_categories', user_cats)
    # }}}

    def initialize_custom_columns(self): # {{{
        with self.conn:
            # Delete previously marked custom columns
            for record in self.conn.get(
                    'SELECT id FROM custom_columns WHERE mark_for_delete=1'):
                num = record[0]
                table, lt = self.custom_table_names(num)
                self.conn.execute('''\
                        DROP INDEX IF EXISTS {table}_idx;
                        DROP INDEX IF EXISTS {lt}_aidx;
                        DROP INDEX IF EXISTS {lt}_bidx;
                        DROP TRIGGER IF EXISTS fkc_update_{lt}_a;
                        DROP TRIGGER IF EXISTS fkc_update_{lt}_b;
                        DROP TRIGGER IF EXISTS fkc_insert_{lt};
                        DROP TRIGGER IF EXISTS fkc_delete_{lt};
                        DROP TRIGGER IF EXISTS fkc_insert_{table};
                        DROP TRIGGER IF EXISTS fkc_delete_{table};
                        DROP VIEW IF EXISTS tag_browser_{table};
                        DROP VIEW IF EXISTS tag_browser_filtered_{table};
                        DROP TABLE IF EXISTS {table};
                        DROP TABLE IF EXISTS {lt};
                        '''.format(table=table, lt=lt)
                )
            self.conn.execute('DELETE FROM custom_columns WHERE mark_for_delete=1')

        # Load metadata for custom columns
        self.custom_column_label_map, self.custom_column_num_map = {}, {}
        triggers = []
        remove = []
        custom_tables = self.custom_tables
        for record in self.conn.get(
                'SELECT label,name,datatype,editable,display,normalized,id,is_multiple FROM custom_columns'):
            data = {
                    'label':record[0],
                    'name':record[1],
                    'datatype':record[2],
                    'editable':bool(record[3]),
                    'display':json.loads(record[4]),
                    'normalized':bool(record[5]),
                    'num':record[6],
                    'is_multiple':bool(record[7]),
                    }
            if data['display'] is None:
                data['display'] = {}
            # set up the is_multiple separator dict
            if data['is_multiple']:
                if data['display'].get('is_names', False):
                    seps = {'cache_to_list': '|', 'ui_to_list': '&', 'list_to_ui': ' & '}
                elif data['datatype'] == 'composite':
                    seps = {'cache_to_list': ',', 'ui_to_list': ',', 'list_to_ui': ', '}
                else:
                    seps = {'cache_to_list': '|', 'ui_to_list': ',', 'list_to_ui': ', '}
            else:
                seps = {}
            data['multiple_seps'] = seps

            table, lt = self.custom_table_names(data['num'])
            if table not in custom_tables or (data['normalized'] and lt not in
                    custom_tables):
                remove.append(data)
                continue

            self.custom_column_label_map[data['label']] = data['num']
            self.custom_column_num_map[data['num']] = \
                self.custom_column_label_map[data['label']] = data

            # Create Foreign Key triggers
            if data['normalized']:
                trigger = 'DELETE FROM %s WHERE book=OLD.id;'%lt
            else:
                trigger = 'DELETE FROM %s WHERE book=OLD.id;'%table
            triggers.append(trigger)

        if remove:
            with self.conn:
                for data in remove:
                    prints('WARNING: Custom column %r not found, removing.' %
                            data['label'])
                    self.conn.execute('DELETE FROM custom_columns WHERE id=?',
                            (data['num'],))

        if triggers:
            with self.conn:
                self.conn.execute('''\
                    CREATE TEMP TRIGGER custom_books_delete_trg
                        AFTER DELETE ON books
                        BEGIN
                        %s
                    END;
                    '''%(' \n'.join(triggers)))

        # Setup data adapters
        def adapt_text(x, d):
            if d['is_multiple']:
                if x is None:
                    return []
                if isinstance(x, (str, unicode, bytes)):
                    x = x.split(d['multiple_seps']['ui_to_list'])
                x = [y.strip() for y in x if y.strip()]
                x = [y.decode(preferred_encoding, 'replace') if not isinstance(y,
                    unicode) else y for y in x]
                return [u' '.join(y.split()) for y in x]
            else:
                return x if x is None or isinstance(x, unicode) else \
                        x.decode(preferred_encoding, 'replace')

        def adapt_datetime(x, d):
            if isinstance(x, (str, unicode, bytes)):
                x = parse_date(x, assume_utc=False, as_utc=False)
            return x

        def adapt_bool(x, d):
            if isinstance(x, (str, unicode, bytes)):
                x = x.lower()
                if x == 'true':
                    x = True
                elif x == 'false':
                    x = False
                elif x == 'none':
                    x = None
                else:
                    x = bool(int(x))
            return x

        def adapt_enum(x, d):
            v = adapt_text(x, d)
            if not v:
                v = None
            return v

        def adapt_number(x, d):
            if x is None:
                return None
            if isinstance(x, (str, unicode, bytes)):
                if x.lower() == 'none':
                    return None
            if d['datatype'] == 'int':
                return int(x)
            return float(x)

        self.custom_data_adapters = {
                'float': adapt_number,
                'int': adapt_number,
                'rating':lambda x,d : x if x is None else min(10., max(0., float(x))),
                'bool': adapt_bool,
                'comments': lambda x,d: adapt_text(x, {'is_multiple':False}),
                'datetime' : adapt_datetime,
                'text':adapt_text,
                'series':adapt_text,
                'enumeration': adapt_enum
        }

        # Create Tag Browser categories for custom columns
        for k in sorted(self.custom_column_label_map.iterkeys()):
            v = self.custom_column_label_map[k]
            if v['normalized']:
                is_category = True
            else:
                is_category = False
            is_m = v['multiple_seps']
            tn = 'custom_column_{0}'.format(v['num'])
            self.field_metadata.add_custom_field(label=v['label'],
                    table=tn, column='value', datatype=v['datatype'],
                    colnum=v['num'], name=v['name'], display=v['display'],
                    is_multiple=is_m, is_category=is_category,
                    is_editable=v['editable'], is_csp=False)

    # }}}
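
The adapters normalise raw input before it is stored. A hypothetical illustration; the metadata dict below mimics a multi-valued text column as built above, and db stands for a DB instance:

# Hypothetical column metadata, shaped like the 'data' dicts built above
d = {'is_multiple': True, 'datatype': 'text',
     'multiple_seps': {'cache_to_list': '|', 'ui_to_list': ',',
                       'list_to_ui': ', '}}
adapt = db.custom_data_adapters['text']  # assumes db is a DB instance
print(adapt('Fantasy,  Science Fiction', d))
# -> ['Fantasy', 'Science Fiction']: split on ',', stripped, whitespace collapsed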

    def initialize_tables(self): # {{{
        tables = self.tables = {}
        for col in ('title', 'sort', 'author_sort', 'series_index', 'comments',
                'timestamp', 'pubdate', 'uuid', 'path', 'cover',
                'last_modified'):
            metadata = self.field_metadata[col].copy()
            if col == 'comments':
                metadata['table'], metadata['column'] = 'comments', 'text'
            if not metadata['table']:
                metadata['table'], metadata['column'] = 'books', ('has_cover'
                        if col == 'cover' else col)
            if not metadata['column']:
                metadata['column'] = col
            tables[col] = OneToOneTable(col, metadata)

        for col in ('series', 'publisher', 'rating'):
            tables[col] = ManyToOneTable(col, self.field_metadata[col].copy())

        for col in ('authors', 'tags', 'formats', 'identifiers'):
            cls = {
                    'authors':AuthorsTable,
                    'formats':FormatsTable,
                    'identifiers':IdentifiersTable,
                  }.get(col, ManyToManyTable)
            tables[col] = cls(col, self.field_metadata[col].copy())

        tables['size'] = SizeTable('size', self.field_metadata['size'].copy())

        for label, data in self.custom_column_label_map.iteritems():
            label = '#' + label
            metadata = self.field_metadata[label].copy()
            link_table = self.custom_table_names(data['num'])[1]

            if data['normalized']:
                if metadata['is_multiple']:
                    tables[label] = ManyToManyTable(label, metadata,
                            link_table=link_table)
                else:
                    tables[label] = ManyToOneTable(label, metadata,
                            link_table=link_table)
                    if metadata['datatype'] == 'series':
                        # Create series index table
                        label += '_index'
                        metadata = self.field_metadata[label].copy()
                        metadata['column'] = 'extra'
                        metadata['table'] = link_table
                        tables[label] = OneToOneTable(label, metadata)
            else:
                tables[label] = OneToOneTable(label, metadata)
    # }}}

    @property
    def conn(self):
        if self._conn is None:
            self._conn = Connection(self.dbpath)
            if self._exists and self.user_version == 0:
                self._conn.close()
                os.remove(self.dbpath)
                self._conn = Connection(self.dbpath)
        return self._conn

    @dynamic_property
    def user_version(self):
        doc = 'The user version of this database'

        def fget(self):
            return self.conn.get('pragma user_version;', all=False)

        def fset(self, val):
            self.conn.execute('pragma user_version=%d'%int(val))

        return property(doc=doc, fget=fget, fset=fset)

    def initialize_database(self):
        metadata_sqlite = P('metadata_sqlite.sql', data=True,
                allow_user_override=False).decode('utf-8')
        cur = self.conn.cursor()
        cur.execute('BEGIN EXCLUSIVE TRANSACTION')
        try:
            cur.execute(metadata_sqlite)
        except:
            cur.execute('ROLLBACK')
        else:
            cur.execute('COMMIT')
        if self.user_version == 0:
            self.user_version = 1
    # }}}

    # Database layer API {{{

    def custom_table_names(self, num):
        return 'custom_column_%d'%num, 'books_custom_column_%d_link'%num

    @property
    def custom_tables(self):
        return set([x[0] for x in self.conn.get(
            'SELECT name FROM sqlite_master WHERE type="table" AND '
            '(name GLOB "custom_column_*" OR name GLOB "books_custom_column_*")')])

    @classmethod
    def exists_at(cls, path):
        return path and os.path.exists(os.path.join(path, 'metadata.db'))

    @dynamic_property
    def library_id(self):
        doc = ('The UUID for this library. As long as the user only operates'
                ' on libraries with calibre, it will be unique')

        def fget(self):
            if getattr(self, '_library_id_', None) is None:
                ans = self.conn.get('SELECT uuid FROM library_id', all=False)
                if ans is None:
                    ans = str(uuid.uuid4())
                    self.library_id = ans
                else:
                    self._library_id_ = ans
            return self._library_id_

        def fset(self, val):
            self._library_id_ = unicode(val)
            self.conn.execute('''
                    DELETE FROM library_id;
                    INSERT INTO library_id (uuid) VALUES (?);
                    ''', (self._library_id_,))

        return property(doc=doc, fget=fget, fset=fset)

    def last_modified(self):
        ''' Return last modified time as a UTC datetime object '''
        return utcfromtimestamp(os.stat(self.dbpath).st_mtime)

    def read_tables(self):
        '''
        Read all data from the db into the python in-memory tables
        '''

        with self.conn: # Use a single transaction, to ensure nothing modifies
                        # the db while we are reading
            for table in self.tables.itervalues():
                try:
                    table.read(self)
                except:
                    prints('Failed to read table:', table.name)
                    import pprint
                    pprint.pprint(table.metadata)
                    raise

    # }}}

src/calibre/db/cache.py (new file, 11 lines)
@@ -0,0 +1,11 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
        print_function)

__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'


src/calibre/db/errors.py (new file, 13 lines)
@@ -0,0 +1,13 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
        print_function)

__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'


class NoSuchFormat(ValueError):
    pass
src/calibre/db/locking.py (new file, 353 lines)
@@ -0,0 +1,353 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
        print_function)

__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

from threading import Lock, Condition, current_thread

class LockingError(RuntimeError):
    pass

def create_locks():
    '''
    Return a pair of locks: (read_lock, write_lock)

    The read_lock can be acquired by multiple threads simultaneously, it can
    also be acquired multiple times by the same thread.

    Only one thread can hold write_lock at a time, and only if there are no
    current read_locks. While the write_lock is held no
    other threads can acquire read locks. The write_lock can also be acquired
    multiple times by the same thread.

    Both read_lock and write_lock are meant to be used in with statements (they
    operate on a single underlying lock).

    WARNING: Be very careful to not try to acquire a read lock while the same
    thread holds a write lock and vice versa. That is, a given thread should
    always release *all* locks of type A before trying to acquire a lock of type
    B. Bad things will happen if you violate this rule, the most benign of
    which is the raising of a LockingError (I haven't been able to eliminate
    the possibility of deadlocking in this scenario).
    '''
    l = SHLock()
    return RWLockWrapper(l), RWLockWrapper(l, is_shared=False)
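
A short usage sketch of the returned pair, respecting the warning above (each thread holds only one lock type at a time):

read_lock, write_lock = create_locks()
cache = {}

def lookup(key):
    with read_lock:   # many reader threads may hold this at once
        return cache.get(key)

def store(key, val):
    with write_lock:  # exclusive: waits until all readers are done
        cache[key] = val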

class SHLock(object):
    '''
    Shareable lock class. Used to implement the Multiple readers-single writer
    paradigm. As best as I can tell, neither writer nor reader starvation
    should be possible.

    Based on code from: https://github.com/rfk/threading2
    '''

    def __init__(self):
        self._lock = Lock()
        # When a shared lock is held, is_shared will give the cumulative
        # number of locks and _shared_owners maps each owning thread to
        # the number of locks it holds.
        self.is_shared = 0
        self._shared_owners = {}
        # When an exclusive lock is held, is_exclusive will give the number
        # of locks held and _exclusive_owner will give the owning thread
        self.is_exclusive = 0
        self._exclusive_owner = None
        # When someone is forced to wait for a lock, they add themselves
        # to one of these queues along with a "waiter" condition that
        # is used to wake them up.
        self._shared_queue = []
        self._exclusive_queue = []
        # This is for recycling waiter objects.
        self._free_waiters = []

    def acquire(self, blocking=True, shared=False):
        '''
        Acquire the lock in shared or exclusive mode.

        If blocking is False this method will return False if acquiring the
        lock failed.
        '''
        with self._lock:
            if shared:
                return self._acquire_shared(blocking)
            else:
                return self._acquire_exclusive(blocking)
            assert not (self.is_shared and self.is_exclusive)

    def release(self):
        ''' Release the lock. '''
        # This decrements the appropriate lock counters, and if the lock
        # becomes free, it looks for a queued thread to hand it off to.
        # By doing the handoff here we ensure fairness.
        me = current_thread()
        with self._lock:
            if self.is_exclusive:
                if self._exclusive_owner is not me:
                    raise LockingError("release() called on unheld lock")
                self.is_exclusive -= 1
                if not self.is_exclusive:
                    self._exclusive_owner = None
                    # If there are waiting shared locks, issue them
                    # all and then wake everyone up.
                    if self._shared_queue:
                        for (thread, waiter) in self._shared_queue:
                            self.is_shared += 1
                            self._shared_owners[thread] = 1
                            waiter.notify()
                        del self._shared_queue[:]
                    # Otherwise, if there are waiting exclusive locks,
                    # they get first dibs on the lock.
                    elif self._exclusive_queue:
                        (thread, waiter) = self._exclusive_queue.pop(0)
                        self._exclusive_owner = thread
                        self.is_exclusive += 1
                        waiter.notify()
            elif self.is_shared:
                try:
                    self._shared_owners[me] -= 1
                    if self._shared_owners[me] == 0:
                        del self._shared_owners[me]
                except KeyError:
                    raise LockingError("release() called on unheld lock")
                self.is_shared -= 1
                if not self.is_shared:
                    # If there are waiting exclusive locks,
                    # they get first dibs on the lock.
                    if self._exclusive_queue:
                        (thread, waiter) = self._exclusive_queue.pop(0)
                        self._exclusive_owner = thread
                        self.is_exclusive += 1
                        waiter.notify()
                    else:
                        assert not self._shared_queue
            else:
                raise LockingError("release() called on unheld lock")

    def _acquire_shared(self, blocking=True):
        me = current_thread()
        # Easy case: acquiring a lock we already hold.
        if self.is_shared and me in self._shared_owners:
            self.is_shared += 1
            self._shared_owners[me] += 1
            return True
        # If the lock is already spoken for by an exclusive, add us
        # to the shared queue and it will give us the lock eventually.
        if self.is_exclusive or self._exclusive_queue:
            if self._exclusive_owner is me:
                raise LockingError("can't downgrade SHLock object")
            if not blocking:
                return False
            waiter = self._take_waiter()
            try:
                self._shared_queue.append((me, waiter))
                waiter.wait()
                assert not self.is_exclusive
            finally:
                self._return_waiter(waiter)
        else:
            self.is_shared += 1
            self._shared_owners[me] = 1
        return True

    def _acquire_exclusive(self, blocking=True):
        me = current_thread()
        # Easy case: acquiring a lock we already hold.
        if self._exclusive_owner is me:
            assert self.is_exclusive
            self.is_exclusive += 1
            return True
        # Do not allow upgrade of lock
        if self.is_shared and me in self._shared_owners:
            raise LockingError("can't upgrade SHLock object")
        # If the lock is already spoken for, add us to the exclusive queue.
        # This will eventually give us the lock when it's our turn.
        if self.is_shared or self.is_exclusive:
            if not blocking:
                return False
            waiter = self._take_waiter()
            try:
                self._exclusive_queue.append((me, waiter))
                waiter.wait()
            finally:
                self._return_waiter(waiter)
        else:
            self._exclusive_owner = me
            self.is_exclusive += 1
        return True

    def _take_waiter(self):
        try:
            return self._free_waiters.pop()
        except IndexError:
            return Condition(self._lock)#, verbose=True)

    def _return_waiter(self, waiter):
        self._free_waiters.append(waiter)

class RWLockWrapper(object):

    def __init__(self, shlock, is_shared=True):
        self._shlock = shlock
        self._is_shared = is_shared

    def __enter__(self):
        self._shlock.acquire(shared=self._is_shared)
        return self

    def __exit__(self, *args):
        self._shlock.release()


# Tests {{{
if __name__ == '__main__':
    import time, random, unittest
    from threading import Thread

    class TestSHLock(unittest.TestCase):
        """Testcases for SHLock class."""

        def test_multithread_deadlock(self):
            lock = SHLock()
            def two_shared():
                r = RWLockWrapper(lock)
                with r:
                    time.sleep(0.2)
                    with r:
                        pass
            def one_exclusive():
                time.sleep(0.1)
                w = RWLockWrapper(lock, is_shared=False)
                with w:
                    pass
            threads = [Thread(target=two_shared), Thread(target=one_exclusive)]
            for t in threads:
                t.daemon = True
                t.start()
            for t in threads:
                t.join(5)
            live = [t for t in threads if t.is_alive()]
            self.assertListEqual(live, [], 'SHLock hung')

        def test_upgrade(self):
            lock = SHLock()
            lock.acquire(shared=True)
            self.assertRaises(LockingError, lock.acquire, shared=False)
            lock.release()

        def test_downgrade(self):
            lock = SHLock()
            lock.acquire(shared=False)
            self.assertRaises(LockingError, lock.acquire, shared=True)
            lock.release()

        def test_recursive(self):
            lock = SHLock()
            lock.acquire(shared=True)
            lock.acquire(shared=True)
            self.assertEqual(lock.is_shared, 2)
            lock.release()
            lock.release()
            self.assertFalse(lock.is_shared)
            lock.acquire(shared=False)
            lock.acquire(shared=False)
            self.assertEqual(lock.is_exclusive, 2)
            lock.release()
            lock.release()
            self.assertFalse(lock.is_exclusive)

        def test_release(self):
            lock = SHLock()
            self.assertRaises(LockingError, lock.release)

            def get_lock(shared):
                lock.acquire(shared=shared)
                time.sleep(1)
                lock.release()

            threads = [Thread(target=get_lock, args=(x,)) for x in (True,
                False)]
            for t in threads:
                t.daemon = True
                t.start()
                self.assertRaises(LockingError, lock.release)
                t.join(2)
                self.assertFalse(t.is_alive())
            self.assertFalse(lock.is_shared)
            self.assertFalse(lock.is_exclusive)

        def test_acquire(self):
            lock = SHLock()

            def get_lock(shared):
                lock.acquire(shared=shared)
                time.sleep(1)
                lock.release()

            shared = Thread(target=get_lock, args=(True,))
            shared.daemon = True
            shared.start()
            time.sleep(0.1)
            self.assertTrue(lock.acquire(shared=True, blocking=False))
            lock.release()
            self.assertFalse(lock.acquire(shared=False, blocking=False))
            lock.acquire(shared=False)
            self.assertFalse(shared.is_alive())
            lock.release()
            self.assertTrue(lock.acquire(shared=False, blocking=False))
            lock.release()

            exclusive = Thread(target=get_lock, args=(False,))
            exclusive.daemon = True
            exclusive.start()
            time.sleep(0.1)
            self.assertFalse(lock.acquire(shared=False, blocking=False))
            self.assertFalse(lock.acquire(shared=True, blocking=False))
            lock.acquire(shared=True)
            self.assertFalse(exclusive.is_alive())
            lock.release()
            lock.acquire(shared=False)
            lock.release()
            lock.acquire(shared=True)
            lock.release()
            self.assertFalse(lock.is_shared)
            self.assertFalse(lock.is_exclusive)

        def test_contention(self):
            lock = SHLock()
            done = []
            def lots_of_acquires():
                for _ in xrange(1000):
                    shared = random.choice([True,False])
                    lock.acquire(shared=shared)
                    lock.acquire(shared=shared)
                    time.sleep(random.random() * 0.0001)
                    lock.release()
                    time.sleep(random.random() * 0.0001)
                    lock.acquire(shared=shared)
                    time.sleep(random.random() * 0.0001)
                    lock.release()
                    lock.release()
                done.append(True)
            threads = [Thread(target=lots_of_acquires) for _ in xrange(10)]
            for t in threads:
                t.daemon = True
                t.start()
            for t in threads:
                t.join(20)
            live = [t for t in threads if t.is_alive()]
            self.assertListEqual(live, [], 'SHLock hung')
            self.assertEqual(len(done), len(threads), 'SHLock locking failed')
            self.assertFalse(lock.is_shared)
            self.assertFalse(lock.is_exclusive)


    suite = unittest.TestLoader().loadTestsFromTestCase(TestSHLock)
    unittest.TextTestRunner(verbosity=2).run(suite)

# }}}
src/calibre/db/schema_upgrades.py (new file, 618 lines)
@@ -0,0 +1,618 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
        print_function)

__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import os

from calibre import prints
from calibre.utils.date import isoformat, DEFAULT_DATE

class SchemaUpgrade(object):

    def __init__(self, conn, library_path, field_metadata):
        conn.execute('BEGIN EXCLUSIVE TRANSACTION')
        self.conn = conn
        self.library_path = library_path
        self.field_metadata = field_metadata
        # Upgrade database
        try:
            while True:
                uv = self.conn.execute('pragma user_version').next()[0]
                meth = getattr(self, 'upgrade_version_%d'%uv, None)
                if meth is None:
                    break
                else:
                    prints('Upgrading database to version %d...'%(uv+1))
                    meth()
                    self.conn.execute('pragma user_version=%d'%(uv+1))
        except:
            self.conn.execute('ROLLBACK')
            raise
        else:
            self.conn.execute('COMMIT')
        finally:
            self.conn = self.field_metadata = None
|
||||
|
||||
    def upgrade_version_1(self):
        '''
        Normalize indices.
        '''
        self.conn.execute('''\
        DROP INDEX IF EXISTS authors_idx;
        CREATE INDEX authors_idx ON books (author_sort COLLATE NOCASE, sort COLLATE NOCASE);
        DROP INDEX IF EXISTS series_idx;
        CREATE INDEX series_idx ON series (name COLLATE NOCASE);
        DROP INDEX IF EXISTS series_sort_idx;
        CREATE INDEX series_sort_idx ON books (series_index, id);
        ''')

    def upgrade_version_2(self):
        ''' Fix Foreign key constraints for deleting from link tables. '''
        script = '''\
        DROP TRIGGER IF EXISTS fkc_delete_books_%(ltable)s_link;
        CREATE TRIGGER fkc_delete_on_%(table)s
        BEFORE DELETE ON %(table)s
        BEGIN
            SELECT CASE
                WHEN (SELECT COUNT(id) FROM books_%(ltable)s_link WHERE %(ltable_col)s=OLD.id) > 0
                THEN RAISE(ABORT, 'Foreign key violation: %(table)s is still referenced')
            END;
        END;
        DELETE FROM %(table)s WHERE (SELECT COUNT(id) FROM books_%(ltable)s_link WHERE %(ltable_col)s=%(table)s.id) < 1;
        '''
        self.conn.execute(script%dict(ltable='authors', table='authors', ltable_col='author'))
        self.conn.execute(script%dict(ltable='publishers', table='publishers', ltable_col='publisher'))
        self.conn.execute(script%dict(ltable='tags', table='tags', ltable_col='tag'))
        self.conn.execute(script%dict(ltable='series', table='series', ltable_col='series'))

    def upgrade_version_3(self):
        ' Add path to result cache '
        self.conn.execute('''
        DROP VIEW IF EXISTS meta;
        CREATE VIEW meta AS
        SELECT id, title,
               (SELECT concat(name) FROM authors WHERE authors.id IN (SELECT author from books_authors_link WHERE book=books.id)) authors,
               (SELECT name FROM publishers WHERE publishers.id IN (SELECT publisher from books_publishers_link WHERE book=books.id)) publisher,
               (SELECT rating FROM ratings WHERE ratings.id IN (SELECT rating from books_ratings_link WHERE book=books.id)) rating,
               timestamp,
               (SELECT MAX(uncompressed_size) FROM data WHERE book=books.id) size,
               (SELECT concat(name) FROM tags WHERE tags.id IN (SELECT tag from books_tags_link WHERE book=books.id)) tags,
               (SELECT text FROM comments WHERE book=books.id) comments,
               (SELECT name FROM series WHERE series.id IN (SELECT series FROM books_series_link WHERE book=books.id)) series,
               series_index,
               sort,
               author_sort,
               (SELECT concat(format) FROM data WHERE data.book=books.id) formats,
               isbn,
               path
        FROM books;
        ''')

    def upgrade_version_4(self):
        'Rationalize books table'
        self.conn.execute('''
        CREATE TEMPORARY TABLE
        books_backup(id,title,sort,timestamp,series_index,author_sort,isbn,path);
        INSERT INTO books_backup SELECT id,title,sort,timestamp,series_index,author_sort,isbn,path FROM books;
        DROP TABLE books;
        CREATE TABLE books ( id           INTEGER PRIMARY KEY AUTOINCREMENT,
                             title        TEXT NOT NULL DEFAULT 'Unknown' COLLATE NOCASE,
                             sort         TEXT COLLATE NOCASE,
                             timestamp    TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                             pubdate      TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                             series_index REAL NOT NULL DEFAULT 1.0,
                             author_sort  TEXT COLLATE NOCASE,
                             isbn         TEXT DEFAULT "" COLLATE NOCASE,
                             lccn         TEXT DEFAULT "" COLLATE NOCASE,
                             path         TEXT NOT NULL DEFAULT "",
                             flags        INTEGER NOT NULL DEFAULT 1
                        );
        INSERT INTO
            books (id,title,sort,timestamp,pubdate,series_index,author_sort,isbn,path)
            SELECT id,title,sort,timestamp,timestamp,series_index,author_sort,isbn,path FROM books_backup;
        DROP TABLE books_backup;

        DROP VIEW IF EXISTS meta;
        CREATE VIEW meta AS
        SELECT id, title,
               (SELECT concat(name) FROM authors WHERE authors.id IN (SELECT author from books_authors_link WHERE book=books.id)) authors,
               (SELECT name FROM publishers WHERE publishers.id IN (SELECT publisher from books_publishers_link WHERE book=books.id)) publisher,
               (SELECT rating FROM ratings WHERE ratings.id IN (SELECT rating from books_ratings_link WHERE book=books.id)) rating,
               timestamp,
               (SELECT MAX(uncompressed_size) FROM data WHERE book=books.id) size,
               (SELECT concat(name) FROM tags WHERE tags.id IN (SELECT tag from books_tags_link WHERE book=books.id)) tags,
               (SELECT text FROM comments WHERE book=books.id) comments,
               (SELECT name FROM series WHERE series.id IN (SELECT series FROM books_series_link WHERE book=books.id)) series,
               series_index,
               sort,
               author_sort,
               (SELECT concat(format) FROM data WHERE data.book=books.id) formats,
               isbn,
               path,
               lccn,
               pubdate,
               flags
        FROM books;
        ''')

    def upgrade_version_5(self):
        'Update indexes/triggers for new books table'
        self.conn.execute('''
        CREATE INDEX authors_idx ON books (author_sort COLLATE NOCASE);
        CREATE INDEX books_idx ON books (sort COLLATE NOCASE);
        CREATE TRIGGER books_delete_trg
            AFTER DELETE ON books
            BEGIN
                DELETE FROM books_authors_link WHERE book=OLD.id;
                DELETE FROM books_publishers_link WHERE book=OLD.id;
                DELETE FROM books_ratings_link WHERE book=OLD.id;
                DELETE FROM books_series_link WHERE book=OLD.id;
                DELETE FROM books_tags_link WHERE book=OLD.id;
                DELETE FROM data WHERE book=OLD.id;
                DELETE FROM comments WHERE book=OLD.id;
                DELETE FROM conversion_options WHERE book=OLD.id;
        END;
        CREATE TRIGGER books_insert_trg
            AFTER INSERT ON books
            BEGIN
                UPDATE books SET sort=title_sort(NEW.title) WHERE id=NEW.id;
        END;
        CREATE TRIGGER books_update_trg
            AFTER UPDATE ON books
            BEGIN
                UPDATE books SET sort=title_sort(NEW.title) WHERE id=NEW.id;
        END;

        UPDATE books SET sort=title_sort(title) WHERE sort IS NULL;
        '''
        )

    def upgrade_version_6(self):
        'Show authors in order'
        self.conn.execute('''
        DROP VIEW IF EXISTS meta;
        CREATE VIEW meta AS
        SELECT id, title,
               (SELECT sortconcat(bal.id, name) FROM books_authors_link AS bal JOIN authors ON(author = authors.id) WHERE book = books.id) authors,
               (SELECT name FROM publishers WHERE publishers.id IN (SELECT publisher from books_publishers_link WHERE book=books.id)) publisher,
               (SELECT rating FROM ratings WHERE ratings.id IN (SELECT rating from books_ratings_link WHERE book=books.id)) rating,
               timestamp,
               (SELECT MAX(uncompressed_size) FROM data WHERE book=books.id) size,
               (SELECT concat(name) FROM tags WHERE tags.id IN (SELECT tag from books_tags_link WHERE book=books.id)) tags,
               (SELECT text FROM comments WHERE book=books.id) comments,
               (SELECT name FROM series WHERE series.id IN (SELECT series FROM books_series_link WHERE book=books.id)) series,
               series_index,
               sort,
               author_sort,
               (SELECT concat(format) FROM data WHERE data.book=books.id) formats,
               isbn,
               path,
               lccn,
               pubdate,
               flags
        FROM books;
        ''')

    def upgrade_version_7(self):
        'Add uuid column'
        self.conn.execute('''
        ALTER TABLE books ADD COLUMN uuid TEXT;
        DROP TRIGGER IF EXISTS books_insert_trg;
        DROP TRIGGER IF EXISTS books_update_trg;
        UPDATE books SET uuid=uuid4();

        CREATE TRIGGER books_insert_trg AFTER INSERT ON books
        BEGIN
            UPDATE books SET sort=title_sort(NEW.title),uuid=uuid4() WHERE id=NEW.id;
        END;

        CREATE TRIGGER books_update_trg AFTER UPDATE ON books
        BEGIN
            UPDATE books SET sort=title_sort(NEW.title) WHERE id=NEW.id;
        END;

        DROP VIEW IF EXISTS meta;
        CREATE VIEW meta AS
        SELECT id, title,
               (SELECT sortconcat(bal.id, name) FROM books_authors_link AS bal JOIN authors ON(author = authors.id) WHERE book = books.id) authors,
               (SELECT name FROM publishers WHERE publishers.id IN (SELECT publisher from books_publishers_link WHERE book=books.id)) publisher,
               (SELECT rating FROM ratings WHERE ratings.id IN (SELECT rating from books_ratings_link WHERE book=books.id)) rating,
               timestamp,
               (SELECT MAX(uncompressed_size) FROM data WHERE book=books.id) size,
               (SELECT concat(name) FROM tags WHERE tags.id IN (SELECT tag from books_tags_link WHERE book=books.id)) tags,
               (SELECT text FROM comments WHERE book=books.id) comments,
               (SELECT name FROM series WHERE series.id IN (SELECT series FROM books_series_link WHERE book=books.id)) series,
               series_index,
               sort,
               author_sort,
               (SELECT concat(format) FROM data WHERE data.book=books.id) formats,
               isbn,
               path,
               lccn,
               pubdate,
               flags,
               uuid
        FROM books;
        ''')

    def upgrade_version_8(self):
        'Add Tag Browser views'
        def create_tag_browser_view(table_name, column_name):
            self.conn.execute('''
                DROP VIEW IF EXISTS tag_browser_{tn};
                CREATE VIEW tag_browser_{tn} AS SELECT
                    id,
                    name,
                    (SELECT COUNT(id) FROM books_{tn}_link WHERE {cn}={tn}.id) count
                FROM {tn};
                '''.format(tn=table_name, cn=column_name))

        for tn in ('authors', 'tags', 'publishers', 'series'):
            cn = tn[:-1]
            if tn == 'series':
                cn = tn
            create_tag_browser_view(tn, cn)

    def upgrade_version_9(self):
        'Add custom columns'
        self.conn.execute('''
                CREATE TABLE custom_columns (
                    id       INTEGER PRIMARY KEY AUTOINCREMENT,
                    label    TEXT NOT NULL,
                    name     TEXT NOT NULL,
                    datatype TEXT NOT NULL,
                    mark_for_delete BOOL DEFAULT 0 NOT NULL,
                    editable BOOL DEFAULT 1 NOT NULL,
                    display  TEXT DEFAULT "{}" NOT NULL,
                    is_multiple BOOL DEFAULT 0 NOT NULL,
                    normalized BOOL NOT NULL,
                    UNIQUE(label)
                );
                CREATE INDEX IF NOT EXISTS custom_columns_idx ON custom_columns (label);
                CREATE INDEX IF NOT EXISTS formats_idx ON data (format);
        ''')

    def upgrade_version_10(self):
        'Add restricted Tag Browser views'
        def create_tag_browser_view(table_name, column_name, view_column_name):
            script = ('''
                DROP VIEW IF EXISTS tag_browser_{tn};
                CREATE VIEW tag_browser_{tn} AS SELECT
                    id,
                    {vcn},
                    (SELECT COUNT(id) FROM books_{tn}_link WHERE {cn}={tn}.id) count
                FROM {tn};
                DROP VIEW IF EXISTS tag_browser_filtered_{tn};
                CREATE VIEW tag_browser_filtered_{tn} AS SELECT
                    id,
                    {vcn},
                    (SELECT COUNT(books_{tn}_link.id) FROM books_{tn}_link WHERE
                        {cn}={tn}.id AND books_list_filter(book)) count
                FROM {tn};
                '''.format(tn=table_name, cn=column_name, vcn=view_column_name))
            self.conn.execute(script)

        for field in self.field_metadata.itervalues():
            if field['is_category'] and not field['is_custom'] and 'link_column' in field:
                table = self.conn.get(
                    'SELECT name FROM sqlite_master WHERE type="table" AND name=?',
                    ('books_%s_link'%field['table'],), all=False)
                if table is not None:
                    create_tag_browser_view(field['table'], field['link_column'], field['column'])

    def upgrade_version_11(self):
        'Add average rating to tag browser views'
        def create_std_tag_browser_view(table_name, column_name,
                view_column_name, sort_column_name):
            script = ('''
                DROP VIEW IF EXISTS tag_browser_{tn};
                CREATE VIEW tag_browser_{tn} AS SELECT
                    id,
                    {vcn},
                    (SELECT COUNT(id) FROM books_{tn}_link WHERE {cn}={tn}.id) count,
                    (SELECT AVG(ratings.rating)
                     FROM books_{tn}_link AS tl, books_ratings_link AS bl, ratings
                     WHERE tl.{cn}={tn}.id AND bl.book=tl.book AND
                           ratings.id = bl.rating AND ratings.rating <> 0) avg_rating,
                    {scn} AS sort
                FROM {tn};
                DROP VIEW IF EXISTS tag_browser_filtered_{tn};
                CREATE VIEW tag_browser_filtered_{tn} AS SELECT
                    id,
                    {vcn},
                    (SELECT COUNT(books_{tn}_link.id) FROM books_{tn}_link WHERE
                        {cn}={tn}.id AND books_list_filter(book)) count,
                    (SELECT AVG(ratings.rating)
                     FROM books_{tn}_link AS tl, books_ratings_link AS bl, ratings
                     WHERE tl.{cn}={tn}.id AND bl.book=tl.book AND
                           ratings.id = bl.rating AND ratings.rating <> 0 AND
                           books_list_filter(bl.book)) avg_rating,
                    {scn} AS sort
                FROM {tn};

                '''.format(tn=table_name, cn=column_name,
                           vcn=view_column_name, scn=sort_column_name))
            self.conn.execute(script)

        def create_cust_tag_browser_view(table_name, link_table_name):
            script = '''
                DROP VIEW IF EXISTS tag_browser_{table};
                CREATE VIEW tag_browser_{table} AS SELECT
                    id,
                    value,
                    (SELECT COUNT(id) FROM {lt} WHERE value={table}.id) count,
                    (SELECT AVG(r.rating)
                     FROM {lt},
                          books_ratings_link AS bl,
                          ratings AS r
                     WHERE {lt}.value={table}.id AND bl.book={lt}.book AND
                           r.id = bl.rating AND r.rating <> 0) avg_rating,
                    value AS sort
                FROM {table};

                DROP VIEW IF EXISTS tag_browser_filtered_{table};
                CREATE VIEW tag_browser_filtered_{table} AS SELECT
                    id,
                    value,
                    (SELECT COUNT({lt}.id) FROM {lt} WHERE value={table}.id AND
                        books_list_filter(book)) count,
                    (SELECT AVG(r.rating)
                     FROM {lt},
                          books_ratings_link AS bl,
                          ratings AS r
                     WHERE {lt}.value={table}.id AND bl.book={lt}.book AND
                           r.id = bl.rating AND r.rating <> 0 AND
                           books_list_filter(bl.book)) avg_rating,
                    value AS sort
                FROM {table};
                '''.format(lt=link_table_name, table=table_name)
            self.conn.execute(script)

        for field in self.field_metadata.itervalues():
            if field['is_category'] and not field['is_custom'] and 'link_column' in field:
                table = self.conn.get(
                    'SELECT name FROM sqlite_master WHERE type="table" AND name=?',
                    ('books_%s_link'%field['table'],), all=False)
                if table is not None:
                    create_std_tag_browser_view(field['table'], field['link_column'],
                                                field['column'], field['category_sort'])

        db_tables = self.conn.get('''SELECT name FROM sqlite_master
                                     WHERE type='table'
                                     ORDER BY name''')
        tables = []
        for (table,) in db_tables:
            tables.append(table)
        for table in tables:
            link_table = 'books_%s_link'%table
            if table.startswith('custom_column_') and link_table in tables:
                create_cust_tag_browser_view(table, link_table)

        self.conn.execute('UPDATE authors SET sort=author_to_author_sort(name)')

    def upgrade_version_12(self):
        'DB based preference store'
        script = '''
        DROP TABLE IF EXISTS preferences;
        CREATE TABLE preferences(id INTEGER PRIMARY KEY,
                                 key TEXT NON NULL,
                                 val TEXT NON NULL,
                                 UNIQUE(key));
        '''
        self.conn.execute(script)

    def upgrade_version_13(self):
        'Dirtied table for OPF metadata backups'
        script = '''
        DROP TABLE IF EXISTS metadata_dirtied;
        CREATE TABLE metadata_dirtied(id INTEGER PRIMARY KEY,
                                      book INTEGER NOT NULL,
                                      UNIQUE(book));
        INSERT INTO metadata_dirtied (book) SELECT id FROM books;
        '''
        self.conn.execute(script)

    def upgrade_version_14(self):
        'Cache has_cover'
        self.conn.execute('ALTER TABLE books ADD COLUMN has_cover BOOL DEFAULT 0')
        data = self.conn.get('SELECT id,path FROM books', all=True)
        def has_cover(path):
            if path:
                path = os.path.join(self.library_path, path.replace('/', os.sep),
                        'cover.jpg')
                return os.path.exists(path)
            return False

        ids = [(x[0],) for x in data if has_cover(x[1])]
        self.conn.executemany('UPDATE books SET has_cover=1 WHERE id=?', ids)

    def upgrade_version_15(self):
        'Remove commas from tags'
        self.conn.execute("UPDATE OR IGNORE tags SET name=REPLACE(name, ',', ';')")
        self.conn.execute("UPDATE OR IGNORE tags SET name=REPLACE(name, ',', ';;')")
        self.conn.execute("UPDATE OR IGNORE tags SET name=REPLACE(name, ',', '')")

    def upgrade_version_16(self):
        self.conn.execute('''
        DROP TRIGGER IF EXISTS books_update_trg;
        CREATE TRIGGER books_update_trg
            AFTER UPDATE ON books
            BEGIN
            UPDATE books SET sort=title_sort(NEW.title)
                         WHERE id=NEW.id AND OLD.title <> NEW.title;
            END;
        ''')

    def upgrade_version_17(self):
        'custom book data table (for plugins)'
        script = '''
        DROP TABLE IF EXISTS books_plugin_data;
        CREATE TABLE books_plugin_data(id INTEGER PRIMARY KEY,
                                       book INTEGER NON NULL,
                                       name TEXT NON NULL,
                                       val TEXT NON NULL,
                                       UNIQUE(book,name));
        DROP TRIGGER IF EXISTS books_delete_trg;
        CREATE TRIGGER books_delete_trg
            AFTER DELETE ON books
            BEGIN
                DELETE FROM books_authors_link WHERE book=OLD.id;
                DELETE FROM books_publishers_link WHERE book=OLD.id;
                DELETE FROM books_ratings_link WHERE book=OLD.id;
                DELETE FROM books_series_link WHERE book=OLD.id;
                DELETE FROM books_tags_link WHERE book=OLD.id;
                DELETE FROM data WHERE book=OLD.id;
                DELETE FROM comments WHERE book=OLD.id;
                DELETE FROM conversion_options WHERE book=OLD.id;
                DELETE FROM books_plugin_data WHERE book=OLD.id;
        END;
        '''
        self.conn.execute(script)

    def upgrade_version_18(self):
        '''
        Add a library UUID.
        Add an identifiers table.
        Add a languages table.
        Add a last_modified column.
        NOTE: You cannot downgrade after this update; if you do,
        any changes you make to book isbns will be lost.
        '''
        script = '''
        DROP TABLE IF EXISTS library_id;
        CREATE TABLE library_id ( id   INTEGER PRIMARY KEY,
                                  uuid TEXT NOT NULL,
                                  UNIQUE(uuid)
        );

        DROP TABLE IF EXISTS identifiers;
        CREATE TABLE identifiers  ( id   INTEGER PRIMARY KEY,
                                    book INTEGER NON NULL,
                                    type TEXT NON NULL DEFAULT "isbn" COLLATE NOCASE,
                                    val  TEXT NON NULL COLLATE NOCASE,
                                    UNIQUE(book, type)
        );

        DROP TABLE IF EXISTS languages;
        CREATE TABLE languages    ( id        INTEGER PRIMARY KEY,
                                    lang_code TEXT NON NULL COLLATE NOCASE,
                                    UNIQUE(lang_code)
        );

        DROP TABLE IF EXISTS books_languages_link;
        CREATE TABLE books_languages_link ( id INTEGER PRIMARY KEY,
                                            book INTEGER NOT NULL,
                                            lang_code INTEGER NOT NULL,
                                            item_order INTEGER NOT NULL DEFAULT 0,
                                            UNIQUE(book, lang_code)
        );

        DROP TRIGGER IF EXISTS fkc_delete_on_languages;
        CREATE TRIGGER fkc_delete_on_languages
        BEFORE DELETE ON languages
        BEGIN
            SELECT CASE
                WHEN (SELECT COUNT(id) FROM books_languages_link WHERE lang_code=OLD.id) > 0
                THEN RAISE(ABORT, 'Foreign key violation: language is still referenced')
            END;
        END;

        DROP TRIGGER IF EXISTS fkc_delete_on_languages_link;
        CREATE TRIGGER fkc_delete_on_languages_link
        BEFORE INSERT ON books_languages_link
        BEGIN
            SELECT CASE
                WHEN (SELECT id from books WHERE id=NEW.book) IS NULL
                THEN RAISE(ABORT, 'Foreign key violation: book not in books')
                WHEN (SELECT id from languages WHERE id=NEW.lang_code) IS NULL
                THEN RAISE(ABORT, 'Foreign key violation: lang_code not in languages')
            END;
        END;

        DROP TRIGGER IF EXISTS fkc_update_books_languages_link_a;
        CREATE TRIGGER fkc_update_books_languages_link_a
        BEFORE UPDATE OF book ON books_languages_link
        BEGIN
            SELECT CASE
                WHEN (SELECT id from books WHERE id=NEW.book) IS NULL
                THEN RAISE(ABORT, 'Foreign key violation: book not in books')
            END;
        END;
        DROP TRIGGER IF EXISTS fkc_update_books_languages_link_b;
        CREATE TRIGGER fkc_update_books_languages_link_b
        BEFORE UPDATE OF lang_code ON books_languages_link
        BEGIN
            SELECT CASE
                WHEN (SELECT id from languages WHERE id=NEW.lang_code) IS NULL
                THEN RAISE(ABORT, 'Foreign key violation: lang_code not in languages')
            END;
        END;

        DROP INDEX IF EXISTS books_languages_link_aidx;
        CREATE INDEX books_languages_link_aidx ON books_languages_link (lang_code);
        DROP INDEX IF EXISTS books_languages_link_bidx;
        CREATE INDEX books_languages_link_bidx ON books_languages_link (book);
        DROP INDEX IF EXISTS languages_idx;
        CREATE INDEX languages_idx ON languages (lang_code COLLATE NOCASE);

        DROP TRIGGER IF EXISTS books_delete_trg;
        CREATE TRIGGER books_delete_trg
            AFTER DELETE ON books
            BEGIN
                DELETE FROM books_authors_link WHERE book=OLD.id;
                DELETE FROM books_publishers_link WHERE book=OLD.id;
                DELETE FROM books_ratings_link WHERE book=OLD.id;
                DELETE FROM books_series_link WHERE book=OLD.id;
                DELETE FROM books_tags_link WHERE book=OLD.id;
                DELETE FROM books_languages_link WHERE book=OLD.id;
                DELETE FROM data WHERE book=OLD.id;
                DELETE FROM comments WHERE book=OLD.id;
                DELETE FROM conversion_options WHERE book=OLD.id;
                DELETE FROM books_plugin_data WHERE book=OLD.id;
                DELETE FROM identifiers WHERE book=OLD.id;
        END;

        INSERT INTO identifiers (book, val) SELECT id,isbn FROM books WHERE isbn;

        ALTER TABLE books ADD COLUMN last_modified TIMESTAMP NOT NULL DEFAULT "%s";

        '''%isoformat(DEFAULT_DATE, sep=' ')
        # SQLite does not support non-constant default values in ALTER
        # statements
        self.conn.execute(script)

    def upgrade_version_19(self):
        recipes = self.conn.get('SELECT id,title,script FROM feeds')
        if recipes:
            from calibre.web.feeds.recipes import (custom_recipes,
                    custom_recipe_filename)
            bdir = os.path.dirname(custom_recipes.file_path)
            for id_, title, script in recipes:
                existing = frozenset(map(int, custom_recipes.iterkeys()))
                if id_ in existing:
                    id_ = max(existing) + 1000
                id_ = str(id_)
                fname = custom_recipe_filename(id_, title)
                custom_recipes[id_] = (title, fname)
                if isinstance(script, unicode):
                    script = script.encode('utf-8')
                with open(os.path.join(bdir, fname), 'wb') as f:
                    f.write(script)

    def upgrade_version_20(self):
        '''
        Add a link column to the authors table.
        '''

        script = '''
        ALTER TABLE authors ADD COLUMN link TEXT NOT NULL DEFAULT "";
        '''
        self.conn.execute(script)
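
The upgrade driver above is a generic migration pattern: PRAGMA user_version records how far the schema has been migrated, and each upgrade_version_N method moves it one step forward inside a single exclusive transaction. A stand-alone sketch of the same loop using only the stdlib sqlite3 module; the table and upgrade functions are illustrative, not calibre's:

    import sqlite3

    # upgrade_N moves the schema from version N to N+1.
    def upgrade_0(conn):
        conn.execute('CREATE TABLE books(id INTEGER PRIMARY KEY, title TEXT)')

    def upgrade_1(conn):
        conn.execute('ALTER TABLE books ADD COLUMN sort TEXT')

    UPGRADES = {0: upgrade_0, 1: upgrade_1}

    def upgrade_schema(conn):
        conn.execute('BEGIN EXCLUSIVE TRANSACTION')
        try:
            while True:
                uv = conn.execute('pragma user_version').fetchone()[0]
                meth = UPGRADES.get(uv)
                if meth is None:
                    break
                meth(conn)
                # user_version lives in the db header and is transactional
                conn.execute('pragma user_version=%d' % (uv + 1))
        except:
            conn.execute('ROLLBACK')
            raise
        else:
            conn.execute('COMMIT')

    conn = sqlite3.connect(':memory:')
    conn.isolation_level = None  # manage transactions explicitly
    upgrade_schema(conn)
    assert conn.execute('pragma user_version').fetchone()[0] == 2
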
167
src/calibre/db/tables.py
Normal file
@ -0,0 +1,167 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

from datetime import datetime

from dateutil.tz import tzoffset

from calibre.constants import plugins
from calibre.utils.date import parse_date, local_tz
from calibre.ebooks.metadata import author_to_author_sort

_c_speedup = plugins['speedup'][0]

def _c_convert_timestamp(val):
    if not val:
        return None
    try:
        ret = _c_speedup.parse_date(val.strip())
    except:
        ret = None
    if ret is None:
        return parse_date(val, as_utc=False)
    year, month, day, hour, minutes, seconds, tzsecs = ret
    return datetime(year, month, day, hour, minutes, seconds,
            tzinfo=tzoffset(None, tzsecs)).astimezone(local_tz)

class Table(object):

    def __init__(self, name, metadata, link_table=None):
        self.name, self.metadata = name, metadata

        # self.unserialize() maps values from the db to python objects
        self.unserialize = \
            {
                'datetime': _c_convert_timestamp,
                'bool': bool
            }.get(
                metadata['datatype'], lambda x: x)
        if name == 'authors':
            # Legacy
            self.unserialize = lambda x: x.replace('|', ',') if x else None

        self.link_table = (link_table if link_table else
                'books_%s_link'%self.metadata['table'])

class OneToOneTable(Table):

    '''
    Represents data that is unique per book (it may not actually be unique)
    but each item is assigned to a book in a one-to-one mapping. For example:
    uuid, timestamp, size, etc.
    '''

    def read(self, db):
        self.book_col_map = {}
        idcol = 'id' if self.metadata['table'] == 'books' else 'book'
        for row in db.conn.execute('SELECT {0}, {1} FROM {2}'.format(idcol,
            self.metadata['column'], self.metadata['table'])):
            self.book_col_map[row[0]] = self.unserialize(row[1])

class SizeTable(OneToOneTable):

    def read(self, db):
        self.book_col_map = {}
        for row in db.conn.execute(
                'SELECT books.id, (SELECT MAX(uncompressed_size) FROM data '
                'WHERE data.book=books.id) FROM books'):
            self.book_col_map[row[0]] = self.unserialize(row[1])

class ManyToOneTable(Table):

    '''
    Represents data where one data item can map to many books, for example:
    series or publisher.

    Each book however has only one value for data of this type.
    '''

    def read(self, db):
        self.id_map = {}
        self.extra_map = {}
        self.col_book_map = {}
        self.book_col_map = {}
        self.read_id_maps(db)
        self.read_maps(db)

    def read_id_maps(self, db):
        for row in db.conn.execute('SELECT id, {0} FROM {1}'.format(
            self.metadata['column'], self.metadata['table'])):
            if row[1]:
                self.id_map[row[0]] = self.unserialize(row[1])

    def read_maps(self, db):
        for row in db.conn.execute(
                'SELECT book, {0} FROM {1}'.format(
                    self.metadata['link_column'], self.link_table)):
            if row[1] not in self.col_book_map:
                self.col_book_map[row[1]] = []
            self.col_book_map[row[1]].append(row[0])
            self.book_col_map[row[0]] = row[1]

class ManyToManyTable(ManyToOneTable):

    '''
    Represents data that has a many-to-many mapping with books. i.e. each
    book can have more than one value and each value can be mapped to more
    than one book. For example: tags or authors.
    '''

    def read_maps(self, db):
        for row in db.conn.execute(
                'SELECT book, {0} FROM {1}'.format(
                    self.metadata['link_column'], self.link_table)):
            if row[1] not in self.col_book_map:
                self.col_book_map[row[1]] = []
            self.col_book_map[row[1]].append(row[0])
            if row[0] not in self.book_col_map:
                self.book_col_map[row[0]] = []
            self.book_col_map[row[0]].append(row[1])

class AuthorsTable(ManyToManyTable):

    def read_id_maps(self, db):
        self.alink_map = {}
        for row in db.conn.execute(
                'SELECT id, name, sort, link FROM authors'):
            self.id_map[row[0]] = row[1]
            self.extra_map[row[0]] = (row[2] if row[2] else
                    author_to_author_sort(row[1]))
            self.alink_map[row[0]] = row[3]

class FormatsTable(ManyToManyTable):

    def read_id_maps(self, db):
        pass

    def read_maps(self, db):
        for row in db.conn.execute('SELECT book, format, name FROM data'):
            if row[1] is not None:
                if row[1] not in self.col_book_map:
                    self.col_book_map[row[1]] = []
                self.col_book_map[row[1]].append(row[0])
                if row[0] not in self.book_col_map:
                    self.book_col_map[row[0]] = []
                self.book_col_map[row[0]].append((row[1], row[2]))

class IdentifiersTable(ManyToManyTable):

    def read_id_maps(self, db):
        pass

    def read_maps(self, db):
        for row in db.conn.execute('SELECT book, type, val FROM identifiers'):
            if row[1] is not None and row[2] is not None:
                if row[1] not in self.col_book_map:
                    self.col_book_map[row[1]] = []
                self.col_book_map[row[1]].append(row[0])
                if row[0] not in self.book_col_map:
                    self.book_col_map[row[0]] = []
                self.book_col_map[row[0]].append((row[1], row[2]))
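
The link-table readers above all follow one shape: scan books_<category>_link once and build two inverse maps, col_book_map (item id to book ids) and book_col_map (book id to item ids). A self-contained illustration with the stdlib sqlite3 module and made-up data:

    import sqlite3

    # Illustrative data: two books sharing a tag, mirroring the
    # tags/books_tags_link schema the tables above read from.
    conn = sqlite3.connect(':memory:')
    conn.executescript('''
        CREATE TABLE tags(id INTEGER PRIMARY KEY, name TEXT);
        CREATE TABLE books_tags_link(id INTEGER PRIMARY KEY, book INTEGER, tag INTEGER);
        INSERT INTO tags VALUES (1, 'fiction');
        INSERT INTO tags VALUES (2, 'history');
        INSERT INTO books_tags_link(book, tag) VALUES (10, 1);
        INSERT INTO books_tags_link(book, tag) VALUES (11, 1);
        INSERT INTO books_tags_link(book, tag) VALUES (11, 2);
    ''')

    col_book_map = {}  # tag id -> list of book ids
    book_col_map = {}  # book id -> list of tag ids
    for book, tag in conn.execute(
            'SELECT book, tag FROM books_tags_link ORDER BY id'):
        col_book_map.setdefault(tag, []).append(book)
        book_col_map.setdefault(book, []).append(tag)

    assert col_book_map[1] == [10, 11]
    assert book_col_map[11] == [1, 2]
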
@ -19,16 +19,17 @@ class ANDROID(USBMS):
|
||||
|
||||
VENDOR_ID = {
|
||||
# HTC
|
||||
0x0bb4 : { 0x0c02 : [0x100, 0x0227, 0x0226, 0x222],
|
||||
0x0c01 : [0x100, 0x0227, 0x0226],
|
||||
0x0ff9 : [0x0100, 0x0227, 0x0226],
|
||||
0x0c87 : [0x0100, 0x0227, 0x0226],
|
||||
0xc92 : [0x100],
|
||||
0xc97 : [0x226],
|
||||
0xc99 : [0x0100],
|
||||
0xca2 : [0x226],
|
||||
0xca3 : [0x100],
|
||||
0xca4 : [0x226],
|
||||
0x0bb4 : { 0xc02 : [0x100, 0x0227, 0x0226, 0x222],
|
||||
0xc01 : [0x100, 0x0227, 0x0226],
|
||||
0xff9 : [0x0100, 0x0227, 0x0226],
|
||||
0xc87 : [0x0100, 0x0227, 0x0226],
|
||||
0xc91 : [0x0100, 0x0227, 0x0226],
|
||||
0xc92 : [0x100, 0x0227, 0x0226, 0x222],
|
||||
0xc97 : [0x100, 0x0227, 0x0226, 0x222],
|
||||
0xc99 : [0x100, 0x0227, 0x0226, 0x222],
|
||||
0xca2 : [0x100, 0x0227, 0x0226, 0x222],
|
||||
0xca3 : [0x100, 0x0227, 0x0226, 0x222],
|
||||
0xca4 : [0x100, 0x0227, 0x0226, 0x222],
|
||||
},
|
||||
|
||||
# Eken
|
||||
@ -45,8 +46,11 @@ class ANDROID(USBMS):
|
||||
0xfce : { 0xd12e : [0x0100]},
|
||||
|
||||
# Google
|
||||
0x18d1 : { 0x4e11 : [0x0100, 0x226, 0x227], 0x4e12: [0x0100, 0x226,
|
||||
0x227], 0x4e21: [0x0100, 0x226, 0x227], 0xb058: [0x0222]},
|
||||
0x18d1 : {
|
||||
0x4e11 : [0x0100, 0x226, 0x227],
|
||||
0x4e12: [0x0100, 0x226, 0x227],
|
||||
0x4e21: [0x0100, 0x226, 0x227],
|
||||
0xb058: [0x0222, 0x226, 0x227]},
|
||||
|
||||
# Samsung
|
||||
0x04e8 : { 0x681d : [0x0222, 0x0223, 0x0224, 0x0400],
|
||||
@ -68,7 +72,8 @@ class ANDROID(USBMS):
|
||||
0x413c : { 0xb007 : [0x0100, 0x0224, 0x0226]},
|
||||
|
||||
# LG
|
||||
0x1004 : { 0x61cc : [0x100], 0x61ce : [0x100], 0x618e : [0x226] },
|
||||
0x1004 : { 0x61cc : [0x100], 0x61ce : [0x100], 0x618e : [0x226,
|
||||
0x9999] },
|
||||
|
||||
# Archos
|
||||
0x0e79 : {
|
||||
@ -97,6 +102,9 @@ class ANDROID(USBMS):
|
||||
# ZTE
|
||||
0x19d2 : { 0x1353 : [0x226] },
|
||||
|
||||
# Advent
|
||||
0x0955 : { 0x7100 : [0x9999] }, # This is the same as the Notion Ink Adam
|
||||
|
||||
}
|
||||
EBOOK_DIR_MAIN = ['eBooks/import', 'wordplayer/calibretransfer', 'Books']
|
||||
EXTRA_CUSTOMIZATION_MESSAGE = _('Comma separated list of directories to '
|
||||
@ -107,7 +115,7 @@ class ANDROID(USBMS):
|
||||
VENDOR_NAME = ['HTC', 'MOTOROLA', 'GOOGLE_', 'ANDROID', 'ACER',
|
||||
'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE', 'ARCHOS',
|
||||
'TELECHIP', 'HUAWEI', 'T-MOBILE', 'SEMC', 'LGE', 'NVIDIA',
|
||||
'GENERIC-', 'ZTE']
|
||||
'GENERIC-', 'ZTE', 'MID']
|
||||
WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
|
||||
'__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
|
||||
'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
|
||||
@ -116,11 +124,11 @@ class ANDROID(USBMS):
|
||||
'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE', 'SCH-I800_CARD',
|
||||
'7', 'A956', 'A955', 'A43', 'ANDROID_PLATFORM', 'TEGRA_2',
|
||||
'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE', 'A7EB', 'STREAK',
|
||||
'MB525']
|
||||
'MB525', 'ANDROID2.3', 'SGH-I997']
|
||||
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
|
||||
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
|
||||
'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
|
||||
'__UMS_COMPOSITE']
|
||||
'__UMS_COMPOSITE', 'SGH-I997_CARD']
|
||||
|
||||
OSX_MAIN_MEM = 'Android Device Main Memory'
|
||||
|
||||
|
@ -5,7 +5,7 @@ __copyright__ = '2010, Gregory Riker'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
|
||||
import cStringIO, ctypes, datetime, os, re, shutil, subprocess, sys, tempfile, time
|
||||
import cStringIO, ctypes, datetime, os, re, shutil, sys, tempfile, time
|
||||
from calibre.constants import __appname__, __version__, DEBUG
|
||||
from calibre import fit_image, confirm_config_name
|
||||
from calibre.constants import isosx, iswindows
|
||||
@ -13,8 +13,7 @@ from calibre.devices.errors import OpenFeedback, UserFeedback
|
||||
from calibre.devices.usbms.deviceconfig import DeviceConfig
|
||||
from calibre.devices.interface import DevicePlugin
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
from calibre.ebooks.metadata import authors_to_string, MetaInformation, \
|
||||
title_sort
|
||||
from calibre.ebooks.metadata import authors_to_string, MetaInformation, title_sort
|
||||
from calibre.ebooks.metadata.book.base import Metadata
|
||||
from calibre.ebooks.metadata.epub import set_metadata
|
||||
from calibre.library.server.utils import strftime
|
||||
@ -107,6 +106,8 @@ class DriverBase(DeviceConfig, DevicePlugin):
|
||||
# Needed for config_widget to work
|
||||
FORMATS = ['epub', 'pdf']
|
||||
USER_CAN_ADD_NEW_FORMATS = False
|
||||
KEEP_TEMP_FILES_AFTER_UPLOAD = True
|
||||
CAN_DO_DEVICE_DB_PLUGBOARD = True
|
||||
|
||||
# Hide the standard customization widgets
|
||||
SUPPORTS_SUB_DIRS = False
|
||||
@ -119,11 +120,17 @@ class DriverBase(DeviceConfig, DevicePlugin):
|
||||
'iBooks Category'),
|
||||
_('Cache covers from iTunes/iBooks') +
|
||||
':::' +
|
||||
_('Enable to cache and display covers from iTunes/iBooks')
|
||||
_('Enable to cache and display covers from iTunes/iBooks'),
|
||||
_(u'"Copy files to iTunes Media folder %s" is enabled in iTunes Preferences|Advanced')%u'\u2026' +
|
||||
':::' +
|
||||
_("<p>This setting should match your iTunes <i>Preferences</i>|<i>Advanced</i> setting.</p>"
|
||||
"<p>Disabling will store copies of books transferred to iTunes in your calibre configuration directory.</p>"
|
||||
"<p>Enabling indicates that iTunes is configured to store copies in your iTunes Media folder.</p>")
|
||||
]
|
||||
EXTRA_CUSTOMIZATION_DEFAULT = [
|
||||
True,
|
||||
True,
|
||||
False,
|
||||
]
|
||||
|
||||
|
||||
@ -135,7 +142,8 @@ class ITUNES(DriverBase):
|
||||
'''
|
||||
Calling sequences:
|
||||
Initialization:
|
||||
can_handle() or can_handle_windows()
|
||||
can_handle() | can_handle_windows()
|
||||
_launch_iTunes()
|
||||
reset()
|
||||
open()
|
||||
card_prefix()
|
||||
@ -163,8 +171,12 @@ class ITUNES(DriverBase):
|
||||
settings()
|
||||
set_progress_reporter()
|
||||
upload_books()
|
||||
_get_fpath()
|
||||
_update_epub_metadata()
|
||||
_remove_existing_copy()
|
||||
_remove_from_device()
|
||||
_remove_from_iTunes()
|
||||
_add_new_copy()
|
||||
_add_library_book()
|
||||
_update_iTunes_metadata()
|
||||
add_books_to_metadata()
|
||||
use_plugboard_ext()
|
||||
set_plugboard()
|
||||
@ -181,13 +193,14 @@ class ITUNES(DriverBase):
|
||||
supported_platforms = ['osx','windows']
|
||||
author = 'GRiker'
|
||||
#: The version of this plugin as a 3-tuple (major, minor, revision)
|
||||
version = (1,0,0)
|
||||
version = (1,1,0)
|
||||
|
||||
DISPLAY_DISABLE_DIALOG = "display_disable_apple_driver_dialog"
|
||||
|
||||
# EXTRA_CUSTOMIZATION_MESSAGE indexes
|
||||
USE_SERIES_AS_CATEGORY = 0
|
||||
CACHE_COVERS = 1
|
||||
USE_ITUNES_STORAGE = 2
|
||||
|
||||
OPEN_FEEDBACK_MESSAGE = _(
|
||||
'Apple device detected, launching iTunes, please wait ...')
|
||||
@ -276,7 +289,7 @@ class ITUNES(DriverBase):
|
||||
description_prefix = "added by calibre"
|
||||
ejected = False
|
||||
iTunes= None
|
||||
iTunes_media = None
|
||||
iTunes_local_storage = None
|
||||
library_orphans = None
|
||||
log = Log()
|
||||
manual_sync_mode = False
|
||||
@ -412,11 +425,11 @@ class ITUNES(DriverBase):
|
||||
this_book.datetime = parse_date(str(book.date_added())).timetuple()
|
||||
except:
|
||||
this_book.datetime = time.gmtime()
|
||||
this_book.db_id = None
|
||||
this_book.device_collections = []
|
||||
this_book.library_id = library_books[this_book.path] if this_book.path in library_books else None
|
||||
this_book.size = book.size()
|
||||
this_book.uuid = book.composer()
|
||||
this_book.cid = None
|
||||
# Hack to discover if we're running in GUI environment
|
||||
if self.report_progress is not None:
|
||||
this_book.thumbnail = self._generate_thumbnail(this_book.path, book)
|
||||
@ -433,7 +446,8 @@ class ITUNES(DriverBase):
|
||||
}
|
||||
|
||||
if self.report_progress is not None:
|
||||
self.report_progress((i+1)/book_count, _('%d of %d') % (i+1, book_count))
|
||||
self.report_progress((i+1)/book_count,
|
||||
_('%(num)d of %(tot)d') % dict(num=i+1, tot=book_count))
|
||||
self._purge_orphans(library_books, cached_books)
|
||||
|
||||
elif iswindows:
|
||||
@ -451,10 +465,10 @@ class ITUNES(DriverBase):
|
||||
this_book.datetime = parse_date(str(book.DateAdded)).timetuple()
|
||||
except:
|
||||
this_book.datetime = time.gmtime()
|
||||
this_book.db_id = None
|
||||
this_book.device_collections = []
|
||||
this_book.library_id = library_books[this_book.path] if this_book.path in library_books else None
|
||||
this_book.size = book.Size
|
||||
this_book.cid = None
|
||||
# Hack to discover if we're running in GUI environment
|
||||
if self.report_progress is not None:
|
||||
this_book.thumbnail = self._generate_thumbnail(this_book.path, book)
|
||||
@ -472,7 +486,8 @@ class ITUNES(DriverBase):
|
||||
|
||||
if self.report_progress is not None:
|
||||
self.report_progress((i+1)/book_count,
|
||||
_('%d of %d') % (i+1, book_count))
|
||||
_('%(num)d of %(tot)d') % dict(num=i+1,
|
||||
tot=book_count))
|
||||
self._purge_orphans(library_books, cached_books)
|
||||
|
||||
finally:
|
||||
@ -490,7 +505,7 @@ class ITUNES(DriverBase):
|
||||
|
||||
def can_handle(self, device_info, debug=False):
|
||||
'''
|
||||
Unix version of :method:`can_handle_windows`
|
||||
OSX version of :method:`can_handle_windows`
|
||||
|
||||
:param device_info: Is a tupe of (vid, pid, bcd, manufacturer, product,
|
||||
serial number)
|
||||
@ -821,7 +836,7 @@ class ITUNES(DriverBase):
|
||||
# Confirm/create thumbs archive
|
||||
if not os.path.exists(self.cache_dir):
|
||||
if DEBUG:
|
||||
self.log.info(" creating thumb cache '%s'" % self.cache_dir)
|
||||
self.log.info(" creating thumb cache at '%s'" % self.cache_dir)
|
||||
os.makedirs(self.cache_dir)
|
||||
|
||||
if not os.path.exists(self.archive_path):
|
||||
@ -833,6 +848,17 @@ class ITUNES(DriverBase):
|
||||
if DEBUG:
|
||||
self.log.info(" existing thumb cache at '%s'" % self.archive_path)
|
||||
|
||||
# If enabled in config options, create/confirm an iTunes storage folder
|
||||
if not self.settings().extra_customization[self.USE_ITUNES_STORAGE]:
|
||||
self.iTunes_local_storage = os.path.join(config_dir,'iTunes storage')
|
||||
if not os.path.exists(self.iTunes_local_storage):
|
||||
if DEBUG:
|
||||
self.log(" creating iTunes_local_storage at '%s'" % self.iTunes_local_storage)
|
||||
os.mkdir(self.iTunes_local_storage)
|
||||
else:
|
||||
if DEBUG:
|
||||
self.log(" existing iTunes_local_storage at '%s'" % self.iTunes_local_storage)
|
||||
|
||||
def remove_books_from_metadata(self, paths, booklists):
|
||||
'''
|
||||
Remove books from the metadata list. This function must not communicate
|
||||
@ -1020,17 +1046,14 @@ class ITUNES(DriverBase):
|
||||
|
||||
if DEBUG:
|
||||
self.log.info("ITUNES.upload_books()")
|
||||
self._dump_files(files, header='upload_books()',indent=2)
|
||||
self._dump_update_list(header='upload_books()',indent=2)
|
||||
|
||||
if isosx:
|
||||
for (i,file) in enumerate(files):
|
||||
format = file.rpartition('.')[2].lower()
|
||||
for (i,fpath) in enumerate(files):
|
||||
format = fpath.rpartition('.')[2].lower()
|
||||
path = self.path_template % (metadata[i].title,
|
||||
authors_to_string(metadata[i].authors),
|
||||
format)
|
||||
self._remove_existing_copy(path, metadata[i])
|
||||
fpath = self._get_fpath(file, metadata[i], format, update_md=True)
|
||||
db_added, lb_added = self._add_new_copy(fpath, metadata[i])
|
||||
thumb = self._cover_to_thumb(path, metadata[i], db_added, lb_added, format)
|
||||
this_book = self._create_new_book(fpath, metadata[i], path, db_added, lb_added, thumb, format)
|
||||
@ -1054,20 +1077,20 @@ class ITUNES(DriverBase):
|
||||
|
||||
# Report progress
|
||||
if self.report_progress is not None:
|
||||
self.report_progress((i+1)/file_count, _('%d of %d') % (i+1, file_count))
|
||||
self.report_progress((i+1)/file_count,
|
||||
_('%(num)d of %(tot)d') % dict(num=i+1, tot=file_count))
|
||||
|
||||
elif iswindows:
|
||||
try:
|
||||
pythoncom.CoInitialize()
|
||||
self.iTunes = win32com.client.Dispatch("iTunes.Application")
|
||||
|
||||
for (i,file) in enumerate(files):
|
||||
format = file.rpartition('.')[2].lower()
|
||||
for (i,fpath) in enumerate(files):
|
||||
format = fpath.rpartition('.')[2].lower()
|
||||
path = self.path_template % (metadata[i].title,
|
||||
authors_to_string(metadata[i].authors),
|
||||
format)
|
||||
self._remove_existing_copy(path, metadata[i])
|
||||
fpath = self._get_fpath(file, metadata[i],format, update_md=True)
|
||||
db_added, lb_added = self._add_new_copy(fpath, metadata[i])
|
||||
|
||||
if self.manual_sync_mode and not db_added:
|
||||
@ -1098,7 +1121,8 @@ class ITUNES(DriverBase):
|
||||
|
||||
# Report progress
|
||||
if self.report_progress is not None:
|
||||
self.report_progress((i+1)/file_count, _('%d of %d') % (i+1, file_count))
|
||||
self.report_progress((i+1)/file_count,
|
||||
_('%(num)d of %(tot)d') % dict(num=i+1, tot=file_count))
|
||||
finally:
|
||||
pythoncom.CoUninitialize()
|
||||
|
||||
@ -1211,6 +1235,7 @@ class ITUNES(DriverBase):
|
||||
'''
|
||||
windows assumes pythoncom wrapper
|
||||
'''
|
||||
if DEBUG:
|
||||
self.log.info(" ITUNES._add_library_book()")
|
||||
if isosx:
|
||||
added = self.iTunes.add(appscript.mactypes.File(file))
|
||||
@ -1274,6 +1299,8 @@ class ITUNES(DriverBase):
|
||||
|
||||
def _add_new_copy(self, fpath, metadata):
|
||||
'''
|
||||
fp = cached_book['lib_book'].location().path
|
||||
fp = cached_book['lib_book'].Location
|
||||
'''
|
||||
if DEBUG:
|
||||
self.log.info(" ITUNES._add_new_copy()")
|
||||
@ -1281,17 +1308,27 @@ class ITUNES(DriverBase):
|
||||
db_added = None
|
||||
lb_added = None
|
||||
|
||||
# If using iTunes_local_storage, copy the file, redirect iTunes to use local copy
|
||||
if not self.settings().extra_customization[self.USE_ITUNES_STORAGE]:
|
||||
local_copy = os.path.join(self.iTunes_local_storage, str(metadata.uuid) + os.path.splitext(fpath)[1])
|
||||
shutil.copyfile(fpath,local_copy)
|
||||
fpath = local_copy
|
||||
|
||||
if self.manual_sync_mode:
|
||||
'''
|
||||
Unsupported direct-connect mode.
|
||||
'''
|
||||
self.log.warning(" unsupported direct connect mode")
|
||||
db_added = self._add_device_book(fpath, metadata)
|
||||
if not getattr(fpath, 'deleted_after_upload', False):
|
||||
lb_added = self._add_library_book(fpath, metadata)
|
||||
if lb_added:
|
||||
if DEBUG:
|
||||
self.log.info(" file added to Library|Books for iTunes<->iBooks tracking")
|
||||
if not lb_added and DEBUG:
|
||||
self.log.warn(" failed to add '%s' to iTunes, iTunes Media folder inaccessible" % metadata.title)
|
||||
else:
|
||||
lb_added = self._add_library_book(fpath, metadata)
|
||||
if DEBUG:
|
||||
self.log.info(" file added to Library|Books for pending sync")
|
||||
if not lb_added:
|
||||
raise UserFeedback("iTunes Media folder inaccessible",
|
||||
details="Failed to add '%s' to iTunes" % metadata.title,
|
||||
level=UserFeedback.WARN)
|
||||
|
||||
return db_added, lb_added
|
||||
|
||||
@ -1300,14 +1337,17 @@ class ITUNES(DriverBase):
|
||||
assumes pythoncom wrapper for db_added
|
||||
as of iTunes 9.2, iBooks 1.1, can't set artwork for PDF files via automation
|
||||
'''
|
||||
if DEBUG:
|
||||
self.log.info(" ITUNES._cover_to_thumb()")
|
||||
|
||||
thumb = None
|
||||
if metadata.cover:
|
||||
|
||||
if format == 'epub':
|
||||
# Pre-shrink cover
|
||||
# self.MAX_COVER_WIDTH, self.MAX_COVER_HEIGHT
|
||||
'''
|
||||
Pre-shrink cover
|
||||
self.MAX_COVER_WIDTH, self.MAX_COVER_HEIGHT
|
||||
'''
|
||||
try:
|
||||
img = PILImage.open(metadata.cover)
|
||||
width = img.size[0]
|
||||
@ -1315,8 +1355,8 @@ class ITUNES(DriverBase):
|
||||
scaled, nwidth, nheight = fit_image(width, height, self.MAX_COVER_WIDTH, self.MAX_COVER_HEIGHT)
|
||||
if scaled:
|
||||
if DEBUG:
|
||||
self.log.info(" '%s' scaled from %sx%s to %sx%s" %
|
||||
(metadata.cover,width,height,nwidth,nheight))
|
||||
self.log.info(" cover scaled from %sx%s to %sx%s" %
|
||||
(width,height,nwidth,nheight))
|
||||
img = img.resize((nwidth, nheight), PILImage.ANTIALIAS)
|
||||
cd = cStringIO.StringIO()
|
||||
img.convert('RGB').save(cd, 'JPEG')
|
||||
@ -1335,9 +1375,11 @@ class ITUNES(DriverBase):
|
||||
return thumb
|
||||
|
||||
if isosx:
|
||||
# The following commands generate an error, but the artwork does in fact
|
||||
# get sent to the device. Seems like a bug in Apple's automation interface?
|
||||
# Could also be a problem with the integrity of the cover data?
|
||||
'''
|
||||
The following commands generate an error, but the artwork does in fact
|
||||
get sent to the device. Seems like a bug in Apple's automation interface?
|
||||
Could also be a problem with the integrity of the cover data?
|
||||
'''
|
||||
if lb_added:
|
||||
try:
|
||||
lb_added.artworks[1].data_.set(cover_data)
|
||||
@ -1360,9 +1402,8 @@ class ITUNES(DriverBase):
|
||||
#ipython(user_ns=locals())
|
||||
pass
|
||||
|
||||
|
||||
elif iswindows:
|
||||
# Write the data to a real file for Windows iTunes
|
||||
''' Write the data to a real file for Windows iTunes '''
|
||||
tc = os.path.join(tempfile.gettempdir(), "cover.jpg")
|
||||
with open(tc,'wb') as tmp_cover:
|
||||
tmp_cover.write(cover_data)
|
||||
@ -1421,7 +1462,8 @@ class ITUNES(DriverBase):
|
||||
|
||||
this_book = Book(metadata.title, authors_to_string(metadata.authors))
|
||||
this_book.datetime = time.gmtime()
|
||||
this_book.db_id = None
|
||||
#this_book.cid = metadata.id
|
||||
this_book.cid = None
|
||||
this_book.device_collections = []
|
||||
this_book.format = format
|
||||
this_book.library_id = lb_added # ??? GR
|
||||
@ -1429,7 +1471,6 @@ class ITUNES(DriverBase):
|
||||
this_book.thumbnail = thumb
|
||||
this_book.iTunes_id = lb_added # ??? GR
|
||||
this_book.uuid = metadata.uuid
|
||||
|
||||
if isosx:
|
||||
if lb_added:
|
||||
this_book.size = self._get_device_book_size(fpath, lb_added.size())
|
||||
@ -1460,24 +1501,6 @@ class ITUNES(DriverBase):
|
||||
|
||||
return this_book
|
||||
|
||||
def _delete_iTunesMetadata_plist(self,fpath):
|
||||
'''
|
||||
Delete the plist file from the file to force recache
|
||||
'''
|
||||
zf = ZipFile(fpath,'a')
|
||||
fnames = zf.namelist()
|
||||
pl_name = 'iTunesMetadata.plist'
|
||||
try:
|
||||
plist = [x for x in fnames if pl_name in x][0]
|
||||
except:
|
||||
plist = None
|
||||
if plist:
|
||||
if DEBUG:
|
||||
self.log.info(" _delete_iTunesMetadata_plist():")
|
||||
self.log.info(" deleting '%s'\n from '%s'" % (pl_name,fpath))
|
||||
zf.delete(pl_name)
|
||||
zf.close()
|
||||
|
||||
def _discover_manual_sync_mode(self, wait=0):
|
||||
'''
|
||||
Assumes pythoncom for windows
|
||||
@ -1498,7 +1521,7 @@ class ITUNES(DriverBase):
|
||||
else:
|
||||
self.log.error(" book_playlist not found")
|
||||
|
||||
if len(dev_books):
|
||||
if dev_books is not None and len(dev_books):
|
||||
first_book = dev_books[0]
|
||||
if False:
|
||||
self.log.info(" determing manual mode by modifying '%s' by %s" % (first_book.name(), first_book.artist()))
|
||||
@ -1529,7 +1552,7 @@ class ITUNES(DriverBase):
|
||||
dev_books = pl.Tracks
|
||||
break
|
||||
|
||||
if dev_books.Count:
|
||||
if dev_books is not None and dev_books.Count:
|
||||
first_book = dev_books.Item(1)
|
||||
#if DEBUG:
|
||||
#self.log.info(" determing manual mode by modifying '%s' by %s" % (first_book.Name, first_book.Artist))
|
||||
@ -1662,18 +1685,6 @@ class ITUNES(DriverBase):
|
||||
zf.close()
|
||||
return (title, author, timestamp)
|
||||
|
||||
def _dump_files(self, files, header=None,indent=0):
|
||||
if header:
|
||||
msg = '\n%sfiles passed to %s:' % (' '*indent,header)
|
||||
self.log.info(msg)
|
||||
self.log.info( "%s%s" % (' '*indent,'-' * len(msg)))
|
||||
for file in files:
|
||||
if getattr(file, 'orig_file_path', None) is not None:
|
||||
self.log.info(" %s%s" % (' '*indent,file.orig_file_path))
|
||||
elif getattr(file, 'name', None) is not None:
|
||||
self.log.info(" %s%s" % (' '*indent,file.name))
|
||||
self.log.info()
|
||||
|
||||
def _dump_hex(self, src, length=16):
|
||||
'''
|
||||
'''
|
||||
@ -1697,7 +1708,7 @@ class ITUNES(DriverBase):
|
||||
self.log.info()
|
||||
|
||||
def _dump_update_list(self,header=None,indent=0):
|
||||
if header:
|
||||
if header and self.update_list:
|
||||
msg = '\n%sself.update_list %s' % (' '*indent,header)
|
||||
self.log.info(msg)
|
||||
self.log.info( "%s%s" % (' '*indent,'-' * len(msg)))
|
||||
@ -1716,7 +1727,6 @@ class ITUNES(DriverBase):
|
||||
(' '*indent,
|
||||
ub['title'],
|
||||
ub['author']))
|
||||
self.log.info()
|
||||
|
||||
def _find_device_book(self, search):
|
||||
'''
|
||||
@ -2115,35 +2125,6 @@ class ITUNES(DriverBase):
|
||||
self.log.error(" no iPad|Books playlist found")
|
||||
return pl
|
||||
|
||||
def _get_fpath(self,file, metadata, format, update_md=False):
|
||||
'''
|
||||
If the database copy will be deleted after upload, we have to
|
||||
use file (the PersistentTemporaryFile), which will be around until
|
||||
calibre exits.
|
||||
If we're using the database copy, delete the plist
|
||||
'''
|
||||
if DEBUG:
|
||||
self.log.info(" ITUNES._get_fpath()")
|
||||
|
||||
fpath = file
|
||||
if not getattr(fpath, 'deleted_after_upload', False):
|
||||
if getattr(file, 'orig_file_path', None) is not None:
|
||||
# Database copy
|
||||
fpath = file.orig_file_path
|
||||
self._delete_iTunesMetadata_plist(fpath)
|
||||
elif getattr(file, 'name', None) is not None:
|
||||
# PTF
|
||||
fpath = file.name
|
||||
else:
|
||||
# Recipe - PTF
|
||||
if DEBUG:
|
||||
self.log.info(" file will be deleted after upload")
|
||||
|
||||
if format == 'epub' and update_md:
|
||||
self._update_epub_metadata(fpath, metadata)
|
||||
|
||||
return fpath
|
||||
|
||||
def _get_library_books(self):
|
||||
'''
|
||||
Populate a dict of paths from iTunes Library|Books
|
||||
@ -2347,6 +2328,7 @@ class ITUNES(DriverBase):
|
||||
self.iTunes = appscript.app('iTunes')
|
||||
self.initial_status = 'already running'
|
||||
|
||||
'''
|
||||
# Read the current storage path for iTunes media
|
||||
cmd = "defaults read com.apple.itunes NSNavLastRootDirectory"
|
||||
proc = subprocess.Popen( cmd, shell=True, cwd=os.curdir, stdout=subprocess.PIPE)
|
||||
@ -2357,12 +2339,13 @@ class ITUNES(DriverBase):
|
||||
else:
|
||||
self.log.error(" could not confirm valid iTunes.media_dir from %s" % 'com.apple.itunes')
|
||||
self.log.error(" media_dir: %s" % media_dir)
|
||||
'''
|
||||
|
||||
if DEBUG:
|
||||
self.log.info(" %s %s" % (__appname__, __version__))
|
||||
self.log.info(" [OSX %s - %s (%s), driver version %d.%d.%d]" %
|
||||
(self.iTunes.name(), self.iTunes.version(), self.initial_status,
|
||||
self.version[0],self.version[1],self.version[2]))
|
||||
self.log.info(" iTunes_media: %s" % self.iTunes_media)
|
||||
self.log.info(" calibre_library_path: %s" % self.calibre_library_path)
|
||||
|
||||
if iswindows:
|
||||
@ -2402,6 +2385,7 @@ class ITUNES(DriverBase):
|
||||
' iTunes automation interface non-responsive, ' +
|
||||
'recommend reinstalling iTunes')
|
||||
|
||||
'''
|
||||
# Read the current storage path for iTunes media from the XML file
|
||||
media_dir = ''
|
||||
string = None
|
||||
@ -2420,13 +2404,13 @@ class ITUNES(DriverBase):
|
||||
self.log.error(" '%s' not found" % media_dir)
|
||||
else:
|
||||
self.log.error(" no media dir found: string: %s" % string)
|
||||
'''
|
||||
|
||||
if DEBUG:
|
||||
self.log.info(" %s %s" % (__appname__, __version__))
|
||||
self.log.info(" [Windows %s - %s (%s), driver version %d.%d.%d]" %
|
||||
(self.iTunes.Windows[0].name, self.iTunes.Version, self.initial_status,
|
||||
self.version[0],self.version[1],self.version[2]))
|
||||
self.log.info(" iTunes_media: %s" % self.iTunes_media)
|
||||
self.log.info(" calibre_library_path: %s" % self.calibre_library_path)
|
||||
|
||||
def _purge_orphans(self,library_books, cached_books):
|
||||
@ -2476,13 +2460,14 @@ class ITUNES(DriverBase):
|
||||
(self.cached_books[book]['title'] == metadata.title and \
|
||||
self.cached_books[book]['author'] == authors_to_string(metadata.authors)):
|
||||
self.update_list.append(self.cached_books[book])
|
||||
self._remove_from_device(self.cached_books[book])
|
||||
|
||||
if DEBUG:
|
||||
self.log.info( " deleting device book '%s'" % (metadata.title))
|
||||
if not getattr(file, 'deleted_after_upload', False):
|
||||
self._remove_from_iTunes(self.cached_books[book])
|
||||
self._remove_from_device(self.cached_books[book])
|
||||
|
||||
if DEBUG:
|
||||
self.log.info(" deleting library book '%s'" % metadata.title)
|
||||
self._remove_from_iTunes(self.cached_books[book])
|
||||
break
|
||||
else:
|
||||
if DEBUG:
|
||||
@ -2495,9 +2480,9 @@ class ITUNES(DriverBase):
|
||||
(self.cached_books[book]['title'] == metadata.title and \
|
||||
self.cached_books[book]['author'] == authors_to_string(metadata.authors)):
|
||||
self.update_list.append(self.cached_books[book])
|
||||
self._remove_from_iTunes(self.cached_books[book])
|
||||
if DEBUG:
|
||||
self.log.info( " deleting library book '%s'" % metadata.title)
|
||||
self._remove_from_iTunes(self.cached_books[book])
|
||||
break
|
||||
else:
|
||||
if DEBUG:
|
||||
@ -2507,6 +2492,7 @@ class ITUNES(DriverBase):
|
||||
'''
|
||||
Windows assumes pythoncom wrapper
|
||||
'''
|
||||
if DEBUG:
|
||||
self.log.info(" ITUNES._remove_from_device()")
|
||||
if isosx:
|
||||
if DEBUG:
|
||||
@@ -2528,96 +2514,115 @@ class ITUNES(DriverBase):

    def _remove_from_iTunes(self, cached_book):
        '''
        iTunes does not delete books from storage when removing from database
        We only want to delete stored copies if the file is stored in iTunes
        We don't want to delete files stored outside of iTunes.
        Also confirm that storage_path does not point into calibre's storage.
        iTunes does not delete books from storage when removing from database via automation
        '''
        if DEBUG:
            self.log.info(" ITUNES._remove_from_iTunes():")

        if isosx:
            ''' Manually remove the book from iTunes storage '''
            try:
                storage_path = os.path.split(cached_book['lib_book'].location().path)
                if cached_book['lib_book'].location().path.startswith(self.iTunes_media) and \
                   not storage_path[0].startswith(prefs['library_path']):
                    title_storage_path = storage_path[0]
                fp = cached_book['lib_book'].location().path
                if DEBUG:
                    self.log.info(" removing title_storage_path: %s" % title_storage_path)
                    self.log.info(" processing %s" % fp)
                if fp.startswith(prefs['library_path']):
                    self.log.info(" '%s' stored in calibre database, not removed" % cached_book['title'])
                elif not self.settings().extra_customization[self.USE_ITUNES_STORAGE] and \
                     fp.startswith(self.iTunes_local_storage) and \
                     os.path.exists(fp):
                    # Delete the copy in iTunes_local_storage
                    os.remove(fp)
                    if DEBUG:
                        self.log(" removing from iTunes_local_storage")
                else:
                    # Delete from iTunes Media folder
                    if os.path.exists(fp):
                        os.remove(fp)
                        if DEBUG:
                            self.log.info(" deleting from iTunes storage")
                        author_storage_path = os.path.split(fp)[0]
                        try:
                            shutil.rmtree(title_storage_path)
                            os.rmdir(author_storage_path)
                            if DEBUG:
                                self.log.info(" removing empty author directory")
                        except:
                            self.log.info(" '%s' not empty" % title_storage_path)

                        # Clean up title/author directories
                        author_storage_path = os.path.split(title_storage_path)[0]
                        self.log.info(" author_storage_path: %s" % author_storage_path)
                        author_files = os.listdir(author_storage_path)
                        if '.DS_Store' in author_files:
                            author_files.pop(author_files.index('.DS_Store'))
                        if not author_files:
                            shutil.rmtree(author_storage_path)
                            os.rmdir(author_storage_path)
                            if DEBUG:
                                self.log.info(" removing empty author_storage_path")
                                self.log.info(" removing empty author directory")
                        else:
                            if DEBUG:
                                self.log.info(" author_storage_path not empty (%d objects):" % len(author_files))
                                self.log.info(" %s" % '\n'.join(author_files))
                    else:
                        self.log.info(" '%s' (stored external to iTunes, no files deleted)" % cached_book['title'])
                        self.log.info(" '%s' does not exist at storage location" % cached_book['title'])

            except:
                # We get here if there was an error with .location().path
                if DEBUG:
                    self.log.info(" '%s' not in iTunes storage" % cached_book['title'])
                    self.log.info(" '%s' not found in iTunes storage" % cached_book['title'])

            # Delete the book from the iTunes database
            try:
                self.iTunes.delete(cached_book['lib_book'])
                if DEBUG:
                    self.log.info(" removing from iTunes database")
            except:
                if DEBUG:
                    self.log.info(" unable to remove '%s' from iTunes" % cached_book['title'])
                    self.log.info(" unable to remove from iTunes database")

        elif iswindows:
            '''
            Assume we're wrapped in a pythoncom
            Windows stores the book under a common author directory, so we just delete the .epub
            '''
            fp = None
            try:
                book = cached_book['lib_book']
                path = book.Location
                fp = book.Location
            except:
                book = self._find_library_book(cached_book)
                if book:
                    path = book.Location
                    fp = book.Location

            if book:
                if self.iTunes_media and path.startswith(self.iTunes_media) and \
                   not path.startswith(prefs['library_path']):
                    storage_path = os.path.split(path)
                    if DEBUG:
                        self.log.info(" removing '%s' at %s" %
                            (cached_book['title'], path))
                        self.log.info(" processing %s" % fp)
                if fp.startswith(prefs['library_path']):
                    self.log.info(" '%s' stored in calibre database, not removed" % cached_book['title'])
                elif not self.settings().extra_customization[self.USE_ITUNES_STORAGE] and \
                     fp.startswith(self.iTunes_local_storage) and \
                     os.path.exists(fp):
                    # Delete the copy in iTunes_local_storage
                    os.remove(fp)
                    if DEBUG:
                        self.log(" removing from iTunes_local_storage")
                else:
                    # Delete from iTunes Media folder
                    if os.path.exists(fp):
                        os.remove(fp)
                        if DEBUG:
                            self.log.info(" deleting from iTunes storage")
                        author_storage_path = os.path.split(fp)[0]
                        try:
                            os.remove(path)
                            os.rmdir(author_storage_path)
                            if DEBUG:
                                self.log.info(" removing empty author directory")
                        except:
                            self.log.warning(" '%s' not in iTunes storage" % path)
                        try:
                            os.rmdir(storage_path[0])
                            self.log.info(" removed folder '%s'" % storage_path[0])
                        except:
                            self.log.info(" folder '%s' not found or not empty" % storage_path[0])
                            pass
                    else:
                        self.log.info(" '%s' does not exist at storage location" % cached_book['title'])
                else:
                    if DEBUG:
                        self.log.info(" '%s' not found in iTunes storage" % cached_book['title'])

                # Delete from iTunes database
            else:
                self.log.info(" '%s' (stored external to iTunes, no files deleted)" % cached_book['title'])
            else:
                if DEBUG:
                    self.log.info(" '%s' not found in iTunes" % cached_book['title'])
            # Delete the book from the iTunes database
            try:
                book.Delete()
                if DEBUG:
                    self.log.info(" removing from iTunes database")
            except:
                if DEBUG:
                    self.log.info(" unable to remove '%s' from iTunes" % cached_book['title'])
                    self.log.info(" unable to remove from iTunes database")

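Both branches of _remove_from_iTunes above apply the same three-way guard before touching the filesystem: never delete a file inside calibre's own library, delete the private copy when the USE_ITUNES_STORAGE option is off, otherwise delete from the iTunes Media folder. A condensed sketch of that decision, with the driver state passed in explicitly (the function name and parameters are ours, not calibre's):

import os

def removal_action(fp, calibre_library, itunes_media, itunes_local_storage,
                   use_itunes_storage):
    # Never delete a file that lives inside calibre's own library
    if fp.startswith(calibre_library):
        return 'keep: stored in calibre database'
    # Delete our private copy when not using iTunes' own storage
    if not use_itunes_storage and fp.startswith(itunes_local_storage) \
            and os.path.exists(fp):
        return 'delete: iTunes_local_storage copy'
    # Otherwise delete from the iTunes Media folder, if present
    if fp.startswith(itunes_media) and os.path.exists(fp):
        return 'delete: iTunes Media copy'
    return 'keep: stored external to iTunes'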
    def title_sorter(self, title):
        return re.sub('^\s*A\s+|^\s*The\s+|^\s*An\s+', '', title).rstrip()
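For reference, what that regular expression produces, as a minimal standalone check using the same pattern:

import re

def title_sorter(title):
    # Strip one leading English article ("A", "An", "The") for sorting
    return re.sub(r'^\s*A\s+|^\s*The\s+|^\s*An\s+', '', title).rstrip()

print(title_sorter('The Time Machine'))    # 'Time Machine'
print(title_sorter('A Study in Scarlet'))  # 'Study in Scarlet'
print(title_sorter('Dracula'))             # 'Dracula' (unchanged)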
@@ -2625,6 +2630,7 @@ class ITUNES(DriverBase):
    def _update_epub_metadata(self, fpath, metadata):
        '''
        '''
        if DEBUG:
            self.log.info(" ITUNES._update_epub_metadata()")

        # Fetch plugboard updates
@@ -2796,7 +2802,7 @@ class ITUNES(DriverBase):
        if metadata_x.series and self.settings().extra_customization[self.USE_SERIES_AS_CATEGORY]:
            if DEBUG:
                self.log.info(" ITUNES._update_iTunes_metadata()")
                self.log.info(" using Series name as Genre")
                self.log.info(" using Series name '%s' as Genre" % metadata_x.series)

            # Format the index as a sort key
            index = metadata_x.series_index
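The hunk stops just before the formatting itself. As a rough illustration of what "format the index as a sort key" means in practice, zero-padding makes string comparison order 2 before 10; the helper name and padding width here are illustrative, not calibre's actual code:

def series_index_sort_key(index):
    # Zero-pad the integer part and keep the fractional part so that
    # lexical comparison matches numeric order (illustrative width)
    integer = int(index)
    fraction = index - integer
    return '%04d%s' % (integer, ('%0.4f' % fraction).lstrip('0'))

print(series_index_sort_key(2))     # '0002.0000'
print(series_index_sort_key(10.5))  # '0010.5000', sorts after '0002.0000'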
@@ -2976,8 +2982,8 @@ class ITUNES(DriverBase):
                    newmi = book.deepcopy_metadata()
                    newmi.template_to_attribute(book, pb)
                    if pb is not None and DEBUG:
                        self.log.info(" transforming %s using %s:" % (format, pb))
                        self.log.info(" title: %s %s" % (book.title, ">>> %s" %
                        #self.log.info(" transforming %s using %s:" % (format, pb))
                        self.log.info(" title: '%s' %s" % (book.title, ">>> '%s'" %
                            newmi.title if book.title != newmi.title else ''))
                        self.log.info(" title_sort: %s %s" % (book.title_sort, ">>> %s" %
                            newmi.title_sort if book.title_sort != newmi.title_sort else ''))
@@ -2992,6 +2998,7 @@ class ITUNES(DriverBase):
                        self.log.info(" tags: %s %s" % (book.tags, ">>> %s" %
                            newmi.tags if book.tags != newmi.tags else ''))
                    else:
                        if DEBUG:
                            self.log(" matching plugboard not found")

                else:
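The plugboard code above copies a book's metadata, applies the plugboard's template-driven rewrites, then logs only the fields that actually changed. A minimal sketch of that pattern, assuming calibre's Metadata API (deepcopy_metadata and template_to_attribute) and a log object; the function name is ours:

def apply_plugboard(book, pb, log):
    # Copy so the library record is untouched, then rewrite from templates
    newmi = book.deepcopy_metadata()
    newmi.template_to_attribute(book, pb)
    # Report only the fields the plugboard actually transformed
    for field in ('title', 'title_sort', 'authors', 'tags'):
        old, new = getattr(book, field), getattr(newmi, field)
        if old != new:
            log.info(" %s: %r >>> %r" % (field, old, new))
    return newmi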
@@ -3081,12 +3088,12 @@ class ITUNES_ASYNC(ITUNES):
                        this_book.datetime = parse_date(str(library_books[book].date_added())).timetuple()
                    except:
                        this_book.datetime = time.gmtime()
                    this_book.db_id = None
                    this_book.device_collections = []
                    #this_book.library_id = library_books[this_book.path] if this_book.path in library_books else None
                    this_book.library_id = library_books[book]
                    this_book.size = library_books[book].size()
                    this_book.uuid = library_books[book].composer()
                    this_book.cid = None
                    # Hack to discover if we're running in GUI environment
                    if self.report_progress is not None:
                        this_book.thumbnail = self._generate_thumbnail(this_book.path, library_books[book])
@@ -3104,7 +3111,8 @@ class ITUNES_ASYNC(ITUNES):
                        }

                    if self.report_progress is not None:
                        self.report_progress((i+1)/book_count, _('%d of %d') % (i+1, book_count))
                        self.report_progress((i+1)/book_count,
                            _('%(num)d of %(tot)d') % dict(num=i+1, tot=book_count))

            elif iswindows:
                try:
@@ -3122,11 +3130,11 @@ class ITUNES_ASYNC(ITUNES):
                            this_book.datetime = parse_date(str(library_books[book].DateAdded)).timetuple()
                        except:
                            this_book.datetime = time.gmtime()
                        this_book.db_id = None
                        this_book.device_collections = []
                        this_book.library_id = library_books[book]
                        this_book.size = library_books[book].Size
                        this_book.uuid = library_books[book].Composer
                        this_book.cid = None
                        # Hack to discover if we're running in GUI environment
                        if self.report_progress is not None:
                            this_book.thumbnail = self._generate_thumbnail(this_book.path, library_books[book])
@@ -3144,7 +3152,8 @@ class ITUNES_ASYNC(ITUNES):

                        if self.report_progress is not None:
                            self.report_progress((i+1)/book_count,
                                _('%d of %d') % (i+1, book_count))
                                _('%(num)d of %(tot)d') % dict(num=i+1,
                                    tot=book_count))

                finally:
                    pythoncom.CoUninitialize()
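Both progress-reporting hunks swap positional % placeholders for named ones. Named placeholders let translators reorder the numbers freely; a minimal illustration (the second string is a hypothetical translation, not from calibre's message catalog):

# Positional placeholders fix the argument order for the translator:
print('%d of %d' % (3, 10))                       # '3 of 10'

# Named placeholders can be reordered in a translated template:
template_en = '%(num)d of %(tot)d'
template_xx = 'out of %(tot)d, number %(num)d'    # hypothetical translation
print(template_en % dict(num=3, tot=10))          # '3 of 10'
print(template_xx % dict(num=3, tot=10))          # 'out of 10, number 3'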
@@ -3238,6 +3247,17 @@ class ITUNES_ASYNC(ITUNES):
            if DEBUG:
                self.log.info(" existing thumb cache at '%s'" % self.archive_path)

        # If enabled in config options, create/confirm an iTunes storage folder
        if not self.settings().extra_customization[self.USE_ITUNES_STORAGE]:
            self.iTunes_local_storage = os.path.join(config_dir, 'iTunes storage')
            if not os.path.exists(self.iTunes_local_storage):
                if DEBUG:
                    self.log(" creating iTunes_local_storage at '%s'" % self.iTunes_local_storage)
                os.mkdir(self.iTunes_local_storage)
            else:
                if DEBUG:
                    self.log(" existing iTunes_local_storage at '%s'" % self.iTunes_local_storage)

    def sync_booklists(self, booklists, end_session=True):
        '''
        Update metadata on device.

@@ -61,7 +61,7 @@ class LIBREAIR(N516):

    BCD = [0x399]
    VENDOR_NAME = 'ALURATEK'
    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = '_FILE-STOR_GADGET'
    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'FILE-STOR_GADGET'
    EBOOK_DIR_MAIN = 'Books'

class ALEX(N516):

@@ -49,6 +49,9 @@ class DevicePlugin(Plugin):
    #: Whether the metadata on books can be set via the GUI.
    CAN_SET_METADATA = ['title', 'authors', 'collections']

    #: Whether the device can handle device_db metadata plugboards
    CAN_DO_DEVICE_DB_PLUGBOARD = False

    # Set this to None if the books on the device are files that the GUI can
    # access in order to add the books from the device to the library
    BACKLOADING_ERROR_MESSAGE = _('Cannot get files from this device')

@@ -327,12 +330,7 @@ class DevicePlugin(Plugin):
        free space on the device. The text of the FreeSpaceError must contain the
        word "card" if ``on_card`` is not None, otherwise it must contain the word "memory".

        :param files: A list of paths and/or file-like objects. If they are paths and
                      the paths point to temporary files, they may have an additional
                      attribute, original_file_path pointing to the originals. They may have
                      another optional attribute, deleted_after_upload which if True means
                      that the file pointed to by original_file_path will be deleted after
                      being uploaded to the device.
        :param files: A list of paths
        :param names: A list of file names that the books should have
                      once uploaded to the device. len(names) == len(files)
        :param metadata: If not None, it is a list of :class:`Metadata` objects.

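For orientation, a minimal sketch of a driver honoring this upload contract. The class, device name, and mount_point attribute are hypothetical; only DevicePlugin, FreeSpaceError, and the documented "memory"/"card" message rule come from calibre:

import os, shutil
from calibre.devices.interface import DevicePlugin
from calibre.devices.errors import FreeSpaceError

class ExampleDevice(DevicePlugin):  # hypothetical subclass
    CAN_DO_DEVICE_DB_PLUGBOARD = False
    mount_point = '/media/example'  # hypothetical mount prefix

    def upload_books(self, files, names, on_card=None, end_session=True,
                     metadata=None):
        sent = []
        for path, name in zip(files, names):
            if self.free_space()[0] < os.path.getsize(path):
                # The message must contain "memory" ("card" when on_card is set)
                raise FreeSpaceError('Not enough free memory on device')
            dest = os.path.join(self.mount_point, name)
            shutil.copyfile(path, dest)
            sent.append(dest)
        return sent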
@@ -20,11 +20,11 @@ class IRIVER_STORY(USBMS):
    FORMATS = ['epub', 'fb2', 'pdf', 'djvu', 'txt']

    VENDOR_ID = [0x1006]
    PRODUCT_ID = [0x4023, 0x4024, 0x4025]
    BCD = [0x0323]
    PRODUCT_ID = [0x4023, 0x4024, 0x4025, 0x4034]
    BCD = [0x0323, 0x0326]

    VENDOR_NAME = 'IRIVER'
    WINDOWS_MAIN_MEM = ['STORY', 'STORY_EB05', 'STORY_WI-FI']
    WINDOWS_MAIN_MEM = ['STORY', 'STORY_EB05', 'STORY_WI-FI', 'STORY_EB07']
    WINDOWS_CARD_A_MEM = ['STORY', 'STORY_SD']

    #OSX_MAIN_MEM = 'Kindle Internal Storage Media'

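Device detection hinges on these ID lists: a driver claims a USB device when its vendor/product/bcd triple appears in the declared lists. A simplified sketch of that matching (the real USBMS scanner also checks Windows volume names like those above; the function name is ours):

def claims_device(driver, vendor_id, product_id, bcd):
    # A driver claims a device when all three identifiers match
    # one of the values it declares (simplified)
    return (vendor_id in driver.VENDOR_ID and
            product_id in driver.PRODUCT_ID and
            bcd in driver.BCD)

class Story(object):  # mirrors the constants in the hunk above
    VENDOR_ID = [0x1006]
    PRODUCT_ID = [0x4023, 0x4024, 0x4025, 0x4034]
    BCD = [0x0323, 0x0326]

print(claims_device(Story, 0x1006, 0x4034, 0x0326))  # True: newly added IDs
print(claims_device(Story, 0x1006, 0x4099, 0x0326))  # False: unknown product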
@@ -57,6 +57,7 @@ class KOBO(USBMS):
    def initialize(self):
        USBMS.initialize(self)
        self.book_class = Book
        self.dbversion = 7

    def books(self, oncard=None, end_session=True):
        from calibre.ebooks.metadata.meta import path_to_ext
@@ -100,7 +101,7 @@ class KOBO(USBMS):
        for idx, b in enumerate(bl):
            bl_cache[b.lpath] = idx

        def update_booklist(prefix, path, title, authors, mime, date, ContentType, ImageID, readstatus, MimeType):
        def update_booklist(prefix, path, title, authors, mime, date, ContentType, ImageID, readstatus, MimeType, expired, favouritesindex, accessibility):
            changed = False
            try:
                lpath = path.partition(self.normalize_path(prefix))[2]
@@ -111,12 +112,27 @@ class KOBO(USBMS):

                playlist_map = {}

                if lpath not in playlist_map:
                    playlist_map[lpath] = []

                if readstatus == 1:
                    playlist_map[lpath] = "Im_Reading"
                    playlist_map[lpath].append('Im_Reading')
                elif readstatus == 2:
                    playlist_map[lpath] = "Read"
                    playlist_map[lpath].append('Read')
                elif readstatus == 3:
                    playlist_map[lpath] = "Closed"
                    playlist_map[lpath].append('Closed')

                # Related to a bug in the Kobo firmware that leaves an expired row for deleted books
                # this shows an expired Collection so the user can decide to delete the book
                if expired == 3:
                    playlist_map[lpath].append('Expired')
                # A SHORTLIST is supported on the touch but the data field is there on most earlier models
                if favouritesindex == 1:
                    playlist_map[lpath].append('Shortlist')

                # Label Previews
                if accessibility == 6:
                    playlist_map[lpath].append('Preview')

                path = self.normalize_path(path)
                # print "Normalized FileName: " + path
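The change above turns playlist_map from a single-label mapping into a list of collection labels per book, so one book can carry several statuses at once (for example Read plus Shortlist). A compact, runnable illustration of the accumulation pattern:

playlist_map = {}
lpath = 'books/example.epub'
playlist_map.setdefault(lpath, [])

readstatus, favouritesindex, accessibility = 2, 1, 0
if readstatus == 2:
    playlist_map[lpath].append('Read')       # read-status label
if favouritesindex == 1:
    playlist_map[lpath].append('Shortlist')  # shortlist label coexists

print(playlist_map)  # {'books/example.epub': ['Read', 'Shortlist']}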
@@ -126,7 +142,13 @@ class KOBO(USBMS):
                    bl_cache[lpath] = None
                    if ImageID is not None:
                        imagename = self.normalize_path(self._main_prefix + '.kobo/images/' + ImageID + ' - NickelBookCover.parsed')
                        if not os.path.exists(imagename):
                            # Try the Touch version if the image does not exist
                            imagename = self.normalize_path(self._main_prefix + '.kobo/images/' + ImageID + ' - N3_LIBRARY_FULL.parsed')

                        #print "Image name Normalized: " + imagename
                        if not os.path.exists(imagename):
                            debug_print("Strange - The image name does not exist - title: ", title)
                        if imagename is not None:
                            bl[idx].thumbnail = ImageWrapper(imagename)
                    if (ContentType != '6' and MimeType != 'Shortcover'):
@@ -138,7 +160,7 @@ class KOBO(USBMS):
                            debug_print(" Strange: The file: ", prefix, lpath, " does not exist!")
                    if lpath in playlist_map and \
                        playlist_map[lpath] not in bl[idx].device_collections:
                            bl[idx].device_collections.append(playlist_map[lpath])
                    bl[idx].device_collections = playlist_map.get(lpath, [])
                else:
                    if ContentType == '6' and MimeType == 'Shortcover':
                        book = Book(prefix, lpath, title, authors, mime, date, ContentType, ImageID, size=1048576)
@@ -157,7 +179,7 @@ class KOBO(USBMS):
                        raise

                    # print 'Update booklist'
                    book.device_collections = [playlist_map[lpath]] if lpath in playlist_map else []
                    book.device_collections = playlist_map.get(lpath, [])

                    if bl.add_book(book, replace_metadata=False):
                        changed = True
@@ -186,24 +208,51 @@ class KOBO(USBMS):
            result = cursor.fetchone()
            self.dbversion = result[0]

            debug_print("Database Version: ", self.dbversion)
            if self.dbversion >= 16:
                query = 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \
                    'ImageID, ReadStatus from content where BookID is Null'
                    'ImageID, ReadStatus, ___ExpirationStatus, FavouritesIndex, Accessibility from content where ' \
                    'BookID is Null and ( ___ExpirationStatus <> "3" or ___ExpirationStatus is Null)'
            elif self.dbversion < 16 and self.dbversion >= 14:
                query = 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \
                    'ImageID, ReadStatus, ___ExpirationStatus, FavouritesIndex, "-1" as Accessibility from content where ' \
                    'BookID is Null and ( ___ExpirationStatus <> "3" or ___ExpirationStatus is Null)'
            elif self.dbversion < 14 and self.dbversion >= 8:
                query = 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \
                    'ImageID, ReadStatus, ___ExpirationStatus, "-1" as FavouritesIndex, "-1" as Accessibility from content where ' \
                    'BookID is Null and ( ___ExpirationStatus <> "3" or ___ExpirationStatus is Null)'
            else:
                query = 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \
                    'ImageID, ReadStatus, "-1" as ___ExpirationStatus, "-1" as FavouritesIndex, "-1" as Accessibility from content where BookID is Null'

            try:
                cursor.execute(query)
            except Exception as e:
                err = str(e)
                if not ('___ExpirationStatus' in err or 'FavouritesIndex' in err or
                        'Accessibility' in err):
                    raise
                query = ('select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, '
                        'ImageID, ReadStatus, "-1" as ___ExpirationStatus, "-1" as '
                        'FavouritesIndex, "-1" as Accessibility from content where '
                        'BookID is Null')
                cursor.execute(query)

            changed = False
            for i, row in enumerate(cursor):
                # self.report_progress((i+1) / float(numrows), _('Getting list of books on device...'))

                if row[3].startswith("file:///usr/local/Kobo/help/"):
                    # These are internal to the Kobo device and do not exist
                    continue
                path = self.path_from_contentid(row[3], row[5], row[4], oncard)
                mime = mime_type_ext(path_to_ext(path)) if path.find('kepub') == -1 else 'application/epub+zip'
                # debug_print("mime:", mime)

                if oncard != 'carda' and oncard != 'cardb' and not row[3].startswith("file:///mnt/sd/"):
                    changed = update_booklist(self._main_prefix, path, row[0], row[1], mime, row[2], row[5], row[6], row[7], row[4])
                    changed = update_booklist(self._main_prefix, path, row[0], row[1], mime, row[2], row[5], row[6], row[7], row[4], row[8], row[9], row[10])
                    # print "shortbook: " + path
                elif oncard == 'carda' and row[3].startswith("file:///mnt/sd/"):
                    changed = update_booklist(self._card_a_prefix, path, row[0], row[1], mime, row[2], row[5], row[6], row[7], row[4])
                    changed = update_booklist(self._card_a_prefix, path, row[0], row[1], mime, row[2], row[5], row[6], row[7], row[4], row[8], row[9], row[10])

                if changed:
                    need_sync = True
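The pattern above degrades gracefully across Kobo firmware versions: try the query with the newest columns first, and if sqlite reports an unknown column, fall back to a query that fakes the missing columns with constant "-1" values. A self-contained illustration with sqlite3 (the table and column here are illustrative, not the actual Kobo schema):

import sqlite3

conn = sqlite3.connect(':memory:')
conn.execute('create table content (Title text, ReadStatus int)')  # old schema
conn.execute("insert into content values ('Example', 1)")

try:
    # Optimistic query assuming the newer column exists
    rows = conn.execute('select Title, ReadStatus, FavouritesIndex from content').fetchall()
except sqlite3.OperationalError as e:
    if 'FavouritesIndex' not in str(e):
        raise
    # Fall back: synthesize the missing column with a constant
    rows = conn.execute('select Title, ReadStatus, "-1" as FavouritesIndex from content').fetchall()

print(rows)  # [('Example', 1, '-1')]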
@@ -267,8 +316,19 @@ class KOBO(USBMS):
        cursor.execute('delete from content_keys where volumeid = ?', t)

        # Delete the chapters associated with the book next
        t = (ContentID, ContentID,)
        cursor.execute('delete from content where BookID = ? or ContentID = ?', t)
        t = (ContentID,)
        # Kobo does not delete the Book row (i.e. the row where the BookID is Null)
        # The next server sync should remove the row
        cursor.execute('delete from content where BookID = ?', t)
        try:
            cursor.execute('update content set ReadStatus=0, FirstTimeReading = \'true\', ___PercentRead=0, ___ExpirationStatus=3 ' \
                'where BookID is Null and ContentID =?', t)
        except Exception as e:
            if 'no such column' not in str(e):
                raise
            cursor.execute('update content set ReadStatus=0, FirstTimeReading = \'true\', ___PercentRead=0 ' \
                'where BookID is Null and ContentID =?', t)

        connection.commit()

@@ -286,7 +346,7 @@ class KOBO(USBMS):
        path_prefix = '.kobo/images/'
        path = self._main_prefix + path_prefix + ImageID

        file_endings = (' - iPhoneThumbnail.parsed', ' - bbMediumGridList.parsed', ' - NickelBookCover.parsed',)
        file_endings = (' - iPhoneThumbnail.parsed', ' - bbMediumGridList.parsed', ' - NickelBookCover.parsed', ' - N3_LIBRARY_FULL.parsed', ' - N3_LIBRARY_GRID.parsed', ' - N3_LIBRARY_LIST.parsed', ' - N3_SOCIAL_CURRENTREAD.parsed',)

        for ending in file_endings:
            fpath = path + ending
@@ -450,6 +510,9 @@ class KOBO(USBMS):
            path = self._main_prefix + path + '.kobo'
            # print "Path: " + path
        elif (ContentType == "6" or ContentType == "10") and MimeType == 'application/x-kobo-epub+zip':
            if path.startswith("file:///mnt/onboard/"):
                path = self._main_prefix + path.replace("file:///mnt/onboard/", '')
            else:
                path = self._main_prefix + '.kobo/kepub/' + path
            # print "Internal: " + path
        else:
@@ -502,7 +565,92 @@ class KOBO(USBMS):
            paths[source_id] = os.path.join(prefix, *(path.split('/')))
        return paths

    def reset_readstatus(self, connection, oncard):
        cursor = connection.cursor()

        # Reset Im_Reading list in the database
        if oncard == 'carda':
            query = 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ContentID like \'file:///mnt/sd/%\''
        elif oncard != 'carda' and oncard != 'cardb':
            query = 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ContentID not like \'file:///mnt/sd/%\''

        try:
            cursor.execute(query)
        except:
            debug_print(' Database Exception: Unable to reset ReadStatus list')
            raise
        else:
            connection.commit()
            debug_print(' Commit: Reset ReadStatus list')

        cursor.close()

    def set_readstatus(self, connection, ContentID, ReadStatus):
        cursor = connection.cursor()
        t = (ContentID,)
        cursor.execute('select DateLastRead from Content where BookID is Null and ContentID = ?', t)
        result = cursor.fetchone()
        if result is None:
            datelastread = '1970-01-01T00:00:00'
        else:
            datelastread = result[0] if result[0] is not None else '1970-01-01T00:00:00'

        t = (ReadStatus, datelastread, ContentID,)

        try:
            cursor.execute('update content set ReadStatus=?,FirstTimeReading=\'false\',DateLastRead=? where BookID is Null and ContentID = ?', t)
        except:
            debug_print(' Database Exception: Unable to update ReadStatus')
            raise
        else:
            connection.commit()
            debug_print(' Commit: Setting ReadStatus List')
        cursor.close()

    def reset_favouritesindex(self, connection, oncard):
        # Reset FavouritesIndex list in the database
        if oncard == 'carda':
            query = 'update content set FavouritesIndex=-1 where BookID is Null and ContentID like \'file:///mnt/sd/%\''
        elif oncard != 'carda' and oncard != 'cardb':
            query = 'update content set FavouritesIndex=-1 where BookID is Null and ContentID not like \'file:///mnt/sd/%\''

        cursor = connection.cursor()
        try:
            cursor.execute(query)
        except Exception as e:
            debug_print(' Database Exception: Unable to reset Shortlist list')
            if 'no such column' not in str(e):
                raise
        else:
            connection.commit()
            debug_print(' Commit: Reset FavouritesIndex list')

    def set_favouritesindex(self, connection, ContentID):
        cursor = connection.cursor()

        t = (ContentID,)

        try:
            cursor.execute('update content set FavouritesIndex=1 where BookID is Null and ContentID = ?', t)
        except Exception as e:
            debug_print(' Database Exception: Unable to set book as Shortlist')
            if 'no such column' not in str(e):
                raise
        else:
            connection.commit()
            debug_print(' Commit: Set FavouritesIndex')

    def update_device_database_collections(self, booklists, collections_attributes, oncard):
        # Define lists for the ReadStatus
        readstatuslist = {
            "Im_Reading": 1,
            "Read": 2,
            "Closed": 3,
        }

        accessibilitylist = {
            "Preview": 6,
        }
        # debug_print('Starting update_device_database_collections', collections_attributes)

        # Force collections_attributes to be 'tags' as no other is currently supported
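These new helpers factor the repeated per-category SQL out of update_device_database_collections; the readstatuslist and accessibilitylist dictionaries then drive a simple dispatch. A sketch of that dispatch as a standalone function, assuming a driver instance with the helper methods above and an open sqlite connection:

readstatuslist = {"Im_Reading": 1, "Read": 2, "Closed": 3}

def apply_category(kobo, connection, category, content_id):
    # Route one collection label to the matching helper (sketch)
    if category in readstatuslist:
        kobo.set_readstatus(connection, content_id, readstatuslist[category])
    elif category == 'Shortlist' and kobo.dbversion >= 14:
        kobo.set_favouritesindex(connection, content_id)
    # 'Preview' (the accessibility label) is deliberately left unmanaged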
@@ -521,146 +669,44 @@ class KOBO(USBMS):
        # return bytestrings if the content cannot be decoded as unicode
        connection.text_factory = lambda x: unicode(x, "utf-8", "ignore")

        cursor = connection.cursor()

        if collections:

            # Need to reset the collections outside the particular loops
            # otherwise the last item will not be removed
            self.reset_readstatus(connection, oncard)
            if self.dbversion >= 14:
                self.reset_favouritesindex(connection, oncard)

            # Process any collections that exist
            for category, books in collections.items():
                if category == 'Im_Reading':
                    # Reset Im_Reading list in the database
                    if oncard == 'carda':
                        query = 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ReadStatus = 1 and ContentID like \'file:///mnt/sd/%\''
                    elif oncard != 'carda' and oncard != 'cardb':
                        query = 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ReadStatus = 1 and ContentID not like \'file:///mnt/sd/%\''

                    try:
                        cursor.execute(query)
                    except:
                        debug_print('Database Exception: Unable to reset Im_Reading list')
                        raise
                    else:
                        # debug_print('Commit: Reset Im_Reading list')
                        connection.commit()

                debug_print("Category: ", category, " id = ", readstatuslist.get(category))
                for book in books:
                    # debug_print('Title:', book.title, 'lpath:', book.path)
                    book.device_collections = ['Im_Reading']
                    debug_print(' Title:', book.title, 'category: ', category)
                    if category not in book.device_collections:
                        book.device_collections.append(category)

                    extension = os.path.splitext(book.path)[1]
                    ContentType = self.get_content_type_from_extension(extension) if extension != '' else self.get_content_type_from_path(book.path)

                    ContentID = self.contentid_from_path(book.path, ContentType)

                    t = (ContentID,)
                    cursor.execute('select DateLastRead from Content where BookID is Null and ContentID = ?', t)
                    result = cursor.fetchone()
                    if result is None:
                        datelastread = '1970-01-01T00:00:00'
                    else:
                        datelastread = result[0] if result[0] is not None else '1970-01-01T00:00:00'

                    t = (datelastread, ContentID,)

                    try:
                        cursor.execute('update content set ReadStatus=1,FirstTimeReading=\'false\',DateLastRead=? where BookID is Null and ContentID = ?', t)
                    except:
                        debug_print('Database Exception: Unable to create Im_Reading list')
                        raise
                    else:
                        connection.commit()
                        # debug_print('Database: Commit create Im_Reading list')
                if category == 'Read':
                    # Reset Im_Reading list in the database
                    if oncard == 'carda':
                        query = 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ReadStatus = 2 and ContentID like \'file:///mnt/sd/%\''
                    elif oncard != 'carda' and oncard != 'cardb':
                        query = 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ReadStatus = 2 and ContentID not like \'file:///mnt/sd/%\''

                    try:
                        cursor.execute(query)
                    except:
                        debug_print('Database Exception: Unable to reset Im_Reading list')
                        raise
                    else:
                        # debug_print('Commit: Reset Im_Reading list')
                        connection.commit()

                    for book in books:
                        # debug_print('Title:', book.title, 'lpath:', book.path)
                        book.device_collections = ['Read']

                        extension = os.path.splitext(book.path)[1]
                        ContentType = self.get_content_type_from_extension(extension) if extension != '' else self.get_content_type_from_path(book.path)

                        ContentID = self.contentid_from_path(book.path, ContentType)
                        # datelastread = time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime())

                        t = (ContentID,)

                        try:
                            cursor.execute('update content set ReadStatus=2,FirstTimeReading=\'true\' where BookID is Null and ContentID = ?', t)
                        except:
                            debug_print('Database Exception: Unable to set book as Finished')
                            raise
                        else:
                            connection.commit()
                            # debug_print('Database: Commit set ReadStatus as Finished')
                if category == 'Closed':
                    # Reset Im_Reading list in the database
                    if oncard == 'carda':
                        query = 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ReadStatus = 3 and ContentID like \'file:///mnt/sd/%\''
                    elif oncard != 'carda' and oncard != 'cardb':
                        query = 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ReadStatus = 3 and ContentID not like \'file:///mnt/sd/%\''

                    try:
                        cursor.execute(query)
                    except:
                        debug_print('Database Exception: Unable to reset Closed list')
                        raise
                    else:
                        # debug_print('Commit: Reset Closed list')
                        connection.commit()

                    for book in books:
                        # debug_print('Title:', book.title, 'lpath:', book.path)
                        book.device_collections = ['Closed']

                        extension = os.path.splitext(book.path)[1]
                        ContentType = self.get_content_type_from_extension(extension) if extension != '' else self.get_content_type_from_path(book.path)

                        ContentID = self.contentid_from_path(book.path, ContentType)
                        # datelastread = time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime())

                        t = (ContentID,)

                        try:
                            cursor.execute('update content set ReadStatus=3,FirstTimeReading=\'true\' where BookID is Null and ContentID = ?', t)
                        except:
                            debug_print('Database Exception: Unable to set book as Closed')
                            raise
                        else:
                            connection.commit()
                            # debug_print('Database: Commit set ReadStatus as Closed')
                    if category in readstatuslist.keys():
                        # Manage ReadStatus
                        self.set_readstatus(connection, ContentID, readstatuslist.get(category))
                    if category == 'Shortlist' and self.dbversion >= 14:
                        # Manage FavouritesIndex/Shortlist
                        self.set_favouritesindex(connection, ContentID)
                    if category in accessibilitylist.keys():
                        # Do not manage the Accessibility List
                        pass
        else: # No collections
            # Since no collections exist the ReadStatus needs to be reset to 0 (Unread)
            print "Resetting ReadStatus to 0"
            # Reset Im_Reading list in the database
            if oncard == 'carda':
                query = 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ContentID like \'file:///mnt/sd/%\''
            elif oncard != 'carda' and oncard != 'cardb':
                query = 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ContentID not like \'file:///mnt/sd/%\''
            debug_print("No Collections - resetting ReadStatus")
            self.reset_readstatus(connection, oncard)
            if self.dbversion >= 14:
                debug_print("No Collections - resetting FavouritesIndex")
                self.reset_favouritesindex(connection, oncard)

            try:
                cursor.execute(query)
            except:
                debug_print('Database Exception: Unable to reset Im_Reading list')
                raise
            else:
                # debug_print('Commit: Reset Im_Reading list')
                connection.commit()

        cursor.close()
        connection.close()

        # debug_print('Finished update_device_database_collections', collections_attributes)

@@ -64,14 +64,24 @@ int do_mount(const char *dev, const char *mp) {
    snprintf(options, 1000, "rw,noexec,nosuid,sync,nodev");
    snprintf(uids, 100, "%d", getuid());
    snprintf(gids, 100, "%d", getgid());
#else
#ifdef __FreeBSD__
    snprintf(options, 1000, "rw,noexec,nosuid,sync,-u=%d,-g=%d", getuid(), getgid());
#else
    snprintf(options, 1000, "rw,noexec,nosuid,sync,nodev,quiet,shortname=mixed,uid=%d,gid=%d,umask=077,fmask=0177,dmask=0077,utf8,iocharset=iso8859-1", getuid(), getgid());
#endif
#endif

    ensure_root();

#ifdef __NetBSD__
    execlp("mount_msdos", "mount_msdos", "-u", uids, "-g", gids, "-o", options, dev, mp, NULL);
#else
#ifdef __FreeBSD__
    execlp("mount", "mount", "-t", "msdosfs", "-o", options, dev, mp, NULL);
#else
    execlp("mount", "mount", "-t", "auto", "-o", options, dev, mp, NULL);
#endif
#endif
    errsv = errno;
    fprintf(stderr, "Failed to mount with error: %s\n", strerror(errsv));
@@ -91,8 +101,12 @@ int call_eject(const char *dev, const char *mp) {
        ensure_root();
#ifdef __NetBSD__
        execlp("eject", "eject", dev, NULL);
#else
#ifdef __FreeBSD__
        execlp("umount", "umount", dev, NULL);
#else
        execlp("eject", "eject", "-s", dev, NULL);
#endif
#endif
        /* execlp failed */
        errsv = errno;
@@ -121,7 +135,11 @@ int call_umount(const char *dev, const char *mp) {

    if (pid == 0) { /* Child process */
        ensure_root();
#ifdef __FreeBSD__
        execlp("umount", "umount", mp, NULL);
#else
        execlp("umount", "umount", "-l", mp, NULL);
#endif
        /* execlp failed */
        errsv = errno;
        fprintf(stderr, "Failed to umount with error: %s\n", strerror(errsv));
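The FreeBSD support added to the mount helper follows one pattern throughout: select the platform's own mount/eject command behind an #ifdef ladder. A Python rendering of the same dispatch, purely as an illustration; the command lists mirror the hunks above (the NetBSD uid/gid flags are omitted for brevity) and the function name is ours:

import sys

def mount_command(dev, mp, options):
    # Mirror the #ifdef ladder in do_mount() above (illustrative only)
    if sys.platform.startswith('netbsd'):
        return ['mount_msdos', '-o', options, dev, mp]
    elif sys.platform.startswith('freebsd'):
        return ['mount', '-t', 'msdosfs', '-o', options, dev, mp]
    else:  # Linux and friends
        return ['mount', '-t', 'auto', '-o', options, dev, mp]

print(mount_command('/dev/da0s1', '/media/reader', 'rw,noexec,nosuid,sync'))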