This commit is contained in:
Sengian 2011-07-03 10:55:41 +02:00
commit b7804c5aaf
366 changed files with 236047 additions and 136740 deletions

View File

@ -14,6 +14,7 @@ resources/scripts.pickle
resources/ebook-convert-complete.pickle
resources/builtin_recipes.xml
resources/builtin_recipes.zip
resources/template-functions.json
setup/installer/windows/calibre/build.log
src/calibre/translations/.errors
src/cssutils/.svn/

View File

@ -19,6 +19,270 @@
# new recipes:
# - title:
- version: 0.8.8
date: 2011-07-01
new features:
- title: "Make author names in the Book Details panel clickable. Clicking them takes you to the wikipedia page for the author by default. You may have to tell calibre to display author names in the Book details panel first via Preferences->Look & Feel->Book details. You can change the link for individual authors by right clicking on the author's name in the Tag Browser and selecting Manage Authors."
- title: "Get Books: Add 'Open Books' as an available book source"
- title: "Get Books: When a free download is available for a search result, for example, for public domain books, allow direct download of the book into your calibre library."
- title: "Support for detecting and mounting reader devices on FreeBSD."
tickets: [802708]
- title: "When creating a composite custom column, allow the use of HTML to create links and other markup that display in the Book details panel"
- title: "Add the swap_around_comma function to the template language."
- title: "Drivers for HTC G2, Advent Vega, iRiver Story HD, Lark FreeMe and Moovyman mp7"
- title: "Quick View: Survives changing libraries. Also allow sorting by series index as well as name."
- title: "Connect to iTunes: Add an option to control how the driver works depending on whether you have iTunes setup to copy files to its media directory or not. Set this option by customizing the Apple driver in Preferences->Plugins. Having iTunes copy media to its storage folder is no longer neccessary. See http://www.mobileread.com/forums/showthread.php?t=118559 for details"
- title: "Remove the delete library functionality from calibre, instead you can now remove a library, so calibre will forget about it, but you have to delete the files manually"
bug fixes:
- title: "Fix a regression introduced in 0.8.7 in the Tag Browser that could cause calibre to crash after performing various actions"
- title: "Fix an unhandled error when deleting all saved searches"
tickets: [804383]
- title: "Fix row numbers in a previous selection being incorrect after a sort operation."
- title: "Fix ISBN identifier type not recognized if it is in upper case"
tickets: [802288]
- title: "Fix a regression in 0.8.7 that broke reading metadata from MOBI files in the Edit metadata dialog."
tickets: [801981]
- title: "Fix handling of filenames that have an even number of periods before the file extension."
tickets: [801939]
- title: "Fix lack of thread saefty in template format system, that could lead to incorrect template evaluation in some cases."
tickets: [801944]
- title: "Fix conversion to PDB when the input document has no text"
tickets: [801888]
- title: "Fix clicking on first letter of author names generating incorrect search."
- title: "Also fix updating bulk metadata in custom column causing unnneccessary Tag Browser refreshes."
- title: "Fix a regression in 0.8.7 that broke renaming items via the Tag Browser"
- title: "Fix a regression in 0.8.7 that caused the regex builder wizard to fail with LIT files as the input"
improved recipes:
- Zaman Gazetesi
- Infobae
- El Cronista
- Critica de la Argentina
- Buenos Aires Economico
- El Universal (Venezuela)
- wprost
- Financial Times UK
new recipes:
- title: "Today's Zaman by thomass"
- title: "Athens News by Darko Miletic"
- title: "Catholic News Agency"
author: Jetkey
- title: "Arizona Republic"
author: Jim Olo
- title: "Add Ming Pao Vancouver and Toronto"
author: Eddie Lau
- version: 0.8.7
date: 2011-06-24
new features:
- title: "Connect to iTunes: You now need to tell iTunes to keep its own copy of every ebook. Do this in iTunes by going to Preferences->Advanced and setting the 'Copy files to iTunes Media folder when adding to library' option. To learn about why this is necessary, see: http://www.mobileread.com/forums/showthread.php?t=140260"
- title: "Add a couple of date related functions to the calibre template langauge to get 'todays' date and create text based on the value of a date type field"
- title: "Improved reading of metadata from FB2 files, with support for reading isbns, tags, published date, etc."
- title: "Driver for the Imagine IMEB5"
tickets: [800642]
- title: "Show the currently used network proxies in Preferences->Miscellaneous"
- title: "Kobo Touch driver: Show Favorites as a device collection. Various other minor fixes."
- title: "Content server now sends the Content-Disposition header when sending ebook files."
- title: "Allow search and replace on comments custom columns."
- title: "Add a new action 'Quick View' to show the books in your library by the author/tags/series/etc. of the currently selected book, in a separate window. You can add it to your toolbar or right click menu by going to Preferences->Toolbars."
- title: "Get Books: Add libri.de as a book source. Fix a bug that caused some books downloads to fail. Fixes to the Legimi and beam-ebooks.de stores"
tickets: [799367]
bug fixes:
- title: "Fix a memory leak that could result in the leaking of several MB of memory with large libraries"
tickets: [800952]
- title: "Fix the read metadata from format button in the edit metadata dialog using the wrong timezone when setting published date"
tickets: [799777]
- title: "Generating catalog: Fix occassional file in use errors when generating catalogs on windows"
- title: "Fix clicking on News in Tag Browser not working in non English locales."
tickets: [799471]
- title: "HTML Input: Fix a regression in 0.8.6 that caused CSS stylesheets to be ignored"
tickets: [799171]
- title: "Fix a regression that caused restore database to stop working on some windows sytems"
- title: "EPUB Output: Convert <br> tags with text in them into <divs> as ADE cannot handle them."
tickets: [794427]
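A hedged BeautifulSoup sketch of that <br>-to-<div> conversion, written against the same bs3-style API the recipes in this commit use; it is an illustration, not the actual conversion pipeline code:

def divify_br(soup):
    # ADE cannot handle <br> elements that ended up with text or
    # children inside them; renaming the tag to <div> keeps the
    # content renderable.
    for br in soup.findAll('br'):
        if br.contents:
            br.name = 'div'
    return soup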
improved recipes:
- Le Temps
- Perfil
- Financial Times UK
new recipes:
- title: "Daytona Beach Journal"
author: BRGriff
- title: "El club del ebook and Frontline"
author: Darko Miletic
- version: 0.8.6
date: 2011-06-17
new features:
- title: "Builtin support for downloading and installing/updating calibre plugins. Go to Preferences->Plugins and click 'Get new plugins'"
description: "When updates for installed plugins are available, calibre will automatically (unobtrusively) notify you"
type: major
- title: "Metadata download configuration: Allow defining a set of 'default' fields for metadata download and quichly switching to/from them"
- title: "Allow clicking on the news category in the Tag Browser to display all downloaded periodicals"
- title: "Driver for the Libre Air"
- title: "Email sending: Allow user to stop email jobs (note that stopping may not actually prevent the email from being sent, depending on when the stop happens). Also automatically abort email sending if it takes longer than 15mins."
tickets: [795960]
bug fixes:
- title: "MOBI Output: Allow setting of background color on tables also set the border attribute on the table if the table has any border related css defined."
tickets: [797580]
- title: "Nook TSR: Put news sent to the device in My Files/Newspapers instaed of My Files/Books."
tickets: [796674]
- title: "MOBI Output: Fix a bug where linking to the very first element in an HTML file could sometimes result in the link pointing to the last element in the previous file."
tickets: [797214]
- title: "CSV catalog: Convert HTML comments to plain text"
- title: "HTML Input: Ignore links to text files."
tickets: [791568]
- title: "EPUB Output: Change orphaned <td> tags to <div> as they cause ADE to crash."
- title: "Fix 'Stop selected jobs' button trying to stop the same job multiple times"
- title: "Database: Explicitly test for case sensitivity on OS X instead of assuming a case insensitive filesystem."
tickets: [796258]
- title: "Get Books: More fixes to the Amazon store plugin"
- title: "FB2 Input: Do not specify font families/background colors"
improved recipes:
- Philadelphia Inquirer
- Macleans Magazine
- Metro UK
new recipes:
- title: "Christian Post, Down To Earth and Words Without Borders"
author: sexymax15
- title: "Noticias R7"
author: Diniz Bortolotto
- title: "UK Daily Mirror"
author: Dave Asbury
- title: "New Musical Express Magazine"
author: scissors
- version: 0.8.5
date: 2011-06-10
new features:
- title: "A new 'portable' calibre build, useful if you like to carry around calibre and its library on a USB key"
type: major
description: "For details, see: http://calibre-ebook.com/download_portable"
- title: "E-book viewer: Remember the last used font size multiplier."
tickets: [774343]
- title: "Preliminary support for the Kobo Touch. Drivers for the ZTE v9 tablet, Samsung S2, Notion Ink Adam and PocketBook 360+"
- title: "When downloading metadata merge rather than replace tags"
- title: "Edit metadata dialog: When pasting in an ISBN, if not valid ISBN if present on the clipboard popup a box for the user to enter the ISBN"
- title: "Windows build: Add code to load .pyd python extensions from a zip file. This allows many more files in the calibre installation to be zipped up, speeding up the installer."
- title: "Add an action to remove all formats from the selected books to the remove books button"
bug fixes:
- title: "Various minor bug fixes to the column coloring code"
- title: "Fix the not() template function"
- title: "Nook Color/TSR: When sending books to the storage card place them in the My Files/Books subdirectory. Also do not upload cover thumbnails as users report that the NC/TSR don't use them."
tickets: [792842]
- title: "Get Books: Update plugins for Amazon and B&N stores to handle website changes. Enable some stores by default on first run. Add Zixo store"
tickets: [792762]
- title: "Comic Input: Replace the # character in filenames as it can cause problem with conversion/vieweing."
tickets: [792723]
- title: "When writing files to zipfile, reset timestamp if it doesn't fit in 1980's vintage storage structures"
- title: "Amazon metadata plugin: Fix parsing of published date from amazon.de when it has februar in it"
improved recipes:
- Ambito
- GoComics
- Le Monde Diplomatique
- Max Planck
- express.de
new recipes:
- title: Ambito Financiero
author: Darko Miletic
- title: Stiin Tas Technica
author: Silviu Cotoara
- title: "Metro News NL"
author: DrMerry
- title: "Brigitte.de, Polizeipresse DE and Heise Online"
author: schuster
- version: 0.8.4
date: 2011-06-03

View File

@ -1,7 +1,5 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
ambito.com
'''
@ -11,51 +9,56 @@ from calibre.web.feeds.news import BasicNewsRecipe
class Ambito(BasicNewsRecipe):
title = 'Ambito.com'
__author__ = 'Darko Miletic'
description = 'Informacion Libre las 24 horas'
publisher = 'Ambito.com'
category = 'news, politics, Argentina'
description = 'Ambito.com con noticias del Diario Ambito Financiero de Buenos Aires'
publisher = 'Editorial Nefir S.A.'
category = 'news, politics, economy, finances, Argentina'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'iso-8859-1'
cover_url = 'http://www.ambito.com/img/logo_.jpg'
remove_javascript = True
encoding = 'cp1252'
masthead_url = 'http://www.ambito.com/img/logo_.jpg'
use_embedded_content = False
language = 'es_AR'
publication_type = 'newsportal'
extra_css = """
body{font-family: "Trebuchet MS",Verdana,sans-serif}
.volanta{font-size: small}
.t2_portada{font-size: xx-large; font-family: Georgia,serif; color: #026698}
"""
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
keep_only_tags = [dict(name='div', attrs={'align':'justify'})]
remove_tags = [dict(name=['object','link'])]
remove_tags = [dict(name=['object','link','embed','iframe','meta','table','img'])]
remove_attributes = ['align']
feeds = [
(u'Principales Noticias', u'http://www.ambito.com/rss/noticiasp.asp' )
,(u'Economia' , u'http://www.ambito.com/rss/noticias.asp?S=Econom%EDa' )
,(u'Politica' , u'http://www.ambito.com/rss/noticias.asp?S=Pol%EDtica' )
,(u'Informacion General' , u'http://www.ambito.com/rss/noticias.asp?S=Informaci%F3n%20General')
,(u'Agro' , u'http://www.ambito.com/rss/noticias.asp?S=Agro' )
,(u'Campo' , u'http://www.ambito.com/rss/noticias.asp?S=Agro' )
,(u'Internacionales' , u'http://www.ambito.com/rss/noticias.asp?S=Internacionales' )
,(u'Deportes' , u'http://www.ambito.com/rss/noticias.asp?S=Deportes' )
,(u'Espectaculos' , u'http://www.ambito.com/rss/noticias.asp?S=Espect%E1culos' )
,(u'Tecnologia' , u'http://www.ambito.com/rss/noticias.asp?S=Tecnologia' )
,(u'Salud' , u'http://www.ambito.com/rss/noticias.asp?S=Salud' )
,(u'Tecnologia' , u'http://www.ambito.com/rss/noticias.asp?S=Tecnolog%EDa' )
,(u'Ambito Nacional' , u'http://www.ambito.com/rss/noticias.asp?S=Ambito%20Nacional' )
]
def print_version(self, url):
return url.replace('http://www.ambito.com/noticia.asp?','http://www.ambito.com/noticias/imprimir.asp?')
return url.replace('/noticia.asp?','/noticias/imprimir.asp?')
def preprocess_html(self, soup):
    mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
    soup.head.insert(0, mtag)
    for item in soup.findAll(style=True):
        del item['style']
    # Flatten anchor tags to their plain text so stray links do not
    # clutter the generated e-book
    for item in soup.findAll('a'):
        txt = item.string
        if txt is None:
            txt = self.tag_to_string(item)
        item.replaceWith(txt)
    return soup
language = 'es_AR'

View File

@ -0,0 +1,87 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
ambito.com/diario
'''
import time
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class Ambito_Financiero(BasicNewsRecipe):
title = 'Ambito Financiero'
__author__ = 'Darko Miletic'
description = 'Informacion Libre las 24 horas'
publisher = 'Editorial Nefir S.A.'
category = 'news, politics, economy, Argentina'
no_stylesheets = True
encoding = 'cp1252'
masthead_url = 'http://www.ambito.com/diario/img/logo_af.gif'
publication_type = 'newspaper'
needs_subscription = 'optional'
use_embedded_content = False
language = 'es_AR'
PREFIX = 'http://www.ambito.com'
INDEX = PREFIX + '/diario/index.asp'
LOGIN = PREFIX + '/diario/login/entrada.asp'
extra_css = """
body{font-family: "Trebuchet MS",Verdana,sans-serif}
.volanta{font-size: small}
.t2_portada{font-size: xx-large; font-family: Georgia,serif; color: #026698}
"""
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
keep_only_tags = [dict(name='div', attrs={'align':'justify'})]
remove_tags = [dict(name=['object','link','embed','iframe','meta','table','img'])]
remove_attributes = ['align']
def get_browser(self):
    br = BasicNewsRecipe.get_browser()
    # Visit the index first so the site sets its session cookies
    br.open(self.INDEX)
    if self.username is not None and self.password is not None:
        br.open(self.LOGIN)
        br.select_form(name='frmlogin')
        br['USER_NAME'] = self.username
        br['USER_PASS'] = self.password
        br.submit()
    return br
def print_version(self, url):
return url.replace('/diario/noticia.asp?','/noticias/imprimir.asp?')
def preprocess_html(self, soup):
    for item in soup.findAll(style=True):
        del item['style']
    # Flatten anchor tags to their plain text
    for item in soup.findAll('a'):
        txt = item.string
        if txt is None:
            txt = self.tag_to_string(item)
        item.replaceWith(txt)
    return soup
def parse_index(self):
    soup = self.index_to_soup(self.INDEX)
    cover_item = soup.find('img', attrs={'class':'fotodespliegue'})
    if cover_item:
        self.cover_url = self.PREFIX + cover_item['src']
    articles = []
    checker = []
    # Collect front-page article links, skipping duplicate URLs
    for feed_link in soup.findAll('a', attrs={'class':['t0_portada','t2_portada','bajada']}):
        url = self.PREFIX + feed_link['href']
        title = self.tag_to_string(feed_link)
        date = strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime())
        if url not in checker:
            checker.append(url)
            articles.append({
                'title': title,
                'date': date,
                'url': url,
                'description': u''
            })
    return [(self.title, articles)]

View File

@ -0,0 +1,68 @@
__license__ = 'GPL v3'
__copyright__ = '2010, jolo'
'''
azrepublic.com
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
class AdvancedUserRecipe1307301031(BasicNewsRecipe):
title = u'AZRepublic'
__author__ = 'Jim Olo'
language = 'en'
description = "The Arizona Republic is Arizona's leading provider of news and information, and has published a daily newspaper in Phoenix for more than 110 years"
publisher = 'AZRepublic/AZCentral'
masthead_url = 'http://freedom2t.com/wp-content/uploads/press_az_republic_v2.gif'
cover_url = 'http://www.valleyleadership.org/Common/Img/2line4c_AZRepublic%20with%20azcentral%20logo.jpg'
category = 'news, politics, USA, AZ, Arizona'
oldest_article = 7
max_articles_per_feed = 100
remove_empty_feeds = True
no_stylesheets = True
remove_javascript = True
# extra_css = '.headline {font-size: medium;} \n .fact { padding-top: 10pt }'
extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .headline {font-size: medium} .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
remove_attributes = ['width','height','h2','subHeadline','style']
remove_tags = [
dict(name='div', attrs={'id':['slidingBillboard', 'top728x90', 'subindex-header', 'topSearch']}),
dict(name='div', attrs={'id':['simplesearch', 'azcLoginBox', 'azcLoginBoxInner', 'topNav']}),
dict(name='div', attrs={'id':['carsDrop', 'homesDrop', 'rentalsDrop', 'classifiedDrop']}),
dict(name='div', attrs={'id':['nav', 'mp', 'subnav', 'jobsDrop']}),
dict(name='h6', attrs={'class':['section-header']}),
dict(name='a', attrs={'href':['#comments']}),
dict(name='div', attrs={'class':['articletools clearfix', 'floatRight']}),
dict(name='div', attrs={'id':['fbFrame', 'ob', 'storyComments', 'storyGoogleAdBox']}),
dict(name='div', attrs={'id':['storyTopHomes', 'openRight', 'footerwrap', 'copyright']}),
dict(name='div', attrs={'id':['blogsHed', 'blog_comments', 'blogByline','blogTopics']}),
dict(name='div', attrs={'id':['membersRightMain', 'dealsfooter', 'azrTopHed', 'azrRightCol']}),
dict(name='div', attrs={'id':['ttdHeader', 'ttdTimeWeather']}),
dict(name='div', attrs={'id':['membersRightMain', 'deals-header-wrap']}),
dict(name='div', attrs={'id':['todoTopSearchBar', 'byline clearfix', 'subdex-topnav']}),
dict(name='h1', attrs={'id':['SEOtext']}),
dict(name='table', attrs={'class':['ap-mediabox-table']}),
dict(name='p', attrs={'class':['ap_para']}),
dict(name='span', attrs={'class':['source-org vcard', 'org fn']}),
dict(name='a', attrs={'href':['http://hosted2.ap.org/APDEFAULT/privacy']}),
dict(name='a', attrs={'href':['http://hosted2.ap.org/APDEFAULT/terms']}),
dict(name='div', attrs={'id':['onespot_nextclick']}),
]
feeds = [
(u'FrontPage', u'http://www.azcentral.com/rss/feeds/republicfront.xml'),
(u'TopUS-News', u'http://hosted.ap.org/lineups/USHEADS.rss?SITE=AZPHG&SECTION=HOME'),
(u'WorldNews', u'http://hosted.ap.org/lineups/WORLDHEADS.rss?SITE=AZPHG&SECTION=HOME'),
(u'TopBusiness', u'http://hosted.ap.org/lineups/BUSINESSHEADS.rss?SITE=AZPHG&SECTION=HOME'),
(u'Entertainment', u'http://hosted.ap.org/lineups/ENTERTAINMENT.rss?SITE=AZPHG&SECTION=HOME'),
(u'ArizonaNews', u'http://www.azcentral.com/rss/feeds/news.xml'),
(u'Gilbert', u'http://www.azcentral.com/rss/feeds/gilbert.xml'),
(u'Chandler', u'http://www.azcentral.com/rss/feeds/chandler.xml'),
(u'DiningReviews', u'http://www.azcentral.com/rss/feeds/diningreviews.xml'),
(u'AZBusiness', u'http://www.azcentral.com/rss/feeds/business.xml'),
(u'ArizonaDeals', u'http://www.azcentral.com/members/Blog%7E/RealDealsblog'),
(u'GroceryDeals', u'http://www.azcentral.com/members/Blog%7E/RealDealsblog/tag/2646')
]

View File

@ -0,0 +1,70 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
www.athensnews.gr
'''
from calibre.web.feeds.news import BasicNewsRecipe
class AthensNews(BasicNewsRecipe):
title = 'Athens News'
__author__ = 'Darko Miletic'
description = 'Greece in English since 1952'
publisher = 'NEP Publishing Company SA'
category = 'news, politics, Greece, Athens'
oldest_article = 1
max_articles_per_feed = 200
no_stylesheets = True
encoding = 'utf8'
use_embedded_content = False
language = 'en_GR'
remove_empty_feeds = True
publication_type = 'newspaper'
masthead_url = 'http://www.athensnews.gr/sites/athensnews/themes/athensnewsv3/images/logo.jpg'
extra_css = """
body{font-family: Arial,Helvetica,sans-serif }
img{margin-bottom: 0.4em; display:block}
.big{font-size: xx-large; font-family: Georgia,serif}
.articlepubdate{font-size: small; color: gray; font-family: Georgia,serif}
.lezanta{font-size: x-small; font-weight: bold; text-align: left; margin-bottom: 1em; display: block}
"""
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
, 'linearize_tables' : True
}
remove_tags = [
dict(name=['meta','link'])
]
keep_only_tags=[
dict(name='span',attrs={'class':'big'})
,dict(name='td', attrs={'class':['articlepubdate','text']})
]
remove_attributes=['lang']
feeds = [
(u'News' , u'http://www.athensnews.gr/category/1/feed' )
,(u'Politics' , u'http://www.athensnews.gr/category/8/feed' )
,(u'Business' , u'http://www.athensnews.gr/category/2/feed' )
,(u'Economy' , u'http://www.athensnews.gr/category/11/feed')
,(u'Community' , u'http://www.athensnews.gr/category/5/feed' )
,(u'Arts' , u'http://www.athensnews.gr/category/3/feed' )
,(u'Living in Athens', u'http://www.athensnews.gr/category/7/feed' )
,(u'Sports' , u'http://www.athensnews.gr/category/4/feed' )
,(u'Travel' , u'http://www.athensnews.gr/category/6/feed' )
,(u'Letters' , u'http://www.athensnews.gr/category/44/feed')
,(u'Media' , u'http://www.athensnews.gr/multimedia/feed' )
]
def print_version(self, url):
return url + '?action=print'
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup

View File

@ -0,0 +1,36 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe(BasicNewsRecipe):
title = u'Brigitte.de'
__author__ = 'schuster'
oldest_article = 14
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
language = 'de'
remove_javascript = True
remove_empty_feeds = True
timeout = 10
cover_url = 'http://www.medienmilch.de/typo3temp/pics/Brigitte-Logo_d5feb4a6e4.jpg'
masthead_url = 'http://www.medienmilch.de/typo3temp/pics/Brigitte-Logo_d5feb4a6e4.jpg'
remove_tags = [dict(attrs={'class':['linklist', 'head', 'indent right relatedContent', 'artikel-meta segment', 'segment', 'comment commentFormWrapper segment borderBG', 'segment borderBG comments', 'segment borderBG box', 'center', 'segment nextPageLink', 'inCar']}),
dict(id=['header', 'artTools', 'context', 'interact', 'footer-navigation', 'bwNet', 'copy', 'keyboardNavigationHint']),
dict(name=['hjtrs', 'kud'])]
feeds = [(u'Mode', u'http://www.brigitte.de/mode/feed.rss'),
(u'Beauty', u'http://www.brigitte.de/beauty/feed.rss'),
(u'Luxus', u'http://www.brigitte.de/luxus/feed.rss'),
(u'Figur', u'http://www.brigitte.de/figur/feed.rss'),
(u'Gesundheit', u'http://www.brigitte.de/gesundheit/feed.rss'),
(u'Liebe&Sex', u'http://www.brigitte.de/liebe-sex/feed.rss'),
(u'Gesellschaft', u'http://www.brigitte.de/gesellschaft/feed.rss'),
(u'Kultur', u'http://www.brigitte.de/kultur/feed.rss'),
(u'Reise', u'http://www.brigitte.de/reise/feed.rss'),
(u'Kochen', u'http://www.brigitte.de/kochen/feed.rss'),
(u'Wohnen', u'http://www.brigitte.de/wohnen/feed.rss'),
(u'Job', u'http://www.brigitte.de/job/feed.rss'),
(u'Erfahrungen', u'http://www.brigitte.de/erfahrungen/feed.rss'),
]

View File

@ -1,19 +1,16 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
elargentino.com
www.diariobae.com
'''
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
class BsAsEconomico(BasicNewsRecipe):
title = 'Buenos Aires Economico'
__author__ = 'Darko Miletic'
description = 'Revista Argentina'
publisher = 'ElArgentino.com'
description = 'Diario BAE es el diario economico-politico con mas influencia en la Argentina. Fuente de empresarios y politicos del pais y el exterior.'
publisher = 'Diario BAE'
category = 'news, politics, economy, Argentina'
oldest_article = 2
max_articles_per_feed = 100
@ -21,52 +18,42 @@ class BsAsEconomico(BasicNewsRecipe):
use_embedded_content = False
encoding = 'utf-8'
language = 'es_AR'
cover_url = strftime('http://www.diariobae.com/imgs_portadas/%Y%m%d_portadasBAE.jpg')
masthead_url = 'http://www.diariobae.com/img/logo_bae.png'
remove_empty_feeds = True
publication_type = 'newspaper'
extra_css = """
body{font-family: Georgia,"Times New Roman",Times,serif}
#titulo{font-size: x-large}
#epi{font-size: small; font-style: italic; font-weight: bold}
img{display: block; margin-top: 1em}
"""
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
lang = 'es-AR'
direction = 'ltr'
INDEX = 'http://www.elargentino.com/medios/121/Buenos-Aires-Economico.html'
extra_css = ' .titulo{font-size: x-large; font-weight: bold} .volantaImp{font-size: small; font-weight: bold} '
html2lrf_options = [
'--comment' , description
, '--category' , category
, '--publisher', publisher
remove_tags_before= dict(attrs={'id':'titulo'})
remove_tags_after = dict(attrs={'id':'autor' })
remove_tags = [
dict(name=['meta','base','iframe','link','lang'])
,dict(attrs={'id':'barra_tw'})
]
remove_attributes = ['data-count','data-via']
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'
keep_only_tags = [dict(name='div', attrs={'class':'ContainerPop'})]
remove_tags = [dict(name='link')]
feeds = [(u'Articulos', u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=121&Content-Type=text/xml&ChannelDesc=Buenos%20Aires%20Econ%C3%B3mico')]
def print_version(self, url):
main, sep, article_part = url.partition('/nota-')
article_id, rsep, rrest = article_part.partition('-')
return u'http://www.elargentino.com/Impresion.aspx?Id=' + article_id
feeds = [
(u'Argentina' , u'http://www.diariobae.com/rss/argentina.xml' )
,(u'Valores' , u'http://www.diariobae.com/rss/valores.xml' )
,(u'Finanzas' , u'http://www.diariobae.com/rss/finanzas.xml' )
,(u'Negocios' , u'http://www.diariobae.com/rss/negocios.xml' )
,(u'Mundo' , u'http://www.diariobae.com/rss/mundo.xml' )
,(u'5 dias' , u'http://www.diariobae.com/rss/5dias.xml' )
,(u'Espectaculos', u'http://www.diariobae.com/rss/espectaculos.xml')
]
def preprocess_html(self, soup):
    for item in soup.findAll(style=True):
        del item['style']
    soup.html['lang'] = self.lang
    soup.html['dir'] = self.direction
    # Declare language and utf-8 charset so viewers render accented text correctly
    mlang = Tag(soup, 'meta', [("http-equiv", "Content-Language"), ("content", self.lang)])
    mcharset = Tag(soup, 'meta', [("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
    soup.head.insert(0, mlang)
    soup.head.insert(1, mcharset)
    return soup
def get_cover_url(self):
cover_url = None
soup = self.index_to_soup(self.INDEX)
cover_item = soup.find('div',attrs={'class':'colder'})
if cover_item:
clean_url = self.image_url_processor(None,cover_item.div.img['src'])
cover_url = 'http://www.elargentino.com' + clean_url + '&height=600'
return cover_url
def image_url_processor(self, baseurl, url):
base, sep, rest = url.rpartition('?Id=')
img, sep2, rrest = rest.partition('&')
return base + sep + img

View File

@ -0,0 +1,13 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1301972345(BasicNewsRecipe):
title = u'Catholic News Agency'
language = 'en'
__author__ = 'Jetkey'
oldest_article = 5
max_articles_per_feed = 20
feeds = [(u'U.S. News', u'http://feeds.feedburner.com/catholicnewsagency/dailynews-us'),
(u'Vatican', u'http://feeds.feedburner.com/catholicnewsagency/dailynews-vatican'),
(u'Bishops Corner', u'http://feeds.feedburner.com/catholicnewsagency/columns/bishopscorner'),
(u'Saint of the Day', u'http://feeds.feedburner.com/catholicnewsagency/saintoftheday')]

View File

@ -0,0 +1,37 @@
#created by sexymax15 ....sexymax15@gmail.com
#christian post recipe
from calibre.web.feeds.news import BasicNewsRecipe
class ChristianPost(BasicNewsRecipe):
title = 'The Christian Post'
__author__ = 'sexymax15'
description = 'Homepage'
language = 'en'
no_stylesheets = True
use_embedded_content = False
oldest_article = 30
max_articles_per_feed = 15
remove_empty_feeds = True
no_stylesheets = True
remove_javascript = True
extra_css = '''
h1 {color:#008852;font-family:Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:18px;}
h2 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:16px; } '''
feeds = [
('Homepage', 'http://www.christianpost.com/services/rss/feed/'),
('Most Popular', 'http://www.christianpost.com/services/rss/feed/most-popular'),
('Entertainment', 'http://www.christianpost.com/services/rss/feed/entertainment/'),
('Politics', 'http://www.christianpost.com/services/rss/feed/politics/'),
('Living', 'http://www.christianpost.com/services/rss/feed/living/'),
('Business', 'http://www.christianpost.com/services/rss/feed/business/'),
('Opinion', 'http://www.christianpost.com/services/rss/feed/opinion/')
]
def print_version(self, url):
return url +'print.html'

View File

@ -1,69 +0,0 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
'''
criticadigital.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class CriticaDigital(BasicNewsRecipe):
title = 'Critica de la Argentina'
__author__ = 'Darko Miletic and Sujata Raman'
description = 'Noticias de Argentina'
oldest_article = 2
max_articles_per_feed = 100
language = 'es_AR'
no_stylesheets = True
use_embedded_content = False
encoding = 'cp1252'
extra_css = '''
h1{font-family:"Trebuchet MS";}
h3{color:#9A0000; font-family:Tahoma; font-size:x-small;}
h2{color:#504E53; font-family:Arial,Helvetica,sans-serif ;font-size:small;}
#epigrafe{font-family:Arial,Helvetica,sans-serif ;color:#666666 ; font-size:x-small;}
p {font-family:Arial,Helvetica,sans-serif;}
#fecha{color:#858585; font-family:Tahoma; font-size:x-small;}
#autor{color:#858585; font-family:Tahoma; font-size:x-small;}
#hora{color:#F00000;font-family:Tahoma; font-size:x-small;}
'''
keep_only_tags = [
dict(name='div', attrs={'class':['bloqueTitulosNoticia','cfotonota']})
,dict(name='div', attrs={'id':'boxautor'})
,dict(name='p', attrs={'id':'textoNota'})
]
remove_tags = [
dict(name='div', attrs={'class':'box300' })
,dict(name='div', style=True )
,dict(name='div', attrs={'class':'titcomentario'})
,dict(name='div', attrs={'class':'comentario' })
,dict(name='div', attrs={'class':'paginador' })
]
feeds = [
(u'Politica', u'http://www.criticadigital.com/herramientas/rss.php?ch=politica' )
,(u'Economia', u'http://www.criticadigital.com/herramientas/rss.php?ch=economia' )
,(u'Deportes', u'http://www.criticadigital.com/herramientas/rss.php?ch=deportes' )
,(u'Espectaculos', u'http://www.criticadigital.com/herramientas/rss.php?ch=espectaculos')
,(u'Mundo', u'http://www.criticadigital.com/herramientas/rss.php?ch=mundo' )
,(u'Policiales', u'http://www.criticadigital.com/herramientas/rss.php?ch=policiales' )
,(u'Sociedad', u'http://www.criticadigital.com/herramientas/rss.php?ch=sociedad' )
,(u'Salud', u'http://www.criticadigital.com/herramientas/rss.php?ch=salud' )
,(u'Tecnologia', u'http://www.criticadigital.com/herramientas/rss.php?ch=tecnologia' )
,(u'Santa Fe', u'http://www.criticadigital.com/herramientas/rss.php?ch=santa_fe' )
]
def get_cover_url(self):
cover_url = None
index = 'http://www.criticadigital.com/impresa/'
soup = self.index_to_soup(index)
link_item = soup.find('div',attrs={'class':'tapa'})
if link_item:
cover_url = index + link_item.img['src']
return cover_url

View File

@ -0,0 +1,52 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1306061239(BasicNewsRecipe):
title = u'The Daily Mirror'
description = 'News as provided by The Daily Mirror - UK'
__author__ = 'Dave Asbury'
language = 'en_GB'
cover_url = 'http://yookeo.com/screens/m/i/mirror.co.uk.jpg'
masthead_url = 'http://www.nmauk.co.uk/nma/images/daily_mirror.gif'
oldest_article = 1
max_articles_per_feed = 100
remove_empty_feeds = True
remove_javascript = True
no_stylesheets = True
keep_only_tags = [
dict(name='h1'),
dict(attrs={'class':['article-attr']}),
dict(name='div', attrs={'class' : [ 'article-body', 'crosshead']})
]
remove_tags = [
dict(name='div', attrs={'class' : ['caption', 'article-resize']}),
dict( attrs={'class':'append-html'})
]
feeds = [
(u'News', u'http://www.mirror.co.uk/news/rss.xml')
,(u'Tech News', u'http://www.mirror.co.uk/news/technology/rss.xml')
,(u'Weird World','http://www.mirror.co.uk/news/weird-world/rss.xml')
,(u'Film Gossip','http://www.mirror.co.uk/celebs/film/rss.xml')
,(u'Music News','http://www.mirror.co.uk/celebs/music/rss.xml')
,(u'Celebs and Tv Gossip','http://www.mirror.co.uk/celebs/tv/rss.xml')
,(u'Sport','http://www.mirror.co.uk/sport/rss.xml')
,(u'Life Style','http://www.mirror.co.uk/life-style/rss.xml')
,(u'Advice','http://www.mirror.co.uk/advice/rss.xml')
,(u'Travel','http://www.mirror.co.uk/advice/travel/rss.xml')
# example of commented out feed not needed ,(u'Travel','http://www.mirror.co.uk/advice/travel/rss.xml')
]

View File

@ -0,0 +1,78 @@
from calibre.web.feeds.news import BasicNewsRecipe
class DaytonBeachNewsJournal(BasicNewsRecipe):
title ='Daytona Beach News Journal'
__author__ = 'BRGriff'
publisher = 'News-JournalOnline.com'
description = 'Daytona Beach, Florida, Newspaper'
category = 'News, Daytona Beach, Florida'
oldest_article = 1
max_articles_per_feed = 100
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
language = 'en'
filterDuplicates = True
remove_attributes = ['style']
keep_only_tags = [dict(name='div', attrs={'class':'page-header'}),
dict(name='div', attrs={'class':'asset-body'})
]
remove_tags = [dict(name='div', attrs={'class':['byline-section', 'asset-meta']})
]
feeds = [
#####NEWS#####
(u"News", u"http://www.news-journalonline.com/rss.xml"),
(u"Breaking News", u"http://www.news-journalonline.com/breakingnews/rss.xml"),
(u"Local - East Volusia", u"http://www.news-journalonline.com/news/local/east-volusia/rss.xml"),
(u"Local - West Volusia", u"http://www.news-journalonline.com/news/local/west-volusia/rss.xml"),
(u"Local - Southeast", u"http://www.news-journalonline.com/news/local/southeast-volusia/rss.xml"),
(u"Local - Flagler", u"http://www.news-journalonline.com/news/local/flagler/rss.xml"),
(u"Florida", u"http://www.news-journalonline.com/news/florida/rss.xml"),
(u"National/World", u"http://www.news-journalonline.com/news/nationworld/rss.xml"),
(u"Politics", u"http://www.news-journalonline.com/news/politics/rss.xml"),
(u"News of Record", u"http://www.news-journalonline.com/news/news-of-record/rss.xml"),
####BUSINESS####
(u"Business", u"http://www.news-journalonline.com/business/rss.xml"),
#(u"Jobs", u"http://www.news-journalonline.com/business/jobs/rss.xml"),
#(u"Markets", u"http://www.news-journalonline.com/business/markets/rss.xml"),
#(u"Real Estate", u"http://www.news-journalonline.com/business/real-estate/rss.xml"),
#(u"Technology", u"http://www.news-journalonline.com/business/technology/rss.xml"),
####SPORTS####
(u"Sports", u"http://www.news-journalonline.com/sports/rss.xml"),
(u"Racing", u"http://www.news-journalonline.com/racing/rss.xml"),
(u"Highschool", u"http://www.news-journalonline.com/sports/highschool/rss.xml"),
(u"College", u"http://www.news-journalonline.com/sports/college/rss.xml"),
(u"Basketball", u"http://www.news-journalonline.com/sports/basketball/rss.xml"),
(u"Football", u"http://www.news-journalonline.com/sports/football/rss.xml"),
(u"Golf", u"http://www.news-journalonline.com/sports/golf/rss.xml"),
(u"Other Sports", u"http://www.news-journalonline.com/sports/other/rss.xml"),
####LIFESTYLE####
(u"Lifestyle", u"http://www.news-journalonline.com/lifestyle/rss.xml"),
#(u"Fashion", u"http://www.news-journalonline.com/lifestyle/fashion/rss.xml"),
(u"Food", u"http://www.news-journalonline.com/lifestyle/food/rss.xml"),
#(u"Health", u"http://www.news-journalonline.com/lifestyle/health/rss.xml"),
(u"Home and Garden", u"http://www.news-journalonline.com/lifestyle/home-and-garden/rss.xml"),
(u"Living", u"http://www.news-journalonline.com/lifestyle/living/rss.xml"),
(u"Religion", u"http://www.news-journalonline.com/lifestyle/religion/rss.xml"),
#(u"Travel", u"http://www.news-journalonline.com/lifestyle/travel/rss.xml"),
####OPINION####
#(u"Opinion", u"http://www.news-journalonline.com/opinion/rss.xml"),
#(u"Letters to Editor", u"http://www.news-journalonline.com/opinion/letters-to-the-editor/rss.xml"),
#(u"Columns", u"http://www.news-journalonline.com/columns/rss.xml"),
#(u"Podcasts", u"http://www.news-journalonline.com/podcasts/rss.xml"),
####ENTERTAINMENT#### ##Weekly Feature##
(u"Entertainment", u"http://www.go386.com/rss.xml"),
(u"Go Out", u"http://www.go386.com/go/rss.xml"),
(u"Music", u"http://www.go386.com/music/rss.xml"),
(u"Movies", u"http://www.go386.com/movies/rss.xml"),
#(u"Culture", u"http://www.go386.com/culture/rss.xml"),
]
extra_css = '''
.page-header{font-family:Arial,Helvetica,sans-serif; font-weight:bold; font-size:22pt;}
.asset-body{font-family:Helvetica,Arial,sans-serif; font-size:16pt;}
'''

View File

@ -0,0 +1,18 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
class AdvancedUserRecipe1307834113(BasicNewsRecipe):
title = u'Down To Earth'
oldest_article = 300
__author__ = 'sexymax15'
max_articles_per_feed = 30
no_stylesheets = True
remove_javascript = True
remove_attributes = ['width','height']
use_embedded_content = False
language = 'en_IN'
remove_empty_feeds = True
remove_tags_before = dict(name='div', id='PageContent')
remove_tags_after = [dict(name='div', attrs={'class':'box'})]
remove_tags =[{'class':'box'}]
feeds = [
    (u'editor', u'http://www.downtoearth.org.in/taxonomy/term/20348/0/feed'),
    (u'cover story', u'http://www.downtoearth.org.in/taxonomy/term/20345/0/feed'),
    (u'special report', u'http://www.downtoearth.org.in/taxonomy/term/20384/0/feed'),
    (u'features', u'http://www.downtoearth.org.in/taxonomy/term/20350/0/feed'),
    (u'news', u'http://www.downtoearth.org.in/taxonomy/term/20366/0/feed'),
    (u'debate', u'http://www.downtoearth.org.in/taxonomy/term/20347/0/feed'),
    (u'natural disasters', u'http://www.downtoearth.org.in/taxonomy/term/20822/0/feed')
]

View File

@ -0,0 +1,61 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
www.clubdelebook.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class ElClubDelEbook(BasicNewsRecipe):
title = 'El club del ebook'
__author__ = 'Darko Miletic'
description = 'El Club del eBook, es la primera fuente de informacion sobre ebooks de Argentina. Aca vas a encontrar noticias, tips, tutoriales, recursos y opiniones sobre el mundo de los libros electronicos.'
tags = 'ebook, libro electronico, e-book, ebooks, libros electronicos, e-books'
oldest_article = 7
max_articles_per_feed = 100
language = 'es_AR'
encoding = 'utf-8'
no_stylesheets = True
use_embedded_content = True
publication_type = 'blog'
masthead_url = 'http://dl.dropbox.com/u/2845131/elclubdelebook.png'
extra_css = """
body{font-family: Arial,Helvetica,sans-serif}
img{ margin-bottom: 0.8em;
border: 1px solid #333333;
padding: 4px; display: block
}
"""
conversion_options = {
'comment' : description
, 'tags' : tags
, 'publisher': title
, 'language' : language
}
remove_tags = [dict(attrs={'id':'crp_related'})]
remove_tags_after = dict(attrs={'id':'crp_related'})
feeds = [(u'Articulos', u'http://feeds.feedburner.com/ElClubDelEbook')]
def preprocess_html(self, soup):
    for item in soup.findAll(style=True):
        del item['style']
    # Flatten anchors: keep image-only links as bare divs, otherwise
    # replace the link with its plain text
    for item in soup.findAll('a'):
        limg = item.find('img')
        if item.string is not None:
            item.replaceWith(item.string)
        elif limg:
            item.name = 'div'
            item.attrs = []
        else:
            item.replaceWith(self.tag_to_string(item))
    # Give images a fallback alt attribute
    for item in soup.findAll('img'):
        if not item.has_key('alt'):
            item['alt'] = 'image'
    return soup

View File

@ -1,72 +1,59 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
cronista.com
www.cronista.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class ElCronista(BasicNewsRecipe):
title = 'El Cronista'
class ElCronistaComercial(BasicNewsRecipe):
title = 'El Cronista Comercial'
__author__ = 'Darko Miletic'
description = 'Noticias de Argentina'
description = 'El Cronista Comercial es el Diario economico-politico mas valorado. Es la fuente mas confiable de informacion en temas de economia, finanzas y negocios enmarcados politicamente.'
publisher = 'Cronista.com'
category = 'news, politics, economy, finances, Argentina'
oldest_article = 2
language = 'es_AR'
max_articles_per_feed = 100
max_articles_per_feed = 200
no_stylesheets = True
encoding = 'utf8'
use_embedded_content = False
encoding = 'cp1252'
language = 'es_AR'
remove_empty_feeds = True
publication_type = 'newspaper'
masthead_url = 'http://www.cronista.com/export/sites/diarioelcronista/arte/header-logo.gif'
extra_css = """
body{font-family: Arial,Helvetica,sans-serif }
h2{font-family: Georgia,"Times New Roman",Times,serif }
img{margin-bottom: 0.4em; display:block}
.nom{font-weight: bold; vertical-align: baseline}
.autor-cfoto{border-bottom: 1px solid #D2D2D2;
border-top: 1px solid #D2D2D2;
display: inline-block;
margin: 0 10px 10px 0;
padding: 10px;
width: 210px}
.under{font-weight: bold}
.time{font-size: small}
"""
html2lrf_options = [
'--comment' , description
, '--category' , 'news, Argentina'
, '--publisher' , title
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
remove_tags = [
dict(name=['meta','link','base','iframe','object','embed'])
,dict(attrs={'class':['user-tools','tabsmedia']})
]
remove_attributes = ['lang']
remove_tags_before = dict(attrs={'class':'top'})
remove_tags_after = dict(attrs={'class':'content-nota'})
feeds = [(u'Ultimas noticias', u'http://www.cronista.com/rss.html')]
keep_only_tags = [
dict(name='table', attrs={'width':'100%' })
,dict(name='h1' , attrs={'class':'Arialgris16normal'})
]
remove_tags = [dict(name='a', attrs={'class':'Arialazul12'})]
feeds = [
(u'Economia' , u'http://www.cronista.com/adjuntos/8/rss/Economia_EI.xml' )
,(u'Negocios' , u'http://www.cronista.com/adjuntos/8/rss/negocios_EI.xml' )
,(u'Ultimo momento' , u'http://www.cronista.com/adjuntos/8/rss/ultimo_momento.xml' )
,(u'Finanzas y Mercados' , u'http://www.cronista.com/adjuntos/8/rss/Finanzas_Mercados_EI.xml' )
,(u'Financial Times' , u'http://www.cronista.com/adjuntos/8/rss/FT_EI.xml' )
,(u'Opinion edicion impresa' , u'http://www.cronista.com/adjuntos/8/rss/opinion_edicion_impresa.xml' )
,(u'Socialmente Responsables', u'http://www.cronista.com/adjuntos/8/rss/Socialmente_Responsables.xml')
,(u'Asuntos Legales' , u'http://www.cronista.com/adjuntos/8/rss/asuntoslegales.xml' )
,(u'IT Business' , u'http://www.cronista.com/adjuntos/8/rss/itbusiness.xml' )
,(u'Management y RR.HH.' , u'http://www.cronista.com/adjuntos/8/rss/management.xml' )
,(u'Inversiones Personales' , u'http://www.cronista.com/adjuntos/8/rss/inversionespersonales.xml' )
]
def print_version(self, url):
main, sep, rest = url.partition('.com/notas/')
article_id, lsep, rrest = rest.partition('-')
return 'http://www.cronista.com/interior/index.php?p=imprimir_nota&idNota=' + article_id
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
soup.head.insert(0,mtag)
soup.head.base.extract()
htext = soup.find('h1',attrs={'class':'Arialgris16normal'})
htext.name = 'p'
soup.prettify()
for item in soup.findAll(style=True):
del item['style']
return soup
def get_cover_url(self):
cover_url = None
index = 'http://www.cronista.com/contenidos/'
soup = self.index_to_soup(index + 'ee.html')
link_item = soup.find('a',attrs={'href':"javascript:Close()"})
if link_item:
cover_url = index + link_item.img['src']
return cover_url

View File

@ -1,5 +1,5 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
www.eluniversal.com
'''
@ -15,12 +15,20 @@ class ElUniversal(BasicNewsRecipe):
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
remove_empty_feeds = True
encoding = 'cp1252'
publisher = 'El Universal'
category = 'news, Caracas, Venezuela, world'
language = 'es_VE'
publication_type = 'newspaper'
cover_url = strftime('http://static.eluniversal.com/%Y/%m/%d/portada.jpg')
extra_css = """
.txt60{font-family: Tahoma,Geneva,sans-serif; font-size: small}
.txt29{font-family: Tahoma,Geneva,sans-serif; font-size: small; color: gray}
.txt38{font-family: Georgia,"Times New Roman",Times,serif; font-size: xx-large}
.txt35{font-family: Georgia,"Times New Roman",Times,serif; font-size: large}
body{font-family: Verdana,Arial,Helvetica,sans-serif}
"""
conversion_options = {
'comments' : description
,'tags' : category
@ -28,10 +36,11 @@ class ElUniversal(BasicNewsRecipe):
,'publisher' : publisher
}
keep_only_tags = [dict(name='div', attrs={'class':'Nota'})]
remove_tags_before=dict(attrs={'class':'header-print MB10'})
remove_tags_after= dict(attrs={'id':'SizeText'})
remove_tags = [
dict(name=['object','link','script','iframe'])
,dict(name='div',attrs={'class':'Herramientas'})
dict(name=['object','link','script','iframe','meta'])
,dict(attrs={'class':'header-print MB10'})
]
feeds = [

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008 - 2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = 'Copyright 2011 Starson17'
'''
engadget.com
'''
@ -9,14 +9,29 @@ engadget.com
from calibre.web.feeds.news import BasicNewsRecipe
class Engadget(BasicNewsRecipe):
title = u'Engadget'
__author__ = 'Darko Miletic'
title = u'Engadget_Full'
__author__ = 'Starson17'
__version__ = 'v1.00'
__date__ = '02, July 2011'
description = 'Tech news'
language = 'en'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = True
use_embedded_content = False
remove_javascript = True
remove_empty_feeds = True
feeds = [ (u'Posts', u'http://www.engadget.com/rss.xml')]
keep_only_tags = [dict(name='div', attrs={'class':['post_content permalink ','post_content permalink alt-post-full']})]
remove_tags = [dict(name='div', attrs={'class':['filed_under','post_footer']})]
remove_tags_after = [dict(name='div', attrs={'class':['post_footer']})]
feeds = [(u'Posts', u'http://www.engadget.com/rss.xml')]
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''

View File

@ -1,32 +1,41 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
ft.com
www.ft.com
'''
import datetime
from calibre.web.feeds.news import BasicNewsRecipe
class FinancialTimes(BasicNewsRecipe):
title = u'Financial Times'
__author__ = 'Darko Miletic and Sujata Raman'
description = ('Financial world news. Available after 5AM '
'GMT, daily.')
class FinancialTimes_rss(BasicNewsRecipe):
title = 'Financial Times'
__author__ = 'Darko Miletic'
description = "The Financial Times (FT) is one of the world's leading business news and information organisations, recognised internationally for its authority, integrity and accuracy."
publisher = 'The Financial Times Ltd.'
category = 'news, finances, politics, World'
oldest_article = 2
language = 'en'
max_articles_per_feed = 100
max_articles_per_feed = 250
no_stylesheets = True
use_embedded_content = False
needs_subscription = True
simultaneous_downloads= 1
delay = 1
encoding = 'utf8'
publication_type = 'newspaper'
masthead_url = 'http://im.media.ft.com/m/img/masthead_main.jpg'
LOGIN = 'https://registration.ft.com/registration/barrier/login'
INDEX = 'http://www.ft.com'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
, 'linearize_tables' : True
}
def get_browser(self):
br = BasicNewsRecipe.get_browser()
br.open(self.INDEX)
if self.username is not None and self.password is not None:
br.open(self.LOGIN)
br.select_form(name='loginForm')
@ -35,31 +44,63 @@ class FinancialTimes(BasicNewsRecipe):
br.submit()
return br
keep_only_tags = [ dict(name='div', attrs={'id':'cont'}) ]
remove_tags_after = dict(name='p', attrs={'class':'copyright'})
keep_only_tags = [dict(name='div', attrs={'class':['fullstory fullstoryHeader','fullstory fullstoryBody','ft-story-header','ft-story-body','index-detail']})]
remove_tags = [
dict(name='div', attrs={'id':'floating-con'})
,dict(name=['meta','iframe','base','object','embed','link'])
,dict(attrs={'class':['storyTools','story-package','screen-copy','story-package separator','expandable-image']})
]
remove_attributes = ['width','height','lang']
extra_css = '''
    body{font-family:Arial,Helvetica,sans-serif;}
    h2{font-size:large;}
    .ft-story-header{font-size:xx-small;}
    .ft-story-body{font-size:small;}
    a{color:#003399;}
'''
extra_css = """
    body{font-family: Georgia,Times,"Times New Roman",serif}
    h2{font-size:large}
    .ft-story-header{font-size: x-small}
    .container{font-size:x-small;}
    h3{font-size:x-small;color:#003399;}
    .copyright{font-size: x-small}
    img{margin-top: 0.8em; display: block}
    .lastUpdated{font-family: Arial,Helvetica,sans-serif; font-size: x-small}
    .byline,.ft-story-body,.ft-story-header{font-family: Arial,Helvetica,sans-serif}
"""
feeds = [
(u'UK' , u'http://www.ft.com/rss/home/uk' )
,(u'US' , u'http://www.ft.com/rss/home/us' )
,(u'Europe' , u'http://www.ft.com/rss/home/europe' )
,(u'Asia' , u'http://www.ft.com/rss/home/asia' )
,(u'Middle East', u'http://www.ft.com/rss/home/middleeast')
]
def preprocess_html(self, soup):
    # Force a utf-8 content-type meta tag
    content_type = soup.find('meta', {'http-equiv':'Content-Type'})
    if content_type:
        content_type['content'] = 'text/html; charset=utf-8'
    # Neutralize promo markup by turning it into plain divs
    items = ['promo-box','promo-title',
             'promo-headline','promo-image',
             'promo-intro','promo-link','subhead']
    for item in items:
        for it in soup.findAll(item):
            it.name = 'div'
            it.attrs = []
    for item in soup.findAll(style=True):
        del item['style']
    # Flatten anchors: keep image-only links as bare divs, otherwise
    # replace the link with its plain text
    for item in soup.findAll('a'):
        limg = item.find('img')
        if item.string is not None:
            item.replaceWith(item.string)
        elif limg:
            item.name = 'div'
            item.attrs = []
        else:
            item.replaceWith(self.tag_to_string(item))
    # Give images a fallback alt attribute
    for item in soup.findAll('img'):
        if not item.has_key('alt'):
            item['alt'] = 'image'
    return soup
def get_cover_url(self):
cdate = datetime.date.today()
if cdate.isoweekday() == 7:
cdate -= datetime.timedelta(days=1)
return cdate.strftime('http://specials.ft.com/vtf_pdf/%d%m%y_FRONT1_USA.pdf')

View File

@ -1,15 +1,19 @@
__license__ = 'GPL v3'
__copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
ft.com
www.ft.com/uk-edition
'''
import datetime
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class FinancialTimes(BasicNewsRecipe):
title = u'Financial Times - UK printed edition'
title = 'Financial Times - UK printed edition'
__author__ = 'Darko Miletic'
description = 'Financial world news'
description = "The Financial Times (FT) is one of the world's leading business news and information organisations, recognised internationally for its authority, integrity and accuracy."
publisher = 'The Financial Times Ltd.'
category = 'news, finances, politics, UK, World'
oldest_article = 2
language = 'en_GB'
max_articles_per_feed = 250
@ -17,14 +21,23 @@ class FinancialTimes(BasicNewsRecipe):
use_embedded_content = False
needs_subscription = True
encoding = 'utf8'
simultaneous_downloads= 1
delay = 1
publication_type = 'newspaper'
masthead_url = 'http://im.media.ft.com/m/img/masthead_main.jpg'
LOGIN = 'https://registration.ft.com/registration/barrier/login'
INDEX = 'http://www.ft.com/uk-edition'
PREFIX = 'http://www.ft.com'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
, 'linearize_tables' : True
}
def get_browser(self):
br = BasicNewsRecipe.get_browser()
br.open(self.INDEX)
if self.username is not None and self.password is not None:
br.open(self.LOGIN)
br.select_form(name='loginForm')
@ -33,29 +46,34 @@ class FinancialTimes(BasicNewsRecipe):
br.submit()
return br
keep_only_tags = [ dict(name='div', attrs={'id':'cont'}) ]
remove_tags_after = dict(name='p', attrs={'class':'copyright'})
keep_only_tags = [dict(name='div', attrs={'class':['fullstory fullstoryHeader','fullstory fullstoryBody','ft-story-header','ft-story-body','index-detail']})]
remove_tags = [
dict(name='div', attrs={'id':'floating-con'})
,dict(name=['meta','iframe','base','object','embed','link'])
,dict(attrs={'class':['storyTools','story-package','screen-copy','story-package separator','expandable-image']})
]
remove_attributes = ['width','height','lang']
extra_css = """
body{font-family:Arial,Helvetica,sans-serif;}
h2{font-size:large;}
.ft-story-header{font-size:xx-small;}
.ft-story-body{font-size:small;}
a{color:#003399;}
body{font-family: Georgia,Times,"Times New Roman",serif}
h2{font-size:large}
.ft-story-header{font-size: x-small}
.container{font-size:x-small;}
h3{font-size:x-small;color:#003399;}
.copyright{font-size: x-small}
img{margin-top: 0.8em; display: block}
.lastUpdated{font-family: Arial,Helvetica,sans-serif; font-size: x-small}
.byline,.ft-story-body,.ft-story-header{font-family: Arial,Helvetica,sans-serif}
"""
def get_artlinks(self, elem):
articles = []
for item in elem.findAll('a',href=True):
url = self.PREFIX + item['href']
rawlink = item['href']
if rawlink.startswith('http://'):
url = rawlink
else:
url = self.PREFIX + rawlink
title = self.tag_to_string(item)
date = strftime(self.timefmt)
articles.append({
@ -86,5 +104,35 @@ class FinancialTimes(BasicNewsRecipe):
return feeds
def preprocess_html(self, soup):
return self.adeify_images(soup)
    # Neutralize promo markup by turning it into plain divs
    items = ['promo-box','promo-title',
             'promo-headline','promo-image',
             'promo-intro','promo-link','subhead']
    for item in items:
        for it in soup.findAll(item):
            it.name = 'div'
            it.attrs = []
    for item in soup.findAll(style=True):
        del item['style']
    # Flatten anchors: keep image-only links as bare divs, otherwise
    # replace the link with its plain text
    for item in soup.findAll('a'):
        limg = item.find('img')
        if item.string is not None:
            item.replaceWith(item.string)
        elif limg:
            item.name = 'div'
            item.attrs = []
        else:
            item.replaceWith(self.tag_to_string(item))
    # Give images a fallback alt attribute
    for item in soup.findAll('img'):
        if not item.has_key('alt'):
            item['alt'] = 'image'
    return soup
def get_cover_url(self):
cdate = datetime.date.today()
if cdate.isoweekday() == 7:
cdate -= datetime.timedelta(days=1)
return cdate.strftime('http://specials.ft.com/vtf_pdf/%d%m%y_FRONT1_LON.pdf')

View File

@ -0,0 +1,81 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
frontlineonnet.com
'''
import re
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class Frontlineonnet(BasicNewsRecipe):
title = 'Frontline'
__author__ = 'Darko Miletic'
description = "India's national magazine"
publisher = 'Frontline'
category = 'news, politics, India'
no_stylesheets = True
delay = 1
INDEX = 'http://frontlineonnet.com/'
use_embedded_content = False
encoding = 'cp1252'
language = 'en_IN'
publication_type = 'magazine'
masthead_url = 'http://frontlineonnet.com/images/newfline.jpg'
extra_css = """
body{font-family: Verdana,Arial,Helvetica,sans-serif}
img{margin-top:0.5em; margin-bottom: 0.7em; display: block}
"""
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
, 'linearize_tables' : True
}
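# Rebuild a clean HTML head around the <base> tag and map the legacy
# <byline> and <center> tags onto styled divs before parsing.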
preprocess_regexps = [
(re.compile(r'.*?<base', re.DOTALL|re.IGNORECASE),lambda match: '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"><html dir="ltr" xml:lang="en-IN"><head><title>title</title><base')
,(re.compile(r'<base .*?>', re.DOTALL|re.IGNORECASE),lambda match: '</head><body>')
,(re.compile(r'<byline>', re.DOTALL|re.IGNORECASE),lambda match: '<div class="byline">')
,(re.compile(r'</byline>', re.DOTALL|re.IGNORECASE),lambda match: '</div>')
,(re.compile(r'<center>', re.DOTALL|re.IGNORECASE),lambda match: '<div class="ctr">')
,(re.compile(r'</center>', re.DOTALL|re.IGNORECASE),lambda match: '</div>')
]
keep_only_tags= [
dict(name='font', attrs={'class':'storyhead'})
,dict(attrs={'class':'byline'})
]
remove_attributes=['size','noshade','border']
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll('img'):
if not item.has_key('alt'):
item['alt'] = 'image'
return soup
def parse_index(self):
articles = []
soup = self.index_to_soup(self.INDEX)
for feed_link in soup.findAll('a',href=True):
if feed_link['href'].startswith('stories/'):
url = self.INDEX + feed_link['href']
title = self.tag_to_string(feed_link)
date = strftime(self.timefmt)
articles.append({
'title' :title
,'date' :date
,'url' :url
,'description':''
})
return [('Frontline', articles)]
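# Printable versions are served by hinduonnet.com, keyed by the story's file name.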
def print_version(self, url):
return "http://www.hinduonnet.com/thehindu/thscrip/print.pl?prd=fline&file=" + url.rpartition('/')[2]
def image_url_processor(self, baseurl, url):
return url.replace('../images/', self.INDEX + 'images/').strip()

View File

@@ -11,8 +11,8 @@ import mechanize, re
class GoComics(BasicNewsRecipe):
title = 'GoComics'
__author__ = 'Starson17'
__version__ = '1.05'
__date__ = '19 may 2011'
__version__ = '1.06'
__date__ = '07 June 2011'
description = u'200+ Comics - Customize for more days/comics: Defaults to 7 days, 25 comics - 20 general, 5 editorial.'
category = 'news, comics'
language = 'en'
@@ -56,225 +56,318 @@ class GoComics(BasicNewsRecipe):
def parse_index(self):
feeds = []
for title, url in [
######## COMICS - GENERAL ########
(u"2 Cows and a Chicken", u"http://www.gocomics.com/2cowsandachicken"),
# (u"9 to 5", u"http://www.gocomics.com/9to5"),
# (u"The Academia Waltz", u"http://www.gocomics.com/academiawaltz"),
# (u"Adam@Home", u"http://www.gocomics.com/adamathome"),
# (u"Agnes", u"http://www.gocomics.com/agnes"),
# (u"Andy Capp", u"http://www.gocomics.com/andycapp"),
# (u"Animal Crackers", u"http://www.gocomics.com/animalcrackers"),
# (u"Annie", u"http://www.gocomics.com/annie"),
(u"The Argyle Sweater", u"http://www.gocomics.com/theargylesweater"),
# (u"Ask Shagg", u"http://www.gocomics.com/askshagg"),
#(u"9 Chickweed Lane", u"http://www.gocomics.com/9chickweedlane"),
(u"9 to 5", u"http://www.gocomics.com/9to5"),
#(u"Adam At Home", u"http://www.gocomics.com/adamathome"),
(u"Agnes", u"http://www.gocomics.com/agnes"),
#(u"Alley Oop", u"http://www.gocomics.com/alleyoop"),
#(u"Andy Capp", u"http://www.gocomics.com/andycapp"),
#(u"Animal Crackers", u"http://www.gocomics.com/animalcrackers"),
#(u"Annie", u"http://www.gocomics.com/annie"),
#(u"Arlo & Janis", u"http://www.gocomics.com/arloandjanis"),
#(u"Ask Shagg", u"http://www.gocomics.com/askshagg"),
(u"B.C.", u"http://www.gocomics.com/bc"),
# (u"Back in the Day", u"http://www.gocomics.com/backintheday"),
# (u"Bad Reporter", u"http://www.gocomics.com/badreporter"),
# (u"Baldo", u"http://www.gocomics.com/baldo"),
# (u"Ballard Street", u"http://www.gocomics.com/ballardstreet"),
# (u"Barkeater Lake", u"http://www.gocomics.com/barkeaterlake"),
# (u"The Barn", u"http://www.gocomics.com/thebarn"),
# (u"Basic Instructions", u"http://www.gocomics.com/basicinstructions"),
# (u"Bewley", u"http://www.gocomics.com/bewley"),
# (u"Big Top", u"http://www.gocomics.com/bigtop"),
# (u"Biographic", u"http://www.gocomics.com/biographic"),
(u"Birdbrains", u"http://www.gocomics.com/birdbrains"),
# (u"Bleeker: The Rechargeable Dog", u"http://www.gocomics.com/bleeker"),
# (u"Bliss", u"http://www.gocomics.com/bliss"),
#(u"Back in the Day", u"http://www.gocomics.com/backintheday"),
#(u"Bad Reporter", u"http://www.gocomics.com/badreporter"),
#(u"Baldo", u"http://www.gocomics.com/baldo"),
#(u"Ballard Street", u"http://www.gocomics.com/ballardstreet"),
#(u"Barkeater Lake", u"http://www.gocomics.com/barkeaterlake"),
#(u"Basic Instructions", u"http://www.gocomics.com/basicinstructions"),
#(u"Ben", u"http://www.gocomics.com/ben"),
#(u"Betty", u"http://www.gocomics.com/betty"),
#(u"Bewley", u"http://www.gocomics.com/bewley"),
#(u"Big Nate", u"http://www.gocomics.com/bignate"),
#(u"Big Top", u"http://www.gocomics.com/bigtop"),
#(u"Biographic", u"http://www.gocomics.com/biographic"),
#(u"Birdbrains", u"http://www.gocomics.com/birdbrains"),
#(u"Bleeker: The Rechargeable Dog", u"http://www.gocomics.com/bleeker"),
#(u"Bliss", u"http://www.gocomics.com/bliss"),
(u"Bloom County", u"http://www.gocomics.com/bloomcounty"),
# (u"Bo Nanas", u"http://www.gocomics.com/bonanas"),
# (u"Bob the Squirrel", u"http://www.gocomics.com/bobthesquirrel"),
# (u"The Boiling Point", u"http://www.gocomics.com/theboilingpoint"),
# (u"Boomerangs", u"http://www.gocomics.com/boomerangs"),
# (u"The Boondocks", u"http://www.gocomics.com/boondocks"),
# (u"Bottomliners", u"http://www.gocomics.com/bottomliners"),
# (u"Bound and Gagged", u"http://www.gocomics.com/boundandgagged"),
# (u"Brainwaves", u"http://www.gocomics.com/brainwaves"),
# (u"Brenda Starr", u"http://www.gocomics.com/brendastarr"),
# (u"Brewster Rockit", u"http://www.gocomics.com/brewsterrockit"),
# (u"Broom Hilda", u"http://www.gocomics.com/broomhilda"),
#(u"Bo Nanas", u"http://www.gocomics.com/bonanas"),
#(u"Bob the Squirrel", u"http://www.gocomics.com/bobthesquirrel"),
#(u"Boomerangs", u"http://www.gocomics.com/boomerangs"),
#(u"Bottomliners", u"http://www.gocomics.com/bottomliners"),
#(u"Bound and Gagged", u"http://www.gocomics.com/boundandgagged"),
#(u"Brainwaves", u"http://www.gocomics.com/brainwaves"),
#(u"Brenda Starr", u"http://www.gocomics.com/brendastarr"),
#(u"Brevity", u"http://www.gocomics.com/brevity"),
#(u"Brewster Rockit", u"http://www.gocomics.com/brewsterrockit"),
#(u"Broom Hilda", u"http://www.gocomics.com/broomhilda"),
(u"Calvin and Hobbes", u"http://www.gocomics.com/calvinandhobbes"),
# (u"Candorville", u"http://www.gocomics.com/candorville"),
# (u"Cathy", u"http://www.gocomics.com/cathy"),
# (u"C'est la Vie", u"http://www.gocomics.com/cestlavie"),
# (u"Chuckle Bros", u"http://www.gocomics.com/chucklebros"),
# (u"Citizen Dog", u"http://www.gocomics.com/citizendog"),
# (u"The City", u"http://www.gocomics.com/thecity"),
# (u"Cleats", u"http://www.gocomics.com/cleats"),
# (u"Close to Home", u"http://www.gocomics.com/closetohome"),
# (u"Compu-toon", u"http://www.gocomics.com/compu-toon"),
# (u"Cornered", u"http://www.gocomics.com/cornered"),
(u"Cul de Sac", u"http://www.gocomics.com/culdesac"),
# (u"Daddy's Home", u"http://www.gocomics.com/daddyshome"),
# (u"Deep Cover", u"http://www.gocomics.com/deepcover"),
# (u"Dick Tracy", u"http://www.gocomics.com/dicktracy"),
# (u"The Dinette Set", u"http://www.gocomics.com/dinetteset"),
# (u"Dog Eat Doug", u"http://www.gocomics.com/dogeatdoug"),
# (u"Domestic Abuse", u"http://www.gocomics.com/domesticabuse"),
# (u"Doodles", u"http://www.gocomics.com/doodles"),
#(u"Candorville", u"http://www.gocomics.com/candorville"),
#(u"Cathy", u"http://www.gocomics.com/cathy"),
#(u"C'est la Vie", u"http://www.gocomics.com/cestlavie"),
#(u"Cheap Thrills", u"http://www.gocomics.com/cheapthrills"),
#(u"Chuckle Bros", u"http://www.gocomics.com/chucklebros"),
#(u"Citizen Dog", u"http://www.gocomics.com/citizendog"),
#(u"Cleats", u"http://www.gocomics.com/cleats"),
#(u"Close to Home", u"http://www.gocomics.com/closetohome"),
#(u"Committed", u"http://www.gocomics.com/committed"),
#(u"Compu-toon", u"http://www.gocomics.com/compu-toon"),
#(u"Cornered", u"http://www.gocomics.com/cornered"),
#(u"Cow & Boy", u"http://www.gocomics.com/cow&boy"),
#(u"Cul de Sac", u"http://www.gocomics.com/culdesac"),
#(u"Daddy's Home", u"http://www.gocomics.com/daddyshome"),
#(u"Deep Cover", u"http://www.gocomics.com/deepcover"),
#(u"Dick Tracy", u"http://www.gocomics.com/dicktracy"),
(u"Dog Eat Doug", u"http://www.gocomics.com/dogeatdoug"),
#(u"Domestic Abuse", u"http://www.gocomics.com/domesticabuse"),
(u"Doodles", u"http://www.gocomics.com/doodles"),
(u"Doonesbury", u"http://www.gocomics.com/doonesbury"),
# (u"The Doozies", u"http://www.gocomics.com/thedoozies"),
# (u"The Duplex", u"http://www.gocomics.com/duplex"),
# (u"Eek!", u"http://www.gocomics.com/eek"),
# (u"The Elderberries", u"http://www.gocomics.com/theelderberries"),
# (u"Flight Deck", u"http://www.gocomics.com/flightdeck"),
# (u"Flo and Friends", u"http://www.gocomics.com/floandfriends"),
# (u"The Flying McCoys", u"http://www.gocomics.com/theflyingmccoys"),
(u"For Better or For Worse", u"http://www.gocomics.com/forbetterorforworse"),
# (u"For Heaven's Sake", u"http://www.gocomics.com/forheavenssake"),
# (u"Fort Knox", u"http://www.gocomics.com/fortknox"),
# (u"FoxTrot", u"http://www.gocomics.com/foxtrot"),
(u"FoxTrot Classics", u"http://www.gocomics.com/foxtrotclassics"),
# (u"Frank & Ernest", u"http://www.gocomics.com/frankandernest"),
# (u"Fred Basset", u"http://www.gocomics.com/fredbasset"),
# (u"Free Range", u"http://www.gocomics.com/freerange"),
# (u"Frog Applause", u"http://www.gocomics.com/frogapplause"),
# (u"The Fusco Brothers", u"http://www.gocomics.com/thefuscobrothers"),
#(u"Drabble", u"http://www.gocomics.com/drabble"),
#(u"Eek!", u"http://www.gocomics.com/eek"),
#(u"F Minus", u"http://www.gocomics.com/fminus"),
#(u"Family Tree", u"http://www.gocomics.com/familytree"),
#(u"Farcus", u"http://www.gocomics.com/farcus"),
(u"Fat Cats Classics", u"http://www.gocomics.com/fatcatsclassics"),
#(u"Ferd'nand", u"http://www.gocomics.com/ferdnand"),
#(u"Flight Deck", u"http://www.gocomics.com/flightdeck"),
(u"Flo and Friends", u"http://www.gocomics.com/floandfriends"),
#(u"For Better or For Worse", u"http://www.gocomics.com/forbetterorforworse"),
#(u"For Heaven's Sake", u"http://www.gocomics.com/forheavenssake"),
#(u"Fort Knox", u"http://www.gocomics.com/fortknox"),
#(u"FoxTrot Classics", u"http://www.gocomics.com/foxtrotclassics"),
(u"FoxTrot", u"http://www.gocomics.com/foxtrot"),
#(u"Frank & Ernest", u"http://www.gocomics.com/frankandernest"),
#(u"Frazz", u"http://www.gocomics.com/frazz"),
#(u"Fred Basset", u"http://www.gocomics.com/fredbasset"),
#(u"Free Range", u"http://www.gocomics.com/freerange"),
#(u"Frog Applause", u"http://www.gocomics.com/frogapplause"),
#(u"Garfield Minus Garfield", u"http://www.gocomics.com/garfieldminusgarfield"),
(u"Garfield", u"http://www.gocomics.com/garfield"),
# (u"Garfield Minus Garfield", u"http://www.gocomics.com/garfieldminusgarfield"),
# (u"Gasoline Alley", u"http://www.gocomics.com/gasolinealley"),
# (u"Gil Thorp", u"http://www.gocomics.com/gilthorp"),
# (u"Ginger Meggs", u"http://www.gocomics.com/gingermeggs"),
# (u"Girls & Sports", u"http://www.gocomics.com/girlsandsports"),
# (u"Haiku Ewe", u"http://www.gocomics.com/haikuewe"),
# (u"Heart of the City", u"http://www.gocomics.com/heartofthecity"),
# (u"Heathcliff", u"http://www.gocomics.com/heathcliff"),
# (u"Herb and Jamaal", u"http://www.gocomics.com/herbandjamaal"),
# (u"Home and Away", u"http://www.gocomics.com/homeandaway"),
# (u"Housebroken", u"http://www.gocomics.com/housebroken"),
# (u"Hubert and Abby", u"http://www.gocomics.com/hubertandabby"),
# (u"Imagine This", u"http://www.gocomics.com/imaginethis"),
# (u"In the Bleachers", u"http://www.gocomics.com/inthebleachers"),
# (u"In the Sticks", u"http://www.gocomics.com/inthesticks"),
# (u"Ink Pen", u"http://www.gocomics.com/inkpen"),
# (u"It's All About You", u"http://www.gocomics.com/itsallaboutyou"),
# (u"Joe Vanilla", u"http://www.gocomics.com/joevanilla"),
# (u"La Cucaracha", u"http://www.gocomics.com/lacucaracha"),
# (u"Last Kiss", u"http://www.gocomics.com/lastkiss"),
# (u"Legend of Bill", u"http://www.gocomics.com/legendofbill"),
# (u"Liberty Meadows", u"http://www.gocomics.com/libertymeadows"),
(u"Lio", u"http://www.gocomics.com/lio"),
# (u"Little Dog Lost", u"http://www.gocomics.com/littledoglost"),
# (u"Little Otto", u"http://www.gocomics.com/littleotto"),
# (u"Loose Parts", u"http://www.gocomics.com/looseparts"),
# (u"Love Is...", u"http://www.gocomics.com/loveis"),
# (u"Maintaining", u"http://www.gocomics.com/maintaining"),
# (u"The Meaning of Lila", u"http://www.gocomics.com/meaningoflila"),
# (u"Middle-Aged White Guy", u"http://www.gocomics.com/middleagedwhiteguy"),
# (u"The Middletons", u"http://www.gocomics.com/themiddletons"),
# (u"Momma", u"http://www.gocomics.com/momma"),
# (u"Mutt & Jeff", u"http://www.gocomics.com/muttandjeff"),
# (u"Mythtickle", u"http://www.gocomics.com/mythtickle"),
# (u"Nest Heads", u"http://www.gocomics.com/nestheads"),
# (u"NEUROTICA", u"http://www.gocomics.com/neurotica"),
(u"New Adventures of Queen Victoria", u"http://www.gocomics.com/thenewadventuresofqueenvictoria"),
(u"Non Sequitur", u"http://www.gocomics.com/nonsequitur"),
# (u"The Norm", u"http://www.gocomics.com/thenorm"),
# (u"On A Claire Day", u"http://www.gocomics.com/onaclaireday"),
# (u"One Big Happy", u"http://www.gocomics.com/onebighappy"),
# (u"The Other Coast", u"http://www.gocomics.com/theothercoast"),
# (u"Out of the Gene Pool Re-Runs", u"http://www.gocomics.com/outofthegenepool"),
# (u"Overboard", u"http://www.gocomics.com/overboard"),
# (u"Pibgorn", u"http://www.gocomics.com/pibgorn"),
# (u"Pibgorn Sketches", u"http://www.gocomics.com/pibgornsketches"),
#(u"Gasoline Alley", u"http://www.gocomics.com/gasolinealley"),
#(u"Geech Classics", u"http://www.gocomics.com/geechclassics"),
#(u"Get Fuzzy", u"http://www.gocomics.com/getfuzzy"),
#(u"Gil Thorp", u"http://www.gocomics.com/gilthorp"),
#(u"Ginger Meggs", u"http://www.gocomics.com/gingermeggs"),
#(u"Girls & Sports", u"http://www.gocomics.com/girlsandsports"),
#(u"Graffiti", u"http://www.gocomics.com/graffiti"),
#(u"Grand Avenue", u"http://www.gocomics.com/grandavenue"),
#(u"Haiku Ewe", u"http://www.gocomics.com/haikuewe"),
#(u"Heart of the City", u"http://www.gocomics.com/heartofthecity"),
(u"Heathcliff", u"http://www.gocomics.com/heathcliff"),
#(u"Herb and Jamaal", u"http://www.gocomics.com/herbandjamaal"),
#(u"Herman", u"http://www.gocomics.com/herman"),
#(u"Home and Away", u"http://www.gocomics.com/homeandaway"),
#(u"Housebroken", u"http://www.gocomics.com/housebroken"),
#(u"Hubert and Abby", u"http://www.gocomics.com/hubertandabby"),
#(u"Imagine This", u"http://www.gocomics.com/imaginethis"),
#(u"In the Bleachers", u"http://www.gocomics.com/inthebleachers"),
#(u"In the Sticks", u"http://www.gocomics.com/inthesticks"),
#(u"Ink Pen", u"http://www.gocomics.com/inkpen"),
#(u"It's All About You", u"http://www.gocomics.com/itsallaboutyou"),
#(u"Jane's World", u"http://www.gocomics.com/janesworld"),
#(u"Joe Vanilla", u"http://www.gocomics.com/joevanilla"),
#(u"Jump Start", u"http://www.gocomics.com/jumpstart"),
#(u"Kit 'N' Carlyle", u"http://www.gocomics.com/kitandcarlyle"),
#(u"La Cucaracha", u"http://www.gocomics.com/lacucaracha"),
#(u"Last Kiss", u"http://www.gocomics.com/lastkiss"),
#(u"Legend of Bill", u"http://www.gocomics.com/legendofbill"),
#(u"Liberty Meadows", u"http://www.gocomics.com/libertymeadows"),
#(u"Li'l Abner Classics", u"http://www.gocomics.com/lilabnerclassics"),
#(u"Lio", u"http://www.gocomics.com/lio"),
#(u"Little Dog Lost", u"http://www.gocomics.com/littledoglost"),
#(u"Little Otto", u"http://www.gocomics.com/littleotto"),
#(u"Lola", u"http://www.gocomics.com/lola"),
#(u"Loose Parts", u"http://www.gocomics.com/looseparts"),
#(u"Love Is...", u"http://www.gocomics.com/loveis"),
#(u"Luann", u"http://www.gocomics.com/luann"),
#(u"Maintaining", u"http://www.gocomics.com/maintaining"),
(u"Marmaduke", u"http://www.gocomics.com/marmaduke"),
#(u"Meg! Classics", u"http://www.gocomics.com/megclassics"),
#(u"Middle-Aged White Guy", u"http://www.gocomics.com/middleagedwhiteguy"),
#(u"Minimum Security", u"http://www.gocomics.com/minimumsecurity"),
#(u"Moderately Confused", u"http://www.gocomics.com/moderatelyconfused"),
(u"Momma", u"http://www.gocomics.com/momma"),
#(u"Monty", u"http://www.gocomics.com/monty"),
#(u"Motley Classics", u"http://www.gocomics.com/motleyclassics"),
(u"Mutt & Jeff", u"http://www.gocomics.com/muttandjeff"),
#(u"Mythtickle", u"http://www.gocomics.com/mythtickle"),
#(u"Nancy", u"http://www.gocomics.com/nancy"),
#(u"Natural Selection", u"http://www.gocomics.com/naturalselection"),
#(u"Nest Heads", u"http://www.gocomics.com/nestheads"),
#(u"NEUROTICA", u"http://www.gocomics.com/neurotica"),
#(u"New Adventures of Queen Victoria", u"http://www.gocomics.com/thenewadventuresofqueenvictoria"),
#(u"Non Sequitur", u"http://www.gocomics.com/nonsequitur"),
#(u"Off The Mark", u"http://www.gocomics.com/offthemark"),
#(u"On A Claire Day", u"http://www.gocomics.com/onaclaireday"),
#(u"One Big Happy Classics", u"http://www.gocomics.com/onebighappyclassics"),
#(u"One Big Happy", u"http://www.gocomics.com/onebighappy"),
#(u"Out of the Gene Pool Re-Runs", u"http://www.gocomics.com/outofthegenepool"),
#(u"Over the Hedge", u"http://www.gocomics.com/overthehedge"),
#(u"Overboard", u"http://www.gocomics.com/overboard"),
#(u"PC and Pixel", u"http://www.gocomics.com/pcandpixel"),
(u"Peanuts", u"http://www.gocomics.com/peanuts"),
#(u"Pearls Before Swine", u"http://www.gocomics.com/pearlsbeforeswine"),
#(u"Pibgorn Sketches", u"http://www.gocomics.com/pibgornsketches"),
#(u"Pibgorn", u"http://www.gocomics.com/pibgorn"),
(u"Pickles", u"http://www.gocomics.com/pickles"),
# (u"Pinkerton", u"http://www.gocomics.com/pinkerton"),
# (u"Pluggers", u"http://www.gocomics.com/pluggers"),
(u"Pooch Cafe", u"http://www.gocomics.com/poochcafe"),
# (u"PreTeena", u"http://www.gocomics.com/preteena"),
# (u"The Quigmans", u"http://www.gocomics.com/thequigmans"),
# (u"Rabbits Against Magic", u"http://www.gocomics.com/rabbitsagainstmagic"),
(u"Real Life Adventures", u"http://www.gocomics.com/reallifeadventures"),
# (u"Red and Rover", u"http://www.gocomics.com/redandrover"),
# (u"Red Meat", u"http://www.gocomics.com/redmeat"),
# (u"Reynolds Unwrapped", u"http://www.gocomics.com/reynoldsunwrapped"),
# (u"Ronaldinho Gaucho", u"http://www.gocomics.com/ronaldinhogaucho"),
# (u"Rubes", u"http://www.gocomics.com/rubes"),
# (u"Scary Gary", u"http://www.gocomics.com/scarygary"),
(u"Shoe", u"http://www.gocomics.com/shoe"),
# (u"Shoecabbage", u"http://www.gocomics.com/shoecabbage"),
# (u"Skin Horse", u"http://www.gocomics.com/skinhorse"),
# (u"Slowpoke", u"http://www.gocomics.com/slowpoke"),
# (u"Speed Bump", u"http://www.gocomics.com/speedbump"),
# (u"State of the Union", u"http://www.gocomics.com/stateoftheunion"),
(u"Stone Soup", u"http://www.gocomics.com/stonesoup"),
# (u"Strange Brew", u"http://www.gocomics.com/strangebrew"),
# (u"Sylvia", u"http://www.gocomics.com/sylvia"),
# (u"Tank McNamara", u"http://www.gocomics.com/tankmcnamara"),
# (u"Tiny Sepuku", u"http://www.gocomics.com/tinysepuku"),
# (u"TOBY", u"http://www.gocomics.com/toby"),
# (u"Tom the Dancing Bug", u"http://www.gocomics.com/tomthedancingbug"),
# (u"Too Much Coffee Man", u"http://www.gocomics.com/toomuchcoffeeman"),
# (u"W.T. Duck", u"http://www.gocomics.com/wtduck"),
# (u"Watch Your Head", u"http://www.gocomics.com/watchyourhead"),
# (u"Wee Pals", u"http://www.gocomics.com/weepals"),
# (u"Winnie the Pooh", u"http://www.gocomics.com/winniethepooh"),
(u"Wizard of Id", u"http://www.gocomics.com/wizardofid"),
# (u"Working It Out", u"http://www.gocomics.com/workingitout"),
# (u"Yenny", u"http://www.gocomics.com/yenny"),
# (u"Zack Hill", u"http://www.gocomics.com/zackhill"),
#(u"Pinkerton", u"http://www.gocomics.com/pinkerton"),
#(u"Pluggers", u"http://www.gocomics.com/pluggers"),
#(u"Pooch Cafe", u"http://www.gocomics.com/poochcafe"),
#(u"PreTeena", u"http://www.gocomics.com/preteena"),
#(u"Prickly City", u"http://www.gocomics.com/pricklycity"),
#(u"Rabbits Against Magic", u"http://www.gocomics.com/rabbitsagainstmagic"),
#(u"Raising Duncan Classics", u"http://www.gocomics.com/raisingduncanclassics"),
#(u"Real Life Adventures", u"http://www.gocomics.com/reallifeadventures"),
#(u"Reality Check", u"http://www.gocomics.com/realitycheck"),
#(u"Red and Rover", u"http://www.gocomics.com/redandrover"),
#(u"Red Meat", u"http://www.gocomics.com/redmeat"),
#(u"Reynolds Unwrapped", u"http://www.gocomics.com/reynoldsunwrapped"),
#(u"Rip Haywire", u"http://www.gocomics.com/riphaywire"),
#(u"Ripley's Believe It or Not!", u"http://www.gocomics.com/ripleysbelieveitornot"),
#(u"Ronaldinho Gaucho", u"http://www.gocomics.com/ronaldinhogaucho"),
#(u"Rose Is Rose", u"http://www.gocomics.com/roseisrose"),
#(u"Rubes", u"http://www.gocomics.com/rubes"),
#(u"Rudy Park", u"http://www.gocomics.com/rudypark"),
#(u"Scary Gary", u"http://www.gocomics.com/scarygary"),
#(u"Shirley and Son Classics", u"http://www.gocomics.com/shirleyandsonclassics"),
#(u"Shoe", u"http://www.gocomics.com/shoe"),
#(u"Shoecabbage", u"http://www.gocomics.com/shoecabbage"),
#(u"Skin Horse", u"http://www.gocomics.com/skinhorse"),
#(u"Slowpoke", u"http://www.gocomics.com/slowpoke"),
#(u"Soup To Nutz", u"http://www.gocomics.com/souptonutz"),
#(u"Speed Bump", u"http://www.gocomics.com/speedbump"),
#(u"Spot The Frog", u"http://www.gocomics.com/spotthefrog"),
#(u"State of the Union", u"http://www.gocomics.com/stateoftheunion"),
#(u"Stone Soup", u"http://www.gocomics.com/stonesoup"),
#(u"Strange Brew", u"http://www.gocomics.com/strangebrew"),
#(u"Sylvia", u"http://www.gocomics.com/sylvia"),
#(u"Tank McNamara", u"http://www.gocomics.com/tankmcnamara"),
#(u"Tarzan Classics", u"http://www.gocomics.com/tarzanclassics"),
#(u"That's Life", u"http://www.gocomics.com/thatslife"),
#(u"The Academia Waltz", u"http://www.gocomics.com/academiawaltz"),
#(u"The Argyle Sweater", u"http://www.gocomics.com/theargylesweater"),
#(u"The Barn", u"http://www.gocomics.com/thebarn"),
#(u"The Boiling Point", u"http://www.gocomics.com/theboilingpoint"),
#(u"The Boondocks", u"http://www.gocomics.com/boondocks"),
#(u"The Born Loser", u"http://www.gocomics.com/thebornloser"),
#(u"The Buckets", u"http://www.gocomics.com/thebuckets"),
#(u"The City", u"http://www.gocomics.com/thecity"),
#(u"The Dinette Set", u"http://www.gocomics.com/dinetteset"),
#(u"The Doozies", u"http://www.gocomics.com/thedoozies"),
#(u"The Duplex", u"http://www.gocomics.com/duplex"),
#(u"The Elderberries", u"http://www.gocomics.com/theelderberries"),
#(u"The Flying McCoys", u"http://www.gocomics.com/theflyingmccoys"),
#(u"The Fusco Brothers", u"http://www.gocomics.com/thefuscobrothers"),
#(u"The Grizzwells", u"http://www.gocomics.com/thegrizzwells"),
#(u"The Humble Stumble", u"http://www.gocomics.com/thehumblestumble"),
#(u"The Knight Life", u"http://www.gocomics.com/theknightlife"),
#(u"The Meaning of Lila", u"http://www.gocomics.com/meaningoflila"),
#(u"The Middletons", u"http://www.gocomics.com/themiddletons"),
#(u"The Norm", u"http://www.gocomics.com/thenorm"),
#(u"The Other Coast", u"http://www.gocomics.com/theothercoast"),
#(u"The Quigmans", u"http://www.gocomics.com/thequigmans"),
#(u"The Sunshine Club", u"http://www.gocomics.com/thesunshineclub"),
#(u"Tiny Sepuk", u"http://www.gocomics.com/tinysepuk"),
#(u"TOBY", u"http://www.gocomics.com/toby"),
#(u"Tom the Dancing Bug", u"http://www.gocomics.com/tomthedancingbug"),
#(u"Too Much Coffee Man", u"http://www.gocomics.com/toomuchcoffeeman"),
#(u"Unstrange Phenomena", u"http://www.gocomics.com/unstrangephenomena"),
#(u"W.T. Duck", u"http://www.gocomics.com/wtduck"),
#(u"Watch Your Head", u"http://www.gocomics.com/watchyourhead"),
#(u"Wee Pals", u"http://www.gocomics.com/weepals"),
#(u"Winnie the Pooh", u"http://www.gocomics.com/winniethepooh"),
#(u"Wizard of Id", u"http://www.gocomics.com/wizardofid"),
#(u"Working Daze", u"http://www.gocomics.com/workingdaze"),
#(u"Working It Out", u"http://www.gocomics.com/workingitout"),
#(u"Yenny", u"http://www.gocomics.com/yenny"),
#(u"Zack Hill", u"http://www.gocomics.com/zackhill"),
(u"Ziggy", u"http://www.gocomics.com/ziggy"),
######## COMICS - EDITORIAL ########
("Lalo Alcaraz","http://www.gocomics.com/laloalcaraz"),
("Nick Anderson","http://www.gocomics.com/nickanderson"),
("Chuck Asay","http://www.gocomics.com/chuckasay"),
("Tony Auth","http://www.gocomics.com/tonyauth"),
("Donna Barstow","http://www.gocomics.com/donnabarstow"),
# ("Bruce Beattie","http://www.gocomics.com/brucebeattie"),
# ("Clay Bennett","http://www.gocomics.com/claybennett"),
# ("Lisa Benson","http://www.gocomics.com/lisabenson"),
# ("Steve Benson","http://www.gocomics.com/stevebenson"),
# ("Chip Bok","http://www.gocomics.com/chipbok"),
# ("Steve Breen","http://www.gocomics.com/stevebreen"),
# ("Chris Britt","http://www.gocomics.com/chrisbritt"),
# ("Stuart Carlson","http://www.gocomics.com/stuartcarlson"),
# ("Ken Catalino","http://www.gocomics.com/kencatalino"),
# ("Paul Conrad","http://www.gocomics.com/paulconrad"),
# ("Jeff Danziger","http://www.gocomics.com/jeffdanziger"),
# ("Matt Davies","http://www.gocomics.com/mattdavies"),
# ("John Deering","http://www.gocomics.com/johndeering"),
# ("Bob Gorrell","http://www.gocomics.com/bobgorrell"),
# ("Walt Handelsman","http://www.gocomics.com/walthandelsman"),
# ("Clay Jones","http://www.gocomics.com/clayjones"),
# ("Kevin Kallaugher","http://www.gocomics.com/kevinkallaugher"),
# ("Steve Kelley","http://www.gocomics.com/stevekelley"),
# ("Dick Locher","http://www.gocomics.com/dicklocher"),
# ("Chan Lowe","http://www.gocomics.com/chanlowe"),
# ("Mike Luckovich","http://www.gocomics.com/mikeluckovich"),
# ("Gary Markstein","http://www.gocomics.com/garymarkstein"),
# ("Glenn McCoy","http://www.gocomics.com/glennmccoy"),
# ("Jim Morin","http://www.gocomics.com/jimmorin"),
# ("Jack Ohman","http://www.gocomics.com/jackohman"),
# ("Pat Oliphant","http://www.gocomics.com/patoliphant"),
# ("Joel Pett","http://www.gocomics.com/joelpett"),
# ("Ted Rall","http://www.gocomics.com/tedrall"),
# ("Michael Ramirez","http://www.gocomics.com/michaelramirez"),
# ("Marshall Ramsey","http://www.gocomics.com/marshallramsey"),
# ("Steve Sack","http://www.gocomics.com/stevesack"),
# ("Ben Sargent","http://www.gocomics.com/bensargent"),
# ("Drew Sheneman","http://www.gocomics.com/drewsheneman"),
# ("John Sherffius","http://www.gocomics.com/johnsherffius"),
# ("Small World","http://www.gocomics.com/smallworld"),
# ("Scott Stantis","http://www.gocomics.com/scottstantis"),
# ("Wayne Stayskal","http://www.gocomics.com/waynestayskal"),
# ("Dana Summers","http://www.gocomics.com/danasummers"),
# ("Paul Szep","http://www.gocomics.com/paulszep"),
# ("Mike Thompson","http://www.gocomics.com/mikethompson"),
# ("Tom Toles","http://www.gocomics.com/tomtoles"),
# ("Gary Varvel","http://www.gocomics.com/garyvarvel"),
# ("ViewsAfrica","http://www.gocomics.com/viewsafrica"),
# ("ViewsAmerica","http://www.gocomics.com/viewsamerica"),
# ("ViewsAsia","http://www.gocomics.com/viewsasia"),
# ("ViewsBusiness","http://www.gocomics.com/viewsbusiness"),
# ("ViewsEurope","http://www.gocomics.com/viewseurope"),
# ("ViewsLatinAmerica","http://www.gocomics.com/viewslatinamerica"),
# ("ViewsMidEast","http://www.gocomics.com/viewsmideast"),
# ("Views of the World","http://www.gocomics.com/viewsoftheworld"),
# ("Kerry Waghorn","http://www.gocomics.com/facesinthenews"),
# ("Dan Wasserman","http://www.gocomics.com/danwasserman"),
# ("Signe Wilkinson","http://www.gocomics.com/signewilkinson"),
# ("Wit of the World","http://www.gocomics.com/witoftheworld"),
# ("Don Wright","http://www.gocomics.com/donwright"),
#
######## EDITORIAL CARTOONS #####################
(u"Adam Zyglis", u"http://www.gocomics.com/adamzyglis"),
#(u"Andy Singer", u"http://www.gocomics.com/andysinger"),
#(u"Ben Sargent",u"http://www.gocomics.com/bensargent"),
#(u"Bill Day", u"http://www.gocomics.com/billday"),
#(u"Bill Schorr", u"http://www.gocomics.com/billschorr"),
#(u"Bob Englehart", u"http://www.gocomics.com/bobenglehart"),
(u"Bob Gorrell",u"http://www.gocomics.com/bobgorrell"),
#(u"Brian Fairrington", u"http://www.gocomics.com/brianfairrington"),
#(u"Bruce Beattie", u"http://www.gocomics.com/brucebeattie"),
#(u"Cam Cardow", u"http://www.gocomics.com/camcardow"),
#(u"Chan Lowe",u"http://www.gocomics.com/chanlowe"),
#(u"Chip Bok",u"http://www.gocomics.com/chipbok"),
#(u"Chris Britt",u"http://www.gocomics.com/chrisbritt"),
#(u"Chuck Asay",u"http://www.gocomics.com/chuckasay"),
#(u"Clay Bennett",u"http://www.gocomics.com/claybennett"),
#(u"Clay Jones",u"http://www.gocomics.com/clayjones"),
#(u"Dan Wasserman",u"http://www.gocomics.com/danwasserman"),
#(u"Dana Summers",u"http://www.gocomics.com/danasummers"),
#(u"Daryl Cagle", u"http://www.gocomics.com/darylcagle"),
#(u"David Fitzsimmons", u"http://www.gocomics.com/davidfitzsimmons"),
(u"Dick Locher",u"http://www.gocomics.com/dicklocher"),
#(u"Don Wright",u"http://www.gocomics.com/donwright"),
#(u"Donna Barstow",u"http://www.gocomics.com/donnabarstow"),
#(u"Drew Litton", u"http://www.gocomics.com/drewlitton"),
#(u"Drew Sheneman",u"http://www.gocomics.com/drewsheneman"),
#(u"Ed Stein", u"http://www.gocomics.com/edstein"),
#(u"Eric Allie", u"http://www.gocomics.com/ericallie"),
#(u"Gary Markstein", u"http://www.gocomics.com/garymarkstein"),
#(u"Gary McCoy", u"http://www.gocomics.com/garymccoy"),
#(u"Gary Varvel", u"http://www.gocomics.com/garyvarvel"),
#(u"Glenn McCoy",u"http://www.gocomics.com/glennmccoy"),
#(u"Henry Payne", u"http://www.gocomics.com/henrypayne"),
#(u"Jack Ohman",u"http://www.gocomics.com/jackohman"),
#(u"JD Crowe", u"http://www.gocomics.com/jdcrowe"),
#(u"Jeff Danziger",u"http://www.gocomics.com/jeffdanziger"),
#(u"Jeff Parker", u"http://www.gocomics.com/jeffparker"),
#(u"Jeff Stahler", u"http://www.gocomics.com/jeffstahler"),
#(u"Jerry Holbert", u"http://www.gocomics.com/jerryholbert"),
#(u"Jim Morin",u"http://www.gocomics.com/jimmorin"),
#(u"Joel Pett",u"http://www.gocomics.com/joelpett"),
#(u"John Cole", u"http://www.gocomics.com/johncole"),
#(u"John Darkow", u"http://www.gocomics.com/johndarkow"),
#(u"John Deering",u"http://www.gocomics.com/johndeering"),
#(u"John Sherffius", u"http://www.gocomics.com/johnsherffius"),
#(u"Ken Catalino",u"http://www.gocomics.com/kencatalino"),
#(u"Kerry Waghorn",u"http://www.gocomics.com/facesinthenews"),
#(u"Kevin Kallaugher",u"http://www.gocomics.com/kevinkallaugher"),
#(u"Lalo Alcaraz",u"http://www.gocomics.com/laloalcaraz"),
#(u"Larry Wright", u"http://www.gocomics.com/larrywright"),
#(u"Lisa Benson", u"http://www.gocomics.com/lisabenson"),
#(u"Marshall Ramsey", u"http://www.gocomics.com/marshallramsey"),
#(u"Matt Bors", u"http://www.gocomics.com/mattbors"),
#(u"Matt Davies",u"http://www.gocomics.com/mattdavies"),
#(u"Michael Ramirez", u"http://www.gocomics.com/michaelramirez"),
#(u"Mike Keefe", u"http://www.gocomics.com/mikekeefe"),
#(u"Mike Luckovich", u"http://www.gocomics.com/mikeluckovich"),
#(u"MIke Thompson", u"http://www.gocomics.com/mikethompson"),
#(u"Monte Wolverton", u"http://www.gocomics.com/montewolverton"),
#(u"Mr. Fish", u"http://www.gocomics.com/mrfish"),
#(u"Nate Beeler", u"http://www.gocomics.com/natebeeler"),
#(u"Nick Anderson", u"http://www.gocomics.com/nickanderson"),
#(u"Pat Bagley", u"http://www.gocomics.com/patbagley"),
#(u"Pat Oliphant",u"http://www.gocomics.com/patoliphant"),
#(u"Paul Conrad",u"http://www.gocomics.com/paulconrad"),
#(u"Paul Szep", u"http://www.gocomics.com/paulszep"),
#(u"RJ Matson", u"http://www.gocomics.com/rjmatson"),
#(u"Rob Rogers", u"http://www.gocomics.com/robrogers"),
#(u"Robert Ariail", u"http://www.gocomics.com/robertariail"),
#(u"Scott Stantis", u"http://www.gocomics.com/scottstantis"),
#(u"Signe Wilkinson", u"http://www.gocomics.com/signewilkinson"),
#(u"Small World",u"http://www.gocomics.com/smallworld"),
#(u"Steve Benson", u"http://www.gocomics.com/stevebenson"),
#(u"Steve Breen", u"http://www.gocomics.com/stevebreen"),
#(u"Steve Kelley", u"http://www.gocomics.com/stevekelley"),
#(u"Steve Sack", u"http://www.gocomics.com/stevesack"),
#(u"Stuart Carlson",u"http://www.gocomics.com/stuartcarlson"),
#(u"Ted Rall",u"http://www.gocomics.com/tedrall"),
#(u"(Th)ink", u"http://www.gocomics.com/think"),
#(u"Tom Toles",u"http://www.gocomics.com/tomtoles"),
(u"Tony Auth",u"http://www.gocomics.com/tonyauth"),
#(u"Views of the World",u"http://www.gocomics.com/viewsoftheworld"),
#(u"ViewsAfrica",u"http://www.gocomics.com/viewsafrica"),
#(u"ViewsAmerica",u"http://www.gocomics.com/viewsamerica"),
#(u"ViewsAsia",u"http://www.gocomics.com/viewsasia"),
#(u"ViewsBusiness",u"http://www.gocomics.com/viewsbusiness"),
#(u"ViewsEurope",u"http://www.gocomics.com/viewseurope"),
#(u"ViewsLatinAmerica",u"http://www.gocomics.com/viewslatinamerica"),
#(u"ViewsMidEast",u"http://www.gocomics.com/viewsmideast"),
(u"Walt Handelsman",u"http://www.gocomics.com/walthandelsman"),
#(u"Wayne Stayskal",u"http://www.gocomics.com/waynestayskal"),
#(u"Wit of the World",u"http://www.gocomics.com/witoftheworld"),
]:
print 'Working on: ', title
articles = self.make_links(url)
@@ -352,3 +445,4 @@ class GoComics(BasicNewsRecipe):
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''

View File

@@ -1,5 +1,6 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
from datetime import date, timedelta
class HBR(BasicNewsRecipe):
@@ -12,13 +13,14 @@ class HBR(BasicNewsRecipe):
no_stylesheets = True
LOGIN_URL = 'http://hbr.org/login?request_url=/'
INDEX = 'http://hbr.org/current'
INDEX = 'http://hbr.org/archive-toc/BR'
keep_only_tags = [dict(name='div', id='pageContainer')]
remove_tags = [dict(id=['mastheadContainer', 'magazineHeadline',
'articleToolbarTopRD', 'pageRightSubColumn', 'pageRightColumn',
'todayOnHBRListWidget', 'mostWidget', 'keepUpWithHBR',
'mailingListTout', 'partnerCenter', 'pageFooter',
'superNavHeadContainer', 'hbrDisqus',
'articleToolbarTop', 'articleToolbarBottom', 'articleToolbarRD']),
dict(name='iframe')]
extra_css = '''
@@ -55,9 +57,14 @@ class HBR(BasicNewsRecipe):
def hbr_get_toc(self):
soup = self.index_to_soup(self.INDEX)
url = soup.find('a', text=lambda t:'Full Table of Contents' in t).parent.get('href')
return self.index_to_soup('http://hbr.org'+url)
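# Archive TOC URLs end in yymm; probe next month's issue first, then the
# current one, and use the first page that is not 'Issue Not Found'.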
today = date.today()
future = today + timedelta(days=30)
for x in [x.strftime('%y%m') for x in (future, today)]:
url = self.INDEX + x
soup = self.index_to_soup(url)
if not soup.find(text='Issue Not Found'):
return soup
raise Exception('Could not find current issue')
def hbr_parse_section(self, container, feeds):
current_section = None

View File

@@ -6,7 +6,7 @@ class HBR(BasicNewsRecipe):
title = 'Harvard Business Review Blogs'
description = 'To subscribe go to http://hbr.harvardbusiness.org'
needs_subscription = True
__author__ = 'Kovid Goyal and Sujata Raman, enhanced by BrianG'
__author__ = 'Kovid Goyal, enhanced by BrianG'
language = 'en'
no_stylesheets = True


View File

@@ -6,7 +6,7 @@ class TheIndependent(BasicNewsRecipe):
language = 'en_GB'
__author__ = 'Krittika Goyal'
oldest_article = 1 #days
max_articles_per_feed = 25
max_articles_per_feed = 30
encoding = 'latin1'
no_stylesheets = True
@@ -25,24 +25,39 @@ class TheIndependent(BasicNewsRecipe):
'http://www.independent.co.uk/news/uk/rss'),
('World',
'http://www.independent.co.uk/news/world/rss'),
('Sport',
'http://www.independent.co.uk/sport/rss'),
('Arts and Entertainment',
'http://www.independent.co.uk/arts-entertainment/rss'),
('Business',
'http://www.independent.co.uk/news/business/rss'),
('Life and Style',
'http://www.independent.co.uk/life-style/gadgets-and-tech/news/rss'),
('Science',
'http://www.independent.co.uk/news/science/rss'),
('People',
'http://www.independent.co.uk/news/people/rss'),
('Science',
'http://www.independent.co.uk/news/science/rss'),
('Media',
'http://www.independent.co.uk/news/media/rss'),
('Health and Families',
'http://www.independent.co.uk/life-style/health-and-families/rss'),
('Education',
'http://www.independent.co.uk/news/education/rss'),
('Obituaries',
'http://www.independent.co.uk/news/obituaries/rss'),
('Opinion',
'http://www.independent.co.uk/opinion/rss'),
('Environment',
'http://www.independent.co.uk/environment/rss'),
('Sport',
'http://www.independent.co.uk/sport/rss'),
('Life and Style',
'http://www.independent.co.uk/life-style/rss'),
('Arts and Entertainment',
'http://www.independent.co.uk/arts-entertainment/rss'),
('Travel',
'http://www.independent.co.uk/travel/rss'),
('Money',
'http://www.independent.co.uk/money/rss'),
]
def preprocess_html(self, soup):

View File

@@ -1,5 +1,5 @@
__license__ = 'GPL v3'
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
infobae.com
'''
@@ -9,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class Infobae(BasicNewsRecipe):
title = 'Infobae.com'
__author__ = 'Darko Miletic and Sujata Raman'
description = 'Informacion Libre las 24 horas'
description = 'Infobae.com es el sitio de noticias con mayor actualizacion de Latinoamérica. Noticias actualizadas las 24 horas, los 365 días del año.'
publisher = 'Infobae.com'
category = 'news, politics, Argentina'
oldest_article = 1
@@ -17,13 +17,13 @@ class Infobae(BasicNewsRecipe):
no_stylesheets = True
use_embedded_content = False
language = 'es_AR'
encoding = 'cp1252'
masthead_url = 'http://www.infobae.com/imgs/header/header.gif'
remove_javascript = True
encoding = 'utf8'
masthead_url = 'http://www.infobae.com/media/img/static/logo-infobae.gif'
remove_empty_feeds = True
extra_css = '''
body{font-family:Arial,Helvetica,sans-serif;}
.popUpTitulo{color:#0D4261; font-size: xx-large}
body{font-family: Arial,Helvetica,sans-serif}
img{display: block}
.categoria{font-size: small; text-transform: uppercase}
'''
conversion_options = {
@@ -31,26 +31,44 @@ class Infobae(BasicNewsRecipe):
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
, 'linearize_tables' : True
}
feeds = [
(u'Noticias' , u'http://www.infobae.com/adjuntos/html/RSS/hoy.xml' )
,(u'Salud' , u'http://www.infobae.com/adjuntos/html/RSS/salud.xml' )
,(u'Tecnologia', u'http://www.infobae.com/adjuntos/html/RSS/tecnologia.xml')
,(u'Deportes' , u'http://www.infobae.com/adjuntos/html/RSS/deportes.xml' )
keep_only_tags = [dict(attrs={'class':['titularnota','nota','post-title','post-entry','entry-title','entry-info','entry-content']})]
remove_tags_after = dict(attrs={'class':['interior-noticia','nota-desc','tags']})
remove_tags = [
dict(name=['base','meta','link','iframe','object','embed','ins'])
,dict(attrs={'class':['barranota','tags']})
]
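# The printer-friendly page is selected by the numeric id that precedes
# the first hyphen in the article slug.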
def print_version(self, url):
article_part = url.rpartition('/')[2]
article_id= article_part.partition('-')[0]
return 'http://www.infobae.com/notas/nota_imprimir.php?Idx=' + article_id
feeds = [
(u'Saludable' , u'http://www.infobae.com/rss/saludable.xml')
,(u'Economia' , u'http://www.infobae.com/rss/economia.xml' )
,(u'En Numeros', u'http://www.infobae.com/rss/rating.xml' )
,(u'Finanzas' , u'http://www.infobae.com/rss/finanzas.xml' )
,(u'Mundo' , u'http://www.infobae.com/rss/mundo.xml' )
,(u'Sociedad' , u'http://www.infobae.com/rss/sociedad.xml' )
,(u'Politica' , u'http://www.infobae.com/rss/politica.xml' )
,(u'Deportes' , u'http://www.infobae.com/rss/deportes.xml' )
]
def postprocess_html(self, soup, first):
for tag in soup.findAll(name='strong'):
tag.name = 'b'
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll('a'):
limg = item.find('img')
if item.string is not None:
str = item.string
item.replaceWith(str)
else:
if limg:
item.name = 'div'
item.attrs = []
else:
str = self.tag_to_string(item)
item.replaceWith(str)
for item in soup.findAll('img'):
if not item.has_key('alt'):
item['alt'] = 'image'
return soup

View File

@@ -99,7 +99,7 @@ class LeMonde(BasicNewsRecipe):
keep_only_tags = [
dict(name='div', attrs={'class':['contenu']})
]
remove_tags = [dict(name='div', attrs={'class':['LM_atome']})]
remove_tags_after = [dict(id='appel_temoignage')]
def get_article_url(self, article):

View File

@@ -14,7 +14,7 @@ class LeTemps(BasicNewsRecipe):
title = u'Le Temps'
oldest_article = 7
max_articles_per_feed = 100
__author__ = 'Sujata Raman'
__author__ = 'Kovid Goyal'
description = 'French news. Needs a subscription from http://www.letemps.ch'
no_stylesheets = True
remove_javascript = True
@@ -27,6 +27,7 @@ class LeTemps(BasicNewsRecipe):
def get_browser(self):
br = BasicNewsRecipe.get_browser(self)
br.open('http://www.letemps.ch/login')
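# The credentials form is the second form on the login page (nr is zero-based).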
br.select_form(nr=1)
br['username'] = self.username
br['password'] = self.password
raw = br.submit().read()

View File

@@ -1,5 +1,5 @@
__license__ = 'GPL v3'
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
mondediplo.com
'''
@@ -11,7 +11,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class LeMondeDiplomatiqueEn(BasicNewsRecipe):
title = 'Le Monde diplomatique - English edition'
__author__ = 'Darko Miletic'
description = 'Real journalism making sense of the world around us'
description = "Le Monde diplomatique is the place you go when you want to know what's really happening. This is a major international paper that is truly independent, that sees the world in fresh ways, that focuses on places no other publications reach. We offer a clear, considered view of the conflicting interests and complexities of a modern global world. LMD in English is a concise version of the Paris-based parent edition, publishing all the major stories each month, expertly translated, and with some London-based commissions too. We offer a taster of LMD quality on our website where a selection of articles are available each month."
publisher = 'Le Monde diplomatique'
category = 'news, politics, world'
no_stylesheets = True
@@ -26,7 +26,13 @@ class LeMondeDiplomatiqueEn(BasicNewsRecipe):
INDEX = PREFIX + strftime('%Y/%m/')
use_embedded_content = False
language = 'en'
extra_css = ' body{font-family: "Luxi sans","Lucida sans","Lucida Grande",Lucida,"Lucida Sans Unicode",sans-serif} .surtitre{font-size: 1.2em; font-variant: small-caps; margin-bottom: 0.5em} .chapo{font-size: 1.2em; font-weight: bold; margin: 1em 0 0.5em} .texte{font-family: Georgia,"Times New Roman",serif} h1{color: #990000} .notes{border-top: 1px solid #CCCCCC; font-size: 0.9em; line-height: 1.4em} '
extra_css = """
body{font-family: "Luxi sans","Lucida sans","Lucida Grande",Lucida,"Lucida Sans Unicode",sans-serif}
.surtitre{font-size: 1.2em; font-variant: small-caps; margin-bottom: 0.5em}
.chapo{font-size: 1.2em; font-weight: bold; margin: 1em 0 0.5em}
.texte{font-family: Georgia,"Times New Roman",serif} h1{color: #990000}
.notes{border-top: 1px solid #CCCCCC; font-size: 0.9em; line-height: 1.4em}
"""
conversion_options = {
'comment' : description
@@ -51,7 +57,7 @@ class LeMondeDiplomatiqueEn(BasicNewsRecipe):
, dict(name='div',attrs={'class':'notes surlignable'})
]
remove_tags = [dict(name=['object','link','script','iframe','base'])]
remove_attributes = ['height','width']
remove_attributes = ['height','width','name','lang']
def parse_index(self):
articles = []
@@ -75,3 +81,24 @@ class LeMondeDiplomatiqueEn(BasicNewsRecipe):
})
return [(self.title, articles)]
def get_cover_url(self):
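# The cover is the spip_logos image inside the current-issue block on the index page.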
cover_url = None
soup = self.index_to_soup(self.INDEX)
cover_item = soup.find('div',attrs={'class':'current'})
if cover_item:
ap = cover_item.find('img',attrs={'class':'spip_logos'})
if ap:
cover_url = self.INDEX + ap['src']
return cover_url
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll('a'):
if item.string is not None:
str = item.string
item.replaceWith(str)
else:
str = self.tag_to_string(item)
item.replaceWith(str)
return soup

View File

@@ -1,239 +1,28 @@
#!/usr/bin/env python
from calibre.web.feeds.news import BasicNewsRecipe
__license__ = 'GPL v3'
'''
macleans.ca
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
from datetime import timedelta, date
class Macleans(BasicNewsRecipe):
class AdvancedUserRecipe1308306308(BasicNewsRecipe):
title = u'Macleans Magazine'
__author__ = 'Nick Redding'
language = 'en_CA'
description = ('Macleans Magazine')
__author__ = 'sexymax15'
oldest_article = 30
max_articles_per_feed = 12
use_embedded_content = False
remove_empty_feeds = True
no_stylesheets = True
timefmt = ' [%b %d]'
remove_javascript = True
remove_tags = [dict(name ='img'),dict (id='header'),{'class':'postmetadata'}]
remove_tags_after = {'class':'postmetadata'}
# customization notes: delete sections you are not interested in
# set oldest_article to the maximum number of days back from today to include articles
sectionlist = [
['http://www2.macleans.ca/','Front Page'],
['http://www2.macleans.ca/category/canada/','Canada'],
['http://www2.macleans.ca/category/world-from-the-magazine/','World'],
['http://www2.macleans.ca/category/business','Business'],
['http://www2.macleans.ca/category/arts-culture/','Culture'],
['http://www2.macleans.ca/category/opinion','Opinion'],
['http://www2.macleans.ca/category/health-from-the-magazine/','Health'],
['http://www2.macleans.ca/category/environment-from-the-magazine/','Environment'],
['http://www2.macleans.ca/category/education/','On Campus'],
['http://www2.macleans.ca/category/travel-from-the-magazine/','Travel']
]
oldest_article = 7
# formatting for print version of articles
extra_css = '''h2{font-family:Times,serif; font-size:large;}
small {font-family:Times,serif; font-size:xx-small; list-style-type: none;}
'''
# tag handling for print version of articles
keep_only_tags = [dict(id='tw-print')]
remove_tags = [dict({'class':'postmetadata'})]
def preprocess_html(self,soup):
for img_tag in soup.findAll('img'):
parent_tag = img_tag.parent
if parent_tag.name == 'a':
new_tag = Tag(soup,'p')
new_tag.insert(0,img_tag)
parent_tag.replaceWith(new_tag)
elif parent_tag.name == 'p':
if not self.tag_to_string(parent_tag) == '':
new_div = Tag(soup,'div')
new_tag = Tag(soup,'p')
new_tag.insert(0,img_tag)
parent_tag.replaceWith(new_div)
new_div.insert(0,new_tag)
new_div.insert(1,parent_tag)
return soup
def parse_index(self):
articles = {}
key = None
ans = []
def parse_index_page(page_url,page_title):
def decode_date(datestr):
dmysplit = datestr.strip().lower().split(',')
mdsplit = dmysplit[1].split()
m = ['january','february','march','april','may','june','july','august','september','october','november','december'].index(mdsplit[0])+1
d = int(mdsplit[1])
y = int(dmysplit[2].split()[0])
return date(y,m,d)
def article_title(tag):
atag = tag.find('a',href=True)
if not atag:
return ''
return self.tag_to_string(atag)
def article_url(tag):
atag = tag.find('a',href=True)
if not atag:
return ''
return atag['href']+'print/'
def article_description(tag):
for p_tag in tag.findAll('p'):
d = self.tag_to_string(p_tag,False)
if not d == '':
return d
return ''
def compound_h4_h3_title(tag):
if tag.h4:
if tag.h3:
return self.tag_to_string(tag.h4,False)+u'\u2014'+self.tag_to_string(tag.h3,False)
else:
return self.tag_to_string(tag.h4,False)
elif tag.h3:
return self.tag_to_string(tag.h3,False)
else:
return ''
def compound_h2_h4_title(tag):
if tag.h2:
if tag.h4:
return self.tag_to_string(tag.h2,False)+u'\u2014'+self.tag_to_string(tag.h4,False)
else:
return self.tag_to_string(tag.h2,False)
elif tag.h4:
return self.tag_to_string(tag.h4,False)
else:
return ''
def handle_article(header_tag, outer_tag):
if header_tag:
url = article_url(header_tag)
title = article_title(header_tag)
author_date_tag = outer_tag.h4
if author_date_tag:
author_date = self.tag_to_string(author_date_tag,False).split(' - ')
author = author_date[0].strip()
article_date = decode_date(author_date[1])
earliest_date = date.today() - timedelta(days=self.oldest_article)
if article_date < earliest_date:
self.log("Skipping article dated %s" % author_date[1])
else:
excerpt_div = outer_tag.find('div','excerpt')
if excerpt_div:
description = article_description(excerpt_div)
else:
description = ''
if not articles.has_key(page_title):
articles[page_title] = []
articles[page_title].append(dict(title=title,url=url,date=author_date[1],description=description,author=author,content=''))
def handle_category_article(cat, header_tag, outer_tag):
url = article_url(header_tag)
title = article_title(header_tag)
if not title == '':
title = cat+u'\u2014'+title
a_tag = outer_tag.find('span','authorLink')
if a_tag:
author = self.tag_to_string(a_tag,False)
a_tag.parent.extract()
else:
author = ''
description = article_description(outer_tag)
if not articles.has_key(page_title):
articles[page_title] = []
articles[page_title].append(dict(title=title,url=url,date='',description=description,author=author,content=''))
soup = self.index_to_soup(page_url)
if page_title == 'Front Page':
# special processing for the front page
top_stories = soup.find('div',{ "id" : "macleansFeatured" })
if top_stories:
for div_slide in top_stories.findAll('div','slide'):
url = article_url(div_slide)
div_title = div_slide.find('div','header')
if div_title:
title = self.tag_to_string(div_title,False)
else:
title = ''
description = article_description(div_slide)
if not articles.has_key(page_title):
articles[page_title] = []
articles[page_title].append(dict(title=title,url=url,date='',description=description,author='',content=''))
from_macleans = soup.find('div',{ "id" : "fromMacleans" })
if from_macleans:
for li_tag in from_macleans.findAll('li','fromMacleansArticle'):
title = compound_h4_h3_title(li_tag)
url = article_url(li_tag)
description = article_description(li_tag)
if not articles.has_key(page_title):
articles[page_title] = []
articles[page_title].append(dict(title=title,url=url,date='',description=description,author='',content=''))
blog_central = soup.find('div',{ "id" : "bloglist" })
if blog_central:
for li_tag in blog_central.findAll('li'):
title = compound_h2_h4_title(li_tag)
if li_tag.h4:
url = article_url(li_tag.h4)
if not articles.has_key(page_title):
articles[page_title] = []
articles[page_title].append(dict(title=title,url=url,date='',description='',author='',content=''))
# need_to_know = soup.find('div',{ "id" : "needToKnow" })
# if need_to_know:
# for div_tag in need_to_know('div',attrs={'class' : re.compile("^needToKnowArticle")}):
# title = compound_h4_h3_title(div_tag)
# url = article_url(div_tag)
# description = article_description(div_tag)
# if not articles.has_key(page_title):
# articles[page_title] = []
# articles[page_title].append(dict(title=title,url=url,date='',description=description,author='',content=''))
for news_category in soup.findAll('div','newsCategory'):
news_cat = self.tag_to_string(news_category.h4,False)
handle_category_article(news_cat, news_category.find('h2'), news_category.find('div'))
for news_item in news_category.findAll('li'):
handle_category_article(news_cat,news_item.h3,news_item)
return
# find the div containing the highlight article
div_post = soup.find('div','post')
if div_post:
h1_tag = div_post.h1
handle_article(h1_tag,div_post)
# find the divs containing the rest of the articles
div_other = div_post.find('div', { "id" : "categoryOtherPosts" })
if div_other:
for div_entry in div_other.findAll('div','entry'):
h2_tag = div_entry.h2
handle_article(h2_tag,div_entry)
for page_name,page_title in self.sectionlist:
parse_index_page(page_name,page_title)
ans.append(page_title)
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return ans
feeds = [(u'Blog Central', u'http://www2.macleans.ca/category/blog-central/feed/'),
(u'Canada', u'http://www2.macleans.ca/category/canada/feed/'),
(u'World', u'http://www2.macleans.ca/category/world-from-the-magazine/feed/'),
(u'Business', u'http://www2.macleans.ca/category/business/feed/'),
(u'Arts & Culture', u'http://www2.macleans.ca/category/arts-culture/feed/'),
(u'Opinion', u'http://www2.macleans.ca/category/opinion/feed/'),
(u'Health', u'http://www2.macleans.ca/category/health-from-the-magazine/feed/'),
(u'Environment', u'http://www2.macleans.ca/category/environment-from-the-magazine/feed/')]
def print_version(self, url):
return url + 'print/'

View File

@@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1295081935(BasicNewsRecipe):
title = u'Mail & Guardian ZA News'
__author__ = '77ja65'
language = 'en'
language = 'en_ZA'
oldest_article = 7
max_articles_per_feed = 30
no_stylesheets = True

View File

@@ -0,0 +1,45 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
title = u'Metro Nieuws NL'
oldest_article = 2
max_articles_per_feed = 100
__author__ = u'DrMerry'
description = u'Metro Nederland'
language = u'nl'
simultaneous_downloads = 5
delay = 1
# timefmt = ' [%A, %d %B, %Y]'
timefmt = ''
no_stylesheets = True
remove_javascript = True
remove_empty_feeds = True
cover_url = 'http://www.readmetro.com/img/en/metroholland/last/1/small.jpg'
publication_type = 'newspaper'
remove_tags_before = dict(name='div', attrs={'id':'date'})
remove_tags_after = dict(name='div', attrs={'id':'column-1-3'})
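# Keep only the region between the date header and the end of the first content column.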
encoding = 'utf-8'
extra_css = '#date {font-size: 10px} .article-image-caption {font-size: 8px}'
remove_tags = [dict(name='div', attrs={'class':[ 'metroCommentFormWrap',
'commentForm', 'metroCommentInnerWrap', 'article-slideshow-counter-container', 'article-slideshow-control', 'ad', 'header-links',
'art-rgt','pluck-app pluck-comm', 'share-and-byline', 'article-tools-below-title', 'col-179 ', 'related-links', 'clear padding-top-15', 'share-tools', 'article-page-auto-pushes', 'footer-edit']}),
dict(name='div', attrs={'id':['article-2', 'article-4', 'article-1', 'navigation', 'footer', 'header', 'comments', 'sidebar']}),
dict(name='iframe')]
feeds = [
(u'Binnenland', u'http://www.metronieuws.nl/rss.xml?c=1277377288-3'),
(u'Economie', u'http://www.metronieuws.nl/rss.xml?c=1278070988-0'),
(u'Den Haag', u'http://www.metronieuws.nl/rss.xml?c=1289013337-3'),
(u'Rotterdam', u'http://www.metronieuws.nl/rss.xml?c=1289013337-2'),
(u'Amsterdam', u'http://www.metronieuws.nl/rss.xml?c=1289013337-1'),
(u'Columns', u'http://www.metronieuws.nl/rss.xml?c=1277377288-17'),
(u'Entertainment', u'http://www.metronieuws.nl/rss.xml?c=1277377288-2'),
(u'Dot', u'http://www.metronieuws.nl/rss.xml?c=1283166782-12'),
(u'Familie', u'http://www.metronieuws.nl/rss.xml?c=1283166782-9'),
(u'Blogs', u'http://www.metronieuws.nl/rss.xml?c=1295586825-6'),
(u'Reizen', u'http://www.metronieuws.nl/rss.xml?c=1277377288-13'),
(u'Carrière', u'http://www.metronieuws.nl/rss.xml?c=1278070988-1'),
(u'Sport', u'http://www.metronieuws.nl/rss.xml?c=1277377288-12')
]

View File

@@ -1,29 +1,34 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
title = u'Metro UK'
no_stylesheets = True
oldest_article = 1
max_articles_per_feed = 200
description = 'News as provided by The Metro - UK'
__author__ = 'Dave Asbury'
no_stylesheets = True
oldest_article = 1
max_articles_per_feed = 25
remove_empty_feeds = True
remove_javascript = True
preprocess_regexps = [(re.compile(r'Tweet'), lambda a : '')]
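# Drops stray 'Tweet' text, presumably left over from share buttons.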
language = 'en_GB'
simultaneous_downloads= 3
masthead_url = 'http://e-edition.metro.co.uk/images/metro_logo.gif'
extra_css = 'h2 {font: sans-serif medium;}'
keep_only_tags = [
dict(name='h1'),dict(name='h2', attrs={'class':'h2'}),
dict(attrs={'class':['img-cnt figure']}),
dict(attrs={'class':['art-img']}),
dict(name='h1'),
dict(name='h2', attrs={'class':'h2'}),
dict(name='div', attrs={'class':'art-lft'})
]
remove_tags = [dict(name='div', attrs={'class':[ 'metroCommentFormWrap',
'commentForm', 'metroCommentInnerWrap',
'art-rgt','pluck-app pluck-comm','news m12 clrd clr-l p5t', 'flt-r' ]})]
remove_tags = [dict(name='div', attrs={'class':[ 'news m12 clrd clr-b p5t shareBtm', 'commentForm', 'metroCommentInnerWrap',
'art-rgt','pluck-app pluck-comm','news m12 clrd clr-l p5t', 'flt-r' ]}),
dict(attrs={'class':[ 'metroCommentFormWrap','commentText','commentsNav','avatar','submDateAndTime']})
]
feeds = [
(u'News', u'http://www.metro.co.uk/rss/news/'),
(u'Money', u'http://www.metro.co.uk/rss/money/'),
(u'Sport', u'http://www.metro.co.uk/rss/sport/'),
(u'Film', u'http://www.metro.co.uk/rss/metrolife/film/'),
(u'Music', u'http://www.metro.co.uk/rss/metrolife/music/'),
(u'TV', u'http://www.metro.co.uk/rss/tv/'),
(u'Showbiz', u'http://www.metro.co.uk/rss/showbiz/'),
(u'Weird News', u'http://www.metro.co.uk/rss/weird/'),
(u'Travel', u'http://www.metro.co.uk/rss/travel/'),
(u'Lifestyle', u'http://www.metro.co.uk/rss/lifestyle/'),
(u'Books', u'http://www.metro.co.uk/rss/lifestyle/books/'),
(u'Food', u'http://www.metro.co.uk/rss/lifestyle/restaurants/')]

View File

@ -1,17 +1,23 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2010-2011, Eddie Lau'
# Region - Hong Kong, Vancouver, Toronto
__Region__ = 'Hong Kong'
# Users of Kindle 3 with limited system-level CJK support
# please replace the following "True" with "False".
__MakePeriodical__ = True
# Turn below to true if your device supports display of CJK titles
__UseChineseTitle__ = False
# Turn below to true if you wish to use life.mingpao.com as the main article source
# Set it to False if you want to skip images
__KeepImages__ = True
# (HK only) Turn below to true if you wish to use life.mingpao.com as the main article source
__UseLife__ = True
'''
Change Log:
2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
provide options to remove all images in the file
2011/05/12: switch the main parse source to life.mingpao.com, which has more photos on the article pages
2011/03/06: add new articles for finance section, also a new section "Columns"
2011/02/28: rearrange the sections
@ -34,29 +40,17 @@ Change Log:
import os, datetime, re
from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation
class MPHKRecipe(BasicNewsRecipe):
# MAIN CLASS
class MPRecipe(BasicNewsRecipe):
if __Region__ == 'Hong Kong':
title = 'Ming Pao - Hong Kong'
oldest_article = 1
max_articles_per_feed = 100
__author__ = 'Eddie Lau'
description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
publisher = 'MingPao'
category = 'Chinese, News, Hong Kong'
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
language = 'zh'
encoding = 'Big5-HKSCS'
recursions = 0
conversion_options = {'linearize_tables':True}
timefmt = ''
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
keep_only_tags = [dict(name='h1'),
@ -65,11 +59,22 @@ class MPHKRecipe(BasicNewsRecipe):
dict(attrs={'id':['newscontent']}), # entertainment and column page content
dict(attrs={'id':['newscontent01','newscontent02']}),
dict(attrs={'class':['photo']}),
dict(name='table', attrs={'width':['100%'], 'border':['0'], 'cellspacing':['5'], 'cellpadding':['0']}), # content in printed version of life.mingpao.com
dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}) # images for source from life.mingpao.com
]
if __KeepImages__:
remove_tags = [dict(name='style'),
dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com
dict(name='table')] # for content fetched from life.mingpao.com
dict(name='font', attrs={'size':['2'], 'color':['666666']}), # article date in life.mingpao.com article
#dict(name='table') # for content fetched from life.mingpao.com
]
else:
remove_tags = [dict(name='style'),
dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com
dict(name='font', attrs={'size':['2'], 'color':['666666']}), # article date in life.mingpao.com article
dict(name='img'),
#dict(name='table') # for content fetched from life.mingpao.com
]
remove_attributes = ['width']
preprocess_regexps = [
(re.compile(r'<h5>', re.DOTALL|re.IGNORECASE),
@ -84,6 +89,55 @@ class MPHKRecipe(BasicNewsRecipe):
(re.compile(r"<br><br></b>", re.DOTALL|re.IGNORECASE),
lambda match: "</b>")
]
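# For example, the first two substitutions above rewrite
# '<h5>Some headline</h5>' as '<h1>Some headline</h1>' so article titles
# render as top-level headings.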
elif __Region__ == 'Vancouver':
title = 'Ming Pao - Vancouver'
description = 'Vancouver Chinese Newspaper (http://www.mingpaovan.com)'
category = 'Chinese, News, Vancouver'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
masthead_url = 'http://www.mingpaovan.com/image/mainlogo2_VAN2.gif'
keep_only_tags = [dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['1']}),
dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['3'], 'cellpadding':['3'], 'id':['tblContent3']}),
dict(name='table', attrs={'width':['180'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['0'], 'bgcolor':['F0F0F0']}),
]
if __KeepImages__:
remove_tags = [dict(name='img', attrs={'src':['../../../image/magnifier.gif']})] # the magnifier icon
else:
remove_tags = [dict(name='img')]
remove_attributes = ['width']
preprocess_regexps = [(re.compile(r'&nbsp;', re.DOTALL|re.IGNORECASE),
lambda match: ''),
]
elif __Region__ == 'Toronto':
title = 'Ming Pao - Toronto'
description = 'Toronto Chinese Newspaper (http://www.mingpaotor.com)'
category = 'Chinese, News, Toronto'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
masthead_url = 'http://www.mingpaotor.com/image/mainlogo2_TOR2.gif'
keep_only_tags = [dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['1']}),
dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['3'], 'cellpadding':['3'], 'id':['tblContent3']}),
dict(name='table', attrs={'width':['180'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['0'], 'bgcolor':['F0F0F0']}),
]
if __KeepImages__:
remove_tags = [dict(name='img', attrs={'src':['../../../image/magnifier.gif']})] # the magnifier icon
else:
remove_tags = [dict(name='img')]
remove_attributes = ['width']
preprocess_regexps = [(re.compile(r'&nbsp;', re.DOTALL|re.IGNORECASE),
lambda match: ''),
]
oldest_article = 1
max_articles_per_feed = 100
__author__ = 'Eddie Lau'
publisher = 'MingPao'
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
language = 'zh'
encoding = 'Big5-HKSCS'
recursions = 0
conversion_options = {'linearize_tables':True}
timefmt = ''
def image_url_processor(cls, baseurl, url):
# trick: break the url at the first occurance of digit, add an additional
@ -124,8 +178,18 @@ class MPHKRecipe(BasicNewsRecipe):
def get_dtlocal(self):
dt_utc = datetime.datetime.utcnow()
# convert UTC to local Hong Kong time - at around HKT 6:00am, all news is available
dt_local = dt_utc - datetime.timedelta(-2.0/24)
if __Region__ == 'Hong Kong':
# convert UTC to local Hong Kong time - by HKT 5:30am, all news is available
dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(5.5/24)
# dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(5.5/24)
elif __Region__ == 'Vancouver':
# convert UTC to local Vancouver time - by 5:30am PST, all news is available
dt_local = dt_utc + datetime.timedelta(-8.0/24) - datetime.timedelta(5.5/24)
#dt_local = dt_utc.astimezone(pytz.timezone('America/Vancouver')) - datetime.timedelta(5.5/24)
elif __Region__ == 'Toronto':
# convert UTC to local Toronto time - by 8:30am EST, all news is available
dt_local = dt_utc + datetime.timedelta(-5.0/24) - datetime.timedelta(8.5/24)
#dt_local = dt_utc.astimezone(pytz.timezone('America/Toronto')) - datetime.timedelta(8.5/24)
return dt_local
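# Worked example for the Hong Kong branch: the two timedeltas net to
# +2.5 hours, so the computed local date only rolls over at 21:30 UTC,
# i.e. HKT 5:30am, the point at which the day's paper is assumed complete.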
def get_fetchdate(self):
@ -135,13 +199,15 @@ class MPHKRecipe(BasicNewsRecipe):
return self.get_dtlocal().strftime("%Y-%m-%d")
def get_fetchday(self):
# dt_utc = datetime.datetime.utcnow()
# convert UTC to local Hong Kong time - at around HKT 6:00am, all news is available
# dt_local = dt_utc - datetime.timedelta(-2.0/24)
return self.get_dtlocal().strftime("%d")
def get_cover_url(self):
if __Region__ == 'Hong Kong':
cover = 'http://news.mingpao.com/' + self.get_fetchdate() + '/' + self.get_fetchdate() + '_' + self.get_fetchday() + 'gacov.jpg'
elif __Region__ == 'Vancouver':
cover = 'http://www.mingpaovan.com/ftp/News/' + self.get_fetchdate() + '/' + self.get_fetchday() + 'pgva1s.jpg'
elif __Region__ == 'Toronto':
cover = 'http://www.mingpaotor.com/ftp/News/' + self.get_fetchdate() + '/' + self.get_fetchday() + 'pgtas.jpg'
br = BasicNewsRecipe.get_browser()
try:
br.open(cover)
@ -153,6 +219,7 @@ class MPHKRecipe(BasicNewsRecipe):
feeds = []
dateStr = self.get_fetchdate()
if __Region__ == 'Hong Kong':
if __UseLife__:
for title, url, keystr in [(u'\u8981\u805e Headline', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalga', 'nal'),
(u'\u6e2f\u805e Local', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgb', 'nal'),
@ -222,7 +289,34 @@ class MPHKRecipe(BasicNewsRecipe):
col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn')
if col_articles:
feeds.append((u'\u5c08\u6b04 Columns', col_articles))
elif __Region__ == 'Vancouver':
for title, url in [(u'\u8981\u805e Headline', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VAindex.htm'),
(u'\u52a0\u570b Canada', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VBindex.htm'),
(u'\u793e\u5340 Local', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VDindex.htm'),
(u'\u6e2f\u805e Hong Kong', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/HK-VGindex.htm'),
(u'\u570b\u969b World', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VTindex.htm'),
(u'\u4e2d\u570b China', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VCindex.htm'),
(u'\u7d93\u6fdf Economics', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VEindex.htm'),
(u'\u9ad4\u80b2 Sports', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VSindex.htm'),
(u'\u5f71\u8996 Film/TV', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/HK-MAindex.htm'),
(u'\u526f\u520a Supplements', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/WWindex.htm'),]:
articles = self.parse_section3(url, 'http://www.mingpaovan.com/')
if articles:
feeds.append((title, articles))
elif __Region__ == 'Toronto':
for title, url in [(u'\u8981\u805e Headline', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TAindex.htm'),
(u'\u52a0\u570b Canada', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TDindex.htm'),
(u'\u793e\u5340 Local', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TFindex.htm'),
(u'\u4e2d\u570b China', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TCAindex.htm'),
(u'\u570b\u969b World', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TTAindex.htm'),
(u'\u6e2f\u805e Hong Kong', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/HK-GAindex.htm'),
(u'\u7d93\u6fdf Economics', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/THindex.htm'),
(u'\u9ad4\u80b2 Sports', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TSindex.htm'),
(u'\u5f71\u8996 Film/TV', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/HK-MAindex.htm'),
(u'\u526f\u520a Supplements', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/WWindex.htm'),]:
articles = self.parse_section3(url, 'http://www.mingpaotor.com/')
if articles:
feeds.append((title, articles))
return feeds
# parse from news.mingpao.com
@ -256,11 +350,30 @@ class MPHKRecipe(BasicNewsRecipe):
title = self.tag_to_string(i)
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article
current_articles.append({'title': title, 'url': url, 'description': ''})
included_urls.append(url)
current_articles.reverse()
return current_articles
# parse from www.mingpaovan.com
def parse_section3(self, url, baseUrl):
self.get_fetchdate()
soup = self.index_to_soup(url)
divs = soup.findAll(attrs={'class': ['ListContentLargeLink']})
current_articles = []
included_urls = []
divs.reverse()
for i in divs:
title = self.tag_to_string(i)
urlstr = i.get('href', False)
urlstr = baseUrl + '/' + urlstr.replace('../../../', '')
if urlstr not in included_urls:
current_articles.append({'title': title, 'url': urlstr, 'description': '', 'date': ''})
included_urls.append(urlstr)
current_articles.reverse()
return current_articles
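# Note: reversing the div list before de-duplication and the article list
# after it keeps the later copy of any duplicated link while preserving
# the original page order in the output.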
def parse_ed_section(self, url):
self.get_fetchdate()
soup = self.index_to_soup(url)
@ -338,7 +451,12 @@ class MPHKRecipe(BasicNewsRecipe):
if dir is None:
dir = self.output_dir
if __UseChineseTitle__ == True:
if __Region__ == 'Hong Kong':
title = u'\u660e\u5831 (\u9999\u6e2f)'
elif __Region__ == 'Vancouver':
title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)'
elif __Region__ == 'Toronto':
title = u'\u660e\u5831 (\u591a\u502b\u591a)'
else:
title = self.short_title()
# if not generating a periodical, force date to apply in title

View File

@ -0,0 +1,594 @@
__license__ = 'GPL v3'
__copyright__ = '2010-2011, Eddie Lau'
# Region - Hong Kong, Vancouver, Toronto
__Region__ = 'Toronto'
# Users of Kindle 3 with limited system-level CJK support
# please replace the following "True" with "False".
__MakePeriodical__ = True
# Turn below to true if your device supports display of CJK titles
__UseChineseTitle__ = False
# Set it to False if you want to skip images
__KeepImages__ = True
# (HK only) Turn below to true if you wish to use life.mingpao.com as the main article source
__UseLife__ = True
'''
Change Log:
2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
provide options to remove all images in the file
2011/05/12: switch the main parse source to life.mingpao.com, which has more photos on the article pages
2011/03/06: add new articles for finance section, also a new section "Columns"
2011/02/28: rearrange the sections
[Disabled until Kindle has better CJK support and can remember last (section,article) read in Sections & Articles
View] make it the same title if generating a periodical, so past issues are automatically put into the "Past Issues"
folder in Kindle 3
2011/02/20: skip duplicated links in finance section, put photos which may extend a whole page to the back of the articles
clean up the indentation
2010/12/07: add entertainment section, use newspaper front page as ebook cover, suppress date display in section list
(to avoid wrong date display in case the user generates the ebook in a time zone different from HKT)
2010/11/22: add English section, remove eco-news section which is not updated daily, correct
ordering of articles
2010/11/12: add news image and eco-news section
2010/11/08: add parsing of finance section
2010/11/06: temporary work-around for Kindle device having no capability to display unicode
in section/article list.
2010/10/31: skip repeated articles in section pages
'''
import os, datetime, re
from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation
# MAIN CLASS
class MPRecipe(BasicNewsRecipe):
if __Region__ == 'Hong Kong':
title = 'Ming Pao - Hong Kong'
description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
category = 'Chinese, News, Hong Kong'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
keep_only_tags = [dict(name='h1'),
dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
dict(name='font', attrs={'color':['AA0000']}), # for column articles title
dict(attrs={'id':['newscontent']}), # entertainment and column page content
dict(attrs={'id':['newscontent01','newscontent02']}),
dict(attrs={'class':['photo']}),
dict(name='table', attrs={'width':['100%'], 'border':['0'], 'cellspacing':['5'], 'cellpadding':['0']}), # content in printed version of life.mingpao.com
dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}) # images for source from life.mingpao.com
]
if __KeepImages__:
remove_tags = [dict(name='style'),
dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com
dict(name='font', attrs={'size':['2'], 'color':['666666']}), # article date in life.mingpao.com article
#dict(name='table') # for content fetched from life.mingpao.com
]
else:
remove_tags = [dict(name='style'),
dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com
dict(name='font', attrs={'size':['2'], 'color':['666666']}), # article date in life.mingpao.com article
dict(name='img'),
#dict(name='table') # for content fetched from life.mingpao.com
]
remove_attributes = ['width']
preprocess_regexps = [
(re.compile(r'<h5>', re.DOTALL|re.IGNORECASE),
lambda match: '<h1>'),
(re.compile(r'</h5>', re.DOTALL|re.IGNORECASE),
lambda match: '</h1>'),
(re.compile(r'<p><a href=.+?</a></p>', re.DOTALL|re.IGNORECASE), # for entertainment page
lambda match: ''),
# skip <br> after title in life.mingpao.com fetched article
(re.compile(r"<div id='newscontent'><br>", re.DOTALL|re.IGNORECASE),
lambda match: "<div id='newscontent'>"),
(re.compile(r"<br><br></b>", re.DOTALL|re.IGNORECASE),
lambda match: "</b>")
]
elif __Region__ == 'Vancouver':
title = 'Ming Pao - Vancouver'
description = 'Vancouver Chinese Newspaper (http://www.mingpaovan.com)'
category = 'Chinese, News, Vancouver'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
masthead_url = 'http://www.mingpaovan.com/image/mainlogo2_VAN2.gif'
keep_only_tags = [dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['1']}),
dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['3'], 'cellpadding':['3'], 'id':['tblContent3']}),
dict(name='table', attrs={'width':['180'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['0'], 'bgcolor':['F0F0F0']}),
]
if __KeepImages__:
remove_tags = [dict(name='img', attrs={'src':['../../../image/magnifier.gif']})] # the magnifier icon
else:
remove_tags = [dict(name='img')]
remove_attributes = ['width']
preprocess_regexps = [(re.compile(r'&nbsp;', re.DOTALL|re.IGNORECASE),
lambda match: ''),
]
elif __Region__ == 'Toronto':
title = 'Ming Pao - Toronto'
description = 'Toronto Chinese Newspaper (http://www.mingpaotor.com)'
category = 'Chinese, News, Toronto'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
masthead_url = 'http://www.mingpaotor.com/image/mainlogo2_TOR2.gif'
keep_only_tags = [dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['1']}),
dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['3'], 'cellpadding':['3'], 'id':['tblContent3']}),
dict(name='table', attrs={'width':['180'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['0'], 'bgcolor':['F0F0F0']}),
]
if __KeepImages__:
remove_tags = [dict(name='img', attrs={'src':['../../../image/magnifier.gif']})] # the magnifier icon
else:
remove_tags = [dict(name='img')]
remove_attributes = ['width']
preprocess_regexps = [(re.compile(r'&nbsp;', re.DOTALL|re.IGNORECASE),
lambda match: ''),
]
oldest_article = 1
max_articles_per_feed = 100
__author__ = 'Eddie Lau'
publisher = 'MingPao'
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
language = 'zh'
encoding = 'Big5-HKSCS'
recursions = 0
conversion_options = {'linearize_tables':True}
timefmt = ''
def image_url_processor(cls, baseurl, url):
# trick: break the url at the first occurrence of a digit and add an
# additional '_' at the front
# not working, may need to move this to preprocess_html() method
# a compact sketch of the intended logic:
# digit_positions = [url.find(d) for d in '0123456789' if url.find(d) >= 0]
# if digit_positions:
# minIdx = min(digit_positions)
# url = url[:minIdx] + '_' + url[minIdx:]
return url
def get_dtlocal(self):
dt_utc = datetime.datetime.utcnow()
if __Region__ == 'Hong Kong':
# convert UTC to local Hong Kong time - by HKT 5:30am, all news is available
dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(5.5/24)
# dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(5.5/24)
elif __Region__ == 'Vancouver':
# convert UTC to local Vancouver time - by 5:30am PST, all news is available
dt_local = dt_utc + datetime.timedelta(-8.0/24) - datetime.timedelta(5.5/24)
#dt_local = dt_utc.astimezone(pytz.timezone('America/Vancouver')) - datetime.timedelta(5.5/24)
elif __Region__ == 'Toronto':
# convert UTC to local Toronto time - by 8:30am EST, all news is available
dt_local = dt_utc + datetime.timedelta(-5.0/24) - datetime.timedelta(8.5/24)
#dt_local = dt_utc.astimezone(pytz.timezone('America/Toronto')) - datetime.timedelta(8.5/24)
return dt_local
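# Worked example for the Toronto branch: -5/24 - 8.5/24 nets to -13.5
# hours, so the local date rolls over at 13:30 UTC, i.e. 8:30am EST.
# The fixed offsets ignore daylight saving; the commented pytz lines
# sketch a DST-aware alternative.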
def get_fetchdate(self):
return self.get_dtlocal().strftime("%Y%m%d")
def get_fetchformatteddate(self):
return self.get_dtlocal().strftime("%Y-%m-%d")
def get_fetchday(self):
return self.get_dtlocal().strftime("%d")
def get_cover_url(self):
if __Region__ == 'Hong Kong':
cover = 'http://news.mingpao.com/' + self.get_fetchdate() + '/' + self.get_fetchdate() + '_' + self.get_fetchday() + 'gacov.jpg'
elif __Region__ == 'Vancouver':
cover = 'http://www.mingpaovan.com/ftp/News/' + self.get_fetchdate() + '/' + self.get_fetchday() + 'pgva1s.jpg'
elif __Region__ == 'Toronto':
cover = 'http://www.mingpaotor.com/ftp/News/' + self.get_fetchdate() + '/' + self.get_fetchday() + 'pgtas.jpg'
br = BasicNewsRecipe.get_browser()
try:
br.open(cover)
except:
cover = None
return cover
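# If the dated cover image is not yet on the server, br.open() raises,
# cover falls back to None, and calibre substitutes its default cover.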
def parse_index(self):
feeds = []
dateStr = self.get_fetchdate()
if __Region__ == 'Hong Kong':
if __UseLife__:
for title, url, keystr in [(u'\u8981\u805e Headline', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalga', 'nal'),
(u'\u6e2f\u805e Local', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgb', 'nal'),
(u'\u6559\u80b2 Education', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgf', 'nal'),
(u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr', 'nal'),
(u'\u8ad6\u58c7 Forum', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalfa', 'nal'),
(u'\u4e2d\u570b China', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalca', 'nal'),
(u'\u570b\u969b World', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalta', 'nal'),
(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal'),
(u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'),
(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal'),
(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')]:
articles = self.parse_section2(url, keystr)
if articles:
feeds.append((title, articles))
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
else:
for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
(u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
(u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special- editorial
ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
if ed_articles:
feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
(u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
(u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special - finance
#fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
if fin_articles:
feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
(u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special - entertainment
ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
if ent_articles:
feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special- columns
col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn')
if col_articles:
feeds.append((u'\u5c08\u6b04 Columns', col_articles))
elif __Region__ == 'Vancouver':
for title, url in [(u'\u8981\u805e Headline', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VAindex.htm'),
(u'\u52a0\u570b Canada', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VBindex.htm'),
(u'\u793e\u5340 Local', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VDindex.htm'),
(u'\u6e2f\u805e Hong Kong', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/HK-VGindex.htm'),
(u'\u570b\u969b World', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VTindex.htm'),
(u'\u4e2d\u570b China', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VCindex.htm'),
(u'\u7d93\u6fdf Economics', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VEindex.htm'),
(u'\u9ad4\u80b2 Sports', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VSindex.htm'),
(u'\u5f71\u8996 Film/TV', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/HK-MAindex.htm'),
(u'\u526f\u520a Supplements', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/WWindex.htm'),]:
articles = self.parse_section3(url, 'http://www.mingpaovan.com/')
if articles:
feeds.append((title, articles))
elif __Region__ == 'Toronto':
for title, url in [(u'\u8981\u805e Headline', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TAindex.htm'),
(u'\u52a0\u570b Canada', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TDindex.htm'),
(u'\u793e\u5340 Local', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TFindex.htm'),
(u'\u4e2d\u570b China', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TCAindex.htm'),
(u'\u570b\u969b World', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TTAindex.htm'),
(u'\u6e2f\u805e Hong Kong', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/HK-GAindex.htm'),
(u'\u7d93\u6fdf Economics', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/THindex.htm'),
(u'\u9ad4\u80b2 Sports', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TSindex.htm'),
(u'\u5f71\u8996 Film/TV', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/HK-MAindex.htm'),
(u'\u526f\u520a Supplements', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/WWindex.htm'),]:
articles = self.parse_section3(url, 'http://www.mingpaotor.com/')
if articles:
feeds.append((title, articles))
return feeds
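# parse_index returns a list of (section title, article list) tuples;
# sections whose parser found nothing are skipped, so the generated
# ebook only contains populated feeds.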
# parse from news.mingpao.com
def parse_section(self, url):
dateStr = self.get_fetchdate()
soup = self.index_to_soup(url)
divs = soup.findAll(attrs={'class': ['bullet','bullet_grey']})
current_articles = []
included_urls = []
divs.reverse()
for i in divs:
a = i.find('a', href = True)
title = self.tag_to_string(a)
url = a.get('href', False)
url = 'http://news.mingpao.com/' + dateStr + '/' +url
if url not in included_urls and url.rfind('Redirect') == -1:
current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
included_urls.append(url)
current_articles.reverse()
return current_articles
# parse from life.mingpao.com
def parse_section2(self, url, keystr):
self.get_fetchdate()
soup = self.index_to_soup(url)
a = soup.findAll('a', href=True)
a.reverse()
current_articles = []
included_urls = []
for i in a:
title = self.tag_to_string(i)
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article
current_articles.append({'title': title, 'url': url, 'description': ''})
included_urls.append(url)
current_articles.reverse()
return current_articles
# parse from www.mingpaovan.com
def parse_section3(self, url, baseUrl):
self.get_fetchdate()
soup = self.index_to_soup(url)
divs = soup.findAll(attrs={'class': ['ListContentLargeLink']})
current_articles = []
included_urls = []
divs.reverse()
for i in divs:
title = self.tag_to_string(i)
urlstr = i.get('href', False)
urlstr = baseUrl + '/' + urlstr.replace('../../../', '')
if urlstr not in included_urls:
current_articles.append({'title': title, 'url': urlstr, 'description': '', 'date': ''})
included_urls.append(urlstr)
current_articles.reverse()
return current_articles
def parse_ed_section(self, url):
self.get_fetchdate()
soup = self.index_to_soup(url)
a = soup.findAll('a', href=True)
a.reverse()
current_articles = []
included_urls = []
for i in a:
title = self.tag_to_string(i)
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('nal') == -1):
current_articles.append({'title': title, 'url': url, 'description': ''})
included_urls.append(url)
current_articles.reverse()
return current_articles
def parse_fin_section(self, url):
self.get_fetchdate()
soup = self.index_to_soup(url)
a = soup.findAll('a', href= True)
current_articles = []
included_urls = []
for i in a:
#url = 'http://www.mpfinance.com/cfm/' + i.get('href', False)
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
#if url not in included_urls and not url.rfind(dateStr) == -1 and url.rfind('index') == -1:
if url not in included_urls and (not url.rfind('txt') == -1) and (not url.rfind('nal') == -1):
title = self.tag_to_string(i)
current_articles.append({'title': title, 'url': url, 'description':''})
included_urls.append(url)
return current_articles
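# Unlike the other section parsers, the link list here is not reversed,
# so finance articles keep the order in which they appear on the page;
# the commented lines preserve the older mpfinance.com variant.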
def parse_ent_section(self, url):
self.get_fetchdate()
soup = self.index_to_soup(url)
a = soup.findAll('a', href=True)
a.reverse()
current_articles = []
included_urls = []
for i in a:
title = self.tag_to_string(i)
url = 'http://ol.mingpao.com/cfm/' + i.get('href', False)
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('star') == -1):
current_articles.append({'title': title, 'url': url, 'description': ''})
included_urls.append(url)
current_articles.reverse()
return current_articles
def parse_col_section(self, url):
self.get_fetchdate()
soup = self.index_to_soup(url)
a = soup.findAll('a', href=True)
a.reverse()
current_articles = []
included_urls = []
for i in a:
title = self.tag_to_string(i)
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('ncl') == -1):
current_articles.append({'title': title, 'url': url, 'description': ''})
included_urls.append(url)
current_articles.reverse()
return current_articles
def preprocess_html(self, soup):
# strip inline styles, fixed widths and align="absmiddle" attributes
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll(width=True):
del item['width']
for item in soup.findAll(align='absmiddle'):
del item['align']
return soup
def create_opf(self, feeds, dir=None):
if dir is None:
dir = self.output_dir
if __UseChineseTitle__ == True:
if __Region__ == 'Hong Kong':
title = u'\u660e\u5831 (\u9999\u6e2f)'
elif __Region__ == 'Vancouver':
title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)'
elif __Region__ == 'Toronto':
title = u'\u660e\u5831 (\u591a\u502b\u591a)'
else:
title = self.short_title()
# if not generating a periodical, force date to apply in title
if __MakePeriodical__ == False:
title = title + ' ' + self.get_fetchformatteddate()
if True:
mi = MetaInformation(title, [self.publisher])
mi.publisher = self.publisher
mi.author_sort = self.publisher
if __MakePeriodical__ == True:
mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
else:
mi.publication_type = self.publication_type+':'+self.short_title()
#mi.timestamp = nowf()
mi.timestamp = self.get_dtlocal()
mi.comments = self.description
if not isinstance(mi.comments, unicode):
mi.comments = mi.comments.decode('utf-8', 'replace')
#mi.pubdate = nowf()
mi.pubdate = self.get_dtlocal()
opf_path = os.path.join(dir, 'index.opf')
ncx_path = os.path.join(dir, 'index.ncx')
opf = OPFCreator(dir, mi)
# Add mastheadImage entry to <guide> section
mp = getattr(self, 'masthead_path', None)
if mp is not None and os.access(mp, os.R_OK):
from calibre.ebooks.metadata.opf2 import Guide
ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
ref.type = 'masthead'
ref.title = 'Masthead Image'
opf.guide.append(ref)
manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
manifest.append(os.path.join(dir, 'index.html'))
manifest.append(os.path.join(dir, 'index.ncx'))
# Get cover
cpath = getattr(self, 'cover_path', None)
if cpath is None:
pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
if self.default_cover(pf):
cpath = pf.name
if cpath is not None and os.access(cpath, os.R_OK):
opf.cover = cpath
manifest.append(cpath)
# Get masthead
mpath = getattr(self, 'masthead_path', None)
if mpath is not None and os.access(mpath, os.R_OK):
manifest.append(mpath)
opf.create_manifest_from_files_in(manifest)
for mani in opf.manifest:
if mani.path.endswith('.ncx'):
mani.id = 'ncx'
if mani.path.endswith('mastheadImage.jpg'):
mani.id = 'masthead-image'
entries = ['index.html']
toc = TOC(base_path=dir)
self.play_order_counter = 0
self.play_order_map = {}
def feed_index(num, parent):
f = feeds[num]
for j, a in enumerate(f):
if getattr(a, 'downloaded', False):
adir = 'feed_%d/article_%d/'%(num, j)
auth = a.author
if not auth:
auth = None
desc = a.text_summary
if not desc:
desc = None
else:
desc = self.description_limiter(desc)
entries.append('%sindex.html'%adir)
po = self.play_order_map.get(entries[-1], None)
if po is None:
self.play_order_counter += 1
po = self.play_order_counter
parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
play_order=po, author=auth, description=desc)
last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
for sp in a.sub_pages:
prefix = os.path.commonprefix([opf_path, sp])
relp = sp[len(prefix):]
entries.append(relp.replace(os.sep, '/'))
last = sp
if os.path.exists(last):
with open(last, 'rb') as fi:
src = fi.read().decode('utf-8')
soup = BeautifulSoup(src)
body = soup.find('body')
if body is not None:
prefix = '/'.join('..' for i in range(2*len(re.findall(r'link\d+', last))))
templ = self.navbar.generate(True, num, j, len(f),
not self.has_single_feed,
a.orig_url, self.publisher, prefix=prefix,
center=self.center_navbar)
elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
body.insert(len(body.contents), elem)
with open(last, 'wb') as fi:
fi.write(unicode(soup).encode('utf-8'))
if len(feeds) == 0:
raise Exception('All feeds are empty, aborting.')
if len(feeds) > 1:
for i, f in enumerate(feeds):
entries.append('feed_%d/index.html'%i)
po = self.play_order_map.get(entries[-1], None)
if po is None:
self.play_order_counter += 1
po = self.play_order_counter
auth = getattr(f, 'author', None)
if not auth:
auth = None
desc = getattr(f, 'description', None)
if not desc:
desc = None
feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
f.title, play_order=po, description=desc, author=auth))
else:
entries.append('feed_%d/index.html'%0)
feed_index(0, toc)
for i, p in enumerate(entries):
entries[i] = os.path.join(dir, p.replace('/', os.sep))
opf.create_spine(entries)
opf.set_toc(toc)
with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
opf.render(opf_file, ncx_file)
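# create_opf essentially mirrors BasicNewsRecipe's implementation,
# overridden only to substitute the region-specific (optionally Chinese)
# title and to stamp the book with the local date computed in get_dtlocal().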

View File

@ -0,0 +1,594 @@
__license__ = 'GPL v3'
__copyright__ = '2010-2011, Eddie Lau'
# Region - Hong Kong, Vancouver, Toronto
__Region__ = 'Vancouver'
# Users of Kindle 3 with limited system-level CJK support
# please replace the following "True" with "False".
__MakePeriodical__ = True
# Turn below to true if your device supports display of CJK titles
__UseChineseTitle__ = False
# Set it to False if you want to skip images
__KeepImages__ = True
# (HK only) Turn below to true if you wish to use life.mingpao.com as the main article source
__UseLife__ = True
'''
Change Log:
2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
provide options to remove all images in the file
2011/05/12: switch the main parse source to life.mingpao.com, which has more photos on the article pages
2011/03/06: add new articles for finance section, also a new section "Columns"
2011/02/28: rearrange the sections
[Disabled until Kindle has better CJK support and can remember last (section,article) read in Sections & Articles
View] make it the same title if generating a periodical, so past issues are automatically put into the "Past Issues"
folder in Kindle 3
2011/02/20: skip duplicated links in finance section, put photos which may extend a whole page to the back of the articles
clean up the indentation
2010/12/07: add entertainment section, use newspaper front page as ebook cover, suppress date display in section list
(to avoid wrong date display in case the user generates the ebook in a time zone different from HKT)
2010/11/22: add English section, remove eco-news section which is not updated daily, correct
ordering of articles
2010/11/12: add news image and eco-news section
2010/11/08: add parsing of finance section
2010/11/06: temporary work-around for Kindle device having no capability to display unicode
in section/article list.
2010/10/31: skip repeated articles in section pages
'''
import os, datetime, re
from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation
# MAIN CLASS
class MPRecipe(BasicNewsRecipe):
if __Region__ == 'Hong Kong':
title = 'Ming Pao - Hong Kong'
description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
category = 'Chinese, News, Hong Kong'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
keep_only_tags = [dict(name='h1'),
dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
dict(name='font', attrs={'color':['AA0000']}), # for column articles title
dict(attrs={'id':['newscontent']}), # entertainment and column page content
dict(attrs={'id':['newscontent01','newscontent02']}),
dict(attrs={'class':['photo']}),
dict(name='table', attrs={'width':['100%'], 'border':['0'], 'cellspacing':['5'], 'cellpadding':['0']}), # content in printed version of life.mingpao.com
dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}) # images for source from life.mingpao.com
]
if __KeepImages__:
remove_tags = [dict(name='style'),
dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com
dict(name='font', attrs={'size':['2'], 'color':['666666']}), # article date in life.mingpao.com article
#dict(name='table') # for content fetched from life.mingpao.com
]
else:
remove_tags = [dict(name='style'),
dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com
dict(name='font', attrs={'size':['2'], 'color':['666666']}), # article date in life.mingpao.com article
dict(name='img'),
#dict(name='table') # for content fetched from life.mingpao.com
]
remove_attributes = ['width']
preprocess_regexps = [
(re.compile(r'<h5>', re.DOTALL|re.IGNORECASE),
lambda match: '<h1>'),
(re.compile(r'</h5>', re.DOTALL|re.IGNORECASE),
lambda match: '</h1>'),
(re.compile(r'<p><a href=.+?</a></p>', re.DOTALL|re.IGNORECASE), # for entertainment page
lambda match: ''),
# skip <br> after title in life.mingpao.com fetched article
(re.compile(r"<div id='newscontent'><br>", re.DOTALL|re.IGNORECASE),
lambda match: "<div id='newscontent'>"),
(re.compile(r"<br><br></b>", re.DOTALL|re.IGNORECASE),
lambda match: "</b>")
]
elif __Region__ == 'Vancouver':
title = 'Ming Pao - Vancouver'
description = 'Vancouver Chinese Newspaper (http://www.mingpaovan.com)'
category = 'Chinese, News, Vancouver'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
masthead_url = 'http://www.mingpaovan.com/image/mainlogo2_VAN2.gif'
keep_only_tags = [dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['1']}),
dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['3'], 'cellpadding':['3'], 'id':['tblContent3']}),
dict(name='table', attrs={'width':['180'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['0'], 'bgcolor':['F0F0F0']}),
]
if __KeepImages__:
remove_tags = [dict(name='img', attrs={'src':['../../../image/magnifier.gif']})] # the magnifier icon
else:
remove_tags = [dict(name='img')]
remove_attributes = ['width']
preprocess_regexps = [(re.compile(r'&nbsp;', re.DOTALL|re.IGNORECASE),
lambda match: ''),
]
elif __Region__ == 'Toronto':
title = 'Ming Pao - Toronto'
description = 'Toronto Chinese Newspaper (http://www.mingpaotor.com)'
category = 'Chinese, News, Toronto'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
masthead_url = 'http://www.mingpaotor.com/image/mainlogo2_TOR2.gif'
keep_only_tags = [dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['1']}),
dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['3'], 'cellpadding':['3'], 'id':['tblContent3']}),
dict(name='table', attrs={'width':['180'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['0'], 'bgcolor':['F0F0F0']}),
]
if __KeepImages__:
remove_tags = [dict(name='img', attrs={'src':['../../../image/magnifier.gif']})] # the magnifier icon
else:
remove_tags = [dict(name='img')]
remove_attributes = ['width']
preprocess_regexps = [(re.compile(r'&nbsp;', re.DOTALL|re.IGNORECASE),
lambda match: ''),
]
oldest_article = 1
max_articles_per_feed = 100
__author__ = 'Eddie Lau'
publisher = 'MingPao'
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
language = 'zh'
encoding = 'Big5-HKSCS'
recursions = 0
conversion_options = {'linearize_tables':True}
timefmt = ''
def image_url_processor(cls, baseurl, url):
# trick: break the url at the first occurrence of a digit and add an
# additional '_' at the front
# not working, may need to move this to preprocess_html() method
# a compact sketch of the intended logic:
# digit_positions = [url.find(d) for d in '0123456789' if url.find(d) >= 0]
# if digit_positions:
# minIdx = min(digit_positions)
# url = url[:minIdx] + '_' + url[minIdx:]
return url
def get_dtlocal(self):
dt_utc = datetime.datetime.utcnow()
if __Region__ == 'Hong Kong':
# convert UTC to local Hong Kong time - by HKT 5:30am, all news is available
dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(5.5/24)
# dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(5.5/24)
elif __Region__ == 'Vancouver':
# convert UTC to local Vancouver time - by 5:30am PST, all news is available
dt_local = dt_utc + datetime.timedelta(-8.0/24) - datetime.timedelta(5.5/24)
#dt_local = dt_utc.astimezone(pytz.timezone('America/Vancouver')) - datetime.timedelta(5.5/24)
elif __Region__ == 'Toronto':
# convert UTC to local Toronto time - by 8:30am EST, all news is available
dt_local = dt_utc + datetime.timedelta(-5.0/24) - datetime.timedelta(8.5/24)
#dt_local = dt_utc.astimezone(pytz.timezone('America/Toronto')) - datetime.timedelta(8.5/24)
return dt_local
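# Worked example for the Vancouver branch: -8/24 - 5.5/24 also nets to
# -13.5 hours, so the local date rolls over at 13:30 UTC, i.e. 5:30am PST.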
def get_fetchdate(self):
return self.get_dtlocal().strftime("%Y%m%d")
def get_fetchformatteddate(self):
return self.get_dtlocal().strftime("%Y-%m-%d")
def get_fetchday(self):
return self.get_dtlocal().strftime("%d")
def get_cover_url(self):
if __Region__ == 'Hong Kong':
cover = 'http://news.mingpao.com/' + self.get_fetchdate() + '/' + self.get_fetchdate() + '_' + self.get_fetchday() + 'gacov.jpg'
elif __Region__ == 'Vancouver':
cover = 'http://www.mingpaovan.com/ftp/News/' + self.get_fetchdate() + '/' + self.get_fetchday() + 'pgva1s.jpg'
elif __Region__ == 'Toronto':
cover = 'http://www.mingpaotor.com/ftp/News/' + self.get_fetchdate() + '/' + self.get_fetchday() + 'pgtas.jpg'
br = BasicNewsRecipe.get_browser()
try:
br.open(cover)
except:
cover = None
return cover
def parse_index(self):
feeds = []
dateStr = self.get_fetchdate()
if __Region__ == 'Hong Kong':
if __UseLife__:
for title, url, keystr in [(u'\u8981\u805e Headline', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalga', 'nal'),
(u'\u6e2f\u805e Local', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgb', 'nal'),
(u'\u6559\u80b2 Education', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgf', 'nal'),
(u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr', 'nal'),
(u'\u8ad6\u58c7 Forum', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalfa', 'nal'),
(u'\u4e2d\u570b China', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalca', 'nal'),
(u'\u570b\u969b World', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalta', 'nal'),
(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal'),
(u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'),
(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal'),
(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')]:
articles = self.parse_section2(url, keystr)
if articles:
feeds.append((title, articles))
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
else:
for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
(u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
(u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special- editorial
ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
if ed_articles:
feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
(u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
(u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special - finance
#fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
if fin_articles:
feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
(u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special - entertainment
ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
if ent_articles:
feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special- columns
col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn')
if col_articles:
feeds.append((u'\u5c08\u6b04 Columns', col_articles))
elif __Region__ == 'Vancouver':
for title, url in [(u'\u8981\u805e Headline', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VAindex.htm'),
(u'\u52a0\u570b Canada', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VBindex.htm'),
(u'\u793e\u5340 Local', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VDindex.htm'),
(u'\u6e2f\u805e Hong Kong', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/HK-VGindex.htm'),
(u'\u570b\u969b World', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VTindex.htm'),
(u'\u4e2d\u570b China', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VCindex.htm'),
(u'\u7d93\u6fdf Economics', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VEindex.htm'),
(u'\u9ad4\u80b2 Sports', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VSindex.htm'),
(u'\u5f71\u8996 Film/TV', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/HK-MAindex.htm'),
(u'\u526f\u520a Supplements', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/WWindex.htm'),]:
articles = self.parse_section3(url, 'http://www.mingpaovan.com/')
if articles:
feeds.append((title, articles))
elif __Region__ == 'Toronto':
for title, url in [(u'\u8981\u805e Headline', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TAindex.htm'),
(u'\u52a0\u570b Canada', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TDindex.htm'),
(u'\u793e\u5340 Local', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TFindex.htm'),
(u'\u4e2d\u570b China', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TCAindex.htm'),
(u'\u570b\u969b World', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TTAindex.htm'),
(u'\u6e2f\u805e Hong Kong', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/HK-GAindex.htm'),
(u'\u7d93\u6fdf Economics', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/THindex.htm'),
(u'\u9ad4\u80b2 Sports', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TSindex.htm'),
(u'\u5f71\u8996 Film/TV', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/HK-MAindex.htm'),
(u'\u526f\u520a Supplements', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/WWindex.htm'),]:
articles = self.parse_section3(url, 'http://www.mingpaotor.com/')
if articles:
feeds.append((title, articles))
return feeds
# parse from news.mingpao.com
def parse_section(self, url):
dateStr = self.get_fetchdate()
soup = self.index_to_soup(url)
divs = soup.findAll(attrs={'class': ['bullet','bullet_grey']})
current_articles = []
included_urls = []
divs.reverse()
for i in divs:
a = i.find('a', href = True)
title = self.tag_to_string(a)
url = a.get('href', False)
url = 'http://news.mingpao.com/' + dateStr + '/' +url
if url not in included_urls and url.rfind('Redirect') == -1:
current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
included_urls.append(url)
current_articles.reverse()
return current_articles
# parse from life.mingpao.com
def parse_section2(self, url, keystr):
self.get_fetchdate()
soup = self.index_to_soup(url)
a = soup.findAll('a', href=True)
a.reverse()
current_articles = []
included_urls = []
for i in a:
title = self.tag_to_string(i)
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article
current_articles.append({'title': title, 'url': url, 'description': ''})
included_urls.append(url)
current_articles.reverse()
return current_articles
# parse from www.mingpaovan.com
def parse_section3(self, url, baseUrl):
self.get_fetchdate()
soup = self.index_to_soup(url)
divs = soup.findAll(attrs={'class': ['ListContentLargeLink']})
current_articles = []
included_urls = []
divs.reverse()
for i in divs:
title = self.tag_to_string(i)
urlstr = i.get('href', False)
urlstr = baseUrl + '/' + urlstr.replace('../../../', '')
if urlstr not in included_urls:
current_articles.append({'title': title, 'url': urlstr, 'description': '', 'date': ''})
included_urls.append(urlstr)
current_articles.reverse()
return current_articles
def parse_ed_section(self, url):
self.get_fetchdate()
soup = self.index_to_soup(url)
a = soup.findAll('a', href=True)
a.reverse()
current_articles = []
included_urls = []
for i in a:
title = self.tag_to_string(i)
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('nal') == -1):
current_articles.append({'title': title, 'url': url, 'description': ''})
included_urls.append(url)
current_articles.reverse()
return current_articles
def parse_fin_section(self, url):
self.get_fetchdate()
soup = self.index_to_soup(url)
a = soup.findAll('a', href= True)
current_articles = []
included_urls = []
for i in a:
#url = 'http://www.mpfinance.com/cfm/' + i.get('href', False)
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
#if url not in included_urls and not url.rfind(dateStr) == -1 and url.rfind('index') == -1:
if url not in included_urls and (not url.rfind('txt') == -1) and (not url.rfind('nal') == -1):
title = self.tag_to_string(i)
current_articles.append({'title': title, 'url': url, 'description':''})
included_urls.append(url)
return current_articles
def parse_ent_section(self, url):
self.get_fetchdate()
soup = self.index_to_soup(url)
a = soup.findAll('a', href=True)
a.reverse()
current_articles = []
included_urls = []
for i in a:
title = self.tag_to_string(i)
url = 'http://ol.mingpao.com/cfm/' + i.get('href', False)
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('star') == -1):
current_articles.append({'title': title, 'url': url, 'description': ''})
included_urls.append(url)
current_articles.reverse()
return current_articles
def parse_col_section(self, url):
self.get_fetchdate()
soup = self.index_to_soup(url)
a = soup.findAll('a', href=True)
a.reverse()
current_articles = []
included_urls = []
for i in a:
title = self.tag_to_string(i)
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
if (url not in included_urls) and (url.rfind('.txt') != -1) and (url.rfind('ncl') != -1):
current_articles.append({'title': title, 'url': url, 'description': ''})
included_urls.append(url)
current_articles.reverse()
return current_articles
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll(width=True):
del item['width']
for item in soup.findAll(align=True):
del item['align']
return soup
def create_opf(self, feeds, dir=None):
if dir is None:
dir = self.output_dir
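# Build the OPF manifest/spine and NCX table of contents by hand so the
# regional title, periodical metadata, cover and masthead can be set per edition.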
if __UseChineseTitle__ == True:
if __Region__ == 'Hong Kong':
title = u'\u660e\u5831 (\u9999\u6e2f)'
elif __Region__ == 'Vancouver':
title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)'
elif __Region__ == 'Toronto':
title = u'\u660e\u5831 (\u591a\u502b\u591a)'
else:
title = self.short_title()
# if not generating a periodical, force date to apply in title
if __MakePeriodical__ == False:
title = title + ' ' + self.get_fetchformatteddate()
if True:
mi = MetaInformation(title, [self.publisher])
mi.publisher = self.publisher
mi.author_sort = self.publisher
if __MakePeriodical__ == True:
mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
else:
mi.publication_type = self.publication_type+':'+self.short_title()
#mi.timestamp = nowf()
mi.timestamp = self.get_dtlocal()
mi.comments = self.description
if not isinstance(mi.comments, unicode):
mi.comments = mi.comments.decode('utf-8', 'replace')
#mi.pubdate = nowf()
mi.pubdate = self.get_dtlocal()
opf_path = os.path.join(dir, 'index.opf')
ncx_path = os.path.join(dir, 'index.ncx')
opf = OPFCreator(dir, mi)
# Add mastheadImage entry to <guide> section
mp = getattr(self, 'masthead_path', None)
if mp is not None and os.access(mp, os.R_OK):
from calibre.ebooks.metadata.opf2 import Guide
ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
ref.type = 'masthead'
ref.title = 'Masthead Image'
opf.guide.append(ref)
manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
manifest.append(os.path.join(dir, 'index.html'))
manifest.append(os.path.join(dir, 'index.ncx'))
# Get cover
cpath = getattr(self, 'cover_path', None)
if cpath is None:
pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
if self.default_cover(pf):
cpath = pf.name
if cpath is not None and os.access(cpath, os.R_OK):
opf.cover = cpath
manifest.append(cpath)
# Get masthead
mpath = getattr(self, 'masthead_path', None)
if mpath is not None and os.access(mpath, os.R_OK):
manifest.append(mpath)
opf.create_manifest_from_files_in(manifest)
for mani in opf.manifest:
if mani.path.endswith('.ncx'):
mani.id = 'ncx'
if mani.path.endswith('mastheadImage.jpg'):
mani.id = 'masthead-image'
entries = ['index.html']
toc = TOC(base_path=dir)
self.play_order_counter = 0
self.play_order_map = {}
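# feed_index walks one feed's downloaded articles (and their sub-pages),
# registers each page in the NCX with a sequential play_order, and appends
# the navigation bar to the last page of every article.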
def feed_index(num, parent):
f = feeds[num]
for j, a in enumerate(f):
if getattr(a, 'downloaded', False):
adir = 'feed_%d/article_%d/'%(num, j)
auth = a.author
if not auth:
auth = None
desc = a.text_summary
if not desc:
desc = None
else:
desc = self.description_limiter(desc)
entries.append('%sindex.html'%adir)
po = self.play_order_map.get(entries[-1], None)
if po is None:
self.play_order_counter += 1
po = self.play_order_counter
parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
play_order=po, author=auth, description=desc)
last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
for sp in a.sub_pages:
prefix = os.path.commonprefix([opf_path, sp])
relp = sp[len(prefix):]
entries.append(relp.replace(os.sep, '/'))
last = sp
if os.path.exists(last):
with open(last, 'rb') as fi:
src = fi.read().decode('utf-8')
soup = BeautifulSoup(src)
body = soup.find('body')
if body is not None:
prefix = '/'.join('..' for i in range(2 * len(re.findall(r'link\d+', last))))
templ = self.navbar.generate(True, num, j, len(f),
not self.has_single_feed,
a.orig_url, self.publisher, prefix=prefix,
center=self.center_navbar)
elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
body.insert(len(body.contents), elem)
with open(last, 'wb') as fi:
fi.write(unicode(soup).encode('utf-8'))
if len(feeds) == 0:
raise Exception('All feeds are empty, aborting.')
if len(feeds) > 1:
for i, f in enumerate(feeds):
entries.append('feed_%d/index.html'%i)
po = self.play_order_map.get(entries[-1], None)
if po is None:
self.play_order_counter += 1
po = self.play_order_counter
auth = getattr(f, 'author', None)
if not auth:
auth = None
desc = getattr(f, 'description', None)
if not desc:
desc = None
feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
f.title, play_order=po, description=desc, author=auth))
else:
entries.append('feed_%d/index.html'%0)
feed_index(0, toc)
for i, p in enumerate(entries):
entries[i] = os.path.join(dir, p.replace('/', os.sep))
opf.create_spine(entries)
opf.set_toc(toc)
with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
opf.render(opf_file, ncx_file)

recipes/nme.recipe Normal file
View File

@ -0,0 +1,42 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1306061239(BasicNewsRecipe):
title = u'New Musical Express Magazine'
__author__ = "scissors"
language = 'en'
remove_empty_feeds = True
remove_javascript = True
no_stylesheets = True
oldest_article = 7
max_articles_per_feed = 100
cover_url = 'http://tawanda3000.files.wordpress.com/2011/02/nme-logo.jpg'
remove_tags = [
dict( attrs={'class':'clear_icons'}),
dict( attrs={'class':'share_links'}),
dict( attrs={'id':'right_panel'}),
dict( attrs={'class':'today box'})
]
keep_only_tags = [
dict(name='h1'),
#dict(name='h3'),
dict(attrs={'class' : 'BText'}),
dict(attrs={'class' : 'Bmore'}),
dict(attrs={'class' : 'bPosts'}),
dict(attrs={'class' : 'text'}),
dict(attrs={'id' : 'article_gallery'}),
dict(attrs={'class' : 'article_text'})
]
feeds = [
(u'NME News', u'http://feeds2.feedburner.com/nmecom/rss/newsxml'),
(u'Reviews', u'http://feeds2.feedburner.com/nme/SdML'),
(u'Blogs', u'http://www.nme.com/blog/index.php?blog=140&tempskin=_rss2'),
]

View File

@ -0,0 +1,40 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class PortalR7(BasicNewsRecipe):
title = 'Noticias R7'
__author__ = 'Diniz Bortolotto'
description = 'Noticias Portal R7'
oldest_article = 2
max_articles_per_feed = 20
encoding = 'utf8'
publisher = 'Rede Record'
category = 'news, Brazil'
language = 'pt_BR'
publication_type = 'newsportal'
use_embedded_content = False
no_stylesheets = True
remove_javascript = True
remove_attributes = ['style']
feeds = [
(u'Brasil', u'http://www.r7.com/data/rss/brasil.xml'),
(u'Economia', u'http://www.r7.com/data/rss/economia.xml'),
(u'Internacional', u'http://www.r7.com/data/rss/internacional.xml'),
(u'Tecnologia e Ci\xeancia', u'http://www.r7.com/data/rss/tecnologiaCiencia.xml')
]
reverse_article_order = True
keep_only_tags = [dict(name='div', attrs={'class':'materia'})]
remove_tags = [
dict(id=['espalhe', 'report-erro']),
dict(name='ul', attrs={'class':'controles'}),
dict(name='ul', attrs={'class':'relacionados'}),
dict(name='div', attrs={'class':'materia_banner'}),
dict(name='div', attrs={'class':'materia_controles'})
]
preprocess_regexps = [
(re.compile(r'<div class="materia">.*<div class="materia_cabecalho">',re.DOTALL|re.IGNORECASE),
lambda match: '<div class="materia"><div class="materia_cabecalho">')
]

View File

@ -26,6 +26,7 @@ class Perfil(BasicNewsRecipe):
.foto1 h1{font-size: x-small}
h1{font-family: Georgia,"Times New Roman",serif}
img{margin-bottom: 0.4em}
.hora{font-size: x-small; color: red}
"""
conversion_options = {
@ -60,7 +61,26 @@ class Perfil(BasicNewsRecipe):
,(u'Tecnologia' , u'http://www.perfil.com/rss/tecnologia.xml' )
]
def get_article_url(self, article):
return article.get('guid', None)
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll('a'):
limg = item.find('img')
if item.string is not None:
tstr = item.string
item.replaceWith(tstr)
else:
if limg:
item.name = 'div'
item.attrs = []
else:
tstr = self.tag_to_string(item)
item.replaceWith(tstr)
for item in soup.findAll('img'):
if not item.has_key('alt'):
item['alt'] = 'image'
return soup

View File

@ -1,85 +1,45 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
'''
philly.com/inquirer/
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.web.feeds.news import BasicNewsRecipe
class Philly(BasicNewsRecipe):
title = 'Philadelphia Inquirer'
__author__ = 'RadikalDissent and Sujata Raman'
class AdvancedUserRecipe1308312288(BasicNewsRecipe):
title = u'Philadelphia Inquirer'
__author__ = 'sexymax15'
language = 'en'
description = 'Daily news from the Philadelphia Inquirer'
no_stylesheets = True
oldest_article = 15
max_articles_per_feed = 20
use_embedded_content = False
oldest_article = 1
max_articles_per_feed = 25
remove_empty_feeds = True
no_stylesheets = True
remove_javascript = True
extra_css = '''
h1{font-family:verdana,arial,helvetica,sans-serif; font-size: large;}
h2{font-family:verdana,arial,helvetica,sans-serif; font-size: small;}
.body-content{font-family:verdana,arial,helvetica,sans-serif; font-size: small;}
.byline {font-size: small; color: #666666; font-style:italic; }
.lastline {font-size: small; color: #666666; font-style:italic;}
.contact {font-size: small; color: #666666;}
.contact p {font-size: small; color: #666666;}
#photoCaption { font-family:verdana,arial,helvetica,sans-serif; font-size:x-small;}
.photoCaption { font-family:verdana,arial,helvetica,sans-serif; font-size:x-small;}
#photoCredit{ font-family:verdana,arial,helvetica,sans-serif; font-size:x-small; color:#666666;}
.photoCredit{ font-family:verdana,arial,helvetica,sans-serif; font-size:x-small; color:#666666;}
.article_timestamp{font-size:x-small; color:#666666;}
a {font-family:verdana,arial,helvetica,sans-serif; font-size: x-small;}
'''
# remove_tags_before = {'class':'article_timestamp'}
#remove_tags_after = {'class':'graylabel'}
keep_only_tags= [dict(name=['h1','p'])]
remove_tags = [dict(name=['hr','dl','dt','img','meta','iframe','link','script','form','input','label']),
dict(id=['toggleConfirmEmailDiv','toggleTOS','toggleUsernameMsgDiv','toggleConfirmYear','navT1_philly','secondaryNav','navPlacement','globalPrimaryNav'
,'ugc-footer-philly','bv_footer_include','footer','header',
'container_rag_bottom','section_rectangle','contentrightside'])
,{'class':['megamenu3 megamenu','container misc','container_inner misc_inner'
,'misccontainer_left_32','headlineonly','misccontainer_middle_32'
,'misccontainer_right_32','headline formBegin',
'post_balloon','relatedlist','linkssubhead','b_sq','dotted-rule-above'
,'container','headlines-digest','graylabel','container_inner'
,'rlinks_colorbar1','rlinks_colorbar2','supercontainer','container_5col_left','container_image_left',
'digest-headline2','digest-lead','container_5col_leftmiddle',
'container_5col_middlemiddle','container_5col_rightmiddle'
,'container_5col_right','divclear','supercontainer_outer force-width',
'supercontainer','containertitle kicker-title',
'pollquestion','pollchoice','photomore','pollbutton','container rssbox','containertitle video ',
'containertitle_image ','container_tabtwo','selected'
,'shadetabs','selected','tabcontentstyle','tabcontent','inner_container'
,'arrow','container_ad','containertitlespacer','adUnit','tracking','sitemsg_911 clearfix']}]
keep_only_tags = [
dict(name='div', attrs={'class':'story-content'}),
dict(name='div', attrs={'id': 'contentinside'})
]
extra_css = """
h1{font-family: Georgia,serif; font-size: xx-large}
remove_tags = [
dict(name='div', attrs={'class':['linkssubhead','post_balloon','relatedlist','pollquestion','b_sq']}),
dict(name='dl', attrs={'class':'relatedlist'}),
dict(name='div', attrs={'id':['photoNav','sidebar_adholder']}),
dict(name='a', attrs={'class': ['headlineonly','bl']}),
dict(name='img', attrs={'class':'img_noborder'})
]
# def print_version(self, url):
# return url + '?viewAll=y'
"""
feeds = [
('Front Page', 'http://www.philly.com/inquirer_front_page.rss'),
('Business', 'http://www.philly.com/inq_business.rss'),
#('News', 'http://www.philly.com/inquirer/news/index.rss'),
('Nation', 'http://www.philly.com/inq_news_world_us.rss'),
('Local', 'http://www.philly.com/inquirer_local.rss'),
('Health', 'http://www.philly.com/inquirer_health_science.rss'),
('Education', 'http://www.philly.com/inquirer_education.rss'),
('Editorial and opinion', 'http://www.philly.com/inq_news_editorial.rss'),
('Sports', 'http://www.philly.com/inquirer_sports.rss')
]
feeds = [(u'News', u'http://www.philly.com/philly_news.rss')]
def get_article_url(self, article):
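# Request the single-page view (?viewAll=y) whenever the article page
# offers a 'View All' link.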
ans = article.link
try:
self.log('Looking for full story link in', ans)
soup = self.index_to_soup(ans)
x = soup.find(text="View All")
if x is not None:
ans = ans + '?viewAll=y'
self.log('Found full story link', ans)
except:
pass
return ans
def postprocess_html(self, soup,first):
for tag in soup.findAll(name='div',attrs={'class':"container_ate_qandatitle"}):
tag.extract()
for tag in soup.findAll(name='br'):
tag.extract()
return soup

recipes/scmp.recipe Normal file
View File

@ -0,0 +1,80 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
scmp.com
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class SCMP(BasicNewsRecipe):
title = 'South China Morning Post'
__author__ = 'llam'
description = "SCMP.com, Hong Kong's premier online English daily provides exclusive up-to-date news, audio video news, podcasts, RSS Feeds, Blogs, breaking news, top stories, award winning news and analysis on Hong Kong and China."
publisher = 'South China Morning Post Publishers Ltd.'
category = 'SCMP, Online news, Hong Kong News, China news, Business news, English newspaper, daily newspaper, Lifestyle news, Sport news, Audio Video news, Asia news, World news, economy news, investor relations news, RSS Feeds'
oldest_article = 2
delay = 1
max_articles_per_feed = 200
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = False
language = 'en_CN'
remove_empty_feeds = True
needs_subscription = True
publication_type = 'newspaper'
masthead_url = 'http://www.scmp.com/images/logo_scmp_home.gif'
extra_css = ' body{font-family: Arial,Helvetica,sans-serif } '
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
def get_browser(self):
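# Log in through the site's loginForm so subscriber-only article text is
# available; username and password come from calibre's subscription support.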
br = BasicNewsRecipe.get_browser()
#br.set_debug_http(True)
#br.set_debug_responses(True)
#br.set_debug_redirects(True)
if self.username is not None and self.password is not None:
br.open('http://www.scmp.com/portal/site/SCMP/')
br.select_form(name='loginForm')
br['Login' ] = self.username
br['Password'] = self.password
br.submit()
return br
remove_attributes=['width','height','border']
keep_only_tags = [
dict(attrs={'id':['ART','photoBox']})
,dict(attrs={'class':['article_label','article_byline','article_body']})
]
preprocess_regexps = [
(re.compile(r'<P><table((?!<table).)*class="embscreen"((?!</table>).)*</table>', re.DOTALL|re.IGNORECASE),
lambda match: ''),
]
feeds = [
(u'Business' , u'http://www.scmp.com/rss/business.xml' )
,(u'Hong Kong' , u'http://www.scmp.com/rss/hong_kong.xml' )
,(u'China' , u'http://www.scmp.com/rss/china.xml' )
,(u'Asia & World' , u'http://www.scmp.com/rss/news_asia_world.xml')
,(u'Opinion' , u'http://www.scmp.com/rss/opinion.xml' )
,(u'LifeSTYLE' , u'http://www.scmp.com/rss/lifestyle.xml' )
,(u'Sport' , u'http://www.scmp.com/rss/sport.xml' )
]
def print_version(self, url):
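# Strip the final query parameter (everything from the last '&') to get
# the printable version of the article URL.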
rpart, sep, rest = url.rpartition('&')
return rpart #+ sep + urllib.quote_plus(rest)
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
items = soup.findAll(src="/images/label_icon.gif")
[item.extract() for item in items]
return self.adeify_images(soup)

View File

@ -0,0 +1,40 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class Sizinti(BasicNewsRecipe):
title = u'Sızıntı Dergisi'
__author__ = u'thomass'
description = 'Turkish monthly magazine of science and culture'
oldest_article = 30
max_articles_per_feed =80
no_stylesheets = True
#delay = 1
#use_embedded_content = False
encoding = 'utf-8'
#publisher = ' '
category = 'dergi, ilim, kültür, bilim, Türkçe'
language = 'tr'
publication_type = 'magazine'
#extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
#keep_only_tags = [dict(name='h1', attrs={'class':['georgia_30']})]
#remove_attributes = ['aria-describedby']
#remove_tags = [dict(name='div', attrs={'id':['renk10']}) ]
cover_img_url = 'http://www.sizinti.com.tr/images/sizintiprint.jpg'
masthead_url = 'http://www.sizinti.com.tr/images/sizintiprint.jpg'
remove_tags_before = dict(id='content-right')
#remove_empty_feeds= True
#remove_attributes = ['width','height']
feeds = [
( u'Sızıntı', u'http://www.sizinti.com.tr/rss'),
]
#def preprocess_html(self, soup):
# return self.adeify_images(soup)
#def print_version(self, url): #there is a problem caused by table format
#return url.replace('http://www.todayszaman.com/newsDetail_getNewsById.action?load=detay&', 'http://www.todayszaman.com/newsDetail_openPrintPage.action?')

View File

@ -0,0 +1,56 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
stiintasitehnica.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Stiintasitehnica(BasicNewsRecipe):
title = u'\u0218tiin\u021b\u0103 \u015fi Tehnic\u0103'
__author__ = u'Silviu Cotoar\u0103'
description = u'\u0218tiin\u021b\u0103 \u015fi Tehnic\u0103'
publisher = u'\u0218tiin\u021b\u0103 \u015fi Tehnic\u0103'
oldest_article = 50
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = u'Ziare,Reviste,Stiinta,Tehnica'
encoding = 'utf-8'
cover_url = 'http://www.stiintasitehnica.com/images/logo.jpg'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='div', attrs={'id':'mainColumn2'})
]
remove_tags = [
dict(name='span', attrs={'class':['redEar']})
, dict(name='table', attrs={'class':['connect_widget_interactive_area']})
, dict(name='div', attrs={'class':['panel-overlay']})
, dict(name='div', attrs={'id':['pointer']})
, dict(name='img', attrs={'class':['nav-next', 'nav-prev']})
, dict(name='hr', attrs={'class':['dotted']})
]
remove_tags_after = [
dict(name='hr', attrs={'class':['dotted']})
]
feeds = [
(u'Feeds', u'http://www.stiintasitehnica.com/rss/stiri.xml')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -56,6 +56,7 @@ class TelegraphUK(BasicNewsRecipe):
,(u'Sport' , u'http://www.telegraph.co.uk/sport/rss' )
,(u'Earth News' , u'http://www.telegraph.co.uk/earth/earthnews/rss' )
,(u'Comment' , u'http://www.telegraph.co.uk/comment/rss' )
,(u'Travel' , u'http://www.telegraph.co.uk/travel/rss' )
,(u'How about that?', u'http://www.telegraph.co.uk/news/newstopics/howaboutthat/rss' )
]

View File

@ -0,0 +1,53 @@
from calibre.web.feeds.news import BasicNewsRecipe
class TodaysZaman_en(BasicNewsRecipe):
title = u"Today's Zaman"
__author__ = u'thomass'
description = 'a Turkey-based daily for national and international news in the fields of business, diplomacy, politics, culture, arts, sports and economics, in addition to commentaries, specials and features'
oldest_article = 2
max_articles_per_feed =100
no_stylesheets = True
#delay = 1
#use_embedded_content = False
encoding = 'utf-8'
#publisher = ' '
category = 'news, haberler,TR,gazete'
language = 'en_TR'
publication_type = 'newspaper'
#extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
#keep_only_tags = [dict(name='font', attrs={'class':['newsDetail','agenda2NewsSpot']}),dict(name='span', attrs={'class':['agenda2Title']}),dict(name='div', attrs={'id':['gallery']})]
keep_only_tags = [dict(name='h1', attrs={'class':['georgia_30']}),dict(name='span', attrs={'class':['left-date','detailDate','detailCName']}),dict(name='td', attrs={'id':['newsSpot','newsText']})] # to add images: ,dict(name='div', attrs={'id':['gallery','detailDate',]})
remove_attributes = ['aria-describedby']
remove_tags = [dict(name='img', attrs={'src':['/images/icon_print.gif','http://gmodules.com/ig/images/plus_google.gif','/images/template/jazz/agenda/i1.jpg', 'http://medya.todayszaman.com/todayszaman/images/logo/logo.bmp']}),dict(name='hr', attrs={'class':[ 'interactive-hr']}),dict(name='div', attrs={'class':[ 'empty_height_18','empty_height_9']}) ,dict(name='td', attrs={'id':[ 'superTitle']}),dict(name='span', attrs={'class':[ 't-count enabled t-count-focus']}),dict(name='a', attrs={'id':[ 'count']}),dict(name='td', attrs={'class':[ 'left-date']}) ]
cover_img_url = 'http://medya.todayszaman.com/todayszaman/images/logo/logo.bmp'
masthead_url = 'http://medya.todayszaman.com/todayszaman/images/logo/logo.bmp'
remove_empty_feeds= True
# remove_attributes = ['width','height']
feeds = [
( u'Home', u'http://www.todayszaman.com/rss?sectionId=0'),
( u'News', u'http://www.todayszaman.com/rss?sectionId=100'),
( u'Business', u'http://www.todayszaman.com/rss?sectionId=105'),
( u'Interviews', u'http://www.todayszaman.com/rss?sectionId=8'),
( u'Columnists', u'http://www.todayszaman.com/rss?sectionId=6'),
( u'Op-Ed', u'http://www.todayszaman.com/rss?sectionId=109'),
( u'Arts & Culture', u'http://www.todayszaman.com/rss?sectionId=110'),
( u'Expat Zone', u'http://www.todayszaman.com/rss?sectionId=132'),
( u'Sports', u'http://www.todayszaman.com/rss?sectionId=5'),
( u'Features', u'http://www.todayszaman.com/rss?sectionId=116'),
( u'Travel', u'http://www.todayszaman.com/rss?sectionId=117'),
( u'Leisure', u'http://www.todayszaman.com/rss?sectionId=118'),
( u'Weird But True', u'http://www.todayszaman.com/rss?sectionId=134'),
( u'Life', u'http://www.todayszaman.com/rss?sectionId=133'),
( u'Health', u'http://www.todayszaman.com/rss?sectionId=126'),
( u'Press Review', u'http://www.todayszaman.com/rss?sectionId=130'),
( u'Todays think tanks', u'http://www.todayszaman.com/rss?sectionId=159'),
]
#def preprocess_html(self, soup):
# return self.adeify_images(soup)
#def print_version(self, url): #there is a problem caused by table format
#return url.replace('http://www.todayszaman.com/newsDetail_getNewsById.action?load=detay&', 'http://www.todayszaman.com/newsDetail_openPrintPage.action?')

View File

@ -0,0 +1,25 @@
# Recipe created by sexymax15 <sexymax15@gmail.com>
#Words without Borders recipe
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1308302002(BasicNewsRecipe):
title = u'Words Without Borders'
language = 'en'
__author__ = 'sexymax15'
oldest_article = 90
max_articles_per_feed = 30
use_embedded_content = False
remove_empty_feeds = True
no_stylesheets = True
remove_javascript = True
keep_only_tags = [{'class':'span-14 article'}]
remove_tags_after = [{'class':'addthis_toolbox addthis_default_style no_print'}]
remove_tags = [{'class':['posterous_quote_citation','button']}]
extra_css = """
h1{font-family: Georgia,serif; font-size: large}h2{font-family: Georgia,serif; font-size: large} """
feeds = [(u'wwb', u'http://feeds.feedburner.com/wwborders?format=xml')]

View File

@ -2,6 +2,7 @@
__license__ = 'GPL v3'
__copyright__ = '2010, matek09, matek09@gmail.com'
__copyright__ = 'Modified 2011, Mariusz Wolek <mariusz_dot_wolek @ gmail dot com>'
from calibre.web.feeds.news import BasicNewsRecipe
import re
@ -30,15 +31,17 @@ class Wprost(BasicNewsRecipe):
keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'def element-autor'}))'''
preprocess_regexps = [(re.compile(r'style="display: none;"'), lambda match: ''),
(re.compile(r'display: block;'), lambda match: '')]
(re.compile(r'display: block;'), lambda match: ''),
(re.compile(r'\<td\>\<tr\>\<\/table\>'), lambda match: ''),
(re.compile(r'\<table .*?\>'), lambda match: ''),
(re.compile(r'\<tr>'), lambda match: ''),
(re.compile(r'\<td .*?\>'), lambda match: '')]
remove_tags =[]
remove_tags.append(dict(name = 'div', attrs = {'class' : 'def element-date'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'def silver'}))
remove_tags.append(dict(name = 'div', attrs = {'id' : 'content-main-column-right'}))
extra_css = '''
.div-header {font-size: x-small; font-weight: bold}
'''
@ -88,4 +91,3 @@ class Wprost(BasicNewsRecipe):
'description' : ''
}

View File

@ -51,7 +51,7 @@ class WallStreetJournal(BasicNewsRecipe):
br['password'] = self.password
res = br.submit()
raw = res.read()
if 'Welcome,' not in raw:
if 'Welcome,' not in raw and '>Logout<' not in raw:
raise ValueError('Failed to log in to wsj.com, check your '
'username and password')
return br

View File

@ -1,20 +1,55 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class ZamanRecipe(BasicNewsRecipe):
title = u'Zaman'
__author__ = u'Deniz Og\xfcz'
class Zaman (BasicNewsRecipe):
title = u'ZAMAN Gazetesi'
__author__ = u'thomass'
oldest_article = 2
max_articles_per_feed =100
# no_stylesheets = True
#delay = 1
#use_embedded_content = False
encoding = 'ISO 8859-9'
publisher = 'Zaman'
category = 'news, haberler,TR,gazete'
language = 'tr'
oldest_article = 1
max_articles_per_feed = 10
publication_type = 'newspaper'
extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
conversion_options = {
'tags' : category
,'language' : language
,'publisher' : publisher
,'linearize_tables': False
}
cover_img_url = 'https://fbcdn-profile-a.akamaihd.net/hprofile-ak-snc4/188140_81722291869_2111820_n.jpg'
masthead_url = 'http://medya.zaman.com.tr/extentions/zaman.com.tr/img/section/logo-section.png'
cover_url = 'http://medya.zaman.com.tr/zamantryeni/pics/zamanonline.gif'
feeds = [(u'Gundem', u'http://www.zaman.com.tr/gundem.rss'),
(u'Son Dakika', u'http://www.zaman.com.tr/sondakika.rss'),
(u'Spor', u'http://www.zaman.com.tr/spor.rss'),
(u'Ekonomi', u'http://www.zaman.com.tr/ekonomi.rss'),
(u'Politika', u'http://www.zaman.com.tr/politika.rss'),
(u'D\u0131\u015f Haberler', u'http://www.zaman.com.tr/dishaberler.rss'),
(u'Yazarlar', u'http://www.zaman.com.tr/yazarlar.rss'),]
def print_version(self, url):
return url.replace('www.zaman.com.tr/haber.do?', 'www.zaman.com.tr/yazdir.do?')
keep_only_tags = [dict(name='div', attrs={'id':[ 'news-detail-content']}), dict(name='td', attrs={'class':['columnist-detail','columnist_head']}) ]
remove_tags = [ dict(name='div', attrs={'id':['news-detail-news-text-font-size','news-detail-gallery','news-detail-news-bottom-social']}),dict(name='div', attrs={'class':['radioEmbedBg','radyoProgramAdi']}),dict(name='a', attrs={'class':['webkit-html-attribute-value webkit-html-external-link']}),dict(name='table', attrs={'id':['yaziYorumTablosu']}),dict(name='img', attrs={'src':['http://medya.zaman.com.tr/pics/paylas.gif','http://medya.zaman.com.tr/extentions/zaman.com.tr/img/columnist/ma-16.png']})]
#remove_attributes = ['width','height']
remove_empty_feeds= True
feeds = [
( u'Anasayfa', u'http://www.zaman.com.tr/anasayfa.rss'),
( u'Son Dakika', u'http://www.zaman.com.tr/sondakika.rss'),
( u'En çok Okunanlar', u'http://www.zaman.com.tr/max_all.rss'),
( u'Gündem', u'http://www.zaman.com.tr/gundem.rss'),
( u'Yazarlar', u'http://www.zaman.com.tr/yazarlar.rss'),
( u'Politika', u'http://www.zaman.com.tr/politika.rss'),
( u'Ekonomi', u'http://www.zaman.com.tr/ekonomi.rss'),
( u'Dış Haberler', u'http://www.zaman.com.tr/dishaberler.rss'),
( u'Yorumlar', u'http://www.zaman.com.tr/yorumlar.rss'),
( u'Röportaj', u'http://www.zaman.com.tr/roportaj.rss'),
( u'Spor', u'http://www.zaman.com.tr/spor.rss'),
( u'Kürsü', u'http://www.zaman.com.tr/kursu.rss'),
( u'Kültür Sanat', u'http://www.zaman.com.tr/kultursanat.rss'),
( u'Televizyon', u'http://www.zaman.com.tr/televizyon.rss'),
( u'Manşet', u'http://www.zaman.com.tr/manset.rss'),
]

View File

@ -20,7 +20,7 @@
<script type="text/javascript"
src="{prefix}/static/jquery.multiselect.min.js"></script>
<script type="text/javascript" src="{prefix}/static/stacktrace.js"></script>
<script type="text/javascript" src="{prefix}/static/browse/browse.js"></script>
<script type="text/javascript">

View File

@ -129,7 +129,13 @@ function toplevel() {
// }}}
function render_error(msg) {
return '<div class="ui-widget"><div class="ui-state-error ui-corner-all" style="padding: 0pt 0.7em"><p><span class="ui-icon ui-icon-alert" style="float: left; margin-right: 0.3em">&nbsp;</span><strong>Error: </strong>'+msg+"</p></div></div>"
var st = "";
try {
st = printStackTrace();
st = st.join('\n\n');
} catch(e) {
}
return '<div class="ui-widget"><div class="ui-state-error ui-corner-all" style="padding: 0pt 0.7em"><p><span class="ui-icon ui-icon-alert" style="float: left; margin-right: 0.3em">&nbsp;</span><strong>Error: </strong>'+msg+"<pre>"+st+"</pre></p></div></div>"
}
// Category feed {{{

View File

@ -1,5 +1,5 @@
Monocle = {
VERSION: "1.0.0"
VERSION: "2.0.0"
};
@ -170,7 +170,8 @@ Monocle.Browser.has.iframeTouchBug = Monocle.Browser.iOSVersionBelow("4.2");
Monocle.Browser.has.selectThruBug = Monocle.Browser.iOSVersionBelow("4.2");
Monocle.Browser.has.mustScrollSheaf = Monocle.Browser.is.MobileSafari;
Monocle.Browser.has.iframeDoubleWidthBug = Monocle.Browser.has.mustScrollSheaf;
Monocle.Browser.has.iframeDoubleWidthBug =
Monocle.Browser.has.mustScrollSheaf || Monocle.Browser.on.Kindle3;
Monocle.Browser.has.floatColumnBug = Monocle.Browser.is.WebKit;
@ -181,6 +182,11 @@ Monocle.Browser.has.jumpFlickerBug =
Monocle.Browser.on.MacOSX && Monocle.Browser.is.WebKit;
Monocle.Browser.has.columnOverflowPaintBug = Monocle.Browser.is.WebKit &&
!Monocle.Browser.is.MobileSafari &&
navigator.userAgent.indexOf("AppleWebKit/534") > 0;
if (typeof window.console == "undefined") {
window.console = {
messages: [],
@ -241,6 +247,7 @@ Monocle.Factory = function (element, label, index, reader) {
function initialize() {
if (!p.label) { return; }
var node = p.reader.properties.graph;
node[p.label] = node[p.label] || [];
if (typeof p.index == 'undefined' && node[p.label][p.index]) {
@ -274,7 +281,11 @@ Monocle.Factory = function (element, label, index, reader) {
function make(tagName, oLabel, index_or_options, or_options) {
var oIndex, options;
if (arguments.length == 2) {
if (arguments.length == 1) {
oLabel = null,
oIndex = 0;
options = {};
} else if (arguments.length == 2) {
oIndex = 0;
options = {};
} else if (arguments.length == 4) {
@ -376,6 +387,22 @@ Monocle.pieceLoaded('factory');
Monocle.Events = {}
Monocle.Events.dispatch = function (elem, evtType, data, cancelable) {
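// Shared dispatcher for Monocle's custom events: the payload rides on
// evt.m, and dispatch failures are logged rather than thrown.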
if (!document.createEvent) {
return true;
}
var evt = document.createEvent("Events");
evt.initEvent(evtType, false, cancelable || false);
evt.m = data;
try {
return elem.dispatchEvent(evt);
} catch(e) {
console.warn("Failed to dispatch event: "+evtType);
return false;
}
}
Monocle.Events.listen = function (elem, evtType, fn, useCapture) {
if (elem.addEventListener) {
return elem.addEventListener(evtType, fn, useCapture || false);
@ -405,7 +432,7 @@ Monocle.Events.listenForContact = function (elem, fns, options) {
pageY: ci.pageY
};
var target = evt.target || window.srcElement;
var target = evt.target || evt.srcElement;
while (target.nodeType != 1 && target.parentNode) {
target = target.parentNode;
}
@ -527,13 +554,18 @@ Monocle.Events.deafenForContact = function (elem, listeners) {
}
Monocle.Events.listenForTap = function (elem, fn) {
Monocle.Events.listenForTap = function (elem, fn, activeClass) {
var startPos;
if (Monocle.Browser.on.Kindle3) {
Monocle.Events.listen(elem, 'click', function () {});
}
var annul = function () {
startPos = null;
if (activeClass && elem.dom) { elem.dom.removeClass(activeClass); }
}
var annulIfOutOfBounds = function (evt) {
if (evt.type.match(/^mouse/)) {
return;
@ -545,7 +577,7 @@ Monocle.Events.listenForTap = function (elem, fn) {
evt.m.registrantX < 0 || evt.m.registrantX > elem.offsetWidth ||
evt.m.registrantY < 0 || evt.m.registrantY > elem.offsetHeight
) {
startPos = null;
annul();
} else {
evt.preventDefault();
}
@ -557,6 +589,7 @@ Monocle.Events.listenForTap = function (elem, fn) {
start: function (evt) {
startPos = [evt.m.pageX, evt.m.pageY];
evt.preventDefault();
if (activeClass && elem.dom) { elem.dom.addClass(activeClass); }
},
move: annulIfOutOfBounds,
end: function (evt) {
@ -565,10 +598,9 @@ Monocle.Events.listenForTap = function (elem, fn) {
evt.m.startOffset = startPos;
fn(evt);
}
annul();
},
cancel: function (evt) {
startPos = null;
}
cancel: annul
},
{
useCapture: false
@ -997,6 +1029,9 @@ Monocle.Reader = function (node, bookData, options, onLoadCallback) {
createReaderElements();
p.defaultStyles = addPageStyles(k.DEFAULT_STYLE_RULES, false);
if (options.stylesheet) {
p.initialStyles = addPageStyles(options.stylesheet, false);
}
primeFrames(options.primeURL, function () {
applyStyles();
@ -1077,6 +1112,7 @@ Monocle.Reader = function (node, bookData, options, onLoadCallback) {
if (Monocle.Browser.is.WebKit) {
frame.contentDocument.documentElement.style.overflow = "hidden";
}
dispatchEvent('monocle:frameprimed', { frame: frame, pageIndex: pageCount });
if ((pageCount += 1) == pageMax) {
Monocle.defer(callback);
}
@ -1131,6 +1167,7 @@ Monocle.Reader = function (node, bookData, options, onLoadCallback) {
var pageCount = 0;
if (typeof callback == 'function') {
var watcher = function (evt) {
dispatchEvent('monocle:firstcomponentchange', evt.m);
if ((pageCount += 1) == p.flipper.pageCount) {
deafen('monocle:componentchange', watcher);
callback();
@ -1239,7 +1276,7 @@ Monocle.Reader = function (node, bookData, options, onLoadCallback) {
page.appendChild(runner);
ctrlData.elements.push(runner);
}
} else if (cType == "modal" || cType == "popover") {
} else if (cType == "modal" || cType == "popover" || cType == "hud") {
ctrlElem = ctrl.createControlElements(overlay);
overlay.appendChild(ctrlElem);
ctrlData.elements.push(ctrlElem);
@ -1312,24 +1349,33 @@ Monocle.Reader = function (node, bookData, options, onLoadCallback) {
var controlData = dataForControl(ctrl);
if (!controlData) {
console.warn("No data for control: " + ctrl);
return;
return false;
}
if (controlData.hidden == false) {
return;
if (showingControl(ctrl)) {
return false;
}
var overlay = dom.find('overlay');
if (controlData.usesOverlay && controlData.controlType != "hud") {
for (var i = 0, ii = p.controls.length; i < ii; ++i) {
if (p.controls[i].usesOverlay && !p.controls[i].hidden) {
return false;
}
}
overlay.style.display = "block";
}
for (var i = 0; i < controlData.elements.length; ++i) {
controlData.elements[i].style.display = "block";
}
var overlay = dom.find('overlay');
if (controlData.usesOverlay) {
overlay.style.display = "block";
}
if (controlData.controlType == "popover") {
overlay.listeners = Monocle.Events.listenForContact(
overlay,
{
start: function (evt) {
obj = evt.target || window.event.srcElement;
var obj = evt.target || window.event.srcElement;
do {
if (obj == controlData.elements[0]) { return true; }
} while (obj && (obj = obj.parentNode));
@ -1346,22 +1392,18 @@ Monocle.Reader = function (node, bookData, options, onLoadCallback) {
ctrl.properties.hidden = false;
}
dispatchEvent('controlshow', ctrl, false);
return true;
}
function showingControl(ctrl) {
var controlData = dataForControl(ctrl);
return controlData.hidden == false;
}
function dispatchEvent(evtType, data, cancelable) {
if (!document.createEvent) {
return true;
}
var evt = document.createEvent("Events");
evt.initEvent(evtType, false, cancelable || false);
evt.m = data;
try {
return dom.find('box').dispatchEvent(evt);
} catch(e) {
console.warn("Failed to dispatch event: " + evtType);
return false;
}
return Monocle.Events.dispatch(dom.find('box'), evtType, data, cancelable);
}
@ -1502,6 +1544,7 @@ Monocle.Reader = function (node, bookData, options, onLoadCallback) {
API.addControl = addControl;
API.hideControl = hideControl;
API.showControl = showControl;
API.showingControl = showingControl;
API.dispatchEvent = dispatchEvent;
API.listen = listen;
API.deafen = deafen;
@ -1527,22 +1570,32 @@ Monocle.Reader.DEFAULT_CLASS_PREFIX = 'monelem_'
Monocle.Reader.FLIPPER_DEFAULT_CLASS = "Slider";
Monocle.Reader.FLIPPER_LEGACY_CLASS = "Legacy";
Monocle.Reader.DEFAULT_STYLE_RULES = [
"html * {" +
"html#RS\\:monocle * {" +
"-webkit-font-smoothing: subpixel-antialiased;" +
"text-rendering: auto !important;" +
"word-wrap: break-word !important;" +
"overflow: visible !important;" +
(Monocle.Browser.has.floatColumnBug ? "float: none !important;" : "") +
"}" +
"body {" +
"}",
"html#RS\\:monocle body {" +
"margin: 0 !important;" +
"padding: 0 !important;" +
"-webkit-text-size-adjust: none;" +
"}" +
"table, img {" +
"}",
"html#RS\\:monocle body * {" +
"max-width: 100% !important;" +
"max-height: 90% !important;" +
"}",
"html#RS\\:monocle img, html#RS\\:monocle video, html#RS\\:monocle object {" +
"max-height: 95% !important;" +
"}"
]
if (Monocle.Browser.has.columnOverflowPaintBug) {
Monocle.Reader.DEFAULT_STYLE_RULES.push(
"::-webkit-scrollbar { width: 0; height: 0; }"
)
}
Monocle.pieceLoaded('reader');
/* BOOK */
@ -1586,6 +1639,16 @@ Monocle.Book = function (dataSource) {
locus.load = true;
locus.componentId = p.componentIds[0];
return locus;
} else if (
cIndex < 0 &&
locus.componentId &&
currComponent.properties.id != locus.componentId
) {
pageDiv.m.reader.dispatchEvent(
"monocle:notfound",
{ href: locus.componentId }
);
return null;
} else if (cIndex < 0) {
component = currComponent;
locus.componentId = pageDiv.m.activeFrame.m.component.properties.id;
@ -1619,6 +1682,8 @@ Monocle.Book = function (dataSource) {
result.page += locus.direction;
} else if (typeof(locus.anchor) == "string") {
result.page = component.pageForChapter(locus.anchor, pageDiv);
} else if (typeof(locus.xpath) == "string") {
result.page = component.pageForXPath(locus.xpath, pageDiv);
} else if (typeof(locus.position) == "string") {
if (locus.position == "start") {
result.page = 1;
@ -1638,6 +1703,7 @@ Monocle.Book = function (dataSource) {
if (result.page < 1) {
if (cIndex == 0) {
result.page = 1;
result.boundarystart = true;
} else {
result.load = true;
result.componentId = p.componentIds[cIndex - 1];
@ -1647,6 +1713,7 @@ Monocle.Book = function (dataSource) {
} else if (result.page > lastPageNum['new']) {
if (cIndex == p.lastCIndex) {
result.page = lastPageNum['new'];
result.boundaryend = true;
} else {
result.load = true;
result.componentId = p.componentIds[cIndex + 1];
@ -1660,7 +1727,13 @@ Monocle.Book = function (dataSource) {
function setPageAt(pageDiv, locus) {
locus = pageNumberAt(pageDiv, locus);
if (!locus.load) {
if (locus && !locus.load) {
var evtData = { locus: locus, page: pageDiv }
if (locus.boundarystart) {
pageDiv.m.reader.dispatchEvent('monocle:boundarystart', evtData);
} else if (locus.boundaryend) {
pageDiv.m.reader.dispatchEvent('monocle:boundaryend', evtData);
} else {
var component = p.components[p.componentIds.indexOf(locus.componentId)];
pageDiv.m.place = pageDiv.m.place || new Monocle.Place();
pageDiv.m.place.setPlace(component, locus.page);
@ -1673,6 +1746,7 @@ Monocle.Book = function (dataSource) {
}
pageDiv.m.reader.dispatchEvent("monocle:pagechange", evtData);
}
}
return locus;
}
@ -1683,6 +1757,10 @@ Monocle.Book = function (dataSource) {
locus = pageNumberAt(pageDiv, locus);
}
if (!locus) {
return;
}
if (!locus.load) {
callback(locus);
return;
@ -1690,7 +1768,9 @@ Monocle.Book = function (dataSource) {
var findPageNumber = function () {
locus = setPageAt(pageDiv, locus);
if (locus.load) {
if (!locus) {
return;
} else if (locus.load) {
loadPageAt(pageDiv, locus, callback, progressCallback)
} else {
callback(locus);
@ -1715,10 +1795,12 @@ Monocle.Book = function (dataSource) {
}
function setOrLoadPageAt(pageDiv, locus, callback, progressCallback) {
function setOrLoadPageAt(pageDiv, locus, callback, onProgress, onFail) {
locus = setPageAt(pageDiv, locus);
if (locus.load) {
loadPageAt(pageDiv, locus, callback, progressCallback);
if (!locus) {
if (onFail) { onFail(); }
} else if (locus.load) {
loadPageAt(pageDiv, locus, callback, onProgress);
} else {
callback(locus);
}
@ -1864,13 +1946,18 @@ Monocle.Place = function () {
}
function percentageThrough() {
function percentAtTopOfPage() {
return p.percent - 1.0 / p.component.lastPageNumber();
}
function percentAtBottomOfPage() {
return p.percent;
}
function pageAtPercentageThrough(pc) {
return Math.max(Math.round(p.component.lastPageNumber() * pc), 1);
function pageAtPercentageThrough(percent) {
return Math.max(Math.round(p.component.lastPageNumber() * percent), 1);
}
@ -1911,6 +1998,8 @@ Monocle.Place = function () {
}
if (options.direction) {
locus.page += options.direction;
} else {
locus.percent = percentAtBottomOfPage();
}
return locus;
}
@ -1942,7 +2031,9 @@ Monocle.Place = function () {
API.setPlace = setPlace;
API.setPercentageThrough = setPercentageThrough;
API.componentId = componentId;
API.percentageThrough = percentageThrough;
API.percentAtTopOfPage = percentAtTopOfPage;
API.percentAtBottomOfPage = percentAtBottomOfPage;
API.percentageThrough = percentAtBottomOfPage;
API.pageAtPercentageThrough = pageAtPercentageThrough;
API.pageNumber = pageNumber;
API.chapterInfo = chapterInfo;
@ -2158,11 +2249,13 @@ Monocle.Component = function (book, id, index, chapters, source) {
if (p.chapters[0] && typeof p.chapters[0].percent == "number") {
return;
}
var doc = pageDiv.m.activeFrame.contentDocument;
for (var i = 0; i < p.chapters.length; ++i) {
var chp = p.chapters[i];
chp.percent = 0;
if (chp.fragment) {
chp.percent = pageDiv.m.dimensions.percentageThroughOfId(chp.fragment);
var node = doc.getElementById(chp.fragment);
chp.percent = pageDiv.m.dimensions.percentageThroughOfNode(node);
}
}
return p.chapters;
@ -2187,14 +2280,37 @@ Monocle.Component = function (book, id, index, chapters, source) {
if (!fragment) {
return 1;
}
var pc2pn = function (pc) { return Math.floor(pc * p.pageLength) + 1 }
for (var i = 0; i < p.chapters.length; ++i) {
if (p.chapters[i].fragment == fragment) {
return pc2pn(p.chapters[i].percent);
return percentToPageNumber(p.chapters[i].percent);
}
}
var percent = pageDiv.m.dimensions.percentageThroughOfId(fragment);
return pc2pn(percent);
var doc = pageDiv.m.activeFrame.contentDocument;
var node = doc.getElementById(fragment);
var percent = pageDiv.m.dimensions.percentageThroughOfNode(node);
return percentToPageNumber(percent);
}
function pageForXPath(xpath, pageDiv) {
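// 9 is XPathResult.FIRST_ORDERED_NODE_TYPE; the matched node's offset is
// converted to a percentage through the component, then to a page number.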
var doc = pageDiv.m.activeFrame.contentDocument;
var percent = 0;
if (typeof doc.evaluate == "function") {
var node = doc.evaluate(
xpath,
doc,
null,
9,
null
).singleNodeValue;
var percent = pageDiv.m.dimensions.percentageThroughOfNode(node);
}
return percentToPageNumber(percent);
}
function percentToPageNumber(pc) {
return Math.floor(pc * p.pageLength) + 1;
}
@ -2207,6 +2323,7 @@ Monocle.Component = function (book, id, index, chapters, source) {
API.updateDimensions = updateDimensions;
API.chapterForPage = chapterForPage;
API.pageForChapter = pageForChapter;
API.pageForXPath = pageForXPath;
API.lastPageNumber = lastPageNumber;
return API;
@ -2415,9 +2532,11 @@ Monocle.Dimensions.Vert = function (pageDiv) {
}
function percentageThroughOfId(id) {
function percentageThroughOfNode(target) {
if (!target) {
return 0;
}
var doc = p.page.m.activeFrame.contentDocument;
var target = doc.getElementById(id);
var offset = 0;
if (target.getBoundingClientRect) {
offset = target.getBoundingClientRect().top;
@ -2456,7 +2575,7 @@ Monocle.Dimensions.Vert = function (pageDiv) {
API.hasChanged = hasChanged;
API.measure = measure;
API.pages = pages;
API.percentageThroughOfId = percentageThroughOfId;
API.percentageThroughOfNode = percentageThroughOfNode;
API.locusToOffset = locusToOffset;
initialize();
@ -2713,8 +2832,7 @@ Monocle.Dimensions.Columns = function (pageDiv) {
(!p.measurements) ||
(p.measurements.width != newMeasurements.width) ||
(p.measurements.height != newMeasurements.height) ||
(p.measurements.scrollWidth != newMeasurements.scrollWidth) ||
(p.measurements.fontSize != newMeasurements.fontSize)
(p.measurements.scrollWidth != newMeasurements.scrollWidth)
);
}
@ -2736,12 +2854,18 @@ Monocle.Dimensions.Columns = function (pageDiv) {
if (!lc || !lc.getBoundingClientRect) {
console.warn('Empty document for page['+p.page.m.pageIndex+']');
p.measurements.scrollWidth = p.measurements.width;
} else if (lc.getBoundingClientRect().bottom > p.measurements.height) {
} else {
var bcr = lc.getBoundingClientRect();
if (
bcr.right > p.measurements.width ||
bcr.bottom > p.measurements.height
) {
p.measurements.scrollWidth = p.measurements.width * 2;
} else {
p.measurements.scrollWidth = p.measurements.width;
}
}
}
p.length = Math.ceil(p.measurements.scrollWidth / p.measurements.width);
p.dirty = false;
@ -2758,12 +2882,11 @@ Monocle.Dimensions.Columns = function (pageDiv) {
}
function percentageThroughOfId(id) {
var doc = p.page.m.activeFrame.contentDocument;
var target = doc.getElementById(id);
function percentageThroughOfNode(target) {
if (!target) {
return 0;
}
var doc = p.page.m.activeFrame.contentDocument;
var offset = 0;
if (target.getBoundingClientRect) {
offset = target.getBoundingClientRect().left;
@ -2785,20 +2908,30 @@ Monocle.Dimensions.Columns = function (pageDiv) {
function componentChanged(evt) {
if (evt.m['page'] != p.page) { return; }
var doc = evt.m['document'];
if (Monocle.Browser.has.columnOverflowPaintBug) {
var div = doc.createElement('div');
Monocle.Styles.applyRules(div, k.BODY_STYLES);
div.style.cssText += "overflow: scroll !important;";
while (doc.body.childNodes.length) {
div.appendChild(doc.body.firstChild);
}
doc.body.appendChild(div);
} else {
Monocle.Styles.applyRules(doc.body, k.BODY_STYLES);
if (Monocle.Browser.is.WebKit) {
doc.documentElement.style.overflow = 'hidden';
}
}
p.dirty = true;
}
function setColumnWidth() {
var cw = p.page.m.sheafDiv.clientWidth;
var doc = p.page.m.activeFrame.contentDocument;
if (currBodyStyleValue('column-width') != cw+"px") {
Monocle.Styles.affix(doc.body, 'column-width', cw+"px");
Monocle.Styles.affix(columnedElement(), 'column-width', cw+"px");
p.dirty = true;
}
}
@ -2809,8 +2942,7 @@ Monocle.Dimensions.Columns = function (pageDiv) {
return {
width: sheaf.clientWidth,
height: sheaf.clientHeight,
scrollWidth: scrollerWidth(),
fontSize: currBodyStyleValue('font-size')
scrollWidth: scrollerWidth()
}
}
@ -2819,16 +2951,24 @@ Monocle.Dimensions.Columns = function (pageDiv) {
if (Monocle.Browser.has.mustScrollSheaf) {
return p.page.m.sheafDiv;
} else {
return p.page.m.activeFrame.contentDocument.body;
return columnedElement();
}
}
function columnedElement() {
var elem = p.page.m.activeFrame.contentDocument.body;
return Monocle.Browser.has.columnOverflowPaintBug ? elem.firstChild : elem;
}
function scrollerWidth() {
var bdy = p.page.m.activeFrame.contentDocument.body;
if (Monocle.Browser.has.iframeDoubleWidthBug) {
if (Monocle.Browser.on.Android) {
return bdy.scrollWidth * 1.5; // I actually have no idea why 1.5.
if (Monocle.Browser.on.Kindle3) {
return scrollerElement().scrollWidth;
} else if (Monocle.Browser.on.Android) {
return bdy.scrollWidth;
} else if (Monocle.Browser.iOSVersion < "4.1") {
var hbw = bdy.scrollWidth / 2;
var sew = scrollerElement().scrollWidth;
@ -2838,15 +2978,18 @@ Monocle.Dimensions.Columns = function (pageDiv) {
var hbw = bdy.scrollWidth / 2;
return hbw;
}
} else if (Monocle.Browser.is.Gecko) {
var lc = bdy.lastChild;
while (lc && lc.nodeType != 1) {
lc = lc.previousSibling;
}
if (lc && lc.getBoundingClientRect) {
return lc.getBoundingClientRect().right;
} else if (bdy.getBoundingClientRect) {
var elems = bdy.getElementsByTagName('*');
var bdyRect = bdy.getBoundingClientRect();
var l = bdyRect.left, r = bdyRect.right;
for (var i = elems.length - 1; i >= 0; --i) {
var rect = elems[i].getBoundingClientRect();
l = Math.min(l, rect.left);
r = Math.max(r, rect.right);
}
return Math.abs(l) + Math.abs(r);
}
return scrollerElement().scrollWidth;
}
@ -2867,8 +3010,14 @@ Monocle.Dimensions.Columns = function (pageDiv) {
function translateToLocus(locus) {
var offset = locusToOffset(locus);
p.page.m.offset = 0 - offset;
if (k.SETX && !Monocle.Browser.has.columnOverflowPaintBug) {
var bdy = p.page.m.activeFrame.contentDocument.body;
Monocle.Styles.affix(bdy, "transform", "translateX("+offset+"px)");
} else {
var scrElem = scrollerElement();
scrElem.scrollLeft = 0 - offset;
}
return offset;
}
@ -2876,7 +3025,7 @@ Monocle.Dimensions.Columns = function (pageDiv) {
API.hasChanged = hasChanged;
API.measure = measure;
API.pages = pages;
API.percentageThroughOfId = percentageThroughOfId;
API.percentageThroughOfNode = percentageThroughOfNode;
API.locusToOffset = locusToOffset;
API.translateToLocus = translateToLocus;
@ -2898,6 +3047,8 @@ Monocle.Dimensions.Columns.BODY_STYLES = {
"column-fill": "auto"
}
Monocle.Dimensions.Columns.SETX = true; // Set to false for scrollLeft.
if (Monocle.Browser.has.iframeDoubleWidthBug) {
Monocle.Dimensions.Columns.BODY_STYLES["min-width"] = "200%";
} else {
@ -2924,6 +3075,8 @@ Monocle.Flippers.Slider = function (reader) {
function addPage(pageDiv) {
pageDiv.m.dimensions = new Monocle.Dimensions.Columns(pageDiv);
Monocle.Styles.setX(pageDiv, "0px");
}
@ -2963,6 +3116,7 @@ Monocle.Flippers.Slider = function (reader) {
function interactiveMode(bState) {
p.reader.dispatchEvent('monocle:interactive:'+(bState ? 'on' : 'off'));
if (!Monocle.Browser.has.selectThruBug) {
return;
}
@ -2994,10 +3148,10 @@ Monocle.Flippers.Slider = function (reader) {
function moveTo(locus, callback) {
var fn = function () {
prepareNextPage(announceTurn);
if (typeof callback == "function") {
callback();
}
prepareNextPage(function () {
if (typeof callback == "function") { callback(); }
announceTurn();
});
}
setPage(upperPage(), locus, fn);
}
@ -3045,12 +3199,26 @@ Monocle.Flippers.Slider = function (reader) {
if (dir == k.FORWARDS) {
if (getPlace().onLastPageOfBook()) {
p.reader.dispatchEvent(
'monocle:boundaryend',
{
locus: getPlace().getLocus({ direction : dir }),
page: upperPage()
}
);
resetTurnData();
return;
}
onGoingForward(boxPointX);
} else if (dir == k.BACKWARDS) {
if (getPlace().onFirstPageOfBook()) {
p.reader.dispatchEvent(
'monocle:boundarystart',
{
locus: getPlace().getLocus({ direction : dir }),
page: upperPage()
}
);
resetTurnData();
return;
}
@ -3215,14 +3383,14 @@ Monocle.Flippers.Slider = function (reader) {
function announceTurn() {
hideWaitControl(upperPage());
hideWaitControl(lowerPage());
p.reader.dispatchEvent('monocle:turn');
resetTurnData();
}
function resetTurnData() {
hideWaitControl(upperPage());
hideWaitControl(lowerPage());
p.turnData = {};
}
@ -3268,7 +3436,7 @@ Monocle.Flippers.Slider = function (reader) {
(new Date()).getTime() - stamp > duration ||
Math.abs(currX - finalX) <= Math.abs((currX + step) - finalX)
) {
clearTimeout(elem.setXTransitionInterval)
clearTimeout(elem.setXTransitionInterval);
Monocle.Styles.setX(elem, finalX);
if (elem.setXTCB) {
elem.setXTCB();
@ -3366,13 +3534,17 @@ Monocle.Flippers.Slider = function (reader) {
function jumpIn(pageDiv, callback) {
var dur = Monocle.Browser.has.jumpFlickerBug ? 1 : 0;
Monocle.defer(function () {
setX(pageDiv, 0, { duration: dur }, callback);
});
}
function jumpOut(pageDiv, callback) {
var dur = Monocle.Browser.has.jumpFlickerBug ? 1 : 0;
Monocle.defer(function () {
setX(pageDiv, 0 - pageDiv.offsetWidth, { duration: dur }, callback);
});
}
@ -3382,7 +3554,9 @@ Monocle.Flippers.Slider = function (reader) {
duration: k.durations.SLIDE,
timing: 'ease-in'
};
Monocle.defer(function () {
setX(upperPage(), 0, slideOpts, callback);
});
}
@ -3391,7 +3565,9 @@ Monocle.Flippers.Slider = function (reader) {
duration: k.durations.SLIDE,
timing: 'ease-in'
};
Monocle.defer(function () {
setX(upperPage(), 0 - upperPage().offsetWidth, slideOpts, callback);
});
}
@ -3418,13 +3594,13 @@ Monocle.Flippers.Slider = function (reader) {
function showWaitControl(page) {
var ctrl = p.reader.dom.find('flippers_slider_wait', page.m.pageIndex);
ctrl.style.opacity = 0.5;
ctrl.style.visibility = "visible";
}
function hideWaitControl(page) {
var ctrl = p.reader.dom.find('flippers_slider_wait', page.m.pageIndex);
ctrl.style.opacity = 0;
ctrl.style.visibility = "hidden";
}
API.pageCount = p.pageCount;

View File

@ -0,0 +1,371 @@
// Domain Public by Eric Wendelin http://eriwen.com/ (2008)
// Luke Smith http://lucassmith.name/ (2008)
// Loic Dachary <loic@dachary.org> (2008)
// Johan Euphrosine <proppy@aminche.com> (2008)
// Oyvind Sean Kinsey http://kinsey.no/blog (2010)
// Victor Homyakov <victor-homyakov@users.sourceforge.net> (2010)
//
// Information and discussions
// http://jspoker.pokersource.info/skin/test-printstacktrace.html
// http://eriwen.com/javascript/js-stack-trace/
// http://eriwen.com/javascript/stacktrace-update/
// http://pastie.org/253058
//
// guessFunctionNameFromLines comes from firebug
//
// Software License Agreement (BSD License)
//
// Copyright (c) 2007, Parakey Inc.
// All rights reserved.
//
// Redistribution and use of this software in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above
// copyright notice, this list of conditions and the
// following disclaimer.
//
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the
// following disclaimer in the documentation and/or other
// materials provided with the distribution.
//
// * Neither the name of Parakey Inc. nor the names of its
// contributors may be used to endorse or promote products
// derived from this software without specific prior
// written permission of Parakey Inc.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
// FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
// IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/**
* Main function giving a function stack trace with a forced or passed in Error
*
* @cfg {Error} e The error to create a stacktrace from (optional)
* @cfg {Boolean} guess If we should try to resolve the names of anonymous functions
* @return {Array} of Strings with functions, lines, files, and arguments where possible
*/
function printStackTrace(options) {
options = options || {guess: true};
var ex = options.e || null, guess = !!options.guess;
var p = new printStackTrace.implementation(), result = p.run(ex);
return (guess) ? p.guessAnonymousFunctions(result) : result;
}
printStackTrace.implementation = function() {
};
printStackTrace.implementation.prototype = {
run: function(ex) {
ex = ex || this.createException();
// Do not use the stored mode: different exceptions in Chrome
// may or may not have arguments or stack
var mode = this.mode(ex);
// Use either the stored mode, or resolve it
//var mode = this._mode || this.mode(ex);
if (mode === 'other') {
return this.other(arguments.callee);
} else {
return this[mode](ex);
}
},
createException: function() {
try {
this.undef();
return null;
} catch (e) {
return e;
}
},
/**
* @return {String} mode of operation for the environment in question.
*/
mode: function(e) {
if (e['arguments'] && e.stack) {
return (this._mode = 'chrome');
} else if (e.message && typeof window !== 'undefined' && window.opera) {
return (this._mode = e.stacktrace ? 'opera10' : 'opera');
} else if (e.stack) {
return (this._mode = 'firefox');
}
return (this._mode = 'other');
},
/**
* Given a context, function name, and callback, wrap the named function so
* that it first passes a stack trace to the callback and then runs the
* original body.
*
* @param {Object} context of execution (e.g. window)
* @param {String} functionName to instrument
* @param {Function} callback to call with a stack trace on invocation
*/
instrumentFunction: function(context, functionName, callback) {
context = context || window;
var original = context[functionName];
context[functionName] = function instrumented() {
callback.call(this, printStackTrace().slice(4));
return context[functionName]._instrumented.apply(this, arguments);
};
context[functionName]._instrumented = original;
},
/**
* Given a context and function name of a function that has been
* instrumented, revert the function to its original (non-instrumented)
* state.
*
* @param {Object} context of execution (e.g. window)
* @param {String} functionName to de-instrument
*/
deinstrumentFunction: function(context, functionName) {
if (context[functionName].constructor === Function &&
context[functionName]._instrumented &&
context[functionName]._instrumented.constructor === Function) {
context[functionName] = context[functionName]._instrumented;
}
},
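// Editor's sketch (not part of the library): instrumenting a global
// function so that every call logs its stack first. The target function
// name is hypothetical.
function instrumentationExample() {
    var tracer = new printStackTrace.implementation();
    window.saveSettings = function() { /* application code */ };
    tracer.instrumentFunction(window, 'saveSettings', function(stack) {
        // invoked with the trace before the original body runs
        if (window.console) { console.log(stack.join('\n')); }
    });
    window.saveSettings(); // logs the call site, then runs the body
    tracer.deinstrumentFunction(window, 'saveSettings'); // restore original
}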
/**
* Given an Error object, return a formatted Array based on Chrome's stack string.
*
* @param e - Error object to inspect
* @return Array<String> of function calls, files and line numbers
*/
chrome: function(e) {
//return e.stack.replace(/^[^\(]+?[\n$]/gm, '').replace(/^\s+at\s+/gm, '').replace(/^Object.<anonymous>\s*\(/gm, '{anonymous}()@').split('\n');
return e.stack.replace(/^\S[^\(]+?[\n$]/gm, '').
replace(/^\s+at\s+/gm, '').
replace(/^([^\(]+?)([\n$])/gm, '{anonymous}()@$1$2').
replace(/^Object.<anonymous>\s*\(([^\)]+)\)/gm, '{anonymous}()@$1').split('\n');
},
/**
* Given an Error object, return a formatted Array based on Firefox's stack string.
*
* @param e - Error object to inspect
* @return Array<String> of function calls, files and line numbers
*/
firefox: function(e) {
return e.stack.replace(/(?:\n@:0)?\s+$/m, '').replace(/^\(/gm, '{anonymous}(').split('\n');
},
/**
* Given an Error object, return a formatted Array based on Opera 10's stacktrace string.
*
* @param e - Error object to inspect
* @return Array<String> of function calls, files and line numbers
*/
opera10: function(e) {
var stack = e.stacktrace;
var lines = stack.split('\n'), ANON = '{anonymous}', lineRE = /.*line (\d+), column (\d+) in ((<anonymous function\:?\s*(\S+))|([^\(]+)\([^\)]*\))(?: in )?(.*)\s*$/i, i, j, len;
for (i = 2, j = 0, len = lines.length; i < len - 2; i++) {
if (lineRE.test(lines[i])) {
var location = RegExp.$6 + ':' + RegExp.$1 + ':' + RegExp.$2;
var fnName = RegExp.$3;
fnName = fnName.replace(/<anonymous function\:?\s?(\S+)?>/g, ANON);
lines[j++] = fnName + '@' + location;
}
}
lines.splice(j, lines.length - j);
return lines;
},
// Opera 7.x-9.x only!
opera: function(e) {
var lines = e.message.split('\n'), ANON = '{anonymous}', lineRE = /Line\s+(\d+).*script\s+(http\S+)(?:.*in\s+function\s+(\S+))?/i, i, j, len;
for (i = 4, j = 0, len = lines.length; i < len; i += 2) {
//TODO: RegExp.exec() would probably be cleaner here
if (lineRE.test(lines[i])) {
lines[j++] = (RegExp.$3 ? RegExp.$3 + '()@' + RegExp.$2 + RegExp.$1 : ANON + '()@' + RegExp.$2 + ':' + RegExp.$1) + ' -- ' + lines[i + 1].replace(/^\s+/, '');
}
}
lines.splice(j, lines.length - j);
return lines;
},
// Safari, IE, and others
other: function(curr) {
var ANON = '{anonymous}', fnRE = /function\s*([\w\-$]+)?\s*\(/i, stack = [], fn, args, maxStackSize = 10;
while (curr && stack.length < maxStackSize) {
fn = fnRE.test(curr.toString()) ? RegExp.$1 || ANON : ANON;
args = Array.prototype.slice.call(curr['arguments'] || []);
stack[stack.length] = fn + '(' + this.stringifyArguments(args) + ')';
curr = curr.caller;
}
return stack;
},
/**
* Given an arguments array, return a comma-joined String, substituting
* type names for non-string values.
*
* @param {Arguments} args object to stringify
* @return {String} of comma-joined, stringified arguments
*/
stringifyArguments: function(args) {
var slice = Array.prototype.slice;
for (var i = 0; i < args.length; ++i) {
var arg = args[i];
if (arg === undefined) {
args[i] = 'undefined';
} else if (arg === null) {
args[i] = 'null';
} else if (arg.constructor) {
if (arg.constructor === Array) {
if (arg.length < 3) {
args[i] = '[' + this.stringifyArguments(arg) + ']';
} else {
args[i] = '[' + this.stringifyArguments(slice.call(arg, 0, 1)) + '...' + this.stringifyArguments(slice.call(arg, -1)) + ']';
}
} else if (arg.constructor === Object) {
args[i] = '#object';
} else if (arg.constructor === Function) {
args[i] = '#function';
} else if (arg.constructor === String) {
args[i] = '"' + arg + '"';
}
}
}
return args.join(',');
},
sourceCache: {},
/**
* @return the text from a given URL.
*/
ajax: function(url) {
var req = this.createXMLHTTPObject();
if (!req) {
return;
}
req.open('GET', url, false);
req.setRequestHeader('User-Agent', 'XMLHTTP/1.0');
req.send('');
return req.responseText;
},
/**
* Try XHR methods in order and store XHR factory.
*
* @return <Function> XHR function or equivalent
*/
createXMLHTTPObject: function() {
var xmlhttp, XMLHttpFactories = [
function() {
return new XMLHttpRequest();
}, function() {
return new ActiveXObject('Msxml2.XMLHTTP');
}, function() {
return new ActiveXObject('Msxml3.XMLHTTP');
}, function() {
return new ActiveXObject('Microsoft.XMLHTTP');
}
];
for (var i = 0; i < XMLHttpFactories.length; i++) {
try {
xmlhttp = XMLHttpFactories[i]();
// Use memoization to cache the factory
this.createXMLHTTPObject = XMLHttpFactories[i];
return xmlhttp;
} catch (e) {
}
}
},
/**
* Given a URL, check if it is in the same domain (so we can get the source
* via Ajax).
*
* @param url <String> source url
* @return False if we need a cross-domain request
*/
isSameDomain: function(url) {
return url.indexOf(location.hostname) !== -1;
},
/**
* Get source code from given URL if in the same domain.
*
* @param url <String> JS source URL
* @return <Array> Array of source code lines
*/
getSource: function(url) {
if (!(url in this.sourceCache)) {
this.sourceCache[url] = this.ajax(url).split('\n');
}
return this.sourceCache[url];
},
guessAnonymousFunctions: function(stack) {
for (var i = 0; i < stack.length; ++i) {
var reStack = /\{anonymous\}\(.*\)@(\w+:\/\/([\-\w\.]+)+(:\d+)?[^:]+):(\d+):?(\d+)?/;
var frame = stack[i], m = reStack.exec(frame);
if (m) {
var file = m[1], lineno = m[4], charno = m[5] || 0; // m[5] is the character position in Chrome (the regex has only five groups)
if (file && this.isSameDomain(file) && lineno) {
var functionName = this.guessAnonymousFunction(file, lineno, charno);
stack[i] = frame.replace('{anonymous}', functionName);
}
}
}
return stack;
},
guessAnonymousFunction: function(url, lineNo, charNo) {
var ret;
try {
ret = this.findFunctionName(this.getSource(url), lineNo);
} catch (e) {
ret = 'getSource failed with url: ' + url + ', exception: ' + e.toString();
}
return ret;
},
findFunctionName: function(source, lineNo) {
// FIXME findFunctionName fails for compressed source
// (more than one function on the same line)
// TODO use captured args
// function {name}({args}) m[1]=name m[2]=args
var reFunctionDeclaration = /function\s+([^(]*?)\s*\(([^)]*)\)/;
// {name} = function ({args}) TODO args capture
// /['"]?([0-9A-Za-z_]+)['"]?\s*[:=]\s*function(?:[^(]*)/
var reFunctionExpression = /['"]?([0-9A-Za-z_]+)['"]?\s*[:=]\s*function\b/;
// {name} = eval()
var reFunctionEvaluation = /['"]?([0-9A-Za-z_]+)['"]?\s*[:=]\s*(?:eval|new Function)\b/;
// Walk backwards in the source lines until we find
// the line which matches one of the patterns above
var code = "", line, maxLines = 10, m;
for (var i = 0; i < maxLines; ++i) {
// FIXME lineNo is 1-based, source[] is 0-based
line = source[lineNo - i];
if (line) {
code = line + code;
m = reFunctionExpression.exec(code);
if (m && m[1]) {
return m[1];
}
m = reFunctionDeclaration.exec(code);
if (m && m[1]) {
//return m[1] + "(" + (m[2] || "") + ")";
return m[1];
}
m = reFunctionEvaluation.exec(code);
if (m && m[1]) {
return m[1];
}
}
}
return '(?)';
}
};

View File

@ -292,13 +292,17 @@ maximum_resort_levels = 5
generate_cover_title_font = None
generate_cover_foot_font = None
#: Control behavior of double clicks on the book list
# Behavior of doubleclick on the books list. Choices: open_viewer, do_nothing,
#: Control behavior of the book list
# You can control the behavior of double clicks on the books list.
# Choices: open_viewer, do_nothing,
# edit_cell, edit_metadata. Selecting edit_metadata has the side effect of
# disabling editing a field using a single click.
# Default: open_viewer.
# Example: doubleclick_on_library_view = 'do_nothing'
# You can also control whether the book list scrolls horizontally per column
# or per pixel. The default is per column.
doubleclick_on_library_view = 'open_viewer'
horizontal_scrolling_per_column = True
#: Language to use when sorting.
# Setting this tweak will force sorting to use the

Binary files not shown: one image replaced (25 KiB before and after) and
thirteen images added (641 B, 9.7 KiB, 12 KiB, 7.6 KiB, 10 KiB, 8.7 KiB,
13 KiB, 11 KiB, 18 KiB, 15 KiB, 15 KiB, 12 KiB and 14 KiB).

View File

@ -1,6 +1,7 @@
CREATE TABLE authors ( id INTEGER PRIMARY KEY,
name TEXT NOT NULL COLLATE NOCASE,
sort TEXT COLLATE NOCASE,
link TEXT NOT NULL DEFAULT "",
UNIQUE(name)
);
CREATE TABLE books ( id INTEGER PRIMARY KEY AUTOINCREMENT,
@ -13,8 +14,10 @@ CREATE TABLE books ( id INTEGER PRIMARY KEY AUTOINCREMENT,
isbn TEXT DEFAULT "" COLLATE NOCASE,
lccn TEXT DEFAULT "" COLLATE NOCASE,
path TEXT NOT NULL DEFAULT "",
flags INTEGER NOT NULL DEFAULT 1
, uuid TEXT, has_cover BOOL DEFAULT 0, last_modified TIMESTAMP NOT NULL DEFAULT "2000-01-01 00:00:00+00:00");
flags INTEGER NOT NULL DEFAULT 1,
uuid TEXT,
has_cover BOOL DEFAULT 0,
last_modified TIMESTAMP NOT NULL DEFAULT "2000-01-01 00:00:00+00:00");
CREATE TABLE books_authors_link ( id INTEGER PRIMARY KEY,
book INTEGER NOT NULL,
author INTEGER NOT NULL,
@ -543,4 +546,4 @@ CREATE TRIGGER series_update_trg
BEGIN
UPDATE series SET sort=NEW.name WHERE id=NEW.id;
END;
pragma user_version=20;
pragma user_version=21;
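-- Editor's sketch (not part of the schema): the new authors.link column
-- holds a per-author URL; the empty-string default means "use the
-- application default" (Wikipedia, per the 0.8.8 release notes). The name
-- and URL below are illustrative.
UPDATE authors SET link='http://en.wikipedia.org/wiki/John_Doe'
    WHERE name='John Doe';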

View File

@ -1,43 +0,0 @@
{
"and": "def evaluate(self, formatter, kwargs, mi, locals, *args):\n i = 0\n while i < len(args):\n if not args[i]:\n return ''\n i += 1\n return '1'\n",
"contains": "def evaluate(self, formatter, kwargs, mi, locals,\n val, test, value_if_present, value_if_not):\n if re.search(test, val, flags=re.I):\n return value_if_present\n else:\n return value_if_not\n",
"divide": "def evaluate(self, formatter, kwargs, mi, locals, x, y):\n x = float(x if x else 0)\n y = float(y if y else 0)\n return unicode(x / y)\n",
"uppercase": "def evaluate(self, formatter, kwargs, mi, locals, val):\n return val.upper()\n",
"strcat": "def evaluate(self, formatter, kwargs, mi, locals, *args):\n i = 0\n res = ''\n for i in range(0, len(args)):\n res += args[i]\n return res\n",
"in_list": "def evaluate(self, formatter, kwargs, mi, locals, val, sep, pat, fv, nfv):\n l = [v.strip() for v in val.split(sep) if v.strip()]\n if l:\n for v in l:\n if re.search(pat, v, flags=re.I):\n return fv\n return nfv\n",
"not": "def evaluate(self, formatter, kwargs, mi, locals, *args):\n i = 0\n while i < len(args):\n if args[i]:\n return '1'\n i += 1\n return ''\n",
"ifempty": "def evaluate(self, formatter, kwargs, mi, locals, val, value_if_empty):\n if val:\n return val\n else:\n return value_if_empty\n",
"booksize": "def evaluate(self, formatter, kwargs, mi, locals):\n if mi.book_size is not None:\n try:\n return str(mi.book_size)\n except:\n pass\n return ''\n",
"select": "def evaluate(self, formatter, kwargs, mi, locals, val, key):\n if not val:\n return ''\n vals = [v.strip() for v in val.split(',')]\n for v in vals:\n if v.startswith(key+':'):\n return v[len(key)+1:]\n return ''\n",
"strcmp": "def evaluate(self, formatter, kwargs, mi, locals, x, y, lt, eq, gt):\n v = strcmp(x, y)\n if v < 0:\n return lt\n if v == 0:\n return eq\n return gt\n",
"first_non_empty": "def evaluate(self, formatter, kwargs, mi, locals, *args):\n i = 0\n while i < len(args):\n if args[i]:\n return args[i]\n i += 1\n return ''\n",
"re": "def evaluate(self, formatter, kwargs, mi, locals, val, pattern, replacement):\n return re.sub(pattern, replacement, val, flags=re.I)\n",
"subtract": "def evaluate(self, formatter, kwargs, mi, locals, x, y):\n x = float(x if x else 0)\n y = float(y if y else 0)\n return unicode(x - y)\n",
"list_item": "def evaluate(self, formatter, kwargs, mi, locals, val, index, sep):\n if not val:\n return ''\n index = int(index)\n val = val.split(sep)\n try:\n return val[index]\n except:\n return ''\n",
"shorten": "def evaluate(self, formatter, kwargs, mi, locals,\n val, leading, center_string, trailing):\n l = max(0, int(leading))\n t = max(0, int(trailing))\n if len(val) > l + len(center_string) + t:\n return val[0:l] + center_string + ('' if t == 0 else val[-t:])\n else:\n return val\n",
"field": "def evaluate(self, formatter, kwargs, mi, locals, name):\n return formatter.get_value(name, [], kwargs)\n",
"add": "def evaluate(self, formatter, kwargs, mi, locals, x, y):\n x = float(x if x else 0)\n y = float(y if y else 0)\n return unicode(x + y)\n",
"lookup": "def evaluate(self, formatter, kwargs, mi, locals, val, *args):\n if len(args) == 2: # here for backwards compatibility\n if val:\n return formatter.vformat('{'+args[0].strip()+'}', [], kwargs)\n else:\n return formatter.vformat('{'+args[1].strip()+'}', [], kwargs)\n if (len(args) % 2) != 1:\n raise ValueError(_('lookup requires either 2 or an odd number of arguments'))\n i = 0\n while i < len(args):\n if i + 1 >= len(args):\n return formatter.vformat('{' + args[i].strip() + '}', [], kwargs)\n if re.search(args[i], val, flags=re.I):\n return formatter.vformat('{'+args[i+1].strip() + '}', [], kwargs)\n i += 2\n",
"template": "def evaluate(self, formatter, kwargs, mi, locals, template):\n template = template.replace('[[', '{').replace(']]', '}')\n return formatter.__class__().safe_format(template, kwargs, 'TEMPLATE', mi)\n",
"print": "def evaluate(self, formatter, kwargs, mi, locals, *args):\n print args\n return None\n",
"merge_lists": "def evaluate(self, formatter, kwargs, mi, locals, list1, list2, separator):\n l1 = [l.strip() for l in list1.split(separator) if l.strip()]\n l2 = [l.strip() for l in list2.split(separator) if l.strip()]\n lcl1 = set([icu_lower(l) for l in l1])\n res = []\n for i in l1:\n res.append(i)\n for i in l2:\n if icu_lower(i) not in lcl1:\n res.append(i)\n return ', '.join(sorted(res, key=sort_key))\n",
"str_in_list": "def evaluate(self, formatter, kwargs, mi, locals, val, sep, str, fv, nfv):\n l = [v.strip() for v in val.split(sep) if v.strip()]\n c = [v.strip() for v in str.split(sep) if v.strip()]\n if l:\n for v in l:\n for t in c:\n if strcmp(t, v) == 0:\n return fv\n return nfv\n",
"titlecase": "def evaluate(self, formatter, kwargs, mi, locals, val):\n return titlecase(val)\n",
"subitems": "def evaluate(self, formatter, kwargs, mi, locals, val, start_index, end_index):\n if not val:\n return ''\n si = int(start_index)\n ei = int(end_index)\n items = [v.strip() for v in val.split(',')]\n rv = set()\n for item in items:\n component = item.split('.')\n try:\n if ei == 0:\n rv.add('.'.join(component[si:]))\n else:\n rv.add('.'.join(component[si:ei]))\n except:\n pass\n return ', '.join(sorted(rv, key=sort_key))\n",
"sublist": "def evaluate(self, formatter, kwargs, mi, locals, val, start_index, end_index, sep):\n if not val:\n return ''\n si = int(start_index)\n ei = int(end_index)\n val = val.split(sep)\n try:\n if ei == 0:\n return sep.join(val[si:])\n else:\n return sep.join(val[si:ei])\n except:\n return ''\n",
"test": "def evaluate(self, formatter, kwargs, mi, locals, val, value_if_set, value_not_set):\n if val:\n return value_if_set\n else:\n return value_not_set\n",
"eval": "def evaluate(self, formatter, kwargs, mi, locals, template):\n from formatter import eval_formatter\n template = template.replace('[[', '{').replace(']]', '}')\n return eval_formatter.safe_format(template, locals, 'EVAL', None)\n",
"multiply": "def evaluate(self, formatter, kwargs, mi, locals, x, y):\n x = float(x if x else 0)\n y = float(y if y else 0)\n return unicode(x * y)\n",
"format_date": "def evaluate(self, formatter, kwargs, mi, locals, val, format_string):\n if not val or val == 'None':\n return ''\n try:\n dt = parse_date(val)\n s = format_date(dt, format_string)\n except:\n s = 'BAD DATE'\n return s\n",
"capitalize": "def evaluate(self, formatter, kwargs, mi, locals, val):\n return capitalize(val)\n",
"identifier_in_list": "def evaluate(self, formatter, kwargs, mi, locals, val, ident, fv, nfv):\n l = [v.strip() for v in val.split(',') if v.strip()]\n (id, _, regexp) = ident.partition(':')\n if not id:\n return nfv\n id += ':'\n if l:\n for v in l:\n if v.startswith(id):\n if not regexp or re.search(regexp, v[len(id):], flags=re.I):\n return fv\n return nfv\n",
"count": "def evaluate(self, formatter, kwargs, mi, locals, val, sep):\n return unicode(len(val.split(sep)))\n",
"lowercase": "def evaluate(self, formatter, kwargs, mi, locals, val):\n return val.lower()\n",
"substr": "def evaluate(self, formatter, kwargs, mi, locals, str_, start_, end_):\n return str_[int(start_): len(str_) if int(end_) == 0 else int(end_)]\n",
"or": "def evaluate(self, formatter, kwargs, mi, locals, *args):\n i = 0\n while i < len(args):\n if args[i]:\n return '1'\n i += 1\n return ''\n",
"switch": "def evaluate(self, formatter, kwargs, mi, locals, val, *args):\n if (len(args) % 2) != 1:\n raise ValueError(_('switch requires an odd number of arguments'))\n i = 0\n while i < len(args):\n if i + 1 >= len(args):\n return args[i]\n if re.search(args[i], val, flags=re.I):\n return args[i+1]\n i += 2\n",
"ondevice": "def evaluate(self, formatter, kwargs, mi, locals):\n if mi.ondevice_col:\n return _('Yes')\n return ''\n",
"assign": "def evaluate(self, formatter, kwargs, mi, locals, target, value):\n locals[target] = value\n return value\n",
"raw_field": "def evaluate(self, formatter, kwargs, mi, locals, name):\n return unicode(getattr(mi, name, None))\n",
"cmp": "def evaluate(self, formatter, kwargs, mi, locals, x, y, lt, eq, gt):\n x = float(x if x and x != 'None' else 0)\n y = float(y if y and y != 'None' else 0)\n if x < y:\n return lt\n if x == y:\n return eq\n return gt\n"
}

View File

@ -32,16 +32,11 @@
<xsl:value-of select="fb:description/fb:title-info/fb:book-title"/>
</title>
<style type="text/css">
a { color : #0002CC }
a:hover { color : #BF0000 }
body { background-color : #FEFEFE; color : #000000; font-family : Verdana, Geneva, Arial, Helvetica, sans-serif; text-align : justify }
body { text-align : justify }
h1{ font-size : 160%; font-style : normal; font-weight : bold; text-align : left; border : 1px solid Black; background-color : #E7E7E7; margin-left : 0px; page-break-before : always; }
h2{ font-size : 130%; font-style : normal; font-weight : bold; text-align : left; background-color : #EEEEEE; border : 1px solid Gray; page-break-before : always; }
h3{ font-size : 110%; font-style : normal; font-weight : bold; text-align : left; background-color : #F1F1F1; border : 1px solid Silver;}
h4{ font-size : 100%; font-style : normal; font-weight : bold; text-align : left; border : 1px solid Gray; background-color : #F4F4F4;}
@ -56,13 +51,11 @@
hr { color : Black }
div {font-family : "Times New Roman", Times, serif; text-align : justify}
ul {margin-left: 0}
.epigraph{width:50%; margin-left : 35%;}
div.paragraph { text-align: justify; text-indent: 2em; }
div.paragraph { text-indent: 2em; }
</style>
<link rel="stylesheet" type="text/css" href="inline-styles.css" />
</head>

View File

@ -8,8 +8,8 @@ __docformat__ = 'restructuredtext en'
import sys, os, textwrap, subprocess, shutil, tempfile, atexit, stat, shlex
from setup import Command, islinux, isfreebsd, isbsd, basenames, modules, functions, \
__appname__, __version__
from setup import (Command, islinux, isbsd, basenames, modules, functions,
__appname__, __version__)
HEADER = '''\
#!/usr/bin/env python2

View File

@ -0,0 +1,689 @@
/*
* Memory DLL loading code
* Version 0.0.2 with additions from Thomas Heller
*
* Copyright (c) 2004-2005 by Joachim Bauch / mail@joachim-bauch.de
* http://www.joachim-bauch.de
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is MemoryModule.c
*
* The Initial Developer of the Original Code is Joachim Bauch.
*
* Portions created by Joachim Bauch are Copyright (C) 2004-2005
* Joachim Bauch. All Rights Reserved.
*
* Portions Copyright (C) 2005 Thomas Heller.
*
*/
// disable warnings about pointer <-> DWORD conversions
#pragma warning( disable : 4311 4312 )
#include <Windows.h>
#include <winnt.h>
#if DEBUG_OUTPUT
#include <stdio.h>
#endif
#ifndef IMAGE_SIZEOF_BASE_RELOCATION
// Vista SDKs no longer define IMAGE_SIZEOF_BASE_RELOCATION!?
# define IMAGE_SIZEOF_BASE_RELOCATION (sizeof(IMAGE_BASE_RELOCATION))
#endif
#include "MemoryModule.h"
/*
XXX We need to protect at least walking the 'loaded' linked list with a lock!
*/
/******************************************************************/
FINDPROC findproc;
void *findproc_data = NULL;
struct NAME_TABLE {
char *name;
DWORD ordinal;
};
typedef struct tagMEMORYMODULE {
PIMAGE_NT_HEADERS headers;
unsigned char *codeBase;
HMODULE *modules;
int numModules;
int initialized;
struct NAME_TABLE *name_table;
char *name;
int refcount;
struct tagMEMORYMODULE *next, *prev;
} MEMORYMODULE, *PMEMORYMODULE;
typedef BOOL (WINAPI *DllEntryProc)(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpReserved);
#define GET_HEADER_DICTIONARY(module, idx) &(module)->headers->OptionalHeader.DataDirectory[idx]
MEMORYMODULE *loaded; /* linked list of loaded memory modules */
/* private - insert a loaded library in a linked list */
static void _Register(char *name, MEMORYMODULE *module)
{
module->next = loaded;
if (loaded)
loaded->prev = module;
module->prev = NULL;
loaded = module;
}
/* private - remove a loaded library from a linked list */
static void _Unregister(MEMORYMODULE *module)
{
free(module->name);
if (module->prev)
module->prev->next = module->next;
if (module->next)
module->next->prev = module->prev;
if (module == loaded)
loaded = module->next;
}
/* public - replacement for GetModuleHandle() */
HMODULE MyGetModuleHandle(LPCTSTR lpModuleName)
{
MEMORYMODULE *p = loaded;
while (p) {
// If already loaded, only increment the reference count
if (0 == stricmp(lpModuleName, p->name)) {
return (HMODULE)p;
}
p = p->next;
}
return GetModuleHandle(lpModuleName);
}
/* public - replacement for LoadLibrary, but searches FIRST for memory
libraries, then for normal libraries. So it will load a library AS a memory
module if it is found by findproc().
*/
HMODULE MyLoadLibrary(char *lpFileName)
{
MEMORYMODULE *p = loaded;
HMODULE hMod;
while (p) {
// If already loaded, only increment the reference count
if (0 == stricmp(lpFileName, p->name)) {
p->refcount++;
return (HMODULE)p;
}
p = p->next;
}
if (findproc && findproc_data) {
void *pdata = findproc(lpFileName, findproc_data);
if (pdata) {
hMod = MemoryLoadLibrary(lpFileName, pdata);
free(pdata); /* release the image buffer returned by findproc; p is NULL here */
return hMod;
}
}
hMod = LoadLibrary(lpFileName);
return hMod;
}
/* public - replacement for GetProcAddress() */
FARPROC MyGetProcAddress(HMODULE hModule, LPCSTR lpProcName)
{
MEMORYMODULE *p = loaded;
while (p) {
if ((HMODULE)p == hModule)
return MemoryGetProcAddress(p, lpProcName);
p = p->next;
}
return GetProcAddress(hModule, lpProcName);
}
/* public - replacement for FreeLibrary() */
BOOL MyFreeLibrary(HMODULE hModule)
{
MEMORYMODULE *p = loaded;
while (p) {
if ((HMODULE)p == hModule) {
if (--p->refcount == 0) {
_Unregister(p);
MemoryFreeLibrary(p);
}
return TRUE;
}
p = p->next;
}
return FreeLibrary(hModule);
}
#if DEBUG_OUTPUT
static void
OutputLastError(const char *msg)
{
LPVOID tmp;
char *tmpmsg;
FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
NULL, GetLastError(), MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPTSTR)&tmp, 0, NULL);
tmpmsg = (char *)LocalAlloc(LPTR, strlen(msg) + strlen(tmp) + 3);
sprintf(tmpmsg, "%s: %s", msg, tmp);
OutputDebugString(tmpmsg);
LocalFree(tmpmsg);
LocalFree(tmp);
}
#endif
/*
static int dprintf(char *fmt, ...)
{
char Buffer[4096];
va_list marker;
int result;
va_start(marker, fmt);
result = vsprintf(Buffer, fmt, marker);
OutputDebugString(Buffer);
return result;
}
*/
static void
CopySections(const unsigned char *data, PIMAGE_NT_HEADERS old_headers, PMEMORYMODULE module)
{
int i, size;
unsigned char *codeBase = module->codeBase;
unsigned char *dest;
PIMAGE_SECTION_HEADER section = IMAGE_FIRST_SECTION(module->headers);
for (i=0; i<module->headers->FileHeader.NumberOfSections; i++, section++)
{
if (section->SizeOfRawData == 0)
{
// section doesn't contain data in the dll itself, but may define
// uninitialized data
size = old_headers->OptionalHeader.SectionAlignment;
if (size > 0)
{
dest = (unsigned char *)VirtualAlloc(codeBase + section->VirtualAddress,
size,
MEM_COMMIT,
PAGE_READWRITE);
section->Misc.PhysicalAddress = (DWORD)dest;
memset(dest, 0, size);
}
// section is empty
continue;
}
// commit memory block and copy data from dll
dest = (unsigned char *)VirtualAlloc(codeBase + section->VirtualAddress,
section->SizeOfRawData,
MEM_COMMIT,
PAGE_READWRITE);
memcpy(dest, data + section->PointerToRawData, section->SizeOfRawData);
section->Misc.PhysicalAddress = (DWORD)dest;
}
}
// Protection flags for memory pages (Executable, Readable, Writeable)
static int ProtectionFlags[2][2][2] = {
{
// not executable
{PAGE_NOACCESS, PAGE_WRITECOPY},
{PAGE_READONLY, PAGE_READWRITE},
}, {
// executable
{PAGE_EXECUTE, PAGE_EXECUTE_WRITECOPY},
{PAGE_EXECUTE_READ, PAGE_EXECUTE_READWRITE},
},
};
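/* Example: a section flagged EXECUTE and READ but not WRITE maps to
   ProtectionFlags[1][1][0], i.e. PAGE_EXECUTE_READ. */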
static void
FinalizeSections(PMEMORYMODULE module)
{
int i;
PIMAGE_SECTION_HEADER section = IMAGE_FIRST_SECTION(module->headers);
// loop through all sections and change access flags
for (i=0; i<module->headers->FileHeader.NumberOfSections; i++, section++)
{
DWORD protect, oldProtect, size;
int executable = (section->Characteristics & IMAGE_SCN_MEM_EXECUTE) != 0;
int readable = (section->Characteristics & IMAGE_SCN_MEM_READ) != 0;
int writeable = (section->Characteristics & IMAGE_SCN_MEM_WRITE) != 0;
if (section->Characteristics & IMAGE_SCN_MEM_DISCARDABLE)
{
// section is not needed any more and can safely be freed
VirtualFree((LPVOID)section->Misc.PhysicalAddress, section->SizeOfRawData, MEM_DECOMMIT);
continue;
}
// determine protection flags based on characteristics
protect = ProtectionFlags[executable][readable][writeable];
if (section->Characteristics & IMAGE_SCN_MEM_NOT_CACHED)
protect |= PAGE_NOCACHE;
// determine size of region
size = section->SizeOfRawData;
if (size == 0)
{
if (section->Characteristics & IMAGE_SCN_CNT_INITIALIZED_DATA)
size = module->headers->OptionalHeader.SizeOfInitializedData;
else if (section->Characteristics & IMAGE_SCN_CNT_UNINITIALIZED_DATA)
size = module->headers->OptionalHeader.SizeOfUninitializedData;
}
if (size > 0)
{
// change memory access flags
if (VirtualProtect((LPVOID)section->Misc.PhysicalAddress, section->SizeOfRawData, protect, &oldProtect) == 0)
#if DEBUG_OUTPUT
OutputLastError("Error protecting memory page")
#endif
;
}
}
}
static void
PerformBaseRelocation(PMEMORYMODULE module, DWORD delta)
{
DWORD i;
unsigned char *codeBase = module->codeBase;
PIMAGE_DATA_DIRECTORY directory = GET_HEADER_DICTIONARY(module, IMAGE_DIRECTORY_ENTRY_BASERELOC);
if (directory->Size > 0)
{
PIMAGE_BASE_RELOCATION relocation = (PIMAGE_BASE_RELOCATION)(codeBase + directory->VirtualAddress);
for (; relocation->VirtualAddress > 0; )
{
unsigned char *dest = (unsigned char *)(codeBase + relocation->VirtualAddress);
unsigned short *relInfo = (unsigned short *)((unsigned char *)relocation + IMAGE_SIZEOF_BASE_RELOCATION);
for (i=0; i<((relocation->SizeOfBlock-IMAGE_SIZEOF_BASE_RELOCATION) / 2); i++, relInfo++)
{
DWORD *patchAddrHL;
int type, offset;
// the upper 4 bits define the type of relocation
type = *relInfo >> 12;
// the lower 12 bits define the offset
offset = *relInfo & 0xfff;
switch (type)
{
case IMAGE_REL_BASED_ABSOLUTE:
// skip relocation
break;
case IMAGE_REL_BASED_HIGHLOW:
// change complete 32 bit address
patchAddrHL = (DWORD *)(dest + offset);
*patchAddrHL += delta;
break;
default:
//printf("Unknown relocation: %d\n", type);
break;
}
}
// advance to next relocation block
relocation = (PIMAGE_BASE_RELOCATION)(((DWORD)relocation) + relocation->SizeOfBlock);
}
}
}
static int
BuildImportTable(PMEMORYMODULE module)
{
int result=1;
unsigned char *codeBase = module->codeBase;
PIMAGE_DATA_DIRECTORY directory = GET_HEADER_DICTIONARY(module, IMAGE_DIRECTORY_ENTRY_IMPORT);
if (directory->Size > 0)
{
PIMAGE_IMPORT_DESCRIPTOR importDesc = (PIMAGE_IMPORT_DESCRIPTOR)(codeBase + directory->VirtualAddress);
for (; !IsBadReadPtr(importDesc, sizeof(IMAGE_IMPORT_DESCRIPTOR)) && importDesc->Name; importDesc++)
{
DWORD *thunkRef, *funcRef;
HMODULE handle;
handle = MyLoadLibrary(codeBase + importDesc->Name);
if (handle == INVALID_HANDLE_VALUE)
{
//LastError should already be set
#if DEBUG_OUTPUT
OutputLastError("Can't load library");
#endif
result = 0;
break;
}
module->modules = (HMODULE *)realloc(module->modules, (module->numModules+1)*(sizeof(HMODULE)));
if (module->modules == NULL)
{
SetLastError(ERROR_NOT_ENOUGH_MEMORY);
result = 0;
break;
}
module->modules[module->numModules++] = handle;
if (importDesc->OriginalFirstThunk)
{
thunkRef = (DWORD *)(codeBase + importDesc->OriginalFirstThunk);
funcRef = (DWORD *)(codeBase + importDesc->FirstThunk);
} else {
// no hint table
thunkRef = (DWORD *)(codeBase + importDesc->FirstThunk);
funcRef = (DWORD *)(codeBase + importDesc->FirstThunk);
}
for (; *thunkRef; thunkRef++, funcRef++)
{
if IMAGE_SNAP_BY_ORDINAL(*thunkRef) {
*funcRef = (DWORD)MyGetProcAddress(handle, (LPCSTR)IMAGE_ORDINAL(*thunkRef));
} else {
PIMAGE_IMPORT_BY_NAME thunkData = (PIMAGE_IMPORT_BY_NAME)(codeBase + *thunkRef);
*funcRef = (DWORD)MyGetProcAddress(handle, (LPCSTR)&thunkData->Name);
}
if (*funcRef == 0)
{
SetLastError(ERROR_PROC_NOT_FOUND);
result = 0;
break;
}
}
if (!result)
break;
}
}
return result;
}
/*
MemoryLoadLibrary - load a library AS MEMORY MODULE, or return
existing MEMORY MODULE with increased refcount.
This makes it possible to load a library AGAIN as a memory module even
if it is already loaded as an HMODULE!
*/
HMEMORYMODULE MemoryLoadLibrary(char *name, const void *data)
{
PMEMORYMODULE result;
PIMAGE_DOS_HEADER dos_header;
PIMAGE_NT_HEADERS old_header;
unsigned char *code, *headers;
DWORD locationDelta;
DllEntryProc DllEntry;
BOOL successfull;
MEMORYMODULE *p = loaded;
while (p) {
// If already loaded, only increment the reference count
if (0 == stricmp(name, p->name)) {
p->refcount++;
return (HMODULE)p;
}
p = p->next;
}
/* Do NOT check for GetModuleHandle here! */
dos_header = (PIMAGE_DOS_HEADER)data;
if (dos_header->e_magic != IMAGE_DOS_SIGNATURE)
{
SetLastError(ERROR_BAD_FORMAT);
#if DEBUG_OUTPUT
OutputDebugString("Not a valid executable file.\n");
#endif
return NULL;
}
old_header = (PIMAGE_NT_HEADERS)&((const unsigned char *)(data))[dos_header->e_lfanew];
if (old_header->Signature != IMAGE_NT_SIGNATURE)
{
SetLastError(ERROR_BAD_FORMAT);
#if DEBUG_OUTPUT
OutputDebugString("No PE header found.\n");
#endif
return NULL;
}
// reserve memory for image of library
code = (unsigned char *)VirtualAlloc((LPVOID)(old_header->OptionalHeader.ImageBase),
old_header->OptionalHeader.SizeOfImage,
MEM_RESERVE,
PAGE_READWRITE);
if (code == NULL)
// try to allocate memory at arbitrary position
code = (unsigned char *)VirtualAlloc(NULL,
old_header->OptionalHeader.SizeOfImage,
MEM_RESERVE,
PAGE_READWRITE);
if (code == NULL)
{
SetLastError(ERROR_NOT_ENOUGH_MEMORY);
#if DEBUG_OUTPUT
OutputLastError("Can't reserve memory");
#endif
return NULL;
}
result = (PMEMORYMODULE)HeapAlloc(GetProcessHeap(), 0, sizeof(MEMORYMODULE));
result->codeBase = code;
result->numModules = 0;
result->modules = NULL;
result->initialized = 0;
result->next = result->prev = NULL;
result->refcount = 1;
result->name = strdup(name);
result->name_table = NULL;
// XXX: is it correct to commit the complete memory region at once?
// calling DllEntry raises an exception if we don't...
VirtualAlloc(code,
old_header->OptionalHeader.SizeOfImage,
MEM_COMMIT,
PAGE_READWRITE);
// commit memory for headers
headers = (unsigned char *)VirtualAlloc(code,
old_header->OptionalHeader.SizeOfHeaders,
MEM_COMMIT,
PAGE_READWRITE);
// copy PE header to code
memcpy(headers, dos_header, dos_header->e_lfanew + old_header->OptionalHeader.SizeOfHeaders);
result->headers = (PIMAGE_NT_HEADERS)&((const unsigned char *)(headers))[dos_header->e_lfanew];
// update position
result->headers->OptionalHeader.ImageBase = (DWORD)code;
// copy sections from DLL file block to new memory location
CopySections(data, old_header, result);
// adjust base address of imported data
locationDelta = (DWORD)(code - old_header->OptionalHeader.ImageBase);
if (locationDelta != 0)
PerformBaseRelocation(result, locationDelta);
// load required dlls and adjust function table of imports
if (!BuildImportTable(result))
goto error;
// mark memory pages depending on section headers and release
// sections that are marked as "discardable"
FinalizeSections(result);
// get entry point of loaded library
if (result->headers->OptionalHeader.AddressOfEntryPoint != 0)
{
DllEntry = (DllEntryProc)(code + result->headers->OptionalHeader.AddressOfEntryPoint);
if (DllEntry == 0)
{
SetLastError(ERROR_BAD_FORMAT); /* XXX ? */
#if DEBUG_OUTPUT
OutputDebugString("Library has no entry point.\n");
#endif
goto error;
}
// notify library about attaching to process
successfull = (*DllEntry)((HINSTANCE)code, DLL_PROCESS_ATTACH, 0);
if (!successfull)
{
#if DEBUG_OUTPUT
OutputDebugString("Can't attach library.\n");
#endif
goto error;
}
result->initialized = 1;
}
_Register(name, result);
return (HMEMORYMODULE)result;
error:
// cleanup
free(result->name);
MemoryFreeLibrary(result);
return NULL;
}
int _compare(const struct NAME_TABLE *p1, const struct NAME_TABLE *p2)
{
return stricmp(p1->name, p2->name);
}
int _find(const char **name, const struct NAME_TABLE *p)
{
return stricmp(*name, p->name);
}
struct NAME_TABLE *GetNameTable(PMEMORYMODULE module)
{
unsigned char *codeBase;
PIMAGE_EXPORT_DIRECTORY exports;
PIMAGE_DATA_DIRECTORY directory;
DWORD i, *nameRef;
WORD *ordinal;
struct NAME_TABLE *p, *ptab;
if (module->name_table)
return module->name_table;
codeBase = module->codeBase;
directory = GET_HEADER_DICTIONARY(module, IMAGE_DIRECTORY_ENTRY_EXPORT);
exports = (PIMAGE_EXPORT_DIRECTORY)(codeBase + directory->VirtualAddress);
nameRef = (DWORD *)(codeBase + exports->AddressOfNames);
ordinal = (WORD *)(codeBase + exports->AddressOfNameOrdinals);
p = ((PMEMORYMODULE)module)->name_table = (struct NAME_TABLE *)malloc(sizeof(struct NAME_TABLE)
* exports->NumberOfNames);
if (p == NULL)
return NULL;
ptab = p;
for (i=0; i<exports->NumberOfNames; ++i) {
p->name = (char *)(codeBase + *nameRef++);
p->ordinal = *ordinal++;
++p;
}
qsort(ptab, exports->NumberOfNames, sizeof(struct NAME_TABLE), _compare);
return ptab;
}
FARPROC MemoryGetProcAddress(HMEMORYMODULE module, const char *name)
{
unsigned char *codeBase = ((PMEMORYMODULE)module)->codeBase;
int idx=-1;
PIMAGE_EXPORT_DIRECTORY exports;
PIMAGE_DATA_DIRECTORY directory = GET_HEADER_DICTIONARY((PMEMORYMODULE)module, IMAGE_DIRECTORY_ENTRY_EXPORT);
if (directory->Size == 0)
// no export table found
return NULL;
exports = (PIMAGE_EXPORT_DIRECTORY)(codeBase + directory->VirtualAddress);
if (exports->NumberOfNames == 0 || exports->NumberOfFunctions == 0)
// DLL doesn't export anything
return NULL;
if (HIWORD(name)) {
struct NAME_TABLE *ptab;
struct NAME_TABLE *found;
ptab = GetNameTable((PMEMORYMODULE)module);
if (ptab == NULL)
// some failure
return NULL;
found = bsearch(&name, ptab, exports->NumberOfNames, sizeof(struct NAME_TABLE), _find);
if (found == NULL)
// exported symbol not found
return NULL;
idx = found->ordinal;
}
else
idx = LOWORD(name) - exports->Base;
if ((DWORD)idx > exports->NumberOfFunctions)
// name <-> ordinal number don't match
return NULL;
// AddressOfFunctions contains the RVAs to the "real" functions
return (FARPROC)(codeBase + *(DWORD *)(codeBase + exports->AddressOfFunctions + (idx*4)));
}
void MemoryFreeLibrary(HMEMORYMODULE mod)
{
int i;
PMEMORYMODULE module = (PMEMORYMODULE)mod;
if (module != NULL)
{
if (module->initialized != 0)
{
// notify library about detaching from process
DllEntryProc DllEntry = (DllEntryProc)(module->codeBase + module->headers->OptionalHeader.AddressOfEntryPoint);
(*DllEntry)((HINSTANCE)module->codeBase, DLL_PROCESS_DETACH, 0);
module->initialized = 0;
}
if (module->modules != NULL)
{
// free previously opened libraries
for (i=0; i<module->numModules; i++)
if (module->modules[i] != INVALID_HANDLE_VALUE)
MyFreeLibrary(module->modules[i]);
free(module->modules);
}
if (module->codeBase != NULL)
// release memory of library
VirtualFree(module->codeBase, 0, MEM_RELEASE);
if (module->name_table != NULL)
free(module->name_table);
HeapFree(GetProcessHeap(), 0, module);
}
}
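/*
 * Editor's sketch (not part of MemoryModule.c): the typical call sequence.
 * The caller is assumed to have read the raw DLL image into memory; the
 * export name below is hypothetical.
 */
static void example_usage(char *name, const void *image)
{
    HMEMORYMODULE mod = MemoryLoadLibrary(name, image);
    if (mod != NULL) {
        /* resolve an export by name (pass a LOWORD value for by-ordinal) */
        FARPROC fn = MemoryGetProcAddress(mod, "initexample");
        if (fn != NULL)
            fn();
        /* MyFreeLibrary handles the refcount and linked-list bookkeeping */
        MyFreeLibrary((HMODULE)mod);
    }
}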

View File

@ -0,0 +1,58 @@
/*
* Memory DLL loading code
* Version 0.0.2
*
* Copyright (c) 2004-2005 by Joachim Bauch / mail@joachim-bauch.de
* http://www.joachim-bauch.de
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is MemoryModule.h
*
* The Initial Developer of the Original Code is Joachim Bauch.
*
* Portions created by Joachim Bauch are Copyright (C) 2004-2005
* Joachim Bauch. All Rights Reserved.
*
*/
#ifndef __MEMORY_MODULE_HEADER
#define __MEMORY_MODULE_HEADER
#include <Windows.h>
typedef void *HMEMORYMODULE;
#ifdef __cplusplus
extern "C" {
#endif
typedef void *(*FINDPROC)();
extern FINDPROC findproc;
extern void *findproc_data;
HMEMORYMODULE MemoryLoadLibrary(char *, const void *);
FARPROC MemoryGetProcAddress(HMEMORYMODULE, const char *);
void MemoryFreeLibrary(HMEMORYMODULE);
BOOL MyFreeLibrary(HMODULE hModule);
HMODULE MyLoadLibrary(char *lpFileName);
FARPROC MyGetProcAddress(HMODULE hModule, LPCSTR lpProcName);
HMODULE MyGetModuleHandle(LPCTSTR lpModuleName);
#ifdef __cplusplus
}
#endif
#endif // __MEMORY_MODULE_HEADER

View File

@ -8,7 +8,7 @@ __docformat__ = 'restructuredtext en'
import os, shutil, subprocess
from setup import Command, __appname__
from setup import Command, __appname__, __version__
from setup.installer import VMInstaller
class Win(Command):
@ -43,4 +43,11 @@ class Win32(VMInstaller):
self.warn('Failed to freeze')
raise SystemExit(1)
installer = 'dist/%s-portable-%s.zip'%(__appname__, __version__)
subprocess.check_call(('scp',
'xp_build:build/%s/%s'%(__appname__, installer), 'dist'))
if not os.path.exists(installer):
self.warn('Failed to get portable installer')
raise SystemExit(1)

View File

@ -1,6 +1,6 @@
<?xml version="1.0" encoding="utf-8"?>
<WixLocalization Culture="en-us" xmlns="http://schemas.microsoft.com/wix/2006/localization">
<String Id="AdvancedWelcomeEulaDlgDescriptionPerUser">If you are upgrading from a {app} version older than 0.6.17, please uninstall {app} first. Click Advanced to change installation settings.</String>
<String Id="AdvancedWelcomeEulaDlgDescriptionPerUser">Click Advanced to change installation settings.</String>
<String Id="ProgressTextFileCost">Computing space requirements, this may take upto five minutes...</String>
<String Id="ProgressTextCostInitialize">Computing space requirements, this may take upto five minutes...</String>
<String Id="ProgressTextCostFinalize">Computing space requirements, this may take upto five minutes...</String>

View File

@ -8,19 +8,19 @@ __docformat__ = 'restructuredtext en'
import sys, os, shutil, glob, py_compile, subprocess, re, zipfile, time
from setup import Command, modules, functions, basenames, __version__, \
__appname__
from setup import (Command, modules, functions, basenames, __version__,
__appname__)
from setup.build_environment import msvc, MT, RC
from setup.installer.windows.wix import WixMixIn
OPENSSL_DIR = r'Q:\openssl'
QT_DIR = 'Q:\\Qt\\4.7.3'
QT_DLLS = ['Core', 'Gui', 'Network', 'Svg', 'WebKit', 'Xml', 'XmlPatterns']
LIBUSB_DIR = 'C:\\libusb'
LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll'
SW = r'C:\cygwin\home\kovid\sw'
IMAGEMAGICK = os.path.join(SW, 'build', 'ImageMagick-6.6.6',
'VisualMagick', 'bin')
CRT = r'C:\Microsoft.VC90.CRT'
VERSION = re.sub('[a-z]\d+', '', __version__)
WINVER = VERSION+'.0'
@ -50,7 +50,7 @@ def walk(dir):
class Win32Freeze(Command, WixMixIn):
description = 'Free windows calibre installation'
description = 'Freeze windows calibre installation'
def add_options(self, parser):
parser.add_option('--no-ice', default=False, action='store_true',
@ -71,33 +71,69 @@ class Win32Freeze(Command, WixMixIn):
self.rc_template = self.j(self.d(self.a(__file__)), 'template.rc')
self.py_ver = ''.join(map(str, sys.version_info[:2]))
self.lib_dir = self.j(self.base, 'Lib')
self.pydlib = self.j(self.base, 'pydlib')
self.pylib = self.j(self.base, 'pylib.zip')
self.dll_dir = self.j(self.base, 'DLLs')
self.plugins_dir = os.path.join(self.base, 'plugins2')
self.portable_base = self.j(self.d(self.base), 'Calibre Portable')
self.obj_dir = self.j(self.src_root, 'build', 'launcher')
self.initbase()
self.build_launchers()
self.add_plugins()
self.freeze()
self.embed_manifests()
self.install_site_py()
self.archive_lib_dir()
self.remove_CRT_from_manifests()
self.create_installer()
self.build_portable()
def remove_CRT_from_manifests(self):
'''
The dependency on the CRT is removed from the manifests of all DLLs.
This allows the CRT loaded by the .exe files to be used instead.
'''
search_pat = re.compile(r'(?is)<dependency>.*Microsoft\.VC\d+\.CRT')
repl_pat = re.compile(
r'(?is)<dependency>.*?Microsoft\.VC\d+\.CRT.*?</dependency>')
for dll in glob.glob(self.j(self.dll_dir, '*.dll')):
bn = self.b(dll)
with open(dll, 'rb') as f:
raw = f.read()
match = search_pat.search(raw)
if match is None:
continue
self.info('Removing CRT dependency from manifest of: %s'%bn)
# Blank out the bytes corresponding to the dependency specification
nraw = repl_pat.sub(lambda m: b' '*len(m.group()), raw)
if len(nraw) != len(raw):
raise Exception('Something went wrong with %s'%bn)
with open(dll, 'wb') as f:
f.write(nraw)
def initbase(self):
if self.e(self.base):
shutil.rmtree(self.base)
os.makedirs(self.base)
def add_plugins(self):
self.info('Adding plugins...')
tgt = self.plugins_dir
if os.path.exists(tgt):
shutil.rmtree(tgt)
os.mkdir(tgt)
base = self.j(self.SRC, 'calibre', 'plugins')
for f in glob.glob(self.j(base, '*.pyd')):
# We don't want the manifests, as the manifest in the exe will be
# used instead
shutil.copy2(f, tgt)
def freeze(self):
shutil.copy2(self.j(self.src_root, 'LICENSE'), self.base)
self.info('Adding plugins...')
tgt = os.path.join(self.base, 'plugins')
if not os.path.exists(tgt):
os.mkdir(tgt)
base = self.j(self.SRC, 'calibre', 'plugins')
for pat in ('*.pyd', '*.manifest'):
for f in glob.glob(self.j(base, pat)):
shutil.copy2(f, tgt)
self.info('Adding CRT')
shutil.copytree(CRT, self.j(self.base, os.path.basename(CRT)))
self.info('Adding resources...')
tgt = self.j(self.base, 'resources')
@ -106,7 +142,6 @@ class Win32Freeze(Command, WixMixIn):
shutil.copytree(self.j(self.src_root, 'resources'), tgt)
self.info('Adding Qt and python...')
self.dll_dir = self.j(self.base, 'DLLs')
shutil.copytree(r'C:\Python%s\DLLs'%self.py_ver, self.dll_dir,
ignore=shutil.ignore_patterns('msvc*.dll', 'Microsoft.*'))
for x in glob.glob(self.j(OPENSSL_DIR, 'bin', '*.dll')):
@ -144,6 +179,24 @@ class Win32Freeze(Command, WixMixIn):
shutil.copytree(self.j(comext, 'shell'), self.j(sp_dir, 'win32com', 'shell'))
shutil.rmtree(comext)
# Fix PyCrypto, removing the bootstrap .py modules that load the .pyd
# modules, since they do not work when in a zip file
for crypto_dir in glob.glob(self.j(sp_dir, 'pycrypto-*', 'Crypto')):
for dirpath, dirnames, filenames in os.walk(crypto_dir):
for f in filenames:
name, ext = os.path.splitext(f)
if ext == '.pyd':
with open(self.j(dirpath, name+'.py')) as f:
raw = f.read().strip()
if (not raw.startswith('def __bootstrap__') or not
raw.endswith('__bootstrap__()')):
raise Exception('The PyCrypto file %r has non'
' bootstrap code'%self.j(dirpath, f))
for ext in ('.py', '.pyc', '.pyo'):
x = self.j(dirpath, name+ext)
if os.path.exists(x):
os.remove(x)
for pat in (r'PyQt4\uic\port_v3', ):
x = glob.glob(self.j(self.lib_dir, 'site-packages', pat))[0]
shutil.rmtree(x)
@ -194,14 +247,13 @@ class Win32Freeze(Command, WixMixIn):
if os.path.exists(tg):
shutil.rmtree(tg)
shutil.copytree(imfd, tg)
for dirpath, dirnames, filenames in os.walk(tdir):
for x in filenames:
if not x.endswith('.dll'):
os.remove(self.j(dirpath, x))
print
print 'Adding third party dependencies'
tdir = os.path.join(self.base, 'driver')
os.makedirs(tdir)
for pat in ('*.dll', '*.sys', '*.cat', '*.inf'):
for f in glob.glob(os.path.join(LIBUSB_DIR, pat)):
shutil.copyfile(f, os.path.join(tdir, os.path.basename(f)))
print '\tAdding unrar'
shutil.copyfile(LIBUNRAR,
os.path.join(self.dll_dir, os.path.basename(LIBUNRAR)))
@ -256,7 +308,7 @@ class Win32Freeze(Command, WixMixIn):
def embed_resources(self, module, desc=None):
icon_base = self.j(self.src_root, 'icons')
icon_map = {'calibre':'library', 'ebook-viewer':'viewer',
'lrfviewer':'viewer'}
'lrfviewer':'viewer', 'calibre-portable':'library'}
file_type = 'DLL' if module.endswith('.dll') else 'APP'
template = open(self.rc_template, 'rb').read()
bname = self.b(module)
@ -313,13 +365,67 @@ class Win32Freeze(Command, WixMixIn):
self.info(p.stderr.read())
sys.exit(1)
def build_portable(self):
base = self.portable_base
if os.path.exists(base):
shutil.rmtree(base)
os.makedirs(base)
src = self.j(self.src_root, 'setup', 'installer', 'windows',
'portable.c')
obj = self.j(self.obj_dir, self.b(src)+'.obj')
cflags = '/c /EHsc /MT /W3 /Ox /nologo /D_UNICODE'.split()
if self.newer(obj, [src]):
self.info('Compiling', obj)
cmd = [msvc.cc] + cflags + ['/Fo'+obj, '/Tc'+src]
self.run_builder(cmd)
exe = self.j(base, 'calibre-portable.exe')
if self.newer(exe, [obj]):
self.info('Linking', exe)
cmd = [msvc.linker] + ['/INCREMENTAL:NO', '/MACHINE:X86',
'/LIBPATH:'+self.obj_dir, '/SUBSYSTEM:WINDOWS',
'/RELEASE',
'/OUT:'+exe, self.embed_resources(exe),
obj, 'User32.lib']
self.run_builder(cmd)
self.info('Creating portable installer')
shutil.copytree(self.base, self.j(base, 'Calibre'))
os.mkdir(self.j(base, 'Calibre Library'))
os.mkdir(self.j(base, 'Calibre Settings'))
name = '%s-portable-%s.zip'%(__appname__, __version__)
with zipfile.ZipFile(self.j('dist', name), 'w', zipfile.ZIP_DEFLATED) as zf:
self.add_dir_to_zip(zf, base, 'Calibre Portable')
def add_dir_to_zip(self, zf, path, prefix=''):
'''
Add a directory recursively to the zip file with an optional prefix.
'''
if prefix:
zi = zipfile.ZipInfo(prefix+'/')
zi.external_attr = 16
zf.writestr(zi, '')
cwd = os.path.abspath(os.getcwd())
try:
os.chdir(path)
fp = (prefix + ('/' if prefix else '')).replace('//', '/')
for f in os.listdir('.'):
arcname = fp + f
if os.path.isdir(f):
self.add_dir_to_zip(zf, f, prefix=arcname)
else:
zf.write(f, arcname)
finally:
os.chdir(cwd)
def build_launchers(self):
self.obj_dir = self.j(self.src_root, 'build', 'launcher')
if not os.path.exists(self.obj_dir):
os.makedirs(self.obj_dir)
base = self.j(self.src_root, 'setup', 'installer', 'windows')
sources = [self.j(base, x) for x in ['util.c']]
headers = [self.j(base, x) for x in ['util.h']]
sources = [self.j(base, x) for x in ['util.c', 'MemoryModule.c']]
headers = [self.j(base, x) for x in ['util.h', 'MemoryModule.h']]
objects = [self.j(self.obj_dir, self.b(x)+'.obj') for x in sources]
cflags = '/c /EHsc /MD /W3 /Ox /nologo /D_UNICODE'.split()
cflags += ['/DPYDLL="python%s.dll"'%self.py_ver, '/IC:/Python%s/include'%self.py_ver]
@ -371,43 +477,49 @@ class Win32Freeze(Command, WixMixIn):
def archive_lib_dir(self):
self.info('Putting all python code into a zip file for performance')
if os.path.exists(self.pydlib):
shutil.rmtree(self.pydlib)
os.makedirs(self.pydlib)
self.zf_timestamp = time.localtime(time.time())[:6]
self.zf_names = set()
with zipfile.ZipFile(self.pylib, 'w', zipfile.ZIP_STORED) as zf:
# Add the .pyds from python and calibre to the zip file
for x in (self.plugins_dir, self.dll_dir):
for pyd in os.listdir(x):
if pyd.endswith('.pyd') and pyd != 'sqlite_custom.pyd':
# sqlite_custom has to be a file for
# sqlite_load_extension to work
self.add_to_zipfile(zf, pyd, x)
os.remove(self.j(x, pyd))
# Add everything in Lib except site-packages to the zip file
for x in os.listdir(self.lib_dir):
if x == 'site-packages':
continue
self.add_to_zipfile(zf, x, self.lib_dir)
sp = self.j(self.lib_dir, 'site-packages')
handled = set(['site.pyo'])
for pth in ('PIL.pth', 'pywin32.pth'):
handled.add(pth)
shutil.copyfile(self.j(sp, pth), self.j(self.pydlib, pth))
for d in self.get_pth_dirs(self.j(sp, pth)):
shutil.copytree(d, self.j(self.pydlib, self.b(d)), True)
handled.add(self.b(d))
# Special handling for PIL and pywin32
handled = set(['PIL.pth', 'pywin32.pth', 'PIL', 'win32'])
self.add_to_zipfile(zf, 'PIL', sp)
base = self.j(sp, 'win32', 'lib')
for x in os.listdir(base):
if os.path.splitext(x)[1] not in ('.exe',):
self.add_to_zipfile(zf, x, base)
base = self.d(base)
for x in os.listdir(base):
if not os.path.isdir(self.j(base, x)):
if os.path.splitext(x)[1] not in ('.exe',):
self.add_to_zipfile(zf, x, base)
handled.add('easy-install.pth')
for d in self.get_pth_dirs(self.j(sp, 'easy-install.pth')):
handled.add(self.b(d))
zip_safe = self.is_zip_safe(d)
for x in os.listdir(d):
if x == 'EGG-INFO':
continue
if zip_safe:
self.add_to_zipfile(zf, x, d)
else:
absp = self.j(d, x)
dest = self.j(self.pydlib, x)
if os.path.isdir(absp):
shutil.copytree(absp, dest, True)
else:
shutil.copy2(absp, dest)
# The rest of site-packages
# We don't want the site.py from site-packages
handled.add('site.pyo')
for x in os.listdir(sp):
if x in handled or x.endswith('.egg-info'):
continue
@ -415,33 +527,18 @@ class Win32Freeze(Command, WixMixIn):
if os.path.isdir(absp):
if not os.listdir(absp):
continue
if self.is_zip_safe(absp):
self.add_to_zipfile(zf, x, sp)
else:
shutil.copytree(absp, self.j(self.pydlib, x), True)
else:
if x.endswith('.pyd'):
shutil.copy2(absp, self.j(self.pydlib, x))
else:
self.add_to_zipfile(zf, x, sp)
shutil.rmtree(self.lib_dir)
def is_zip_safe(self, path):
for f in walk(path):
ext = os.path.splitext(f)[1].lower()
if ext in ('.pyd', '.dll', '.exe'):
return False
return True
def get_pth_dirs(self, pth):
base = os.path.dirname(pth)
for line in open(pth).readlines():
line = line.strip()
if not line or line.startswith('#') or line.startswith('import'):
continue
if line == 'win32\\lib':
continue
candidate = self.j(base, line)
if os.path.exists(candidate):
yield candidate
@ -463,10 +560,10 @@ class Win32Freeze(Command, WixMixIn):
self.add_to_zipfile(zf, name + os.sep + x, base)
else:
ext = os.path.splitext(name)[1].lower()
if ext in ('.pyd', '.dll', '.exe'):
if ext in ('.dll',):
raise ValueError('Cannot add %r to zipfile'%abspath)
zinfo.external_attr = 0600 << 16
if ext in ('.py', '.pyc', '.pyo'):
if ext in ('.py', '.pyc', '.pyo', '.pyd'):
with open(abspath, 'rb') as f:
zf.writestr(zinfo, f.read())

View File

@ -53,6 +53,13 @@ SQLite
Put sqlite3*.h from the SQLite Windows amalgamation in ~/sw/include
APSW
-----
Download the source from http://code.google.com/p/apsw/downloads/list and run, in a Visual Studio prompt::
python setup.py fetch --all build --missing-checksum-ok --enable-all-extensions install test
OpenSSL
--------
@ -88,7 +95,9 @@ Qt uses its own routine to locate and load "system libraries" including the open
Now, run configure and make::
configure -opensource -release -qt-zlib -qt-gif -qt-libmng -qt-libpng -qt-libtiff -qt-libjpeg -release -platform win32-msvc2008 -no-qt3support -webkit -xmlpatterns -no-phonon -no-style-plastique -no-style-cleanlooks -no-style-motif -no-style-cde -no-declarative -no-scripttools -no-audio-backend -no-multimedia -no-dbus -no-openvg -no-opengl -no-qt3support -confirm-license -nomake examples -nomake demos -nomake docs -openssl -I Q:\openssl\include -L Q:\openssl\lib && nmake
-no-plugin-manifests is needed so that loading the plugins does not fail when looking for the CRT assembly
configure -opensource -release -qt-zlib -qt-gif -qt-libmng -qt-libpng -qt-libtiff -qt-libjpeg -release -platform win32-msvc2008 -no-qt3support -webkit -xmlpatterns -no-phonon -no-style-plastique -no-style-cleanlooks -no-style-motif -no-style-cde -no-declarative -no-scripttools -no-audio-backend -no-multimedia -no-dbus -no-openvg -no-opengl -no-qt3support -confirm-license -nomake examples -nomake demos -nomake docs -no-plugin-manifests -openssl -I Q:\openssl\include -L Q:\openssl\lib && nmake
SIP
-----

View File

@ -0,0 +1,151 @@
#ifndef UNICODE
#define UNICODE
#endif
#include <windows.h>
#include <tchar.h>
#include <stdio.h>
#define BUFSIZE 4096
void show_error(LPCTSTR msg) {
MessageBeep(MB_ICONERROR);
MessageBox(NULL, msg, TEXT("Error"), MB_OK|MB_ICONERROR);
}
void show_detailed_error(LPCTSTR preamble, LPCTSTR msg, int code) {
LPTSTR buf;
buf = (LPTSTR)LocalAlloc(LMEM_ZEROINIT, sizeof(TCHAR)*
(_tcslen(msg) + _tcslen(preamble) + 80));
_sntprintf_s(buf,
LocalSize(buf) / sizeof(TCHAR), _TRUNCATE,
TEXT("%s\r\n %s (Error Code: %d)\r\n"),
preamble, msg, code);
show_error(buf);
LocalFree(buf);
}
void show_last_error_crt(LPCTSTR preamble) {
TCHAR buf[BUFSIZE];
int err = 0;
_get_errno(&err);
_wcserror_s(buf, BUFSIZE, err);
show_detailed_error(preamble, buf, err);
}
void show_last_error(LPCTSTR preamble) {
TCHAR *msg = NULL;
DWORD dw = GetLastError();
FormatMessage(
FORMAT_MESSAGE_ALLOCATE_BUFFER |
FORMAT_MESSAGE_FROM_SYSTEM |
FORMAT_MESSAGE_IGNORE_INSERTS,
NULL,
dw,
MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
&msg,
0, NULL );
show_detailed_error(preamble, msg, (int)dw);
}
LPTSTR get_app_dir() {
LPTSTR buf, buf2, buf3;
DWORD sz;
TCHAR drive[4] = TEXT("\0\0\0");
errno_t err;
buf = (LPTSTR)calloc(BUFSIZE, sizeof(TCHAR));
buf2 = (LPTSTR)calloc(BUFSIZE, sizeof(TCHAR));
buf3 = (LPTSTR)calloc(BUFSIZE, sizeof(TCHAR));
sz = GetModuleFileName(NULL, buf, BUFSIZE);
if (sz == 0 || sz > BUFSIZE-1) {
show_error(TEXT("Failed to get path to calibre-portable.exe"));
ExitProcess(1);
}
err = _tsplitpath_s(buf, drive, 4, buf2, BUFSIZE, NULL, 0, NULL, 0);
if (err != 0) {
show_last_error_crt(TEXT("Failed to split path to calibre-portable.exe"));
ExitProcess(1);
}
_sntprintf_s(buf3, BUFSIZE-1, _TRUNCATE, TEXT("%s%s"), drive, buf2);
free(buf); free(buf2);
return buf3;
}
void launch_calibre(LPCTSTR exe, LPCTSTR config_dir, LPCTSTR library_dir) {
DWORD dwFlags=0;
STARTUPINFO si;
PROCESS_INFORMATION pi;
BOOL fSuccess;
TCHAR cmdline[BUFSIZE];
if (! SetEnvironmentVariable(TEXT("CALIBRE_CONFIG_DIRECTORY"), config_dir)) {
show_last_error(TEXT("Failed to set environment variables"));
ExitProcess(1);
}
if (! SetEnvironmentVariable(TEXT("CALIBRE_PORTABLE_BUILD"), exe)) {
show_last_error(TEXT("Failed to set environment variables"));
ExitProcess(1);
}
dwFlags = CREATE_UNICODE_ENVIRONMENT | CREATE_NEW_PROCESS_GROUP;
_sntprintf_s(cmdline, BUFSIZE, _TRUNCATE, TEXT(" \"--with-library=%s\""), library_dir);
ZeroMemory( &si, sizeof(si) );
si.cb = sizeof(si);
ZeroMemory( &pi, sizeof(pi) );
fSuccess = CreateProcess(exe, cmdline,
NULL, // Process handle not inheritable
NULL, // Thread handle not inheritable
FALSE, // Set handle inheritance to FALSE
dwFlags, // Creation flags http://msdn.microsoft.com/en-us/library/ms684863(v=vs.85).aspx
NULL, // Use parent's environment block
NULL, // Use parent's starting directory
&si, // Pointer to STARTUPINFO structure
&pi // Pointer to PROCESS_INFORMATION structure
);
if (fSuccess == 0) {
show_last_error(TEXT("Failed to launch the calibre program"));
}
// Close process and thread handles.
CloseHandle( pi.hProcess );
CloseHandle( pi.hThread );
}
int WINAPI wWinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, PWSTR pCmdLine, int nCmdShow)
{
LPTSTR app_dir, config_dir, exe, library_dir;
app_dir = get_app_dir();
config_dir = (LPTSTR)calloc(BUFSIZE, sizeof(TCHAR));
library_dir = (LPTSTR)calloc(BUFSIZE, sizeof(TCHAR));
exe = (LPTSTR)calloc(BUFSIZE, sizeof(TCHAR));
_sntprintf_s(config_dir, BUFSIZE, _TRUNCATE, TEXT("%sCalibre Settings"), app_dir);
_sntprintf_s(exe, BUFSIZE, _TRUNCATE, TEXT("%sCalibre\\calibre.exe"), app_dir);
_sntprintf_s(library_dir, BUFSIZE, _TRUNCATE, TEXT("%sCalibre Library"), app_dir);
launch_calibre(exe, config_dir, library_dir);
free(app_dir); free(config_dir); free(exe); free(library_dir);
return 0;
}
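The launcher derives every path from its own location: as the _sntprintf_s calls above show, it expects this layout next to calibre-portable.exe (drive letter illustrative):

    X:\calibre-portable.exe
    X:\Calibre\calibre.exe        (the real calibre binary)
    X:\Calibre Settings\          (becomes CALIBRE_CONFIG_DIRECTORY)
    X:\Calibre Library\           (passed via --with-library)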


@ -1,12 +1,72 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import sys, os, linecache
import sys
import os
import zipimport
import _memimporter
DEBUG_ZIPIMPORT = False
class ZipExtensionImporter(zipimport.zipimporter):
'''
Taken, with thanks, from the py2exe source code
'''
def __init__(self, *args, **kwargs):
zipimport.zipimporter.__init__(self, *args, **kwargs)
        # We know there are no dlls in the zip file, so don't set findproc
# (performance optimization)
#_memimporter.set_find_proc(self.locate_dll_image)
def find_module(self, fullname, path=None):
result = zipimport.zipimporter.find_module(self, fullname, path)
if result:
return result
fullname = fullname.replace(".", "\\")
if (fullname + '.pyd') in self._files:
return self
return None
def locate_dll_image(self, name):
        # A callback function for _memimporter.import_module. Tries to
# locate additional dlls. Returns the image as Python string,
# or None if not found.
if name in self._files:
return self.get_data(name)
return None
def load_module(self, fullname):
if sys.modules.has_key(fullname):
mod = sys.modules[fullname]
if DEBUG_ZIPIMPORT:
sys.stderr.write("import %s # previously loaded from zipfile %s\n" % (fullname, self.archive))
return mod
try:
return zipimport.zipimporter.load_module(self, fullname)
except zipimport.ZipImportError:
pass
initname = "init" + fullname.split(".")[-1] # name of initfunction
filename = fullname.replace(".", "\\")
path = filename + '.pyd'
if path in self._files:
if DEBUG_ZIPIMPORT:
sys.stderr.write("# found %s in zipfile %s\n" % (path, self.archive))
code = self.get_data(path)
mod = _memimporter.import_module(code, initname, fullname, path)
mod.__file__ = "%s\\%s" % (self.archive, path)
mod.__loader__ = self
if DEBUG_ZIPIMPORT:
sys.stderr.write("import %s # loaded from zipfile %s\n" % (fullname, mod.__file__))
return mod
raise zipimport.ZipImportError, "can't find module %s" % fullname
def __repr__(self):
return "<%s object %r>" % (self.__class__.__name__, self.archive)
def abs__file__():
@ -32,7 +92,7 @@ def aliasmbcs():
def add_calibre_vars():
sys.resources_location = os.path.join(sys.app_dir, 'resources')
sys.extensions_location = os.path.join(sys.app_dir, 'plugins')
sys.extensions_location = os.path.join(sys.app_dir, 'plugins2')
dv = os.environ.get('CALIBRE_DEVELOP_FROM', None)
if dv and os.path.exists(dv):
@ -42,42 +102,6 @@ def makepath(*paths):
dir = os.path.abspath(os.path.join(*paths))
return dir, os.path.normcase(dir)
def addpackage(sitedir, name):
"""Process a .pth file within the site-packages directory:
For each line in the file, either combine it with sitedir to a path,
or execute it if it starts with 'import '.
"""
fullname = os.path.join(sitedir, name)
try:
f = open(fullname, "rU")
except IOError:
return
with f:
for line in f:
if line.startswith("#"):
continue
if line.startswith(("import ", "import\t")):
exec line
continue
line = line.rstrip()
dir, dircase = makepath(sitedir, line)
if os.path.exists(dir):
sys.path.append(dir)
def addsitedir(sitedir):
"""Add 'sitedir' argument to sys.path if missing and handle .pth files in
'sitedir'"""
sitedir, sitedircase = makepath(sitedir)
try:
names = os.listdir(sitedir)
except os.error:
return
dotpth = os.extsep + "pth"
names = [name for name in names if name.endswith(dotpth)]
for name in sorted(names):
addpackage(sitedir, name)
def run_entry_point():
bname, mod, func = sys.calibre_basename, sys.calibre_module, sys.calibre_function
sys.argv[0] = bname+'.exe'
@ -89,6 +113,10 @@ def main():
sys.setdefaultencoding('utf-8')
aliasmbcs()
sys.path_hooks.insert(0, ZipExtensionImporter)
sys.path_importer_cache.clear()
import linecache
def fake_getline(filename, lineno, module_globals=None):
return ''
linecache.orig_getline = linecache.getline
@ -96,10 +124,11 @@ def main():
abs__file__()
addsitedir(os.path.join(sys.app_dir, 'pydlib'))
add_calibre_vars()
# Needed for pywintypes to be able to load its DLL
sys.path.append(os.path.join(sys.app_dir, 'DLLs'))
return run_entry_point()
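A minimal sketch (not part of this commit) of the import-hook mechanism main() wires up above: once ZipExtensionImporter is registered as a path hook, a .pyd stored inside a zip on sys.path is imported straight from memory (zip path and module name illustrative):

    sys.path_hooks.insert(0, ZipExtensionImporter)
    sys.path_importer_cache.clear()
    sys.path.insert(0, r'C:\calibre\pylib.zip')
    import foo   # foo.pyd inside pylib.zip, loaded via _memimporter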


@ -1,18 +1,130 @@
/*
* Copyright 2009 Kovid Goyal
* The memimporter code is taken from the py2exe project
*/
#include "util.h"
#include <delayimp.h>
#include <io.h>
#include <fcntl.h>
static char GUI_APP = 0;
static char python_dll[] = PYDLL;
void set_gui_app(char yes) { GUI_APP = yes; }
char is_gui_app() { return GUI_APP; }
// memimporter {{{
#include "MemoryModule.h"
static char **DLL_Py_PackageContext = NULL;
static PyObject **DLL_ImportError = NULL;
static char module_doc[] =
"Importer which can load extension modules from memory";
static void *memdup(void *ptr, Py_ssize_t size)
{
void *p = malloc(size);
if (p == NULL)
return NULL;
memcpy(p, ptr, size);
return p;
}
/*
Be sure to detect errors in FindLibrary - undetected errors lead to
very strange behaviour.
*/
static void* FindLibrary(char *name, PyObject *callback)
{
PyObject *result;
char *p;
Py_ssize_t size;
if (callback == NULL)
return NULL;
result = PyObject_CallFunction(callback, "s", name);
if (result == NULL) {
PyErr_Clear();
return NULL;
}
if (-1 == PyString_AsStringAndSize(result, &p, &size)) {
PyErr_Clear();
Py_DECREF(result);
return NULL;
}
p = memdup(p, size);
Py_DECREF(result);
return p;
}
static PyObject *set_find_proc(PyObject *self, PyObject *args)
{
PyObject *callback = NULL;
if (!PyArg_ParseTuple(args, "|O:set_find_proc", &callback))
return NULL;
    Py_XDECREF((PyObject *)findproc_data);
    Py_XINCREF(callback);
findproc_data = (void *)callback;
return Py_BuildValue("i", 1);
}
static PyObject *
import_module(PyObject *self, PyObject *args)
{
char *data;
int size;
char *initfuncname;
char *modname;
char *pathname;
HMEMORYMODULE hmem;
FARPROC do_init;
char *oldcontext;
/* code, initfuncname, fqmodulename, path */
if (!PyArg_ParseTuple(args, "s#sss:import_module",
&data, &size,
&initfuncname, &modname, &pathname))
return NULL;
hmem = MemoryLoadLibrary(pathname, data);
if (!hmem) {
PyErr_Format(*DLL_ImportError,
"MemoryLoadLibrary() failed loading %s", pathname);
return NULL;
}
do_init = MemoryGetProcAddress(hmem, initfuncname);
if (!do_init) {
MemoryFreeLibrary(hmem);
PyErr_Format(*DLL_ImportError,
"Could not find function %s in memory loaded pyd", initfuncname);
return NULL;
}
oldcontext = *DLL_Py_PackageContext;
*DLL_Py_PackageContext = modname;
do_init();
*DLL_Py_PackageContext = oldcontext;
if (PyErr_Occurred())
return NULL;
/* Retrieve from sys.modules */
return PyImport_ImportModule(modname);
}
static PyMethodDef methods[] = {
{ "import_module", import_module, METH_VARARGS,
"import_module(code, initfunc, dllname[, finder]) -> module" },
{ "set_find_proc", set_find_proc, METH_VARARGS },
{ NULL, NULL }, /* Sentinel */
};
// }}}
static int _show_error(const wchar_t *preamble, const wchar_t *msg, const int code) {
wchar_t *buf, *cbuf;
buf = (wchar_t*)LocalAlloc(LMEM_ZEROINIT, sizeof(wchar_t)*
@ -61,7 +173,7 @@ int show_last_error(wchar_t *preamble) {
NULL,
dw,
MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
msg,
&msg,
0, NULL );
return _show_error(preamble, msg, (int)dw);
@ -185,7 +297,7 @@ void initialize_interpreter(wchar_t *outr, wchar_t *errr,
char *dummy_argv[1] = {""};
buf = (char*)calloc(MAX_PATH, sizeof(char));
path = (char*)calloc(3*MAX_PATH, sizeof(char));
path = (char*)calloc(MAX_PATH, sizeof(char));
if (!buf || !path) ExitProcess(_show_error(L"Out of memory", L"", 1));
sz = GetModuleFileNameA(NULL, buf, MAX_PATH);
@ -198,8 +310,7 @@ void initialize_interpreter(wchar_t *outr, wchar_t *errr,
buf[strlen(buf)-1] = '\0';
_snprintf_s(python_home, MAX_PATH, _TRUNCATE, "%s", buf);
_snprintf_s(path, 3*MAX_PATH, _TRUNCATE, "%s\\pylib.zip;%s\\pydlib;%s\\DLLs",
buf, buf, buf);
_snprintf_s(path, MAX_PATH, _TRUNCATE, "%s\\pylib.zip", buf);
free(buf);
@ -227,7 +338,10 @@ void initialize_interpreter(wchar_t *outr, wchar_t *errr,
if (!flag) ExitProcess(_show_error(L"Failed to get debug flag", L"", 1));
//*flag = 1;
DLL_Py_PackageContext = (char**)GetProcAddress(dll, "_Py_PackageContext");
if (!DLL_Py_PackageContext) ExitProcess(_show_error(L"Failed to load _Py_PackageContext from dll", L"", 1));
DLL_ImportError = (PyObject**)GetProcAddress(dll, "PyExc_ImportError");
if (!DLL_ImportError) ExitProcess(_show_error(L"Failed to load PyExc_ImportError from dll", L"", 1));
Py_SetProgramName(program_name);
Py_SetPythonHome(python_home);
@ -263,6 +377,10 @@ void initialize_interpreter(wchar_t *outr, wchar_t *errr,
PyList_SetItem(argv, i, v);
}
PySys_SetObject("argv", argv);
findproc = FindLibrary;
Py_InitModule3("_memimporter", methods, module_doc);
}
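For reference, a rough sketch (not part of this commit) of how the Python side drives import_module() above, mirroring ZipExtensionImporter.load_module from earlier in this commit (module name and path illustrative):

    code = importer.get_data('foo.pyd')   # raw bytes of the extension
    mod = _memimporter.import_module(code, 'initfoo', 'foo', 'foo.pyd')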


@ -11,6 +11,10 @@
SummaryCodepage='1252' />
<Media Id="1" Cabinet="{app}.cab" CompressionLevel="{compression}" EmbedCab="yes" />
<!-- The following line ensures that DLLs are replaced even if their version is the same as before. This
is necessary because of the manifest nuking that was part of making calibre isolated. But I think it
is more rigorous anyway. -->
<Property Id='REINSTALLMODE' Value='emus'/>
<Upgrade Id="{upgrade_code}">
<UpgradeVersion Maximum="{version}"
@ -33,7 +37,6 @@
</Property>
<Directory Id='TARGETDIR' Name='SourceDir'>
<Merge Id="VCRedist" SourceFile="{crt_msm}" DiskId="1" Language="0"/>
<Directory Id='ProgramFilesFolder' Name='PFiles'>
<Directory Id='APPLICATIONFOLDER' Name='{app}' />
</Directory>
@ -100,10 +103,6 @@
<ComponentRef Id="RememberInstallDir"/>
</Feature>
<Feature Id="VCRedist" Title="Visual C++ 8.0 Runtime" AllowAdvertise="no" Display="hidden" Level="1">
<MergeRef Id="VCRedist"/>
</Feature>
<Feature Id="FSMS" Title="Start menu shortcuts" Level="1"
Description="Program shortcuts installed in the Start Menu">
<ComponentRef Id="StartMenuShortcuts"/>
@ -149,12 +148,13 @@
Set default folder name and allow only per machine installs.
For a per-machine installation, the default installation location
will be [ProgramFilesFolder][ApplicationFolderName] and the user
will be able to change it in the setup UI. This is because the installer
has to install the VC90 merge module into the system winsxs folder for python
to work, so per user installs are impossible anyway.
        will be able to change it in the setup UI. This is no longer necessary
        (i.e. per user installs should work), but it is left this way as I
        don't want to deal with the complications
-->
<Property Id="ApplicationFolderName" Value="Calibre2" />
<Property Id="WixAppFolder" Value="WixPerMachineFolder" />
<Property Id="ALLUSERS" Value="1" />
<WixVariable Id="WixUISupportPerUser" Value="0" />
<!-- Add option to launch calibre after install -->
@ -164,10 +164,6 @@
<CustomAction Id="LaunchApplication" BinaryKey="WixCA"
DllEntry="WixShellExec" Impersonate="yes"/>
<InstallUISequence>
<FileCost Suppress="yes" />
</InstallUISequence>
</Product>
</Wix>


@ -35,7 +35,6 @@ class WixMixIn:
exe_map = self.smap,
main_icon = self.j(self.src_root, 'icons', 'library.ico'),
web_icon = self.j(self.src_root, 'icons', 'web.ico'),
crt_msm = self.j(self.SW, 'Microsoft_VC90_CRT_x86.msm')
)
template = open(self.j(self.d(__file__), 'en-us.xml'),
'rb').read()


@ -26,6 +26,7 @@ def installers():
installers = list(map(installer_name, ('dmg', 'msi', 'tar.bz2')))
installers.append(installer_name('tar.bz2', is64bit=True))
installers.insert(0, 'dist/%s-%s.tar.gz'%(__appname__, __version__))
installers.append('dist/%s-portable-%s.zip'%(__appname__, __version__))
return installers
def installer_description(fname):
@ -38,6 +39,8 @@ def installer_description(fname):
return 'Windows installer'
if fname.endswith('.dmg'):
return 'OS X dmg'
if fname.endswith('.zip'):
return 'Calibre Portable'
return 'Unknown file'
class ReUpload(Command): # {{{
@ -90,9 +93,11 @@ class UploadToGoogleCode(Command): # {{{
def upload_one(self, fname):
self.info('Uploading', fname)
typ = 'Type-Source' if fname.endswith('.gz') else 'Type-Installer'
typ = 'Type-' + ('Source' if fname.endswith('.gz') else 'Archive' if
fname.endswith('.zip') else 'Installer')
ext = os.path.splitext(fname)[1][1:]
op = 'OpSys-'+{'msi':'Windows','dmg':'OSX','bz2':'Linux','gz':'All'}[ext]
op = 'OpSys-'+{'msi':'Windows','zip':'Windows',
'dmg':'OSX','bz2':'Linux','gz':'All'}[ext]
desc = installer_description(fname)
start = time.time()
path = self.upload(os.path.abspath(fname), desc,


@ -106,10 +106,12 @@ def sanitize_file_name(name, substitute='_', as_unicode=False):
name = name.encode(filesystem_encoding, 'ignore')
one = _filename_sanitize.sub(substitute, name)
one = re.sub(r'\s', ' ', one).strip()
one = re.sub(r'^\.+$', '_', one)
bname, ext = os.path.splitext(one)
one = re.sub(r'^\.+$', '_', bname)
if as_unicode:
one = one.decode(filesystem_encoding)
one = one.replace('..', substitute)
one += ext
# Windows doesn't like path components that end with a period
if one and one[-1] in ('.', ' '):
one = one[:-1]+'_'
@ -132,8 +134,10 @@ def sanitize_file_name_unicode(name, substitute='_'):
name]
one = u''.join(chars)
one = re.sub(r'\s', ' ', one).strip()
one = re.sub(r'^\.+$', '_', one)
bname, ext = os.path.splitext(one)
one = re.sub(r'^\.+$', '_', bname)
one = one.replace('..', substitute)
one += ext
# Windows doesn't like path components that end with a period or space
if one and one[-1] in ('.', ' '):
one = one[:-1]+'_'
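# Illustrative of the change above (example not in the original): splitting
# the extension off before collapsing '..' keeps extensions intact for names
# with doubled periods:
#     sanitize_file_name('story..epub')
#     before this change -> 'story_epub'  ('..' collapsed across the name)
#     after this change  -> 'story..epub' (only the base name is collapsed)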
@ -578,6 +582,7 @@ def url_slash_cleaner(url):
def get_download_filename(url, cookie_file=None):
'''
    Get a local filename for a URL using the content disposition header.
    Falls back to the last component of the URL when no content disposition
    header is present.
'''
from contextlib import closing
from urllib2 import unquote as urllib2_unquote
@ -591,8 +596,10 @@ def get_download_filename(url, cookie_file=None):
cj.load(cookie_file)
br.set_cookiejar(cj)
last_part_name = ''
try:
with closing(br.open(url)) as r:
last_part_name = r.geturl().split('/')[-1]
disposition = r.info().get('Content-disposition', '')
for p in disposition.split(';'):
if 'filename' in p:
@ -612,7 +619,7 @@ def get_download_filename(url, cookie_file=None):
traceback.print_exc()
if not filename:
filename = r.geturl().split('/')[-1]
filename = last_part_name
return filename


@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = u'calibre'
numeric_version = (0, 8, 4)
numeric_version = (0, 8, 8)
__version__ = u'.'.join(map(unicode, numeric_version))
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"
@ -32,6 +32,7 @@ isbsd = isfreebsd or isnetbsd
islinux = not(iswindows or isosx or isbsd)
isfrozen = hasattr(sys, 'frozen')
isunix = isosx or islinux
isportable = os.environ.get('CALIBRE_PORTABLE_BUILD', None) is not None
try:
preferred_encoding = locale.getpreferredencoding()
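# Illustrative (not in the original): calibre-portable.exe, added in this
# commit, sets CALIBRE_PORTABLE_BUILD to the full path of calibre.exe, so
# inside a portable install:
#     >>> from calibre.constants import isportable
#     >>> isportable
#     True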


@ -586,15 +586,15 @@ from calibre.devices.apple.driver import ITUNES
from calibre.devices.hanlin.driver import HANLINV3, HANLINV5, BOOX, SPECTRA
from calibre.devices.blackberry.driver import BLACKBERRY
from calibre.devices.cybook.driver import CYBOOK, ORIZON
from calibre.devices.eb600.driver import EB600, COOL_ER, SHINEBOOK, \
POCKETBOOK360, GER2, ITALICA, ECLICTO, DBOOK, INVESBOOK, \
BOOQ, ELONEX, POCKETBOOK301, MENTOR, POCKETBOOK602, \
POCKETBOOK701
from calibre.devices.eb600.driver import (EB600, COOL_ER, SHINEBOOK,
POCKETBOOK360, GER2, ITALICA, ECLICTO, DBOOK, INVESBOOK,
BOOQ, ELONEX, POCKETBOOK301, MENTOR, POCKETBOOK602,
POCKETBOOK701, POCKETBOOK360P)
from calibre.devices.iliad.driver import ILIAD
from calibre.devices.irexdr.driver import IREXDR1000, IREXDR800
from calibre.devices.jetbook.driver import JETBOOK, MIBUK, JETBOOK_MINI
from calibre.devices.kindle.driver import KINDLE, KINDLE2, KINDLE_DX
from calibre.devices.nook.driver import NOOK, NOOK_COLOR, NOOK_TSR
from calibre.devices.nook.driver import NOOK, NOOK_COLOR
from calibre.devices.prs505.driver import PRS505
from calibre.devices.user_defined.driver import USER_DEFINED
from calibre.devices.android.driver import ANDROID, S60
@ -603,14 +603,15 @@ from calibre.devices.eslick.driver import ESLICK, EBK52
from calibre.devices.nuut2.driver import NUUT2
from calibre.devices.iriver.driver import IRIVER_STORY
from calibre.devices.binatone.driver import README
from calibre.devices.hanvon.driver import N516, EB511, ALEX, AZBOOKA, THEBOOK
from calibre.devices.hanvon.driver import (N516, EB511, ALEX, AZBOOKA, THEBOOK,
LIBREAIR)
from calibre.devices.edge.driver import EDGE
from calibre.devices.teclast.driver import TECLAST_K3, NEWSMY, IPAPYRUS, \
SOVOS, PICO, SUNSTECH_EB700, ARCHOS7O, STASH, WEXLER
from calibre.devices.teclast.driver import (TECLAST_K3, NEWSMY, IPAPYRUS,
SOVOS, PICO, SUNSTECH_EB700, ARCHOS7O, STASH, WEXLER)
from calibre.devices.sne.driver import SNE
from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL, \
GEMEI, VELOCITYMICRO, PDNOVEL_KOBO, LUMIREAD, ALURATEK_COLOR, \
TREKSTOR, EEEREADER, NEXTBOOK
from calibre.devices.misc import (PALMPRE, AVANT, SWEEX, PDNOVEL,
GEMEI, VELOCITYMICRO, PDNOVEL_KOBO, LUMIREAD, ALURATEK_COLOR,
TREKSTOR, EEEREADER, NEXTBOOK, ADAM, MOOVYBOOK)
from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
from calibre.devices.kobo.driver import KOBO
from calibre.devices.bambook.driver import BAMBOOK
@ -689,11 +690,11 @@ plugins += [
JETBOOK_MINI,
MIBUK,
SHINEBOOK,
POCKETBOOK360, POCKETBOOK301, POCKETBOOK602, POCKETBOOK701,
POCKETBOOK360, POCKETBOOK301, POCKETBOOK602, POCKETBOOK701, POCKETBOOK360P,
KINDLE,
KINDLE2,
KINDLE_DX,
NOOK, NOOK_COLOR, NOOK_TSR,
NOOK, NOOK_COLOR,
PRS505,
ANDROID,
S60,
@ -716,7 +717,7 @@ plugins += [
EB600,
README,
N516,
THEBOOK,
THEBOOK, LIBREAIR,
EB511,
ELONEX,
TECLAST_K3,
@ -744,6 +745,8 @@ plugins += [
TREKSTOR,
EEEREADER,
NEXTBOOK,
ADAM,
MOOVYBOOK,
ITUNES,
BOEYE_BEX,
BOEYE_BDX,
@ -760,99 +763,132 @@ plugins += input_profiles + output_profiles
class ActionAdd(InterfaceActionBase):
name = 'Add Books'
actual_plugin = 'calibre.gui2.actions.add:AddAction'
description = _('Add books to calibre or the connected device')
class ActionFetchAnnotations(InterfaceActionBase):
name = 'Fetch Annotations'
actual_plugin = 'calibre.gui2.actions.annotate:FetchAnnotationsAction'
description = _('Fetch annotations from a connected Kindle (experimental)')
class ActionGenerateCatalog(InterfaceActionBase):
name = 'Generate Catalog'
actual_plugin = 'calibre.gui2.actions.catalog:GenerateCatalogAction'
description = _('Generate a catalog of the books in your calibre library')
class ActionConvert(InterfaceActionBase):
name = 'Convert Books'
actual_plugin = 'calibre.gui2.actions.convert:ConvertAction'
description = _('Convert books to various ebook formats')
class ActionDelete(InterfaceActionBase):
name = 'Remove Books'
actual_plugin = 'calibre.gui2.actions.delete:DeleteAction'
description = _('Delete books from your calibre library or connected device')
class ActionEditMetadata(InterfaceActionBase):
name = 'Edit Metadata'
actual_plugin = 'calibre.gui2.actions.edit_metadata:EditMetadataAction'
description = _('Edit the metadata of books in your calibre library')
class ActionView(InterfaceActionBase):
name = 'View'
actual_plugin = 'calibre.gui2.actions.view:ViewAction'
description = _('Read books in your calibre library')
class ActionFetchNews(InterfaceActionBase):
name = 'Fetch News'
actual_plugin = 'calibre.gui2.actions.fetch_news:FetchNewsAction'
description = _('Download news from the internet in ebook form')
class ActionQuickview(InterfaceActionBase):
name = 'Show Quickview'
actual_plugin = 'calibre.gui2.actions.show_quickview:ShowQuickviewAction'
description = _('Show a list of related books quickly')
class ActionSaveToDisk(InterfaceActionBase):
name = 'Save To Disk'
actual_plugin = 'calibre.gui2.actions.save_to_disk:SaveToDiskAction'
description = _('Export books from your calibre library to the hard disk')
class ActionShowBookDetails(InterfaceActionBase):
name = 'Show Book Details'
actual_plugin = 'calibre.gui2.actions.show_book_details:ShowBookDetailsAction'
description = _('Show book details in a separate popup')
class ActionRestart(InterfaceActionBase):
name = 'Restart'
actual_plugin = 'calibre.gui2.actions.restart:RestartAction'
description = _('Restart calibre')
class ActionOpenFolder(InterfaceActionBase):
name = 'Open Folder'
actual_plugin = 'calibre.gui2.actions.open:OpenFolderAction'
description = _('Open the folder that contains the book files in your'
' calibre library')
class ActionSendToDevice(InterfaceActionBase):
name = 'Send To Device'
actual_plugin = 'calibre.gui2.actions.device:SendToDeviceAction'
description = _('Send books to the connected device')
class ActionConnectShare(InterfaceActionBase):
name = 'Connect Share'
actual_plugin = 'calibre.gui2.actions.device:ConnectShareAction'
    description = _('Send books via email or the web. Also connect to iTunes or'
            ' folders on your computer as if they were devices')
class ActionHelp(InterfaceActionBase):
name = 'Help'
actual_plugin = 'calibre.gui2.actions.help:HelpAction'
description = _('Browse the calibre User Manual')
class ActionPreferences(InterfaceActionBase):
name = 'Preferences'
actual_plugin = 'calibre.gui2.actions.preferences:PreferencesAction'
description = _('Customize calibre')
class ActionSimilarBooks(InterfaceActionBase):
name = 'Similar Books'
actual_plugin = 'calibre.gui2.actions.similar_books:SimilarBooksAction'
description = _('Easily find books similar to the currently selected one')
class ActionChooseLibrary(InterfaceActionBase):
name = 'Choose Library'
actual_plugin = 'calibre.gui2.actions.choose_library:ChooseLibraryAction'
description = _('Switch between different calibre libraries and perform'
' maintenance on them')
class ActionAddToLibrary(InterfaceActionBase):
name = 'Add To Library'
actual_plugin = 'calibre.gui2.actions.add_to_library:AddToLibraryAction'
    description = _('Copy books from the device to your calibre library')
class ActionEditCollections(InterfaceActionBase):
name = 'Edit Collections'
actual_plugin = 'calibre.gui2.actions.edit_collections:EditCollectionsAction'
description = _('Edit the collections in which books are placed on your device')
class ActionCopyToLibrary(InterfaceActionBase):
name = 'Copy To Library'
actual_plugin = 'calibre.gui2.actions.copy_to_library:CopyToLibraryAction'
description = _('Copy a book from one calibre library to another')
class ActionTweakEpub(InterfaceActionBase):
name = 'Tweak ePub'
actual_plugin = 'calibre.gui2.actions.tweak_epub:TweakEpubAction'
description = _('Make small tweaks to epub files in your calibre library')
class ActionNextMatch(InterfaceActionBase):
name = 'Next Match'
actual_plugin = 'calibre.gui2.actions.next_match:NextMatchAction'
description = _('Find the next or previous match when searching in '
'your calibre library in highlight mode')
class ActionStore(InterfaceActionBase):
name = 'Store'
author = 'John Schember'
actual_plugin = 'calibre.gui2.actions.store:StoreAction'
description = _('Search for books from different book sellers')
def customization_help(self, gui=False):
return 'Customize the behavior of the store search.'
@ -865,13 +901,20 @@ class ActionStore(InterfaceActionBase):
from calibre.gui2.store.config.store import save_settings as save
save(config_widget)
class ActionPluginUpdater(InterfaceActionBase):
name = 'Plugin Updater'
author = 'Grant Drake'
description = _('Get new calibre plugins or update your existing ones')
actual_plugin = 'calibre.gui2.actions.plugin_updates:PluginUpdaterAction'
plugins += [ActionAdd, ActionFetchAnnotations, ActionGenerateCatalog,
ActionConvert, ActionDelete, ActionEditMetadata, ActionView,
ActionFetchNews, ActionSaveToDisk, ActionShowBookDetails,
ActionRestart, ActionOpenFolder, ActionConnectShare,
ActionFetchNews, ActionSaveToDisk, ActionQuickview,
ActionShowBookDetails,ActionRestart, ActionOpenFolder, ActionConnectShare,
ActionSendToDevice, ActionHelp, ActionPreferences, ActionSimilarBooks,
ActionAddToLibrary, ActionEditCollections, ActionChooseLibrary,
ActionCopyToLibrary, ActionTweakEpub, ActionNextMatch, ActionStore]
ActionCopyToLibrary, ActionTweakEpub, ActionNextMatch, ActionStore,
ActionPluginUpdater]
# }}}
@ -1106,7 +1149,7 @@ plugins += [LookAndFeel, Behavior, Columns, Toolbar, Search, InputOptions,
class StoreAmazonKindleStore(StoreBase):
name = 'Amazon Kindle'
description = u'Kindle books from Amazon.'
actual_plugin = 'calibre.gui2.store.amazon_plugin:AmazonKindleStore'
actual_plugin = 'calibre.gui2.store.stores.amazon_plugin:AmazonKindleStore'
headquarters = 'US'
formats = ['KINDLE']
@ -1116,7 +1159,7 @@ class StoreAmazonDEKindleStore(StoreBase):
name = 'Amazon DE Kindle'
author = 'Charles Haley'
description = u'Kindle Bücher von Amazon.'
actual_plugin = 'calibre.gui2.store.amazon_de_plugin:AmazonDEKindleStore'
actual_plugin = 'calibre.gui2.store.stores.amazon_de_plugin:AmazonDEKindleStore'
headquarters = 'DE'
formats = ['KINDLE']
@ -1126,7 +1169,7 @@ class StoreAmazonUKKindleStore(StoreBase):
name = 'Amazon UK Kindle'
author = 'Charles Haley'
description = u'Kindle books from Amazon\'s UK web site. Also, includes French language ebooks.'
actual_plugin = 'calibre.gui2.store.amazon_uk_plugin:AmazonUKKindleStore'
actual_plugin = 'calibre.gui2.store.stores.amazon_uk_plugin:AmazonUKKindleStore'
headquarters = 'UK'
formats = ['KINDLE']
@ -1135,7 +1178,7 @@ class StoreAmazonUKKindleStore(StoreBase):
class StoreArchiveOrgStore(StoreBase):
name = 'Archive.org'
description = u'An Internet library offering permanent access for researchers, historians, scholars, people with disabilities, and the general public to historical collections that exist in digital format.'
actual_plugin = 'calibre.gui2.store.archive_org_plugin:ArchiveOrgStore'
actual_plugin = 'calibre.gui2.store.stores.archive_org_plugin:ArchiveOrgStore'
drm_free_only = True
headquarters = 'US'
@ -1144,7 +1187,7 @@ class StoreArchiveOrgStore(StoreBase):
class StoreBaenWebScriptionStore(StoreBase):
name = 'Baen WebScription'
description = u'Sci-Fi & Fantasy brought to you by Jim Baen.'
actual_plugin = 'calibre.gui2.store.baen_webscription_plugin:BaenWebScriptionStore'
actual_plugin = 'calibre.gui2.store.stores.baen_webscription_plugin:BaenWebScriptionStore'
drm_free_only = True
headquarters = 'US'
@ -1153,7 +1196,7 @@ class StoreBaenWebScriptionStore(StoreBase):
class StoreBNStore(StoreBase):
name = 'Barnes and Noble'
description = u'The world\'s largest book seller. As the ultimate destination for book lovers, Barnes & Noble.com offers an incredible array of content.'
actual_plugin = 'calibre.gui2.store.bn_plugin:BNStore'
actual_plugin = 'calibre.gui2.store.stores.bn_plugin:BNStore'
headquarters = 'US'
formats = ['NOOK']
@ -1163,7 +1206,7 @@ class StoreBeamEBooksDEStore(StoreBase):
name = 'Beam EBooks DE'
author = 'Charles Haley'
description = u'Bei uns finden Sie: Tausende deutschsprachige eBooks; Alle eBooks ohne hartes DRM; PDF, ePub und Mobipocket Format; Sofortige Verfügbarkeit - 24 Stunden am Tag; Günstige Preise; eBooks für viele Lesegeräte, PC,Mac und Smartphones; Viele Gratis eBooks'
actual_plugin = 'calibre.gui2.store.beam_ebooks_de_plugin:BeamEBooksDEStore'
actual_plugin = 'calibre.gui2.store.stores.beam_ebooks_de_plugin:BeamEBooksDEStore'
drm_free_only = True
headquarters = 'DE'
@ -1173,7 +1216,7 @@ class StoreBeamEBooksDEStore(StoreBase):
class StoreBeWriteStore(StoreBase):
name = 'BeWrite Books'
description = u'Publishers of fine books. Highly selective and editorially driven. Does not offer: books for children or exclusively YA, erotica, swords-and-sorcery fantasy and space-opera-style science fiction. All other genres are represented.'
actual_plugin = 'calibre.gui2.store.bewrite_plugin:BeWriteStore'
actual_plugin = 'calibre.gui2.store.stores.bewrite_plugin:BeWriteStore'
drm_free_only = True
headquarters = 'US'
@ -1182,7 +1225,7 @@ class StoreBeWriteStore(StoreBase):
class StoreDieselEbooksStore(StoreBase):
name = 'Diesel eBooks'
description = u'Instant access to over 2.4 million titles from hundreds of publishers including Harlequin, HarperCollins, John Wiley & Sons, McGraw-Hill, Simon & Schuster and Random House.'
actual_plugin = 'calibre.gui2.store.diesel_ebooks_plugin:DieselEbooksStore'
actual_plugin = 'calibre.gui2.store.stores.diesel_ebooks_plugin:DieselEbooksStore'
headquarters = 'US'
formats = ['EPUB', 'PDF']
@ -1191,7 +1234,7 @@ class StoreDieselEbooksStore(StoreBase):
class StoreEbookscomStore(StoreBase):
name = 'eBooks.com'
description = u'Sells books in multiple electronic formats in all categories. Technical infrastructure is cutting edge, robust and scalable, with servers in the US and Europe.'
actual_plugin = 'calibre.gui2.store.ebooks_com_plugin:EbookscomStore'
actual_plugin = 'calibre.gui2.store.stores.ebooks_com_plugin:EbookscomStore'
headquarters = 'US'
formats = ['EPUB', 'LIT', 'MOBI', 'PDF']
@ -1201,7 +1244,7 @@ class StoreEPubBuyDEStore(StoreBase):
name = 'EPUBBuy DE'
author = 'Charles Haley'
description = u'Bei EPUBBuy.com finden Sie ausschliesslich eBooks im weitverbreiteten EPUB-Format und ohne DRM. So haben Sie die freie Wahl, wo Sie Ihr eBook lesen: Tablet, eBook-Reader, Smartphone oder einfach auf Ihrem PC. So macht eBook-Lesen Spaß!'
actual_plugin = 'calibre.gui2.store.epubbuy_de_plugin:EPubBuyDEStore'
actual_plugin = 'calibre.gui2.store.stores.epubbuy_de_plugin:EPubBuyDEStore'
drm_free_only = True
headquarters = 'DE'
@ -1212,7 +1255,7 @@ class StoreEBookShoppeUKStore(StoreBase):
name = 'ebookShoppe UK'
author = u'Charles Haley'
description = u'We made this website in an attempt to offer the widest range of UK eBooks possible across and as many formats as we could manage.'
actual_plugin = 'calibre.gui2.store.ebookshoppe_uk_plugin:EBookShoppeUKStore'
actual_plugin = 'calibre.gui2.store.stores.ebookshoppe_uk_plugin:EBookShoppeUKStore'
headquarters = 'UK'
formats = ['EPUB', 'PDF']
@ -1221,7 +1264,7 @@ class StoreEBookShoppeUKStore(StoreBase):
class StoreEHarlequinStore(StoreBase):
name = 'eHarlequin'
description = u'A global leader in series romance and one of the world\'s leading publishers of books for women. Offers women a broad range of reading from romance to bestseller fiction, from young adult novels to erotic literature, from nonfiction to fantasy, from African-American novels to inspirational romance, and more.'
actual_plugin = 'calibre.gui2.store.eharlequin_plugin:EHarlequinStore'
actual_plugin = 'calibre.gui2.store.stores.eharlequin_plugin:EHarlequinStore'
headquarters = 'CA'
formats = ['EPUB', 'PDF']
@ -1230,7 +1273,7 @@ class StoreEHarlequinStore(StoreBase):
class StoreEpubBudStore(StoreBase):
name = 'ePub Bud'
    description = 'Well, it\'s pretty much just "YouTube for Children\'s eBooks". A not-for-profit organization devoted to bringing self-published children\'s books to the world.'
actual_plugin = 'calibre.gui2.store.epubbud_plugin:EpubBudStore'
actual_plugin = 'calibre.gui2.store.stores.epubbud_plugin:EpubBudStore'
drm_free_only = True
headquarters = 'US'
@ -1239,7 +1282,7 @@ class StoreEpubBudStore(StoreBase):
class StoreFeedbooksStore(StoreBase):
name = 'Feedbooks'
description = u'Feedbooks is a cloud publishing and distribution service, connected to a large ecosystem of reading systems and social networks. Provides a variety of genres from independent and classic books.'
actual_plugin = 'calibre.gui2.store.feedbooks_plugin:FeedbooksStore'
actual_plugin = 'calibre.gui2.store.stores.feedbooks_plugin:FeedbooksStore'
headquarters = 'FR'
formats = ['EPUB', 'MOBI', 'PDF']
@ -1248,7 +1291,7 @@ class StoreFoylesUKStore(StoreBase):
name = 'Foyles UK'
author = 'Charles Haley'
description = u'Foyles of London\'s ebook store. Provides extensive range covering all subjects.'
actual_plugin = 'calibre.gui2.store.foyles_uk_plugin:FoylesUKStore'
actual_plugin = 'calibre.gui2.store.stores.foyles_uk_plugin:FoylesUKStore'
headquarters = 'UK'
formats = ['EPUB', 'PDF']
@ -1258,7 +1301,7 @@ class StoreGandalfStore(StoreBase):
name = 'Gandalf'
author = u'Tomasz Długosz'
description = u'Księgarnia internetowa Gandalf.'
actual_plugin = 'calibre.gui2.store.gandalf_plugin:GandalfStore'
actual_plugin = 'calibre.gui2.store.stores.gandalf_plugin:GandalfStore'
headquarters = 'PL'
formats = ['EPUB', 'PDF']
@ -1266,7 +1309,7 @@ class StoreGandalfStore(StoreBase):
class StoreGoogleBooksStore(StoreBase):
name = 'Google Books'
description = u'Google Books'
actual_plugin = 'calibre.gui2.store.google_books_plugin:GoogleBooksStore'
actual_plugin = 'calibre.gui2.store.stores.google_books_plugin:GoogleBooksStore'
headquarters = 'US'
formats = ['EPUB', 'PDF', 'TXT']
@ -1274,7 +1317,7 @@ class StoreGoogleBooksStore(StoreBase):
class StoreGutenbergStore(StoreBase):
name = 'Project Gutenberg'
description = u'The first producer of free ebooks. Free in the United States because their copyright has expired. They may not be free of copyright in other countries. Readers outside of the United States must check the copyright laws of their countries before downloading or redistributing our ebooks.'
actual_plugin = 'calibre.gui2.store.gutenberg_plugin:GutenbergStore'
actual_plugin = 'calibre.gui2.store.stores.gutenberg_plugin:GutenbergStore'
drm_free_only = True
headquarters = 'US'
@ -1283,7 +1326,7 @@ class StoreGutenbergStore(StoreBase):
class StoreKoboStore(StoreBase):
name = 'Kobo'
description = u'With over 2.3 million eBooks to browse we have engaged readers in over 200 countries in Kobo eReading. Our eBook listings include New York Times Bestsellers, award winners, classics and more!'
actual_plugin = 'calibre.gui2.store.kobo_plugin:KoboStore'
actual_plugin = 'calibre.gui2.store.stores.kobo_plugin:KoboStore'
headquarters = 'CA'
formats = ['EPUB']
@ -1293,15 +1336,25 @@ class StoreLegimiStore(StoreBase):
name = 'Legimi'
author = u'Tomasz Długosz'
description = u'Tanie oraz darmowe ebooki, egazety i blogi w formacie EPUB, wprost na Twój e-czytnik, iPhone, iPad, Android i komputer'
actual_plugin = 'calibre.gui2.store.legimi_plugin:LegimiStore'
actual_plugin = 'calibre.gui2.store.stores.legimi_plugin:LegimiStore'
headquarters = 'PL'
formats = ['EPUB']
class StoreLibreDEStore(StoreBase):
name = 'Libri DE'
author = 'Charles Haley'
description = u'Sicher Bücher, Hörbücher und Downloads online bestellen.'
actual_plugin = 'calibre.gui2.store.stores.libri_de_plugin:LibreDEStore'
headquarters = 'DE'
formats = ['EPUB', 'PDF']
affiliate = True
class StoreManyBooksStore(StoreBase):
name = 'ManyBooks'
description = u'Public domain and creative commons works from many sources.'
actual_plugin = 'calibre.gui2.store.manybooks_plugin:ManyBooksStore'
actual_plugin = 'calibre.gui2.store.stores.manybooks_plugin:ManyBooksStore'
drm_free_only = True
headquarters = 'US'
@ -1310,7 +1363,7 @@ class StoreManyBooksStore(StoreBase):
class StoreMobileReadStore(StoreBase):
name = 'MobileRead'
description = u'Ebooks handcrafted with the utmost care.'
actual_plugin = 'calibre.gui2.store.mobileread.mobileread_plugin:MobileReadStore'
actual_plugin = 'calibre.gui2.store.stores.mobileread.mobileread_plugin:MobileReadStore'
drm_free_only = True
headquarters = 'CH'
@ -1320,16 +1373,24 @@ class StoreNextoStore(StoreBase):
name = 'Nexto'
author = u'Tomasz Długosz'
description = u'Największy w Polsce sklep internetowy z audiobookami mp3, ebookami pdf oraz prasą do pobrania on-line.'
actual_plugin = 'calibre.gui2.store.nexto_plugin:NextoStore'
actual_plugin = 'calibre.gui2.store.stores.nexto_plugin:NextoStore'
headquarters = 'PL'
formats = ['EPUB', 'PDF']
affiliate = True
class StoreOpenBooksStore(StoreBase):
name = 'Open Books'
description = u'Comprehensive listing of DRM free ebooks from a variety of sources provided by users of calibre.'
actual_plugin = 'calibre.gui2.store.stores.open_books_plugin:OpenBooksStore'
drm_free_only = True
headquarters = 'US'
class StoreOpenLibraryStore(StoreBase):
name = 'Open Library'
description = u'One web page for every book ever published. The goal is to be a true online library. Over 20 million records from a variety of large catalogs as well as single contributions, with more on the way.'
actual_plugin = 'calibre.gui2.store.open_library_plugin:OpenLibraryStore'
actual_plugin = 'calibre.gui2.store.stores.open_library_plugin:OpenLibraryStore'
drm_free_only = True
headquarters = 'US'
@ -1338,7 +1399,7 @@ class StoreOpenLibraryStore(StoreBase):
class StoreOReillyStore(StoreBase):
name = 'OReilly'
description = u'Programming and tech ebooks from OReilly.'
actual_plugin = 'calibre.gui2.store.oreilly_plugin:OReillyStore'
actual_plugin = 'calibre.gui2.store.stores.oreilly_plugin:OReillyStore'
drm_free_only = True
headquarters = 'US'
@ -1347,7 +1408,7 @@ class StoreOReillyStore(StoreBase):
class StorePragmaticBookshelfStore(StoreBase):
name = 'Pragmatic Bookshelf'
    description = u'The Pragmatic Bookshelf\'s collection of programming and tech books available as ebooks.'
actual_plugin = 'calibre.gui2.store.pragmatic_bookshelf_plugin:PragmaticBookshelfStore'
actual_plugin = 'calibre.gui2.store.stores.pragmatic_bookshelf_plugin:PragmaticBookshelfStore'
drm_free_only = True
headquarters = 'US'
@ -1356,7 +1417,7 @@ class StorePragmaticBookshelfStore(StoreBase):
class StoreSmashwordsStore(StoreBase):
name = 'Smashwords'
description = u'An ebook publishing and distribution platform for ebook authors, publishers and readers. Covers many genres and formats.'
actual_plugin = 'calibre.gui2.store.smashwords_plugin:SmashwordsStore'
actual_plugin = 'calibre.gui2.store.stores.smashwords_plugin:SmashwordsStore'
drm_free_only = True
headquarters = 'US'
@ -1367,7 +1428,7 @@ class StoreVirtualoStore(StoreBase):
name = 'Virtualo'
author = u'Tomasz Długosz'
description = u'Księgarnia internetowa, która oferuje bezpieczny i szeroki dostęp do książek w formie cyfrowej.'
actual_plugin = 'calibre.gui2.store.virtualo_plugin:VirtualoStore'
actual_plugin = 'calibre.gui2.store.stores.virtualo_plugin:VirtualoStore'
headquarters = 'PL'
formats = ['EPUB', 'PDF']
@ -1376,7 +1437,7 @@ class StoreWaterstonesUKStore(StoreBase):
name = 'Waterstones UK'
author = 'Charles Haley'
description = u'Waterstone\'s mission is to be the leading Bookseller on the High Street and online providing customers the widest choice, great value and expert advice from a team passionate about Bookselling.'
actual_plugin = 'calibre.gui2.store.waterstones_uk_plugin:WaterstonesUKStore'
actual_plugin = 'calibre.gui2.store.stores.waterstones_uk_plugin:WaterstonesUKStore'
headquarters = 'UK'
formats = ['EPUB', 'PDF']
@ -1384,7 +1445,7 @@ class StoreWaterstonesUKStore(StoreBase):
class StoreWeightlessBooksStore(StoreBase):
name = 'Weightless Books'
    description = u'An independent DRM-free ebook site devoted to ebooks of all sorts.'
actual_plugin = 'calibre.gui2.store.weightless_books_plugin:WeightlessBooksStore'
actual_plugin = 'calibre.gui2.store.stores.weightless_books_plugin:WeightlessBooksStore'
drm_free_only = True
headquarters = 'US'
@ -1394,7 +1455,7 @@ class StoreWHSmithUKStore(StoreBase):
name = 'WH Smith UK'
author = 'Charles Haley'
description = u"Shop for savings on Books, discounted Magazine subscriptions and great prices on Stationery, Toys & Games"
actual_plugin = 'calibre.gui2.store.whsmith_uk_plugin:WHSmithUKStore'
actual_plugin = 'calibre.gui2.store.stores.whsmith_uk_plugin:WHSmithUKStore'
headquarters = 'UK'
formats = ['EPUB', 'PDF']
@ -1402,7 +1463,7 @@ class StoreWHSmithUKStore(StoreBase):
class StoreWizardsTowerBooksStore(StoreBase):
name = 'Wizards Tower Books'
description = u'A science fiction and fantasy publisher. Concentrates mainly on making out-of-print works available once more as e-books, and helping other small presses exploit the e-book market. Also publishes a small number of limited-print-run anthologies with a view to encouraging diversity in the science fiction and fantasy field.'
actual_plugin = 'calibre.gui2.store.wizards_tower_books_plugin:WizardsTowerBooksStore'
actual_plugin = 'calibre.gui2.store.stores.wizards_tower_books_plugin:WizardsTowerBooksStore'
drm_free_only = True
headquarters = 'UK'
@ -1412,11 +1473,20 @@ class StoreWoblinkStore(StoreBase):
name = 'Woblink'
author = u'Tomasz Długosz'
description = u'Czytanie zdarza się wszędzie!'
actual_plugin = 'calibre.gui2.store.woblink_plugin:WoblinkStore'
actual_plugin = 'calibre.gui2.store.stores.woblink_plugin:WoblinkStore'
headquarters = 'PL'
formats = ['EPUB']
class StoreZixoStore(StoreBase):
name = 'Zixo'
author = u'Tomasz Długosz'
description = u'Księgarnia z ebookami oraz książkami audio. Aby otwierać książki w formacie Zixo należy zainstalować program dostępny na stronie księgarni. Umożliwia on m.in. dodawanie zakładek i dostosowywanie rozmiaru czcionki.'
actual_plugin = 'calibre.gui2.store.stores.zixo_plugin:ZixoStore'
headquarters = 'PL'
    formats = ['PDF', 'ZIXO']
plugins += [
StoreArchiveOrgStore,
StoreAmazonKindleStore,
@ -1439,9 +1509,11 @@ plugins += [
StoreGutenbergStore,
StoreKoboStore,
StoreLegimiStore,
StoreLibreDEStore,
StoreManyBooksStore,
StoreMobileReadStore,
StoreNextoStore,
StoreOpenBooksStore,
StoreOpenLibraryStore,
StoreOReillyStore,
StorePragmaticBookshelfStore,
@ -1451,7 +1523,8 @@ plugins += [
StoreWeightlessBooksStore,
StoreWHSmithUKStore,
StoreWizardsTowerBooksStore,
StoreWoblinkStore
StoreWoblinkStore,
StoreZixoStore
]
# }}}


@ -259,6 +259,10 @@ class OutputFormatPlugin(Plugin):
#: (option_name, recommended_value, recommendation_level)
recommendations = set([])
@property
def description(self):
return _('Convert ebooks to the %s format')%self.file_type
def __init__(self, *args):
Plugin.__init__(self, *args)
self.report_progress = DummyReporter()


@ -355,11 +355,17 @@ def remove_plugin(plugin_or_name):
name = getattr(plugin_or_name, 'name', plugin_or_name)
plugins = config['plugins']
removed = False
if name in plugins.keys():
if name in plugins:
removed = True
try:
zfp = os.path.join(plugin_dir, name+'.zip')
if os.path.exists(zfp):
os.remove(zfp)
zfp = plugins[name]
if os.path.exists(zfp):
os.remove(zfp)
except:
pass
plugins.pop(name)
config['plugins'] = plugins
initialize_plugins()
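# Illustrative (not in the original): config['plugins'] maps plugin names to
# the paths of their zip files, e.g. (path illustrative)
#     {'Some Plugin': u'/plugin/dir/Some Plugin.zip'}
# so remove_plugin() above both forgets the entry and deletes the zip.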
@ -487,6 +493,8 @@ def initialize_plugin(plugin, path_to_zip_file):
raise InvalidPlugin((_('Initialization of plugin %s failed with traceback:')
%tb) + '\n'+tb)
def has_external_plugins():
return bool(config['plugins'])
def initialize_plugins():
global _initialized_plugins
@ -495,8 +503,15 @@ def initialize_plugins():
builtin_names]
for p in conflicts:
remove_plugin(p)
for zfp in list(config['plugins'].itervalues()) + builtin_plugins:
external_plugins = config['plugins']
for zfp in list(external_plugins) + builtin_plugins:
try:
if not isinstance(zfp, type):
# We have a plugin name
pname = zfp
zfp = os.path.join(plugin_dir, zfp+'.zip')
if not os.path.exists(zfp):
zfp = external_plugins[pname]
try:
plugin = load_plugin(zfp) if not isinstance(zfp, type) else zfp
except PluginNotFound:


@ -0,0 +1,67 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
'''
Rewrite of the calibre database backend.
Broad Objectives:
* Use the sqlite db only as a datastore. i.e. do not do
sorting/searching/concatenation or anything else in sqlite. Instead
mirror the sqlite tables in memory, create caches and lookup maps from
them and create a set_* API that updates the memory caches and the sqlite
correctly.
* Move from keeping a list of books in memory as a cache to a per table
cache. This allows much faster search and sort operations at the expense
of slightly slower lookup operations. That slowdown can be mitigated by
keeping lots of maps and updating them in the set_* API. Also
get_categories becomes blazingly fast.
* Separate the database layer from the cache layer more cleanly. Rather
than having the db layer refer to the cache layer and vice versa, the
cache layer will refer to the db layer only and the new API will be
defined on the cache layer.
* Get rid of index_is_id and other poor design decisions
* Minimize the API as much as possible and define it cleanly
* Do not change the on disk format of metadata.db at all (this is for
backwards compatibility)
* Get rid of the need for a separate db access thread by switching to apsw
to access sqlite, which is thread safe
* The new API will have methods to efficiently do bulk operations and will
use shared/exclusive/pending locks to serialize access to the in-mem data
structures. Use the same locking scheme as sqlite itself does.
How this will proceed:
1. Create the new API
2. Create a test suite for it
3. Write a replacement for LibraryDatabase2 that uses the new API
internally
4. Lots of testing of calibre with the new LibraryDatabase2
5. Gradually migrate code to use the (much faster) new api wherever possible (the new api
will be exposed via db.new_api)
I plan to work on this slowly, in parallel to normal calibre development
work.
Various things that require other things before they can be migrated:
1. From initialize_dynamic(): set_saved_searches,
load_user_template_functions. Also add custom
columns/categories/searches info into
self.field_metadata. Finally, implement metadata dirtied
functionality.
2. Test Schema upgrades
'''
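A rough sketch, not this commit's implementation, of the shared/exclusive locking idea described in the objectives above; a production version would also need reentrancy, lock upgrades and fairness:

    from threading import Condition, Lock

    class SHLock(object):
        # Many concurrent readers or a single exclusive writer; writers
        # wait for readers to drain. No fairness or upgrade handling.
        def __init__(self):
            self._cond = Condition(Lock())
            self._readers = 0
            self._writer = False

        def acquire(self, shared=True):
            with self._cond:
                if shared:
                    while self._writer:
                        self._cond.wait()
                    self._readers += 1
                else:
                    while self._writer or self._readers:
                        self._cond.wait()
                    self._writer = True

        def release(self, shared=True):
            with self._cond:
                if shared:
                    self._readers -= 1
                else:
                    self._writer = False
                self._cond.notify_all()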

src/calibre/db/backend.py (new file, 650 lines)

@ -0,0 +1,650 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
# Imports {{{
import os, shutil, uuid, json
from functools import partial
import apsw
from calibre import isbytestring, force_unicode, prints
from calibre.constants import (iswindows, filesystem_encoding,
preferred_encoding)
from calibre.ptempfile import PersistentTemporaryFile
from calibre.library.schema_upgrades import SchemaUpgrade
from calibre.library.field_metadata import FieldMetadata
from calibre.ebooks.metadata import title_sort, author_to_author_sort
from calibre.utils.icu import strcmp
from calibre.utils.config import to_json, from_json, prefs, tweaks
from calibre.utils.date import utcfromtimestamp, parse_date
from calibre.db.tables import (OneToOneTable, ManyToOneTable, ManyToManyTable,
SizeTable, FormatsTable, AuthorsTable, IdentifiersTable)
# }}}
'''
Differences in semantics from pysqlite:
1. execute/executemany/executescript operate in autocommit mode
'''
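# Illustrative (not in the original): with apsw each statement commits as it
# executes; to group statements atomically, use the connection as a context
# manager, which wraps the block in a transaction (rolled back on error):
#     with conn:
#         conn.cursor().execute(
#             'INSERT OR REPLACE INTO preferences (key,val) VALUES (?,?)',
#             (key, raw))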
class DynamicFilter(object): # {{{
'No longer used, present for legacy compatibility'
def __init__(self, name):
self.name = name
self.ids = frozenset([])
def __call__(self, id_):
return int(id_ in self.ids)
def change(self, ids):
self.ids = frozenset(ids)
# }}}
class DBPrefs(dict): # {{{
'Store preferences as key:value pairs in the db'
def __init__(self, db):
dict.__init__(self)
self.db = db
self.defaults = {}
self.disable_setting = False
for key, val in self.db.conn.get('SELECT key,val FROM preferences'):
try:
val = self.raw_to_object(val)
except:
prints('Failed to read value for:', key, 'from db')
continue
dict.__setitem__(self, key, val)
def raw_to_object(self, raw):
if not isinstance(raw, unicode):
raw = raw.decode(preferred_encoding)
return json.loads(raw, object_hook=from_json)
def to_raw(self, val):
return json.dumps(val, indent=2, default=to_json)
def __getitem__(self, key):
try:
return dict.__getitem__(self, key)
except KeyError:
return self.defaults[key]
def __delitem__(self, key):
dict.__delitem__(self, key)
self.db.conn.execute('DELETE FROM preferences WHERE key=?', (key,))
def __setitem__(self, key, val):
if self.disable_setting:
return
raw = self.to_raw(val)
self.db.conn.execute('INSERT OR REPLACE INTO preferences (key,val) VALUES (?,?)', (key,
raw))
dict.__setitem__(self, key, val)
def set(self, key, val):
self.__setitem__(key, val)
# }}}
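# Illustrative (not in the original) use of DBPrefs, given a DB instance db
# as defined below:
#     db.prefs['saved_searches'] = {}    # JSON-serialized and written to the
#                                        # preferences table immediately
#     db.prefs.defaults['gui_restriction'] = ''
#     db.prefs['gui_restriction']        # missing key falls back to default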
# Extra collators {{{
def pynocase(one, two, encoding='utf-8'):
if isbytestring(one):
try:
one = one.decode(encoding, 'replace')
except:
pass
if isbytestring(two):
try:
two = two.decode(encoding, 'replace')
except:
pass
return cmp(one.lower(), two.lower())
def _author_to_author_sort(x):
if not x: return ''
return author_to_author_sort(x.replace('|', ','))
def icu_collator(s1, s2):
return strcmp(force_unicode(s1, 'utf-8'), force_unicode(s2, 'utf-8'))
# }}}
class Connection(apsw.Connection): # {{{
BUSY_TIMEOUT = 2000 # milliseconds
def __init__(self, path):
apsw.Connection.__init__(self, path)
self.setbusytimeout(self.BUSY_TIMEOUT)
self.execute('pragma cache_size=5000')
        # apsw.Connection has no .conn attribute and uses its own method
        # names (createcollation/createscalarfunction), unlike pysqlite
        self.execute('pragma temp_store=2')

        encoding = self.execute('pragma encoding').fetchone()[0]
        self.createcollation('PYNOCASE', partial(pynocase,
            encoding=encoding))

        self.createscalarfunction('title_sort', title_sort, 1)
        self.createscalarfunction('author_to_author_sort',
                _author_to_author_sort, 1)
        self.createscalarfunction('uuid4', lambda : str(uuid.uuid4()), 0)

        # Dummy functions for dynamically created filters
        self.createscalarfunction('books_list_filter', lambda x: 1, 1)
        self.createcollation('icucollate', icu_collator)

    def create_dynamic_filter(self, name):
        f = DynamicFilter(name)
        self.createscalarfunction(name, f, 1)
def get(self, *args, **kw):
ans = self.cursor().execute(*args)
if kw.get('all', True):
return ans.fetchall()
        for row in ans:
            return row[0]
def execute(self, sql, bindings=None):
cursor = self.cursor()
return cursor.execute(sql, bindings)
def executemany(self, sql, sequence_of_bindings):
return self.cursor().executemany(sql, sequence_of_bindings)
def executescript(self, sql):
with self:
# Use an explicit savepoint so that even if this is called
# while a transaction is active, it is atomic
return self.cursor().execute(sql)
# }}}
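# Illustrative (not in the original) use of the helpers above; the query
# mirrors the library_id lookup referenced in DB.__init__ below:
#     conn = Connection('/tmp/metadata.db')               # path illustrative
#     rows = conn.get('SELECT key,val FROM preferences')  # fetchall
#     lid = conn.get('SELECT uuid FROM library_id', all=False)  # scalar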
class DB(object, SchemaUpgrade):
PATH_LIMIT = 40 if iswindows else 100
WINDOWS_LIBRARY_PATH_LIMIT = 75
# Initialize database {{{
def __init__(self, library_path, default_prefs=None, read_only=False):
try:
if isbytestring(library_path):
library_path = library_path.decode(filesystem_encoding)
except:
import traceback
traceback.print_exc()
self.field_metadata = FieldMetadata()
self.library_path = os.path.abspath(library_path)
self.dbpath = os.path.join(library_path, 'metadata.db')
self.dbpath = os.environ.get('CALIBRE_OVERRIDE_DATABASE_PATH',
self.dbpath)
if iswindows and len(self.library_path) + 4*self.PATH_LIMIT + 10 > 259:
raise ValueError(_(
'Path to library too long. Must be less than'
' %d characters.')%(259-4*self.PATH_LIMIT-10))
exists = self._exists = os.path.exists(self.dbpath)
if not exists:
# Be more strict when creating new libraries as the old calculation
# allowed for max path lengths of 265 chars.
if (iswindows and len(self.library_path) >
self.WINDOWS_LIBRARY_PATH_LIMIT):
raise ValueError(_(
'Path to library too long. Must be less than'
' %d characters.')%self.WINDOWS_LIBRARY_PATH_LIMIT)
if read_only and os.path.exists(self.dbpath):
# Work on only a copy of metadata.db to ensure that
# metadata.db is not changed
pt = PersistentTemporaryFile('_metadata_ro.db')
pt.close()
shutil.copyfile(self.dbpath, pt.name)
self.dbpath = pt.name
self.is_case_sensitive = (not iswindows and
not os.path.exists(self.dbpath.replace('metadata.db',
'MeTAdAtA.dB')))
self._conn = None
if self.user_version == 0:
self.initialize_database()
with self.conn:
SchemaUpgrade.__init__(self)
# Guarantee that the library_id is set
self.library_id
self.initialize_prefs(default_prefs)
# Fix legacy triggers and columns
self.conn.executescript('''
DROP TRIGGER IF EXISTS author_insert_trg;
CREATE TEMP TRIGGER author_insert_trg
AFTER INSERT ON authors
BEGIN
UPDATE authors SET sort=author_to_author_sort(NEW.name) WHERE id=NEW.id;
END;
DROP TRIGGER IF EXISTS author_update_trg;
CREATE TEMP TRIGGER author_update_trg
BEFORE UPDATE ON authors
BEGIN
UPDATE authors SET sort=author_to_author_sort(NEW.name)
WHERE id=NEW.id AND name <> NEW.name;
END;
UPDATE authors SET sort=author_to_author_sort(name) WHERE sort IS NULL;
''')
self.initialize_custom_columns()
self.initialize_tables()
def initialize_prefs(self, default_prefs): # {{{
self.prefs = DBPrefs(self)
if default_prefs is not None and not self._exists:
# Only apply default prefs to a new database
for key in default_prefs:
# be sure that prefs not to be copied are listed below
if key not in frozenset(['news_to_be_synced']):
self.prefs[key] = default_prefs[key]
if 'field_metadata' in default_prefs:
fmvals = [f for f in default_prefs['field_metadata'].values()
if f['is_custom']]
for f in fmvals:
self.create_custom_column(f['label'], f['name'],
f['datatype'], f['is_multiple'] is not None,
f['is_editable'], f['display'])
defs = self.prefs.defaults
defs['gui_restriction'] = defs['cs_restriction'] = ''
defs['categories_using_hierarchy'] = []
defs['column_color_rules'] = []
# Migrate the bool tristate tweak
defs['bools_are_tristate'] = \
tweaks.get('bool_custom_columns_are_tristate', 'yes') == 'yes'
if self.prefs.get('bools_are_tristate') is None:
self.prefs.set('bools_are_tristate', defs['bools_are_tristate'])
# Migrate column coloring rules
if self.prefs.get('column_color_name_1', None) is not None:
from calibre.library.coloring import migrate_old_rule
old_rules = []
for i in range(1, 6):
col = self.prefs.get('column_color_name_'+str(i), None)
templ = self.prefs.get('column_color_template_'+str(i), None)
if col and templ:
try:
del self.prefs['column_color_name_'+str(i)]
rules = migrate_old_rule(self.field_metadata, templ)
for templ in rules:
old_rules.append((col, templ))
except:
pass
if old_rules:
self.prefs['column_color_rules'] += old_rules
# Migrate saved search and user categories to db preference scheme
def migrate_preference(key, default):
oldval = prefs[key]
if oldval != default:
self.prefs[key] = oldval
prefs[key] = default
if key not in self.prefs:
self.prefs[key] = default
migrate_preference('user_categories', {})
migrate_preference('saved_searches', {})
# migrate grouped_search_terms
if self.prefs.get('grouped_search_terms', None) is None:
try:
ogst = tweaks.get('grouped_search_terms', {})
ngst = {}
for t in ogst:
ngst[icu_lower(t)] = ogst[t]
self.prefs.set('grouped_search_terms', ngst)
except:
pass
# Rename any user categories with names that differ only in case
        user_cats = self.prefs.get('user_categories', {})
catmap = {}
for uc in user_cats:
ucl = icu_lower(uc)
if ucl not in catmap:
catmap[ucl] = []
catmap[ucl].append(uc)
cats_changed = False
for uc in catmap:
if len(catmap[uc]) > 1:
prints('found user category case overlap', catmap[uc])
cat = catmap[uc][0]
suffix = 1
while icu_lower((cat + unicode(suffix))) in catmap:
suffix += 1
prints('Renaming user category %s to %s'%(cat, cat+unicode(suffix)))
user_cats[cat + unicode(suffix)] = user_cats[cat]
del user_cats[cat]
cats_changed = True
if cats_changed:
self.prefs.set('user_categories', user_cats)
# }}}
def initialize_custom_columns(self): # {{{
with self.conn:
# Delete previously marked custom columns
for record in self.conn.get(
'SELECT id FROM custom_columns WHERE mark_for_delete=1'):
num = record[0]
table, lt = self.custom_table_names(num)
self.conn.execute('''\
DROP INDEX IF EXISTS {table}_idx;
DROP INDEX IF EXISTS {lt}_aidx;
DROP INDEX IF EXISTS {lt}_bidx;
DROP TRIGGER IF EXISTS fkc_update_{lt}_a;
DROP TRIGGER IF EXISTS fkc_update_{lt}_b;
DROP TRIGGER IF EXISTS fkc_insert_{lt};
DROP TRIGGER IF EXISTS fkc_delete_{lt};
DROP TRIGGER IF EXISTS fkc_insert_{table};
DROP TRIGGER IF EXISTS fkc_delete_{table};
DROP VIEW IF EXISTS tag_browser_{table};
DROP VIEW IF EXISTS tag_browser_filtered_{table};
DROP TABLE IF EXISTS {table};
DROP TABLE IF EXISTS {lt};
'''.format(table=table, lt=lt)
)
self.conn.execute('DELETE FROM custom_columns WHERE mark_for_delete=1')
# Load metadata for custom columns
self.custom_column_label_map, self.custom_column_num_map = {}, {}
triggers = []
remove = []
custom_tables = self.custom_tables
for record in self.conn.get(
'SELECT label,name,datatype,editable,display,normalized,id,is_multiple FROM custom_columns'):
data = {
'label':record[0],
'name':record[1],
'datatype':record[2],
'editable':bool(record[3]),
'display':json.loads(record[4]),
'normalized':bool(record[5]),
'num':record[6],
'is_multiple':bool(record[7]),
}
if data['display'] is None:
data['display'] = {}
# set up the is_multiple separator dict
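            # cache_to_list splits the string stored in the metadata
            # cache, ui_to_list splits what the user typed into the edit
            # widgets, and list_to_ui joins the values for display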
if data['is_multiple']:
if data['display'].get('is_names', False):
seps = {'cache_to_list': '|', 'ui_to_list': '&', 'list_to_ui': ' & '}
elif data['datatype'] == 'composite':
seps = {'cache_to_list': ',', 'ui_to_list': ',', 'list_to_ui': ', '}
else:
seps = {'cache_to_list': '|', 'ui_to_list': ',', 'list_to_ui': ', '}
else:
seps = {}
data['multiple_seps'] = seps
table, lt = self.custom_table_names(data['num'])
if table not in custom_tables or (data['normalized'] and lt not in
custom_tables):
remove.append(data)
continue
            self.custom_column_num_map[data['num']] = \
                self.custom_column_label_map[data['label']] = data
            # Create Foreign Key triggers
            target = lt if data['normalized'] else table
            triggers.append('DELETE FROM %s WHERE book=OLD.id;' % target)
if remove:
with self.conn:
for data in remove:
prints('WARNING: Custom column %r not found, removing.' %
data['label'])
self.conn.execute('DELETE FROM custom_columns WHERE id=?',
(data['num'],))
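        # Install a temporary trigger so that deleting a book also deletes
        # its rows in the custom column (link) tables, emulating a cascading
        # foreign key.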
if triggers:
with self.conn:
self.conn.execute('''\
CREATE TEMP TRIGGER custom_books_delete_trg
AFTER DELETE ON books
BEGIN
%s
END;
'''%(' \n'.join(triggers)))
# Setup data adapters
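        # Each adapter coerces a raw value (usually a string coming from the
        # GUI or an import) into the canonical python type for its datatype.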
def adapt_text(x, d):
if d['is_multiple']:
if x is None:
return []
if isinstance(x, (str, unicode, bytes)):
x = x.split(d['multiple_seps']['ui_to_list'])
x = [y.strip() for y in x if y.strip()]
x = [y.decode(preferred_encoding, 'replace') if not isinstance(y,
unicode) else y for y in x]
return [u' '.join(y.split()) for y in x]
else:
return x if x is None or isinstance(x, unicode) else \
x.decode(preferred_encoding, 'replace')
def adapt_datetime(x, d):
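            # Strings are parsed with calibre's parse_date; datetime objects
            # pass through unchanged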
if isinstance(x, (str, unicode, bytes)):
x = parse_date(x, assume_utc=False, as_utc=False)
return x
def adapt_bool(x, d):
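            # Accepts 'true'/'false'/'none' (case-insensitively) or anything
            # int() can parse, e.g. '2' -> True; None passes through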
if isinstance(x, (str, unicode, bytes)):
x = x.lower()
if x == 'true':
x = True
elif x == 'false':
x = False
elif x == 'none':
x = None
else:
x = bool(int(x))
return x
def adapt_enum(x, d):
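            # Normalize empty values to None so unset enumerations are
            # stored as NULL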
v = adapt_text(x, d)
if not v:
v = None
return v
def adapt_number(x, d):
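            # The literal string 'none' (any case) is treated as NULL;
            # otherwise coerce to int or float depending on the datatype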
if x is None:
return None
if isinstance(x, (str, unicode, bytes)):
if x.lower() == 'none':
return None
if d['datatype'] == 'int':
return int(x)
return float(x)
        self.custom_data_adapters = {
            'float': adapt_number,
            'int': adapt_number,
            'rating': lambda x, d: x if x is None else min(10., max(0., float(x))),
            'bool': adapt_bool,
            'comments': lambda x, d: adapt_text(x, {'is_multiple': False}),
            'datetime': adapt_datetime,
            'text': adapt_text,
            'series': adapt_text,
            'enumeration': adapt_enum,
        }
# Create Tag Browser categories for custom columns
        for k in sorted(self.custom_column_label_map.iterkeys()):
            v = self.custom_column_label_map[k]
            is_category = v['normalized']
is_m = v['multiple_seps']
tn = 'custom_column_{0}'.format(v['num'])
self.field_metadata.add_custom_field(label=v['label'],
table=tn, column='value', datatype=v['datatype'],
colnum=v['num'], name=v['name'], display=v['display'],
is_multiple=is_m, is_category=is_category,
is_editable=v['editable'], is_csp=False)
# }}}
def initialize_tables(self): # {{{
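        '''
        Build the in-memory table objects (OneToOneTable, ManyToOneTable,
        ManyToManyTable and friends) for the standard columns and for every
        custom column.
        '''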
tables = self.tables = {}
for col in ('title', 'sort', 'author_sort', 'series_index', 'comments',
'timestamp', 'published', 'uuid', 'path', 'cover',
'last_modified'):
metadata = self.field_metadata[col].copy()
            if metadata['table'] is None:
                metadata['table'], metadata['column'] = 'books', ('has_cover'
                        if col == 'cover' else col)
tables[col] = OneToOneTable(col, metadata)
for col in ('series', 'publisher', 'rating'):
tables[col] = ManyToOneTable(col, self.field_metadata[col].copy())
for col in ('authors', 'tags', 'formats', 'identifiers'):
cls = {
'authors':AuthorsTable,
'formats':FormatsTable,
'identifiers':IdentifiersTable,
}.get(col, ManyToManyTable)
tables[col] = cls(col, self.field_metadata[col].copy())
tables['size'] = SizeTable('size', self.field_metadata['size'].copy())
for label, data in self.custom_column_label_map.iteritems():
metadata = self.field_metadata[label].copy()
link_table = self.custom_table_names(data['num'])[1]
if data['normalized']:
if metadata['is_multiple']:
tables[label] = ManyToManyTable(label, metadata,
link_table=link_table)
else:
tables[label] = ManyToOneTable(label, metadata,
link_table=link_table)
if metadata['datatype'] == 'series':
# Create series index table
label += '_index'
metadata = self.field_metadata[label].copy()
metadata['column'] = 'extra'
metadata['table'] = link_table
tables[label] = OneToOneTable(label, metadata)
else:
tables[label] = OneToOneTable(label, metadata)
# }}}
@property
def conn(self):
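        # Open the connection lazily. An existing file whose user_version is
        # still 0 was never fully initialized, so discard it and start over.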
if self._conn is None:
self._conn = apsw.Connection(self.dbpath)
if self._exists and self.user_version == 0:
self._conn.close()
os.remove(self.dbpath)
self._conn = apsw.Connection(self.dbpath)
return self._conn
@dynamic_property
def user_version(self):
doc = 'The user version of this database'
def fget(self):
return self.conn.get('pragma user_version;', all=False)
def fset(self, val):
self.conn.execute('pragma user_version=%d'%int(val))
return property(doc=doc, fget=fget, fset=fset)
def initialize_database(self):
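        # Create the schema by running the bundled metadata_sqlite.sql
        # script, then bump user_version so the schema is created only once.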
metadata_sqlite = P('metadata_sqlite.sql', data=True,
allow_user_override=False).decode('utf-8')
self.conn.executescript(metadata_sqlite)
if self.user_version == 0:
self.user_version = 1
# }}}
# Database layer API {{{
def custom_table_names(self, num):
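        # e.g. custom_table_names(3) ->
        #   ('custom_column_3', 'books_custom_column_3_link')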
return 'custom_column_%d'%num, 'books_custom_column_%d_link'%num
@property
def custom_tables(self):
return set([x[0] for x in self.conn.get(
'SELECT name FROM sqlite_master WHERE type="table" AND '
'(name GLOB "custom_column_*" OR name GLOB "books_custom_column_*")')])
@classmethod
def exists_at(cls, path):
return path and os.path.exists(os.path.join(path, 'metadata.db'))
@dynamic_property
def library_id(self):
doc = ('The UUID for this library. As long as the user only operates'
' on libraries with calibre, it will be unique')
def fget(self):
if getattr(self, '_library_id_', None) is None:
ans = self.conn.get('SELECT uuid FROM library_id', all=False)
if ans is None:
ans = str(uuid.uuid4())
self.library_id = ans
else:
self._library_id_ = ans
return self._library_id_
def fset(self, val):
self._library_id_ = unicode(val)
self.conn.execute('''
DELETE FROM library_id;
INSERT INTO library_id (uuid) VALUES (?);
                ''', (self._library_id_,))
return property(doc=doc, fget=fget, fset=fset)
def last_modified(self):
''' Return last modified time as a UTC datetime object '''
return utcfromtimestamp(os.stat(self.dbpath).st_mtime)
def read_tables(self):
'''
Read all data from the db into the python in-memory tables
'''
with self.conn: # Use a single transaction, to ensure nothing modifies
# the db while we are reading
for table in self.tables.itervalues():
try:
table.read()
except:
prints('Failed to read table:', table.name)
raise
# }}}