Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-06-23 15:30:45 -04:00)

commit 033ea53de1 (merge)
@@ -1,93 +1,105 @@
-#!/usr/bin/env python
 __license__ = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
-__docformat__ = 'restructuredtext en'
+__copyright__ = '2008 Kovid Goyal kovid@kovidgoyal.net, 2010 Darko Miletic <darko.miletic at gmail.com>'
 
 '''
-businessweek.com
+www.businessweek.com
 '''
 
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class BusinessWeek(BasicNewsRecipe):
     title = 'Business Week'
-    description = 'Business News, Stock Market and Financial Advice'
-    __author__ = 'ChuckEggDotCom and Sujata Raman'
-    language = 'en'
+    __author__ = 'Kovid Goyal and Darko Miletic'
+    description = 'Read the latest international business news & stock market news. Get updated company profiles, financial advice, global economy and technology news.'
+    publisher = 'Bloomberg L.P.'
+    category = 'Business, business news, stock market, stock market news, financial advice, company profiles, financial advice, global economy, technology news'
+    oldest_article = 7
+    max_articles_per_feed = 200
+    no_stylesheets = True
+    encoding = 'utf8'
+    use_embedded_content = False
+    language = 'en'
+    remove_empty_feeds = True
+    publication_type = 'magazine'
+    cover_url = 'http://images.businessweek.com/mz/covers/current_120x160.jpg'
+    masthead_url = 'http://assets.businessweek.com/images/bw-logo.png'
+    extra_css = """
+        body{font-family: Helvetica,Arial,sans-serif }
+        img{margin-bottom: 0.4em; display:block}
+        .tagline{color: gray; font-style: italic}
+        .photoCredit{font-size: small; color: gray}
+    """
 
-    oldest_article = 7
-    max_articles_per_feed = 10
-    no_stylesheets = True
+    conversion_options = {
+        'comment'    : description
+        , 'tags'      : category
+        , 'publisher' : publisher
+        , 'language'  : language
+    }
 
-    recursions = 1
-    match_regexps = [r'http://www.businessweek.com/.*_page_[1-9].*']
-    extra_css = '''
-        h1{font-family :Arial,Helvetica,sans-serif; font-size:large;}
-        .news_story_title{font-family :Arial,Helvetica,sans-serif; font-size:large;font-weight:bold;}
-        h2{font-family :Arial,Helvetica,sans-serif; font-size:medium;color:#666666;}
-        h3{text-transform:uppercase;font-family :Arial,Helvetica,sans-serif; font-size:large;font-weight:bold;}
-        h4{font-family :Arial,Helvetica,sans-serif; font-size:small;font-weight:bold;}
-        p{font-family :Arial,Helvetica,sans-serif; }
-        #lede600{font-size:x-small;}
-        #storybody{font-size:x-small;}
-        p{font-family :Arial,Helvetica,sans-serif;}
-        .strap{font-family :Arial,Helvetica,sans-serif; font-size:x-small; color:#064599;}
-        .byline{font-family :Arial,Helvetica,sans-serif; font-size:x-small;}
-        .postedBy{font-family :Arial,Helvetica,sans-serif; font-size:x-small;color:#666666;}
-        .trackback{font-family :Arial,Helvetica,sans-serif; font-size:x-small;color:#666666;}
-        .date{font-family :Arial,Helvetica,sans-serif; font-size:x-small;color:#666666;}
-        .wrapper{font-family :Arial,Helvetica,sans-serif; font-size:x-small;}
-        .photoCredit{font-family :Arial,Helvetica,sans-serif; font-size:x-small;color:#666666;}
-        .tagline{font-family :Arial,Helvetica,sans-serif; font-size:x-small;color:#666666;}
-        .pageCount{color:#666666;font-family :Arial,Helvetica,sans-serif; font-size:x-small;}
-        .note{font-family :Arial,Helvetica,sans-serif; font-size:small;color:#666666;font-style:italic;}
-        .highlight{font-family :Arial,Helvetica,sans-serif; font-size:small;background-color:#FFF200;}
-        .annotation{font-family :Arial,Helvetica,sans-serif; font-size:x-small;color:#666666;}
-    '''
+    remove_tags = [
+        dict(attrs={'class':'inStory'})
+        ,dict(name=['meta','link','iframe','base','embed','object','table','th','tr','td'])
+        ,dict(attrs={'id':['inset','videoDisplay']})
+    ]
+    keep_only_tags = [dict(name='div', attrs={'id':['story-body','storyBody','article_body','articleBody']})]
+    remove_attributes = ['lang']
+    match_regexps = [r'http://www.businessweek.com/.*_page_[1-9].*']
 
-    remove_tags = [ dict(name='div', attrs={'id':["log","feedback","footer","secondarynav","secondnavbar","header","email","bw2-header","column2","wrapper-bw2-footer","wrapper-mgh-footer","inset","commentForm","commentDisplay","bwExtras","bw2-umbrella","readerComments","leg","rightcol"]}),
-        dict(name='div', attrs={'class':["menu",'sponsorbox smallertext',"TopNavTile","graybottom leaderboard"]}),
-        dict(name='img', alt ="News"),
-        dict(name='td', width ="1"),
-    ]
-
     feeds = [
         (u'Top Stories', u'http://www.businessweek.com/topStories/rss/topStories.rss'),
-        (u'Top News', u'http://www.businessweek.com/rss/bwdaily.rss'),
+        (u'Top News' , u'http://www.businessweek.com/rss/bwdaily.rss' ),
         (u'Asia', u'http://www.businessweek.com/rss/asia.rss'),
         (u'Autos', u'http://www.businessweek.com/rss/autos/index.rss'),
         (u'Classic Cars', u'http://rss.businessweek.com/bw_rss/classiccars'),
         (u'Hybrids', u'http://rss.businessweek.com/bw_rss/hybrids'),
         (u'Europe', u'http://www.businessweek.com/rss/europe.rss'),
         (u'Auto Reviews', u'http://rss.businessweek.com/bw_rss/autoreviews'),
         (u'Innovation & Design', u'http://www.businessweek.com/rss/innovate.rss'),
         (u'Architecture', u'http://www.businessweek.com/rss/architecture.rss'),
         (u'Brand Equity', u'http://www.businessweek.com/rss/brandequity.rss'),
         (u'Auto Design', u'http://www.businessweek.com/rss/carbuff.rss'),
         (u'Game Room', u'http://rss.businessweek.com/bw_rss/gameroom'),
         (u'Technology', u'http://www.businessweek.com/rss/technology.rss'),
         (u'Investing', u'http://rss.businessweek.com/bw_rss/investor'),
         (u'Small Business', u'http://www.businessweek.com/rss/smallbiz.rss'),
         (u'Careers', u'http://rss.businessweek.com/bw_rss/careers'),
         (u'B-Schools', u'http://www.businessweek.com/rss/bschools.rss'),
         (u'Magazine Selections', u'http://www.businessweek.com/rss/magazine.rss'),
         (u'CEO Guide to Tech', u'http://www.businessweek.com/rss/ceo_guide_tech.rss'),
     ]
 
     def get_article_url(self, article):
 
         url = article.get('guid', None)
-        if 'podcasts' in url or 'surveys' in url:
-            url = None
-        return url
+        if 'podcasts' in url:
+            return None
+        if 'surveys' in url:
+            return None
+        if 'images' in url:
+            return None
+        if 'feedroom' in url:
+            return None
+        if '/magazine/toc/' in url:
+            return None
+        rurl, sep, rest = url.rpartition('?')
+        if rurl:
+            return rurl
+        return rest
 
-    def postprocess_html(self, soup, first):
-        for tag in soup.findAll(name=['ul','li','table','td','tr','span']):
-            tag.name = 'div'
-        for tag in soup.findAll(name= 'div',attrs={ 'id':'pageNav'}):
-            tag.extract()
-        return soup
+    def print_version(self, url):
+        if '/news/' in url or '/blog/' in url:
+            return url
+        if '/magazine' in url:
+            rurl = url.replace('http://www.businessweek.com/','http://www.businessweek.com/printer/')
+        else:
+            rurl = url.replace('http://www.businessweek.com/','http://www.businessweek.com/print/')
+        return rurl.replace('/investing/','/investor/')
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        for alink in soup.findAll('a'):
+            if alink.string is not None:
+                tstr = alink.string
+                alink.replaceWith(tstr)
+        return soup
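Note: the rewritten get_article_url/print_version pair above first filters non-article GUIDs (podcasts, surveys, images, feedroom, magazine TOCs), then maps article URLs onto BusinessWeek's printer-friendly endpoints. A standalone sketch of that URL mapping, runnable outside calibre (the sample URL is made up for illustration):

    def to_print_url(url):
        # News and blog pages are already served on a single page.
        if '/news/' in url or '/blog/' in url:
            return url
        # Magazine pieces use the '/printer/' prefix, everything else '/print/'.
        prefix = 'printer' if '/magazine' in url else 'print'
        rurl = url.replace('http://www.businessweek.com/',
                           'http://www.businessweek.com/%s/' % prefix)
        return rurl.replace('/investing/', '/investor/')

    # Hypothetical example URL:
    print(to_print_url('http://www.businessweek.com/magazine/some-story.html'))
    # -> http://www.businessweek.com/printer/magazine/some-story.html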
@@ -4,95 +4,73 @@ __copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
 www.businessworld.in
 '''
 
-from calibre import strftime
+import re
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class BusinessWorldMagazine(BasicNewsRecipe):
     title = 'Business World Magazine'
-    __author__ = 'Darko Miletic'
+    __author__ = 'Kovid Goyal'
     description = 'News from India'
     publisher = 'ABP Pvt Ltd Publication'
     category = 'news, politics, finances, India, Asia'
     delay = 1
     no_stylesheets = True
-    INDEX = 'http://www.businessworld.in/bw/Magazine_Current_Issue'
+    INDEX = 'http://www.businessworld.in/businessworld/magazine_latest_issue.php'
     ROOT = 'http://www.businessworld.in'
-    use_embedded_content = False
     encoding = 'utf-8'
     language = 'en_IN'
-    extra_css = """
-        img{display: block; margin-bottom: 0.5em}
-        body{font-family: Arial,Helvetica,sans-serif}
-        h2{color: gray; display: block}
-    """
-
-    conversion_options = {
-        'comment'    : description
-        , 'tags'      : category
-        , 'publisher' : publisher
-        , 'language'  : language
-    }
-
-    def is_in_list(self,linklist,url):
-        for litem in linklist:
-            if litem == url:
-                return True
-        return False
-
+    auto_cleanup = True
 
     def parse_index(self):
+        br = self.browser
+        br.open(self.ROOT)
+        raw = br.open(br.click_link(text_regex=re.compile('Current.*Issue',
+            re.I))).read()
+        soup = self.index_to_soup(raw)
+        mc = soup.find(attrs={'class':'mag_cover'})
+        if mc is not None:
+            img = mc.find('img', src=True)
+            if img is not None:
+                self.cover_url = img['src']
+
+        feeds = []
+        current_section = None
         articles = []
-        linklist = []
-        soup = self.index_to_soup(self.INDEX)
-
-        tough = soup.find('div', attrs={'id':'tough'})
-        if tough:
-            for item in tough.findAll('h1'):
-                description = ''
-                title_prefix = ''
-                feed_link = item.find('a')
-                if feed_link and feed_link.has_key('href'):
-                    url = self.ROOT + feed_link['href']
-                    if not self.is_in_list(linklist,url):
-                        title = title_prefix + self.tag_to_string(feed_link)
-                        date = strftime(self.timefmt)
-                        articles.append({
-                            'title'      :title
-                            ,'date'       :date
-                            ,'url'        :url
-                            ,'description':description
-                        })
-                        linklist.append(url)
-
-        for item in soup.findAll('div', attrs={'class':'nametitle'}):
-            description = ''
-            title_prefix = ''
-            feed_link = item.find('a')
-            if feed_link and feed_link.has_key('href'):
-                url = self.ROOT + feed_link['href']
-                if not self.is_in_list(linklist,url):
-                    title = title_prefix + self.tag_to_string(feed_link)
-                    date = strftime(self.timefmt)
-                    articles.append({
-                        'title'      :title
-                        ,'date'       :date
-                        ,'url'        :url
-                        ,'description':description
-                    })
-                    linklist.append(url)
-        return [(soup.head.title.string, articles)]
-
-    keep_only_tags = [dict(name='div', attrs={'id':'printwrapper'})]
-    remove_tags = [dict(name=['object','link','meta','base','iframe','link','table'])]
-
-    def print_version(self, url):
-        return url.replace('/bw/','/bw/storyContent/')
-
-    def get_cover_url(self):
-        cover_url = None
-        soup = self.index_to_soup(self.INDEX)
-        cover_item = soup.find('img',attrs={'class':'toughbor'})
-        if cover_item:
-            cover_url = self.ROOT + cover_item['src']
-        return cover_url
+        for tag in soup.findAll(['h3', 'h2']):
+            inner_a = tag.find('a')
+            if tag.name == 'h3' and inner_a is not None:
+                continue
+            if tag.name == 'h2' and (inner_a is None or current_section is
+                    None):
+                continue
+
+            if tag.name == 'h3':
+                if current_section is not None and articles:
+                    feeds.append((current_section, articles))
+                current_section = self.tag_to_string(tag)
+                self.log('Found section:', current_section)
+                articles = []
+            elif tag.name == 'h2':
+                url = inner_a.get('href', None)
+                if url is None: continue
+                if url.startswith('/'): url = self.ROOT + url
+                title = self.tag_to_string(inner_a)
+                h1 = tag.findPreviousSibling('h1')
+                if h1 is not None:
+                    title = self.tag_to_string(h1) + title
+                self.log('\tFound article:', title)
+                articles.append({'title':title, 'url':url, 'date':'',
+                    'description':''})
+
+        if current_section and articles:
+            feeds.append((current_section, articles))
+
+        return feeds
recipes/cio_magazine.recipe (new file, 128 lines)
@@ -0,0 +1,128 @@
+# The first comments are the difficulties I have had with Python
+# When you get a UTF8 error, check the comments (accents). In Notepad++: Search, Goto, position, and you will see it.
+# Edit with Notepad++. If a '-' appears where it should not, the indentation is wrong... Edit - Blank operations - tab to space
+# I have understood what 'from' means... they are paths inside pylib.zip...
+# With 'from' you import only one symbol... with 'import', the whole library
+from calibre.web.feeds.news import BasicNewsRecipe
+# sys is not needed... I tried to use it to write to stderr
+from calibre import strftime
+# To convert the article's time
+import string, re
+# To use regular expressions
+# Seen in pylib.zip... the first letter is uppercase
+# These last two were a vague attempt at setting a cookie (not used)
+
+class CIO_Magazine(BasicNewsRecipe):
+    title = 'CIO Magazine'
+    oldest_article = 14
+    max_articles_per_feed = 100
+    auto_cleanup = True
+    __author__ = 'Julio Map'
+    description = 'CIO is the leading information brand for today-s busy Chief information Officer - CIO Magazine bi-monthly '
+    language = 'en'
+    encoding = 'utf8'
+    cover_url = 'http://www.cio.com/homepage/images/hp-cio-logo-linkedin.png'
+
+    remove_tags_before = dict(name='div', attrs={'id':'container'})
+    # Absolutely unnecessary... in the end I found a print_version (see further below)
+
+    # Within a given issue...
+    # issue_details contains the title and the sections of this issue
+    # DetailModule, inside issue_details, contains the URLs and summaries
+    # Within a given article...
+    # Article-default-body contains the text. But as I said, I found a print_version
+
+    no_stylesheets = True
+    remove_javascript = True
+
+    def print_version(self,url):
+        # This function is called by the framework... do not call it yourself (it would then be called twice)
+        # There is a printable version of the articles, obtained by changing
+        # http://www.cio.com/article/<num>/<title> into
+        # http://www.cio.com/article/print/<num>, which contains all the pages inside the div id=container
+        if url.startswith('/'):
+            url = 'http://www.cio.com'+url
+        segments = url.split('/')
+        printURL = '/'.join(segments[0:4]) + '/print/' + segments[4] +'#'
+        return printURL
+
+    def parse_index(self):
+        ###########################################################################
+        # This method should be implemented in recipes that parse a website
+        # instead of feeds to generate a list of articles. Typical uses are for
+        # news sources that have a Print Edition webpage that lists all the
+        # articles in the current print edition. If this function is implemented,
+        # it will be used in preference to BasicNewsRecipe.parse_feeds().
+        #
+        # It must return a list. Each element of the list must be a 2-element
+        # tuple of the form ('feed title', list of articles).
+        #
+        # Each list of articles must contain dictionaries of the form:
+        #
+        # {
+        #     'title'       : article title,
+        #     'url'         : URL of print version,
+        #     'date'        : The publication date of the article as a string,
+        #     'description' : A summary of the article
+        #     'content'     : The full article (can be an empty string). This is used by FullContentProfile
+        # }
+        #
+        # For an example, see the recipe for downloading The Atlantic.
+        # In addition, you can add 'author' for the author of the article.
+        ###############################################################################
+
+        # First we find the most recently created issue
+        soupinicial = self.index_to_soup('http://www.cio.com/magazine')
+        # It is the first link in the DIV with class content_body
+        a= soupinicial.find(True, attrs={'class':'content_body'}).find('a', href=True)
+        INDEX = re.sub(r'\?.*', '', a['href'])
+        # Since cio.com uses relative links, we prepend the domain name.
+        if INDEX.startswith('/'): # protecting ourselves in case they stop using them
+            INDEX = 'http://www.cio.com'+INDEX
+        # And we confirm in the logs that we are doing it right
+        print ("INDEX en parse_index: ", INDEX)
+
+        # Now we know which issue it is... let's process it.
+        soup = self.index_to_soup(INDEX)
+
+        articles = {}
+        key = None
+        feeds = []
+        # To start with, we keep only two DIVs, 'heading' and 'issue_item'
+        # From the first we get the categories (key) and from the second the URLs and summaries
+        for div in soup.findAll(True,
+            attrs={'class':['heading', 'issue_item']}):
+
+            if div['class'] == 'heading':
+                key = string.capwords(self.tag_to_string(div.span))
+                print ("Key: ",key) # This is for debugging
+                articles[key] = []
+                feeds.append(key)
+
+            elif div['class'] == 'issue_item':
+                a = div.find('a', href=True)
+                if not a:
+                    continue
+                url = re.sub(r'\?.*', '', a['href'])
+                print("url: ",url) # This is for debugging
+                title = self.tag_to_string(a, use_alt=True).strip() # For extra credit: also strip the last two words
+                pubdate = strftime('%a, %d %b') # Not the publication date, but the date it was collected
+                summary = div.find('p') # Inside the 'issue_item' div the only paragraph is the summary
+                description = '' # If there is a summary, description will be the summary... if not, leave it blank
+
+                if summary:
+                    description = self.tag_to_string(summary, use_alt=False)
+                    print ("Description = ", description)
+
+                feed = key if key is not None else 'Uncategorized' # This is copied from the NY Times recipe
+                if not articles.has_key(feed):
+                    articles[feed] = []
+                if not 'podcasts' in url:
+                    articles[feed].append(
+                        dict(title=title, url=url, date=pubdate,
+                            description=description,
+                            content=''))
+        feeds = [(key, articles[key]) for key in feeds if articles.has_key(key)]
+        return feeds
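Note: the comment block inside parse_index above restates the BasicNewsRecipe.parse_index() contract. A minimal sketch of a conforming return value (the feed name and URL are placeholders):

    def parse_index(self):
        # One ('feed title', [article dicts]) tuple per section; each article
        # dict needs 'title' and 'url'; 'date', 'description' and 'content'
        # may be empty strings.
        return [
            ('Front Page', [
                {'title': 'Example story',
                 'url': 'http://example.com/story',
                 'date': '', 'description': '', 'content': ''},
            ]),
        ]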
@@ -15,8 +15,10 @@ class Guardian(BasicNewsRecipe):
     title = u'The Guardian and The Observer'
     if date.today().weekday() == 6:
         base_url = "http://www.guardian.co.uk/theobserver"
+        cover_pic = 'Observer digital edition'
     else:
         base_url = "http://www.guardian.co.uk/theguardian"
+        cover_pic = 'Guardian digital edition'
 
     __author__ = 'Seabound and Sujata Raman'
     language = 'en_GB'
@@ -79,7 +81,7 @@ class Guardian(BasicNewsRecipe):
         # soup = self.index_to_soup("http://www.guardian.co.uk/theobserver")
         soup = self.index_to_soup(self.base_url)
         # find cover pic
-        img = soup.find( 'img',attrs ={'alt':'Guardian digital edition'})
+        img = soup.find( 'img',attrs ={'alt':self.cover_pic})
         if img is not None:
             self.cover_url = img['src']
         # end find cover pic
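Note: the new cover_pic attribute keeps the cover lookup in step with the existing weekday switch: datetime.date.weekday() returns 0 for Monday through 6 for Sunday, so on Sundays the recipe targets The Observer edition and now also its cover. A quick illustration:

    from datetime import date

    # weekday(): 0 = Monday ... 6 = Sunday
    if date.today().weekday() == 6:
        cover_pic = 'Observer digital edition'
    else:
        cover_pic = 'Guardian digital edition'
    print(cover_pic)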
recipes/hindustan_times.recipe (new file, 29 lines)
@@ -0,0 +1,29 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class HindustanTimes(BasicNewsRecipe):
+    title = u'Hindustan Times'
+    language = 'en_IN'
+    __author__ = 'Krittika Goyal'
+    oldest_article = 1  # days
+    max_articles_per_feed = 25
+    use_embedded_content = False
+
+    no_stylesheets = True
+    auto_cleanup = True
+
+    feeds = [
+        ('News',
+         'http://feeds.hindustantimes.com/HT-NewsSectionPage-Topstories'),
+        ('Views',
+         'http://feeds.hindustantimes.com/HT-ViewsSectionpage-Topstories'),
+        ('Cricket',
+         'http://feeds.hindustantimes.com/HT-Cricket-TopStories'),
+        ('Business',
+         'http://feeds.hindustantimes.com/HT-BusinessSectionpage-TopStories'),
+        ('Entertainment',
+         'http://feeds.hindustantimes.com/HT-HomePage-Entertainment'),
+        ('Lifestyle',
+         'http://feeds.hindustantimes.com/HT-Homepage-LifestyleNews'),
+    ]
recipes/icons/japan_times.png (new binary file, 1.2 KiB; not shown)
recipes/icons/rtnews.png (new binary file, 606 B; not shown)
recipes/icons/twitchfilms.png (new binary file, 200 B; not shown)
@@ -1,76 +1,25 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class IndiaToday(BasicNewsRecipe):
-    title = 'India Today'
-    __author__ = 'Kovid Goyal'
-    language = 'en_IN'
-    timefmt = ' [%d %m, %Y]'
-
-    oldest_article = 700
-    max_articles_per_feed = 10
+    title = u'India Today'
+    language = 'en_IN'
+    __author__ = 'Krittika Goyal'
+    oldest_article = 15 #days
+    max_articles_per_feed = 25
 
     no_stylesheets = True
+    auto_cleanup = True
 
-    remove_tags_before = dict(id='content_story_title')
-    remove_tags_after = dict(id='rightblockdiv')
-    remove_tags = [dict(id=['rightblockdiv', 'share_links'])]
-
-    extra_css = '#content_story_title { font-size: 170%; font-weight: bold;}'
-    conversion_options = { 'linearize_tables': True }
-
-    def it_get_index(self):
-        soup = self.index_to_soup('http://indiatoday.intoday.in/site/archive')
-        a = soup.find('a', href=lambda x: x and 'issueId=' in x)
-        url = 'http://indiatoday.intoday.in/site/'+a.get('href')
-        img = a.find('img')
-        self.cover_url = img.get('src')
-        return self.index_to_soup(url)
-
-    def parse_index(self):
-        soup = self.it_get_index()
-        feeds, current_section, current_articles = [], None, []
-        for x in soup.findAll(name=['h1', 'a']):
-            if x.name == 'h1':
-                if current_section and current_articles:
-                    feeds.append((current_section, current_articles))
-                current_section = self.tag_to_string(x)
-                current_articles = []
-                self.log('\tFound section:', current_section)
-            elif x.name == 'a' and 'Story' in x.get('href', ''):
-                title = self.tag_to_string(x)
-                url = x.get('href')
-                url = url.replace(' ', '%20')
-                if not url.startswith('/'):
-                    url = 'http://indiatoday.intoday.in/site/' + url
-                if title and url:
-                    url += '?complete=1'
-                    self.log('\tFound article:', title)
-                    self.log('\t\t', url)
-                    desc = ''
-                    h3 = x.parent.findNextSibling('h3')
-                    if h3 is not None:
-                        desc = 'By ' + self.tag_to_string(h3)
-                        h4 = h3.findNextSibling('h4')
-                        if h4 is not None:
-                            desc = self.tag_to_string(h4) + ' ' + desc
-                    if desc:
-                        self.log('\t\t', desc)
-                    current_articles.append({'title':title, 'description':desc,
-                        'url':url, 'date':''})
-
-        if current_section and current_articles:
-            feeds.append((current_section, current_articles))
-
-        return feeds
-
-    def postprocess_html(self, soup, first):
-        a = soup.find(text='Print')
-        if a is not None:
-            tr = a.findParent('tr')
-            if tr is not None:
-                tr.extract()
-        return soup
+    feeds = [
+        ('Latest News', 'http://indiatoday.intoday.in/rss/article.jsp?sid=4'),
+        ('Cover Story', 'http://indiatoday.intoday.in/rss/article.jsp?sid=30'),
+        ('Nation', 'http://indiatoday.intoday.in/rss/article.jsp?sid=36'),
+        ('States', 'http://indiatoday.intoday.in/rss/article.jsp?sid=21'),
+        ('Economy', 'http://indiatoday.intoday.in/rss/article.jsp?sid=34'),
+        ('World', 'http://indiatoday.intoday.in/rss/article.jsp?sid=61'),
+        ('Sport', 'http://indiatoday.intoday.in/rss/article.jsp?sid=41'),
+    ]
@@ -7,56 +7,33 @@ www.inquirer.net
 '''
 
 from calibre.web.feeds.recipes import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import Tag
 
 class InquirerNet(BasicNewsRecipe):
     title = 'Inquirer.net'
-    __author__ = 'Darko Miletic'
+    __author__ = 'Krittika Goyal'
     description = 'News from Philipines'
     oldest_article = 2
     max_articles_per_feed = 100
     no_stylesheets = True
     use_embedded_content = False
-    encoding = 'cp1252'
+    encoding = 'utf8'
     publisher = 'inquirer.net'
     category = 'news, politics, philipines'
     lang = 'en'
     language = 'en'
 
-    extra_css = ' .fontheadline{font-size: x-large} .fontsubheadline{font-size: large} .fontkick{font-size: medium}'
+    use_embedded_content = False
 
-    html2lrf_options = [
-        '--comment', description
-        , '--category', category
-        , '--publisher', publisher
-        , '--ignore-tables'
-    ]
-
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
-
-    remove_tags = [dict(name=['object','link','script','iframe','form'])]
+    no_stylesheets = True
+    auto_cleanup = True
 
     feeds = [
-        (u'Breaking news', u'http://services.inquirer.net/rss/breakingnews.xml' )
-        ,(u'Top stories' , u'http://services.inquirer.net/rss/topstories.xml' )
-        ,(u'Sports' , u'http://services.inquirer.net/rss/brk_breakingnews.xml' )
-        ,(u'InfoTech' , u'http://services.inquirer.net/rss/infotech_tech.xml' )
-        ,(u'InfoTech' , u'http://services.inquirer.net/rss/infotech_tech.xml' )
-        ,(u'Business' , u'http://services.inquirer.net/rss/inq7money_breaking_news.xml' )
-        ,(u'Editorial' , u'http://services.inquirer.net/rss/opinion_editorial.xml' )
-        ,(u'Global Nation', u'http://services.inquirer.net/rss/globalnation_breakingnews.xml')
+        (u'Inquirer', u'http://www.inquirer.net/fullfeed')
     ]
 
-    def preprocess_html(self, soup):
-        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
-        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
-        soup.head.insert(0,mlang)
-        soup.head.insert(1,mcharset)
-        for item in soup.findAll(style=True):
-            del item['style']
-        return soup
-
-    def print_version(self, url):
-        rest, sep, art = url.rpartition('/view/')
-        art_id, sp, rrest = art.partition('/')
-        return 'http://services.inquirer.net/print/print.php?article_id=' + art_id
+    def get_browser(self):
+        br = BasicNewsRecipe.get_browser(self)
+        br.set_handle_gzip(True)
+        return br
@@ -1,7 +1,5 @@
-#!/usr/bin/env python
-
 __license__ = 'GPL v3'
-__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
 '''
 japantimes.co.jp
 '''
@@ -9,24 +7,61 @@ japantimes.co.jp
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class JapanTimes(BasicNewsRecipe):
-    title = u'The Japan Times'
+    title = 'The Japan Times'
     __author__ = 'Darko Miletic'
-    description = 'News from Japan'
-    language = 'en'
-
-    oldest_article = 7
-    max_articles_per_feed = 100
+    description = "Daily news and features on Japan from the most widely read English-language newspaper in Japan. Coverage includes national news, business news, sports news, commentary and features on living in Japan, entertainment, the arts, education and more."
+    language = 'en_JP'
+    category = 'news, politics, japan'
+    publisher = 'The Japan Times'
+    oldest_article = 5
+    max_articles_per_feed = 150
     no_stylesheets = True
     use_embedded_content = False
+    encoding = 'utf8'
+    publication_type = 'newspaper'
+    masthead_url = 'http://search.japantimes.co.jp/images/header_title.gif'
+    extra_css = 'body{font-family: Geneva,Arial,Helvetica,sans-serif}'
 
-    keep_only_tags = [ dict(name='div', attrs={'id':'searchresult'}) ]
-    remove_tags_after = [ dict(name='div', attrs={'id':'mainbody' }) ]
+    conversion_options = {
+        'comment'           : description
+        , 'tags'             : category
+        , 'publisher'        : publisher
+        , 'language'         : language
+        , 'linearize_tables' : True
+    }
+
+    keep_only_tags = [dict(name='div', attrs={'id':'printresult'})]
     remove_tags = [
-        dict(name='div' , attrs={'id':'ads' })
-        ,dict(name='table', attrs={'width':470})
+        dict(name=['iframe','meta','link','embed','object','base'])
+        ,dict(attrs={'id':'searchfooter'})
     ]
+    feeds = [(u'The Japan Times', u'http://feeds.feedburner.com/japantimes')]
+    remove_attributes = ['border']
 
-    feeds = [
-        (u'The Japan Times', u'http://feedproxy.google.com/japantimes')
-    ]
+    def get_article_url(self, article):
+        rurl = BasicNewsRecipe.get_article_url(self, article)
+        return rurl.partition('?')[0]
+
+    def print_version(self, url):
+        return url.replace('/cgi-bin/','/print/')
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        for item in soup.findAll('img'):
+            if not item.has_key('alt'):
+                item['alt'] = 'image'
+        for item in soup.findAll('photo'):
+            item.name = 'div'
+        for item in soup.head.findAll('paragraph'):
+            item.extract()
+        for item in soup.findAll('wwfilename'):
+            item.extract()
+        for item in soup.findAll('jtcategory'):
+            item.extract()
+        for item in soup.findAll('nomooter'):
+            item.extract()
+        for item in soup.body.findAll('paragraph'):
+            item.name = 'p'
+        return soup
@@ -14,54 +14,11 @@ class PeopleMag(BasicNewsRecipe):
     use_embedded_content = False
     oldest_article = 2
     max_articles_per_feed = 50
-    use_embedded_content = False
 
-    extra_css = '''
-        h1{font-family:verdana,arial,helvetica,sans-serif; font-size: large;}
-        h2{font-family:verdana,arial,helvetica,sans-serif; font-size: small;}
-        .body-content{font-family:verdana,arial,helvetica,sans-serif; font-size: small;}
-        .byline {font-size: small; color: #666666; font-style:italic; }
-        .lastline {font-size: small; color: #666666; font-style:italic;}
-        .contact {font-size: small; color: #666666;}
-        .contact p {font-size: small; color: #666666;}
-        .photoCaption { font-family:verdana,arial,helvetica,sans-serif; font-size:x-small;}
-        .photoCredit{ font-family:verdana,arial,helvetica,sans-serif; font-size:x-small; color:#666666;}
-        .article_timestamp{font-size:x-small; color:#666666;}
-        a {font-family:verdana,arial,helvetica,sans-serif; font-size: x-small;}
-    '''
-
-    keep_only_tags = [
-        dict(name='div', attrs={'class': 'panel_news_article_main'}),
-        dict(name='div', attrs={'class':'article_content'}),
-        dict(name='div', attrs={'class': 'headline'}),
-        dict(name='div', attrs={'class': 'post'}),
-        dict(name='div', attrs={'class': 'packageheadlines'}),
-        dict(name='div', attrs={'class': 'snap_preview'}),
-        dict(name='div', attrs={'id': 'articlebody'})
-    ]
-
-    remove_tags = [
-        dict(name='div', attrs={'class':'share_comments'}),
-        dict(name='p', attrs={'class':'twitter_facebook'}),
-        dict(name='div', attrs={'class':'share_comments_bottom'}),
-        dict(name='h2', attrs={'id':'related_content'}),
-        dict(name='div', attrs={'class':'next_article'}),
-        dict(name='div', attrs={'class':'prev_article'}),
-        dict(name='ul', attrs={'id':'sharebar'}),
-        dict(name='div', attrs={'class':'sharelinkcont'}),
-        dict(name='div', attrs={'class':'categories'}),
-        dict(name='ul', attrs={'class':'categories'}),
-        dict(name='div', attrs={'class':'related_content'}),
-        dict(name='div', attrs={'id':'promo'}),
-        dict(name='div', attrs={'class':'linksWrapper'}),
-        dict(name='p', attrs={'class':'tag tvnews'}),
-        dict(name='p', attrs={'class':'tag movienews'}),
-        dict(name='p', attrs={'class':'tag musicnews'}),
-        dict(name='p', attrs={'class':'tag couples'}),
-        dict(name='p', attrs={'class':'tag gooddeeds'}),
-        dict(name='p', attrs={'class':'tag weddings'}),
-        dict(name='p', attrs={'class':'tag health'})
-    ]
-
+    no_stylesheets = True
+    auto_cleanup = True
+    auto_cleanup_keep = '//div[@id="article-image"]'
 
     feeds = [
@@ -69,26 +26,4 @@ class PeopleMag(BasicNewsRecipe):
         ('US Headlines', 'http://www.usmagazine.com/celebrity_news/rss')
     ]
-
-    def get_article_url(self, article):
-        ans = article.link
-
-        try:
-            self.log('Looking for full story link in', ans)
-            soup = self.index_to_soup(ans)
-            x = soup.find(text="View All")
-
-            if x is not None:
-                ans = ans + '?viewAll=y'
-                self.log('Found full story link', ans)
-        except:
-            pass
-        return ans
-
-    def postprocess_html(self, soup,first):
-
-        for tag in soup.findAll(name='div',attrs={'class':"container_ate_qandatitle"}):
-            tag.extract()
-        for tag in soup.findAll(name='br'):
-            tag.extract()
-
-        return soup
recipes/rtnews.recipe (new file, 64 lines)
@@ -0,0 +1,64 @@
+__license__ = 'GPL v3'
+__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
+'''
+rt.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class RT_eng(BasicNewsRecipe):
+    title = 'RT in English'
+    __author__ = 'Darko Miletic'
+    description = 'RT is the first Russian 24/7 English-language news channel which brings the Russian view on global news.'
+    publisher = 'Autonomous Nonprofit Organization "TV-Novosti"'
+    category = 'news, politics, economy, finances, Russia, world'
+    oldest_article = 2
+    no_stylesheets = True
+    encoding = 'utf8'
+    masthead_url = 'http://rt.com/s/css/img/printlogo.gif'
+    use_embedded_content = False
+    remove_empty_feeds = True
+    language = 'en_RU'
+    publication_type = 'newsportal'
+    extra_css = """
+        body{font-family: Arial,Helvetica,sans-serif}
+        h1{font-family: Georgia,"Times New Roman",Times,serif}
+        .grey{color: gray}
+        .fs12{font-size: small}
+    """
+
+    conversion_options = {
+        'comment'    : description
+        , 'tags'      : category
+        , 'publisher' : publisher
+        , 'language'  : language
+    }
+
+    keep_only_tags = [dict(name='div', attrs={'class':'all'})]
+    remove_tags = [
+        dict(name=['object','link','embed','iframe','meta','link'])
+        ,dict(attrs={'class':'crumbs oh'})
+    ]
+    remove_attributes = ['clear']
+
+    feeds = [
+        (u'Politics'    , u'http://rt.com/politics/rss/'             )
+        ,(u'USA'         , u'http://rt.com/usa/news/rss/'             )
+        ,(u'Business'    , u'http://rt.com/business/news/rss/'        )
+        ,(u'Sport'       , u'http://rt.com/sport/rss/'                )
+        ,(u'Art&Culture' , u'http://rt.com/art-and-culture/news/rss/' )
+    ]
+
+    def print_version(self, url):
+        return url + 'print/'
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        for item in soup.findAll('a'):
+            str = item.string
+            if str is None:
+                str = self.tag_to_string(item)
+            item.replaceWith(str)
+        return soup
@@ -1,12 +1,9 @@
-#!/usr/bin/env python
-
 __license__ = 'GPL v3'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>'
 '''
-twitchfilm.net/site/
+twitchfilm.net/news/
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import Tag
 
 class Twitchfilm(BasicNewsRecipe):
     title = 'Twitch Films'
@@ -15,29 +12,46 @@ class Twitchfilm(BasicNewsRecipe):
     oldest_article = 30
     max_articles_per_feed = 100
     no_stylesheets = True
-    use_embedded_content = True
+    use_embedded_content = False
     encoding = 'utf-8'
     publisher = 'Twitch'
+    masthead_url = 'http://twitchfilm.com/img/logo.png'
     category = 'twitch, twitchfilm, movie news, movie reviews, cult cinema, independent cinema, anime, foreign cinema, geek talk'
     language = 'en'
 
-    lang = 'en-US'
-
     conversion_options = {
         'comment'    : description
         , 'tags'      : category
-        , 'publisher' : publisher
-        , 'language'  : lang
-        , 'pretty_print' : True
+        , 'publisher': publisher
+        , 'language' : language
     }
 
-    remove_tags = [dict(name='div', attrs={'class':'feedflare'})]
+    keep_only_tags=[dict(attrs={'class':'asset-header'})]
+    remove_tags_after=dict(attrs={'class':'asset-body'})
+    remove_tags = [ dict(name='div', attrs={'class':['social','categories']})
+        , dict(attrs={'id':'main-asset'})
+        , dict(name=['meta','link','iframe','embed','object'])
+    ]
 
-    feeds = [(u'News', u'http://feedproxy.google.com/TwitchEverything')]
+    feeds = [(u'News', u'http://feeds.twitchfilm.net/TwitchEverything')]
 
     def preprocess_html(self, soup):
-        mtag = Tag(soup,'meta',[('http-equiv','Content-Type'),('context','text/html; charset=utf-8')])
-        soup.head.insert(0,mtag)
-        soup.html['lang'] = self.lang
-        return self.adeify_images(soup)
+        for item in soup.findAll(style=True):
+            del item['style']
+        for item in soup.findAll('a'):
+            limg = item.find('img')
+            if item.string is not None:
+                str = item.string
+                item.replaceWith(str)
+            else:
+                if limg:
+                    item.name = 'div'
+                    item.attrs = []
+                else:
+                    str = self.tag_to_string(item)
+                    item.replaceWith(str)
+        for item in soup.findAll('img'):
+            if not item.has_key('alt'):
+                item['alt'] = 'image'
+        return soup
@ -13,6 +13,7 @@ class USAToday(BasicNewsRecipe):
|
|||||||
title = 'USA Today'
|
title = 'USA Today'
|
||||||
__author__ = 'Kovid Goyal'
|
__author__ = 'Kovid Goyal'
|
||||||
oldest_article = 1
|
oldest_article = 1
|
||||||
|
publication_type = 'newspaper'
|
||||||
timefmt = ''
|
timefmt = ''
|
||||||
max_articles_per_feed = 20
|
max_articles_per_feed = 20
|
||||||
language = 'en'
|
language = 'en'
|
||||||
|
@@ -94,9 +94,11 @@ class WallStreetJournal(BasicNewsRecipe):
         if date is not None:
             self.timefmt = ' [%s]'%self.tag_to_string(date)
 
-        cov = soup.find('a', attrs={'class':'icon pdf'}, href=True)
+        cov = soup.find('div', attrs={'class':'itpSectionHeaderPdf'})
         if cov is not None:
-            self.cover_url = cov['href']
+            a = cov.find('a', href=True)
+            if a is not None:
+                self.cover_url = a['href']
 
         feeds = []
         div = soup.find('div', attrs={'class':'itpHeader'})
@@ -61,7 +61,7 @@ authors_completer_append_separator = False
 # selecting 'manage authors', and pressing 'Recalculate all author sort values'.
 # The author name suffixes are words that are ignored when they occur at the
 # end of an author name. The case of the suffix is ignored and trailing
-# periods are automatically handled.
+# periods are automatically handled. The same is true for prefixes.
 # The author name copy words are a set of words which if they occur in an
 # author name cause the automatically generated author sort string to be
 # identical to the author name. This means that the sort for a string like Acme
@@ -653,6 +653,15 @@ class KOBO(USBMS):
             debug_print('     Commit: Set FavouritesIndex')
 
     def update_device_database_collections(self, booklists, collections_attributes, oncard):
+        # Only process categories in this list
+        supportedcategories = {
+            "Im_Reading":1,
+            "Read":2,
+            "Closed":3,
+            "Shortlist":4,
+            # "Preview":99, # Unsupported as we don't want to change it
+        }
+
         # Define lists for the ReadStatus
         readstatuslist = {
             "Im_Reading":1,
@@ -692,6 +701,7 @@ class KOBO(USBMS):
 
         # Process any collections that exist
         for category, books in collections.items():
+            if category in supportedcategories:
                 debug_print("Category: ", category, " id = ", readstatuslist.get(category))
                 for book in books:
                     debug_print('    Title:', book.title, 'category: ', category)
@@ -368,7 +368,10 @@ OptionRecommendation(name='remove_paragraph_spacing_indent_size',
         recommended_value=1.5, level=OptionRecommendation.LOW,
         help=_('When calibre removes blank lines between paragraphs, it automatically '
             'sets a paragraph indent, to ensure that paragraphs can be easily '
-            'distinguished. This option controls the width of that indent (in em).')
+            'distinguished. This option controls the width of that indent (in em). '
+            'If you set this value to 0, then the indent specified in the input '
+            'document is used, unless you also set the insert line between '
+            'paragraphs option.')
         ),
 
 OptionRecommendation(name='prefer_metadata_cover',
@@ -394,8 +397,9 @@ OptionRecommendation(name='insert_blank_line_size',
 OptionRecommendation(name='remove_first_image',
         recommended_value=False, level=OptionRecommendation.LOW,
         help=_('Remove the first image from the input ebook. Useful if the '
-            'first image in the source file is a cover and you are specifying '
-            'an external cover.'
+            'input document has a cover image that is not identified as a cover. '
+            'In this case, if you set a cover in calibre, the output document will '
+            'end up with two cover images if you do not specify this option.'
             )
         ),
 
@@ -1024,7 +1028,7 @@ OptionRecommendation(name='sr3_replace',
                 self.output_plugin.file_type not in ('mobi', 'lrf'):
             from calibre.ebooks.oeb.transforms.linearize_tables import LinearizeTables
             LinearizeTables()(self.oeb, self.opts)
 
         if self.opts.unsmarten_punctuation:
             from calibre.ebooks.oeb.transforms.unsmarten import UnsmartenPunctuation
             UnsmartenPunctuation()(self.oeb, self.opts)
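Note: the expanded help text for remove_paragraph_spacing_indent_size defines 0 as a sentinel: keep the input document's own indent, unless the insert-blank-line option is also set. A small sketch of that precedence, with hypothetical names (this is not the plumber's actual code):

    def effective_indent(indent_size_em, input_indent_em, insert_blank_line):
        # 0 defers to the input document's indent, unless blank lines
        # between paragraphs were also requested.
        if indent_size_em == 0 and not insert_blank_line:
            return input_indent_em
        return indent_size_em

    assert effective_indent(1.5, 2.0, False) == 1.5  # explicit width wins
    assert effective_indent(0, 2.0, False) == 2.0    # keep input indent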
@@ -75,7 +75,7 @@ class IgnoreFile(Exception):
 
     def __init__(self, msg, errno):
         Exception.__init__(self, msg)
-        self.doesnt_exist = errno == 2
+        self.doesnt_exist = errno == errno.ENOENT
         self.errno = errno
 
 class HTMLFile(object):
@@ -65,20 +65,27 @@ def author_to_author_sort(author, method=None):
     suffixes = set([x.lower() for x in tweaks['author_name_suffixes']])
     suffixes |= set([x+u'.' for x in suffixes])
 
-    last = tokens[-1].lower()
-    suffix = None
-    if last in suffixes:
-        suffix = tokens[-1]
-        tokens = tokens[:-1]
+    suffix = u''
+    while True:
+        if not tokens:
+            return author
+        last = tokens[-1].lower()
+        if last in suffixes:
+            suffix = tokens[-1] + ' ' + suffix
+            tokens = tokens[:-1]
+        else:
+            break
+    suffix = suffix.strip()
 
     if method == u'comma' and u',' in u''.join(tokens):
         return author
 
     atokens = tokens[-1:] + tokens[:-1]
+    num_toks = len(atokens)
     if suffix:
         atokens.append(suffix)
 
-    if method != u'nocomma' and len(atokens) > 1:
+    if method != u'nocomma' and num_toks > 1:
         atokens[0] += u','
 
     return u' '.join(atokens)
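Note: the rewritten loop strips any run of trailing suffixes, not just one, preserving their order, and num_toks is captured before the suffix is re-appended so the comma rule only counts real name tokens. A standalone sketch of the same logic (the suffix set here is a stand-in for tweaks['author_name_suffixes']):

    def sort_name(author, suffixes=('jr', 'jr.', 'iii')):
        tokens = author.split()
        suffix = ''
        # Strip trailing suffixes, keeping their original order.
        while tokens and tokens[-1].lower() in suffixes:
            suffix = tokens[-1] + ' ' + suffix
            tokens = tokens[:-1]
        if not tokens:
            return author
        atokens = tokens[-1:] + tokens[:-1]  # rotate surname to the front
        num_toks = len(atokens)
        if suffix.strip():
            atokens.append(suffix.strip())
        if num_toks > 1:
            atokens[0] += ','
        return ' '.join(atokens)

    print(sort_name('John Smith Jr. III'))  # -> Smith, John Jr. III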
@@ -330,9 +330,11 @@ class MetadataUpdater(object):
             prefs = load_defaults('mobi_output')
             pas = prefs.get('prefer_author_sort', False)
             kindle_pdoc = prefs.get('personal_doc', None)
+            share_not_sync = prefs.get('share_not_sync', False)
         except:
             pas = False
             kindle_pdoc = None
+            share_not_sync = False
         if mi.author_sort and pas:
             authors = mi.author_sort
             update_exth_record((100, normalize(authors).encode(self.codec, 'replace')))
@@ -376,7 +378,7 @@ class MetadataUpdater(object):
         # Add a 113 record if not present to allow Amazon syncing
         if (113 not in self.original_exth_records and
                 self.original_exth_records.get(501, None) == 'EBOK' and
-                not added_501):
+                not added_501 and not share_not_sync):
             from uuid import uuid4
             update_exth_record((113, str(uuid4())))
         if 503 in self.original_exth_records:
@@ -116,7 +116,8 @@ def cap_author_token(token):
     lt = lower(token)
     if lt in ('von', 'de', 'el', 'van', 'le'):
         return lt
-    if re.match(r'([a-z]\.){2,}$', lt) is not None:
+    # no digits no spez. characters
+    if re.match(r'([^\d\W]\.){2,}$', lt, re.UNICODE) is not None:
         # Normalize tokens of the form J.K. to J. K.
         parts = token.split('.')
         return '. '.join(map(capitalize, parts)).strip()
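Note: the new pattern widens the old ASCII-only [a-z] class. With re.UNICODE, [^\d\W] matches any Unicode letter (plus underscore), so dotted initials in non-Latin scripts are normalized too, while digit runs like '1.2.' still fall through. A quick standalone check:

    # -*- coding: utf-8 -*-
    import re

    pat = re.compile(r'([^\d\W]\.){2,}$', re.UNICODE)
    for tok in [u'j.k.', u'ж.к.', u'1.2.']:
        print('%s -> %s' % (tok, bool(pat.match(tok))))
    # j.k. -> True, ж.к. -> True, 1.2. -> False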
@@ -28,7 +28,7 @@ class Ozon(Source):
     touched_fields = frozenset(['title', 'authors', 'identifier:isbn', 'identifier:ozon',
                                 'publisher', 'pubdate', 'comments', 'series', 'rating', 'language'])
     # For test purposes only; the test function does not like it when some fields are empty
-    #touched_fields = frozenset(['title', 'authors', 'identifier:isbn', 'identifier:ozon',
+    # touched_fields = frozenset(['title', 'authors', 'identifier:isbn', 'identifier:ozon',
     #                            'publisher', 'pubdate', 'comments'])

     supports_gzip_transfer_encoding = True
@@ -109,8 +109,16 @@ class Ozon(Source):
     # }}}

     def get_metadata(self, log, entries, title, authors, identifiers): # {{{
+        # some book titles have extra characters like this
+        # TODO: make a tweak
+        reRemoveFromTitle = None
+        # reRemoveFromTitle = re.compile(r'[?!:.,;+-/&%"\'=]')

         title = unicode(title).upper() if title else ''
-        authors = map(unicode.upper, map(unicode, authors)) if authors else None
+        if reRemoveFromTitle:
+            title = reRemoveFromTitle.sub('', title)
+        authors = map(_normalizeAuthorNameWithInitials,
+                      map(unicode.upper, map(unicode, authors))) if authors else None
         ozon_id = identifiers.get('ozon', None)

         unk = unicode(_('Unknown')).upper()
@@ -124,6 +132,7 @@ class Ozon(Source):
         def in_authors(authors, miauthors):
             for author in authors:
                 for miauthor in miauthors:
+                    #log.debug(u'=> %s <> %s'%(author, miauthor))
                     if author in miauthor: return True
             return None

@@ -131,7 +140,10 @@ class Ozon(Source):
             match = True
             if title:
                 mititle = unicode(mi.title).upper() if mi.title else ''
+                if reRemoveFromTitle:
+                    mititle = reRemoveFromTitle.sub('', mititle)
                 match = title in mititle
+                #log.debug(u't=> %s <> %s'%(title, mititle))
             if match and authors:
                 miauthors = map(unicode.upper, map(unicode, mi.authors)) if mi.authors else []
                 match = in_authors(authors, miauthors)
@@ -190,7 +202,8 @@ class Ozon(Source):

         title = entry.xpath(xp_template.format('Name'))
         author = entry.xpath(xp_template.format('Author'))
-        mi = Metadata(title, author.split(','))
+        norm_authors = map(_normalizeAuthorNameWithInitials, map(unicode.strip, unicode(author).split(u',')))
+        mi = Metadata(title, norm_authors)

         ozon_id = entry.xpath(xp_template.format('ID'))
         mi.identifiers = {'ozon':ozon_id}
@@ -202,6 +215,11 @@ class Ozon(Source):
         if cover:
             mi.ozon_cover_url = _translateToBigCoverUrl(cover)

+        pub_year = entry.xpath(xp_template.format('Year'))
+        if pub_year:
+            mi.pubdate = toPubdate(log, pub_year)
+            #log.debug('pubdate %s'%mi.pubdate)
+
         rating = entry.xpath(xp_template.format('ClientRatingValue'))
         if rating:
             try:
@@ -269,13 +287,17 @@ class Ozon(Source):
         raw = self.browser.open_novisit(url, timeout=timeout).read()
         doc = html.fromstring(raw)

+        xpt_prod_det_at = u'string(//div[contains(@class, "product-detail")]//*[contains(normalize-space(text()), "%s")]/a[1]/@title)'
+        xpt_prod_det_tx = u'substring-after(//div[contains(@class, "product-detail")]//text()[contains(., "%s")], ":")'
+
         # series
-        xpt = u'normalize-space(//div[@class="frame_content"]//div[contains(normalize-space(text()), "Серия:")]//a/@title)'
+        xpt = xpt_prod_det_at % u'Сери'
+        # % u'Серия:'
         series = doc.xpath(xpt)
         if series:
             metadata.series = series

-        xpt = u'substring-after(//meta[@name="description"]/@content, "ISBN")'
+        xpt = u'normalize-space(substring-after(//meta[@name="description"]/@content, "ISBN"))'
         isbn_str = doc.xpath(xpt)
         if isbn_str:
             all_isbns = [check_isbn(isbn) for isbn in self.isbnRegex.findall(isbn_str) if check_isbn(isbn)]
@@ -283,38 +305,42 @@ class Ozon(Source):
             metadata.all_isbns = all_isbns
             metadata.isbn = all_isbns[0]

-        xpt = u'//div[@class="frame_content"]//div[contains(normalize-space(text()), "Издатель")]//a[@title="Издательство"]'
+        xpt = xpt_prod_det_at % u'Издатель'
         publishers = doc.xpath(xpt)
         if publishers:
-            metadata.publisher = publishers[0].text
+            metadata.publisher = publishers

-            xpt = u'string(../text()[contains(., "г.")])'
-            yearIn = publishers[0].xpath(xpt)
+        displ_lang = None
+        xpt = xpt_prod_det_tx % u'Язык'
+        langs = doc.xpath(xpt)
+        if langs:
+            lng_splt = langs.split(u',')
+            if lng_splt:
+                displ_lang = lng_splt[0].strip()
+        metadata.language = _translageLanguageToCode(displ_lang)
+        #log.debug(u'language: %s'%displ_lang)

+        # may already be set from the xml search response
+        if not metadata.pubdate:
+            xpt = u'normalize-space(//div[@class="product-misc"]//text()[contains(., "г.")])'
+            yearIn = doc.xpath(xpt)
             if yearIn:
                 matcher = re.search(r'\d{4}', yearIn)
                 if matcher:
-                    year = int(matcher.group(0))
-                    # only year is available, so use 1-st of Jan
-                    metadata.pubdate = datetime.datetime(year, 1, 1) #<- failed comparation in identify.py
-                    #metadata.pubdate = datetime(year, 1, 1)
-
-            xpt = u'substring-after(string(../text()[contains(., "Язык")]), ": ")'
-            displLang = publishers[0].xpath(xpt)
-            lang_code =_translageLanguageToCode(displLang)
-            if lang_code:
-                metadata.language = lang_code
+                    metadata.pubdate = toPubdate(log, matcher.group(0))

         # overwrite comments from HTML if any
-        # tr/td[contains(.//text(), "От издателя")] -> does not work, why?
-        xpt = u'//div[contains(@class, "detail")]//tr/td//text()[contains(., "От издателя")]'\
-              u'/ancestor::tr[1]/following-sibling::tr[1]/td[contains(./@class, "description")][1]'
+        xpt = u'//table[@id="detail_description"]//tr/td'
         comment_elem = doc.xpath(xpt)
         if comment_elem:
             comments = unicode(etree.tostring(comment_elem[0]))
             if comments:
                 # clean up the root tag, TODO: remove tags like object/embedded
-                comments = re.sub(r'^<td.+?>|</td>.+?$', u'', comments).strip()
-                if comments:
+                comments = re.sub(r'\A.*?<td.*?>|</td>.*\Z', u'', comments.strip(), re.MULTILINE).strip()
+                if comments and (not metadata.comments or len(comments) > len(metadata.comments)):
                     metadata.comments = comments
+                else:
+                    log.debug('HTML book description skipped in favour of the search service xml response')
         else:
             log.debug('No book description found in HTML')
     # }}}
@@ -390,10 +416,40 @@ def _translageLanguageToCode(displayLang): # {{{
                u'Итальянский': 'it',
                u'Испанский': 'es',
                u'Китайский': 'zh',
-               u'Японский': 'ja' }
+               u'Японский': 'ja',
+               u'Финский' : 'fi',
+               u'Польский' : 'pl',}
    return langTbl.get(displayLang, None)
# }}}

+# [В.П. Колесников | Колесников В.П.] -> В. П. Колесников
+def _normalizeAuthorNameWithInitials(name): # {{{
+    res = name
+    if name:
+        re1 = u'^(?P<lname>\S+)\s+(?P<fname>[^\d\W]\.)(?:\s*(?P<mname>[^\d\W]\.))?$'
+        re2 = u'^(?P<fname>[^\d\W]\.)(?:\s*(?P<mname>[^\d\W]\.))?\s+(?P<lname>\S+)$'
+        matcher = re.match(re1, unicode(name), re.UNICODE)
+        if not matcher:
+            matcher = re.match(re2, unicode(name), re.UNICODE)
+
+        if matcher:
+            d = matcher.groupdict()
+            res = ' '.join(x for x in (d['fname'], d['mname'], d['lname']) if x)
+    return res
+# }}}
+
+def toPubdate(log, yearAsString):
+    res = None
+    if yearAsString:
+        try:
+            year = int(yearAsString)
+            # only the year is available, so use the 1st of Jan
+            res = datetime.datetime(year, 1, 1)
+        except:
+            log.error('cannot parse to date %s'%yearAsString)
+    return res
+

if __name__ == '__main__': # tests {{{
    # To run these tests use: calibre-debug -e src/calibre/ebooks/metadata/sources/ozon.py
    # comment out some touched_fields before running those tests
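
The two new helpers are small enough to exercise in isolation. A hedged sketch of how _normalizeAuthorNameWithInitials behaves on the two name orders from its comment (same regexes as above, inlined so the snippet is self-contained); toPubdate just maps a bare year string to January 1 of that year:

    # -*- coding: utf-8 -*-
    import re

    RE1 = u'^(?P<lname>\S+)\s+(?P<fname>[^\d\W]\.)(?:\s*(?P<mname>[^\d\W]\.))?$'
    RE2 = u'^(?P<fname>[^\d\W]\.)(?:\s*(?P<mname>[^\d\W]\.))?\s+(?P<lname>\S+)$'

    def normalize(name):
        # Accept both "Lastname F.M." and "F.M. Lastname", emit "F. M. Lastname"
        m = re.match(RE1, name, re.UNICODE) or re.match(RE2, name, re.UNICODE)
        if m:
            d = m.groupdict()
            return u' '.join(x for x in (d['fname'], d['mname'], d['lname']) if x)
        return name

    print(normalize(u'Колесников В.П.'))   # -> В. П. Колесников
    print(normalize(u'В.П. Колесников'))   # -> В. П. Колесников
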
@@ -403,40 +459,45 @@ if __name__ == '__main__': # tests {{{

    test_identify_plugin(Ozon.name,
        [
+            # (
+            #     {'identifiers':{}, 'title':u'Норвежский язык: Практический курс',
+            #      'authors':[u'Колесников В.П.', u'Г.В. Шатков']},
+            #     [title_test(u'Норвежский язык: Практический курс', exact=True),
+            #      authors_test([u'В. П. Колесников', u'Г. В. Шатков'])]
+            # ),
             (
                 {'identifiers':{'isbn': '9785916572629'} },
                 [title_test(u'На все четыре стороны', exact=True),
                  authors_test([u'А. А. Гилл'])]
             ),
             (
                 {'identifiers':{}, 'title':u'Der Himmel Kennt Keine Gunstlinge',
                     'authors':[u'Erich Maria Remarque']},
                 [title_test(u'Der Himmel Kennt Keine Gunstlinge', exact=True),
                  authors_test([u'Erich Maria Remarque'])]
             ),
             (
                 {'identifiers':{ }, 'title':u'Метро 2033',
                     'authors':[u'Дмитрий Глуховский']},
                 [title_test(u'Метро 2033', exact=False)]
             ),
             (
                 {'identifiers':{'isbn': '9785170727209'}, 'title':u'Метро 2033',
                     'authors':[u'Дмитрий Глуховский']},
                 [title_test(u'Метро 2033', exact=True),
                  authors_test([u'Дмитрий Глуховский']),
                  isbn_test('9785170727209')]
             ),
             (
                 {'identifiers':{'isbn': '5-699-13613-4'}, 'title':u'Метро 2033',
                     'authors':[u'Дмитрий Глуховский']},
                 [title_test(u'Метро 2033', exact=True),
                  authors_test([u'Дмитрий Глуховский'])]
             ),
             (
                 {'identifiers':{}, 'title':u'Метро',
                     'authors':[u'Глуховский']},
                 [title_test(u'Метро', exact=False)]
             ),
        ])
# }}}
@@ -55,6 +55,11 @@ class MOBIOutput(OutputFormatPlugin):
                    ' specified directory. If the directory already '
                    'exists, it will be deleted.')
        ),
+       OptionRecommendation(name='share_not_sync', recommended_value=False,
+           help=_('Enable sharing of book content via Facebook etc. '
+               ' on the Kindle. WARNING: Using this feature means that '
+               ' the book will not auto sync its last read position '
+               ' on multiple devices. Complain to Amazon.'))
        ])

    def check_for_periodical(self):
@@ -61,6 +61,13 @@ class MobiWriter(object):

    def __call__(self, oeb, path_or_stream):
        self.log = oeb.log
+       pt = None
+       if oeb.metadata.publication_type:
+           x = unicode(oeb.metadata.publication_type[0]).split(':')
+           if len(x) > 1:
+               pt = x[1].lower()
+       self.publication_type = pt
+
        if hasattr(path_or_stream, 'write'):
            return self.dump_stream(oeb, path_or_stream)
        with open(path_or_stream, 'w+b') as stream:
@@ -346,12 +353,14 @@ class MobiWriter(object):

        bt = 0x002
        if self.primary_index_record_idx is not None:
-           if self.indexer.is_flat_periodical:
+           if False and self.indexer.is_flat_periodical:
+               # Disabled as setting this to 0x102 causes the Kindle to not
+               # auto archive the issues
                bt = 0x102
            elif self.indexer.is_periodical:
                # If you change this, remember to change the cdetype in the EXTH
                # header as well
-               bt = 0x103
+               bt = {'newspaper':0x101}.get(self.publication_type, 0x103)

        record0.write(pack(b'>IIIII',
            0xe8, bt, 65001, uid, 6))
@@ -520,20 +529,22 @@ class MobiWriter(object):

        if isinstance(uuid, unicode):
            uuid = uuid.encode('utf-8')
-       exth.write(pack(b'>II', 113, len(uuid) + 8))
-       exth.write(uuid)
-       nrecs += 1
+       if not self.opts.share_not_sync:
+           exth.write(pack(b'>II', 113, len(uuid) + 8))
+           exth.write(uuid)
+           nrecs += 1

        # Write cdetype
-       if self.is_periodical:
-           # If you set the book type header field to 0x101 use NWPR here if
-           # you use 0x103 use MAGZ
-           data = b'MAGZ'
-       else:
-           data = b'EBOK'
-       exth.write(pack(b'>II', 501, len(data)+8))
-       exth.write(data)
-       nrecs += 1
+       if not self.is_periodical:
+           exth.write(pack(b'>II', 501, 12))
+           exth.write(b'EBOK')
+           nrecs += 1
+       else:
+           # Should be b'NWPR' for doc type of 0x101 and b'MAGZ' for doctype
+           # of 0x103 but the old writer didn't write them, and I don't know
+           # what it should be for type 0x102 (b'BLOG'?) so write nothing
+           # instead
+           pass

        # Add a publication date entry
        if oeb.metadata['date']:
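
For readers unfamiliar with the MOBI format: an EXTH record is a type id (uint32) followed by the record's total length (uint32, counting the 8 header bytes) and the payload; per the hunks above, record 501 carries the cdetype and record 113 the identifier the Kindle uses for last-read-position sync. A minimal sketch of the layout being written:

    from struct import pack
    from io import BytesIO

    def write_exth_record(buf, rec_type, data):
        # the length field counts the 8 bytes of type+length plus the payload
        buf.write(pack(b'>II', rec_type, len(data) + 8))
        buf.write(data)

    exth = BytesIO()
    write_exth_record(exth, 501, b'EBOK')  # cdetype for an ordinary book
    assert len(exth.getvalue()) == 12      # matches pack(b'>II', 501, 12) above
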
@@ -160,7 +160,9 @@ class Serializer(object):
            buf.write(b'title="')
            self.serialize_text(ref.title, quot=True)
            buf.write(b'" ')
-           if ref.title == 'start':
+           if (ref.title.lower() == 'start' or
+                   (ref.type and ref.type.lower() in ('start',
+                       'other.start'))):
                self._start_href = ref.href
            self.serialize_href(ref.href)
        # Space required or won't work, I kid you not
@@ -348,8 +350,9 @@ class Serializer(object):
        '''
        buf = self.buf
        id_offsets = self.id_offsets
+       start_href = getattr(self, '_start_href', None)
        for href, hoffs in self.href_offsets.items():
-           is_start = (href and href == getattr(self, '_start_href', None))
+           is_start = (href and href == start_href)
            # Iterate over all filepos items
            if href not in id_offsets:
                self.logger.warn('Hyperlink target %r not found' % href)
@@ -320,9 +320,11 @@ class CSSFlattener(object):
            if self.context.insert_blank_line:
                cssdict['margin-top'] = cssdict['margin-bottom'] = \
                        '%fem'%self.context.insert_blank_line_size
-           if (self.context.remove_paragraph_spacing and
+           indent_size = self.context.remove_paragraph_spacing_indent_size
+           keep_indents = indent_size == 0.0 and not self.context.insert_blank_line
+           if (self.context.remove_paragraph_spacing and not keep_indents and
                    cssdict.get('text-align', None) not in ('center', 'right')):
-               cssdict['text-indent'] = "%1.1fem" % self.context.remove_paragraph_spacing_indent_size
+               cssdict['text-indent'] = "%1.1fem" % indent_size

        if cssdict:
            items = cssdict.items()
@@ -53,7 +53,7 @@ def pdftohtml(output_dir, pdf_path, no_images):
        p = popen(cmd, stderr=logf._fd, stdout=logf._fd,
                stdin=subprocess.PIPE)
    except OSError as err:
-       if err.errno == 2:
+       if err.errno == errno.ENOENT:
            raise ConversionError(_('Could not find pdftohtml, check it is in your PATH'))
        else:
            raise
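
Several hunks in this merge make the same substitution: bare errno integers (2, 13) replaced by the symbolic names from the errno module, which read better and avoid transcription mistakes. The pattern in isolation:

    import errno
    import subprocess

    try:
        subprocess.Popen(['pdftohtml', '-v'])
    except OSError as err:
        if err.errno == errno.ENOENT:    # ENOENT == 2: no such file or directory
            print('pdftohtml is not on your PATH')
        elif err.errno == errno.EACCES:  # EACCES == 13: permission denied
            print('pdftohtml exists but is not executable')
        else:
            raise
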
@@ -11,6 +11,7 @@ Write content to PDF.
 import os
 import shutil

+from calibre import isosx
 from calibre.ptempfile import PersistentTemporaryDirectory
 from calibre.ebooks.pdf.pageoptions import unit, paper_size, \
     orientation
@@ -164,6 +165,12 @@ class PDFWriter(QObject): # {{{
            self.logger.debug('\tRendering item %s as %i.pdf' % (os.path.basename(str(self.view.url().toLocalFile())), len(self.combine_queue)))
            printer = get_pdf_printer(self.opts)
            printer.setOutputFileName(item_path)
+           # We have to set the engine to Native on OS X after the call to set
+           # filename. Setting a filename with .pdf as the extension causes
+           # Qt to set the format to use Qt's PDF engine even if native was
+           # previously set on the printer.
+           if isosx:
+               printer.setOutputFormat(QPrinter.NativeFormat)
            self.view.print_(printer)
            printer.abort()
        self._render_book()
@@ -179,6 +186,8 @@ class PDFWriter(QObject): # {{{
        item_path = os.path.join(self.tmp_path, 'cover.pdf')
        printer = get_pdf_printer(self.opts)
        printer.setOutputFileName(item_path)
+       if isosx:
+           printer.setOutputFormat(QPrinter.NativeFormat)
        self.combine_queue.insert(0, item_path)
        p = QPixmap()
        p.loadFromData(self.cover_data)
@@ -202,7 +211,7 @@ class PDFWriter(QObject): # {{{
                    inputPDF = PdfFileReader(item_stream)
                    for page in inputPDF.pages:
                        outPDF.addPage(page)
            outPDF.write(self.out_stream)
        finally:
            self._delete_tmpdir()
        self.loop.exit(0)
@@ -229,6 +238,8 @@ class ImagePDFWriter(object):
    def render_images(self, outpath, mi, items):
        printer = get_pdf_printer(self.opts, for_comic=True)
        printer.setOutputFileName(outpath)
+       if isosx:
+           printer.setOutputFormat(QPrinter.NativeFormat)
        printer.setDocName(mi.title)
        printer.setCreator(u'%s [%s]'%(__appname__, __version__))
        # Seems to be no way to set author
@@ -1,3 +1,8 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
 import re, sys
 from collections import defaultdict

@@ -72,10 +77,15 @@ class Document:
            self.options[k] = v
        self.html = None
        self.log = log
+       self.keep_elements = set()

    def _html(self, force=False):
        if force or self.html is None:
            self.html = self._parse(self.input)
+           path = self.options['keep_elements']
+           if path is not None:
+               self.keep_elements = set(self.html.xpath(path))
+
        return self.html

    def _parse(self, input):
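
With this change, callers can fence off parts of a page from the readability heuristics by passing an XPath. A hedged usage sketch (Document is the class being patched here; the keep_elements keyword is routed through the options handling above):

    from calibre.utils.logging import default_log

    raw = open('article.html', 'rb').read()
    # Elements matching the XPath are recorded in self.keep_elements and are
    # skipped by remove_unlikely_candidates() and the conditional cleaners.
    doc = Document(raw, default_log,
                   keep_elements='//div[@id="article-body"]')
    cleaned = doc.summary()
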
@@ -152,8 +162,9 @@ class Document:
            append = False
            if sibling is best_elem:
                append = True
-           sibling_key = sibling #HashableElement(sibling)
-           if sibling_key in candidates and candidates[sibling_key]['content_score'] >= sibling_score_threshold:
+           if sibling in candidates and candidates[sibling]['content_score'] >= sibling_score_threshold:
+               append = True
+           if sibling in self.keep_elements:
                append = True

            if sibling.tag == "p":
@@ -283,6 +294,8 @@ class Document:

    def remove_unlikely_candidates(self):
        for elem in self.html.iter():
+           if elem in self.keep_elements:
+               continue
            s = "%s %s" % (elem.get('class', ''), elem.get('id', ''))
            #self.debug(s)
            if REGEXES['unlikelyCandidatesRe'].search(s) and (not REGEXES['okMaybeItsACandidateRe'].search(s)) and elem.tag != 'body':
@@ -337,7 +350,7 @@ class Document:
        allowed = {}
        # Conditionally clean <table>s, <ul>s, and <div>s
        for el in self.reverse_tags(node, "table", "ul", "div"):
-           if el in allowed:
+           if el in allowed or el in self.keep_elements:
                continue
            weight = self.class_weight(el)
            if el in candidates:
@@ -450,64 +463,39 @@ class Document:
                #self.debug("pname %s pweight %.3f" %(pname, pweight))
                el.drop_tree()

-       for el in ([node] + [n for n in node.iter()]):
-           if not (self.options['attributes']):
-               #el.attrib = {} #FIXME:Checkout the effects of disabling this
-               pass
-
        return clean_attributes(tounicode(node))

+def option_parser():
+    from calibre.utils.config import OptionParser
+    parser = OptionParser(usage='%prog: [options] file')
+    parser.add_option('-v', '--verbose', default=False, action='store_true',
+            dest='verbose',
+            help='Show detailed output information. Useful for debugging')
+    parser.add_option('-k', '--keep-elements', default=None, action='store',
+            dest='keep_elements',
+            help='XPath specifying elements that should not be removed')
+
+    return parser
-class HashableElement():
-    def __init__(self, node):
-        self.node = node
-        self._path = None
-
-    def _get_path(self):
-        if self._path is None:
-            reverse_path = []
-            node = self.node
-            while node is not None:
-                node_id = (node.tag, tuple(node.attrib.items()), node.text)
-                reverse_path.append(node_id)
-                node = node.getparent()
-            self._path = tuple(reverse_path)
-        return self._path
-    path = property(_get_path)
-
-    def __hash__(self):
-        return hash(self.path)
-
-    def __eq__(self, other):
-        return self.path == other.path
-
-    def __getattr__(self, tag):
-        return getattr(self.node, tag)

 def main():
-    import logging
-    from optparse import OptionParser
-    parser = OptionParser(usage="%prog: [options] [file]")
-    parser.add_option('-v', '--verbose', action='store_true')
-    parser.add_option('-u', '--url', help="use URL instead of a local file")
-    (options, args) = parser.parse_args()
+    from calibre.utils.logging import default_log
+    parser = option_parser()
+    options, args = parser.parse_args()

-    if not (len(args) == 1 or options.url):
+    if len(args) != 1:
         parser.print_help()
-        sys.exit(1)
-    logging.basicConfig(level=logging.INFO)
+        raise SystemExit(1)

-    file = None
-    if options.url:
-        import urllib
-        file = urllib.urlopen(options.url)
-    else:
-        file = open(args[0], 'rt')
+    with open(args[0], 'rb') as f:
+        raw = f.read()

     enc = sys.__stdout__.encoding or 'utf-8'
-    try:
-        print Document(file.read(), debug=options.verbose).summary().encode(enc, 'replace')
-    finally:
-        file.close()
+    if options.verbose:
+        default_log.filter_level = default_log.DEBUG
+    print (Document(raw, default_log,
+            debug=options.verbose,
+            keep_elements=options.keep_elements).summary().encode(enc,
+                'replace'))

 if __name__ == '__main__':
     main()
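
The rewritten main() drops the old --url mode and standard-library logging in favour of calibre's log plus the new option_parser() above. Assuming the usual calibre-debug entry point (module path and exact invocation hypothetical), it could be driven like:

    calibre-debug -e readability.py -- -v -k '//div[@id="comments"]' page.html

where -k forwards the XPath to the keep_elements option shown earlier.
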
@@ -142,7 +142,7 @@ def _config(): # {{{
    c.add_opt('upload_news_to_device', default=True,
              help=_('Upload downloaded news to device'))
    c.add_opt('delete_news_from_library_on_upload', default=False,
-             help=_('Delete books from library after uploading to device'))
+             help=_('Delete news books from library after uploading to device'))
    c.add_opt('separate_cover_flow', default=False,
              help=_('Show the cover flow in a separate window instead of in the main calibre window'))
    c.add_opt('disable_tray_notification', default=False,
@@ -8,6 +8,7 @@ __docformat__ = 'restructuredtext en'
 import os
 from functools import partial
 from threading import Thread
+from contextlib import closing

 from PyQt4.Qt import QToolButton

@@ -52,7 +53,13 @@ class Worker(Thread): # {{{

    def doit(self):
        from calibre.library.database2 import LibraryDatabase2
-       newdb = LibraryDatabase2(self.loc)
+       newdb = LibraryDatabase2(self.loc, is_second_db=True)
+       with closing(newdb):
+           self._doit(newdb)
+       newdb.break_cycles()
+       del newdb
+
+   def _doit(self, newdb):
        for i, x in enumerate(self.ids):
            mi = self.db.get_metadata(x, index_is_id=True, get_cover=True,
                    cover_as_data=True)
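
The `with closing(newdb)` idiom is what guarantees the second database is closed even if `_doit` raises: contextlib.closing wraps any object that has a close() method in a context manager. In isolation:

    from contextlib import closing

    class Conn(object):
        def close(self):
            print('closed')

    # close() runs on normal exit *and* when the body raises
    with closing(Conn()) as conn:
        pass  # use conn here

Note that `newdb.break_cycles()` is called after the with block, since closing() only arranges for close() itself.
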
@@ -111,6 +118,7 @@ class Worker(Thread): # {{{
                    os.remove(path)
                except:
                    pass
+
# }}}

class CopyToLibraryAction(InterfaceAction):
@@ -23,7 +23,7 @@ class PluginWidget(Widget, Ui_Form):
        Widget.__init__(self, parent,
                ['prefer_author_sort', 'rescale_images', 'toc_title',
                    'mobi_ignore_margins', 'mobi_toc_at_start',
-                   'dont_compress', 'no_inline_toc',
+                   'dont_compress', 'no_inline_toc', 'share_not_sync',
                    'personal_doc']#, 'mobi_navpoints_only_deepest']
                )
        self.db, self.book_id = db, book_id
@@ -75,6 +75,13 @@
    </item>
   </layout>
  </item>
+ <item>
+  <widget class="QCheckBox" name="opt_share_not_sync">
+   <property name="text">
+    <string>Enable sharing of book content via Facebook, etc. WARNING: Disables last read syncing</string>
+   </property>
+  </widget>
+ </item>
  <item>
   <spacer name="verticalSpacer">
    <property name="orientation">
@@ -266,7 +266,7 @@ class JobManager(QAbstractTableModel): # {{{

    def kill_multiple_jobs(self, rows, view):
        jobs = [self.jobs[row] for row in rows]
-       devjobs = [j for j in jobs is isinstance(j, DeviceJob)]
+       devjobs = [j for j in jobs if isinstance(j, DeviceJob)]
        if devjobs:
            error_dialog(view, _('Cannot kill job'),
                    _('Cannot kill jobs that communicate with the device')).exec_()
@@ -443,7 +443,13 @@ class Editor(QFrame): # {{{
            return QWidget.keyPressEvent(self, ev)
        button = getattr(self, 'button%d'%which)
        button.setStyleSheet('QPushButton { font-weight: normal}')
-       sequence = QKeySequence(code|(int(ev.modifiers())&~Qt.KeypadModifier))
+       mods = int(ev.modifiers()) & ~Qt.KeypadModifier
+       txt = unicode(ev.text())
+       if txt and txt.lower() == txt.upper():
+           # We have a symbol like ! or > etc. In this case the value of code
+           # already includes Shift, so remove it
+           mods &= ~Qt.ShiftModifier
+       sequence = QKeySequence(code|mods)
        button.setText(sequence.toString(QKeySequence.NativeText))
        self.capture = 0
        dup_desc = self.dup_check(sequence)
@@ -7,7 +7,7 @@ __license__ = 'GPL v3'
 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import textwrap, re, os
+import textwrap, re, os, errno

 from PyQt4.Qt import (Qt, QDateEdit, QDate, pyqtSignal, QMessageBox,
     QIcon, QToolButton, QWidget, QLabel, QGridLayout, QApplication,
@@ -98,7 +98,7 @@ class TitleEdit(EnLineEdit):
            getattr(db, 'set_'+ self.TITLE_ATTR)(id_, title, notify=False,
                    commit=False)
        except (IOError, OSError) as err:
-           if getattr(err, 'errno', -1) == 13: # Permission denied
+           if getattr(err, 'errno', -1) == errno.EACCES: # Permission denied
                import traceback
                fname = err.filename if err.filename else 'file'
                error_dialog(self, _('Permission denied'),
@@ -262,7 +262,7 @@ class AuthorsEdit(MultiCompleteComboBox):
            self.books_to_refresh |= db.set_authors(id_, authors, notify=False,
                    allow_case_change=True)
        except (IOError, OSError) as err:
-           if getattr(err, 'errno', -1) == 13: # Permission denied
+           if getattr(err, 'errno', -1) == errno.EACCES: # Permission denied
                import traceback
                fname = err.filename if err.filename else 'file'
                error_dialog(self, _('Permission denied'),
|
@ -7,7 +7,7 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import os
|
import os, errno
|
||||||
from functools import partial
|
from functools import partial
|
||||||
|
|
||||||
from PyQt4.Qt import (Qt, QVBoxLayout, QHBoxLayout, QWidget, QPushButton,
|
from PyQt4.Qt import (Qt, QVBoxLayout, QHBoxLayout, QWidget, QPushButton,
|
||||||
@@ -427,7 +427,7 @@ class MetadataSingleDialogBase(ResizableDialog):
                self.books_to_refresh |= getattr(widget, 'books_to_refresh',
                        set([]))
            except IOError as err:
-               if err.errno == 13: # Permission denied
+               if err.errno == errno.EACCES: # Permission denied
                    import traceback
                    fname = err.filename if err.filename else 'file'
                    error_dialog(self, _('Permission denied'),
@@ -80,13 +80,15 @@ class OzonRUStore(BasicStoreConfig, StorePlugin):
        doc = html.fromstring(f.read())

        # example of where we are going to find the formats
-       # <div class="box">
-       #   ...
-       #   <b>Доступные форматы:</b>
-       #   <div class="vertpadd">.epub, .fb2, .pdf, .pdf, .txt</div>
-       #   ...
+       # <div class="l">
+       #   <p>
+       #     Доступно:
+       #   </p>
        # </div>
-       xpt = u'normalize-space(//div[@class="box"]//*[contains(normalize-space(text()), "Доступные форматы:")][1]/following-sibling::div[1]/text())'
+       # <div class="l">
+       #   <p>.epub, .fb2.zip, .pdf</p>
+       # </div>
+       xpt = u'normalize-space(//div[contains(@class, "product-detail")]//*[contains(normalize-space(text()), "Доступ")]/ancestor-or-self::div[1]/following-sibling::div[1]/*[1])'
        formats = doc.xpath(xpt)
        if formats:
            result = True
@@ -161,7 +161,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
        return path and os.path.exists(os.path.join(path, 'metadata.db'))

    def __init__(self, library_path, row_factory=False, default_prefs=None,
-           read_only=False):
+           read_only=False, is_second_db=False):
+       self.is_second_db = is_second_db
        try:
            if isbytestring(library_path):
                library_path = library_path.decode(filesystem_encoding)
@@ -263,7 +264,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):

        migrate_preference('user_categories', {})
        migrate_preference('saved_searches', {})
-       set_saved_searches(self, 'saved_searches')
+       if not self.is_second_db:
+           set_saved_searches(self, 'saved_searches')

        # migrate grouped_search_terms
        if self.prefs.get('grouped_search_terms', None) is None:
@@ -34,7 +34,7 @@ class DispatchController(object): # {{{
    def __init__(self, prefix, wsgi=False):
        self.dispatcher = cherrypy.dispatch.RoutesDispatcher()
        self.funcs = []
-       self.seen = set([])
+       self.seen = set()
        self.prefix = prefix if prefix else ''
        if wsgi:
            self.prefix = ''
@@ -146,6 +146,11 @@ class LibraryServer(ContentServer, MobileServer, XMLServer, OPDSServer, Cache,
        self.config = {}
        self.is_running = False
        self.exception = None
+       #self.config['/'] = {
+       #    'tools.sessions.on' : True,
+       #    'tools.sessions.timeout': 60, # Session times out after 60 minutes
+       #}
+
        if not wsgi:
            self.setup_loggers()
            cherrypy.engine.bonjour.subscribe()
@@ -154,6 +159,7 @@ class LibraryServer(ContentServer, MobileServer, XMLServer, OPDSServer, Cache,
            'tools.gzip.mime_types': ['text/html', 'text/plain',
                'text/xml', 'text/javascript', 'text/css'],
        }
+
        if opts.password:
            self.config['/'] = {
                'tools.digest_auth.on' : True,
@@ -202,7 +202,7 @@ class ContentServer(object):
                mode='rb')
        if fmt is None:
            raise cherrypy.HTTPError(404, 'book: %d does not have format: %s'%(id, format))
-       mi = self.db.get_metadata(id, index_is_id=True)
+       mi = newmi = self.db.get_metadata(id, index_is_id=True)
        if format == 'EPUB':
            # Get the original metadata

@@ -214,9 +214,8 @@ class ContentServer(object):
                # Transform the metadata via the plugboard
                newmi = mi.deepcopy_metadata()
                newmi.template_to_attribute(mi, cpb)
-           else:
-               newmi = mi

+       if format in ('MOBI', 'EPUB'):
            # Write the updated file
            from calibre.ebooks.metadata.meta import set_metadata
            set_metadata(fmt, newmi, 'epub')
@@ -277,12 +277,15 @@ class MobileServer(object):
        cherrypy.response.headers['Content-Type'] = 'text/html; charset=utf-8'
        cherrypy.response.headers['Last-Modified'] = self.last_modified(updated)


        url_base = "/mobile?search=" + search+";order="+order+";sort="+sort+";num="+str(num)

-       return html.tostring(build_index(books, num, search, sort, order,
+       raw = html.tostring(build_index(books, num, search, sort, order,
                            start, len(ids), url_base, CKEYS,
                            self.opts.url_prefix),
-                           encoding='utf-8', include_meta_content_type=True,
+                           encoding='utf-8',
                            pretty_print=True)
+       # tostring's include_meta_content_type is broken
+       raw = raw.replace('<head>', '<head>\n'
+               '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">')
+       return raw

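
The workaround above sidesteps lxml's include_meta_content_type option (reported broken here) by splicing the <meta> tag into the serialized markup by hand. The same trick in isolation (Python 2 era, where tostring returns a byte string):

    from lxml import html

    root = html.fromstring('<html><head></head><body><p>hi</p></body></html>')
    raw = html.tostring(root, encoding='utf-8', pretty_print=True)
    # Splice the content-type declaration in manually instead of relying on
    # tostring(include_meta_content_type=True)
    raw = raw.replace('<head>', '<head>\n'
            '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">')
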
@@ -28,6 +28,10 @@ class Browser(B):
        B.set_cookiejar(self, *args, **kwargs)
        self._clone_actions['set_cookiejar'] = ('set_cookiejar', args, kwargs)

+   @property
+   def cookiejar(self):
+       return self._clone_actions['set_cookiejar'][1][0]
+
    def set_handle_redirect(self, *args, **kwargs):
        B.set_handle_redirect(self, *args, **kwargs)
        self._clone_actions['set_handle_redirect'] = ('set_handle_redirect',
@@ -125,6 +125,7 @@ _extra_lang_codes = {
        'en_HR' : _('English (Croatia)'),
        'en_ID' : _('English (Indonesia)'),
        'en_IL' : _('English (Israel)'),
+       'en_RU' : _('English (Russia)'),
        'en_SG' : _('English (Singapore)'),
        'en_YE' : _('English (Yemen)'),
        'en_IE' : _('English (Ireland)'),
@@ -1,6 +1,6 @@
 # module pyparsing.py
 #
-# Copyright (c) 2003-2010 Paul T. McGuire
+# Copyright (c) 2003-2011 Paul T. McGuire
 #
 # Permission is hereby granted, free of charge, to any person obtaining
 # a copy of this software and associated documentation files (the
@@ -58,8 +58,8 @@ The pyparsing module handles some of the problems that are typically vexing when
 - embedded comments
 """

-__version__ = "1.5.5"
-__versionTime__ = "12 Aug 2010 03:56"
+__version__ = "1.5.6"
+__versionTime__ = "26 June 2011 10:53"
 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"

 import string
@@ -101,11 +101,12 @@ if _PY3K:
    basestring = str
    unichr = chr
    _ustr = str
-   _str2dict = set
    alphas = string.ascii_lowercase + string.ascii_uppercase
else:
    _MAX_INT = sys.maxint
    range = xrange
+   set = lambda s : dict( [(c,0) for c in s] )
+   alphas = string.lowercase + string.uppercase

    def _ustr(obj):
        """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
@@ -134,9 +135,6 @@ else:
        #return unicode(obj).encode(sys.getdefaultencoding(), 'replace')
        # ...

-   def _str2dict(strg):
-       return dict( [(c,0) for c in strg] )
-
    alphas = string.lowercase + string.uppercase

# build list of single arg builtins, tolerant of Python version, that can be used as parse actions
@@ -606,10 +604,10 @@ class ParseResults(object):

    def __setstate__(self,state):
        self.__toklist = state[0]
-       self.__tokdict, \
-       par, \
-       inAccumNames, \
-       self.__name = state[1]
+       (self.__tokdict,
+        par,
+        inAccumNames,
+        self.__name) = state[1]
        self.__accumNames = {}
        self.__accumNames.update(inAccumNames)
        if par is not None:
@@ -667,6 +665,35 @@ def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing."""
    pass

+'decorator to trim function calls to match the arity of the target'
+if not _PY3K:
+    def _trim_arity(func, maxargs=2):
+        limit = [0]
+        def wrapper(*args):
+            while 1:
+                try:
+                    return func(*args[limit[0]:])
+                except TypeError:
+                    if limit[0] <= maxargs:
+                        limit[0] += 1
+                        continue
+                    raise
+        return wrapper
+else:
+    def _trim_arity(func, maxargs=2):
+        limit = maxargs
+        def wrapper(*args):
+            #~ nonlocal limit
+            while 1:
+                try:
+                    return func(*args[limit:])
+                except TypeError:
+                    if limit:
+                        limit -= 1
+                        continue
+                    raise
+        return wrapper
+
class ParserElement(object):
    """Abstract base level parser element class."""
    DEFAULT_WHITE_CHARS = " \n\t\r"
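
_trim_arity is the replacement for the roughly one hundred lines of co_argcount introspection deleted further down: instead of inspecting the callable, it simply retries the call with fewer leading arguments until the TypeError stops, and remembers the successful offset. A standalone sketch of the same algorithm as the Python 2 branch above:

    def trim_arity(func, maxargs=2):
        limit = [0]  # a list, so the closure can rebind it without nonlocal
        def wrapper(*args):
            while 1:
                try:
                    return func(*args[limit[0]:])
                except TypeError:
                    if limit[0] <= maxargs:
                        limit[0] += 1   # drop one more leading argument
                        continue
                    raise
        return wrapper

    # pyparsing parse actions may be written as f(s, loc, toks), f(loc, toks),
    # f(toks) or f(); all end up callable as f(s, loc, toks):
    upcase = trim_arity(lambda toks: toks[0].upper())
    print(upcase('source text', 0, ['hello']))  # -> HELLO

One known wrinkle of this approach: a TypeError raised inside the action body itself is indistinguishable from an arity mismatch on the first call.
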
@@ -731,6 +758,9 @@ class ParserElement(object):
        see L{I{__call__}<__call__>}.
        """
        newself = self.copy()
+       if name.endswith("*"):
+           name = name[:-1]
+           listAllMatches=True
        newself.resultsName = name
        newself.modalResults = not listAllMatches
        return newself
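
The trailing '*' shorthand gives listAllMatches without spelling out the keyword. A hedged example against this version of the API:

    from pyparsing import Word, alphas, nums, OneOrMore

    pair = Word(alphas)("key*") + Word(nums)   # '*' => listAllMatches=True
    result = OneOrMore(pair).parseString("a 1 b 2")
    print(result['key'])   # ['a', 'b'] -- every match, not just the last
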
@@ -753,104 +783,6 @@ class ParserElement(object):
        self._parse = self._parse._originalParseMethod
        return self

-   def _normalizeParseActionArgs( f ):
-       """Internal method used to decorate parse actions that take fewer than 3 arguments,
-          so that all parse actions can be called as C{f(s,l,t)}."""
-       STAR_ARGS = 4
-
-       # special handling for single-argument builtins
-       if (f in singleArgBuiltins):
-           numargs = 1
-       else:
-           try:
-               restore = None
-               if isinstance(f,type):
-                   restore = f
-                   f = f.__init__
-               if not _PY3K:
-                   codeObj = f.func_code
-               else:
-                   codeObj = f.code
-               if codeObj.co_flags & STAR_ARGS:
-                   return f
-               numargs = codeObj.co_argcount
-               if not _PY3K:
-                   if hasattr(f,"im_self"):
-                       numargs -= 1
-               else:
-                   if hasattr(f,"__self__"):
-                       numargs -= 1
-               if restore:
-                   f = restore
-           except AttributeError:
-               try:
-                   if not _PY3K:
-                       call_im_func_code = f.__call__.im_func.func_code
-                   else:
-                       call_im_func_code = f.__code__
-
-                   # not a function, must be a callable object, get info from the
-                   # im_func binding of its bound __call__ method
-                   if call_im_func_code.co_flags & STAR_ARGS:
-                       return f
-                   numargs = call_im_func_code.co_argcount
-                   if not _PY3K:
-                       if hasattr(f.__call__,"im_self"):
-                           numargs -= 1
-                   else:
-                       if hasattr(f.__call__,"__self__"):
-                           numargs -= 0
-               except AttributeError:
-                   if not _PY3K:
-                       call_func_code = f.__call__.func_code
-                   else:
-                       call_func_code = f.__call__.__code__
-                   # not a bound method, get info directly from __call__ method
-                   if call_func_code.co_flags & STAR_ARGS:
-                       return f
-                   numargs = call_func_code.co_argcount
-                   if not _PY3K:
-                       if hasattr(f.__call__,"im_self"):
-                           numargs -= 1
-                   else:
-                       if hasattr(f.__call__,"__self__"):
-                           numargs -= 1
-
-       #~ print ("adding function %s with %d args" % (f.func_name,numargs))
-       if numargs == 3:
-           return f
-       else:
-           if numargs > 3:
-               def tmp(s,l,t):
-                   return f(f.__call__.__self__, s,l,t)
-           if numargs == 2:
-               def tmp(s,l,t):
-                   return f(l,t)
-           elif numargs == 1:
-               def tmp(s,l,t):
-                   return f(t)
-           else: #~ numargs == 0:
-               def tmp(s,l,t):
-                   return f()
-           try:
-               tmp.__name__ = f.__name__
-           except (AttributeError,TypeError):
-               # no need for special handling if attribute doesnt exist
-               pass
-           try:
-               tmp.__doc__ = f.__doc__
-           except (AttributeError,TypeError):
-               # no need for special handling if attribute doesnt exist
-               pass
-           try:
-               tmp.__dict__.update(f.__dict__)
-           except (AttributeError,TypeError):
-               # no need for special handling if attribute doesnt exist
-               pass
-           return tmp
-   _normalizeParseActionArgs = staticmethod(_normalizeParseActionArgs)

    def setParseAction( self, *fns, **kwargs ):
        """Define action to perform when successfully matching parse element definition.
        Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
@@ -868,13 +800,13 @@ class ParserElement(object):
        consistent view of the parsed string, the parse location, and line and column
        positions within the parsed string.
        """
-       self.parseAction = list(map(self._normalizeParseActionArgs, list(fns)))
+       self.parseAction = list(map(_trim_arity, list(fns)))
        self.callDuringTry = ("callDuringTry" in kwargs and kwargs["callDuringTry"])
        return self

    def addParseAction( self, *fns, **kwargs ):
        """Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}."""
-       self.parseAction += list(map(self._normalizeParseActionArgs, list(fns)))
+       self.parseAction += list(map(_trim_arity, list(fns)))
        self.callDuringTry = self.callDuringTry or ("callDuringTry" in kwargs and kwargs["callDuringTry"])
        return self

@@ -1012,9 +944,9 @@ class ParserElement(object):
            lookup = (self,instring,loc,callPreParse,doActions)
            if lookup in ParserElement._exprArgCache:
                value = ParserElement._exprArgCache[ lookup ]
-               if isinstance(value,Exception):
+               if isinstance(value, Exception):
                    raise value
-               return value
+               return (value[0],value[1].copy())
            else:
                try:
                    value = self._parseNoCache( instring, loc, doActions, callPreParse )
@@ -1088,8 +1020,8 @@ class ParserElement(object):
        try:
            loc, tokens = self._parse( instring, 0 )
            if parseAll:
-               #loc = self.preParse( instring, loc )
-               se = StringEnd()
+               loc = self.preParse( instring, loc )
+               se = Empty() + StringEnd()
                se._parse( instring, loc )
        except ParseBaseException:
            if ParserElement.verbose_stacktrace:
@ -1101,10 +1033,11 @@ class ParserElement(object):
|
|||||||
else:
|
else:
|
||||||
return tokens
|
return tokens
|
||||||
|
|
||||||
-    def scanString( self, instring, maxMatches=_MAX_INT ):
+    def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):
         """Scan the input string for expression matches. Each match will return the
            matching tokens, start location, and end location. May be called with optional
-           C{maxMatches} argument, to clip scanning after 'n' matches are found.
+           C{maxMatches} argument, to clip scanning after 'n' matches are found. If
+           C{overlap} is specified, then overlapping matches will be reported.

            Note that the start and end locations are reported relative to the string
            being parsed. See L{I{parseString}<parseString>} for more information on parsing
@@ -1133,7 +1066,14 @@ class ParserElement(object):
                 if nextLoc > loc:
                     matches += 1
                     yield tokens, preloc, nextLoc
-                    loc = nextLoc
+                    if overlap:
+                        nextloc = preparseFn( instring, loc )
+                        if nextloc > loc:
+                            loc = nextLoc
+                        else:
+                            loc += 1
+                    else:
+                        loc = nextLoc
                 else:
                     loc = preloc+1
             except ParseBaseException:
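[Illustration, not part of the patch: the effect of the new overlap flag, assuming the behavior implemented in the hunk above.]

    from pyparsing import Literal

    aa = Literal('aa')
    print(len(list(aa.scanString('aaaa'))))                # 2 non-overlapping matches
    print(len(list(aa.scanString('aaaa', overlap=True))))  # 3 overlapping matches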
@@ -1168,6 +1108,7 @@ class ParserElement(object):
                     out.append(t)
                 lastE = e
             out.append(instring[lastE:])
+            out = [o for o in out if o]
             return "".join(map(_ustr,_flatten(out)))
         except ParseBaseException:
             if ParserElement.verbose_stacktrace:
@@ -1372,6 +1313,9 @@ class ParserElement(object):
              userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
            could be written as::
              userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
+
+           If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
+           passed as C{True}.
            """
         return self.setResultsName(name)

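[Illustration, not part of the patch: the expr("name") shorthand and the new trailing-'*' form described above; the names are invented.]

    from pyparsing import Word, alphas

    entry = Word(alphas)('word*')      # trailing '*' implies listAllMatches=True
    grammar = entry + entry + entry
    print(grammar.parseString('one two three').word)   # -> ['one', 'two', 'three']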
@@ -1398,9 +1342,9 @@ class ParserElement(object):
         return self

     def parseWithTabs( self ):
-        """Overrides default behavior to expand <TAB>s to spaces before parsing the input string.
+        """Overrides default behavior to expand C{<TAB>}s to spaces before parsing the input string.
            Must be called before C{parseString} when the input grammar contains elements that
-           match <TAB> characters."""
+           match C{<TAB>} characters."""
         self.keepTabs = True
         return self

@@ -1508,12 +1452,10 @@ class Token(ParserElement):
     """Abstract C{ParserElement} subclass, for defining atomic matching patterns."""
     def __init__( self ):
         super(Token,self).__init__( savelist=False )
-        #self.myException = ParseException("",0,"",self)

     def setName(self, name):
         s = super(Token,self).setName(name)
         self.errmsg = "Expected " + self.name
-        #s.myException.msg = self.errmsg
         return s


@@ -1534,7 +1476,6 @@ class NoMatch(Token):
         self.mayReturnEmpty = True
         self.mayIndexError = False
         self.errmsg = "Unmatchable token"
-        #self.myException.msg = self.errmsg

     def parseImpl( self, instring, loc, doActions=True ):
         exc = self.myException
@@ -1558,7 +1499,6 @@ class Literal(Token):
         self.name = '"%s"' % _ustr(self.match)
         self.errmsg = "Expected " + self.name
         self.mayReturnEmpty = False
-        #self.myException.msg = self.errmsg
         self.mayIndexError = False

         # Performance tuning: this routine gets called a *lot*
@@ -1579,12 +1519,12 @@ _L = Literal
 class Keyword(Token):
     """Token to exactly match a specified string as a keyword, that is, it must be
        immediately followed by a non-keyword character. Compare with C{Literal}::
-         Literal("if") will match the leading 'if' in 'ifAndOnlyIf'.
-         Keyword("if") will not; it will only match the leading 'if' in 'if x=1', or 'if(y==2)'
+         Literal("if") will match the leading C{'if'} in C{'ifAndOnlyIf'}.
+         Keyword("if") will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
        Accepts two optional constructor arguments in addition to the keyword string:
        C{identChars} is a string of characters that would be valid identifier characters,
        defaulting to all alphanumerics + "_" and "$"; C{caseless} allows case-insensitive
-       matching, default is False.
+       matching, default is C{False}.
     """
     DEFAULT_KEYWORD_CHARS = alphanums+"_$"

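[Illustration, not part of the patch: the Literal/Keyword distinction the reworked docstring describes.]

    from pyparsing import Keyword, Literal, ParseException

    print(Literal('if').parseString('ifAndOnlyIf'))   # matches the leading 'if'
    try:
        Keyword('if').parseString('ifAndOnlyIf')
    except ParseException:
        print('Keyword will not match a mere prefix')
    print(Keyword('if').parseString('if x=1'))        # -> ['if']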
@@ -1600,13 +1540,12 @@ class Keyword(Token):
         self.name = '"%s"' % self.match
         self.errmsg = "Expected " + self.name
         self.mayReturnEmpty = False
-        #self.myException.msg = self.errmsg
         self.mayIndexError = False
         self.caseless = caseless
         if caseless:
             self.caselessmatch = matchString.upper()
             identChars = identChars.upper()
-        self.identChars = _str2dict(identChars)
+        self.identChars = set(identChars)

     def parseImpl( self, instring, loc, doActions=True ):
         if self.caseless:
@@ -1648,7 +1587,6 @@ class CaselessLiteral(Literal):
         self.returnString = matchString
         self.name = "'%s'" % self.returnString
         self.errmsg = "Expected " + self.name
-        #self.myException.msg = self.errmsg

     def parseImpl( self, instring, loc, doActions=True ):
         if instring[ loc:loc+self.matchLen ].upper() == self.match:
@@ -1680,18 +1618,25 @@ class Word(Token):
        defaults to the initial character set), and an optional minimum,
        maximum, and/or exact length. The default value for C{min} is 1 (a
        minimum value < 1 is not valid); the default values for C{max} and C{exact}
-       are 0, meaning no maximum or exact length restriction.
+       are 0, meaning no maximum or exact length restriction. An optional
+       C{exclude} parameter can list characters that might be found in
+       the input C{bodyChars} string; useful to define a word of all printables
+       except for one or two characters, for instance.
     """
-    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False ):
+    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
         super(Word,self).__init__()
+        if excludeChars:
+            initChars = ''.join([c for c in initChars if c not in excludeChars])
+            if bodyChars:
+                bodyChars = ''.join([c for c in bodyChars if c not in excludeChars])
         self.initCharsOrig = initChars
-        self.initChars = _str2dict(initChars)
+        self.initChars = set(initChars)
         if bodyChars :
             self.bodyCharsOrig = bodyChars
-            self.bodyChars = _str2dict(bodyChars)
+            self.bodyChars = set(bodyChars)
         else:
             self.bodyCharsOrig = initChars
-            self.bodyChars = _str2dict(initChars)
+            self.bodyChars = set(initChars)

         self.maxSpecified = max > 0

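[Illustration, not part of the patch: the new excludeChars parameter, a sketch of the use case the docstring mentions.]

    from pyparsing import Word, printables

    # A comma-delimited field: any printable character except the delimiter.
    field = Word(printables, excludeChars=',')
    print(field.parseString('abc,def'))   # -> ['abc']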
@@ -1711,7 +1656,6 @@ class Word(Token):

         self.name = _ustr(self)
         self.errmsg = "Expected " + self.name
-        #self.myException.msg = self.errmsg
         self.mayIndexError = False
         self.asKeyword = asKeyword

@@ -1743,7 +1687,7 @@ class Word(Token):
                 raise exc

             loc = result.end()
-            return loc,result.group()
+            return loc, result.group()

         if not(instring[ loc ] in self.initChars):
             #~ raise ParseException( instring, loc, self.errmsg )
@@ -1807,24 +1751,24 @@ class Regex(Token):
     """
     compiledREtype = type(re.compile("[A-Z]"))
     def __init__( self, pattern, flags=0):
-        """The parameters pattern and flags are passed to the re.compile() function as-is. See the Python re module for an explanation of the acceptable patterns and flags."""
+        """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags."""
         super(Regex,self).__init__()

         if isinstance(pattern, basestring):
             if len(pattern) == 0:
                 warnings.warn("null string passed to Regex; use Empty() instead",
                         SyntaxWarning, stacklevel=2)

             self.pattern = pattern
             self.flags = flags

             try:
                 self.re = re.compile(self.pattern, self.flags)
                 self.reString = self.pattern
             except sre_constants.error:
                 warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                     SyntaxWarning, stacklevel=2)
                 raise

         elif isinstance(pattern, Regex.compiledREtype):
             self.re = pattern
@@ -1837,7 +1781,6 @@ class Regex(Token):

         self.name = _ustr(self)
         self.errmsg = "Expected " + self.name
-        #self.myException.msg = self.errmsg
         self.mayIndexError = False
         self.mayReturnEmpty = True

@@ -1929,7 +1872,8 @@ class QuotedString(Token):
             self.pattern += (r'|(?:%s)' % re.escape(escQuote))
         if escChar:
             self.pattern += (r'|(?:%s.)' % re.escape(escChar))
-            self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
+            charset = ''.join(set(self.quoteChar[0]+self.endQuoteChar[0])).replace('^',r'\^').replace('-',r'\-')
+            self.escCharReplacePattern = re.escape(self.escChar)+("([%s])" % charset)
         self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

         try:
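[Illustration, not part of the patch: after this change, only escaped quote characters should be unescaped in the result, while other backslashes pass through. A sketch assuming that reading of the new pattern.]

    from pyparsing import QuotedString

    qs = QuotedString('"', escChar='\\')
    print(qs.parseString(r'"a \"quoted\" word, C:\temp"'))
    # expected: ['a "quoted" word, C:\temp']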
@@ -1942,7 +1886,6 @@ class QuotedString(Token):

         self.name = _ustr(self)
         self.errmsg = "Expected " + self.name
-        #self.myException.msg = self.errmsg
         self.mayIndexError = False
         self.mayReturnEmpty = True

@@ -2014,7 +1957,6 @@ class CharsNotIn(Token):
         self.name = _ustr(self)
         self.errmsg = "Expected " + self.name
         self.mayReturnEmpty = ( self.minLen == 0 )
-        #self.myException.msg = self.errmsg
         self.mayIndexError = False

     def parseImpl( self, instring, loc, doActions=True ):
@@ -2077,7 +2019,6 @@ class White(Token):
         self.name = ("".join([White.whiteStrs[c] for c in self.matchWhite]))
         self.mayReturnEmpty = True
         self.errmsg = "Expected " + self.name
-        #self.myException.msg = self.errmsg

         self.minLen = min

@@ -2150,7 +2091,6 @@ class LineStart(_PositionToken):
         super(LineStart,self).__init__()
         self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
         self.errmsg = "Expected start of line"
-        #self.myException.msg = self.errmsg

     def preParse( self, instring, loc ):
         preloc = super(LineStart,self).preParse(instring,loc)
@@ -2175,7 +2115,6 @@ class LineEnd(_PositionToken):
         super(LineEnd,self).__init__()
         self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
         self.errmsg = "Expected end of line"
-        #self.myException.msg = self.errmsg

     def parseImpl( self, instring, loc, doActions=True ):
         if loc<len(instring):
@@ -2200,7 +2139,6 @@ class StringStart(_PositionToken):
     def __init__( self ):
         super(StringStart,self).__init__()
         self.errmsg = "Expected start of text"
-        #self.myException.msg = self.errmsg

     def parseImpl( self, instring, loc, doActions=True ):
         if loc != 0:
@@ -2218,7 +2156,6 @@ class StringEnd(_PositionToken):
     def __init__( self ):
         super(StringEnd,self).__init__()
         self.errmsg = "Expected end of text"
-        #self.myException.msg = self.errmsg

     def parseImpl( self, instring, loc, doActions=True ):
         if loc < len(instring):
@@ -2239,14 +2176,14 @@ class StringEnd(_PositionToken):

 class WordStart(_PositionToken):
     """Matches if the current position is at the beginning of a Word, and
-       is not preceded by any character in a given set of wordChars
+       is not preceded by any character in a given set of C{wordChars}
        (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
        use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
        the string being parsed, or at the beginning of a line.
     """
     def __init__(self, wordChars = printables):
         super(WordStart,self).__init__()
-        self.wordChars = _str2dict(wordChars)
+        self.wordChars = set(wordChars)
         self.errmsg = "Not at the start of a word"

     def parseImpl(self, instring, loc, doActions=True ):
@@ -2261,14 +2198,14 @@ class WordStart(_PositionToken):

 class WordEnd(_PositionToken):
     """Matches if the current position is at the end of a Word, and
-       is not followed by any character in a given set of wordChars
+       is not followed by any character in a given set of C{wordChars}
        (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
        use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
        the string being parsed, or at the end of a line.
     """
     def __init__(self, wordChars = printables):
         super(WordEnd,self).__init__()
-        self.wordChars = _str2dict(wordChars)
+        self.wordChars = set(wordChars)
         self.skipWhitespace = False
         self.errmsg = "Not at the end of a word"

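[Illustration, not part of the patch: emulating the regex \b boundary with WordStart/WordEnd, per the docstrings above.]

    from pyparsing import Word, WordStart, WordEnd, alphanums, nums

    number = WordStart(alphanums) + Word(nums) + WordEnd(alphanums)
    for tokens, start, end in number.scanString('123 abc456 789def 0'):
        print(tokens[0])   # prints 123 and 0; embedded digit runs are skipped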
@@ -2309,7 +2246,7 @@ class ParseExpression(ParserElement):
         return self

     def leaveWhitespace( self ):
-        """Extends leaveWhitespace defined in base class, and also invokes leaveWhitespace on
+        """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
            all contained expressions."""
         self.skipWhitespace = False
         self.exprs = [ e.copy() for e in self.exprs ]
@@ -2380,11 +2317,16 @@ class ParseExpression(ParserElement):
         for e in self.exprs:
             e.validate(tmp)
         self.checkRecursion( [] )

+    def copy(self):
+        ret = super(ParseExpression,self).copy()
+        ret.exprs = [e.copy() for e in self.exprs]
+        return ret
+
 class And(ParseExpression):
-    """Requires all given C{ParseExpressions} to be found in the given order.
+    """Requires all given C{ParseExpression}s to be found in the given order.
        Expressions may be separated by whitespace.
-       May be constructed using the '+' operator.
+       May be constructed using the C{'+'} operator.
     """

     class _ErrorStop(Empty):
@@ -2453,7 +2395,7 @@ class And(ParseExpression):
 class Or(ParseExpression):
     """Requires that at least one C{ParseExpression} is found.
        If two expressions match, the expression that matches the longest string will be used.
-       May be constructed using the '^' operator.
+       May be constructed using the C{'^'} operator.
     """
     def __init__( self, exprs, savelist = False ):
         super(Or,self).__init__(exprs, savelist)
@@ -2515,7 +2457,7 @@ class Or(ParseExpression):
 class MatchFirst(ParseExpression):
     """Requires that at least one C{ParseExpression} is found.
        If two expressions match, the first one listed is the one that will match.
-       May be constructed using the '|' operator.
+       May be constructed using the C{'|'} operator.
     """
     def __init__( self, exprs, savelist = False ):
         super(MatchFirst,self).__init__(exprs, savelist)
@@ -2572,9 +2514,9 @@ class MatchFirst(ParseExpression):


 class Each(ParseExpression):
-    """Requires all given C{ParseExpressions} to be found, but in any order.
+    """Requires all given C{ParseExpression}s to be found, but in any order.
        Expressions may be separated by whitespace.
-       May be constructed using the '&' operator.
+       May be constructed using the C{'&'} operator.
     """
     def __init__( self, exprs, savelist = True ):
         super(Each,self).__init__(exprs, savelist)
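[Illustration, not part of the patch: the longest-match ('^') versus first-match ('|') distinction drawn in the docstrings above.]

    from pyparsing import Combine, Word, nums

    integer = Word(nums)
    real = Combine(Word(nums) + '.' + Word(nums))
    print((integer | real).parseString('3.1416'))   # MatchFirst -> ['3']
    print((integer ^ real).parseString('3.1416'))   # Or, longest -> ['3.1416']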
@@ -2757,7 +2699,6 @@ class NotAny(ParseElementEnhance):
         self.skipWhitespace = False  # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
         self.mayReturnEmpty = True
         self.errmsg = "Found unwanted token, "+_ustr(self.expr)
-        #self.myException = ParseException("",0,self.errmsg,self)

     def parseImpl( self, instring, loc, doActions=True ):
         try:
@@ -2916,7 +2857,6 @@ class SkipTo(ParseElementEnhance):
         else:
             self.failOn = failOn
         self.errmsg = "No match found for "+_ustr(self.expr)
-        #self.myException = ParseException("",0,self.errmsg,self)

     def parseImpl( self, instring, loc, doActions=True ):
         startLoc = loc
@@ -3040,7 +2980,7 @@ class _ForwardNoRecurse(Forward):
         return "..."

 class TokenConverter(ParseElementEnhance):
-    """Abstract subclass of ParseExpression, for converting parsed results."""
+    """Abstract subclass of C{ParseExpression}, for converting parsed results."""
     def __init__( self, expr, savelist=False ):
         super(TokenConverter,self).__init__( expr )#, savelist )
         self.saveAsList = False
@@ -3089,7 +3029,7 @@ class Combine(TokenConverter):
         return retToks

 class Group(TokenConverter):
-    """Converter to return the matched tokens as a list - useful for returning tokens of ZeroOrMore and OneOrMore expressions."""
+    """Converter to return the matched tokens as a list - useful for returning tokens of C{ZeroOrMore} and C{OneOrMore} expressions."""
     def __init__( self, expr ):
         super(Group,self).__init__( expr )
         self.saveAsList = True
@@ -3143,7 +3083,7 @@ class Suppress(TokenConverter):
 class OnlyOnce(object):
     """Wrapper for parse actions, to ensure they are only called once."""
     def __init__(self, methodCall):
-        self.callable = ParserElement._normalizeParseActionArgs(methodCall)
+        self.callable = _trim_arity(methodCall)
         self.called = False
     def __call__(self,s,l,t):
         if not self.called:
@@ -3156,7 +3096,7 @@ class OnlyOnce(object):

 def traceParseAction(f):
     """Decorator for debugging parse actions."""
-    f = ParserElement._normalizeParseActionArgs(f)
+    f = _trim_arity(f)
     def z(*paArgs):
         thisFunc = f.func_name
         s,l,t = paArgs[-3:]
@@ -3194,7 +3134,7 @@ def delimitedList( expr, delim=",", combine=False ):
     else:
         return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName)

-def countedArray( expr ):
+def countedArray( expr, intExpr=None ):
     """Helper to define a counted list of expressions.
        This helper defines a pattern of the form::
            integer expr expr expr...
@@ -3203,15 +3143,25 @@ def countedArray( expr ):
     """
     arrayExpr = Forward()
     def countFieldParseAction(s,l,t):
-        n = int(t[0])
+        n = t[0]
         arrayExpr << (n and Group(And([expr]*n)) or Group(empty))
         return []
-    return ( Word(nums).setName("arrayLen").setParseAction(countFieldParseAction, callDuringTry=True) + arrayExpr )
+    if intExpr is None:
+        intExpr = Word(nums).setParseAction(lambda t:int(t[0]))
+    else:
+        intExpr = intExpr.copy()
+    intExpr.setName("arrayLen")
+    intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
+    return ( intExpr + arrayExpr )

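[Illustration, not part of the patch: the new intExpr parameter lets the leading count be parsed by a custom expression; the hex counter below is invented.]

    from pyparsing import countedArray, Word, alphas

    print(countedArray(Word(alphas)).parseString('3 ab cd ef'))   # -> [['ab', 'cd', 'ef']]
    hexCount = Word('0123456789abcdef').setParseAction(lambda t: int(t[0], 16))
    print(countedArray(Word(alphas), intExpr=hexCount).parseString('3 ab cd ef'))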
 def _flatten(L):
-    if type(L) is not list: return [L]
-    if L == []: return L
-    return _flatten(L[0]) + _flatten(L[1:])
+    ret = []
+    for i in L:
+        if isinstance(i,list):
+            ret.extend(_flatten(i))
+        else:
+            ret.append(i)
+    return ret

 def matchPreviousLiteral(expr):
     """Helper to define an expression that is indirectly defined from
@@ -3346,15 +3296,15 @@ def originalTextFor(expr, asString=True):
     """Helper to return the original, untokenized text for a given expression. Useful to
        restore the parsed fields of an HTML start tag into the raw tag text itself, or to
        revert separate tokens with intervening whitespace back to the original matching
-       input text. Simpler to use than the parse action C{keepOriginalText}, and does not
+       input text. Simpler to use than the parse action C{L{keepOriginalText}}, and does not
        require the inspect module to chase up the call stack. By default, returns a
        string containing the original parsed text.

-       If the optional C{asString} argument is passed as False, then the return value is a
+       If the optional C{asString} argument is passed as C{False}, then the return value is a
        C{ParseResults} containing any results names that were originally matched, and a
        single token containing the original matched text from the input string. So if
-       the expression passed to C{originalTextFor} contains expressions with defined
-       results names, you must set C{asString} to False if you want to preserve those
+       the expression passed to C{L{originalTextFor}} contains expressions with defined
+       results names, you must set C{asString} to C{False} if you want to preserve those
        results name values."""
     locMarker = Empty().setParseAction(lambda s,loc,t: loc)
     endlocMarker = locMarker.copy()
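[Illustration, not part of the patch: a typical originalTextFor use, per the docstring above.]

    from pyparsing import SkipTo, makeHTMLTags, originalTextFor

    opener, closer = makeHTMLTags('b')
    bold = originalTextFor(opener + SkipTo(closer) + closer)
    print(bold.parseString('<b>bold <i>text</i></b>'))
    # -> ['<b>bold <i>text</i></b>'], the raw slice of input, not the token pieces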
@@ -3370,7 +3320,12 @@ def originalTextFor(expr, asString=True):
             del t["_original_end"]
     matchExpr.setParseAction(extractText)
     return matchExpr

+def ungroup(expr):
+    """Helper to undo pyparsing's default grouping of And expressions, even
+       if all but one are non-empty."""
+    return TokenConverter(expr).setParseAction(lambda t:t[0])
+
 # convenience constants for positional expressions
 empty = Empty().setName("empty")
 lineStart = LineStart().setName("lineStart")
@@ -3380,8 +3335,8 @@ stringEnd = StringEnd().setName("stringEnd")

 _escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
 _printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ])
-_escapedHexChar = Combine( Suppress(_bslash + "0x") + Word(hexnums) ).setParseAction(lambda s,l,t:unichr(int(t[0],16)))
-_escapedOctChar = Combine( Suppress(_bslash) + Word("0","01234567") ).setParseAction(lambda s,l,t:unichr(int(t[0],8)))
+_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],16)))
+_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
 _singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1)
 _charRange = Group(_singleChar + Suppress("-") + _singleChar)
 _reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
@@ -3399,7 +3354,8 @@ def srange(s):
        The values enclosed in the []'s may be::
           a single character
           an escaped character with a leading backslash (such as \- or \])
-          an escaped hex character with a leading '\0x' (\0x21, which is a '!' character)
+          an escaped hex character with a leading '\x' (\x21, which is a '!' character)
+            (\0x## is also supported for backwards compatibility)
           an escaped octal character with a leading '\0' (\041, which is a '!' character)
           a range of any of the above, separated by a dash ('a-z', etc.)
           any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)
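[Illustration, not part of the patch: srange expanding regex-style character ranges, including the escape forms listed above.]

    from pyparsing import Word, srange

    print(srange('[a-d0-3]'))                # -> abcd0123
    ident = Word(srange('[a-zA-Z_]'), srange('[a-zA-Z0-9_]'))
    print(ident.parseString('_myVar1'))      # -> ['_myVar1']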
@@ -3486,7 +3442,7 @@ def _makeTags(tagStr, xml):
     else:
         printablesLessRAbrack = "".join( [ c for c in printables if c not in ">" ] )
         tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
-        openTag = Suppress("<") + tagStr + \
+        openTag = Suppress("<") + tagStr("tag") + \
                 Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) + \
                 Optional( Suppress("=") + tagAttrValue ) ))) + \
                 Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
@@ -3508,19 +3464,21 @@ def makeXMLTags(tagStr):

 def withAttribute(*args,**attrDict):
     """Helper to create a validating parse action to be used with start tags created
-       with makeXMLTags or makeHTMLTags. Use withAttribute to qualify a starting tag
+       with C{makeXMLTags} or C{makeHTMLTags}. Use C{withAttribute} to qualify a starting tag
        with a required attribute value, to avoid false matches on common tags such as
-       <TD> or <DIV>.
+       C{<TD>} or C{<DIV>}.

-       Call withAttribute with a series of attribute names and values. Specify the list
+       Call C{withAttribute} with a series of attribute names and values. Specify the list
        of filter attributes names and values as:
-        - keyword arguments, as in (class="Customer",align="right"), or
+        - keyword arguments, as in C{(align="right")}, or
+        - as an explicit dict with C{**} operator, when an attribute name is also a Python
+          reserved word, as in C{**{"class":"Customer", "align":"right"}}
         - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
        For attribute names with a namespace prefix, you must use the second form. Attribute
        names are matched insensitive to upper/lower case.

        To verify that the attribute exists, but without specifying a value, pass
-       withAttribute.ANY_VALUE as the value.
+       C{withAttribute.ANY_VALUE} as the value.
     """
     if args:
         attrs = args[:]
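[Illustration, not part of the patch: qualifying a start tag by attribute value, as the docstring describes.]

    from pyparsing import makeHTMLTags, withAttribute

    td, tdEnd = makeHTMLTags('td')
    rightCell = td.copy().setParseAction(withAttribute(align='right'))
    html = '<td align="right">1</td><td>2</td>'
    print([start for t, start, end in rightCell.scanString(html)])   # -> [0]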
@@ -3631,12 +3589,12 @@ def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.cop
        expression will capture all whitespace-delimited content between delimiters
        as a list of separate values.

-       Use the ignoreExpr argument to define expressions that may contain
+       Use the C{ignoreExpr} argument to define expressions that may contain
        opening or closing characters that should not be treated as opening
        or closing characters for nesting, such as quotedString or a comment
-       expression. Specify multiple expressions using an Or or MatchFirst.
-       The default is quotedString, but if no expressions are to be ignored,
-       then pass None for this argument.
+       expression. Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
+       The default is L{quotedString}, but if no expressions are to be ignored,
+       then pass C{None} for this argument.
     """
     if opener == closer:
         raise ValueError("opening and closing strings cannot be the same")
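[Illustration, not part of the patch: the default ignoreExpr in action; quotedString keeps a quoted ')' from closing the group.]

    from pyparsing import nestedExpr

    print(nestedExpr().parseString('(a (b ")") c)'))
    # expected: [['a', ['b', '")"'], 'c']]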
@@ -3683,7 +3641,7 @@ def indentedBlock(blockStatementExpr, indentStack, indent=True):
        the current level; set to False for block of left-most statements
        (default=True)

-       A valid block must contain at least one blockStatement.
+       A valid block must contain at least one C{blockStatement}.
     """
     def checkPeerIndent(s,l,t):
         if l >= len(s): return

@@ -16,11 +16,11 @@ methods :method:`SearchQueryParser.universal_set` and
 If this module is run, it will perform a series of unit tests.
 '''

-import sys, operator
+import sys, operator, weakref

-from calibre.utils.pyparsing import CaselessKeyword, Group, Forward, \
-        CharsNotIn, Suppress, OneOrMore, MatchFirst, CaselessLiteral, \
-        Optional, NoMatch, ParseException, QuotedString
+from calibre.utils.pyparsing import (CaselessKeyword, Group, Forward,
+        CharsNotIn, Suppress, OneOrMore, MatchFirst, CaselessLiteral,
+        Optional, NoMatch, ParseException, QuotedString)
 from calibre.constants import preferred_encoding
 from calibre.utils.icu import sort_key
 from calibre import prints
@@ -37,11 +37,19 @@ class SavedSearchQueries(object):

     def __init__(self, db, _opt_name):
         self.opt_name = _opt_name;
-        self.db = db
         if db is not None:
             self.queries = db.prefs.get(self.opt_name, {})
         else:
             self.queries = {}
+        try:
+            self._db = weakref.ref(db)
+        except:
+            # db could be None
+            self._db = lambda : None
+
+    @property
+    def db(self):
+        return self._db()

     def force_unicode(self, x):
         if not isinstance(x, unicode):
@@ -49,21 +57,27 @@ class SavedSearchQueries(object):
         return x

     def add(self, name, value):
-        self.queries[self.force_unicode(name)] = self.force_unicode(value).strip()
-        self.db.prefs[self.opt_name] = self.queries
+        db = self.db
+        if db is not None:
+            self.queries[self.force_unicode(name)] = self.force_unicode(value).strip()
+            db.prefs[self.opt_name] = self.queries

     def lookup(self, name):
         return self.queries.get(self.force_unicode(name), None)

     def delete(self, name):
-        self.queries.pop(self.force_unicode(name), False)
-        self.db.prefs[self.opt_name] = self.queries
+        db = self.db
+        if db is not None:
+            self.queries.pop(self.force_unicode(name), False)
+            db.prefs[self.opt_name] = self.queries

     def rename(self, old_name, new_name):
-        self.queries[self.force_unicode(new_name)] = \
-            self.queries.get(self.force_unicode(old_name), None)
-        self.queries.pop(self.force_unicode(old_name), False)
-        self.db.prefs[self.opt_name] = self.queries
+        db = self.db
+        if db is not None:
+            self.queries[self.force_unicode(new_name)] = \
+                self.queries.get(self.force_unicode(old_name), None)
+            self.queries.pop(self.force_unicode(old_name), False)
+            db.prefs[self.opt_name] = self.queries

     def names(self):
         return sorted(self.queries.keys(),key=sort_key)

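[Illustration, not part of the patch: the weak-reference pattern adopted above, reduced to a minimal, hypothetical class.]

    import weakref

    class Holder(object):
        def __init__(self, db):
            try:
                self._db = weakref.ref(db)   # do not keep db alive
            except TypeError:
                # db could be None; weakref.ref(None) raises TypeError
                self._db = lambda: None

        @property
        def db(self):
            return self._db()   # None once the referent is gone (or was None)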
@@ -144,6 +144,18 @@ class BasicNewsRecipe(Recipe):
     #: manually (though manual cleanup will always be superior).
     auto_cleanup = False

+    #: Specify elements that the auto cleanup algorithm should never remove
+    #: The syntax is a XPath expression. For example::
+    #:
+    #:   auto_cleanup_keep = '//div[@id="article-image"]' will keep all divs with
+    #:       id="article-image"
+    #:   auto_cleanup_keep = '//*[@class="important"]' will keep all elements
+    #:       with class="important"
+    #:   auto_cleanup_keep = '//div[@id="article-image"]|//span[@class="important"]'
+    #:   will keep all divs with id="article-image" and spans
+    #:   with class="important"
+    auto_cleanup_keep = None
+
     #: Specify any extra :term:`CSS` that should be addded to downloaded :term:`HTML` files
     #: It will be inserted into `<style>` tags, just before the closing
     #: `</head>` tag thereby overriding all :term:`CSS` except that which is
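[Illustration, not part of the patch: a hypothetical recipe using the new attribute.]

    from calibre.web.feeds.news import BasicNewsRecipe

    class ExampleRecipe(BasicNewsRecipe):
        title = 'Example'
        auto_cleanup = True
        # keep the article image div that auto cleanup would otherwise strip
        auto_cleanup_keep = '//div[@id="article-image"]'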
@@ -552,7 +564,8 @@ class BasicNewsRecipe(Recipe):
         from lxml.html import (fragment_fromstring, tostring,
                 document_fromstring)

-        doc = readability.Document(html, self.log, url=url)
+        doc = readability.Document(html, self.log, url=url,
+                keep_elements=self.auto_cleanup_keep)
         article_html = doc.summary()
         extracted_title = doc.title()

|
@ -22,7 +22,7 @@ E = ElementMaker(namespace=NS, nsmap={None:NS})
|
|||||||
|
|
||||||
def iterate_over_builtin_recipe_files():
|
def iterate_over_builtin_recipe_files():
|
||||||
exclude = ['craigslist', 'iht', 'toronto_sun',
|
exclude = ['craigslist', 'iht', 'toronto_sun',
|
||||||
'india_today', 'livemint']
|
'livemint']
|
||||||
d = os.path.dirname
|
d = os.path.dirname
|
||||||
base = os.path.join(d(d(d(d(d(d(os.path.abspath(__file__))))))), 'recipes')
|
base = os.path.join(d(d(d(d(d(d(os.path.abspath(__file__))))))), 'recipes')
|
||||||
for f in os.listdir(base):
|
for f in os.listdir(base):
|
||||||
|
@@ -75,7 +75,7 @@ MD5_SESS = "MD5-sess"
 AUTH = "auth"
 AUTH_INT = "auth-int"

-SUPPORTED_ALGORITHM = ('md5', MD5, MD5_SESS)
+SUPPORTED_ALGORITHM = ('md5', MD5, MD5_SESS) # Changed by Kovid
 SUPPORTED_QOP = (AUTH, AUTH_INT)

 ################################################################################
@@ -83,7 +83,7 @@ SUPPORTED_QOP = (AUTH, AUTH_INT)
 #
 DIGEST_AUTH_ENCODERS = {
     MD5: lambda val: md5(val).hexdigest(),
-    'md5': lambda val:md5(val).hexdigest(),
+    'md5': lambda val:md5(val).hexdigest(), # Added by Kovid
     MD5_SESS: lambda val: md5(val).hexdigest(),
 #    SHA: lambda val: sha(val).hexdigest(),
 }
@@ -225,7 +225,7 @@ def _A1(params, password):
     algorithm = params.get ("algorithm", MD5)
     H = DIGEST_AUTH_ENCODERS[algorithm]

-    if algorithm in (MD5, 'md5'):
+    if algorithm in (MD5, 'md5'): # Changed by Kovid
         # If the "algorithm" directive's value is "MD5" or is
         # unspecified, then A1 is:
         #    A1 = unq(username-value) ":" unq(realm-value) ":" passwd

@ -33,13 +33,13 @@ missing = object()
|
|||||||
|
|
||||||
class Session(object):
|
class Session(object):
|
||||||
"""A CherryPy dict-like Session object (one per request)."""
|
"""A CherryPy dict-like Session object (one per request)."""
|
||||||
|
|
||||||
__metaclass__ = cherrypy._AttributeDocstrings
|
__metaclass__ = cherrypy._AttributeDocstrings
|
||||||
|
|
||||||
_id = None
|
_id = None
|
||||||
id_observers = None
|
id_observers = None
|
||||||
id_observers__doc = "A list of callbacks to which to pass new id's."
|
id_observers__doc = "A list of callbacks to which to pass new id's."
|
||||||
|
|
||||||
id__doc = "The current session ID."
|
id__doc = "The current session ID."
|
||||||
def _get_id(self):
|
def _get_id(self):
|
||||||
return self._id
|
return self._id
|
||||||
@ -48,33 +48,33 @@ class Session(object):
|
|||||||
for o in self.id_observers:
|
for o in self.id_observers:
|
||||||
o(value)
|
o(value)
|
||||||
id = property(_get_id, _set_id, doc=id__doc)
|
id = property(_get_id, _set_id, doc=id__doc)
|
||||||
|
|
||||||
timeout = 60
|
timeout = 60
|
||||||
timeout__doc = "Number of minutes after which to delete session data."
|
timeout__doc = "Number of minutes after which to delete session data."
|
||||||
|
|
||||||
locked = False
|
locked = False
|
||||||
locked__doc = """
|
locked__doc = """
|
||||||
If True, this session instance has exclusive read/write access
|
If True, this session instance has exclusive read/write access
|
||||||
to session data."""
|
to session data."""
|
||||||
|
|
||||||
loaded = False
|
loaded = False
|
||||||
loaded__doc = """
|
loaded__doc = """
|
||||||
If True, data has been retrieved from storage. This should happen
|
If True, data has been retrieved from storage. This should happen
|
||||||
automatically on the first attempt to access session data."""
|
automatically on the first attempt to access session data."""
|
||||||
|
|
||||||
clean_thread = None
|
clean_thread = None
|
||||||
clean_thread__doc = "Class-level Monitor which calls self.clean_up."
|
clean_thread__doc = "Class-level Monitor which calls self.clean_up."
|
||||||
|
|
||||||
clean_freq = 5
|
clean_freq = 5
|
||||||
clean_freq__doc = "The poll rate for expired session cleanup in minutes."
|
clean_freq__doc = "The poll rate for expired session cleanup in minutes."
|
||||||
|
|
||||||
def __init__(self, id=None, **kwargs):
|
def __init__(self, id=None, **kwargs):
|
||||||
self.id_observers = []
|
self.id_observers = []
|
||||||
self._data = {}
|
self._data = {}
|
||||||
|
|
||||||
for k, v in kwargs.iteritems():
|
for k, v in kwargs.iteritems():
|
||||||
setattr(self, k, v)
|
setattr(self, k, v)
|
||||||
|
|
||||||
if id is None:
|
if id is None:
|
||||||
self.regenerate()
|
self.regenerate()
|
||||||
else:
|
else:
|
||||||
@ -84,30 +84,30 @@ class Session(object):
|
|||||||
# See http://www.cherrypy.org/ticket/709.
|
# See http://www.cherrypy.org/ticket/709.
|
||||||
self.id = None
|
self.id = None
|
||||||
self.regenerate()
|
self.regenerate()
|
||||||
|
|
||||||
def regenerate(self):
|
def regenerate(self):
|
||||||
"""Replace the current session (with a new id)."""
|
"""Replace the current session (with a new id)."""
|
||||||
if self.id is not None:
|
if self.id is not None:
|
||||||
self.delete()
|
self.delete()
|
||||||
|
|
||||||
old_session_was_locked = self.locked
|
old_session_was_locked = self.locked
|
||||||
if old_session_was_locked:
|
if old_session_was_locked:
|
||||||
self.release_lock()
|
self.release_lock()
|
||||||
|
|
||||||
self.id = None
|
self.id = None
|
||||||
while self.id is None:
|
while self.id is None:
|
||||||
self.id = self.generate_id()
|
self.id = self.generate_id()
|
||||||
# Assert that the generated id is not already stored.
|
# Assert that the generated id is not already stored.
|
||||||
if self._exists():
|
if self._exists():
|
||||||
self.id = None
|
self.id = None
|
||||||
|
|
||||||
if old_session_was_locked:
|
if old_session_was_locked:
|
||||||
self.acquire_lock()
|
self.acquire_lock()
|
||||||
|
|
||||||
def clean_up(self):
|
def clean_up(self):
|
||||||
"""Clean up expired sessions."""
|
"""Clean up expired sessions."""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
try:
|
try:
|
||||||
os.urandom(20)
|
os.urandom(20)
|
||||||
except (AttributeError, NotImplementedError):
|
except (AttributeError, NotImplementedError):
|
||||||
@ -119,7 +119,7 @@ class Session(object):
|
|||||||
def generate_id(self):
|
def generate_id(self):
|
||||||
"""Return a new session id."""
|
"""Return a new session id."""
|
||||||
return os.urandom(20).encode('hex')
|
return os.urandom(20).encode('hex')
|
||||||
|
|
||||||
def save(self):
|
def save(self):
|
||||||
"""Save session data."""
|
"""Save session data."""
|
||||||
try:
|
try:
|
||||||
@ -129,12 +129,12 @@ class Session(object):
|
|||||||
t = datetime.timedelta(seconds = self.timeout * 60)
|
t = datetime.timedelta(seconds = self.timeout * 60)
|
||||||
expiration_time = datetime.datetime.now() + t
|
expiration_time = datetime.datetime.now() + t
|
||||||
self._save(expiration_time)
|
self._save(expiration_time)
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
if self.locked:
|
if self.locked:
|
||||||
# Always release the lock if the user didn't release it
|
# Always release the lock if the user didn't release it
|
||||||
self.release_lock()
|
self.release_lock()
|
||||||
|
|
||||||
def load(self):
|
def load(self):
|
||||||
"""Copy stored session data into this session instance."""
|
"""Copy stored session data into this session instance."""
|
||||||
data = self._load()
|
data = self._load()
|
||||||
@ -145,7 +145,7 @@ class Session(object):
|
|||||||
else:
|
else:
|
||||||
self._data = data[0]
|
self._data = data[0]
|
||||||
self.loaded = True
|
self.loaded = True
|
||||||
|
|
||||||
# Stick the clean_thread in the class, not the instance.
|
# Stick the clean_thread in the class, not the instance.
|
||||||
# The instances are created and destroyed per-request.
|
# The instances are created and destroyed per-request.
|
||||||
cls = self.__class__
|
cls = self.__class__
|
||||||
@ -157,23 +157,23 @@ class Session(object):
|
|||||||
t.subscribe()
|
t.subscribe()
|
||||||
cls.clean_thread = t
|
cls.clean_thread = t
|
||||||
t.start()
|
t.start()
|
||||||
|
|
||||||
def delete(self):
|
def delete(self):
|
||||||
"""Delete stored session data."""
|
"""Delete stored session data."""
|
||||||
self._delete()
|
self._delete()
|
||||||
|
|
||||||
def __getitem__(self, key):
|
def __getitem__(self, key):
|
||||||
if not self.loaded: self.load()
|
if not self.loaded: self.load()
|
||||||
return self._data[key]
|
return self._data[key]
|
||||||
|
|
||||||
def __setitem__(self, key, value):
|
def __setitem__(self, key, value):
|
||||||
if not self.loaded: self.load()
|
if not self.loaded: self.load()
|
||||||
self._data[key] = value
|
self._data[key] = value
|
||||||
|
|
||||||
def __delitem__(self, key):
|
def __delitem__(self, key):
|
||||||
if not self.loaded: self.load()
|
if not self.loaded: self.load()
|
||||||
del self._data[key]
|
del self._data[key]
|
||||||
|
|
||||||
def pop(self, key, default=missing):
|
def pop(self, key, default=missing):
|
||||||
"""Remove the specified key and return the corresponding value.
|
"""Remove the specified key and return the corresponding value.
|
||||||
If key is not found, default is returned if given,
|
If key is not found, default is returned if given,
|
||||||
@ -184,46 +184,46 @@ class Session(object):
|
|||||||
return self._data.pop(key)
|
return self._data.pop(key)
|
||||||
else:
|
else:
|
||||||
return self._data.pop(key, default)
|
return self._data.pop(key, default)
|
||||||

    def __contains__(self, key):
        if not self.loaded: self.load()
        return key in self._data

    def has_key(self, key):
        """D.has_key(k) -> True if D has a key k, else False."""
        if not self.loaded: self.load()
        return self._data.has_key(key)

    def get(self, key, default=None):
        """D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None."""
        if not self.loaded: self.load()
        return self._data.get(key, default)

    def update(self, d):
        """D.update(E) -> None. Update D from E: for k in E: D[k] = E[k]."""
        if not self.loaded: self.load()
        self._data.update(d)

    def setdefault(self, key, default=None):
        """D.setdefault(k[,d]) -> D.get(k,d), also set D[k]=d if k not in D."""
        if not self.loaded: self.load()
        return self._data.setdefault(key, default)

    def clear(self):
        """D.clear() -> None. Remove all items from D."""
        if not self.loaded: self.load()
        self._data.clear()

    def keys(self):
        """D.keys() -> list of D's keys."""
        if not self.loaded: self.load()
        return self._data.keys()

    def items(self):
        """D.items() -> list of D's (key, value) pairs, as 2-tuples."""
        if not self.loaded: self.load()
        return self._data.items()

    def values(self):
        """D.values() -> list of D's values."""
        if not self.loaded: self.load()
@@ -231,11 +231,11 @@ class Session(object):


class RamSession(Session):

    # Class-level objects. Don't rebind these!
    cache = {}
    locks = {}

    def clean_up(self):
        """Clean up expired sessions."""
        now = datetime.datetime.now()
@@ -249,29 +249,29 @@ class RamSession(Session):
                del self.locks[id]
            except KeyError:
                pass

    def _exists(self):
        return self.id in self.cache

    def _load(self):
        return self.cache.get(self.id)

    def _save(self, expiration_time):
        self.cache[self.id] = (self._data, expiration_time)

    def _delete(self):
        del self.cache[self.id]

    def acquire_lock(self):
        """Acquire an exclusive lock on the currently-loaded session data."""
        self.locked = True
        self.locks.setdefault(self.id, threading.RLock()).acquire()

    def release_lock(self):
        """Release the lock on the currently-loaded session data."""
        self.locks[self.id].release()
        self.locked = False

    def __len__(self):
        """Return the number of active sessions."""
        return len(self.cache)
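# An illustrative configuration sketch (standard CherryPy tool keys; values
# are examples only). RamSession keeps everything in a class-level dict, so
# sessions survive requests but not a process restart, and are per-process:
#
#   config = {'tools.sessions.on': True,
#             'tools.sessions.storage_type': 'ram',
#             'tools.sessions.timeout': 60}   # minutes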
@@ -279,32 +279,32 @@ class RamSession(Session):

class FileSession(Session):
    """Implementation of the File backend for sessions

    storage_path: the folder where session data will be saved. Each session
        will be saved as pickle.dump(data, expiration_time) in its own file;
        the filename will be self.SESSION_PREFIX + self.id.
    """

    SESSION_PREFIX = 'session-'
    LOCK_SUFFIX = '.lock'

    def __init__(self, id=None, **kwargs):
        # The 'storage_path' arg is required for file-based sessions.
        kwargs['storage_path'] = os.path.abspath(kwargs['storage_path'])
        Session.__init__(self, id=id, **kwargs)

    def setup(cls, **kwargs):
        """Set up the storage system for file-based sessions.

        This should only be called once per process; this will be done
        automatically when using sessions.init (as the built-in Tool does).
        """
        # The 'storage_path' arg is required for file-based sessions.
        kwargs['storage_path'] = os.path.abspath(kwargs['storage_path'])

        for k, v in kwargs.iteritems():
            setattr(cls, k, v)

        # Warn if any lock files exist at startup.
        lockfiles = [fname for fname in os.listdir(cls.storage_path)
                     if (fname.startswith(cls.SESSION_PREFIX)
@@ -316,17 +316,17 @@ class FileSession(Session):
                 "manually delete the lockfiles found at %r."
                 % (len(lockfiles), plural, cls.storage_path))
    setup = classmethod(setup)

    def _get_file_path(self):
        f = os.path.join(self.storage_path, self.SESSION_PREFIX + self.id)
        if not os.path.abspath(f).startswith(self.storage_path):
            raise cherrypy.HTTPError(400, "Invalid session id in cookie.")
        return f
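# An illustrative sketch (with a hypothetical storage_path of
# '/var/sessions'): the startswith() check above rejects forged session ids
# that would escape the storage directory via path traversal:
#
#   >>> import os.path
#   >>> os.path.abspath('/var/sessions/session-' + '../../../etc/passwd')
#   '/var/etc/passwd'   # does not start with '/var/sessions' -> HTTP 400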

    def _exists(self):
        path = self._get_file_path()
        return os.path.exists(path)

    def _load(self, path=None):
        if path is None:
            path = self._get_file_path()
@@ -338,20 +338,20 @@ class FileSession(Session):
            f.close()
        except (IOError, EOFError):
            return None

    def _save(self, expiration_time):
        f = open(self._get_file_path(), "wb")
        try:
            pickle.dump((self._data, expiration_time), f)
        finally:
            f.close()
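# An illustrative round-trip (grounded in the pickle.dump call above): each
# session file holds one pickled 2-tuple of (data dict, expiration datetime):
#
#   >>> import pickle, datetime
#   >>> blob = pickle.dumps(({'user': 'alice'}, datetime.datetime(2010, 1, 2)))
#   >>> pickle.loads(blob)
#   ({'user': 'alice'}, datetime.datetime(2010, 1, 2, 0, 0))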

    def _delete(self):
        try:
            os.unlink(self._get_file_path())
        except OSError:
            pass

    def acquire_lock(self, path=None):
        """Acquire an exclusive lock on the currently-loaded session data."""
        if path is None:
@@ -363,17 +363,17 @@ class FileSession(Session):
            except OSError:
                time.sleep(0.1)
            else:
                os.close(lockfd)
                break
        self.locked = True
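# A sketch of the lockfile idiom the elided lines of this hunk rely on
# (inferred from lockfd and LOCK_SUFFIX above, so treat it as an assumption):
# creating "<path>.lock" with O_CREAT | O_EXCL is atomic, so exactly one
# caller wins; the losers hit OSError and retry after the 0.1s sleep:
#
#   lockfd = os.open(path + self.LOCK_SUFFIX,
#                    os.O_CREAT | os.O_WRONLY | os.O_EXCL)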

    def release_lock(self, path=None):
        """Release the lock on the currently-loaded session data."""
        if path is None:
            path = self._get_file_path()
        os.unlink(path + self.LOCK_SUFFIX)
        self.locked = False

    def clean_up(self):
        """Clean up expired sessions."""
        now = datetime.datetime.now()
@@ -395,7 +395,7 @@ class FileSession(Session):
                            os.unlink(path)
                finally:
                    self.release_lock(path)

    def __len__(self):
        """Return the number of active sessions."""
        return len([fname for fname in os.listdir(self.storage_path)
@@ -412,38 +412,38 @@ class PostgresqlSession(Session):
        data text,
        expiration_time timestamp
    )

    You must provide your own get_db function.
    """

    def __init__(self, id=None, **kwargs):
        Session.__init__(self, id, **kwargs)
        self.cursor = self.db.cursor()

    def setup(cls, **kwargs):
        """Set up the storage system for Postgres-based sessions.

        This should only be called once per process; this will be done
        automatically when using sessions.init (as the built-in Tool does).
        """
        for k, v in kwargs.iteritems():
            setattr(cls, k, v)

        cls.db = cls.get_db()
    setup = classmethod(setup)

    def __del__(self):
        if self.cursor:
            self.cursor.close()
        self.db.commit()

    def _exists(self):
        # Select session data from table
        self.cursor.execute('select data, expiration_time from session '
                            'where id=%s', (self.id,))
        rows = self.cursor.fetchall()
        return bool(rows)

    def _load(self):
        # Select session data from table
        self.cursor.execute('select data, expiration_time from session '
@@ -451,34 +451,34 @@ class PostgresqlSession(Session):
        rows = self.cursor.fetchall()
        if not rows:
            return None

        pickled_data, expiration_time = rows[0]
        data = pickle.loads(pickled_data)
        return data, expiration_time

    def _save(self, expiration_time):
        pickled_data = pickle.dumps(self._data)
        self.cursor.execute('update session set data = %s, '
                            'expiration_time = %s where id = %s',
                            (pickled_data, expiration_time, self.id))

    def _delete(self):
        self.cursor.execute('delete from session where id=%s', (self.id,))

    def acquire_lock(self):
        """Acquire an exclusive lock on the currently-loaded session data."""
        # We use the "for update" clause to lock the row
        self.locked = True
        self.cursor.execute('select id from session where id=%s for update',
                            (self.id,))
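# An illustrative transcript of the row-locking technique above (plain SQL,
# not from the patch): SELECT ... FOR UPDATE locks the row until the
# surrounding transaction ends, so concurrent requests for the same session
# id serialize on the database:
#
#   BEGIN;
#   SELECT id FROM session WHERE id = 'abc123' FOR UPDATE;  -- row now locked
#   -- read or update the session row here
#   COMMIT;                                                 -- lock released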

    def release_lock(self):
        """Release the lock on the currently-loaded session data."""
        # We just close the cursor and that will remove the lock
        # introduced by the "for update" clause
        self.cursor.close()
        self.locked = False

    def clean_up(self):
        """Clean up expired sessions."""
        self.cursor.execute('delete from session where expiration_time < %s',
@@ -486,43 +486,43 @@ class PostgresqlSession(Session):


class MemcachedSession(Session):

    # The most popular memcached client for Python isn't thread-safe.
    # Wrap all .get and .set operations in a single lock.
    mc_lock = threading.RLock()

    # This is a separate set of locks per session id.
    locks = {}

    servers = ['127.0.0.1:11211']

    def setup(cls, **kwargs):
        """Set up the storage system for memcached-based sessions.

        This should only be called once per process; this will be done
        automatically when using sessions.init (as the built-in Tool does).
        """
        for k, v in kwargs.iteritems():
            setattr(cls, k, v)

        import memcache
        cls.cache = memcache.Client(cls.servers)
    setup = classmethod(setup)

    def _exists(self):
        self.mc_lock.acquire()
        try:
            return bool(self.cache.get(self.id))
        finally:
            self.mc_lock.release()

    def _load(self):
        self.mc_lock.acquire()
        try:
            return self.cache.get(self.id)
        finally:
            self.mc_lock.release()

    def _save(self, expiration_time):
        # Send the expiration time as "Unix time" (seconds since 1/1/1970)
        td = int(time.mktime(expiration_time.timetuple()))
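# An illustrative conversion (values depend on the local timezone): memcached
# treats expiry values larger than 30 days as absolute Unix timestamps, which
# is why the datetime is converted before being passed to cache.set():
#
#   >>> import time, datetime
#   >>> exp = datetime.datetime(2010, 1, 1, 12, 0)
#   >>> int(time.mktime(exp.timetuple()))
#   1262347200   # for a UTC host; illustrative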
@@ -532,20 +532,20 @@ class MemcachedSession(Session):
                raise AssertionError("Session data for id %r not set." % self.id)
        finally:
            self.mc_lock.release()

    def _delete(self):
        self.cache.delete(self.id)

    def acquire_lock(self):
        """Acquire an exclusive lock on the currently-loaded session data."""
        self.locked = True
        self.locks.setdefault(self.id, threading.RLock()).acquire()

    def release_lock(self):
        """Release the lock on the currently-loaded session data."""
        self.locks[self.id].release()
        self.locked = False

    def __len__(self):
        """Return the number of active sessions."""
        raise NotImplementedError
@@ -555,15 +555,15 @@ class MemcachedSession(Session):

def save():
    """Save any changed session data."""

    if not hasattr(cherrypy.serving, "session"):
        return

    # Guard against running twice
    if hasattr(cherrypy.request, "_sessionsaved"):
        return
    cherrypy.request._sessionsaved = True

    if cherrypy.response.stream:
        # If the body is being streamed, we have to save the data
        # *after* the response has been written out
@@ -589,7 +589,7 @@ close.priority = 90
def init(storage_type='ram', path=None, path_header=None, name='session_id',
         timeout=60, domain=None, secure=False, clean_freq=5, **kwargs):
    """Initialize session object (using cookies).

    storage_type: one of 'ram', 'file', 'postgresql'. This will be used
        to look up the corresponding class in cherrypy.lib.sessions
        globals. For example, 'file' will use the FileSession class.
@@ -603,31 +603,31 @@ def init(storage_type='ram', path=None, path_header=None, name='session_id',
    secure: if False (the default) the cookie 'secure' value will not
        be set. If True, the cookie 'secure' value will be set (to 1).
    clean_freq (minutes): the poll rate for expired session cleanup.

    Any additional kwargs will be bound to the new Session instance,
    and may be specific to the storage type. See the subclass of Session
    you're using for more information.
    """

    request = cherrypy.request

    # Guard against running twice
    if hasattr(request, "_session_init_flag"):
        return
    request._session_init_flag = True

    # Check if request came with a session ID
    id = None
    if name in request.cookie:
        id = request.cookie[name].value

    # Find the storage class and call setup (first time only).
    storage_class = storage_type.title() + 'Session'
    storage_class = globals()[storage_class]
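# An illustrative note: the lookup is purely name-based, so a backend only
# has to follow the <Title>Session naming convention to be selectable:
#
#   >>> 'ram'.title() + 'Session'
#   'RamSession'
#   >>> 'memcached'.title() + 'Session'
#   'MemcachedSession'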
    if not hasattr(cherrypy, "session"):
        if hasattr(storage_class, "setup"):
            storage_class.setup(**kwargs)

    # Create and attach a new Session instance to cherrypy.serving.
    # It will possess a reference to (and lock, and lazily load)
    # the requested session data.
@@ -638,11 +638,11 @@ def init(storage_type='ram', path=None, path_header=None, name='session_id',
        """Update the cookie every time the session id changes."""
        cherrypy.response.cookie[name] = id
    sess.id_observers.append(update_cookie)

    # Create cherrypy.session which will proxy to cherrypy.serving.session
    if not hasattr(cherrypy, "session"):
        cherrypy.session = cherrypy._ThreadLocalProxy('session')

    set_response_cookie(path=path, path_header=path_header, name=name,
                        timeout=timeout, domain=domain, secure=secure)

@@ -650,7 +650,7 @@ def init(storage_type='ram', path=None, path_header=None, name='session_id',
def set_response_cookie(path=None, path_header=None, name='session_id',
                        timeout=60, domain=None, secure=False):
    """Set a response cookie for the client.

    path: the 'path' value to stick in the response cookie metadata.
    path_header: if 'path' is None (the default), then the response
        cookie 'path' will be pulled from request.headers[path_header].
@@ -665,14 +665,15 @@ def set_response_cookie(path=None, path_header=None, name='session_id',
    cookie[name] = cherrypy.serving.session.id
    cookie[name]['path'] = (path or cherrypy.request.headers.get(path_header)
                            or '/')

    # We'd like to use the "max-age" param as indicated in
    # http://www.faqs.org/rfcs/rfc2109.html but IE doesn't
    # save it to disk and the session is lost if people close
    # the browser. So we have to use the old "expires" ... sigh ...
    ## cookie[name]['max-age'] = timeout * 60
-    if timeout:
-        cookie[name]['expires'] = http.HTTPDate(time.time() + (timeout * 60))
+    if False and timeout: # Changed by Kovid, we want the user to have to
+        # re-authenticate on browser restart
+        cookie[name]['expires'] = http.HTTPDate(time.time() + timeout)
    if domain is not None:
        cookie[name]['domain'] = domain
    if secure:
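# A reading of the change above (grounded in its in-line comment): with the
# branch disabled via `if False`, the 'expires' attribute is never written,
# so the cookie becomes a browser-session cookie that is discarded when the
# browser closes, forcing users to re-authenticate on restart.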

@@ -241,10 +241,10 @@ def wait_for_free_port(host, port):
    for trial in xrange(50):
        try:
            # we are expecting a free port, so reduce the timeout
-            check_port(host, port, timeout=0.2)
+            check_port(host, port, timeout=0.2) # Changed by Kovid
        except IOError:
            # Give the old server thread time to free the port.
-            time.sleep(0.2)
+            time.sleep(0.2) # Changed by Kovid
        else:
            return
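# An illustrative reading of the loop above (this hunk comes from a second
# file in the same commit; its line counter restarts at 241): each failed
# trial costs up to a 0.2s connect timeout plus a 0.2s sleep, so 50 trials
# wait at most roughly 20 seconds for the old server to free the port.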