mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
merge from trunk
This commit is contained in:
commit
e303babf89
43
resources/recipes/le_journal.recipe
Normal file
43
resources/recipes/le_journal.recipe
Normal file
@ -0,0 +1,43 @@
|
||||
__author__ = ' (lrfurtado@yahoo.com.br)'
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class LeJournalDeMontrealRecipe(BasicNewsRecipe):
    """News recipe for Le Journal de Montreal, built on BasicNewsRecipe.

    The class is purely declarative: it sets descriptive metadata, article
    limits, a list of page elements to strip, and the RSS feeds to fetch
    (all hosted on www.canoe.com).
    """

    # --- Descriptive metadata ---------------------------------------------
    title = u'Le Journal de Montreal'
    description = u'Le Journal de Montreal'
    __author__ = 'Luciano Furtado'
    language = 'fr'

    # --- Fetch limits ------------------------------------------------------
    # NOTE(review): units/semantics of these settings are defined by
    # BasicNewsRecipe; see the calibre recipe API documentation.
    oldest_article = 7
    use_embedded_content = 0
    max_articles_per_feed = 15

    # --- Page clean-up -----------------------------------------------------
    # Each entry selects elements by tag name plus an id or class attribute.
    remove_tags = [
        # Site navigation and page furniture
        {'name': 'ul', 'attrs': {'id': 'mainNav'}},
        {'name': 'div', 'attrs': {'id': 'boxPolitique'}},
        {'name': 'div', 'attrs': {'id': 'boxScoop'}},
        {'name': 'div', 'attrs': {'id': 'DossierSpec'}},
        {'name': 'div', 'attrs': {'id': 'channelBoxes'}},
        {'name': 'div', 'attrs': {'id': 'sectionBoxes'}},
        {'name': 'div', 'attrs': {'id': 'header'}},
        {'name': 'div', 'attrs': {'id': 'footer'}},
        {'name': 'div', 'attrs': {'id': 'navbarCanoe_container'}},
        {'name': 'div', 'attrs': {'id': 'popularCanoe'}},
        {'name': 'div', 'attrs': {'id': 'textAds'}},
        {'name': 'div', 'attrs': {'id': '24heures'}},
        # Elements selected by their full class attribute value
        {'name': 'div', 'attrs': {'class': 'bottomBox clear'}},
        {'name': 'div', 'attrs': {'class': 'articleControls thin'}},
    ]

    # --- Feeds: (section title, RSS URL) ----------------------------------
    feeds = [
        (u'Actualites', u'http://www.canoe.com/rss/feed/nouvelles/ljm_actualites.xml'),
        (u'Arts et spectacle', u'http://www.canoe.com/rss/feed/nouvelles/ljm_arts.xml'),
        (u'Sports', u'http://www.canoe.com/rss/feed/nouvelles/ljm_sports.xml'),
        (u'Chroniques', u'http://www.canoe.com/rss/feed/nouvelles/ljm_chroniques.xml'),
    ]
|
@ -114,20 +114,22 @@ class Dehyphenator(object):
|
||||
retain hyphens.
|
||||
'''
|
||||
|
||||
def __init__(self):
    """Precompile the case-insensitive patterns used when dehyphenating.

    Sets three attributes on the instance:
      removesuffixes -- matches a known word ending anchored at end of string
      prefixes       -- matches a string that is exactly 'un', 'in' or 'ex'
      removeprefix   -- matches 'un', 'in' or 'ex' at the start of a string
    """
    flags = re.IGNORECASE

    # Extend this alternation with further common suffixes to raise the
    # chance of a match, but leave out endings that are themselves whole
    # words (for example 'able' or 'sex').
    suffix_pattern = r"((ed)?ly|('e)?s|a?(t|s)ion(s|al(ly)?)?|ings?|(i)?ous|(i|a)ty|(it)?ies|ive|gence|istic|(e|a)nce|ment(s)?|ism|ated|(e|u)ct(ed)?|ed|(i|ed)?ness|(e|a)ncy|ble|ier|al|ex)$"
    self.removesuffixes = re.compile(suffix_pattern, flags)

    # Two prefix patterns: the first recognises a fragment that consists
    # solely of a prefix (i.e. the prefix itself was the hyphenation point),
    # the second strips a leading prefix from a longer word.
    whole_prefix_pattern = r'^(un|in|ex)$'
    leading_prefix_pattern = r'^(un|in|ex)'
    self.prefixes = re.compile(whole_prefix_pattern, flags)
    self.removeprefix = re.compile(leading_prefix_pattern, flags)
|
||||
|
||||
def dehyphenate(self, match):
|
||||
firsthalf = match.group('firstpart')
|
||||
secondhalf = match.group('secondpart')
|
||||
hyphenated = str(firsthalf) + "-" + str(secondhalf)
|
||||
dehyphenated = str(firsthalf) + str(secondhalf)
|
||||
# Add common suffixes to the regex below to increase the likelihood of a match -
|
||||
# don't add suffixes which are also complete words, such as 'able' or 'sex'
|
||||
removesuffixes = re.compile(r"((ed)?ly|('e)?s|a?(t|s)ion(s|al(ly)?)?|ings?|(i)?ous|(i|a)ty|(it)?ies|ive|gence|istic|(e|a)nce|ment(s)?|ism|ated|(e|u)ct(ed)?|ed|(i|ed)?ness|(e|a)ncy|ble|ier|al|ex)$", re.IGNORECASE)
|
||||
lookupword = removesuffixes.sub('', dehyphenated)
|
||||
# remove prefixes if the prefix was not already the point of hyphenation
|
||||
prefixes = re.compile(r'^(un|in|ex)$', re.IGNORECASE)
|
||||
removeprefix = re.compile(r'^(un|in|ex)', re.IGNORECASE)
|
||||
if prefixes.match(firsthalf) is None:
|
||||
lookupword = removeprefix.sub('', lookupword)
|
||||
lookupword = self.removesuffixes.sub('', dehyphenated)
|
||||
if self.prefixes.match(firsthalf) is None:
|
||||
lookupword = self.removeprefix.sub('', lookupword)
|
||||
booklookup = re.compile(u'%s' % lookupword, re.IGNORECASE)
|
||||
#print "lookup word is: "+str(lookupword)+", orig is: " + str(hyphenated)
|
||||
match = booklookup.search(self.html)
|
||||
|
@ -184,7 +184,7 @@ class ContentServer(object):
|
||||
if path and os.path.exists(path):
|
||||
updated = fromtimestamp(os.stat(path).st_mtime)
|
||||
cherrypy.response.headers['Last-Modified'] = self.last_modified(updated)
|
||||
return fmt.read()
|
||||
return fmt
|
||||
# }}}
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user