Fix handling of &apos;

This commit is contained in:
Kovid Goyal 2007-08-20 00:50:55 +00:00
parent 7679166dbe
commit d5bd948404
2 changed files with 4 additions and 4 deletions

View File

@ -53,7 +53,7 @@ class HTMLConverter(object):
replaced_entities = [ 'amp', 'lt', 'gt' , 'ldquo', 'rdquo', 'lsquo', 'rsquo'] replaced_entities = [ 'amp', 'lt', 'gt' , 'ldquo', 'rdquo', 'lsquo', 'rsquo']
patterns = [ re.compile('&'+i+';') for i in replaced_entities ] patterns = [ re.compile('&'+i+';') for i in replaced_entities ]
targets = [ unichr(name2codepoint[i]) for i in replaced_entities ] targets = [ unichr(name2codepoint[i]) for i in replaced_entities ]
ENTITY_RULES = zip(patterns, targets) ENTITY_RULES = zip(patterns, targets) + [(re.compile('&apos;'), "'")]
MARKUP_MASSAGE = [ MARKUP_MASSAGE = [

View File

@ -16,14 +16,14 @@
import os, time, calendar, operator import os, time, calendar, operator
from libprs500 import iswindows from libprs500 import iswindows
from libprs500.ebooks.BeautifulSoup import BeautifulStoneSoup, BeautifulSoup from libprs500.ebooks.BeautifulSoup import BeautifulStoneSoup
def parse_feeds(feeds, browser, print_version, max_articles_per_feed=10): def parse_feeds(feeds, browser, print_version, max_articles_per_feed=10):
articles = {} articles = {}
for title, url in feeds: for title, url in feeds:
src = browser.open(url).read() src = browser.open(url).read()
articles[title] = [] articles[title] = []
soup = BeautifulStoneSoup(src, convertEntities=BeautifulSoup.HTML_ENTITIES) soup = BeautifulStoneSoup(src)
for item in soup.findAll('item'): for item in soup.findAll('item'):
try: try:
pubdate = item.find('pubdate').string pubdate = item.find('pubdate').string