mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix handling of &apos;
This commit is contained in:
parent
7679166dbe
commit
d5bd948404
@ -53,7 +53,7 @@ class HTMLConverter(object):
|
|||||||
replaced_entities = [ 'amp', 'lt', 'gt' , 'ldquo', 'rdquo', 'lsquo', 'rsquo']
|
replaced_entities = [ 'amp', 'lt', 'gt' , 'ldquo', 'rdquo', 'lsquo', 'rsquo']
|
||||||
patterns = [ re.compile('&'+i+';') for i in replaced_entities ]
|
patterns = [ re.compile('&'+i+';') for i in replaced_entities ]
|
||||||
targets = [ unichr(name2codepoint[i]) for i in replaced_entities ]
|
targets = [ unichr(name2codepoint[i]) for i in replaced_entities ]
|
||||||
ENTITY_RULES = zip(patterns, targets)
|
ENTITY_RULES = zip(patterns, targets) + [(re.compile('&apos;'), "'")]
|
||||||
|
|
||||||
|
|
||||||
MARKUP_MASSAGE = [
|
MARKUP_MASSAGE = [
|
||||||
|
@ -16,14 +16,14 @@
|
|||||||
import os, time, calendar, operator
|
import os, time, calendar, operator
|
||||||
|
|
||||||
from libprs500 import iswindows
|
from libprs500 import iswindows
|
||||||
from libprs500.ebooks.BeautifulSoup import BeautifulStoneSoup, BeautifulSoup
|
from libprs500.ebooks.BeautifulSoup import BeautifulStoneSoup
|
||||||
|
|
||||||
def parse_feeds(feeds, browser, print_version, max_articles_per_feed=10):
|
def parse_feeds(feeds, browser, print_version, max_articles_per_feed=10):
|
||||||
articles = {}
|
articles = {}
|
||||||
for title, url in feeds:
|
for title, url in feeds:
|
||||||
src = browser.open(url).read()
|
src = browser.open(url).read()
|
||||||
articles[title] = []
|
articles[title] = []
|
||||||
soup = BeautifulStoneSoup(src, convertEntities=BeautifulSoup.HTML_ENTITIES)
|
soup = BeautifulStoneSoup(src)
|
||||||
for item in soup.findAll('item'):
|
for item in soup.findAll('item'):
|
||||||
try:
|
try:
|
||||||
pubdate = item.find('pubdate').string
|
pubdate = item.find('pubdate').string
|
||||||
|
Loading…
x
Reference in New Issue
Block a user