mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
News download: Handle HTML entities in article titles
This commit is contained in:
parent
8e248482aa
commit
af8f3b56ce
@ -5,10 +5,11 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
|||||||
'''
|
'''
|
||||||
Contains the logic for parsing feeds.
|
Contains the logic for parsing feeds.
|
||||||
'''
|
'''
|
||||||
import time, logging, traceback, copy
|
import time, logging, traceback, copy, re
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
from calibre.web.feeds.feedparser import parse
|
from calibre.web.feeds.feedparser import parse
|
||||||
|
from calibre import entity_to_unicode
|
||||||
from lxml import html
|
from lxml import html
|
||||||
|
|
||||||
class Article(object):
|
class Article(object):
|
||||||
@ -19,6 +20,12 @@ class Article(object):
|
|||||||
self.downloaded = False
|
self.downloaded = False
|
||||||
self.id = id
|
self.id = id
|
||||||
self.title = title.strip() if title else title
|
self.title = title.strip() if title else title
|
||||||
|
try:
|
||||||
|
self.title = re.sub(r'&(\S+);',
|
||||||
|
entity_to_unicode, self.title)
|
||||||
|
print 11111, repr(self.title)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
self.url = url
|
self.url = url
|
||||||
self.summary = summary
|
self.summary = summary
|
||||||
if summary and not isinstance(summary, unicode):
|
if summary and not isinstance(summary, unicode):
|
||||||
@ -37,6 +44,7 @@ class Article(object):
|
|||||||
self.date = published
|
self.date = published
|
||||||
self.utctime = datetime(*self.date[:6])
|
self.utctime = datetime(*self.date[:6])
|
||||||
self.localtime = self.utctime + self.time_offset
|
self.localtime = self.utctime + self.time_offset
|
||||||
|
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return \
|
return \
|
||||||
@ -91,7 +99,8 @@ class Feed(object):
|
|||||||
if len(self.articles) >= max_articles_per_feed:
|
if len(self.articles) >= max_articles_per_feed:
|
||||||
break
|
break
|
||||||
self.parse_article(item)
|
self.parse_article(item)
|
||||||
|
|
||||||
|
|
||||||
def populate_from_preparsed_feed(self, title, articles, oldest_article=7,
|
def populate_from_preparsed_feed(self, title, articles, oldest_article=7,
|
||||||
max_articles_per_feed=100):
|
max_articles_per_feed=100):
|
||||||
self.title = title if title else _('Unknown feed')
|
self.title = title if title else _('Unknown feed')
|
||||||
|
Loading…
x
Reference in New Issue
Block a user