mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Pull from trunk
This commit is contained in:
commit
9445f488c2
@ -99,7 +99,8 @@ def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False,
|
|||||||
try:
|
try:
|
||||||
raw = raw.decode(encoding, 'replace')
|
raw = raw.decode(encoding, 'replace')
|
||||||
except LookupError:
|
except LookupError:
|
||||||
raw = raw.decode('utf-8', 'replace')
|
encoding = 'utf-8'
|
||||||
|
raw = raw.decode(encoding, 'replace')
|
||||||
|
|
||||||
if strip_encoding_pats:
|
if strip_encoding_pats:
|
||||||
raw = strip_encoding_declarations(raw)
|
raw = strip_encoding_declarations(raw)
|
||||||
|
@ -197,6 +197,9 @@ class HTMLProcessor(Processor, Rationalizer):
|
|||||||
if not tag.text and not tag.get('src', False):
|
if not tag.text and not tag.get('src', False):
|
||||||
tag.getparent().remove(tag)
|
tag.getparent().remove(tag)
|
||||||
|
|
||||||
|
for tag in self.root.xpath('//form'):
|
||||||
|
tag.getparent().remove(tag)
|
||||||
|
|
||||||
if self.opts.linearize_tables:
|
if self.opts.linearize_tables:
|
||||||
for tag in self.root.xpath('//table | //tr | //th | //td'):
|
for tag in self.root.xpath('//table | //tr | //th | //td'):
|
||||||
tag.tag = 'div'
|
tag.tag = 'div'
|
||||||
|
@ -99,6 +99,10 @@ class HTMLConverter(object):
|
|||||||
# Replace common line break patterns with line breaks
|
# Replace common line break patterns with line breaks
|
||||||
(re.compile(r'<p>( |\s)*</p>', re.IGNORECASE), lambda m: '<br />'),
|
(re.compile(r'<p>( |\s)*</p>', re.IGNORECASE), lambda m: '<br />'),
|
||||||
|
|
||||||
|
# Replace empty headers with line breaks
|
||||||
|
(re.compile(r'<h[0-5]?>( |\s)*</h[0-5]?>',
|
||||||
|
re.IGNORECASE), lambda m: '<br />'),
|
||||||
|
|
||||||
# Replace entities
|
# Replace entities
|
||||||
(re.compile(ur'&(\S+?);'), partial(entity_to_unicode,
|
(re.compile(ur'&(\S+?);'), partial(entity_to_unicode,
|
||||||
exceptions=['lt', 'gt', 'amp'])),
|
exceptions=['lt', 'gt', 'amp'])),
|
||||||
|
@ -530,7 +530,7 @@ class LRFMetaFile(object):
|
|||||||
""" See L{file.write} """
|
""" See L{file.write} """
|
||||||
self._file.write(val)
|
self._file.write(val)
|
||||||
|
|
||||||
def objects(self):
|
def _objects(self):
|
||||||
self._file.seek(self.object_index_offset)
|
self._file.seek(self.object_index_offset)
|
||||||
c = self.number_of_objects
|
c = self.number_of_objects
|
||||||
while c > 0:
|
while c > 0:
|
||||||
@ -543,7 +543,7 @@ class LRFMetaFile(object):
|
|||||||
def get_objects_by_type(self, type):
|
def get_objects_by_type(self, type):
|
||||||
from calibre.ebooks.lrf.tags import Tag
|
from calibre.ebooks.lrf.tags import Tag
|
||||||
objects = []
|
objects = []
|
||||||
for id, offset, size in self.objects():
|
for id, offset, size in self._objects():
|
||||||
self._file.seek(offset)
|
self._file.seek(offset)
|
||||||
tag = Tag(self._file)
|
tag = Tag(self._file)
|
||||||
if tag.id == 0xF500:
|
if tag.id == 0xF500:
|
||||||
@ -554,7 +554,7 @@ class LRFMetaFile(object):
|
|||||||
|
|
||||||
def get_object_by_id(self, tid):
|
def get_object_by_id(self, tid):
|
||||||
from calibre.ebooks.lrf.tags import Tag
|
from calibre.ebooks.lrf.tags import Tag
|
||||||
for id, offset, size in self.objects():
|
for id, offset, size in self._objects():
|
||||||
self._file.seek(offset)
|
self._file.seek(offset)
|
||||||
tag = Tag(self._file)
|
tag = Tag(self._file)
|
||||||
if tag.id == 0xF500:
|
if tag.id == 0xF500:
|
||||||
|
@ -19,14 +19,22 @@ def get_metadata(stream):
|
|||||||
for item in opf.iterguide():
|
for item in opf.iterguide():
|
||||||
if 'cover' not in item.get('type', '').lower():
|
if 'cover' not in item.get('type', '').lower():
|
||||||
continue
|
continue
|
||||||
|
ctype = item.get('type')
|
||||||
href = item.get('href', '')
|
href = item.get('href', '')
|
||||||
candidates = [href, href.replace('&', '%26')]
|
candidates = [href, href.replace('&', '%26')]
|
||||||
for item in litfile.manifest.values():
|
for item in litfile.manifest.values():
|
||||||
if item.path in candidates:
|
if item.path in candidates:
|
||||||
covers.append(item.internal)
|
try:
|
||||||
|
covers.append((litfile.get_file('/data/'+item.internal),
|
||||||
|
ctype))
|
||||||
|
except:
|
||||||
|
pass
|
||||||
break
|
break
|
||||||
covers = [litfile.get_file('/data/' + i) for i in covers]
|
covers.sort(cmp=lambda x, y:cmp(len(x[0]), len(y[0])), reverse=True)
|
||||||
covers.sort(cmp=lambda x, y:cmp(len(x), len(y)))
|
idx = 0
|
||||||
mi.cover_data = ('jpg', covers[-1])
|
if len(covers) > 1:
|
||||||
|
if covers[1][1] == covers[1][0]+'-standard':
|
||||||
|
idx = 1
|
||||||
|
mi.cover_data = ('jpg', covers[idx][0])
|
||||||
return mi
|
return mi
|
||||||
|
|
||||||
|
@ -312,7 +312,7 @@ class MobiReader(object):
|
|||||||
mobi_version = self.book_header.mobi_version
|
mobi_version = self.book_header.mobi_version
|
||||||
for i, tag in enumerate(root.iter(etree.Element)):
|
for i, tag in enumerate(root.iter(etree.Element)):
|
||||||
if tag.tag in ('country-region', 'place', 'placetype', 'placename',
|
if tag.tag in ('country-region', 'place', 'placetype', 'placename',
|
||||||
'state', 'city'):
|
'state', 'city', 'street', 'address'):
|
||||||
tag.tag = 'span'
|
tag.tag = 'span'
|
||||||
for key in tag.attrib.keys():
|
for key in tag.attrib.keys():
|
||||||
tag.attrib.pop(key)
|
tag.attrib.pop(key)
|
||||||
|
@ -196,7 +196,7 @@ class ConfigDialog(QDialog, Ui_Dialog):
|
|||||||
self.language.addItem(language_codes[lang], QVariant(lang))
|
self.language.addItem(language_codes[lang], QVariant(lang))
|
||||||
else:
|
else:
|
||||||
lang = 'en'
|
lang = 'en'
|
||||||
self.language.addItem('English', 'en')
|
self.language.addItem('English', QVariant('en'))
|
||||||
items = [(l, language_codes[l]) for l in translations.keys() \
|
items = [(l, language_codes[l]) for l in translations.keys() \
|
||||||
if l != lang]
|
if l != lang]
|
||||||
if lang != 'en':
|
if lang != 'en':
|
||||||
|
@ -1406,7 +1406,15 @@ class Main(MainWindow, Ui_MainWindow):
|
|||||||
dir = os.path.expanduser('~/Library')
|
dir = os.path.expanduser('~/Library')
|
||||||
self.library_path = os.path.abspath(dir)
|
self.library_path = os.path.abspath(dir)
|
||||||
if not os.path.exists(self.library_path):
|
if not os.path.exists(self.library_path):
|
||||||
os.makedirs(self.library_path)
|
try:
|
||||||
|
os.makedirs(self.library_path)
|
||||||
|
except:
|
||||||
|
self.library_path = os.path.expanduser('~/Library')
|
||||||
|
error_dialog(self, _('Invalid library location'),
|
||||||
|
_('Could not access %s. Using %s as the library.')%
|
||||||
|
(repr(self.library_path), repr(self.library_path))
|
||||||
|
).exec_()
|
||||||
|
os.makedirs(self.library_path)
|
||||||
|
|
||||||
|
|
||||||
def read_settings(self):
|
def read_settings(self):
|
||||||
|
@ -196,7 +196,7 @@ class Server(object):
|
|||||||
|
|
||||||
def calculate_month_trend(self, days=31):
|
def calculate_month_trend(self, days=31):
|
||||||
stats = self.get_slice(date.today()-timedelta(days=days-1), date.today())
|
stats = self.get_slice(date.today()-timedelta(days=days-1), date.today())
|
||||||
fig = plt.figure(2, (12, 4), 96)#, facecolor, edgecolor, frameon, FigureClass)
|
fig = plt.figure(2, (10, 4), 96)#, facecolor, edgecolor, frameon, FigureClass)
|
||||||
fig.clear()
|
fig.clear()
|
||||||
ax = fig.add_subplot(111)
|
ax = fig.add_subplot(111)
|
||||||
x = list(range(days-1, -1, -1))
|
x = list(range(days-1, -1, -1))
|
||||||
@ -216,7 +216,7 @@ Donors per day: %(dpd).2f
|
|||||||
ad=stats.average_deviation,
|
ad=stats.average_deviation,
|
||||||
dpd=len(stats.totals)/float(stats.period.days),
|
dpd=len(stats.totals)/float(stats.period.days),
|
||||||
)
|
)
|
||||||
text = ax.annotate(text, (0.6, 0.65), textcoords='axes fraction')
|
text = ax.annotate(text, (0.5, 0.65), textcoords='axes fraction')
|
||||||
fig.savefig(self.MONTH_TRENDS)
|
fig.savefig(self.MONTH_TRENDS)
|
||||||
|
|
||||||
def calculate_trend(self):
|
def calculate_trend(self):
|
||||||
|
@ -156,7 +156,6 @@ class Feed(object):
|
|||||||
content = None
|
content = None
|
||||||
if not link and not content:
|
if not link and not content:
|
||||||
return
|
return
|
||||||
|
|
||||||
article = Article(id, title, link, description, published, content)
|
article = Article(id, title, link, description, published, content)
|
||||||
delta = datetime.utcnow() - article.utctime
|
delta = datetime.utcnow() - article.utctime
|
||||||
if delta.days*24*3600 + delta.seconds <= 24*3600*self.oldest_article:
|
if delta.days*24*3600 + delta.seconds <= 24*3600*self.oldest_article:
|
||||||
|
@ -1012,6 +1012,7 @@ class BasicNewsRecipe(object):
|
|||||||
parsed_feeds.append(feed)
|
parsed_feeds.append(feed)
|
||||||
self.log_exception(msg)
|
self.log_exception(msg)
|
||||||
|
|
||||||
|
|
||||||
return parsed_feeds
|
return parsed_feeds
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@ -33,7 +33,7 @@ recipe_modules = ['recipe_' + r for r in (
|
|||||||
'la_republica', 'physics_today', 'chicago_tribune', 'e_novine',
|
'la_republica', 'physics_today', 'chicago_tribune', 'e_novine',
|
||||||
'al_jazeera', 'winsupersite', 'borba', 'courrierinternational',
|
'al_jazeera', 'winsupersite', 'borba', 'courrierinternational',
|
||||||
'lamujerdemivida', 'soldiers', 'theonion', 'news_times',
|
'lamujerdemivida', 'soldiers', 'theonion', 'news_times',
|
||||||
'el_universal', 'mediapart', 'wikinews_en', 'ecogeek',
|
'el_universal', 'mediapart', 'wikinews_en', 'ecogeek', 'daily_mail',
|
||||||
)]
|
)]
|
||||||
|
|
||||||
import re, imp, inspect, time, os
|
import re, imp, inspect, time, os
|
||||||
|
33
src/calibre/web/feeds/recipes/recipe_daily_mail.py
Normal file
33
src/calibre/web/feeds/recipes/recipe_daily_mail.py
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class TheDailyMail(BasicNewsRecipe):
    """Recipe for The Daily Mail, built from the site's per-section RSS feeds.

    Each article URL is rewritten by ``print_version`` so that the page is
    requested with the ``printingPage=true`` query string.
    """

    # --- identification -------------------------------------------------
    title = u'The Daily Mail'
    author = 'RufusA'
    language = _('English')

    # --- download limits ------------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 50
    simultaneous_downloads = 1

    # --- styling --------------------------------------------------------
    no_stylesheets = True
    extra_css = 'h1 {text-align: left;}'

    # --- page clean-up --------------------------------------------------
    # Tag specifications naming the element and the attributes to match.
    remove_tags_before = {'name': 'div', 'attrs': {'id': 'content'}}
    remove_tags_after = {'name': 'h3',
                         'attrs': {'class': 'social-links-title'}}
    remove_tags = [
        {'name': 'ul', 'attrs': {'class': 'article-icons-links'}},
    ]

    # --- feeds: (section title, RSS URL) pairs ----------------------------
    feeds = [
        (u'Home',
         u'http://www.dailymail.co.uk/home/index.rss'),
        (u'News',
         u'http://www.dailymail.co.uk/news/index.rss'),
        (u'Sport',
         u'http://www.dailymail.co.uk/sport/index.rss'),
        (u'TV and Showbiz',
         u'http://www.dailymail.co.uk/tvshowbiz/index.rss'),
        (u'Femail',
         u'http://www.dailymail.co.uk/femail/index.rss'),
        (u'Health',
         u'http://www.dailymail.co.uk/health/index.rss'),
        (u'Science and Technology',
         u'http://www.dailymail.co.uk/sciencetech/index.rss'),
        (u'Money',
         u'http://www.dailymail.co.uk/money/index.rss'),
        (u'Property',
         u'http://www.dailymail.co.uk/property/index.rss'),
        (u'Motoring',
         u'http://www.dailymail.co.uk/motoring/index.rss'),
        (u'Travel',
         u'http://www.dailymail.co.uk/travel/index.rss'),
    ]

    def print_version(self, url):
        """Return ``url`` with its query string replaced.

        Everything from the first ``?`` onwards is discarded and
        ``?printingPage=true`` is appended in its place.
        """
        base_url = url.split('?', 1)[0]
        return base_url + '?printingPage=true'
|
@ -3,6 +3,7 @@ __copyright__ = '2008, Derry FitzGerald'
|
|||||||
'''
|
'''
|
||||||
iht.com
|
iht.com
|
||||||
'''
|
'''
|
||||||
|
import re
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ptempfile import PersistentTemporaryFile
|
from calibre.ptempfile import PersistentTemporaryFile
|
||||||
@ -16,7 +17,12 @@ class InternationalHeraldTribune(BasicNewsRecipe):
|
|||||||
max_articles_per_feed = 10
|
max_articles_per_feed = 10
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
|
||||||
remove_tags = [dict(name='div', attrs={'class':'footer'})]
|
remove_tags = [dict(name='div', attrs={'class':'footer'}),
|
||||||
|
dict(name=['form'])]
|
||||||
|
preprocess_regexps = [
|
||||||
|
(re.compile(r'<!-- webtrends.*', re.DOTALL),
|
||||||
|
lambda m:'</body></html>')
|
||||||
|
]
|
||||||
extra_css = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt }'
|
extra_css = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt }'
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
|
Loading…
x
Reference in New Issue
Block a user