diff --git a/src/calibre/ebooks/chardet/__init__.py b/src/calibre/ebooks/chardet/__init__.py
index af6d724883..971ac9bc9a 100644
--- a/src/calibre/ebooks/chardet/__init__.py
+++ b/src/calibre/ebooks/chardet/__init__.py
@@ -99,7 +99,8 @@ def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False,
try:
raw = raw.decode(encoding, 'replace')
except LookupError:
- raw = raw.decode('utf-8', 'replace')
+ encoding = 'utf-8'
+ raw = raw.decode(encoding, 'replace')
if strip_encoding_pats:
raw = strip_encoding_declarations(raw)
diff --git a/src/calibre/ebooks/epub/from_html.py b/src/calibre/ebooks/epub/from_html.py
index ffe402538f..47d278a2b6 100644
--- a/src/calibre/ebooks/epub/from_html.py
+++ b/src/calibre/ebooks/epub/from_html.py
@@ -197,6 +197,9 @@ class HTMLProcessor(Processor, Rationalizer):
if not tag.text and not tag.get('src', False):
tag.getparent().remove(tag)
+ for tag in self.root.xpath('//form'):
+ tag.getparent().remove(tag)
+
if self.opts.linearize_tables:
for tag in self.root.xpath('//table | //tr | //th | //td'):
tag.tag = 'div'
diff --git a/src/calibre/ebooks/lrf/html/convert_from.py b/src/calibre/ebooks/lrf/html/convert_from.py
index 056666b301..9ec4857126 100644
--- a/src/calibre/ebooks/lrf/html/convert_from.py
+++ b/src/calibre/ebooks/lrf/html/convert_from.py
@@ -99,6 +99,10 @@ class HTMLConverter(object):
# Replace common line break patterns with line breaks
(re.compile(r'
( |\s)*
', re.IGNORECASE), lambda m: '
'),
+ # Replace empty headers with line breaks
+ (re.compile(r'( |\s)*',
+ re.IGNORECASE), lambda m: '
'),
+
# Replace entities
(re.compile(ur'&(\S+?);'), partial(entity_to_unicode,
exceptions=['lt', 'gt', 'amp'])),
diff --git a/src/calibre/ebooks/lrf/meta.py b/src/calibre/ebooks/lrf/meta.py
index 322835f470..6ec87892d6 100644
--- a/src/calibre/ebooks/lrf/meta.py
+++ b/src/calibre/ebooks/lrf/meta.py
@@ -530,7 +530,7 @@ class LRFMetaFile(object):
""" See L{file.write} """
self._file.write(val)
- def objects(self):
+ def _objects(self):
self._file.seek(self.object_index_offset)
c = self.number_of_objects
while c > 0:
@@ -543,7 +543,7 @@ class LRFMetaFile(object):
def get_objects_by_type(self, type):
from calibre.ebooks.lrf.tags import Tag
objects = []
- for id, offset, size in self.objects():
+ for id, offset, size in self._objects():
self._file.seek(offset)
tag = Tag(self._file)
if tag.id == 0xF500:
@@ -554,7 +554,7 @@ class LRFMetaFile(object):
def get_object_by_id(self, tid):
from calibre.ebooks.lrf.tags import Tag
- for id, offset, size in self.objects():
+ for id, offset, size in self._objects():
self._file.seek(offset)
tag = Tag(self._file)
if tag.id == 0xF500:
diff --git a/src/calibre/ebooks/metadata/lit.py b/src/calibre/ebooks/metadata/lit.py
index 7b3c873b38..071111e0f7 100644
--- a/src/calibre/ebooks/metadata/lit.py
+++ b/src/calibre/ebooks/metadata/lit.py
@@ -19,14 +19,22 @@ def get_metadata(stream):
for item in opf.iterguide():
if 'cover' not in item.get('type', '').lower():
continue
+ ctype = item.get('type')
href = item.get('href', '')
candidates = [href, href.replace('&', '%26')]
for item in litfile.manifest.values():
if item.path in candidates:
- covers.append(item.internal)
+ try:
+ covers.append((litfile.get_file('/data/'+item.internal),
+ ctype))
+ except:
+ pass
break
- covers = [litfile.get_file('/data/' + i) for i in covers]
- covers.sort(cmp=lambda x, y:cmp(len(x), len(y)))
- mi.cover_data = ('jpg', covers[-1])
+ covers.sort(cmp=lambda x, y:cmp(len(x[0]), len(y[0])), reverse=True)  # entries are (data, guide type); largest data first
+ idx = 0
+ if len(covers) > 1:
+     if covers[1][1] == covers[0][1]+'-standard':  # compare guide types: is the runner-up the "-standard" variant of the largest?
+         idx = 1
+ mi.cover_data = ('jpg', covers[idx][0])
return mi
diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py
index 7708dc768a..85057017a6 100644
--- a/src/calibre/ebooks/mobi/reader.py
+++ b/src/calibre/ebooks/mobi/reader.py
@@ -312,7 +312,7 @@ class MobiReader(object):
mobi_version = self.book_header.mobi_version
for i, tag in enumerate(root.iter(etree.Element)):
if tag.tag in ('country-region', 'place', 'placetype', 'placename',
- 'state', 'city'):
+ 'state', 'city', 'street', 'address'):
tag.tag = 'span'
for key in tag.attrib.keys():
tag.attrib.pop(key)
diff --git a/src/calibre/gui2/dialogs/config.py b/src/calibre/gui2/dialogs/config.py
index 5353f24544..9958ce53fa 100644
--- a/src/calibre/gui2/dialogs/config.py
+++ b/src/calibre/gui2/dialogs/config.py
@@ -196,7 +196,7 @@ class ConfigDialog(QDialog, Ui_Dialog):
self.language.addItem(language_codes[lang], QVariant(lang))
else:
lang = 'en'
- self.language.addItem('English', 'en')
+ self.language.addItem('English', QVariant('en'))
items = [(l, language_codes[l]) for l in translations.keys() \
if l != lang]
if lang != 'en':
diff --git a/src/calibre/gui2/main.py b/src/calibre/gui2/main.py
index 163a9d8bd0..4ecfc08f58 100644
--- a/src/calibre/gui2/main.py
+++ b/src/calibre/gui2/main.py
@@ -1406,7 +1406,15 @@ class Main(MainWindow, Ui_MainWindow):
dir = os.path.expanduser('~/Library')
self.library_path = os.path.abspath(dir)
if not os.path.exists(self.library_path):
- os.makedirs(self.library_path)
+ try:
+     os.makedirs(self.library_path)
+ except:  # the configured location cannot be created: fall back to ~/Library
+     bad_path, self.library_path = self.library_path, os.path.expanduser('~/Library')  # keep the failed path for the message
+     error_dialog(self, _('Invalid library location'),
+         _('Could not access %s. Using %s as the library.')%
+         (repr(bad_path), repr(self.library_path))).exec_()
+     if not os.path.exists(self.library_path):  # the fallback may already exist; os.makedirs raises if it does
+         os.makedirs(self.library_path)
def read_settings(self):
diff --git a/src/calibre/trac/donations/server.py b/src/calibre/trac/donations/server.py
index 8e7a096353..24174db801 100644
--- a/src/calibre/trac/donations/server.py
+++ b/src/calibre/trac/donations/server.py
@@ -196,7 +196,7 @@ class Server(object):
def calculate_month_trend(self, days=31):
stats = self.get_slice(date.today()-timedelta(days=days-1), date.today())
- fig = plt.figure(2, (12, 4), 96)#, facecolor, edgecolor, frameon, FigureClass)
+ fig = plt.figure(2, (10, 4), 96)#, facecolor, edgecolor, frameon, FigureClass)
fig.clear()
ax = fig.add_subplot(111)
x = list(range(days-1, -1, -1))
@@ -216,7 +216,7 @@ Donors per day: %(dpd).2f
ad=stats.average_deviation,
dpd=len(stats.totals)/float(stats.period.days),
)
- text = ax.annotate(text, (0.6, 0.65), textcoords='axes fraction')
+ text = ax.annotate(text, (0.5, 0.65), textcoords='axes fraction')
fig.savefig(self.MONTH_TRENDS)
def calculate_trend(self):
diff --git a/src/calibre/web/feeds/__init__.py b/src/calibre/web/feeds/__init__.py
index 3f0ec414a2..4a0f6b47f7 100644
--- a/src/calibre/web/feeds/__init__.py
+++ b/src/calibre/web/feeds/__init__.py
@@ -98,7 +98,7 @@ class Feed(object):
if len(self.articles) >= max_articles_per_feed:
break
self.parse_article(item)
-
+
def populate_from_preparsed_feed(self, title, articles, oldest_article=7,
max_articles_per_feed=100):
@@ -156,7 +156,6 @@ class Feed(object):
content = None
if not link and not content:
return
-
article = Article(id, title, link, description, published, content)
delta = datetime.utcnow() - article.utctime
if delta.days*24*3600 + delta.seconds <= 24*3600*self.oldest_article:
diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py
index 7d61cead5b..bcc3cb050d 100644
--- a/src/calibre/web/feeds/news.py
+++ b/src/calibre/web/feeds/news.py
@@ -1011,7 +1011,8 @@ class BasicNewsRecipe(object):
feed.description = unicode(err)
parsed_feeds.append(feed)
self.log_exception(msg)
-
+
+
return parsed_feeds
@classmethod
diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py
index b2c18b26a8..793d5cf45d 100644
--- a/src/calibre/web/feeds/recipes/__init__.py
+++ b/src/calibre/web/feeds/recipes/__init__.py
@@ -33,7 +33,7 @@ recipe_modules = ['recipe_' + r for r in (
'la_republica', 'physics_today', 'chicago_tribune', 'e_novine',
'al_jazeera', 'winsupersite', 'borba', 'courrierinternational',
'lamujerdemivida', 'soldiers', 'theonion', 'news_times',
- 'el_universal', 'mediapart', 'wikinews_en', 'ecogeek',
+ 'el_universal', 'mediapart', 'wikinews_en', 'ecogeek', 'daily_mail',
)]
import re, imp, inspect, time, os
diff --git a/src/calibre/web/feeds/recipes/recipe_daily_mail.py b/src/calibre/web/feeds/recipes/recipe_daily_mail.py
new file mode 100644
index 0000000000..c64e328bf2
--- /dev/null
+++ b/src/calibre/web/feeds/recipes/recipe_daily_mail.py
@@ -0,0 +1,42 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class TheDailyMail(BasicNewsRecipe):
+    '''Recipe collecting the section RSS feeds of dailymail.co.uk.'''
+
+    title = u'The Daily Mail'
+    author = 'RufusA'
+    language = _('English')
+
+    # Download limits
+    oldest_article = 2
+    max_articles_per_feed = 50
+    simultaneous_downloads = 1
+
+    # Styling
+    no_stylesheets = True
+    extra_css = 'h1 {text-align: left;}'
+
+    # Markup selectors (tag name plus attributes) used to trim each page
+    remove_tags_before = {'name': 'div', 'attrs': {'id': 'content'}}
+    remove_tags_after = {'name': 'h3', 'attrs': {'class': 'social-links-title'}}
+    remove_tags = [{'name': 'ul', 'attrs': {'class': 'article-icons-links'}}]
+
+    # (section title, feed URL) pairs
+    feeds = [
+        (u'Home', u'http://www.dailymail.co.uk/home/index.rss'),
+        (u'News', u'http://www.dailymail.co.uk/news/index.rss'),
+        (u'Sport', u'http://www.dailymail.co.uk/sport/index.rss'),
+        (u'TV and Showbiz', u'http://www.dailymail.co.uk/tvshowbiz/index.rss'),
+        (u'Femail', u'http://www.dailymail.co.uk/femail/index.rss'),
+        (u'Health', u'http://www.dailymail.co.uk/health/index.rss'),
+        (u'Science and Technology', u'http://www.dailymail.co.uk/sciencetech/index.rss'),
+        (u'Money', u'http://www.dailymail.co.uk/money/index.rss'),
+        (u'Property', u'http://www.dailymail.co.uk/property/index.rss'),
+        (u'Motoring', u'http://www.dailymail.co.uk/motoring/index.rss'),
+        (u'Travel', u'http://www.dailymail.co.uk/travel/index.rss'),
+    ]
+
+    def print_version(self, url):
+        '''Return url with everything from the first "?" onwards replaced by "?printingPage=true".'''
+        base = url.split('?', 1)[0]
+        return base + '?printingPage=true'
diff --git a/src/calibre/web/feeds/recipes/recipe_iht.py b/src/calibre/web/feeds/recipes/recipe_iht.py
index c30be70dea..1bee27d061 100644
--- a/src/calibre/web/feeds/recipes/recipe_iht.py
+++ b/src/calibre/web/feeds/recipes/recipe_iht.py
@@ -3,6 +3,7 @@ __copyright__ = '2008, Derry FitzGerald'
'''
iht.com
'''
+import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ptempfile import PersistentTemporaryFile
@@ -16,7 +17,12 @@ class InternationalHeraldTribune(BasicNewsRecipe):
max_articles_per_feed = 10
no_stylesheets = True
- remove_tags = [dict(name='div', attrs={'class':'footer'})]
+ remove_tags = [dict(name='div', attrs={'class':'footer'}),
+ dict(name=['form'])]
+ preprocess_regexps = [
+ (re.compile(r'