diff --git a/src/calibre/ebooks/chardet/__init__.py b/src/calibre/ebooks/chardet/__init__.py index af6d724883..971ac9bc9a 100644 --- a/src/calibre/ebooks/chardet/__init__.py +++ b/src/calibre/ebooks/chardet/__init__.py @@ -99,7 +99,8 @@ def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False, try: raw = raw.decode(encoding, 'replace') except LookupError: - raw = raw.decode('utf-8', 'replace') + encoding = 'utf-8' + raw = raw.decode(encoding, 'replace') if strip_encoding_pats: raw = strip_encoding_declarations(raw) diff --git a/src/calibre/ebooks/epub/from_html.py b/src/calibre/ebooks/epub/from_html.py index ffe402538f..47d278a2b6 100644 --- a/src/calibre/ebooks/epub/from_html.py +++ b/src/calibre/ebooks/epub/from_html.py @@ -197,6 +197,9 @@ class HTMLProcessor(Processor, Rationalizer): if not tag.text and not tag.get('src', False): tag.getparent().remove(tag) + for tag in self.root.xpath('//form'): + tag.getparent().remove(tag) + if self.opts.linearize_tables: for tag in self.root.xpath('//table | //tr | //th | //td'): tag.tag = 'div' diff --git a/src/calibre/ebooks/lrf/html/convert_from.py b/src/calibre/ebooks/lrf/html/convert_from.py index 056666b301..9ec4857126 100644 --- a/src/calibre/ebooks/lrf/html/convert_from.py +++ b/src/calibre/ebooks/lrf/html/convert_from.py @@ -99,6 +99,10 @@ class HTMLConverter(object): # Replace common line break patterns with line breaks (re.compile(r'

( |\s)*

', re.IGNORECASE), lambda m: '
'), + # Replace empty headers with line breaks + (re.compile(r'( |\s)*', + re.IGNORECASE), lambda m: '
'), + # Replace entities (re.compile(ur'&(\S+?);'), partial(entity_to_unicode, exceptions=['lt', 'gt', 'amp'])), diff --git a/src/calibre/ebooks/lrf/meta.py b/src/calibre/ebooks/lrf/meta.py index 322835f470..6ec87892d6 100644 --- a/src/calibre/ebooks/lrf/meta.py +++ b/src/calibre/ebooks/lrf/meta.py @@ -530,7 +530,7 @@ class LRFMetaFile(object): """ See L{file.write} """ self._file.write(val) - def objects(self): + def _objects(self): self._file.seek(self.object_index_offset) c = self.number_of_objects while c > 0: @@ -543,7 +543,7 @@ class LRFMetaFile(object): def get_objects_by_type(self, type): from calibre.ebooks.lrf.tags import Tag objects = [] - for id, offset, size in self.objects(): + for id, offset, size in self._objects(): self._file.seek(offset) tag = Tag(self._file) if tag.id == 0xF500: @@ -554,7 +554,7 @@ class LRFMetaFile(object): def get_object_by_id(self, tid): from calibre.ebooks.lrf.tags import Tag - for id, offset, size in self.objects(): + for id, offset, size in self._objects(): self._file.seek(offset) tag = Tag(self._file) if tag.id == 0xF500: diff --git a/src/calibre/ebooks/metadata/lit.py b/src/calibre/ebooks/metadata/lit.py index 7b3c873b38..071111e0f7 100644 --- a/src/calibre/ebooks/metadata/lit.py +++ b/src/calibre/ebooks/metadata/lit.py @@ -19,14 +19,22 @@ def get_metadata(stream): for item in opf.iterguide(): if 'cover' not in item.get('type', '').lower(): continue + ctype = item.get('type') href = item.get('href', '') candidates = [href, href.replace('&', '%26')] for item in litfile.manifest.values(): if item.path in candidates: - covers.append(item.internal) + try: + covers.append((litfile.get_file('/data/'+item.internal), + ctype)) + except: + pass break - covers = [litfile.get_file('/data/' + i) for i in covers] - covers.sort(cmp=lambda x, y:cmp(len(x), len(y))) - mi.cover_data = ('jpg', covers[-1]) + covers.sort(cmp=lambda x, y:cmp(len(x[0]), len(y[0])), reverse=True) + idx = 0 + if len(covers) > 1: + if covers[1][1] == covers[1][0]+'-standard': + idx = 1 + mi.cover_data = ('jpg', covers[idx][0]) return mi diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py index 7708dc768a..85057017a6 100644 --- a/src/calibre/ebooks/mobi/reader.py +++ b/src/calibre/ebooks/mobi/reader.py @@ -312,7 +312,7 @@ class MobiReader(object): mobi_version = self.book_header.mobi_version for i, tag in enumerate(root.iter(etree.Element)): if tag.tag in ('country-region', 'place', 'placetype', 'placename', - 'state', 'city'): + 'state', 'city', 'street', 'address'): tag.tag = 'span' for key in tag.attrib.keys(): tag.attrib.pop(key) diff --git a/src/calibre/gui2/dialogs/config.py b/src/calibre/gui2/dialogs/config.py index 5353f24544..9958ce53fa 100644 --- a/src/calibre/gui2/dialogs/config.py +++ b/src/calibre/gui2/dialogs/config.py @@ -196,7 +196,7 @@ class ConfigDialog(QDialog, Ui_Dialog): self.language.addItem(language_codes[lang], QVariant(lang)) else: lang = 'en' - self.language.addItem('English', 'en') + self.language.addItem('English', QVariant('en')) items = [(l, language_codes[l]) for l in translations.keys() \ if l != lang] if lang != 'en': diff --git a/src/calibre/gui2/main.py b/src/calibre/gui2/main.py index 163a9d8bd0..4ecfc08f58 100644 --- a/src/calibre/gui2/main.py +++ b/src/calibre/gui2/main.py @@ -1406,7 +1406,15 @@ class Main(MainWindow, Ui_MainWindow): dir = os.path.expanduser('~/Library') self.library_path = os.path.abspath(dir) if not os.path.exists(self.library_path): - os.makedirs(self.library_path) + try: + os.makedirs(self.library_path) + except: + self.library_path = os.path.expanduser('~/Library') + error_dialog(self, _('Invalid library location'), + _('Could not access %s. Using %s as the library.')% + (repr(self.library_path), repr(self.library_path)) + ).exec_() + os.makedirs(self.library_path) def read_settings(self): diff --git a/src/calibre/trac/donations/server.py b/src/calibre/trac/donations/server.py index 8e7a096353..24174db801 100644 --- a/src/calibre/trac/donations/server.py +++ b/src/calibre/trac/donations/server.py @@ -196,7 +196,7 @@ class Server(object): def calculate_month_trend(self, days=31): stats = self.get_slice(date.today()-timedelta(days=days-1), date.today()) - fig = plt.figure(2, (12, 4), 96)#, facecolor, edgecolor, frameon, FigureClass) + fig = plt.figure(2, (10, 4), 96)#, facecolor, edgecolor, frameon, FigureClass) fig.clear() ax = fig.add_subplot(111) x = list(range(days-1, -1, -1)) @@ -216,7 +216,7 @@ Donors per day: %(dpd).2f ad=stats.average_deviation, dpd=len(stats.totals)/float(stats.period.days), ) - text = ax.annotate(text, (0.6, 0.65), textcoords='axes fraction') + text = ax.annotate(text, (0.5, 0.65), textcoords='axes fraction') fig.savefig(self.MONTH_TRENDS) def calculate_trend(self): diff --git a/src/calibre/web/feeds/__init__.py b/src/calibre/web/feeds/__init__.py index 3f0ec414a2..4a0f6b47f7 100644 --- a/src/calibre/web/feeds/__init__.py +++ b/src/calibre/web/feeds/__init__.py @@ -98,7 +98,7 @@ class Feed(object): if len(self.articles) >= max_articles_per_feed: break self.parse_article(item) - + def populate_from_preparsed_feed(self, title, articles, oldest_article=7, max_articles_per_feed=100): @@ -156,7 +156,6 @@ class Feed(object): content = None if not link and not content: return - article = Article(id, title, link, description, published, content) delta = datetime.utcnow() - article.utctime if delta.days*24*3600 + delta.seconds <= 24*3600*self.oldest_article: diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index 7d61cead5b..bcc3cb050d 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -1011,7 +1011,8 @@ class BasicNewsRecipe(object): feed.description = unicode(err) parsed_feeds.append(feed) self.log_exception(msg) - + + return parsed_feeds @classmethod diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py index b2c18b26a8..793d5cf45d 100644 --- a/src/calibre/web/feeds/recipes/__init__.py +++ b/src/calibre/web/feeds/recipes/__init__.py @@ -33,7 +33,7 @@ recipe_modules = ['recipe_' + r for r in ( 'la_republica', 'physics_today', 'chicago_tribune', 'e_novine', 'al_jazeera', 'winsupersite', 'borba', 'courrierinternational', 'lamujerdemivida', 'soldiers', 'theonion', 'news_times', - 'el_universal', 'mediapart', 'wikinews_en', 'ecogeek', + 'el_universal', 'mediapart', 'wikinews_en', 'ecogeek', 'daily_mail', )] import re, imp, inspect, time, os diff --git a/src/calibre/web/feeds/recipes/recipe_daily_mail.py b/src/calibre/web/feeds/recipes/recipe_daily_mail.py new file mode 100644 index 0000000000..c64e328bf2 --- /dev/null +++ b/src/calibre/web/feeds/recipes/recipe_daily_mail.py @@ -0,0 +1,33 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class TheDailyMail(BasicNewsRecipe): + title = u'The Daily Mail' + oldest_article = 2 + language = _('English') + author = 'RufusA' + simultaneous_downloads= 1 + max_articles_per_feed = 50 + + extra_css = 'h1 {text-align: left;}' + + remove_tags = [ dict(name='ul', attrs={'class':'article-icons-links'}) ] + remove_tags_after = dict(name='h3', attrs={'class':'social-links-title'}) + remove_tags_before = dict(name='div', attrs={'id':'content'}) + no_stylesheets = True + + feeds = [ + (u'Home', u'http://www.dailymail.co.uk/home/index.rss'), + (u'News', u'http://www.dailymail.co.uk/news/index.rss'), + (u'Sport', u'http://www.dailymail.co.uk/sport/index.rss'), + (u'TV and Showbiz', u'http://www.dailymail.co.uk/tvshowbiz/index.rss'), + (u'Femail', u'http://www.dailymail.co.uk/femail/index.rss'), + (u'Health', u'http://www.dailymail.co.uk/health/index.rss'), + (u'Science and Technology', u'http://www.dailymail.co.uk/sciencetech/index.rss'), + (u'Money', u'http://www.dailymail.co.uk/money/index.rss'), + (u'Property', u'http://www.dailymail.co.uk/property/index.rss'), + (u'Motoring', u'http://www.dailymail.co.uk/motoring/index.rss'), + (u'Travel', u'http://www.dailymail.co.uk/travel/index.rss')] + + def print_version(self, url): + main = url.partition('?')[0] + return main + '?printingPage=true' diff --git a/src/calibre/web/feeds/recipes/recipe_iht.py b/src/calibre/web/feeds/recipes/recipe_iht.py index c30be70dea..1bee27d061 100644 --- a/src/calibre/web/feeds/recipes/recipe_iht.py +++ b/src/calibre/web/feeds/recipes/recipe_iht.py @@ -3,6 +3,7 @@ __copyright__ = '2008, Derry FitzGerald' ''' iht.com ''' +import re from calibre.web.feeds.news import BasicNewsRecipe from calibre.ptempfile import PersistentTemporaryFile @@ -16,7 +17,12 @@ class InternationalHeraldTribune(BasicNewsRecipe): max_articles_per_feed = 10 no_stylesheets = True - remove_tags = [dict(name='div', attrs={'class':'footer'})] + remove_tags = [dict(name='div', attrs={'class':'footer'}), + dict(name=['form'])] + preprocess_regexps = [ + (re.compile(r'