Pull from trunk

2025-07-09 03:04:10 -04:00 · 2009-03-10 19:30:20 -07:00 · 2009-03-10 19:30:20 -07:00 · 9445f488c2
commit 9445f488c2
parent 5067a62e71 77f2f9d76f
14 changed files with 81 additions and 18 deletions
--- a/src/calibre/ebooks/chardet/init.py
+++ b/src/calibre/ebooks/chardet/init.py
@ -99,7 +99,8 @@ def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False,
        try:
            raw = raw.decode(encoding, 'replace')
        except LookupError:
-            raw = raw.decode('utf-8', 'replace')
+            encoding = 'utf-8'
            raw = raw.decode(encoding, 'replace')
    if strip_encoding_pats:
        raw = strip_encoding_declarations(raw)
--- a/src/calibre/ebooks/epub/from_html.py
+++ b/src/calibre/ebooks/epub/from_html.py
@ -197,6 +197,9 @@ class HTMLProcessor(Processor, Rationalizer):
            if not tag.text and not tag.get('src', False):
                tag.getparent().remove(tag)
        for tag in self.root.xpath('//form'):
            tag.getparent().remove(tag)
        if self.opts.linearize_tables:
            for tag in self.root.xpath('//table | //tr | //th | //td'):
                tag.tag = 'div'
--- a/src/calibre/ebooks/lrf/html/convert_from.py
+++ b/src/calibre/ebooks/lrf/html/convert_from.py
@ -99,6 +99,10 @@ class HTMLConverter(object):
                        # Replace common line break patterns with line breaks
                        (re.compile(r'<p>(&nbsp;|\s)*</p>', re.IGNORECASE), lambda m: '<br />'),
                        # Replace empty headers with line breaks
                        (re.compile(r'<h[0-5]?>(&nbsp;|\s)*</h[0-5]?>', 
                                    re.IGNORECASE), lambda m: '<br />'),
                        # Replace entities
                        (re.compile(ur'&(\S+?);'), partial(entity_to_unicode, 
                                                           exceptions=['lt', 'gt', 'amp'])),
--- a/src/calibre/ebooks/lrf/meta.py
+++ b/src/calibre/ebooks/lrf/meta.py
@ -530,7 +530,7 @@ class LRFMetaFile(object):
        """ See L{file.write} """
        self._file.write(val)
-    def objects(self):
+    def _objects(self):
        self._file.seek(self.object_index_offset)
        c = self.number_of_objects
        while c > 0:
@ -543,7 +543,7 @@ class LRFMetaFile(object):
    def get_objects_by_type(self, type):
        from calibre.ebooks.lrf.tags import Tag
        objects = []
-        for id, offset, size in self.objects():
+        for id, offset, size in self._objects():
            self._file.seek(offset)
            tag = Tag(self._file)
            if tag.id == 0xF500:
@ -554,7 +554,7 @@ class LRFMetaFile(object):
    def get_object_by_id(self, tid):
        from calibre.ebooks.lrf.tags import Tag
-        for id, offset, size in self.objects():
+        for id, offset, size in self._objects():
            self._file.seek(offset)
            tag = Tag(self._file)
            if tag.id == 0xF500:
--- a/src/calibre/ebooks/metadata/lit.py
+++ b/src/calibre/ebooks/metadata/lit.py
@ -19,14 +19,22 @@ def get_metadata(stream):
    for item in opf.iterguide():
        if 'cover' not in item.get('type', '').lower():
            continue
        ctype = item.get('type')
        href = item.get('href', '')
        candidates = [href, href.replace('&', '%26')]
        for item in litfile.manifest.values():
            if item.path in candidates:
-                covers.append(item.internal)
+                try:
                    covers.append((litfile.get_file('/data/'+item.internal), 
                                   ctype))
                except:
                    pass
                break
-    covers = [litfile.get_file('/data/' + i) for i in covers]
+    covers.sort(cmp=lambda x, y:cmp(len(x[0]), len(y[0])), reverse=True)
-    covers.sort(cmp=lambda x, y:cmp(len(x), len(y)))
+    idx = 0
-    mi.cover_data = ('jpg', covers[-1])
+    if len(covers) > 1:
        if covers[1][1] == covers[1][0]+'-standard':
            idx = 1
    mi.cover_data = ('jpg', covers[idx][0])
    return mi
--- a/src/calibre/ebooks/mobi/reader.py
+++ b/src/calibre/ebooks/mobi/reader.py
@ -312,7 +312,7 @@ class MobiReader(object):
        mobi_version = self.book_header.mobi_version
        for i, tag in enumerate(root.iter(etree.Element)):
            if tag.tag in ('country-region', 'place', 'placetype', 'placename',
-                           'state', 'city'):
+                           'state', 'city', 'street', 'address'):
                tag.tag = 'span'
                for key in tag.attrib.keys():
                    tag.attrib.pop(key)
--- a/src/calibre/gui2/dialogs/config.py
+++ b/src/calibre/gui2/dialogs/config.py
@ -196,7 +196,7 @@ class ConfigDialog(QDialog, Ui_Dialog):
            self.language.addItem(language_codes[lang], QVariant(lang))
        else:
            lang = 'en'
-            self.language.addItem('English', 'en')
+            self.language.addItem('English', QVariant('en'))
        items = [(l, language_codes[l]) for l in translations.keys() \
                 if l != lang]
        if lang != 'en':
--- a/src/calibre/gui2/main.py
+++ b/src/calibre/gui2/main.py
@ -1406,7 +1406,15 @@ class Main(MainWindow, Ui_MainWindow):
                dir = os.path.expanduser('~/Library')
            self.library_path = os.path.abspath(dir)
        if not os.path.exists(self.library_path):
-            os.makedirs(self.library_path)
+            try:
                os.makedirs(self.library_path)
            except:
                self.library_path = os.path.expanduser('~/Library')
                error_dialog(self, _('Invalid library location'), 
                     _('Could not access %s. Using %s as the library.')%
                     (repr(self.library_path), repr(self.library_path)) 
                             ).exec_()
                os.makedirs(self.library_path)
    def read_settings(self):
--- a/src/calibre/trac/donations/server.py
+++ b/src/calibre/trac/donations/server.py
@ -196,7 +196,7 @@ class Server(object):
    def calculate_month_trend(self, days=31):
        stats = self.get_slice(date.today()-timedelta(days=days-1), date.today())
-        fig = plt.figure(2, (12, 4), 96)#, facecolor, edgecolor, frameon, FigureClass)
+        fig = plt.figure(2, (10, 4), 96)#, facecolor, edgecolor, frameon, FigureClass)
        fig.clear()
        ax = fig.add_subplot(111)
        x = list(range(days-1, -1, -1))
@ -216,7 +216,7 @@ Donors per day: %(dpd).2f
                 ad=stats.average_deviation,
                 dpd=len(stats.totals)/float(stats.period.days),
             )
-        text = ax.annotate(text, (0.6, 0.65), textcoords='axes fraction')
+        text = ax.annotate(text, (0.5, 0.65), textcoords='axes fraction')
        fig.savefig(self.MONTH_TRENDS)
    def calculate_trend(self):
--- a/src/calibre/web/feeds/init.py
+++ b/src/calibre/web/feeds/init.py
@ -156,7 +156,6 @@ class Feed(object):
            content = None
        if not link and not content:
            return
        article = Article(id, title, link, description, published, content)
        delta = datetime.utcnow() - article.utctime
        if delta.days*24*3600 + delta.seconds <= 24*3600*self.oldest_article:
--- a/src/calibre/web/feeds/news.py
+++ b/src/calibre/web/feeds/news.py
@ -1012,6 +1012,7 @@ class BasicNewsRecipe(object):
                parsed_feeds.append(feed)
                self.log_exception(msg)
        return parsed_feeds
    @classmethod
--- a/src/calibre/web/feeds/recipes/init.py
+++ b/src/calibre/web/feeds/recipes/init.py
@ -33,7 +33,7 @@ recipe_modules = ['recipe_' + r for r in (
           'la_republica', 'physics_today', 'chicago_tribune', 'e_novine',
           'al_jazeera', 'winsupersite', 'borba', 'courrierinternational',
           'lamujerdemivida', 'soldiers', 'theonion', 'news_times',
-           'el_universal', 'mediapart', 'wikinews_en', 'ecogeek',
+           'el_universal', 'mediapart', 'wikinews_en', 'ecogeek', 'daily_mail',
          )]
 import re, imp, inspect, time, os
--- a/src/calibre/web/feeds/recipes/recipe_daily_mail.py
+++ b/src/calibre/web/feeds/recipes/recipe_daily_mail.py
@ -0,0 +1,33 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 class TheDailyMail(BasicNewsRecipe):
    """Recipe settings for the Daily Mail RSS feeds (dailymail.co.uk)."""

    # Identification.
    title          = u'The Daily Mail'
    author = 'RufusA'
    language = _('English')

    # Download limits.
    oldest_article = 2
    max_articles_per_feed = 50
    simultaneous_downloads= 1

    # Styling and markup filters; presumably interpreted by the
    # BasicNewsRecipe base class -- confirm against its documentation.
    no_stylesheets = True
    extra_css = 'h1 {text-align: left;}'
    remove_tags_before  = {'name': 'div', 'attrs': {'id': 'content'}}
    remove_tags_after  = {'name': 'h3',
                          'attrs': {'class': 'social-links-title'}}
    remove_tags = [{'name': 'ul', 'attrs': {'class': 'article-icons-links'}}]

    # (section title, RSS URL) pairs, one per site section.
    feeds          = [
        (u'Home', u'http://www.dailymail.co.uk/home/index.rss'),
        (u'News', u'http://www.dailymail.co.uk/news/index.rss'),
        (u'Sport', u'http://www.dailymail.co.uk/sport/index.rss'),
        (u'TV and Showbiz', u'http://www.dailymail.co.uk/tvshowbiz/index.rss'),
        (u'Femail', u'http://www.dailymail.co.uk/femail/index.rss'),
        (u'Health', u'http://www.dailymail.co.uk/health/index.rss'),
        (u'Science and Technology',
         u'http://www.dailymail.co.uk/sciencetech/index.rss'),
        (u'Money', u'http://www.dailymail.co.uk/money/index.rss'),
        (u'Property', u'http://www.dailymail.co.uk/property/index.rss'),
        (u'Motoring', u'http://www.dailymail.co.uk/motoring/index.rss'),
        (u'Travel', u'http://www.dailymail.co.uk/travel/index.rss'),
    ]

    def print_version(self, url):
        """Return ``url`` with any query string replaced by ``?printingPage=true``."""
        # Everything from the first '?' onwards is discarded before the
        # printing-page query is appended.
        without_query = url.split('?', 1)[0]
        return without_query + '?printingPage=true'
--- a/src/calibre/web/feeds/recipes/recipe_iht.py
+++ b/src/calibre/web/feeds/recipes/recipe_iht.py
@ -3,6 +3,7 @@ __copyright__ = '2008, Derry FitzGerald'
 '''
 iht.com
 '''
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ptempfile import PersistentTemporaryFile
@ -16,7 +17,12 @@ class InternationalHeraldTribune(BasicNewsRecipe):
    max_articles_per_feed = 10
    no_stylesheets = True
-    remove_tags    = [dict(name='div', attrs={'class':'footer'})]
+    remove_tags    = [dict(name='div', attrs={'class':'footer'}),
                      dict(name=['form'])]
    preprocess_regexps = [
            (re.compile(r'<!-- webtrends.*', re.DOTALL), 
             lambda m:'</body></html>')
                          ]
    extra_css      = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt  }' 
    feeds          = [