commit 92e06792f8
Author: GRiker
Date:   2012-12-07 03:33:29 -07:00

9 changed files with 353 additions and 286 deletions

View File

@@ -19,6 +19,50 @@
# new recipes:
# - title:
- version: 0.9.9
  date: 2012-12-07

  new features:
    - title: "64 bit build for Windows"
      type: major
      description: "calibre now has a 64 bit version for Windows, available at: http://calibre-ebook.com/download_windows64. The 64 bit build is not limited to using only 3GB of RAM when converting large/complex documents. It may also be slightly faster for some tasks. You can have both the 32 bit and the 64 bit build installed at the same time; they will use the same libraries, plugins and settings."

    - title: "Content server: Make the identifiers in each book's metadata clickable."
      tickets: [1085726]

  bug fixes:
    - title: "EPUB Input: Fix an infinite loop while trying to recover a damaged EPUB file."
      tickets: [1086917]

    - title: "KF8 Input: Fix handling of links in files that link to the obsolete <a name> tags instead of tags with an id attribute."
      tickets: [1086705]

    - title: "Conversion: Fix a bug in the removal of invalid entries from the spine, where not all invalid entries were removed, causing conversion to fail."
      tickets: [1086054]

    - title: "KF8 Input: Ignore invalid flow references in the KF8 document instead of erroring out on them."
      tickets: [1085306]

    - title: "Fix command line output on Linux systems with incorrect LANG/LC_CTYPE env vars."
      tickets: [1085103]

    - title: "KF8 Input: Fix page breaks specified using the data-AmznPageBreak attribute being ignored by calibre."

    - title: "PDF Output: Fix the custom size field not accepting fractional numbers as sizes."

    - title: "Get Books: Update libre.de and publio for website changes."

    - title: "Wireless driver: Increase the timeout interval, and when allocating a random port, try port 9090 first (see the sketch after this changelog hunk)."

  improved recipes:
    - New York Times
    - Weblogs SL
    - Zaman Gazetesi
    - Aksiyon Dergisi
    - Engadget
    - Metro UK
    - Heise Online

- version: 0.9.8
  date: 2012-11-30
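The wireless-driver entry above describes a port-allocation strategy: prefer port 9090, and only fall back to random ports if it is taken. A minimal sketch of that strategy, assuming a plain TCP listening socket; allocate_port and the retry count are illustrative, not calibre's actual code:

    import random
    import socket

    def allocate_port(preferred=9090, attempts=5):
        # Try the preferred port first, then a handful of random high ports.
        candidates = [preferred] + [random.randint(8192, 65535) for _ in range(attempts)]
        for port in candidates:
            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            try:
                s.bind(('0.0.0.0', port))
                s.listen(1)
                return s, port
            except socket.error:
                s.close()  # port in use; try the next candidate
        raise socket.error('no free port among %r' % candidates)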

View File

@@ -16,6 +16,7 @@ class Harpers(BasicNewsRecipe):
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    auto_cleanup = True

    conversion_options = {
        'comment' : description
@@ -31,14 +32,14 @@ class Harpers(BasicNewsRecipe):
        .caption{font-family:Verdana,sans-serif;font-size:x-small;color:#666666;}
    '''
    keep_only_tags = [ dict(name='div', attrs={'id':'cached'}) ]
    remove_tags = [
        dict(name='table', attrs={'class':['rcnt','rcnt topline']})
        ,dict(name=['link','object','embed','meta','base'])
    ]
    remove_attributes = ['width','height']
    #keep_only_tags = [ dict(name='div', attrs={'id':'cached'}) ]
    #remove_tags = [
    #    dict(name='table', attrs={'class':['rcnt','rcnt topline']})
    #    ,dict(name=['link','object','embed','meta','base'])
    #]
    #remove_attributes = ['width','height']

    feeds = [(u"Harper's Magazine", u'http://www.harpers.org/rss/frontpage-rss20.xml')]
    feeds = [(u"Harper's Magazine", u'http://harpers.org/feed/')]

    def get_cover_url(self):
        cover_url = None
@@ -49,9 +50,9 @@ class Harpers(BasicNewsRecipe):
            cover_url = 'http://harpers.org' + link_item['src']
        return cover_url

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll(xmlns=True):
            del item['xmlns']
        return soup
    #def preprocess_html(self, soup):
    #    for item in soup.findAll(style=True):
    #        del item['style']
    #    for item in soup.findAll(xmlns=True):
    #        del item['xmlns']
    #    return soup
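The net effect of the Harpers change above: the hand-maintained keep_only_tags/remove_tags lists and the preprocess_html hook are commented out in favour of auto_cleanup, and the feed moves to harpers.org/feed/. A minimal sketch of the resulting recipe shape; the class name is illustrative, not the full recipe:

    from calibre.web.feeds.news import BasicNewsRecipe

    class HarpersSketch(BasicNewsRecipe):
        # auto_cleanup lets calibre's readability heuristics extract the
        # article body, so site-specific tag lists that break whenever
        # harpers.org changes its markup are no longer needed.
        title = u"Harper's Magazine"
        no_stylesheets = True
        auto_cleanup = True
        feeds = [(u"Harper's Magazine", u'http://harpers.org/feed/')]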

View File

@@ -15,23 +15,12 @@ class AdvancedUserRecipe(BasicNewsRecipe):
    timeout = 5
    no_stylesheets = True
    keep_only_tags = [dict(name='div', attrs={'id':'mitte_news'}),
                      dict(name='h1', attrs={'class':'clear'}),
                      dict(name='div', attrs={'class':'meldung_wrapper'})]
    remove_tags_after = dict(name='p', attrs={'class':'editor'})
    remove_tags = [dict(id='navi_top_container'),
                   dict(id='navi_bottom'),
                   dict(id='mitte_rechts'),
                   dict(id='navigation'),
                   dict(id='subnavi'),
                   dict(id='social_bookmarks'),
                   dict(id='permalink'),
                   dict(id='content_foren'),
                   dict(id='seiten_navi'),
                   dict(id='adbottom'),
                   dict(id='sitemap'),
                   dict(name='div', attrs={'id':'sitemap'}),
                   dict(name='ul', attrs={'class':'erste_zeile'}),
                   dict(name='ul', attrs={'class':'zweite_zeile'}),
                   dict(name='div', attrs={'class':'navi_top_container'})]
                   dict(name='p', attrs={'class':'size80'})]

    feeds = [
        ('Newsticker', 'http://www.heise.de/newsticker/heise.rdf'),
@@ -54,5 +43,3 @@ class AdvancedUserRecipe(BasicNewsRecipe):

    def print_version(self, url):
        return url + '?view=print'
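The print_version hook above is the other half of the cleanup: the recipe can strip far less markup because it downloads the printer-friendly page in the first place. A minimal sketch of the hook in isolation; the class name and trimmed-down feed list are illustrative:

    from calibre.web.feeds.news import BasicNewsRecipe

    class HeiseSketch(BasicNewsRecipe):
        title = u'heise online'
        feeds = [('Newsticker', 'http://www.heise.de/newsticker/heise.rdf')]

        def print_version(self, url):
            # Fetch the printer-friendly variant of each article, which
            # carries far less navigation chrome than the normal page.
            return url + '?view=print'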

View File

@@ -1,43 +1,74 @@
from calibre.web.feeds.news import BasicNewsRecipe
from calibre import strftime
import re
import datetime
import time

class AdvancedUserRecipe1306097511(BasicNewsRecipe):
    title = u'Metro UK'
    description = 'Author Dave Asbury : News from The Metro - UK'
    description = 'News as provided by The Metro - UK'
    #timefmt = ''
    __author__ = 'Dave Asbury'
    #last update 9/9/12
    #last update 9/6/12
    cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/276636_117118184990145_2132092232_n.jpg'
    no_stylesheets = True
    oldest_article = 1
    max_articles_per_feed = 12
    remove_empty_feeds = True
    remove_javascript = True
    #auto_cleanup = True
    auto_cleanup = True
    encoding = 'UTF-8'
    cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/157897_117118184990145_840702264_n.jpg'
    language = 'en_GB'
    masthead_url = 'http://e-edition.metro.co.uk/images/metro_logo.gif'

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:900;font-size:1.6em;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:1.2em;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:1.0em;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:1.0em;}
    '''
    keep_only_tags = [
        #dict(name='h1'),
        #dict(name='h2'),
        #dict(name='div', attrs={'class' : ['row','article','img-cnt figure','clrd']})
        #dict(name='h3'),
        #dict(attrs={'class' : 'BText'}),
    ]
    remove_tags = [
        dict(name='div', attrs={'class' : 'art-fd fd-gr1-b clrd'}),
        dict(name='span', attrs={'class' : 'share'}),
        dict(name='li'),
        dict(attrs={'class' : ['twitter-share-button','header-forms','hdr-lnks','close','art-rgt','fd-gr1-b clrd google-article','news m12 clrd clr-b p5t shareBtm','item-ds csl-3-img news','c-1of3 c-last','c-1of1','pd','item-ds csl-3-img sport']}),
        dict(attrs={'id' : ['','sky-left','sky-right','ftr-nav','and-ftr','notificationList','logo','miniLogo','comments-news','metro_extras']})
    ]
    remove_tags_before = dict(name='h1')
    #remove_tags_after = dict(attrs={'id':['topic-buttons']})

    feeds = [
        (u'News', u'http://www.metro.co.uk/rss/news/'),
        (u'Money', u'http://www.metro.co.uk/rss/money/'),
        (u'Sport', u'http://www.metro.co.uk/rss/sport/'),
        (u'Film', u'http://www.metro.co.uk/rss/metrolife/film/'),
        (u'Music', u'http://www.metro.co.uk/rss/metrolife/music/'),
        (u'TV', u'http://www.metro.co.uk/rss/tv/'),
        (u'Showbiz', u'http://www.metro.co.uk/rss/showbiz/'),
        (u'Weird News', u'http://www.metro.co.uk/rss/weird/'),
        (u'Travel', u'http://www.metro.co.uk/rss/travel/'),
        (u'Lifestyle', u'http://www.metro.co.uk/rss/lifestyle/'),
        (u'Books', u'http://www.metro.co.uk/rss/lifestyle/books/'),
        (u'Food', u'http://www.metro.co.uk/rss/lifestyle/restaurants/')]
    def parse_index(self):
        articles = {}
        key = None
        ans = []
        feeds = [('UK', 'http://metro.co.uk/news/uk/'),
                 ('World', 'http://metro.co.uk/news/world/'),
                 ('Weird', 'http://metro.co.uk/news/weird/'),
                 ('Money', 'http://metro.co.uk/news/money/'),
                 ('Sport', 'http://metro.co.uk/sport/'),
                 ('Guilty Pleasures', 'http://metro.co.uk/guilty-pleasures/')
                ]
        for key, feed in feeds:
            soup = self.index_to_soup(feed)
            articles[key] = []
            ans.append(key)
            today = datetime.date.today()
            today = time.mktime(today.timetuple()) - 60*60*24
            for a in soup.findAll('a'):
                for name, value in a.attrs:
                    if name == "class" and value == "post":
                        url = a['href']
                        title = a['title']
                        print title
                        description = ''
                        m = re.search('^.*uk/([^/]*)/([^/]*)/([^/]*)/', url)
                        skip = 1
                        # Guard against URLs with no embedded date: calling
                        # m.groups() on a failed search would raise AttributeError.
                        if m is not None and len(m.groups()) == 3:
                            g = m.groups()
                            dt = datetime.datetime.strptime(g[0]+'-'+g[1]+'-'+g[2], '%Y-%m-%d')
                            pubdate = time.strftime('%a, %d %b', dt.timetuple())
                            dt = time.mktime(dt.timetuple())
                            if dt >= today:
                                print pubdate
                                skip = 0
                        else:
                            pubdate = strftime('%a, %d %b')
                        summary = a.find(True, attrs={'class':'excerpt'})
                        if summary:
                            description = self.tag_to_string(summary, use_alt=False)
                        if skip == 0:
                            articles[key].append(
                                dict(title=title, url=url, date=pubdate,
                                     description=description,
                                     content=''))
        #ans = self.sort_index_by(ans, {'The Front Page':-1, 'Dining In, Dining Out':1, 'Obituaries':2})
        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
        return ans
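The freshness filter in parse_index above hinges on the regex recovering year/month/day from the article URL. A standalone check of that pattern, using a made-up URL of the same /YYYY/MM/DD/slug/ shape:

    import datetime
    import re
    import time

    url = 'http://metro.co.uk/2012/12/06/example-story-3305923/'  # hypothetical
    m = re.search('^.*uk/([^/]*)/([^/]*)/([^/]*)/', url)
    if m is not None:
        g = m.groups()
        dt = datetime.datetime.strptime(g[0] + '-' + g[1] + '-' + g[2], '%Y-%m-%d')
        print time.strftime('%a, %d %b', dt.timetuple())  # -> Thu, 06 Dec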

View File

@@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = u'calibre'
numeric_version = (0, 9, 8)
numeric_version = (0, 9, 9)
__version__ = u'.'.join(map(unicode, numeric_version))
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"

View File

@@ -13,6 +13,7 @@ const calibre_device_entry_t calibre_mtp_device_table[] = {
    // Amazon Kindle Fire HD
    , { "Amazon", 0x1949, "Fire HD", 0x0007, DEVICE_FLAGS_ANDROID_BUGS}
    , { "Amazon", 0x1949, "Fire HD", 0x000a, DEVICE_FLAGS_ANDROID_BUGS}

    // Nexus 10
    , { "Google", 0x18d1, "Nexus 10", 0x4ee2, DEVICE_FLAGS_ANDROID_BUGS}

View File

@@ -60,9 +60,9 @@ class PublioStore(BasicStoreConfig, StorePlugin):
                        series = ''.join(data.xpath('./div[@class="desc"]/div[@class="detailShortList"]/div[last()]/a/@title'))
                        title = title + ' (seria ' + series + ')'
                    author = ', '.join(data.xpath('./div[@class="desc"]/div[@class="detailShortList"]/div[@class="row"][1]/a/@title'))
                    price = ''.join(data.xpath('.//div[@class="priceBoxContener "]/div/ins/text()'))
                    price = ''.join(data.xpath('.//div[@class="priceBox tk-museo-slab"]/ins/text()'))
                    if not price:
                        price = ''.join(data.xpath('.//div[@class="priceBoxContener "]/div/text()'))
                        price = ''.join(data.xpath('.//div[@class="priceBox tk-museo-slab"]/text()')).strip()
                    formats = ', '.join(data.xpath('.//div[@class="formats"]/a/img/@alt'))

                    counter -= 1
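The Publio fix above tracks a site redesign: the price now lives in a single priceBox tk-museo-slab div, with discounted prices wrapped in an ins element. A self-contained check of the two new XPath expressions against mocked-up markup; the HTML snippet and price values are invented:

    from lxml import html

    snippet = html.fromstring(
        '<div><div class="priceBox tk-museo-slab">'
        '<del>39,90 zl</del><ins>19,90 zl</ins></div></div>')
    price = ''.join(snippet.xpath('.//div[@class="priceBox tk-museo-slab"]/ins/text()'))
    if not price:
        # No <ins> means no discount; the price is bare text inside the div.
        price = ''.join(snippet.xpath('.//div[@class="priceBox tk-museo-slab"]/text()')).strip()
    print price  # -> 19,90 zl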

File diff suppressed because it is too large

View File

@@ -48,7 +48,7 @@ class ConnectedWorker(Thread):
        try:
            eintr_retry_call(conn.send, self.args)
            self.res = eintr_retry_call(conn.recv)
        except:
        except BaseException:
            self.tb = traceback.format_exc()

def communicate(ans, worker, listener, args, timeout=300, heartbeat=None,
@@ -77,6 +77,8 @@ def communicate(ans, worker, listener, args, timeout=300, heartbeat=None,
    if cw.tb:
        raise WorkerError('Failed to communicate with worker process')
    if cw.res is None:
        raise WorkerError('Something strange happened. The worker process was aborted without an exception.')
    if cw.res.get('tb', None):
        raise WorkerError('Worker failed', cw.res['tb'])
    ans['result'] = cw.res['result']
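Both hunks above serve the same goal: a failure in the worker should surface in the parent as a WorkerError carrying the formatted traceback, never as a silent None dereference. A minimal sketch of that capture-and-reraise pattern; the function and names are illustrative, not calibre's API:

    import traceback

    def run_guarded(fn, *args):
        # Mirror ConnectedWorker: store the traceback as a string instead of
        # letting the exception die inside the worker thread.
        try:
            return fn(*args), None
        except BaseException:
            return None, traceback.format_exc()

    result, tb = run_guarded(int, 'not a number')
    if tb is not None:
        raise RuntimeError('worker failed:\n' + tb)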