Merge from trunk.
Commit: c5bc937e6e
@ -579,9 +579,23 @@ Yes, you can. Follow the instructions in the answer above for adding custom colu
How do I move my |app| library from one computer to another?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Simply copy the |app| library folder from the old to the new computer. You can
find out what the library folder is by clicking the calibre icon in the
toolbar. The very first item is the path to the library folder. Now on the new
computer, start |app| for the first time. It will run the Welcome Wizard asking
you for the location of the |app| library. Point it to the previously copied
folder. If the computer you are transferring to already has a calibre
installation, then the Welcome wizard won't run. In that case, right-click the
|app| icon in the toolbar and point it to the newly copied directory. You will
now have two |app| libraries on your computer and you can switch between them
by clicking the |app| icon on the toolbar. Transferring your library in this
manner preserves all your metadata, tags, custom columns, etc.

Note that if you are transferring between different types of computers (for
example Windows to OS X) then after doing the above you should also right-click
the |app| icon on the tool bar, select Library Maintenance and run the Check
Library action. It will warn you about any problems in your library, which you
should fix by hand.

.. note:: A |app| library is just a folder which contains all the book files and their metadata. All the metadata is stored in a single file called metadata.db, in the top level folder. If this file gets corrupted, you may see an empty list of books in |app|. In this case you can ask |app| to restore your books by doing a right-click on the |app| icon in the toolbar and selecting Library Maintenance->Restore Library.
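The note above says that everything except the book files lives in metadata.db, a single SQLite file at the top of the library folder. As a rough illustration only (not part of this commit, and assuming calibre's standard schema with a books table holding a title column), the book list can be peeked at directly:

    import sqlite3

    # Illustrative only: point this at the top level folder of a calibre library.
    library_folder = '/path/to/Calibre Library'
    db = sqlite3.connect(library_folder + '/metadata.db')

    # One row per book in the 'books' table (assumed standard calibre schema).
    for book_id, title in db.execute('SELECT id, title FROM books ORDER BY id'):
        print(book_id, title)
    db.close()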
@ -7,7 +7,6 @@ description = 'Italian daily newspaper - 09-11-2011'
'''
http://www.ilgiornale.it/
'''
-from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.web.feeds.news import BasicNewsRecipe

class IlGiornale(BasicNewsRecipe):
@ -25,35 +24,39 @@ class IlGiornale(BasicNewsRecipe):
    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
+   #auto_cleanup = True
+   #auto_cleanup_keep = '//div[@id="insertbox_text"]'

    no_stylesheets = True
    conversion_options = {'linearize_tables':True}
    remove_javascript = True

+   keep_only_tags = [dict(name='h1', attrs={'class':'entry-title'}), dict(name='div', attrs={'id':'insertbox_text'})]

-   def get_article_url(self, article):
-       return article.get('guid', article.get('id', None))
-
-   def print_version(self, url):
-       raw = self.browser.open(url).read()
-       soup = BeautifulSoup(raw.decode('utf8', 'replace'))
-       all_print_tags = soup.find('div', {'id':'print_article'})
-       print_link = all_print_tags.a
-       if print_link is None:
-           return url
-       return 'http://www.ilgiornale.it' + print_link['href']
+   #def get_article_url(self, article):
+   #   return article.get('guid', article.get('id', None))
+
+   #def print_version(self, url):
+   #   raw = self.browser.open(url).read()
+   #   soup = BeautifulSoup(raw.decode('utf8', 'replace'))
+   #   all_print_tags = soup.find('div', {'id':'print_article'})
+   #   print_link = all_print_tags.a
+   #   if print_link is None:
+   #       return url
+   #   return 'http://www.ilgiornale.it' + print_link['href']

    feeds = [
-             (u'Ultime Notizie',u'http://www.ilgiornale.it/?RSS=S'),
-             (u'All\'Interno', u'http://www.ilgiornale.it/la_s.pic1?SID=8&RSS=S'),
-             (u'Esteri', u'http://www.ilgiornale.it/la_s.pic1?SID=6&RSS=S'),
-             (u'Economia', u'http://www.ilgiornale.it/la_s.pic1?SID=5&RSS=S'),
-             (u'Cultura', u'http://www.ilgiornale.it/la_s.pic1?SID=4&RSS=S'),
-             (u'Spettacoli', u'http://www.ilgiornale.it/la_s.pic1?SID=14&RSS=S'),
-             (u'Sport', u'http://www.ilgiornale.it/la_s.pic1?SID=15&RSS=S'),
-             (u'Tech&Web', u'http://www.ilgiornale.it/la_s.pic1?SID=35&RSS=S'),
-             (u'Edizione di Roma', u'http://www.ilgiornale.it/roma.pic1?SID=13&RSS=S'),
-             (u'Edizione di Milano', u'http://www.ilgiornale.it/milano.pic1?SID=9&RSS=S'),
-             (u'Edizione di Genova', u'http://www.ilgiornale.it/genova.pic1?SID=7&RSS=S')
+             (u'Ultime Notizie',u'http://www.ilgiornale.it/rss.xml'),
+             #(u'All\'Interno', u'http://www.ilgiornale.it/la_s.pic1?SID=8&RSS=S'),
+             #(u'Esteri', u'http://www.ilgiornale.it/la_s.pic1?SID=6&RSS=S'),
+             #(u'Economia', u'http://www.ilgiornale.it/la_s.pic1?SID=5&RSS=S'),
+             #(u'Cultura', u'http://www.ilgiornale.it/la_s.pic1?SID=4&RSS=S'),
+             #(u'Spettacoli', u'http://www.ilgiornale.it/la_s.pic1?SID=14&RSS=S'),
+             #(u'Sport', u'http://www.ilgiornale.it/la_s.pic1?SID=15&RSS=S'),
+             #(u'Tech&Web', u'http://www.ilgiornale.it/la_s.pic1?SID=35&RSS=S'),
+             #(u'Edizione di Roma', u'http://www.ilgiornale.it/roma.pic1?SID=13&RSS=S'),
+             #(u'Edizione di Milano', u'http://www.ilgiornale.it/milano.pic1?SID=9&RSS=S'),
+             #(u'Edizione di Genova', u'http://www.ilgiornale.it/genova.pic1?SID=7&RSS=S')
              ]
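The two commented auto_cleanup lines above refer to calibre's generic article extraction. A rough sketch of a variant recipe that relies on it instead of keep_only_tags (hypothetical, not what this recipe ships; the XPath is taken from the comment):

    from calibre.web.feeds.news import BasicNewsRecipe

    class IlGiornaleAuto(BasicNewsRecipe):
        # Hypothetical variant for illustration only.
        title = 'Il Giornale (auto cleanup)'
        oldest_article = 7
        max_articles_per_feed = 100
        use_embedded_content = False
        no_stylesheets = True
        remove_javascript = True

        # Let calibre guess the article body instead of listing keep_only_tags.
        auto_cleanup = True
        # Never strip the main article container during cleanup.
        auto_cleanup_keep = '//div[@id="insertbox_text"]'

        feeds = [(u'Ultime Notizie', u'http://www.ilgiornale.it/rss.xml')]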
|
@ -41,7 +41,7 @@ class NYTimes(BasicNewsRecipe):
    # number of days old an article can be for inclusion. If oldest_web_article = None all articles
    # will be included. Note: oldest_web_article is ignored if webEdition = False
    webEdition = False
-   oldest_web_article = 7
+   oldest_web_article = None

    # download higher resolution images than the small thumbnails typically included in the article
    # the down side of having large beautiful images is the file size is much larger, on the order of 7MB per paper
@ -188,6 +188,8 @@ class NYTimes(BasicNewsRecipe):
        'relatedSearchesModule',
        'side_tool',
        'singleAd',
+       'postCategory column',
+       'refer tagRefer', # added for bits blog post
        'entry entry-utility', #added for DealBook
        'entry-tags', #added for DealBook
        'footer promos clearfix', #added for DealBook
@ -324,6 +326,8 @@ class NYTimes(BasicNewsRecipe):
            return True
        if '/video/' in url:
            return True
+       if '/multimedia/' in url:
+           return True
        if '/slideshow/' in url:
            return True
        if '/magazine/index' in url:
@ -334,6 +338,15 @@ class NYTimes(BasicNewsRecipe):
            return True
        if '/premium/' in url:
            return True
+       if '#comment' in url:
+           return True
+       if '#postComment' in url:
+           return True
+       if '#postcomment' in url:
+           return True
+       if re.search('/\d\d\d\d/\d\d/\d\d/',url) is None:
+           print("NO DATE IN "+url)
+           return True
        return False

    def fixChars(self,string):
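The new checks above drop comment anchors and anything whose path lacks a /YYYY/MM/DD/ component. A small standalone sketch of that last test (the URLs are made up for illustration):

    import re

    def looks_like_dated_article(url):
        # Mirrors the checks added to exclude_url: NYT article URLs carry a
        # /YYYY/MM/DD/ segment, while index pages and comment anchors do not.
        if '#comment' in url or '#postComment' in url or '#postcomment' in url:
            return False
        return re.search(r'/\d\d\d\d/\d\d/\d\d/', url) is not None

    print(looks_like_dated_article('http://www.nytimes.com/2013/04/01/world/example.html'))  # True
    print(looks_like_dated_article('http://www.nytimes.com/pages/world/index.html'))         # False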
@ -363,6 +376,7 @@ class NYTimes(BasicNewsRecipe):

    cover_tag = 'NY_NYT'
    def get_cover_url(self):
+       from datetime import timedelta, date
        cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.cover_tag+'.jpg'
        br = BasicNewsRecipe.get_browser(self)
        daysback=1
@ -385,7 +399,6 @@ class NYTimes(BasicNewsRecipe):

    masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'


    def short_title(self):
        return self.title

@ -647,75 +660,53 @@ class NYTimes(BasicNewsRecipe):

        soup = self.index_to_soup('http://www.nytimes.com/pages/todaysheadlines/')

-       # Fetch the content table
-       content_table = soup.find('table',{'id':'content'})
-       if content_table is None:
-           self.log("FATAL ERROR: CANNOT FIND CONTENT TABLE")
-           return None
-
-       # Within this table are <td id=".*Column.*"> entries, each containing one or more h6 tags which represent sections
-
-       for td_col in content_table.findAll('td', {'id' : re.compile('Column')}):
-           for div_sec in td_col.findAll('div',recursive=False):
-               for h6_sec_name in div_sec.findAll('h6',{'style' : re.compile('text-transform: *uppercase')}):
-
-                   section_name = self.tag_to_string(h6_sec_name,use_alt=False)
-                   section_name = re.sub(r'^ *$','',section_name)
-
-                   if section_name == '':
-                       continue
-                   if self.includeSections != []:
-                       if section_name not in self.includeSections:
-                           print "SECTION NOT INCLUDED: ",section_name
-                           continue
-                   if section_name in self.excludeSections:
-                       print "SECTION EXCLUDED: ",section_name
-                       continue
-
-                   section_name=string.capwords(section_name)
-                   section_name = section_name.replace('Op-ed','Op-Ed')
-                   section_name = section_name.replace('U.s.','U.S.')
-                   section_name = section_name.replace('N.y.','N.Y.')
-                   pubdate = strftime('%a, %d %b')
-
-                   search_div = div_sec
-                   for next_tag in h6_sec_name.findNextSiblings(True):
-                       if next_tag.__class__.__name__ == 'Tag':
-                           if next_tag.name == 'div':
-                               search_div = next_tag
-                           break
-
-                   # Get the articles
-                   for h3_item in search_div.findAll('h3'):
-                       byline = h3_item.h6
-                       if byline is not None:
-                           author = self.tag_to_string(byline,use_alt=False)
-                       else:
-                           author = ''
-                       a = h3_item.find('a', href=True)
-                       if not a:
-                           continue
-                       url = re.sub(r'\?.*', '', a['href'])
-                       if self.exclude_url(url):
-                           continue
-                       url += '?pagewanted=all'
-                       if self.filterDuplicates:
-                           if url in self.url_list:
-                               continue
-                       self.url_list.append(url)
-                       title = self.tag_to_string(a, use_alt=True).strip()
-                       desc = h3_item.find('p')
-                       if desc is not None:
-                           description = self.tag_to_string(desc,use_alt=False)
-                       else:
-                           description = ''
-                       if not self.articles.has_key(section_name):
-                           self.ans.append(section_name)
-                           self.articles[section_name] = []
-                       self.articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))
+       section_name='Unknown Section'
+       pubdate = strftime('%a, %d %b')
+       for td_col in soup.findAll('td'):
+           h6_sec_name = td_col.find('h6')
+           if h6_sec_name is not None:
+               new_section_name = self.tag_to_string(h6_sec_name,use_alt=False)
+               new_section_name = re.sub(r'^ *$','',new_section_name)
+               if new_section_name == '':
+                   continue
+               section_name = new_section_name
+               continue
+           atag = td_col.find('a')
+           if atag is not None:
+               h4tag = None
+               for h4tag in atag.findNextSiblings('h4'):
+                   break
+               if h4tag is None:
+                   continue
+               author = self.tag_to_string(h4tag,use_alt=False)
+               try:
+                   url = re.sub(r'\?.*', '', atag['href'])
+               except:
+                   continue
+               if self.exclude_url(url):
+                   continue
+               if '?' in url:
+                   url += '&pagewanted=all'
+               else:
+                   url += '?pagewanted=all'
+               if self.filterDuplicates:
+                   if url in self.url_list:
+                       continue
+               self.url_list.append(url)
+               title = self.tag_to_string(atag, use_alt=False).strip()
+               desc = atag.parent.find('p')
+               if desc is not None:
+                   description = self.tag_to_string(desc,use_alt=False)
+               else:
+                   description = ''
+               if not self.articles.has_key(section_name):
+                   self.ans.append(section_name)
+                   self.articles[section_name] = []
+               print('Title '+title+' author '+author)
+               self.articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))

        self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
-       return self.filter_ans(self.get_tech_feeds(self.get_popular_articles(self.ans)))
+       return self.filter_ans(self.ans)

    def parse_index(self):
        if self.headlinesOnly:
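The rewritten loop above walks every td on the Today's Headlines page, treating an h6 as a section heading and a following a tag plus sibling h4 as an article link and byline. A rough standalone sketch of that walk, using bs4 and a made-up fragment (the real page markup is an assumption here; the recipe itself uses calibre's bundled BeautifulSoup):

    from bs4 import BeautifulSoup

    html = '''
    <table><tr>
      <td><h6>World</h6></td>
      <td><a href="http://www.nytimes.com/2013/04/01/world/example.html">Example headline</a>
          <h4>By A REPORTER</h4><p>Short description.</p></td>
    </tr></table>
    '''

    soup = BeautifulSoup(html, 'html.parser')
    section = 'Unknown Section'
    for td in soup.find_all('td'):
        h6 = td.find('h6')
        if h6 is not None:
            section = h6.get_text(strip=True) or section
            continue
        a = td.find('a')
        if a is not None:
            h4 = a.find_next_sibling('h4')
            author = h4.get_text(strip=True) if h4 else ''
            print(section, '|', a.get_text(strip=True), '|', author, '|', a['href'])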
@ -825,8 +816,9 @@ class NYTimes(BasicNewsRecipe):
        for divr in soup.findAll('div',attrs={'class':re.compile('w190 right')}):
            if divr.find(text=re.compile('Sign up')):
                divr.extract()
-       divr = soup.find('div',attrs={'id':re.compile('related-content')})
+       divr = soup.find('div',attrs={'class':re.compile('^relatedArticlesModule')})
        if divr is not None:
+           print("PROCESSING RELATED: "+self.tag_to_string(soup.title,False))
            # handle related articles
            rlist = []
            ul = divr.find('ul')
@ -856,6 +848,8 @@ class NYTimes(BasicNewsRecipe):
            asidediv.append(Tag(soup,'hr'))
            smain = soup.find('body')
            smain.append(asidediv)
+       else:
+           print("CANNOT FIND RELATED: "+self.tag_to_string(soup.title,False))
        for atag in soup.findAll('a'):
            img = atag.find('img')
            if img is not None:
@ -898,6 +892,18 @@ class NYTimes(BasicNewsRecipe):
                        first_outer = outerdiv
                    else:
                        litag.extract()
+               for h6tag in rdiv.findAll('h6'):
+                   if h6tag.find('a') is not None:
+                       if h6tag.find('a')['href'].startswith('http://www.nytimes.com'):
+                           url = re.sub(r'\?.*', '', h6tag.find('a')['href'])
+                           h6tag.find('a')['href'] = url+'?pagewanted=all'
+                           h6tag.extract()
+                           related.append(h6tag)
+                           if first_related is None:
+                               first_related = rdiv
+                               first_outer = outerdiv
+                       else:
+                           h6tag.extract()
        if related != []:
            for r in related:
                if r.h6: # don't want the anchor inside a h6 tag
@ -188,6 +188,8 @@ class NYTimes(BasicNewsRecipe):
        'relatedSearchesModule',
        'side_tool',
        'singleAd',
+       'postCategory column',
+       'refer tagRefer', # added for bits blog post
        'entry entry-utility', #added for DealBook
        'entry-tags', #added for DealBook
        'footer promos clearfix', #added for DealBook
@ -324,6 +326,8 @@ class NYTimes(BasicNewsRecipe):
            return True
        if '/video/' in url:
            return True
+       if '/multimedia/' in url:
+           return True
        if '/slideshow/' in url:
            return True
        if '/magazine/index' in url:
@ -334,6 +338,15 @@ class NYTimes(BasicNewsRecipe):
            return True
        if '/premium/' in url:
            return True
+       if '#comment' in url:
+           return True
+       if '#postComment' in url:
+           return True
+       if '#postcomment' in url:
+           return True
+       if re.search('/\d\d\d\d/\d\d/\d\d/',url) is None:
+           print("NO DATE IN "+url)
+           return True
        return False

    def fixChars(self,string):
@ -371,6 +384,7 @@ class NYTimes(BasicNewsRecipe):

    cover_tag = 'NY_NYT'
    def get_cover_url(self):
+       from datetime import timedelta, date
        cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.cover_tag+'.jpg'
        br = BasicNewsRecipe.get_browser(self)
        daysback=1
@ -393,7 +407,6 @@ class NYTimes(BasicNewsRecipe):

    masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'


    def short_title(self):
        return self.title

@ -655,75 +668,53 @@ class NYTimes(BasicNewsRecipe):

        soup = self.index_to_soup('http://www.nytimes.com/pages/todaysheadlines/')

-       # Fetch the content table
-       content_table = soup.find('table',{'id':'content'})
-       if content_table is None:
-           self.log("FATAL ERROR: CANNOT FIND CONTENT TABLE")
-           return None
-
-       # Within this table are <td id=".*Column.*"> entries, each containing one or more h6 tags which represent sections
-
-       for td_col in content_table.findAll('td', {'id' : re.compile('Column')}):
-           for div_sec in td_col.findAll('div',recursive=False):
-               for h6_sec_name in div_sec.findAll('h6',{'style' : re.compile('text-transform: *uppercase')}):
-
-                   section_name = self.tag_to_string(h6_sec_name,use_alt=False)
-                   section_name = re.sub(r'^ *$','',section_name)
-
-                   if section_name == '':
-                       continue
-                   if self.includeSections != []:
-                       if section_name not in self.includeSections:
-                           print "SECTION NOT INCLUDED: ",section_name
-                           continue
-                   if section_name in self.excludeSections:
-                       print "SECTION EXCLUDED: ",section_name
-                       continue
-
-                   section_name=string.capwords(section_name)
-                   section_name = section_name.replace('Op-ed','Op-Ed')
-                   section_name = section_name.replace('U.s.','U.S.')
-                   section_name = section_name.replace('N.y.','N.Y.')
-                   pubdate = strftime('%a, %d %b')
-
-                   search_div = div_sec
-                   for next_tag in h6_sec_name.findNextSiblings(True):
-                       if next_tag.__class__.__name__ == 'Tag':
-                           if next_tag.name == 'div':
-                               search_div = next_tag
-                           break
-
-                   # Get the articles
-                   for h3_item in search_div.findAll('h3'):
-                       byline = h3_item.h6
-                       if byline is not None:
-                           author = self.tag_to_string(byline,use_alt=False)
-                       else:
-                           author = ''
-                       a = h3_item.find('a', href=True)
-                       if not a:
-                           continue
-                       url = re.sub(r'\?.*', '', a['href'])
-                       if self.exclude_url(url):
-                           continue
-                       url += '?pagewanted=all'
-                       if self.filterDuplicates:
-                           if url in self.url_list:
-                               continue
-                       self.url_list.append(url)
-                       title = self.tag_to_string(a, use_alt=True).strip()
-                       desc = h3_item.find('p')
-                       if desc is not None:
-                           description = self.tag_to_string(desc,use_alt=False)
-                       else:
-                           description = ''
-                       if not self.articles.has_key(section_name):
-                           self.ans.append(section_name)
-                           self.articles[section_name] = []
-                       self.articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))
+       section_name='Unknown Section'
+       pubdate = strftime('%a, %d %b')
+       for td_col in soup.findAll('td'):
+           h6_sec_name = td_col.find('h6')
+           if h6_sec_name is not None:
+               new_section_name = self.tag_to_string(h6_sec_name,use_alt=False)
+               new_section_name = re.sub(r'^ *$','',new_section_name)
+               if new_section_name == '':
+                   continue
+               section_name = new_section_name
+               continue
+           atag = td_col.find('a')
+           if atag is not None:
+               h4tag = None
+               for h4tag in atag.findNextSiblings('h4'):
+                   break
+               if h4tag is None:
+                   continue
+               author = self.tag_to_string(h4tag,use_alt=False)
+               try:
+                   url = re.sub(r'\?.*', '', atag['href'])
+               except:
+                   continue
+               if self.exclude_url(url):
+                   continue
+               if '?' in url:
+                   url += '&pagewanted=all'
+               else:
+                   url += '?pagewanted=all'
+               if self.filterDuplicates:
+                   if url in self.url_list:
+                       continue
+               self.url_list.append(url)
+               title = self.tag_to_string(atag, use_alt=False).strip()
+               desc = atag.parent.find('p')
+               if desc is not None:
+                   description = self.tag_to_string(desc,use_alt=False)
+               else:
+                   description = ''
+               if not self.articles.has_key(section_name):
+                   self.ans.append(section_name)
+                   self.articles[section_name] = []
+               print('Title '+title+' author '+author)
+               self.articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))

        self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
-       return self.filter_ans(self.get_tech_feeds(self.get_popular_articles(self.ans)))
+       return self.filter_ans(self.ans)

    def parse_index(self):
        if self.headlinesOnly:
@ -833,8 +824,9 @@ class NYTimes(BasicNewsRecipe):
        for divr in soup.findAll('div',attrs={'class':re.compile('w190 right')}):
            if divr.find(text=re.compile('Sign up')):
                divr.extract()
-       divr = soup.find('div',attrs={'id':re.compile('related-content')})
+       divr = soup.find('div',attrs={'class':re.compile('^relatedArticlesModule')})
        if divr is not None:
+           print("PROCESSING RELATED: "+self.tag_to_string(soup.title,False))
            # handle related articles
            rlist = []
            ul = divr.find('ul')
@ -864,6 +856,8 @@ class NYTimes(BasicNewsRecipe):
            asidediv.append(Tag(soup,'hr'))
            smain = soup.find('body')
            smain.append(asidediv)
+       else:
+           print("CANNOT FIND RELATED: "+self.tag_to_string(soup.title,False))
        for atag in soup.findAll('a'):
            img = atag.find('img')
            if img is not None:
@ -906,6 +900,18 @@ class NYTimes(BasicNewsRecipe):
                        first_outer = outerdiv
                    else:
                        litag.extract()
+               for h6tag in rdiv.findAll('h6'):
+                   if h6tag.find('a') is not None:
+                       if h6tag.find('a')['href'].startswith('http://www.nytimes.com'):
+                           url = re.sub(r'\?.*', '', h6tag.find('a')['href'])
+                           h6tag.find('a')['href'] = url+'?pagewanted=all'
+                           h6tag.extract()
+                           related.append(h6tag)
+                           if first_related is None:
+                               first_related = rdiv
+                               first_outer = outerdiv
+                       else:
+                           h6tag.extract()
        if related != []:
            for r in related:
                if r.h6: # don't want the anchor inside a h6 tag
@ -35,7 +35,10 @@ class NewYorkTimesBookReview(BasicNewsRecipe):
                continue
            if x['class'] in {'story', 'ledeStory'}:
                tt = 'h3' if x['class'] == 'story' else 'h1'
-               a = x.find(tt).find('a', href=True)
+               try:
+                   a = x.find(tt).find('a', href=True)
+               except AttributeError:
+                   continue
                title = self.tag_to_string(a)
                url = a['href'] + '&pagewanted=all'
                self.log('\tFound article:', title, url)
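The try/except added above covers blocks where x.find(tt) returns None (no h3 or h1 present), in which case chaining .find('a', href=True) onto it raises AttributeError. A minimal illustration:

    from bs4 import BeautifulSoup

    block = BeautifulSoup('<div class="story"><p>No headline tag here</p></div>', 'html.parser').div
    try:
        a = block.find('h3').find('a', href=True)  # find('h3') is None, so the chained call raises
    except AttributeError:
        a = None
    print(a)  # None: the entry is skipped instead of aborting the whole index parse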
@ -19,6 +19,7 @@ let g:syntastic_c_include_dirs = g:syntastic_cpp_include_dirs

set wildignore+=resources/viewer/mathjax/**
set wildignore+=build/**
+set wildignore+=dist/**

fun! CalibreLog()
    " Setup buffers to edit the calibre changelog and version info prior to
@ -302,7 +302,7 @@ class Worker(Thread): # Get details {{{
            self.log.exception('Error parsing series for url: %r'%self.url)

        try:
-           self.cover_url = self.parse_cover(root)
+           self.cover_url = self.parse_cover(root, raw)
        except:
            self.log.exception('Error parsing cover for url: %r'%self.url)
        mi.has_cover = bool(self.cover_url)
@ -450,18 +450,24 @@ class Worker(Thread): # Get details {{{
            ans = (s, i)
        return ans

-   def parse_cover(self, root):
+   def parse_cover(self, root, raw=b""):
        imgs = root.xpath('//img[(@id="prodImage" or @id="original-main-image" or @id="main-image") and @src]')
+       if not imgs:
+           imgs = root.xpath('//div[@class="main-image-inner-wrapper"]/img[@src]')
        if imgs:
            src = imgs[0].get('src')
-           if '/no-image-avail' not in src:
+           if 'loading-' in src:
+               js_img = re.search(br'"largeImage":"(http://[^"]+)",',raw)
+               if js_img:
+                   src = js_img.group(1).decode('utf-8')
+           if ('/no-image-avail' not in src and 'loading-' not in src and '/no-img-sm' not in src):
+               self.log('Found image: %s' % src)
                parts = src.split('/')
                if len(parts) > 3:
                    bn = parts[-1]
                    sparts = bn.split('_')
                    if len(sparts) > 2:
-                       bn = sparts[0] + sparts[-1]
+                       bn = re.sub(r'\.\.jpg$', '.jpg', (sparts[0] + sparts[-1]))
                    return ('/'.join(parts[:-1]))+'/'+bn

    def parse_isbn(self, pd):
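The '..jpg' cleanup added above matters because Amazon image file names carry size modifiers between underscores; joining sparts[0] and sparts[-1] to strip them can leave a doubled dot. A worked example with an illustrative (not real) file name:

    import re

    src = 'http://ecx.images-amazon.com/images/I/51AbCdEfGhL._SL500_AA300_.jpg'  # illustrative URL
    parts = src.split('/')
    bn = parts[-1]                       # '51AbCdEfGhL._SL500_AA300_.jpg'
    sparts = bn.split('_')               # ['51AbCdEfGhL.', 'SL500', 'AA300', '.jpg']
    bn = sparts[0] + sparts[-1]          # '51AbCdEfGhL..jpg'  <- doubled dot
    bn = re.sub(r'\.\.jpg$', '.jpg', bn)
    print('/'.join(parts[:-1]) + '/' + bn)  # candidate full-size cover URL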
@ -54,6 +54,27 @@ if pictureflow is not None:
        def currentChanged(self, index):
            print 'current changed:', index

+   class DummyImageList(pictureflow.FlowImages):
+
+       def __init__(self):
+           pictureflow.FlowImages.__init__(self)
+           self.num = 40000
+           i1, i2 = QImage(300, 400, QImage.Format_RGB32), QImage(300, 400, QImage.Format_RGB32)
+           i1.fill(Qt.green), i2.fill(Qt.blue)
+           self.images = [i1, i2]
+
+       def count(self):
+           return self.num
+
+       def image(self, index):
+           return self.images[index%2]
+
+       def caption(self, index):
+           return 'Number: %d'%index
+
+       def subtitle(self, index):
+           return ''
+
    class DatabaseImages(pictureflow.FlowImages):

        def __init__(self, model, buffer=20):
@ -328,6 +349,21 @@ class CoverFlowMixin(object):
    def sync_listview_to_cf(self, row):
        self.cf_last_updated_at = time.time()

+def test():
+    from PyQt4.QtGui import QApplication, QMainWindow
+    app = QApplication([])
+    w = QMainWindow()
+    cf = CoverFlow()
+    cf.resize(int(available_width()/1.5), available_height()-60)
+    w.resize(cf.size()+QSize(30, 20))
+    model = DummyImageList()
+    cf.setImages(model)
+    cf.setCurrentSlide(39000)
+    w.setCentralWidget(cf)
+
+    w.show()
+    cf.setFocus(Qt.OtherFocusReason)
+    sys.exit(app.exec_())
+
def main(args=sys.argv):
    return 0
@ -94,6 +94,9 @@ class LibraryViewMixin(object): # {{{
        v = self.current_view()
        if hasattr(v, 'set_current_row'):
            v.set_current_row(0)
+           if v is self.library_view and v.row_count() == 0:
+               self.book_details.reset_info()

    # }}}

@ -87,10 +87,11 @@ def init_qt(args):
    opts, args = parser.parse_args(args)
    find_portable_library()
    if opts.with_library is not None:
-       if not os.path.exists(opts.with_library):
-           os.makedirs(opts.with_library)
-       if os.path.isdir(opts.with_library):
-           prefs.set('library_path', os.path.abspath(opts.with_library))
+       libpath = os.path.expanduser(opts.with_library)
+       if not os.path.exists(libpath):
+           os.makedirs(libpath)
+       if os.path.isdir(libpath):
+           prefs.set('library_path', os.path.abspath(libpath))
            prints('Using library at', prefs['library_path'])
    QCoreApplication.setOrganizationName(ORG_NAME)
    QCoreApplication.setApplicationName(APP_UID)
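Using libpath above means a value such as --with-library ~/CalibreLibrary works even when the shell has not expanded the tilde; previously os.makedirs would have created a directory literally named '~'. For illustration:

    import os.path

    raw = '~/CalibreLibrary'        # as it might arrive unexpanded from a launcher or script
    print(os.path.exists(raw))      # almost always False: there is no directory called '~'
    print(os.path.expanduser(raw))  # e.g. /home/user/CalibreLibrary, depending on the home directory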
@ -398,7 +398,7 @@ private:
    QCache<int, QImage> surfaceCache;
    QTimer triggerTimer;

-   int slideFrame;
+   long long slideFrame;
    int step;
    int target;
    int fade;
@ -493,7 +493,7 @@ void PictureFlowPrivate::setCurrentSlide(int index)
    step = 0;
    centerIndex = qBound(index, 0, slideImages->count()-1);
    target = centerIndex;
-   slideFrame = index << 16;
+   slideFrame = ((long long)index) << 16;
    resetSlides();
    triggerRender();
    widget->emitcurrentChanged(centerIndex);
@ -1069,7 +1069,7 @@ void PictureFlowPrivate::updateAnimation()
    const int max = 2 * 65536;

    int fi = slideFrame;
    fi -= (target << 16);
    if(fi < 0)
        fi = -fi;
    fi = qMin(fi, max);
@ -1094,7 +1094,7 @@ void PictureFlowPrivate::updateAnimation()
    if(centerIndex != index)
    {
        centerIndex = index;
-       slideFrame = index << 16;
+       slideFrame = ((long long)index) << 16;
        centerSlide.slideIndex = centerIndex;
        for(int i = 0; i < leftSlides.count(); i++)
            leftSlides[i].slideIndex = centerIndex-1-i;
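Widening slideFrame to long long (and casting before the shift) matters once the flow holds tens of thousands of slides, as the DummyImageList test above does: index << 16 no longer fits in a signed 32-bit int. The arithmetic, checked in Python:

    INT32_MAX = 2**31 - 1      # 2147483647

    index = 39000              # the slide the test() helper jumps to
    frame = index << 16        # fixed-point slide position, as computed in pictureflow.cpp
    print(frame)               # 2555904000
    print(frame > INT32_MAX)   # True: a 32-bit slideFrame would overflow here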
@ -763,22 +763,24 @@ class EditRules(QWidget): # {{{
            ' double clicking it.'))
        self.add_advanced_button.setVisible(False)

-   def _add_rule(self, dlg):
-       if dlg.exec_() == dlg.Accepted:
-           kind, col, r = dlg.rule
+   def add_rule(self):
+       d = RuleEditor(self.model.fm, self.pref_name)
+       d.add_blank_condition()
+       if d.exec_() == d.Accepted:
+           kind, col, r = d.rule
            if kind and r and col:
                idx = self.model.add_rule(kind, col, r)
                self.rules_view.scrollTo(idx)
                self.changed.emit()

-   def add_rule(self):
-       d = RuleEditor(self.model.fm, self.pref_name)
-       d.add_blank_condition()
-       self._add_rule(d)
-
    def add_advanced(self):
        td = TemplateDialog(self, '', mi=self.mi, fm=self.fm, color_field='')
-       self._add_rule(('color', td[0], td[1]))
+       if td.exec_() == td.Accepted:
+           col, r = td.rule
+           if r and col:
+               idx = self.model.add_rule('color', col, r)
+               self.rules_view.scrollTo(idx)
+               self.changed.emit()

    def edit_rule(self, index):
        try:
@ -30,6 +30,7 @@ class ItemView(QFrame): # {{{
    add_new_item = pyqtSignal(object, object)
    delete_item = pyqtSignal()
    flatten_item = pyqtSignal()
+   go_to_root = pyqtSignal()

    def __init__(self, parent):
        QFrame.__init__(self, parent)
@ -132,6 +133,11 @@ class ItemView(QFrame): # {{{
        b.setToolTip(_('All children of this entry are brought to the same '
            'level as this entry.'))
        l.addWidget(b, l.rowCount()+1, 0, 1, 2)
+       ip.b4 = b = QPushButton(QIcon(I('back.png')), _('&Return to root'))
+       b.clicked.connect(self.go_to_root)
+       b.setToolTip(_('Go back to the top level view'))
+       l.addWidget(b, l.rowCount()+1, 0, 1, 2)

        l.setRowMinimumHeight(rs, 20)

        l.addWidget(QLabel(), l.rowCount(), 0, 1, 2)
@ -237,6 +243,7 @@ class TOCView(QWidget): # {{{
        self.item_view.delete_item.connect(self.delete_current_item)
        i.add_new_item.connect(self.add_new_item)
        i.flatten_item.connect(self.flatten_item)
+       i.go_to_root.connect(self.go_to_root)
        l.addWidget(i, 0, 4, col, 1)

        l.setColumnStretch(2, 10)
@ -271,6 +278,9 @@ class TOCView(QWidget): # {{{
            item.removeChild(child)
            p.insertChild(idx+1, child)

+   def go_to_root(self):
+       self.tocw.setCurrentItem(None)
+
    def highlight_item(self, item):
        self.tocw.setCurrentItem(item, 0, QItemSelectionModel.ClearAndSelect)
        self.tocw.scrollToItem(item)
@ -184,7 +184,12 @@ class Feed(object):
        id = 'internal id#%s'%self.id_counter
        if id in self.added_articles:
            return
-       published = item.get('date_parsed', time.gmtime())
+       published = None
+       for date_field in ('date_parsed', 'published_parsed',
+                          'updated_parsed'):
+           published = item.get(date_field, None)
+           if published is not None:
+               break
        if not published:
            published = time.gmtime()
        self.added_articles.append(id)
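The loop above lets Feed fall back across the three timestamp fields feedparser may populate, instead of trusting date_parsed alone. A standalone sketch with a plain dict standing in for a feedparser entry:

    import time

    entry = {'published_parsed': time.strptime('2013-04-01', '%Y-%m-%d')}  # no 'date_parsed' key

    published = None
    for date_field in ('date_parsed', 'published_parsed', 'updated_parsed'):
        published = entry.get(date_field, None)
        if published is not None:
            break
    if not published:
        published = time.gmtime()  # last resort, as in the patched code
    print(time.strftime('%a, %d %b %Y', published))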
@ -338,6 +338,41 @@ class BasicNewsRecipe(Recipe):
    #: :meth:`javascript_login` method, to do the actual logging in.
    use_javascript_to_login = False

+   # The following parameters control how the recipe attempts to minimize
+   # jpeg image sizes
+
+   #: Set this to False to ignore all scaling and compression parameters and
+   #: pass images through unmodified. If True and the other compression
+   #: parameters are left at their default values, jpeg images will be scaled to fit
+   #: in the screen dimensions set by the output profile and compressed to size at
+   #: most (w * h)/16 where w x h are the scaled image dimensions.
+   compress_news_images = False
+
+   #: The factor used when auto compressing jpeg images. If set to None,
+   #: auto compression is disabled. Otherwise, the images will be reduced in size to
+   #: (w * h)/compress_news_images_auto_size bytes if possible by reducing
+   #: the quality level, where w x h are the image dimensions in pixels.
+   #: The minimum jpeg quality will be 5/100 so it is possible this constraint
+   #: will not be met. This parameter can be overridden by the parameter
+   #: compress_news_images_max_size which provides a fixed maximum size for images.
+   compress_news_images_auto_size = 16
+
+   #: Set jpeg quality so images do not exceed the size given (in KBytes).
+   #: If set, this parameter overrides auto compression via compress_news_images_auto_size.
+   #: The minimum jpeg quality will be 5/100 so it is possible this constraint
+   #: will not be met.
+   compress_news_images_max_size = None
+
+   #: Rescale images to fit in the device screen dimensions set by the output profile.
+   #: Ignored if no output profile is set.
+   scale_news_images_to_device = True
+
+   #: Maximum dimensions (w,h) to scale images to. If scale_news_images_to_device is True
+   #: this is set to the device screen dimensions set by the output profile unless
+   #: there is no profile set, in which case it is left at whatever value it has been
+   #: assigned (default None).
+   scale_news_images = None
+
    # See the built-in profiles for examples of these settings.

    def short_title(self):
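Taken together, the new attributes let a recipe opt in to jpeg shrinking. With the default auto size of 16, a 600x800 image is targeted at (600*800)/16 = 30000 bytes, roughly 29 KB. A hypothetical recipe using the new knobs (values chosen only for illustration):

    from calibre.web.feeds.news import BasicNewsRecipe

    class SmallImagesRecipe(BasicNewsRecipe):
        # Hypothetical example recipe, not part of this commit.
        title = 'Some news source'
        feeds = [('News', 'http://example.com/rss.xml')]

        # Opt in to the jpeg handling added in this commit.
        compress_news_images = True
        # Cap every jpeg at ~100 KB, overriding the (w*h)/16 auto target.
        compress_news_images_max_size = 100
        # Scale images down to the output profile's screen size first.
        scale_news_images_to_device = True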
@ -849,11 +884,19 @@ class BasicNewsRecipe(Recipe):
        for reg in self.filter_regexps:
            web2disk_cmdline.extend(['--filter-regexp', reg])
+
+       if options.output_profile.short_name == 'default':
+           self.scale_news_images_to_device = False
+       elif self.scale_news_images_to_device:
+           self.scale_news_images = options.output_profile.screen_size
+
        self.web2disk_options = web2disk_option_parser().parse_args(web2disk_cmdline)[0]
        for extra in ('keep_only_tags', 'remove_tags', 'preprocess_regexps',
                'skip_ad_pages', 'preprocess_html', 'remove_tags_after',
-               'remove_tags_before', 'is_link_wanted'):
+               'remove_tags_before', 'is_link_wanted',
+               'compress_news_images', 'compress_news_images_max_size',
+               'compress_news_images_auto_size', 'scale_news_images'):
            setattr(self.web2disk_options, extra, getattr(self, extra))

        self.web2disk_options.postprocess_html = self._postprocess_html
        self.web2disk_options.encoding = self.encoding
        self.web2disk_options.preprocess_raw_html = self.preprocess_raw_html_
@ -12,7 +12,7 @@ from urllib import url2pathname, quote
from httplib import responses
from base64 import b64decode

-from calibre import browser, relpath, unicode_path
+from calibre import browser, relpath, unicode_path, fit_image
from calibre.constants import filesystem_encoding, iswindows
from calibre.utils.filenames import ascii_filename
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
@ -20,7 +20,7 @@ from calibre.ebooks.chardet import xml_to_unicode
from calibre.utils.config import OptionParser
from calibre.utils.logging import Log
from calibre.utils.magick import Image
-from calibre.utils.magick.draw import identify_data
+from calibre.utils.magick.draw import identify_data, thumbnail

class FetchError(Exception):
    pass
@ -142,6 +142,10 @@ class RecursiveFetcher(object):
        self.postprocess_html_ext= getattr(options, 'postprocess_html', None)
        self._is_link_wanted = getattr(options, 'is_link_wanted',
                default_is_link_wanted)
+       self.compress_news_images_max_size = getattr(options, 'compress_news_images_max_size', None)
+       self.compress_news_images = getattr(options, 'compress_news_images', False)
+       self.compress_news_images_auto_size = getattr(options, 'compress_news_images_auto_size', 16)
+       self.scale_news_images = getattr(options, 'scale_news_images', None)
        self.download_stylesheets = not options.no_stylesheets
        self.show_progress = True
        self.failed_links = []
@ -338,7 +342,42 @@ class RecursiveFetcher(object):
                    x.write(data)
                ns.replaceWith(src.replace(m.group(1), stylepath))

+   def rescale_image(self, data):
+       orig_w, orig_h, ifmt = identify_data(data)
+       orig_data = data # save it in case compression fails
+       if self.scale_news_images is not None:
+           wmax, hmax = self.scale_news_images
+           scale, new_w, new_h = fit_image(orig_w, orig_h, wmax, hmax)
+           if scale:
+               data = thumbnail(data, new_w, new_h, compression_quality=95)[-1]
+               orig_w = new_w
+               orig_h = new_h
+       if self.compress_news_images_max_size is None:
+           if self.compress_news_images_auto_size is None: # not compressing
+               return data
+           else:
+               maxsizeb = (orig_w * orig_h)/self.compress_news_images_auto_size
+       else:
+           maxsizeb = self.compress_news_images_max_size * 1024
+       scaled_data = data # save it in case compression fails
+       if len(scaled_data) <= maxsizeb: # no compression required
+           return scaled_data
+
+       img = Image()
+       quality = 95
+       img.load(data)
+       while len(data) >= maxsizeb and quality >= 5:
+           quality -= 5
+           img.set_compression_quality(quality)
+           data = img.export('jpg')
+
+       if len(data) >= len(scaled_data): # compression failed
+           return orig_data if len(orig_data) <= len(scaled_data) else scaled_data
+
+       if len(data) >= len(orig_data): # no improvement
+           return orig_data
+
+       return data
+
    def process_images(self, soup, baseurl):
        diskpath = unicode_path(os.path.join(self.current_dir, 'images'))
@ -390,6 +429,12 @@ class RecursiveFetcher(object):
                        im = Image()
                        im.load(data)
                        data = im.export(itype)
+                       if self.compress_news_images and itype in {'jpg','jpeg'}:
+                           try:
+                               data = self.rescale_image(data)
+                           except:
+                               self.log.exception('failed to compress image '+iurl)
+                               identify_data(data)
                    else:
                        identify_data(data)
                imgpath = os.path.join(diskpath, fname+'.'+itype)