diff --git a/manual/faq.rst b/manual/faq.rst
index 2d2862e4e6..ba11c865f3 100644
--- a/manual/faq.rst
+++ b/manual/faq.rst
@@ -579,9 +579,23 @@ Yes, you can. Follow the instructions in the answer above for adding custom colu
How do I move my |app| library from one computer to another?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Simply copy the |app| library folder from the old to the new computer. You can find out what the library folder is by clicking the calibre icon in the toolbar. The very first item is the path to the library folder. Now on the new computer, start |app| for the first time. It will run the Welcome Wizard asking you for the location of the |app| library. Point it to the previously copied folder. If the computer you are transferring to already has a calibre installation, then the Welcome wizard wont run. In that case, right-click the |app| icon in the tooolbar and point it to the newly copied directory. You will now have two calibre libraries on your computer and you can switch between them by clicking the |app| icon on the toolbar. Transferring your library in this manner preserver all your metadata, tags, custom columns, etc.
+Simply copy the |app| library folder from the old to the new computer. You can
+find out what the library folder is by clicking the calibre icon in the
+toolbar. The very first item is the path to the library folder. Now on the new
+computer, start |app| for the first time. It will run the Welcome Wizard asking
+you for the location of the |app| library. Point it to the previously copied
+folder. If the computer you are transferring to already has a calibre
+installation, then the Welcome Wizard won't run. In that case, right-click the
+|app| icon in the toolbar and point it to the newly copied folder. You will
+now have two |app| libraries on your computer and you can switch between them
+by clicking the |app| icon on the toolbar. Transferring your library in this
+manner preserves all your metadata, tags, custom columns, etc.
-Note that if you are transferring between different types of computers (for example Windows to OS X) then after doing the above you should also right-click the |app| icon on the tool bar, select Library Maintenance and run the Check Library action. It will warn you about any problems in your library, which you should fix by hand.
+Note that if you are transferring between different types of computers (for
+example Windows to OS X) then after doing the above you should also right-click
+the |app| icon on the toolbar, select Library Maintenance and run the Check
+Library action. It will warn you about any problems in your library, which you
+should fix by hand.
.. note:: A |app| library is just a folder which contains all the book files and their metadata. All the metadata is stored in a single file called metadata.db, in the top level folder. If this file gets corrupted, you may see an empty list of books in |app|. In this case you can ask |app| to restore your books by doing a right-click on the |app| icon in the toolbar and selecting Library Maintenance->Restore Library.
diff --git a/recipes/il_giornale.recipe b/recipes/il_giornale.recipe
index 007432ed88..6d3eaa5fef 100644
--- a/recipes/il_giornale.recipe
+++ b/recipes/il_giornale.recipe
@@ -7,7 +7,6 @@ description = 'Italian daily newspaper - 09-11-2011'
'''
http://www.ilgiornale.it/
'''
-from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.web.feeds.news import BasicNewsRecipe
class IlGiornale(BasicNewsRecipe):
@@ -25,35 +24,39 @@ class IlGiornale(BasicNewsRecipe):
oldest_article = 7
max_articles_per_feed = 100
use_embedded_content = False
+ #auto_cleanup = True
+ #auto_cleanup_keep = '//div[@id="insertbox_text"]'
no_stylesheets = True
conversion_options = {'linearize_tables':True}
remove_javascript = True
+ keep_only_tags = [dict(name='h1', attrs={'class':'entry-title'}), dict(name='div', attrs={'id':'insertbox_text'})]
- def get_article_url(self, article):
- return article.get('guid', article.get('id', None))
- def print_version(self, url):
- raw = self.browser.open(url).read()
- soup = BeautifulSoup(raw.decode('utf8', 'replace'))
- all_print_tags = soup.find('div', {'id':'print_article'})
- print_link = all_print_tags.a
- if print_link is None:
- return url
- return 'http://www.ilgiornale.it' + print_link['href']
+ #def get_article_url(self, article):
+ #return article.get('guid', article.get('id', None))
+
+ #def print_version(self, url):
+ #raw = self.browser.open(url).read()
+ #soup = BeautifulSoup(raw.decode('utf8', 'replace'))
+ #all_print_tags = soup.find('div', {'id':'print_article'})
+ #print_link = all_print_tags.a
+ #if print_link is None:
+ #return url
+ #return 'http://www.ilgiornale.it' + print_link['href']
feeds = [
- (u'Ultime Notizie',u'http://www.ilgiornale.it/?RSS=S'),
- (u'All\'Interno', u'http://www.ilgiornale.it/la_s.pic1?SID=8&RSS=S'),
- (u'Esteri', u'http://www.ilgiornale.it/la_s.pic1?SID=6&RSS=S'),
- (u'Economia', u'http://www.ilgiornale.it/la_s.pic1?SID=5&RSS=S'),
- (u'Cultura', u'http://www.ilgiornale.it/la_s.pic1?SID=4&RSS=S'),
- (u'Spettacoli', u'http://www.ilgiornale.it/la_s.pic1?SID=14&RSS=S'),
- (u'Sport', u'http://www.ilgiornale.it/la_s.pic1?SID=15&RSS=S'),
- (u'Tech&Web', u'http://www.ilgiornale.it/la_s.pic1?SID=35&RSS=S'),
- (u'Edizione di Roma', u'http://www.ilgiornale.it/roma.pic1?SID=13&RSS=S'),
- (u'Edizione di Milano', u'http://www.ilgiornale.it/milano.pic1?SID=9&RSS=S'),
- (u'Edizione di Genova', u'http://www.ilgiornale.it/genova.pic1?SID=7&RSS=S')
+ (u'Ultime Notizie',u'http://www.ilgiornale.it/rss.xml'),
+ #(u'All\'Interno', u'http://www.ilgiornale.it/la_s.pic1?SID=8&RSS=S'),
+ #(u'Esteri', u'http://www.ilgiornale.it/la_s.pic1?SID=6&RSS=S'),
+ #(u'Economia', u'http://www.ilgiornale.it/la_s.pic1?SID=5&RSS=S'),
+ #(u'Cultura', u'http://www.ilgiornale.it/la_s.pic1?SID=4&RSS=S'),
+ #(u'Spettacoli', u'http://www.ilgiornale.it/la_s.pic1?SID=14&RSS=S'),
+ #(u'Sport', u'http://www.ilgiornale.it/la_s.pic1?SID=15&RSS=S'),
+ #(u'Tech&Web', u'http://www.ilgiornale.it/la_s.pic1?SID=35&RSS=S'),
+ #(u'Edizione di Roma', u'http://www.ilgiornale.it/roma.pic1?SID=13&RSS=S'),
+ #(u'Edizione di Milano', u'http://www.ilgiornale.it/milano.pic1?SID=9&RSS=S'),
+ #(u'Edizione di Genova', u'http://www.ilgiornale.it/genova.pic1?SID=7&RSS=S')
]
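The rewrite above drops the per-article print_version() fetch in favour of keep_only_tags, which keeps only the named elements when each article page is processed. As a minimal sketch (not part of the patch; the class name and feed URL are hypothetical), a recipe built this way looks like:

    from calibre.web.feeds.news import BasicNewsRecipe

    class KeepOnlyExample(BasicNewsRecipe):  # hypothetical recipe
        title = 'keep_only_tags example'
        feeds = [(u'Example feed', u'http://example.com/rss.xml')]
        # Everything outside these two elements is discarded before
        # conversion, so no second request for a print page is needed.
        keep_only_tags = [
            dict(name='h1', attrs={'class': 'entry-title'}),
            dict(name='div', attrs={'id': 'insertbox_text'}),
        ]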
diff --git a/recipes/nytimes.recipe b/recipes/nytimes.recipe
index d0f311818e..c4a4b3cee5 100644
--- a/recipes/nytimes.recipe
+++ b/recipes/nytimes.recipe
@@ -41,7 +41,7 @@ class NYTimes(BasicNewsRecipe):
# number of days old an article can be for inclusion. If oldest_web_article = None all articles
# will be included. Note: oldest_web_article is ignored if webEdition = False
webEdition = False
- oldest_web_article = 7
+ oldest_web_article = None
# download higher resolution images than the small thumbnails typically included in the article
# the down side of having large beautiful images is the file size is much larger, on the order of 7MB per paper
@@ -188,6 +188,8 @@ class NYTimes(BasicNewsRecipe):
'relatedSearchesModule',
'side_tool',
'singleAd',
+ 'postCategory column',
+ 'refer tagRefer', # added for bits blog post
'entry entry-utility', #added for DealBook
'entry-tags', #added for DealBook
'footer promos clearfix', #added for DealBook
@@ -324,6 +326,8 @@ class NYTimes(BasicNewsRecipe):
return True
if '/video/' in url:
return True
+ if '/multimedia/' in url:
+ return True
if '/slideshow/' in url:
return True
if '/magazine/index' in url:
@@ -334,6 +338,15 @@ class NYTimes(BasicNewsRecipe):
return True
if '/premium/' in url:
return True
+ if '#comment' in url:
+ return True
+ if '#postComment' in url:
+ return True
+ if '#postcomment' in url:
+ return True
+            if re.search(r'/\d\d\d\d/\d\d/\d\d/',url) is None:
+ print("NO DATE IN "+url)
+ return True
return False
def fixChars(self,string):
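The new date test drops anything that does not look like a dated article path; index and topic pages never contain a /YYYY/MM/DD/ segment. A quick illustration, using the raw-string form of the pattern and illustrative URLs:

    import re

    date_in_path = re.compile(r'/\d\d\d\d/\d\d/\d\d/')
    # Dated article URL: kept
    print(date_in_path.search('http://www.nytimes.com/2013/03/10/world/europe/x.html') is not None)
    # Section index: excluded, after logging 'NO DATE IN ...'
    print(date_in_path.search('http://www.nytimes.com/pages/todaysheadlines/') is not None)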
@@ -363,6 +376,7 @@ class NYTimes(BasicNewsRecipe):
cover_tag = 'NY_NYT'
def get_cover_url(self):
+ from datetime import timedelta, date
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.cover_tag+'.jpg'
br = BasicNewsRecipe.get_browser(self)
daysback=1
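The newly imported timedelta suggests that the retry loop (outside this hunk) steps daysback through earlier editions when today's front page has not yet been posted to the Newseum server. A sketch of that idea, under that assumption:

    from datetime import date, timedelta

    def newseum_cover(cover_tag, daysback=0):  # hypothetical helper
        day = date.today() - timedelta(days=daysback)
        return ('http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'
                + str(day.day) + '/lg/' + cover_tag + '.jpg')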
@@ -385,7 +399,6 @@ class NYTimes(BasicNewsRecipe):
masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
-
def short_title(self):
return self.title
@@ -647,75 +660,53 @@ class NYTimes(BasicNewsRecipe):
soup = self.index_to_soup('http://www.nytimes.com/pages/todaysheadlines/')
- # Fetch the content table
- content_table = soup.find('table',{'id':'content'})
- if content_table is None:
- self.log("FATAL ERROR: CANNOT FIND CONTENT TABLE")
- return None
-
-        # Within this table are <td> entries, each containing one or more h6 tags which represent sections
-
- for td_col in content_table.findAll('td', {'id' : re.compile('Column')}):
- for div_sec in td_col.findAll('div',recursive=False):
- for h6_sec_name in div_sec.findAll('h6',{'style' : re.compile('text-transform: *uppercase')}):
-
- section_name = self.tag_to_string(h6_sec_name,use_alt=False)
- section_name = re.sub(r'^ *$','',section_name)
-
- if section_name == '':
+ section_name='Unknown Section'
+ pubdate = strftime('%a, %d %b')
+ for td_col in soup.findAll('td'):
+ h6_sec_name = td_col.find('h6')
+ if h6_sec_name is not None:
+ new_section_name = self.tag_to_string(h6_sec_name,use_alt=False)
+ new_section_name = re.sub(r'^ *$','',new_section_name)
+ if new_section_name == '':
+ continue
+ section_name = new_section_name
+ continue
+ atag = td_col.find('a')
+ if atag is not None:
+ h4tag = None
+ for h4tag in atag.findNextSiblings('h4'):
+ break
+ if h4tag is None:
+ continue
+ author = self.tag_to_string(h4tag,use_alt=False)
+ try:
+ url = re.sub(r'\?.*', '', atag['href'])
+ except:
+ continue
+ if self.exclude_url(url):
+ continue
+ if '?' in url:
+ url += '&pagewanted=all'
+ else:
+ url += '?pagewanted=all'
+ if self.filterDuplicates:
+ if url in self.url_list:
continue
- if self.includeSections != []:
- if section_name not in self.includeSections:
- print "SECTION NOT INCLUDED: ",section_name
- continue
- if section_name in self.excludeSections:
- print "SECTION EXCLUDED: ",section_name
- continue
-
- section_name=string.capwords(section_name)
- section_name = section_name.replace('Op-ed','Op-Ed')
- section_name = section_name.replace('U.s.','U.S.')
- section_name = section_name.replace('N.y.','N.Y.')
- pubdate = strftime('%a, %d %b')
-
- search_div = div_sec
- for next_tag in h6_sec_name.findNextSiblings(True):
- if next_tag.__class__.__name__ == 'Tag':
- if next_tag.name == 'div':
- search_div = next_tag
- break
-
- # Get the articles
- for h3_item in search_div.findAll('h3'):
- byline = h3_item.h6
- if byline is not None:
- author = self.tag_to_string(byline,use_alt=False)
- else:
- author = ''
- a = h3_item.find('a', href=True)
- if not a:
- continue
- url = re.sub(r'\?.*', '', a['href'])
- if self.exclude_url(url):
- continue
- url += '?pagewanted=all'
- if self.filterDuplicates:
- if url in self.url_list:
- continue
- self.url_list.append(url)
- title = self.tag_to_string(a, use_alt=True).strip()
- desc = h3_item.find('p')
- if desc is not None:
- description = self.tag_to_string(desc,use_alt=False)
- else:
- description = ''
- if not self.articles.has_key(section_name):
- self.ans.append(section_name)
- self.articles[section_name] = []
- self.articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))
+ self.url_list.append(url)
+ title = self.tag_to_string(atag, use_alt=False).strip()
+ desc = atag.parent.find('p')
+ if desc is not None:
+ description = self.tag_to_string(desc,use_alt=False)
+ else:
+ description = ''
+ if not self.articles.has_key(section_name):
+ self.ans.append(section_name)
+ self.articles[section_name] = []
+ print('Title '+title+' author '+author)
+ self.articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
- return self.filter_ans(self.get_tech_feeds(self.get_popular_articles(self.ans)))
+ return self.filter_ans(self.ans)
def parse_index(self):
if self.headlinesOnly:
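Instead of anchoring on the id='content' table, the new loop scans every table cell: an <h6> updates the running section name, and an <a> whose next sibling is an <h4> byline is taken as an article. A toy run of that pattern over hypothetical markup:

    from calibre.ebooks.BeautifulSoup import BeautifulSoup

    html = '''<table><tr><td><h6>World</h6></td>
    <td><a href="http://www.nytimes.com/2013/03/10/world/x.html">Headline</a>
    <h4>By A. REPORTER</h4><p>Summary.</p></td></tr></table>'''
    soup = BeautifulSoup(html)
    section = 'Unknown Section'
    for td in soup.findAll('td'):
        h6 = td.find('h6')
        if h6 is not None:
            section = h6.string
            continue
        atag = td.find('a')
        if atag is not None and atag.findNextSiblings('h4'):
            print(section + ': ' + atag['href'])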
@@ -825,8 +816,9 @@ class NYTimes(BasicNewsRecipe):
for divr in soup.findAll('div',attrs={'class':re.compile('w190 right')}):
if divr.find(text=re.compile('Sign up')):
divr.extract()
- divr = soup.find('div',attrs={'id':re.compile('related-content')})
+ divr = soup.find('div',attrs={'class':re.compile('^relatedArticlesModule')})
if divr is not None:
+ print("PROCESSING RELATED: "+self.tag_to_string(soup.title,False))
# handle related articles
rlist = []
ul = divr.find('ul')
@@ -856,6 +848,8 @@ class NYTimes(BasicNewsRecipe):
asidediv.append(Tag(soup,'hr'))
smain = soup.find('body')
smain.append(asidediv)
+ else:
+ print("CANNOT FIND RELATED: "+self.tag_to_string(soup.title,False))
for atag in soup.findAll('a'):
img = atag.find('img')
if img is not None:
@@ -898,6 +892,18 @@ class NYTimes(BasicNewsRecipe):
first_outer = outerdiv
else:
litag.extract()
+ for h6tag in rdiv.findAll('h6'):
+ if h6tag.find('a') is not None:
+ if h6tag.find('a')['href'].startswith('http://www.nytimes.com'):
+ url = re.sub(r'\?.*', '', h6tag.find('a')['href'])
+ h6tag.find('a')['href'] = url+'?pagewanted=all'
+ h6tag.extract()
+ related.append(h6tag)
+ if first_related is None:
+ first_related = rdiv
+ first_outer = outerdiv
+ else:
+ h6tag.extract()
if related != []:
for r in related:
if r.h6: # don't want the anchor inside a h6 tag
diff --git a/recipes/nytimes_sub.recipe b/recipes/nytimes_sub.recipe
index 06c476ef19..2dba2d505d 100644
--- a/recipes/nytimes_sub.recipe
+++ b/recipes/nytimes_sub.recipe
@@ -188,6 +188,8 @@ class NYTimes(BasicNewsRecipe):
'relatedSearchesModule',
'side_tool',
'singleAd',
+ 'postCategory column',
+ 'refer tagRefer', # added for bits blog post
'entry entry-utility', #added for DealBook
'entry-tags', #added for DealBook
'footer promos clearfix', #added for DealBook
@@ -324,6 +326,8 @@ class NYTimes(BasicNewsRecipe):
return True
if '/video/' in url:
return True
+ if '/multimedia/' in url:
+ return True
if '/slideshow/' in url:
return True
if '/magazine/index' in url:
@@ -334,6 +338,15 @@ class NYTimes(BasicNewsRecipe):
return True
if '/premium/' in url:
return True
+ if '#comment' in url:
+ return True
+ if '#postComment' in url:
+ return True
+ if '#postcomment' in url:
+ return True
+            if re.search(r'/\d\d\d\d/\d\d/\d\d/',url) is None:
+ print("NO DATE IN "+url)
+ return True
return False
def fixChars(self,string):
@@ -371,6 +384,7 @@ class NYTimes(BasicNewsRecipe):
cover_tag = 'NY_NYT'
def get_cover_url(self):
+ from datetime import timedelta, date
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.cover_tag+'.jpg'
br = BasicNewsRecipe.get_browser(self)
daysback=1
@@ -393,7 +407,6 @@ class NYTimes(BasicNewsRecipe):
masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
-
def short_title(self):
return self.title
@@ -655,75 +668,53 @@ class NYTimes(BasicNewsRecipe):
soup = self.index_to_soup('http://www.nytimes.com/pages/todaysheadlines/')
- # Fetch the content table
- content_table = soup.find('table',{'id':'content'})
- if content_table is None:
- self.log("FATAL ERROR: CANNOT FIND CONTENT TABLE")
- return None
-
-        # Within this table are <td> entries, each containing one or more h6 tags which represent sections
-
- for td_col in content_table.findAll('td', {'id' : re.compile('Column')}):
- for div_sec in td_col.findAll('div',recursive=False):
- for h6_sec_name in div_sec.findAll('h6',{'style' : re.compile('text-transform: *uppercase')}):
-
- section_name = self.tag_to_string(h6_sec_name,use_alt=False)
- section_name = re.sub(r'^ *$','',section_name)
-
- if section_name == '':
+ section_name='Unknown Section'
+ pubdate = strftime('%a, %d %b')
+ for td_col in soup.findAll('td'):
+ h6_sec_name = td_col.find('h6')
+ if h6_sec_name is not None:
+ new_section_name = self.tag_to_string(h6_sec_name,use_alt=False)
+ new_section_name = re.sub(r'^ *$','',new_section_name)
+ if new_section_name == '':
+ continue
+ section_name = new_section_name
+ continue
+ atag = td_col.find('a')
+ if atag is not None:
+ h4tag = None
+ for h4tag in atag.findNextSiblings('h4'):
+ break
+ if h4tag is None:
+ continue
+ author = self.tag_to_string(h4tag,use_alt=False)
+ try:
+ url = re.sub(r'\?.*', '', atag['href'])
+ except:
+ continue
+ if self.exclude_url(url):
+ continue
+ if '?' in url:
+ url += '&pagewanted=all'
+ else:
+ url += '?pagewanted=all'
+ if self.filterDuplicates:
+ if url in self.url_list:
continue
- if self.includeSections != []:
- if section_name not in self.includeSections:
- print "SECTION NOT INCLUDED: ",section_name
- continue
- if section_name in self.excludeSections:
- print "SECTION EXCLUDED: ",section_name
- continue
-
- section_name=string.capwords(section_name)
- section_name = section_name.replace('Op-ed','Op-Ed')
- section_name = section_name.replace('U.s.','U.S.')
- section_name = section_name.replace('N.y.','N.Y.')
- pubdate = strftime('%a, %d %b')
-
- search_div = div_sec
- for next_tag in h6_sec_name.findNextSiblings(True):
- if next_tag.__class__.__name__ == 'Tag':
- if next_tag.name == 'div':
- search_div = next_tag
- break
-
- # Get the articles
- for h3_item in search_div.findAll('h3'):
- byline = h3_item.h6
- if byline is not None:
- author = self.tag_to_string(byline,use_alt=False)
- else:
- author = ''
- a = h3_item.find('a', href=True)
- if not a:
- continue
- url = re.sub(r'\?.*', '', a['href'])
- if self.exclude_url(url):
- continue
- url += '?pagewanted=all'
- if self.filterDuplicates:
- if url in self.url_list:
- continue
- self.url_list.append(url)
- title = self.tag_to_string(a, use_alt=True).strip()
- desc = h3_item.find('p')
- if desc is not None:
- description = self.tag_to_string(desc,use_alt=False)
- else:
- description = ''
- if not self.articles.has_key(section_name):
- self.ans.append(section_name)
- self.articles[section_name] = []
- self.articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))
+ self.url_list.append(url)
+ title = self.tag_to_string(atag, use_alt=False).strip()
+ desc = atag.parent.find('p')
+ if desc is not None:
+ description = self.tag_to_string(desc,use_alt=False)
+ else:
+ description = ''
+ if not self.articles.has_key(section_name):
+ self.ans.append(section_name)
+ self.articles[section_name] = []
+ print('Title '+title+' author '+author)
+ self.articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
- return self.filter_ans(self.get_tech_feeds(self.get_popular_articles(self.ans)))
+ return self.filter_ans(self.ans)
def parse_index(self):
if self.headlinesOnly:
@@ -833,8 +824,9 @@ class NYTimes(BasicNewsRecipe):
for divr in soup.findAll('div',attrs={'class':re.compile('w190 right')}):
if divr.find(text=re.compile('Sign up')):
divr.extract()
- divr = soup.find('div',attrs={'id':re.compile('related-content')})
+ divr = soup.find('div',attrs={'class':re.compile('^relatedArticlesModule')})
if divr is not None:
+ print("PROCESSING RELATED: "+self.tag_to_string(soup.title,False))
# handle related articles
rlist = []
ul = divr.find('ul')
@@ -864,6 +856,8 @@ class NYTimes(BasicNewsRecipe):
asidediv.append(Tag(soup,'hr'))
smain = soup.find('body')
smain.append(asidediv)
+ else:
+ print("CANNOT FIND RELATED: "+self.tag_to_string(soup.title,False))
for atag in soup.findAll('a'):
img = atag.find('img')
if img is not None:
@@ -906,6 +900,18 @@ class NYTimes(BasicNewsRecipe):
first_outer = outerdiv
else:
litag.extract()
+ for h6tag in rdiv.findAll('h6'):
+ if h6tag.find('a') is not None:
+ if h6tag.find('a')['href'].startswith('http://www.nytimes.com'):
+ url = re.sub(r'\?.*', '', h6tag.find('a')['href'])
+ h6tag.find('a')['href'] = url+'?pagewanted=all'
+ h6tag.extract()
+ related.append(h6tag)
+ if first_related is None:
+ first_related = rdiv
+ first_outer = outerdiv
+ else:
+ h6tag.extract()
if related != []:
for r in related:
if r.h6: # don't want the anchor inside a h6 tag
diff --git a/recipes/nytimesbook.recipe b/recipes/nytimesbook.recipe
index ad20586770..2d8fb69a7e 100644
--- a/recipes/nytimesbook.recipe
+++ b/recipes/nytimesbook.recipe
@@ -35,7 +35,10 @@ class NewYorkTimesBookReview(BasicNewsRecipe):
continue
if x['class'] in {'story', 'ledeStory'}:
tt = 'h3' if x['class'] == 'story' else 'h1'
- a = x.find(tt).find('a', href=True)
+ try:
+ a = x.find(tt).find('a', href=True)
+ except AttributeError:
+ continue
title = self.tag_to_string(a)
url = a['href'] + '&pagewanted=all'
self.log('\tFound article:', title, url)
diff --git a/session.vim b/session.vim
index 54c269978f..5e127428cf 100644
--- a/session.vim
+++ b/session.vim
@@ -19,6 +19,7 @@ let g:syntastic_c_include_dirs = g:syntastic_cpp_include_dirs
set wildignore+=resources/viewer/mathjax/**
set wildignore+=build/**
+set wildignore+=dist/**
fun! CalibreLog()
" Setup buffers to edit the calibre changelog and version info prior to
diff --git a/src/calibre/ebooks/metadata/sources/amazon.py b/src/calibre/ebooks/metadata/sources/amazon.py
index 9024a7f49f..a8e15a6d94 100644
--- a/src/calibre/ebooks/metadata/sources/amazon.py
+++ b/src/calibre/ebooks/metadata/sources/amazon.py
@@ -302,7 +302,7 @@ class Worker(Thread): # Get details {{{
self.log.exception('Error parsing series for url: %r'%self.url)
try:
- self.cover_url = self.parse_cover(root)
+ self.cover_url = self.parse_cover(root, raw)
except:
self.log.exception('Error parsing cover for url: %r'%self.url)
mi.has_cover = bool(self.cover_url)
@@ -450,18 +450,24 @@ class Worker(Thread): # Get details {{{
ans = (s, i)
return ans
-
- def parse_cover(self, root):
+ def parse_cover(self, root, raw=b""):
imgs = root.xpath('//img[(@id="prodImage" or @id="original-main-image" or @id="main-image") and @src]')
+ if not imgs:
+ imgs = root.xpath('//div[@class="main-image-inner-wrapper"]/img[@src]')
if imgs:
src = imgs[0].get('src')
- if '/no-image-avail' not in src:
+ if 'loading-' in src:
+ js_img = re.search(br'"largeImage":"(http://[^"]+)",',raw)
+ if js_img:
+ src = js_img.group(1).decode('utf-8')
+ if ('/no-image-avail' not in src and 'loading-' not in src and '/no-img-sm' not in src):
+ self.log('Found image: %s' % src)
parts = src.split('/')
if len(parts) > 3:
bn = parts[-1]
sparts = bn.split('_')
if len(sparts) > 2:
- bn = sparts[0] + sparts[-1]
+ bn = re.sub(r'\.\.jpg$', '.jpg', (sparts[0] + sparts[-1]))
return ('/'.join(parts[:-1]))+'/'+bn
def parse_isbn(self, pd):
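Amazon cover filenames carry size and decoration modifiers between underscores; keeping only the first and last underscore-separated pieces strips them, and the new re.sub collapses the doubled dot that the join can produce. For a hypothetical filename:

    import re

    bn = '51xyz._BO2,204,203,200_PIsitb-sticker-arrow_.jpg'  # illustrative only
    sparts = bn.split('_')
    if len(sparts) > 2:
        bn = re.sub(r'\.\.jpg$', '.jpg', sparts[0] + sparts[-1])
    print(bn)  # '51xyz..jpg' collapsed to '51xyz.jpg'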
diff --git a/src/calibre/gui2/cover_flow.py b/src/calibre/gui2/cover_flow.py
index 8a7beb811c..81cdf9b90d 100644
--- a/src/calibre/gui2/cover_flow.py
+++ b/src/calibre/gui2/cover_flow.py
@@ -54,6 +54,27 @@ if pictureflow is not None:
def currentChanged(self, index):
print 'current changed:', index
+ class DummyImageList(pictureflow.FlowImages):
+
+ def __init__(self):
+ pictureflow.FlowImages.__init__(self)
+ self.num = 40000
+ i1, i2 = QImage(300, 400, QImage.Format_RGB32), QImage(300, 400, QImage.Format_RGB32)
+ i1.fill(Qt.green), i2.fill(Qt.blue)
+ self.images = [i1, i2]
+
+ def count(self):
+ return self.num
+
+ def image(self, index):
+ return self.images[index%2]
+
+ def caption(self, index):
+ return 'Number: %d'%index
+
+ def subtitle(self, index):
+ return ''
+
class DatabaseImages(pictureflow.FlowImages):
def __init__(self, model, buffer=20):
@@ -328,6 +349,21 @@ class CoverFlowMixin(object):
def sync_listview_to_cf(self, row):
self.cf_last_updated_at = time.time()
+def test():
+ from PyQt4.QtGui import QApplication, QMainWindow
+ app = QApplication([])
+ w = QMainWindow()
+ cf = CoverFlow()
+ cf.resize(int(available_width()/1.5), available_height()-60)
+ w.resize(cf.size()+QSize(30, 20))
+ model = DummyImageList()
+ cf.setImages(model)
+ cf.setCurrentSlide(39000)
+ w.setCentralWidget(cf)
+
+ w.show()
+ cf.setFocus(Qt.OtherFocusReason)
+ sys.exit(app.exec_())
def main(args=sys.argv):
return 0
diff --git a/src/calibre/gui2/init.py b/src/calibre/gui2/init.py
index 8c88393d45..eff36e865b 100644
--- a/src/calibre/gui2/init.py
+++ b/src/calibre/gui2/init.py
@@ -94,6 +94,9 @@ class LibraryViewMixin(object): # {{{
v = self.current_view()
if hasattr(v, 'set_current_row'):
v.set_current_row(0)
+ if v is self.library_view and v.row_count() == 0:
+ self.book_details.reset_info()
+
# }}}
diff --git a/src/calibre/gui2/main.py b/src/calibre/gui2/main.py
index d43e618e9a..f35a9ca083 100644
--- a/src/calibre/gui2/main.py
+++ b/src/calibre/gui2/main.py
@@ -87,10 +87,11 @@ def init_qt(args):
opts, args = parser.parse_args(args)
find_portable_library()
if opts.with_library is not None:
- if not os.path.exists(opts.with_library):
- os.makedirs(opts.with_library)
- if os.path.isdir(opts.with_library):
- prefs.set('library_path', os.path.abspath(opts.with_library))
+ libpath = os.path.expanduser(opts.with_library)
+ if not os.path.exists(libpath):
+ os.makedirs(libpath)
+ if os.path.isdir(libpath):
+ prefs.set('library_path', os.path.abspath(libpath))
prints('Using library at', prefs['library_path'])
QCoreApplication.setOrganizationName(ORG_NAME)
QCoreApplication.setApplicationName(APP_UID)
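The fix matters because ~ is expanded by the shell, not by Python, and not at all when the argument is quoted or comes from a config file; previously calibre --with-library '~/Books' would create a literal directory named '~'. The expansion itself:

    import os

    libpath = os.path.expanduser('~/Books')   # e.g. '/home/user/Books'
    print(os.path.abspath(libpath))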
diff --git a/src/calibre/gui2/pictureflow/pictureflow.cpp b/src/calibre/gui2/pictureflow/pictureflow.cpp
index e26309f4cf..173a080301 100644
--- a/src/calibre/gui2/pictureflow/pictureflow.cpp
+++ b/src/calibre/gui2/pictureflow/pictureflow.cpp
@@ -398,7 +398,7 @@ private:
QCache surfaceCache;
QTimer triggerTimer;
- int slideFrame;
+ long long slideFrame;
int step;
int target;
int fade;
@@ -493,7 +493,7 @@ void PictureFlowPrivate::setCurrentSlide(int index)
step = 0;
centerIndex = qBound(index, 0, slideImages->count()-1);
target = centerIndex;
- slideFrame = index << 16;
+ slideFrame = ((long long)index) << 16;
resetSlides();
triggerRender();
widget->emitcurrentChanged(centerIndex);
@@ -1069,7 +1069,7 @@ void PictureFlowPrivate::updateAnimation()
const int max = 2 * 65536;
int fi = slideFrame;
- fi -= (target << 16);
+ fi -= (target << 16);
if(fi < 0)
fi = -fi;
fi = qMin(fi, max);
@@ -1094,7 +1094,7 @@ void PictureFlowPrivate::updateAnimation()
if(centerIndex != index)
{
centerIndex = index;
- slideFrame = index << 16;
+ slideFrame = ((long long)index) << 16;
centerSlide.slideIndex = centerIndex;
for(int i = 0; i < leftSlides.count(); i++)
leftSlides[i].slideIndex = centerIndex-1-i;
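slideFrame stores the slide position as 16.16 fixed point, so index << 16 overflows a signed 32-bit int as soon as the index exceeds 32767 — easy to hit with the 40000-slide stress test added in cover_flow.py above, hence the widening to long long. The arithmetic, demonstrated from Python with ctypes:

    import ctypes

    index = 40000
    print(index << 16)                         # 2621440000, exceeds 2**31 - 1
    print(ctypes.c_int32(index << 16).value)   # -1673527296 after 32-bit wraparound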
diff --git a/src/calibre/gui2/preferences/coloring.py b/src/calibre/gui2/preferences/coloring.py
index 1dce44b865..8d27d14e5b 100644
--- a/src/calibre/gui2/preferences/coloring.py
+++ b/src/calibre/gui2/preferences/coloring.py
@@ -763,22 +763,24 @@ class EditRules(QWidget): # {{{
' double clicking it.'))
self.add_advanced_button.setVisible(False)
- def _add_rule(self, dlg):
- if dlg.exec_() == dlg.Accepted:
- kind, col, r = dlg.rule
+ def add_rule(self):
+ d = RuleEditor(self.model.fm, self.pref_name)
+ d.add_blank_condition()
+ if d.exec_() == d.Accepted:
+ kind, col, r = d.rule
if kind and r and col:
idx = self.model.add_rule(kind, col, r)
self.rules_view.scrollTo(idx)
self.changed.emit()
- def add_rule(self):
- d = RuleEditor(self.model.fm, self.pref_name)
- d.add_blank_condition()
- self._add_rule(d)
-
def add_advanced(self):
td = TemplateDialog(self, '', mi=self.mi, fm=self.fm, color_field='')
- self._add_rule(('color', td[0], td[1]))
+ if td.exec_() == td.Accepted:
+ col, r = td.rule
+ if r and col:
+ idx = self.model.add_rule('color', col, r)
+ self.rules_view.scrollTo(idx)
+ self.changed.emit()
def edit_rule(self, index):
try:
diff --git a/src/calibre/gui2/toc/main.py b/src/calibre/gui2/toc/main.py
index a79dbc4e40..63aad654ad 100644
--- a/src/calibre/gui2/toc/main.py
+++ b/src/calibre/gui2/toc/main.py
@@ -30,6 +30,7 @@ class ItemView(QFrame): # {{{
add_new_item = pyqtSignal(object, object)
delete_item = pyqtSignal()
flatten_item = pyqtSignal()
+ go_to_root = pyqtSignal()
def __init__(self, parent):
QFrame.__init__(self, parent)
@@ -132,6 +133,11 @@ class ItemView(QFrame): # {{{
b.setToolTip(_('All children of this entry are brought to the same '
'level as this entry.'))
l.addWidget(b, l.rowCount()+1, 0, 1, 2)
+ ip.b4 = b = QPushButton(QIcon(I('back.png')), _('&Return to root'))
+ b.clicked.connect(self.go_to_root)
+ b.setToolTip(_('Go back to the top level view'))
+ l.addWidget(b, l.rowCount()+1, 0, 1, 2)
+
l.setRowMinimumHeight(rs, 20)
l.addWidget(QLabel(), l.rowCount(), 0, 1, 2)
@@ -237,6 +243,7 @@ class TOCView(QWidget): # {{{
self.item_view.delete_item.connect(self.delete_current_item)
i.add_new_item.connect(self.add_new_item)
i.flatten_item.connect(self.flatten_item)
+ i.go_to_root.connect(self.go_to_root)
l.addWidget(i, 0, 4, col, 1)
l.setColumnStretch(2, 10)
@@ -271,6 +278,9 @@ class TOCView(QWidget): # {{{
item.removeChild(child)
p.insertChild(idx+1, child)
+ def go_to_root(self):
+ self.tocw.setCurrentItem(None)
+
def highlight_item(self, item):
self.tocw.setCurrentItem(item, 0, QItemSelectionModel.ClearAndSelect)
self.tocw.scrollToItem(item)
diff --git a/src/calibre/web/feeds/__init__.py b/src/calibre/web/feeds/__init__.py
index 43b404367c..d819aade7a 100644
--- a/src/calibre/web/feeds/__init__.py
+++ b/src/calibre/web/feeds/__init__.py
@@ -184,7 +184,12 @@ class Feed(object):
id = 'internal id#%s'%self.id_counter
if id in self.added_articles:
return
- published = item.get('date_parsed', time.gmtime())
+ published = None
+ for date_field in ('date_parsed', 'published_parsed',
+ 'updated_parsed'):
+ published = item.get(date_field, None)
+ if published is not None:
+ break
if not published:
published = time.gmtime()
self.added_articles.append(id)
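Blog-style feeds frequently omit date_parsed and expose the timestamp only as published_parsed or updated_parsed; the old code then silently stamped every article with the download time. The fallback order, shown standalone on a hypothetical entry:

    import time

    entry = {'published_parsed': time.strptime('2013-03-01', '%Y-%m-%d')}
    published = None
    for date_field in ('date_parsed', 'published_parsed', 'updated_parsed'):
        published = entry.get(date_field, None)
        if published is not None:
            break
    if not published:
        published = time.gmtime()
    print(published.tm_year)  # 2013, not the current year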
diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py
index 5bf09d8a3b..e9348f6ae7 100644
--- a/src/calibre/web/feeds/news.py
+++ b/src/calibre/web/feeds/news.py
@@ -338,6 +338,41 @@ class BasicNewsRecipe(Recipe):
#: :meth:`javascript_login` method, to do the actual logging in.
use_javascript_to_login = False
+ # The following parameters control how the recipe attempts to minimize
+ # jpeg image sizes
+
+ #: Set this to False to ignore all scaling and compression parameters and
+ #: pass images through unmodified. If True and the other compression
+ #: parameters are left at their default values, jpeg images will be scaled to fit
+ #: in the screen dimensions set by the output profile and compressed to size at
+ #: most (w * h)/16 where w x h are the scaled image dimensions.
+ compress_news_images = False
+
+ #: The factor used when auto compressing jpeg images. If set to None,
+ #: auto compression is disabled. Otherwise, the images will be reduced in size to
+ #: (w * h)/compress_news_images_auto_size bytes if possible by reducing
+ #: the quality level, where w x h are the image dimensions in pixels.
+ #: The minimum jpeg quality will be 5/100 so it is possible this constraint
+ #: will not be met. This parameter can be overridden by the parameter
+ #: compress_news_images_max_size which provides a fixed maximum size for images.
+ compress_news_images_auto_size = 16
+
+ #: Set jpeg quality so images do not exceed the size given (in KBytes).
+ #: If set, this parameter overrides auto compression via compress_news_images_auto_size.
+ #: The minimum jpeg quality will be 5/100 so it is possible this constraint
+ #: will not be met.
+ compress_news_images_max_size = None
+
+ #: Rescale images to fit in the device screen dimensions set by the output profile.
+ #: Ignored if no output profile is set.
+ scale_news_images_to_device = True
+
+ #: Maximum dimensions (w,h) to scale images to. If scale_news_images_to_device is True
+ #: this is set to the device screen dimensions set by the output profile unless
+ #: there is no profile set, in which case it is left at whatever value it has been
+ #: assigned (default None).
+ scale_news_images = None
+
# See the built-in profiles for examples of these settings.
def short_title(self):
@@ -849,11 +884,19 @@ class BasicNewsRecipe(Recipe):
for reg in self.filter_regexps:
web2disk_cmdline.extend(['--filter-regexp', reg])
+ if options.output_profile.short_name == 'default':
+ self.scale_news_images_to_device = False
+ elif self.scale_news_images_to_device:
+ self.scale_news_images = options.output_profile.screen_size
+
self.web2disk_options = web2disk_option_parser().parse_args(web2disk_cmdline)[0]
for extra in ('keep_only_tags', 'remove_tags', 'preprocess_regexps',
'skip_ad_pages', 'preprocess_html', 'remove_tags_after',
- 'remove_tags_before', 'is_link_wanted'):
+ 'remove_tags_before', 'is_link_wanted',
+ 'compress_news_images', 'compress_news_images_max_size',
+ 'compress_news_images_auto_size', 'scale_news_images'):
setattr(self.web2disk_options, extra, getattr(self, extra))
+
self.web2disk_options.postprocess_html = self._postprocess_html
self.web2disk_options.encoding = self.encoding
self.web2disk_options.preprocess_raw_html = self.preprocess_raw_html_
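Copying the new attributes onto web2disk_options makes them visible to the fetcher; in a recipe they are plain class attributes. A hedged example of a recipe opting in (values illustrative):

    from calibre.web.feeds.news import BasicNewsRecipe

    class CompressedImagesExample(BasicNewsRecipe):  # hypothetical recipe
        title = 'Compressed images example'
        feeds = [(u'Example', u'http://example.com/rss.xml')]
        compress_news_images = True          # turn the jpeg pipeline on
        compress_news_images_max_size = 30   # hard ceiling of ~30 KB per image
        scale_news_images_to_device = True   # also fit the output profile screen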
diff --git a/src/calibre/web/fetch/simple.py b/src/calibre/web/fetch/simple.py
index e7ad119dae..7cc8bd9309 100644
--- a/src/calibre/web/fetch/simple.py
+++ b/src/calibre/web/fetch/simple.py
@@ -12,7 +12,7 @@ from urllib import url2pathname, quote
from httplib import responses
from base64 import b64decode
-from calibre import browser, relpath, unicode_path
+from calibre import browser, relpath, unicode_path, fit_image
from calibre.constants import filesystem_encoding, iswindows
from calibre.utils.filenames import ascii_filename
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
@@ -20,7 +20,7 @@ from calibre.ebooks.chardet import xml_to_unicode
from calibre.utils.config import OptionParser
from calibre.utils.logging import Log
from calibre.utils.magick import Image
-from calibre.utils.magick.draw import identify_data
+from calibre.utils.magick.draw import identify_data, thumbnail
class FetchError(Exception):
pass
@@ -142,6 +142,10 @@ class RecursiveFetcher(object):
self.postprocess_html_ext= getattr(options, 'postprocess_html', None)
self._is_link_wanted = getattr(options, 'is_link_wanted',
default_is_link_wanted)
+ self.compress_news_images_max_size = getattr(options, 'compress_news_images_max_size', None)
+ self.compress_news_images = getattr(options, 'compress_news_images', False)
+ self.compress_news_images_auto_size = getattr(options, 'compress_news_images_auto_size', 16)
+ self.scale_news_images = getattr(options, 'scale_news_images', None)
self.download_stylesheets = not options.no_stylesheets
self.show_progress = True
self.failed_links = []
@@ -338,7 +342,42 @@ class RecursiveFetcher(object):
x.write(data)
ns.replaceWith(src.replace(m.group(1), stylepath))
+ def rescale_image(self, data):
+ orig_w, orig_h, ifmt = identify_data(data)
+ orig_data = data # save it in case compression fails
+ if self.scale_news_images is not None:
+ wmax, hmax = self.scale_news_images
+ scale, new_w, new_h = fit_image(orig_w, orig_h, wmax, hmax)
+ if scale:
+ data = thumbnail(data, new_w, new_h, compression_quality=95)[-1]
+ orig_w = new_w
+ orig_h = new_h
+ if self.compress_news_images_max_size is None:
+ if self.compress_news_images_auto_size is None: # not compressing
+ return data
+ else:
+ maxsizeb = (orig_w * orig_h)/self.compress_news_images_auto_size
+ else:
+ maxsizeb = self.compress_news_images_max_size * 1024
+ scaled_data = data # save it in case compression fails
+ if len(scaled_data) <= maxsizeb: # no compression required
+ return scaled_data
+ img = Image()
+ quality = 95
+ img.load(data)
+ while len(data) >= maxsizeb and quality >= 5:
+ quality -= 5
+ img.set_compression_quality(quality)
+ data = img.export('jpg')
+
+ if len(data) >= len(scaled_data): # compression failed
+ return orig_data if len(orig_data) <= len(scaled_data) else scaled_data
+
+ if len(data) >= len(orig_data): # no improvement
+ return orig_data
+
+ return data
def process_images(self, soup, baseurl):
diskpath = unicode_path(os.path.join(self.current_dir, 'images'))
@@ -390,6 +429,12 @@ class RecursiveFetcher(object):
im = Image()
im.load(data)
data = im.export(itype)
+ if self.compress_news_images and itype in {'jpg','jpeg'}:
+ try:
+ data = self.rescale_image(data)
+ except:
+ self.log.exception('failed to compress image '+iurl)
+ identify_data(data)
else:
identify_data(data)
imgpath = os.path.join(diskpath, fname+'.'+itype)
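With the default auto size of 16 the byte budget works out to width * height / 16, and the quality loop in rescale_image lowers the jpeg quality in steps of 5 (never below 5) until the export fits. The budget arithmetic for a typical photo:

    orig_w, orig_h = 600, 800
    compress_news_images_auto_size = 16
    maxsizeb = (orig_w * orig_h) / compress_news_images_auto_size
    print(maxsizeb)  # 30000 bytes, roughly 29 KB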
|