Merge from trunk.
Commit: c5bc937e6e
@ -579,9 +579,23 @@ Yes, you can. Follow the instructions in the answer above for adding custom colu
How do I move my |app| library from one computer to another?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Simply copy the |app| library folder from the old to the new computer. You can
find out what the library folder is by clicking the calibre icon in the
toolbar. The very first item is the path to the library folder. Now on the new
computer, start |app| for the first time. It will run the Welcome Wizard asking
you for the location of the |app| library. Point it to the previously copied
folder. If the computer you are transferring to already has a calibre
installation, then the Welcome wizard won't run. In that case, right-click the
|app| icon in the toolbar and point it to the newly copied directory. You will
now have two |app| libraries on your computer and you can switch between them
by clicking the |app| icon on the toolbar. Transferring your library in this
manner preserves all your metadata, tags, custom columns, etc.

Note that if you are transferring between different types of computers (for
example Windows to OS X) then after doing the above you should also right-click
the |app| icon on the tool bar, select Library Maintenance and run the Check
Library action. It will warn you about any problems in your library, which you
should fix by hand.

.. note:: A |app| library is just a folder which contains all the book files and their metadata. All the metadata is stored in a single file called metadata.db, in the top level folder. If this file gets corrupted, you may see an empty list of books in |app|. In this case you can ask |app| to restore your books by doing a right-click on the |app| icon in the toolbar and selecting Library Maintenance->Restore Library.
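The note above says that everything except the book files lives in metadata.db, a single SQLite file at the top of the library folder. As a rough illustration only (not part of this commit, and assuming calibre's standard schema with a books table holding a title column), the book list can be peeked at directly:

    import sqlite3

    # Illustrative only: point this at the top level folder of a calibre library.
    library_folder = '/path/to/Calibre Library'
    db = sqlite3.connect(library_folder + '/metadata.db')

    # One row per book in the 'books' table (assumed standard calibre schema).
    for book_id, title in db.execute('SELECT id, title FROM books ORDER BY id'):
        print(book_id, title)
    db.close()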
@ -7,7 +7,6 @@ description = 'Italian daily newspaper - 09-11-2011'
'''
http://www.ilgiornale.it/
'''
-from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.web.feeds.news import BasicNewsRecipe

class IlGiornale(BasicNewsRecipe):
@ -25,35 +24,39 @@ class IlGiornale(BasicNewsRecipe):
    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
+   #auto_cleanup = True
+   #auto_cleanup_keep = '//div[@id="insertbox_text"]'

    no_stylesheets = True
    conversion_options = {'linearize_tables':True}
    remove_javascript = True

+   keep_only_tags = [dict(name='h1', attrs={'class':'entry-title'}), dict(name='div', attrs={'id':'insertbox_text'})]

-   def get_article_url(self, article):
-       return article.get('guid', article.get('id', None))
-
-   def print_version(self, url):
-       raw = self.browser.open(url).read()
-       soup = BeautifulSoup(raw.decode('utf8', 'replace'))
-       all_print_tags = soup.find('div', {'id':'print_article'})
-       print_link = all_print_tags.a
-       if print_link is None:
-           return url
-       return 'http://www.ilgiornale.it' + print_link['href']
+   #def get_article_url(self, article):
+   #   return article.get('guid', article.get('id', None))
+
+   #def print_version(self, url):
+   #   raw = self.browser.open(url).read()
+   #   soup = BeautifulSoup(raw.decode('utf8', 'replace'))
+   #   all_print_tags = soup.find('div', {'id':'print_article'})
+   #   print_link = all_print_tags.a
+   #   if print_link is None:
+   #       return url
+   #   return 'http://www.ilgiornale.it' + print_link['href']

    feeds = [
-             (u'Ultime Notizie',u'http://www.ilgiornale.it/?RSS=S'),
-             (u'All\'Interno', u'http://www.ilgiornale.it/la_s.pic1?SID=8&RSS=S'),
-             (u'Esteri', u'http://www.ilgiornale.it/la_s.pic1?SID=6&RSS=S'),
-             (u'Economia', u'http://www.ilgiornale.it/la_s.pic1?SID=5&RSS=S'),
-             (u'Cultura', u'http://www.ilgiornale.it/la_s.pic1?SID=4&RSS=S'),
-             (u'Spettacoli', u'http://www.ilgiornale.it/la_s.pic1?SID=14&RSS=S'),
-             (u'Sport', u'http://www.ilgiornale.it/la_s.pic1?SID=15&RSS=S'),
-             (u'Tech&Web', u'http://www.ilgiornale.it/la_s.pic1?SID=35&RSS=S'),
-             (u'Edizione di Roma', u'http://www.ilgiornale.it/roma.pic1?SID=13&RSS=S'),
-             (u'Edizione di Milano', u'http://www.ilgiornale.it/milano.pic1?SID=9&RSS=S'),
-             (u'Edizione di Genova', u'http://www.ilgiornale.it/genova.pic1?SID=7&RSS=S')
+             (u'Ultime Notizie',u'http://www.ilgiornale.it/rss.xml'),
+             #(u'All\'Interno', u'http://www.ilgiornale.it/la_s.pic1?SID=8&RSS=S'),
+             #(u'Esteri', u'http://www.ilgiornale.it/la_s.pic1?SID=6&RSS=S'),
+             #(u'Economia', u'http://www.ilgiornale.it/la_s.pic1?SID=5&RSS=S'),
+             #(u'Cultura', u'http://www.ilgiornale.it/la_s.pic1?SID=4&RSS=S'),
+             #(u'Spettacoli', u'http://www.ilgiornale.it/la_s.pic1?SID=14&RSS=S'),
+             #(u'Sport', u'http://www.ilgiornale.it/la_s.pic1?SID=15&RSS=S'),
+             #(u'Tech&Web', u'http://www.ilgiornale.it/la_s.pic1?SID=35&RSS=S'),
+             #(u'Edizione di Roma', u'http://www.ilgiornale.it/roma.pic1?SID=13&RSS=S'),
+             #(u'Edizione di Milano', u'http://www.ilgiornale.it/milano.pic1?SID=9&RSS=S'),
+             #(u'Edizione di Genova', u'http://www.ilgiornale.it/genova.pic1?SID=7&RSS=S')
              ]
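The two commented auto_cleanup lines above refer to calibre's generic article extraction. A rough sketch of a variant recipe that relies on it instead of keep_only_tags (hypothetical, not what this recipe ships; the XPath is taken from the comment):

    from calibre.web.feeds.news import BasicNewsRecipe

    class IlGiornaleAuto(BasicNewsRecipe):
        # Hypothetical variant for illustration only.
        title = 'Il Giornale (auto cleanup)'
        oldest_article = 7
        max_articles_per_feed = 100
        use_embedded_content = False
        no_stylesheets = True
        remove_javascript = True

        # Let calibre guess the article body instead of listing keep_only_tags.
        auto_cleanup = True
        # Never strip the main article container during cleanup.
        auto_cleanup_keep = '//div[@id="insertbox_text"]'

        feeds = [(u'Ultime Notizie', u'http://www.ilgiornale.it/rss.xml')]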
|
@ -41,7 +41,7 @@ class NYTimes(BasicNewsRecipe):
    # number of days old an article can be for inclusion. If oldest_web_article = None all articles
    # will be included. Note: oldest_web_article is ignored if webEdition = False
    webEdition = False
-   oldest_web_article = 7
+   oldest_web_article = None

    # download higher resolution images than the small thumbnails typically included in the article
    # the down side of having large beautiful images is the file size is much larger, on the order of 7MB per paper
@ -188,6 +188,8 @@ class NYTimes(BasicNewsRecipe):
        'relatedSearchesModule',
        'side_tool',
        'singleAd',
+       'postCategory column',
+       'refer tagRefer', # added for bits blog post
        'entry entry-utility', #added for DealBook
        'entry-tags', #added for DealBook
        'footer promos clearfix', #added for DealBook
@ -324,6 +326,8 @@ class NYTimes(BasicNewsRecipe):
            return True
        if '/video/' in url:
            return True
+       if '/multimedia/' in url:
+           return True
        if '/slideshow/' in url:
            return True
        if '/magazine/index' in url:
@ -334,6 +338,15 @@ class NYTimes(BasicNewsRecipe):
            return True
        if '/premium/' in url:
            return True
+       if '#comment' in url:
+           return True
+       if '#postComment' in url:
+           return True
+       if '#postcomment' in url:
+           return True
+       if re.search('/\d\d\d\d/\d\d/\d\d/',url) is None:
+           print("NO DATE IN "+url)
+           return True
        return False

    def fixChars(self,string):
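The new checks above drop comment anchors and anything whose path lacks a /YYYY/MM/DD/ component. A small standalone sketch of that last test (the URLs are made up for illustration):

    import re

    def looks_like_dated_article(url):
        # Mirrors the checks added to exclude_url: NYT article URLs carry a
        # /YYYY/MM/DD/ segment, while index pages and comment anchors do not.
        if '#comment' in url or '#postComment' in url or '#postcomment' in url:
            return False
        return re.search(r'/\d\d\d\d/\d\d/\d\d/', url) is not None

    print(looks_like_dated_article('http://www.nytimes.com/2013/04/01/world/example.html'))  # True
    print(looks_like_dated_article('http://www.nytimes.com/pages/world/index.html'))         # False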
@ -363,6 +376,7 @@ class NYTimes(BasicNewsRecipe):

    cover_tag = 'NY_NYT'
    def get_cover_url(self):
+       from datetime import timedelta, date
        cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.cover_tag+'.jpg'
        br = BasicNewsRecipe.get_browser(self)
        daysback=1
@ -385,7 +399,6 @@ class NYTimes(BasicNewsRecipe):

    masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'


    def short_title(self):
        return self.title

@ -647,75 +660,53 @@ class NYTimes(BasicNewsRecipe):

        soup = self.index_to_soup('http://www.nytimes.com/pages/todaysheadlines/')

-       # Fetch the content table
-       content_table = soup.find('table',{'id':'content'})
-       if content_table is None:
-           self.log("FATAL ERROR: CANNOT FIND CONTENT TABLE")
-           return None
-
-       # Within this table are <td id=".*Column.*"> entries, each containing one or more h6 tags which represent sections
-
-       for td_col in content_table.findAll('td', {'id' : re.compile('Column')}):
-           for div_sec in td_col.findAll('div',recursive=False):
-               for h6_sec_name in div_sec.findAll('h6',{'style' : re.compile('text-transform: *uppercase')}):
-
-                   section_name = self.tag_to_string(h6_sec_name,use_alt=False)
-                   section_name = re.sub(r'^ *$','',section_name)
-
-                   if section_name == '':
-                       continue
-                   if self.includeSections != []:
-                       if section_name not in self.includeSections:
-                           print "SECTION NOT INCLUDED: ",section_name
-                           continue
-                   if section_name in self.excludeSections:
-                       print "SECTION EXCLUDED: ",section_name
-                       continue
-
-                   section_name=string.capwords(section_name)
-                   section_name = section_name.replace('Op-ed','Op-Ed')
-                   section_name = section_name.replace('U.s.','U.S.')
-                   section_name = section_name.replace('N.y.','N.Y.')
-                   pubdate = strftime('%a, %d %b')
-
-                   search_div = div_sec
-                   for next_tag in h6_sec_name.findNextSiblings(True):
-                       if next_tag.__class__.__name__ == 'Tag':
-                           if next_tag.name == 'div':
-                               search_div = next_tag
-                           break
-
-                   # Get the articles
-                   for h3_item in search_div.findAll('h3'):
-                       byline = h3_item.h6
-                       if byline is not None:
-                           author = self.tag_to_string(byline,use_alt=False)
-                       else:
-                           author = ''
-                       a = h3_item.find('a', href=True)
-                       if not a:
-                           continue
-                       url = re.sub(r'\?.*', '', a['href'])
-                       if self.exclude_url(url):
-                           continue
-                       url += '?pagewanted=all'
-                       if self.filterDuplicates:
-                           if url in self.url_list:
-                               continue
-                       self.url_list.append(url)
-                       title = self.tag_to_string(a, use_alt=True).strip()
-                       desc = h3_item.find('p')
-                       if desc is not None:
-                           description = self.tag_to_string(desc,use_alt=False)
-                       else:
-                           description = ''
-                       if not self.articles.has_key(section_name):
-                           self.ans.append(section_name)
-                           self.articles[section_name] = []
-                       self.articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))
+       section_name='Unknown Section'
+       pubdate = strftime('%a, %d %b')
+       for td_col in soup.findAll('td'):
+           h6_sec_name = td_col.find('h6')
+           if h6_sec_name is not None:
+               new_section_name = self.tag_to_string(h6_sec_name,use_alt=False)
+               new_section_name = re.sub(r'^ *$','',new_section_name)
+               if new_section_name == '':
+                   continue
+               section_name = new_section_name
+               continue
+           atag = td_col.find('a')
+           if atag is not None:
+               h4tag = None
+               for h4tag in atag.findNextSiblings('h4'):
+                   break
+               if h4tag is None:
+                   continue
+               author = self.tag_to_string(h4tag,use_alt=False)
+               try:
+                   url = re.sub(r'\?.*', '', atag['href'])
+               except:
+                   continue
+               if self.exclude_url(url):
+                   continue
+               if '?' in url:
+                   url += '&pagewanted=all'
+               else:
+                   url += '?pagewanted=all'
+               if self.filterDuplicates:
+                   if url in self.url_list:
+                       continue
+               self.url_list.append(url)
+               title = self.tag_to_string(atag, use_alt=False).strip()
+               desc = atag.parent.find('p')
+               if desc is not None:
+                   description = self.tag_to_string(desc,use_alt=False)
+               else:
+                   description = ''
+               if not self.articles.has_key(section_name):
+                   self.ans.append(section_name)
+                   self.articles[section_name] = []
+               print('Title '+title+' author '+author)
+               self.articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))

        self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
-       return self.filter_ans(self.get_tech_feeds(self.get_popular_articles(self.ans)))
+       return self.filter_ans(self.ans)

    def parse_index(self):
        if self.headlinesOnly:
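The rewritten loop above walks every td on the Today's Headlines page, treating an h6 as a section heading and a following a tag plus sibling h4 as an article link and byline. A rough standalone sketch of that walk, using bs4 and a made-up fragment (the real page markup is an assumption here; the recipe itself uses calibre's bundled BeautifulSoup):

    from bs4 import BeautifulSoup

    html = '''
    <table><tr>
      <td><h6>World</h6></td>
      <td><a href="http://www.nytimes.com/2013/04/01/world/example.html">Example headline</a>
          <h4>By A REPORTER</h4><p>Short description.</p></td>
    </tr></table>
    '''

    soup = BeautifulSoup(html, 'html.parser')
    section = 'Unknown Section'
    for td in soup.find_all('td'):
        h6 = td.find('h6')
        if h6 is not None:
            section = h6.get_text(strip=True) or section
            continue
        a = td.find('a')
        if a is not None:
            h4 = a.find_next_sibling('h4')
            author = h4.get_text(strip=True) if h4 else ''
            print(section, '|', a.get_text(strip=True), '|', author, '|', a['href'])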
@ -825,8 +816,9 @@ class NYTimes(BasicNewsRecipe):
        for divr in soup.findAll('div',attrs={'class':re.compile('w190 right')}):
            if divr.find(text=re.compile('Sign up')):
                divr.extract()
-       divr = soup.find('div',attrs={'id':re.compile('related-content')})
+       divr = soup.find('div',attrs={'class':re.compile('^relatedArticlesModule')})
        if divr is not None:
+           print("PROCESSING RELATED: "+self.tag_to_string(soup.title,False))
            # handle related articles
            rlist = []
            ul = divr.find('ul')
@ -856,6 +848,8 @@ class NYTimes(BasicNewsRecipe):
            asidediv.append(Tag(soup,'hr'))
            smain = soup.find('body')
            smain.append(asidediv)
+       else:
+           print("CANNOT FIND RELATED: "+self.tag_to_string(soup.title,False))
        for atag in soup.findAll('a'):
            img = atag.find('img')
            if img is not None:
@ -898,6 +892,18 @@ class NYTimes(BasicNewsRecipe):
                        first_outer = outerdiv
                    else:
                        litag.extract()
+               for h6tag in rdiv.findAll('h6'):
+                   if h6tag.find('a') is not None:
+                       if h6tag.find('a')['href'].startswith('http://www.nytimes.com'):
+                           url = re.sub(r'\?.*', '', h6tag.find('a')['href'])
+                           h6tag.find('a')['href'] = url+'?pagewanted=all'
+                           h6tag.extract()
+                           related.append(h6tag)
+                           if first_related is None:
+                               first_related = rdiv
+                               first_outer = outerdiv
+                       else:
+                           h6tag.extract()
        if related != []:
            for r in related:
                if r.h6: # don't want the anchor inside a h6 tag
@ -188,6 +188,8 @@ class NYTimes(BasicNewsRecipe):
        'relatedSearchesModule',
        'side_tool',
        'singleAd',
+       'postCategory column',
+       'refer tagRefer', # added for bits blog post
        'entry entry-utility', #added for DealBook
        'entry-tags', #added for DealBook
        'footer promos clearfix', #added for DealBook
@ -324,6 +326,8 @@ class NYTimes(BasicNewsRecipe):
            return True
        if '/video/' in url:
            return True
+       if '/multimedia/' in url:
+           return True
        if '/slideshow/' in url:
            return True
        if '/magazine/index' in url:
@ -334,6 +338,15 @@ class NYTimes(BasicNewsRecipe):
            return True
        if '/premium/' in url:
            return True
+       if '#comment' in url:
+           return True
+       if '#postComment' in url:
+           return True
+       if '#postcomment' in url:
+           return True
+       if re.search('/\d\d\d\d/\d\d/\d\d/',url) is None:
+           print("NO DATE IN "+url)
+           return True
        return False

    def fixChars(self,string):
@ -371,6 +384,7 @@ class NYTimes(BasicNewsRecipe):

    cover_tag = 'NY_NYT'
    def get_cover_url(self):
+       from datetime import timedelta, date
        cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.cover_tag+'.jpg'
        br = BasicNewsRecipe.get_browser(self)
        daysback=1
@ -393,7 +407,6 @@ class NYTimes(BasicNewsRecipe):

    masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'


    def short_title(self):
        return self.title

@ -655,75 +668,53 @@ class NYTimes(BasicNewsRecipe):

        soup = self.index_to_soup('http://www.nytimes.com/pages/todaysheadlines/')

-       # Fetch the content table
-       content_table = soup.find('table',{'id':'content'})
-       if content_table is None:
-           self.log("FATAL ERROR: CANNOT FIND CONTENT TABLE")
-           return None
-
-       # Within this table are <td id=".*Column.*"> entries, each containing one or more h6 tags which represent sections
-
-       for td_col in content_table.findAll('td', {'id' : re.compile('Column')}):
-           for div_sec in td_col.findAll('div',recursive=False):
-               for h6_sec_name in div_sec.findAll('h6',{'style' : re.compile('text-transform: *uppercase')}):
-
-                   section_name = self.tag_to_string(h6_sec_name,use_alt=False)
-                   section_name = re.sub(r'^ *$','',section_name)
-
-                   if section_name == '':
-                       continue
-                   if self.includeSections != []:
-                       if section_name not in self.includeSections:
-                           print "SECTION NOT INCLUDED: ",section_name
-                           continue
-                   if section_name in self.excludeSections:
-                       print "SECTION EXCLUDED: ",section_name
-                       continue
-
-                   section_name=string.capwords(section_name)
-                   section_name = section_name.replace('Op-ed','Op-Ed')
-                   section_name = section_name.replace('U.s.','U.S.')
-                   section_name = section_name.replace('N.y.','N.Y.')
-                   pubdate = strftime('%a, %d %b')
-
-                   search_div = div_sec
-                   for next_tag in h6_sec_name.findNextSiblings(True):
-                       if next_tag.__class__.__name__ == 'Tag':
-                           if next_tag.name == 'div':
-                               search_div = next_tag
-                           break
-
-                   # Get the articles
-                   for h3_item in search_div.findAll('h3'):
-                       byline = h3_item.h6
-                       if byline is not None:
-                           author = self.tag_to_string(byline,use_alt=False)
-                       else:
-                           author = ''
-                       a = h3_item.find('a', href=True)
-                       if not a:
-                           continue
-                       url = re.sub(r'\?.*', '', a['href'])
-                       if self.exclude_url(url):
-                           continue
-                       url += '?pagewanted=all'
-                       if self.filterDuplicates:
-                           if url in self.url_list:
-                               continue
-                       self.url_list.append(url)
-                       title = self.tag_to_string(a, use_alt=True).strip()
-                       desc = h3_item.find('p')
-                       if desc is not None:
-                           description = self.tag_to_string(desc,use_alt=False)
-                       else:
-                           description = ''
-                       if not self.articles.has_key(section_name):
-                           self.ans.append(section_name)
-                           self.articles[section_name] = []
-                       self.articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))
+       section_name='Unknown Section'
+       pubdate = strftime('%a, %d %b')
+       for td_col in soup.findAll('td'):
+           h6_sec_name = td_col.find('h6')
+           if h6_sec_name is not None:
+               new_section_name = self.tag_to_string(h6_sec_name,use_alt=False)
+               new_section_name = re.sub(r'^ *$','',new_section_name)
+               if new_section_name == '':
+                   continue
+               section_name = new_section_name
+               continue
+           atag = td_col.find('a')
+           if atag is not None:
+               h4tag = None
+               for h4tag in atag.findNextSiblings('h4'):
+                   break
+               if h4tag is None:
+                   continue
+               author = self.tag_to_string(h4tag,use_alt=False)
+               try:
+                   url = re.sub(r'\?.*', '', atag['href'])
+               except:
+                   continue
+               if self.exclude_url(url):
+                   continue
+               if '?' in url:
+                   url += '&pagewanted=all'
+               else:
+                   url += '?pagewanted=all'
+               if self.filterDuplicates:
+                   if url in self.url_list:
+                       continue
+               self.url_list.append(url)
+               title = self.tag_to_string(atag, use_alt=False).strip()
+               desc = atag.parent.find('p')
+               if desc is not None:
+                   description = self.tag_to_string(desc,use_alt=False)
+               else:
+                   description = ''
+               if not self.articles.has_key(section_name):
+                   self.ans.append(section_name)
+                   self.articles[section_name] = []
+               print('Title '+title+' author '+author)
+               self.articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))

        self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
-       return self.filter_ans(self.get_tech_feeds(self.get_popular_articles(self.ans)))
+       return self.filter_ans(self.ans)

    def parse_index(self):
        if self.headlinesOnly:
@ -833,8 +824,9 @@ class NYTimes(BasicNewsRecipe):
        for divr in soup.findAll('div',attrs={'class':re.compile('w190 right')}):
            if divr.find(text=re.compile('Sign up')):
                divr.extract()
-       divr = soup.find('div',attrs={'id':re.compile('related-content')})
+       divr = soup.find('div',attrs={'class':re.compile('^relatedArticlesModule')})
        if divr is not None:
+           print("PROCESSING RELATED: "+self.tag_to_string(soup.title,False))
            # handle related articles
            rlist = []
            ul = divr.find('ul')
@ -864,6 +856,8 @@ class NYTimes(BasicNewsRecipe):
            asidediv.append(Tag(soup,'hr'))
            smain = soup.find('body')
            smain.append(asidediv)
+       else:
+           print("CANNOT FIND RELATED: "+self.tag_to_string(soup.title,False))
        for atag in soup.findAll('a'):
            img = atag.find('img')
            if img is not None:
@ -906,6 +900,18 @@ class NYTimes(BasicNewsRecipe):
                        first_outer = outerdiv
                    else:
                        litag.extract()
+               for h6tag in rdiv.findAll('h6'):
+                   if h6tag.find('a') is not None:
+                       if h6tag.find('a')['href'].startswith('http://www.nytimes.com'):
+                           url = re.sub(r'\?.*', '', h6tag.find('a')['href'])
+                           h6tag.find('a')['href'] = url+'?pagewanted=all'
+                           h6tag.extract()
+                           related.append(h6tag)
+                           if first_related is None:
+                               first_related = rdiv
+                               first_outer = outerdiv
+                       else:
+                           h6tag.extract()
        if related != []:
            for r in related:
                if r.h6: # don't want the anchor inside a h6 tag
@ -35,7 +35,10 @@ class NewYorkTimesBookReview(BasicNewsRecipe):
                continue
            if x['class'] in {'story', 'ledeStory'}:
                tt = 'h3' if x['class'] == 'story' else 'h1'
-               a = x.find(tt).find('a', href=True)
+               try:
+                   a = x.find(tt).find('a', href=True)
+               except AttributeError:
+                   continue
                title = self.tag_to_string(a)
                url = a['href'] + '&pagewanted=all'
                self.log('\tFound article:', title, url)
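The try/except added above covers blocks where x.find(tt) returns None (no h3 or h1 present), in which case chaining .find('a', href=True) onto it raises AttributeError. A minimal illustration:

    from bs4 import BeautifulSoup

    block = BeautifulSoup('<div class="story"><p>No headline tag here</p></div>', 'html.parser').div
    try:
        a = block.find('h3').find('a', href=True)  # find('h3') is None, so the chained call raises
    except AttributeError:
        a = None
    print(a)  # None: the entry is skipped instead of aborting the whole index parse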
@ -19,6 +19,7 @@ let g:syntastic_c_include_dirs = g:syntastic_cpp_include_dirs

set wildignore+=resources/viewer/mathjax/**
set wildignore+=build/**
+set wildignore+=dist/**

fun! CalibreLog()
    " Setup buffers to edit the calibre changelog and version info prior to
@ -302,7 +302,7 @@ class Worker(Thread): # Get details {{{
            self.log.exception('Error parsing series for url: %r'%self.url)

        try:
-           self.cover_url = self.parse_cover(root)
+           self.cover_url = self.parse_cover(root, raw)
        except:
            self.log.exception('Error parsing cover for url: %r'%self.url)
        mi.has_cover = bool(self.cover_url)
@ -450,18 +450,24 @@ class Worker(Thread): # Get details {{{
            ans = (s, i)
        return ans

-   def parse_cover(self, root):
+   def parse_cover(self, root, raw=b""):
        imgs = root.xpath('//img[(@id="prodImage" or @id="original-main-image" or @id="main-image") and @src]')
+       if not imgs:
+           imgs = root.xpath('//div[@class="main-image-inner-wrapper"]/img[@src]')
        if imgs:
            src = imgs[0].get('src')
-           if '/no-image-avail' not in src:
+           if 'loading-' in src:
+               js_img = re.search(br'"largeImage":"(http://[^"]+)",',raw)
+               if js_img:
+                   src = js_img.group(1).decode('utf-8')
+           if ('/no-image-avail' not in src and 'loading-' not in src and '/no-img-sm' not in src):
+               self.log('Found image: %s' % src)
                parts = src.split('/')
                if len(parts) > 3:
                    bn = parts[-1]
                    sparts = bn.split('_')
                    if len(sparts) > 2:
-                       bn = sparts[0] + sparts[-1]
+                       bn = re.sub(r'\.\.jpg$', '.jpg', (sparts[0] + sparts[-1]))
                    return ('/'.join(parts[:-1]))+'/'+bn

    def parse_isbn(self, pd):
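The '..jpg' cleanup added above matters because Amazon image file names carry size modifiers between underscores; joining sparts[0] and sparts[-1] to strip them can leave a doubled dot. A worked example with an illustrative (not real) file name:

    import re

    src = 'http://ecx.images-amazon.com/images/I/51AbCdEfGhL._SL500_AA300_.jpg'  # illustrative URL
    parts = src.split('/')
    bn = parts[-1]                       # '51AbCdEfGhL._SL500_AA300_.jpg'
    sparts = bn.split('_')               # ['51AbCdEfGhL.', 'SL500', 'AA300', '.jpg']
    bn = sparts[0] + sparts[-1]          # '51AbCdEfGhL..jpg'  <- doubled dot
    bn = re.sub(r'\.\.jpg$', '.jpg', bn)
    print('/'.join(parts[:-1]) + '/' + bn)  # candidate full-size cover URL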
@ -54,6 +54,27 @@ if pictureflow is not None:
        def currentChanged(self, index):
            print 'current changed:', index

+   class DummyImageList(pictureflow.FlowImages):
+
+       def __init__(self):
+           pictureflow.FlowImages.__init__(self)
+           self.num = 40000
+           i1, i2 = QImage(300, 400, QImage.Format_RGB32), QImage(300, 400, QImage.Format_RGB32)
+           i1.fill(Qt.green), i2.fill(Qt.blue)
+           self.images = [i1, i2]
+
+       def count(self):
+           return self.num
+
+       def image(self, index):
+           return self.images[index%2]
+
+       def caption(self, index):
+           return 'Number: %d'%index
+
+       def subtitle(self, index):
+           return ''
+
    class DatabaseImages(pictureflow.FlowImages):

        def __init__(self, model, buffer=20):
@ -328,6 +349,21 @@ class CoverFlowMixin(object):
    def sync_listview_to_cf(self, row):
        self.cf_last_updated_at = time.time()

+def test():
+    from PyQt4.QtGui import QApplication, QMainWindow
+    app = QApplication([])
+    w = QMainWindow()
+    cf = CoverFlow()
+    cf.resize(int(available_width()/1.5), available_height()-60)
+    w.resize(cf.size()+QSize(30, 20))
+    model = DummyImageList()
+    cf.setImages(model)
+    cf.setCurrentSlide(39000)
+    w.setCentralWidget(cf)
+
+    w.show()
+    cf.setFocus(Qt.OtherFocusReason)
+    sys.exit(app.exec_())
+
def main(args=sys.argv):
    return 0
@ -94,6 +94,9 @@ class LibraryViewMixin(object): # {{{
        v = self.current_view()
        if hasattr(v, 'set_current_row'):
            v.set_current_row(0)
+           if v is self.library_view and v.row_count() == 0:
+               self.book_details.reset_info()

    # }}}

@ -87,10 +87,11 @@ def init_qt(args):
    opts, args = parser.parse_args(args)
    find_portable_library()
    if opts.with_library is not None:
-       if not os.path.exists(opts.with_library):
-           os.makedirs(opts.with_library)
-       if os.path.isdir(opts.with_library):
-           prefs.set('library_path', os.path.abspath(opts.with_library))
+       libpath = os.path.expanduser(opts.with_library)
+       if not os.path.exists(libpath):
+           os.makedirs(libpath)
+       if os.path.isdir(libpath):
+           prefs.set('library_path', os.path.abspath(libpath))
            prints('Using library at', prefs['library_path'])
    QCoreApplication.setOrganizationName(ORG_NAME)
    QCoreApplication.setApplicationName(APP_UID)
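Using libpath above means a value such as --with-library ~/CalibreLibrary works even when the shell has not expanded the tilde; previously os.makedirs would have created a directory literally named '~'. For illustration:

    import os.path

    raw = '~/CalibreLibrary'        # as it might arrive unexpanded from a launcher or script
    print(os.path.exists(raw))      # almost always False: there is no directory called '~'
    print(os.path.expanduser(raw))  # e.g. /home/user/CalibreLibrary, depending on the home directory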
@ -398,7 +398,7 @@ private:
    QCache<int, QImage> surfaceCache;
    QTimer triggerTimer;

-   int slideFrame;
+   long long slideFrame;
    int step;
    int target;
    int fade;
@ -493,7 +493,7 @@ void PictureFlowPrivate::setCurrentSlide(int index)
    step = 0;
    centerIndex = qBound(index, 0, slideImages->count()-1);
    target = centerIndex;
-   slideFrame = index << 16;
+   slideFrame = ((long long)index) << 16;
    resetSlides();
    triggerRender();
    widget->emitcurrentChanged(centerIndex);
@ -1069,7 +1069,7 @@ void PictureFlowPrivate::updateAnimation()
    const int max = 2 * 65536;

    int fi = slideFrame;
    fi -= (target << 16);
    if(fi < 0)
        fi = -fi;
    fi = qMin(fi, max);
@ -1094,7 +1094,7 @@ void PictureFlowPrivate::updateAnimation()
    if(centerIndex != index)
    {
        centerIndex = index;
-       slideFrame = index << 16;
+       slideFrame = ((long long)index) << 16;
        centerSlide.slideIndex = centerIndex;
        for(int i = 0; i < leftSlides.count(); i++)
            leftSlides[i].slideIndex = centerIndex-1-i;
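Widening slideFrame to long long (and casting before the shift) matters once the flow holds tens of thousands of slides, as the DummyImageList test above does: index << 16 no longer fits in a signed 32-bit int. The arithmetic, checked in Python:

    INT32_MAX = 2**31 - 1      # 2147483647

    index = 39000              # the slide the test() helper jumps to
    frame = index << 16        # fixed-point slide position, as computed in pictureflow.cpp
    print(frame)               # 2555904000
    print(frame > INT32_MAX)   # True: a 32-bit slideFrame would overflow here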
@ -763,22 +763,24 @@ class EditRules(QWidget): # {{{
            ' double clicking it.'))
        self.add_advanced_button.setVisible(False)

-   def _add_rule(self, dlg):
-       if dlg.exec_() == dlg.Accepted:
-           kind, col, r = dlg.rule
+   def add_rule(self):
+       d = RuleEditor(self.model.fm, self.pref_name)
+       d.add_blank_condition()
+       if d.exec_() == d.Accepted:
+           kind, col, r = d.rule
            if kind and r and col:
                idx = self.model.add_rule(kind, col, r)
                self.rules_view.scrollTo(idx)
                self.changed.emit()

-   def add_rule(self):
-       d = RuleEditor(self.model.fm, self.pref_name)
-       d.add_blank_condition()
-       self._add_rule(d)
-
    def add_advanced(self):
        td = TemplateDialog(self, '', mi=self.mi, fm=self.fm, color_field='')
-       self._add_rule(('color', td[0], td[1]))
+       if td.exec_() == td.Accepted:
+           col, r = td.rule
+           if r and col:
+               idx = self.model.add_rule('color', col, r)
+               self.rules_view.scrollTo(idx)
+               self.changed.emit()

    def edit_rule(self, index):
        try:
@ -30,6 +30,7 @@ class ItemView(QFrame): # {{{
    add_new_item = pyqtSignal(object, object)
    delete_item = pyqtSignal()
    flatten_item = pyqtSignal()
+   go_to_root = pyqtSignal()

    def __init__(self, parent):
        QFrame.__init__(self, parent)
@ -132,6 +133,11 @@ class ItemView(QFrame): # {{{
        b.setToolTip(_('All children of this entry are brought to the same '
            'level as this entry.'))
        l.addWidget(b, l.rowCount()+1, 0, 1, 2)
+       ip.b4 = b = QPushButton(QIcon(I('back.png')), _('&Return to root'))
+       b.clicked.connect(self.go_to_root)
+       b.setToolTip(_('Go back to the top level view'))
+       l.addWidget(b, l.rowCount()+1, 0, 1, 2)

        l.setRowMinimumHeight(rs, 20)

        l.addWidget(QLabel(), l.rowCount(), 0, 1, 2)
@ -237,6 +243,7 @@ class TOCView(QWidget): # {{{
        self.item_view.delete_item.connect(self.delete_current_item)
        i.add_new_item.connect(self.add_new_item)
        i.flatten_item.connect(self.flatten_item)
+       i.go_to_root.connect(self.go_to_root)
        l.addWidget(i, 0, 4, col, 1)

        l.setColumnStretch(2, 10)
@ -271,6 +278,9 @@ class TOCView(QWidget): # {{{
            item.removeChild(child)
            p.insertChild(idx+1, child)

+   def go_to_root(self):
+       self.tocw.setCurrentItem(None)
+
    def highlight_item(self, item):
        self.tocw.setCurrentItem(item, 0, QItemSelectionModel.ClearAndSelect)
        self.tocw.scrollToItem(item)
@ -184,7 +184,12 @@ class Feed(object):
        id = 'internal id#%s'%self.id_counter
        if id in self.added_articles:
            return
-       published = item.get('date_parsed', time.gmtime())
+       published = None
+       for date_field in ('date_parsed', 'published_parsed',
+                          'updated_parsed'):
+           published = item.get(date_field, None)
+           if published is not None:
+               break
        if not published:
            published = time.gmtime()
        self.added_articles.append(id)
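The loop above lets Feed fall back across the three timestamp fields feedparser may populate, instead of trusting date_parsed alone. A standalone sketch with a plain dict standing in for a feedparser entry:

    import time

    entry = {'published_parsed': time.strptime('2013-04-01', '%Y-%m-%d')}  # no 'date_parsed' key

    published = None
    for date_field in ('date_parsed', 'published_parsed', 'updated_parsed'):
        published = entry.get(date_field, None)
        if published is not None:
            break
    if not published:
        published = time.gmtime()  # last resort, as in the patched code
    print(time.strftime('%a, %d %b %Y', published))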
@ -338,6 +338,41 @@ class BasicNewsRecipe(Recipe):
    #: :meth:`javascript_login` method, to do the actual logging in.
    use_javascript_to_login = False

+   # The following parameters control how the recipe attempts to minimize
+   # jpeg image sizes
+
+   #: Set this to False to ignore all scaling and compression parameters and
+   #: pass images through unmodified. If True and the other compression
+   #: parameters are left at their default values, jpeg images will be scaled to fit
+   #: in the screen dimensions set by the output profile and compressed to size at
+   #: most (w * h)/16 where w x h are the scaled image dimensions.
+   compress_news_images = False
+
+   #: The factor used when auto compressing jpeg images. If set to None,
+   #: auto compression is disabled. Otherwise, the images will be reduced in size to
+   #: (w * h)/compress_news_images_auto_size bytes if possible by reducing
+   #: the quality level, where w x h are the image dimensions in pixels.
+   #: The minimum jpeg quality will be 5/100 so it is possible this constraint
+   #: will not be met. This parameter can be overridden by the parameter
+   #: compress_news_images_max_size which provides a fixed maximum size for images.
+   compress_news_images_auto_size = 16
+
+   #: Set jpeg quality so images do not exceed the size given (in KBytes).
+   #: If set, this parameter overrides auto compression via compress_news_images_auto_size.
+   #: The minimum jpeg quality will be 5/100 so it is possible this constraint
+   #: will not be met.
+   compress_news_images_max_size = None
+
+   #: Rescale images to fit in the device screen dimensions set by the output profile.
+   #: Ignored if no output profile is set.
+   scale_news_images_to_device = True
+
+   #: Maximum dimensions (w,h) to scale images to. If scale_news_images_to_device is True
+   #: this is set to the device screen dimensions set by the output profile unless
+   #: there is no profile set, in which case it is left at whatever value it has been
+   #: assigned (default None).
+   scale_news_images = None
+
    # See the built-in profiles for examples of these settings.

    def short_title(self):
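Taken together, the new attributes let a recipe opt in to jpeg shrinking. With the default auto size of 16, a 600x800 image is targeted at (600*800)/16 = 30000 bytes, roughly 29 KB. A hypothetical recipe using the new knobs (values chosen only for illustration):

    from calibre.web.feeds.news import BasicNewsRecipe

    class SmallImagesRecipe(BasicNewsRecipe):
        # Hypothetical example recipe, not part of this commit.
        title = 'Some news source'
        feeds = [('News', 'http://example.com/rss.xml')]

        # Opt in to the jpeg handling added in this commit.
        compress_news_images = True
        # Cap every jpeg at ~100 KB, overriding the (w*h)/16 auto target.
        compress_news_images_max_size = 100
        # Scale images down to the output profile's screen size first.
        scale_news_images_to_device = True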
@ -849,11 +884,19 @@ class BasicNewsRecipe(Recipe):
        for reg in self.filter_regexps:
            web2disk_cmdline.extend(['--filter-regexp', reg])
+
+       if options.output_profile.short_name == 'default':
+           self.scale_news_images_to_device = False
+       elif self.scale_news_images_to_device:
+           self.scale_news_images = options.output_profile.screen_size
+
        self.web2disk_options = web2disk_option_parser().parse_args(web2disk_cmdline)[0]
        for extra in ('keep_only_tags', 'remove_tags', 'preprocess_regexps',
                'skip_ad_pages', 'preprocess_html', 'remove_tags_after',
-               'remove_tags_before', 'is_link_wanted'):
+               'remove_tags_before', 'is_link_wanted',
+               'compress_news_images', 'compress_news_images_max_size',
+               'compress_news_images_auto_size', 'scale_news_images'):
            setattr(self.web2disk_options, extra, getattr(self, extra))

        self.web2disk_options.postprocess_html = self._postprocess_html
        self.web2disk_options.encoding = self.encoding
        self.web2disk_options.preprocess_raw_html = self.preprocess_raw_html_
@ -12,7 +12,7 @@ from urllib import url2pathname, quote
from httplib import responses
from base64 import b64decode

-from calibre import browser, relpath, unicode_path
+from calibre import browser, relpath, unicode_path, fit_image
from calibre.constants import filesystem_encoding, iswindows
from calibre.utils.filenames import ascii_filename
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
@ -20,7 +20,7 @@ from calibre.ebooks.chardet import xml_to_unicode
from calibre.utils.config import OptionParser
from calibre.utils.logging import Log
from calibre.utils.magick import Image
-from calibre.utils.magick.draw import identify_data
+from calibre.utils.magick.draw import identify_data, thumbnail

class FetchError(Exception):
    pass
@ -142,6 +142,10 @@ class RecursiveFetcher(object):
        self.postprocess_html_ext= getattr(options, 'postprocess_html', None)
        self._is_link_wanted = getattr(options, 'is_link_wanted',
                default_is_link_wanted)
+       self.compress_news_images_max_size = getattr(options, 'compress_news_images_max_size', None)
+       self.compress_news_images = getattr(options, 'compress_news_images', False)
+       self.compress_news_images_auto_size = getattr(options, 'compress_news_images_auto_size', 16)
+       self.scale_news_images = getattr(options, 'scale_news_images', None)
        self.download_stylesheets = not options.no_stylesheets
        self.show_progress = True
        self.failed_links = []
@ -338,7 +342,42 @@ class RecursiveFetcher(object):
                    x.write(data)
                ns.replaceWith(src.replace(m.group(1), stylepath))

+   def rescale_image(self, data):
+       orig_w, orig_h, ifmt = identify_data(data)
+       orig_data = data # save it in case compression fails
+       if self.scale_news_images is not None:
+           wmax, hmax = self.scale_news_images
+           scale, new_w, new_h = fit_image(orig_w, orig_h, wmax, hmax)
+           if scale:
+               data = thumbnail(data, new_w, new_h, compression_quality=95)[-1]
+               orig_w = new_w
+               orig_h = new_h
+       if self.compress_news_images_max_size is None:
+           if self.compress_news_images_auto_size is None: # not compressing
+               return data
+           else:
+               maxsizeb = (orig_w * orig_h)/self.compress_news_images_auto_size
+       else:
+           maxsizeb = self.compress_news_images_max_size * 1024
+       scaled_data = data # save it in case compression fails
+       if len(scaled_data) <= maxsizeb: # no compression required
+           return scaled_data
+
+       img = Image()
+       quality = 95
+       img.load(data)
+       while len(data) >= maxsizeb and quality >= 5:
+           quality -= 5
+           img.set_compression_quality(quality)
+           data = img.export('jpg')
+
+       if len(data) >= len(scaled_data): # compression failed
+           return orig_data if len(orig_data) <= len(scaled_data) else scaled_data
+
+       if len(data) >= len(orig_data): # no improvement
+           return orig_data
+
+       return data
+
    def process_images(self, soup, baseurl):
        diskpath = unicode_path(os.path.join(self.current_dir, 'images'))
@ -390,6 +429,12 @@ class RecursiveFetcher(object):
                        im = Image()
                        im.load(data)
                        data = im.export(itype)
+                       if self.compress_news_images and itype in {'jpg','jpeg'}:
+                           try:
+                               data = self.rescale_image(data)
+                           except:
+                               self.log.exception('failed to compress image '+iurl)
+                               identify_data(data)
                    else:
                        identify_data(data)
                imgpath = os.path.join(diskpath, fname+'.'+itype)