Mirror of https://github.com/kovidgoyal/calibre.git

Commit 3e81bee473: Merge branch 'master' of https://github.com/kovidgoyal/calibre

The merge touches two copies of the NYTimes recipe, the HTML input code (HTMLFile), the flow splitter, the on-device search, and the Tweak Book dialog widgets.
@@ -33,7 +33,6 @@ class NYTimes(BasicNewsRecipe):
     # and 30 will get the most popular measured over 30 days.
     # you still only get up to 20 articles in each category
 
-
     # set headlinesOnly to True for the headlines-only version. If True, webEdition is ignored.
     headlinesOnly = True
 
@@ -82,6 +81,7 @@ class NYTimes(BasicNewsRecipe):
 
     # The maximum number of articles that will be downloaded
     max_articles_per_feed = 100
+    use_embedded_content = False
 
     # Whether to omit duplicates of articles (typically arsing when articles are indexed in
     # more than one section). If True, only the first occurance will be downloaded.
@@ -122,7 +122,6 @@ class NYTimes(BasicNewsRecipe):
         (u'Tech - Open', u'http://open.blogs.nytimes.com/feed/')
     ]
 
-
     if headlinesOnly:
         title='New York Times Headlines'
         description = 'Headlines from the New York Times'
@@ -168,8 +167,7 @@ class NYTimes(BasicNewsRecipe):
 
     cover_margins = (18,18,'grey99')
 
-    remove_tags_before = dict(id='article')
-    remove_tags_after = dict(id='article')
+    keep_only_tags = dict(id=['article', 'story', 'content'])
     remove_tags = [
         dict(attrs={'class':[
             'articleFooter',
@@ -184,6 +182,7 @@ class NYTimes(BasicNewsRecipe):
             'entry-response module',
             'leftNavTabs',
             'metaFootnote',
+            'inside-story',
             'module box nav',
             'nextArticleLink',
             'nextArticleLink clearfix',
@@ -222,6 +221,8 @@ class NYTimes(BasicNewsRecipe):
             re.compile('commentCount'),
             'credit'
             ]}),
+        dict(attrs={'class':lambda x: x and 'related-coverage-marginalia' in x.split()}),
+        dict(attrs={'class':lambda x: x and 'interactive' in x.split()}),
         dict(name='div', attrs={'class':re.compile('toolsList')}), # bits
         dict(name='div', attrs={'class':re.compile('postNavigation')}), # bits
         dict(name='div', attrs={'class':'tweet'}),
@@ -235,6 +236,8 @@ class NYTimes(BasicNewsRecipe):
         dict(id=[
             'adxLeaderboard',
             'adxSponLink',
+            'anchoredAd_module',
+            'anchoredAd_spot',
             'archive',
             'articleExtras',
             'articleInline',
@@ -268,11 +271,13 @@ class NYTimes(BasicNewsRecipe):
             'related-content', # added for DealBook
             'whats-next',
             ]),
-        dict(name=['script', 'noscript', 'style','form','hr', 'button'])]
+        dict(name=['script', 'noscript', 'style','form','hr', 'button', 'meta', 'footer'])]
     no_stylesheets = True
     extra_css = '''
         .articleHeadline { text-align: left; margin-top:0.5em; margin-bottom:0.25em; }
-        .credit { font-weight: normal; text-align: right; font-size: 50%; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
+        .credit { font-weight: normal; text-align: right; font-size:
+            50%; line-height:1em; margin-top:5px; margin-left:0;
+            margin-right:0; margin-bottom: 0; }
         .byline { text-align: left; font-size: 50%; line-height:1em; margin-top:10px; margin-left:0; margin-right:0; margin-bottom: 0; }
         .dateline { text-align: left; font-size: 50%; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
         .kicker { font-size: 50%; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
@@ -288,7 +293,6 @@ class NYTimes(BasicNewsRecipe):
         .asidenote {color:blue;margin:0px 0px 0px 0px; padding: 0px 0px 0px 0px; font-size:100%;font-weight:bold;}
         .source {text-align: left; font-size: x-small; }'''
 
-
     articles = {}
     key = None
     ans = []
@@ -409,7 +413,6 @@ class NYTimes(BasicNewsRecipe):
     def short_title(self):
         return self.title
 
-
     def article_to_soup(self, url_or_raw, raw=False):
         from contextlib import closing
         import copy
@@ -443,7 +446,6 @@ class NYTimes(BasicNewsRecipe):
             usrc = self.preprocess_raw_html(usrc, url_or_raw)
         return BeautifulSoup(usrc, markupMassage=nmassage)
 
-
    def massageNCXText(self, description):
        # Kindle TOC descriptions won't render certain characters
        if description:
@@ -498,7 +500,7 @@ class NYTimes(BasicNewsRecipe):
             if authorAttribution:
                 author = self.tag_to_string(authorAttribution, use_alt=False)
         feed = self.key if self.key is not None else 'Uncategorized'
-        if not self.articles.has_key(feed):
+        if feed not in self.articles:
             self.ans.append(feed)
             self.articles[feed] = []
         self.articles[feed].append(
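Many hunks in this recipe replace dict.has_key() with the in operator. has_key() was removed in Python 3, and "key in d" is the idiomatic (and slightly faster) spelling in Python 2 as well. A minimal before/after sketch, with illustrative names:

    articles = {'World': []}

    # Old spelling, Python 2 only; dict.has_key() no longer exists in Python 3:
    #     if not articles.has_key('Sports'): ...

    # New spelling, valid in both:
    if 'Sports' not in articles:
        articles['Sports'] = []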
@@ -533,7 +535,6 @@ class NYTimes(BasicNewsRecipe):
                 desc = ''
             return(title,url,author,desc)
 
-
         have_emailed = False
         emailed_soup = self.index_to_soup('http://www.nytimes.com/most-popular-emailed?period='+self.popularPeriod)
         for h3tag in emailed_soup.findAll('h3'):
@@ -562,7 +563,7 @@ class NYTimes(BasicNewsRecipe):
                     dict(title=title, url=url, date=strftime('%a, %d %b'),
                         description=desc, author=author,
                         content=''))
-        viewed_ans = [(k, popular_articles[k]) for k in key_list if popular_articles.has_key(k)]
+        viewed_ans = [(k, popular_articles[k]) for k in key_list if k in popular_articles]
        for x in viewed_ans:
            ans.append(x)
        return ans
@@ -585,10 +586,10 @@ class NYTimes(BasicNewsRecipe):
                 tech_articles[f.title] = []
                 for a in f.articles:
                     tech_articles[f.title].append(
-                        dict(title=a.title, url=a.url, date=a.date,
+                        dict(title=a.title, url=a.url.partition('?')[0], date=a.date,
                             description=a.summary, author=a.author,
                             content=a.content))
-        tech_ans = [(k, tech_articles[k]) for k in key_list if tech_articles.has_key(k)]
+        tech_ans = [(k, tech_articles[k]) for k in key_list if k in tech_articles]
        for x in tech_ans:
            ans.append(x)
        return ans
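The tech-feed hunk also starts stripping query strings from article URLs with str.partition, presumably so two links to the same post with different tracking parameters reduce to one canonical URL. partition always returns a 3-tuple, so taking element [0] is safe whether or not a '?' is present. A small sketch with a made-up URL:

    url = 'http://bits.blogs.nytimes.com/2014/01/02/some-post/?partner=rss&emc=rss'
    print(url.partition('?')[0])   # http://bits.blogs.nytimes.com/2014/01/02/some-post/

    # Without a '?', partition returns (url, '', ''), so [0] is the URL unchanged:
    print('http://open.blogs.nytimes.com/feed/'.partition('?')[0])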
@@ -627,10 +628,9 @@ class NYTimes(BasicNewsRecipe):
             for lidiv in div.findAll('li'):
                 self.handle_article(lidiv)
 
-        self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
+        self.ans = [(k, self.articles[k]) for k in self.ans if k in self.articles]
         return self.filter_ans(self.get_tech_feeds(self.get_popular_articles(self.ans)))
 
-
     def parse_todays_index(self):
 
         soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html')
@@ -660,7 +660,7 @@ class NYTimes(BasicNewsRecipe):
                     if not skipping:
                         self.handle_article(lidiv)
 
-        self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
+        self.ans = [(k, self.articles[k]) for k in self.ans if k in self.articles]
         return self.filter_ans(self.get_tech_feeds(self.get_popular_articles(self.ans)))
 
     def parse_headline_index(self):
@@ -706,13 +706,13 @@ class NYTimes(BasicNewsRecipe):
                     description = self.tag_to_string(desc,use_alt=False)
                 else:
                     description = ''
-                if not self.articles.has_key(section_name):
+                if section_name not in self.articles:
                     self.ans.append(section_name)
                     self.articles[section_name] = []
                 print('Title '+title+' author '+author)
                 self.articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))
 
-        self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
+        self.ans = [(k, self.articles[k]) for k in self.ans if k in self.articles]
         return self.filter_ans(self.ans)
 
     def parse_index(self):
@@ -732,7 +732,7 @@ class NYTimes(BasicNewsRecipe):
                 if kill_all or (self.recursions==0):
                     a.replaceWith(self.tag_to_string(a,False))
                 else:
-                    if a.has_key('href'):
+                    if 'href' in a:
                         if a['href'].startswith('http://www.nytimes'):
                             if not a['href'].endswith('pagewanted=all'):
                                 url = re.sub(r'\?.*', '', a['href'])
@@ -740,13 +740,13 @@ class NYTimes(BasicNewsRecipe):
                                     a.replaceWith(self.tag_to_string(a,False))
                                 else:
                                     a['href'] = url+'?pagewanted=all'
-                        elif not (a['href'].startswith('http://pogue') or \
-                            a['href'].startswith('http://bits') or \
-                            a['href'].startswith('http://travel') or \
-                            a['href'].startswith('http://business') or \
-                            a['href'].startswith('http://tech') or \
-                            a['href'].startswith('http://health') or \
-                            a['href'].startswith('http://dealbook') or \
+                        elif not (a['href'].startswith('http://pogue') or
+                            a['href'].startswith('http://bits') or
+                            a['href'].startswith('http://travel') or
+                            a['href'].startswith('http://business') or
+                            a['href'].startswith('http://tech') or
+                            a['href'].startswith('http://health') or
+                            a['href'].startswith('http://dealbook') or
                             a['href'].startswith('http://open')):
                             a.replaceWith(self.tag_to_string(a,False))
         return soup
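The long elif chain loses its trailing backslashes: because the whole condition sits inside an open parenthesis, Python continues the expression implicitly, so the explicit line-continuation characters were redundant (and fragile, since any stray whitespace after a backslash is a syntax error). A short sketch of the same pattern:

    href = 'http://pogue.blogs.nytimes.com/2014/01/02/x/'
    # Implicit continuation inside the parentheses; no backslashes needed.
    is_blog = (href.startswith('http://pogue') or
               href.startswith('http://bits') or
               href.startswith('http://open'))
    print(is_blog)  # True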
@@ -761,7 +761,7 @@ class NYTimes(BasicNewsRecipe):
             return None
 
         ## print("HANDLING AD FORWARD:")
-        ## print(soup)
+        # print(soup)
         if self.keep_only_tags:
             body = Tag(soup, 'body')
             try:
@@ -799,7 +799,6 @@ class NYTimes(BasicNewsRecipe):
 
         return soup
 
-
     def preprocess_html(self, soup):
         #print(strftime("%H:%M:%S")+" -- PREPROCESS TITLE="+self.tag_to_string(soup.title))
         skip_tag = soup.find(True, {'name':'skip'})
@@ -861,9 +860,9 @@ class NYTimes(BasicNewsRecipe):
                 img = atag.find('img')
                 if img is not None:
                     atag.replaceWith(img)
-                elif not atag.has_key('href'):
+                elif 'href' not in atag:
                     atag.replaceWith(atag.renderContents().decode('cp1252','replace'))
-                elif not (atag['href'].startswith('http://www.nytimes') or atag['href'].startswith('http://pogue') or \
+                elif not (atag['href'].startswith('http://www.nytimes') or atag['href'].startswith('http://pogue') or
                     atag['href'].startswith('http://bits') or atag['href'].startswith('http://open')):
                     atag.replaceWith(atag.renderContents().decode('cp1252','replace'))
         hdr = soup.find('address')
@@ -953,8 +952,10 @@ class NYTimes(BasicNewsRecipe):
             year = str(st.tm_year)
             month = "%.2d" % st.tm_mon
             day = "%.2d" % st.tm_mday
-            imgstartpos = popuphtml.find('http://graphics8.nytimes.com/images/' + year + '/' + month +'/' + day +'/') + len('http://graphics8.nytimes.com/images/' + year + '/' + month +'/' + day +'/')
-            highResImageLink = 'http://graphics8.nytimes.com/images/' + year + '/' + month +'/' + day +'/' + popuphtml[imgstartpos:popuphtml.find('.jpg',imgstartpos)+4]
+            imgstartpos = popuphtml.find('http://graphics8.nytimes.com/images/' + year + '/' + month +'/' + day +'/') + \
+                len('http://graphics8.nytimes.com/images/' + year + '/' + month +'/' + day +'/')
+            highResImageLink = 'http://graphics8.nytimes.com/images/' + year + '/' + \
+                month +'/' + day +'/' + popuphtml[imgstartpos:popuphtml.find('.jpg',imgstartpos)+4]
             popupSoup = BeautifulSoup(popuphtml)
             highResTag = popupSoup.find('img', {'src':highResImageLink})
             if highResTag:
@@ -984,7 +985,6 @@ class NYTimes(BasicNewsRecipe):
             except:
                 self.log("Error removing Enlarge this text")
 
-
         return self.strip_anchors(soup,False)
 
     def postprocess_html(self,soup,first_fetch):
@@ -1108,7 +1108,6 @@ class NYTimes(BasicNewsRecipe):
             except:
                 self.log("ERROR: fixing credit format")
 
-
             try:
                 # Change <h1> to <h3> - used in editorial blogs
                 masthead = soup.find("h1")
@@ -1202,4 +1201,3 @@ class NYTimes(BasicNewsRecipe):
             self.log("Error creating article descriptions")
             return
 
-
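The high-resolution image hunk only rewraps two overlong lines, but the find()/len() idiom is worth spelling out: str.find() returns the index where the dated image prefix begins, and adding len(prefix) advances the index to the first character after it, i.e. the start of the image filename. An equivalent sketch with the prefix factored into a variable (which the committed code does not do) and a made-up snippet of popup HTML:

    popuphtml = '<img src="http://graphics8.nytimes.com/images/2014/01/02/photo.jpg"/>'
    prefix = 'http://graphics8.nytimes.com/images/2014/01/02/'
    imgstartpos = popuphtml.find(prefix) + len(prefix)
    highResImageLink = prefix + popuphtml[imgstartpos:popuphtml.find('.jpg', imgstartpos) + 4]
    print(highResImageLink)  # http://graphics8.nytimes.com/images/2014/01/02/photo.jpg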
A second copy of the NYTimes recipe, identical apart from headlinesOnly = False as context in its first hunk, receives the same changes hunk for hunk at slightly shifted line offsets. Only one hunk is unique to it:
@@ -251,6 +254,7 @@ class NYTimes(BasicNewsRecipe):
             'masthead-nav',
             'memberTools',
             'navigation', 'navigation-ghost', 'navigation-modal', 'navigation-edge',
+            'page-footer',
             'portfolioInline',
             'readerReviews',
             'readerReviewsCount',
@@ -20,6 +20,7 @@ from calibre.constants import iswindows
 from calibre import unicode_path, as_unicode, replace_entities
 
+
 class Link(object):
 
     '''
     Represents a link in a HTML file.
     '''
@@ -73,6 +74,7 @@ class IgnoreFile(Exception):
         self.errno = errno
 
+
 class HTMLFile(object):
 
     '''
     Contains basic information about an HTML file. This
     includes a list of links to other files as well as
@@ -103,8 +105,14 @@ class HTMLFile(object):
 
         try:
             with open(self.path, 'rb') as f:
-                src = f.read(4096)
-                self.is_binary = level > 0 and not bool(self.HTML_PAT.search(src))
+                src = header = f.read(4096)
+                encoding = detect_xml_encoding(src)[1]
+                if encoding:
+                    try:
+                        header = header.decode(encoding)
+                    except ValueError:
+                        pass
+                self.is_binary = level > 0 and not bool(self.HTML_PAT.search(header))
                 if not self.is_binary:
                     src += f.read()
         except IOError as err:
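The point of decoding the header before testing for markup: HTML_PAT is an ASCII-oriented pattern, and in a UTF-16 file every ASCII character is interleaved with NUL bytes, so a byte-level search for '<html' finds nothing and the file would be misclassified as binary. A stdlib-only illustration of the failure mode (in the real code, detect_xml_encoding supplies the encoding):

    import re

    raw = '<html><head></head></html>'.encode('utf-16-le')

    # Byte-level search misses: b'<html' never appears verbatim in UTF-16.
    print(re.search(b'<\\s*html', raw))                      # None

    # Decoding first restores the match, which is what the patch does.
    print(re.search(r'<\s*html', raw.decode('utf-16-le')))   # matches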
@@ -139,7 +147,6 @@ class HTMLFile(object):
     def __repr__(self):
         return str(self)
 
-
     def find_links(self, src):
         for match in self.LINK_PAT.finditer(src):
             url = None
@@ -232,8 +239,7 @@ def get_filelist(htmlfile, dir, opts, log):
     log.info('Building file list...')
     filelist = traverse(htmlfile, max_levels=int(opts.max_levels),
                         verbose=opts.verbose,
-                        encoding=opts.input_encoding)\
-                        [0 if opts.breadth_first else 1]
+                        encoding=opts.input_encoding)[0 if opts.breadth_first else 1]
     if opts.verbose:
         log.debug('\tFound files...')
         for f in filelist:
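The get_filelist cleanup just joins a continuation onto one line; the subscript works because traverse() evidently returns a pair of traversal orders and the index picks one. A sketch of the pattern, with illustrative contents:

    def traverse():
        # returns (breadth-first list, depth-first list)
        return (['a', 'b', 'c'], ['a', 'c', 'b'])

    breadth_first = False
    filelist = traverse()[0 if breadth_first else 1]
    print(filelist)  # ['a', 'c', 'b']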
@@ -317,13 +317,11 @@ class FlowSplitter(object):
     def split_to_size(self, tree):
         self.log.debug('\t\tSplitting...')
         root = tree.getroot()
-        # Split large <pre> tags
-        for pre in list(XPath('//h:pre')(root)):
-            text = u''.join(pre.xpath('descendant::text()'))
-            pre.text = text
-            for child in list(pre.iterchildren()):
-                pre.remove(child)
-            if len(pre.text) > self.max_flow_size*0.5:
+        # Split large <pre> tags if they contain only text
+        for pre in XPath('//h:pre')(root):
+            if len(tuple(pre.iterchildren(etree.Element))) > 0:
+                continue
+            if pre.text and len(pre.text) > self.max_flow_size*0.5:
                 self.log.debug('\t\tSplitting large <pre> tag')
                 frags = self.split_text(pre.text, root, int(0.2*self.max_flow_size))
                 new_pres = []
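The rewritten loop relies on lxml's element-only iteration: passing the etree.Element factory as the tag filter makes iterchildren() yield real child elements while skipping comments and processing instructions, so a <pre> containing markup is now skipped instead of having its children flattened into plain text as the old code did. A small sketch:

    from lxml import etree

    pre = etree.fromstring('<pre>plain text<!-- a comment -->more text</pre>')
    # Comments do not count as element children, so this <pre> is still splittable.
    print(len(tuple(pre.iterchildren(etree.Element))))   # 0

    pre2 = etree.fromstring('<pre>text <b>bold</b> text</pre>')
    # A real child element: the new code skips this one with continue.
    print(len(tuple(pre2.iterchildren(etree.Element))))  # 1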
@@ -1104,7 +1104,8 @@ class OnDeviceSearch(SearchQueryParser): # {{{
             'format',
             'formats',
             'title',
-            'inlibrary'
+            'inlibrary',
+            'tags'
     ]
 
     def __init__(self, model):
@@ -1135,14 +1136,15 @@ class OnDeviceSearch(SearchQueryParser): # {{{
         if location not in self.USABLE_LOCATIONS:
             return set([])
         matches = set([])
-        all_locs = set(self.USABLE_LOCATIONS) - set(['all'])
+        all_locs = set(self.USABLE_LOCATIONS) - set(['all', 'tags'])
         locations = all_locs if location == 'all' else [location]
         q = {
             'title' : lambda x : getattr(x, 'title').lower(),
             'author': lambda x: ' & '.join(getattr(x, 'authors')).lower(),
             'collections':lambda x: ','.join(getattr(x, 'device_collections')).lower(),
             'format':lambda x: os.path.splitext(x.path)[1].lower(),
-            'inlibrary':lambda x : getattr(x, 'in_library')
+            'inlibrary':lambda x : getattr(x, 'in_library'),
+            'tags':lambda x : getattr(x, 'tags', [])
         }
         for x in ('author', 'format'):
             q[x+'s'] = q[x]
|
|||||||
else:
|
else:
|
||||||
m = matchkind
|
m = matchkind
|
||||||
|
|
||||||
if locvalue == 'collections':
|
vals = accessor(row)
|
||||||
vals = accessor(row).split(',')
|
if vals is None:
|
||||||
else:
|
vals = ''
|
||||||
vals = [accessor(row)]
|
if isinstance(vals, basestring):
|
||||||
|
vals = vals.split(',') if locvalue == 'collections' else [vals]
|
||||||
if _match(query, vals, m, use_primary_find_in_search=upf):
|
if _match(query, vals, m, use_primary_find_in_search=upf):
|
||||||
matches.add(index)
|
matches.add(index)
|
||||||
break
|
break
|
||||||
|
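Why the normalization changed: the original branches assumed every accessor returns a string, but the new 'tags' accessor returns a list, and getattr may also yield None. The replacement funnels all three shapes through one path: None becomes '', strings become a one-item list (or are split on commas for 'collections'), and lists pass through untouched. Note also that 'tags' is excluded from the 'all' expansion above, so tag matching only happens when asked for explicitly. A standalone sketch (Python 2, hence basestring):

    def normalize(vals, locvalue):
        if vals is None:
            vals = ''
        if isinstance(vals, basestring):
            vals = vals.split(',') if locvalue == 'collections' else [vals]
        return vals

    print(normalize('John Doe & Jane Doe', 'author'))  # ['John Doe & Jane Doe']
    print(normalize('SF,Unread', 'collections'))       # ['SF', 'Unread']
    print(normalize(['SF', 'Unread'], 'tags'))         # list passes through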
@@ -21,7 +21,7 @@ from calibre.constants import ispy3, plugins, cache_dir
 from calibre.gui2 import NONE
 from calibre.gui2.widgets2 import HistoryLineEdit2
 from calibre.gui2.tweak_book import tprefs
-from calibre.gui2.tweak_book.editor.insert_resource import Dialog
+from calibre.gui2.tweak_book.widgets import Dialog
 
 if not ispy3:
     if sys.maxunicode >= 0x10FFFF:
@@ -10,11 +10,11 @@ import sys, os
 from functools import partial
 
 from PyQt4.Qt import (
-    QDialog, QGridLayout, QDialogButtonBox, QSize, QListView, QStyledItemDelegate,
-    QLabel, QPixmap, QApplication, QSizePolicy, QAbstractListModel, QVariant,
-    Qt, QRect, QPainter, QModelIndex, QSortFilterProxyModel, QLineEdit,
-    QToolButton, QIcon, QFormLayout, pyqtSignal, QTreeWidget, QTreeWidgetItem,
-    QVBoxLayout, QMenu, QInputDialog)
+    QGridLayout, QSize, QListView, QStyledItemDelegate, QLabel, QPixmap,
+    QApplication, QSizePolicy, QAbstractListModel, QVariant, Qt, QRect,
+    QPainter, QModelIndex, QSortFilterProxyModel, QLineEdit, QToolButton,
+    QIcon, QFormLayout, pyqtSignal, QTreeWidget, QTreeWidgetItem, QVBoxLayout,
+    QMenu, QInputDialog)
 
 from calibre import fit_image
 from calibre.constants import plugins
@@ -23,43 +23,11 @@ from calibre.ebooks.metadata.book.base import Metadata
 from calibre.gui2 import NONE, choose_files, error_dialog
 from calibre.gui2.languages import LanguagesEdit
 from calibre.gui2.tweak_book import current_container, tprefs
+from calibre.gui2.tweak_book.widgets import Dialog
 from calibre.gui2.tweak_book.file_list import name_is_ok
 from calibre.utils.localization import get_lang, canonicalize_lang
 from calibre.utils.icu import sort_key
 
-class Dialog(QDialog):
-
-    def __init__(self, title, name, parent=None):
-        QDialog.__init__(self, parent)
-        self.setWindowTitle(title)
-        self.name = name
-        self.bb = QDialogButtonBox(QDialogButtonBox.Ok | QDialogButtonBox.Cancel)
-        self.bb.accepted.connect(self.accept)
-        self.bb.rejected.connect(self.reject)
-
-        self.setup_ui()
-
-        self.resize(self.sizeHint())
-        geom = tprefs.get(name + '-geometry', None)
-        if geom is not None:
-            self.restoreGeometry(geom)
-        if hasattr(self, 'splitter'):
-            state = tprefs.get(name + '-splitter-state', None)
-            if state is not None:
-                self.splitter.restoreState(state)
-
-    def accept(self):
-        tprefs.set(self.name + '-geometry', bytearray(self.saveGeometry()))
-        if hasattr(self, 'splitter'):
-            tprefs.set(self.name + '-splitter-state', bytearray(self.splitter.saveState()))
-        QDialog.accept(self)
-
-    def reject(self):
-        tprefs.set(self.name + '-geometry', bytearray(self.saveGeometry()))
-        if hasattr(self, 'splitter'):
-            tprefs.set(self.name + '-splitter-state', bytearray(self.splitter.saveState()))
-        QDialog.reject(self)
-
 class ChooseName(Dialog): # {{{
 
     ''' Chooses the filename for a newly imported file, with error checking '''
src/calibre/gui2/tweak_book/widgets.py (new file, 48 lines)
@@ -0,0 +1,48 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
+
+from PyQt4.Qt import (QDialog, QDialogButtonBox)
+
+from calibre.gui2.tweak_book import tprefs
+
+class Dialog(QDialog):
+
+    def __init__(self, title, name, parent=None):
+        QDialog.__init__(self, parent)
+        self.setWindowTitle(title)
+        self.name = name
+        self.bb = QDialogButtonBox(QDialogButtonBox.Ok | QDialogButtonBox.Cancel)
+        self.bb.accepted.connect(self.accept)
+        self.bb.rejected.connect(self.reject)
+
+        self.setup_ui()
+
+        self.resize(self.sizeHint())
+        geom = tprefs.get(name + '-geometry', None)
+        if geom is not None:
+            self.restoreGeometry(geom)
+        if hasattr(self, 'splitter'):
+            state = tprefs.get(name + '-splitter-state', None)
+            if state is not None:
+                self.splitter.restoreState(state)
+
+    def accept(self):
+        tprefs.set(self.name + '-geometry', bytearray(self.saveGeometry()))
+        if hasattr(self, 'splitter'):
+            tprefs.set(self.name + '-splitter-state', bytearray(self.splitter.saveState()))
+        QDialog.accept(self)
+
+    def reject(self):
+        tprefs.set(self.name + '-geometry', bytearray(self.saveGeometry()))
+        if hasattr(self, 'splitter'):
+            tprefs.set(self.name + '-splitter-state', bytearray(self.splitter.saveState()))
+        QDialog.reject(self)
+
+    def setup_ui(self):
+        raise NotImplementedError('You must implement this method in Dialog subclasses')
+
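The extracted base class gives every Tweak Book dialog persistent geometry for free: a subclass builds its widgets in setup_ui() (including laying out self.bb), and the base class saves and restores window geometry, plus any splitter state, under the dialog's name in tprefs. A hypothetical subclass sketch; the class name and strings below are illustrative, not part of the commit:

    from PyQt4.Qt import QVBoxLayout, QLabel

    class ExampleDialog(Dialog):  # Dialog is the base class defined above

        def __init__(self, parent=None):
            # 'example-dialog' becomes the tprefs key prefix for the saved geometry
            Dialog.__init__(self, 'Example', 'example-dialog', parent)

        def setup_ui(self):
            self.l = QVBoxLayout(self)
            self.l.addWidget(QLabel('Hello'))
            self.l.addWidget(self.bb)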