commit 656c55debf
parent eb625d37c3

    Misc. fixes
@@ -24,7 +24,7 @@ class ANDROID(USBMS):
     ]
     PRODUCT_ID = [0x0c02]
     BCD        = [0x100]
-    EBOOK_DIR_MAIN = 'wordplayer/calibre'
+    EBOOK_DIR_MAIN = 'wordplayer/calibretransfer'
 
     VENDOR_NAME      = 'HTC'
     WINDOWS_MAIN_MEM = 'ANDROID_PHONE'
@@ -163,7 +163,7 @@ def fetch_scheduled_recipe(recipe, script):
                 OptionRecommendation.HIGH))
     lf = load_defaults('look_and_feel')
     if lf.get('base_font_size', 0.0) != 0.0:
-        recs.append(('base_font_size', ps['base_font_size'],
+        recs.append(('base_font_size', lf['base_font_size'],
                 OptionRecommendation.HIGH))
 
     args = [script, pt.name, recs]
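The one-line fix in this hunk points the recommendation at lf, the dict that load_defaults('look_and_feel') actually returned, instead of the stale ps name, so the user's saved font size reaches the scheduled-recipe conversion. A minimal self-contained sketch of the intended flow; the stubs below stand in for calibre's real classes and loader, which are not shown in this diff:

class OptionRecommendation:
    # Stand-in for calibre's recommendation-priority constant.
    HIGH = 3

def load_defaults(name):
    # Stand-in for calibre's saved-settings loader.
    return {'base_font_size': 12.0}

recs = []
lf = load_defaults('look_and_feel')
if lf.get('base_font_size', 0.0) != 0.0:
    # Read from lf itself, the dict just loaded, rather than an
    # unrelated variable that may hold different settings.
    recs.append(('base_font_size', lf['base_font_size'],
                 OptionRecommendation.HIGH))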
@@ -1015,7 +1015,7 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE;
 
     def books_in_series_of(self, index, index_is_id=False):
         '''
-        Return an ordered list of all books in the series that the book indetified by index belongs to.
+        Return an ordered list of all books in the series that the book identified by index belongs to.
         If the book does not belong to a series return an empty list. The list contains book ids.
         '''
         series_id = self.series_id(index, index_is_id=index_is_id)
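Besides the typo fix, the docstring pins down the contract: ids in, ids out, empty list when there is no series. A hedged usage sketch, assuming db is an open calibre library database and book_id is a database id:

# index_is_id=True means book_id is a database id, not a row index.
ids_in_series = db.books_in_series_of(book_id, index_is_id=True)
if not ids_in_series:
    pass  # the book belongs to no series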
@@ -16,7 +16,43 @@ class NYTimes(BasicNewsRecipe):
     __author__ = 'GRiker'
     language = _('English')
     description = 'Top Stories from the New York Times'
-    #max_articles_per_feed = 3
+
+    # List of sections typically included in Top Stories. Use a keyword from the
+    # right column in the excludeSectionKeywords[] list to skip downloading that section
+    sections = {
+         'arts'             : 'Arts',
+         'business'         : 'Business',
+         'diningwine'       : 'Dining & Wine',
+         'editorials'       : 'Editorials',
+         'health'           : 'Health',
+         'magazine'         : 'Magazine',
+         'mediaadvertising' : 'Media & Advertising',
+         'newyorkregion'    : 'New York/Region',
+         'oped'             : 'Op-Ed',
+         'politics'         : 'Politics',
+         'science'          : 'Science',
+         'sports'           : 'Sports',
+         'technology'       : 'Technology',
+         'topstories'       : 'Top Stories',
+         'travel'           : 'Travel',
+         'us'               : 'U.S.',
+         'world'            : 'World'
+         }
+
+    # By default, no sections are skipped.
+    excludeSectionKeywords = []
+
+    # Add section keywords from the right column above to skip that section
+    # For example, to skip sections containing the word 'Sports' or 'Dining', use:
+    # excludeSectionKeywords = ['Sports', 'Dining']
+    # Fetch only Business and Technology
+    #excludeSectionKeywords = ['Arts','Dining','Editorials','Health','Magazine','Media','Region','Op-Ed','Politics','Science','Sports','Top Stories','Travel','U.S.','World']
+    # Fetch only Top Stories
+    #excludeSectionKeywords = ['Arts','Business','Dining','Editorials','Health','Magazine','Media','Region','Op-Ed','Politics','Science','Sports','Technology','Travel','U.S.','World']
+
+    # The maximum number of articles that will be downloaded
+    max_articles_per_feed = 50
+
     timefmt = ''
     needs_subscription = True
     remove_tags_after = dict(attrs={'id':['comments']})
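The new class-level sections map and excludeSectionKeywords list feed the filtering done later in parse_index(): the keywords are OR-joined into a single regex and matched against each section's display name. A self-contained illustration of that rule:

import re

sections = {'sports' : 'Sports', 'diningwine' : 'Dining & Wine'}
excludeSectionKeywords = ['Sports', 'Dining']

# One alternation regex built from the keywords; a section is skipped
# when any keyword matches its display name.
excluded = re.compile('|'.join(excludeSectionKeywords))
for section_id, name in sections.items():
    if excluded.search(name):
        print 'skipping section %s' % name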
@@ -31,17 +67,11 @@ class NYTimes(BasicNewsRecipe):
                   dict(name=['script', 'noscript', 'style','hr'])]
     encoding = 'cp1252'
     no_stylesheets = True
-    #extra_css = 'h1 {font: sans-serif large;}\n.byline {font:monospace;}'
     extra_css = '.headline {text-align:left;}\n\
                  .byline {font:monospace; margin-bottom:0px;}\n\
                  .source {align:left;}\n\
                  .credit {align:right;}\n'
 
-
-    flatPeriodical = True
-    feed = None
-    ans = []
-
     def get_browser(self):
         br = BasicNewsRecipe.get_browser()
         if self.username is not None and self.password is not None:
@@ -54,14 +84,8 @@ class NYTimes(BasicNewsRecipe):
 
     def index_to_soup(self, url_or_raw, raw=False):
         '''
-        Convenience method that takes an URL to the index page and returns
-        a `BeautifulSoup <http://www.crummy.com/software/BeautifulSoup/documentation.html>`_
-        of it.
-
-        This is an OVERRIDE of the method provided in news.py to solve an encoding problem
-        with NYTimes index pages which seem to be encoded in a wonderful blend
-
-        `url_or_raw`: Either a URL or the downloaded index page as a string
+        OVERRIDE of class method
+        deals with various page encodings between index and articles
         '''
         def get_the_soup(docEncoding, url_or_raw, raw=False) :
             if re.match(r'\w+://', url_or_raw):
@@ -88,8 +112,6 @@ class NYTimes(BasicNewsRecipe):
             if docEncoding == '' :
                 docEncoding = self.encoding
 
-            if self.verbose :
-                self.log( " document encoding: '%s'" % docEncoding)
             if docEncoding != self.encoding :
                 soup = get_the_soup(docEncoding, url_or_raw)
 
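Together these two hunks slim the index_to_soup override down to its essential job: parse once with the recipe default, check what the page itself declares, and re-parse only on mismatch. A standalone sketch of that strategy under BeautifulSoup 3; the charset extraction below is an assumption for illustration, not the recipe's exact code:

import re
from BeautifulSoup import BeautifulSoup

def soup_with_declared_encoding(raw, default_encoding='cp1252'):
    # First pass: decode with the recipe default.
    soup = BeautifulSoup(raw, fromEncoding=default_encoding)
    # Read the charset the document itself declares, if any.
    declared = ''
    meta = soup.find('meta', {'http-equiv': 'Content-Type'})
    if meta is not None:
        m = re.search(r'charset=([^\s;"\']+)', str(meta))
        if m:
            declared = m.group(1)
    # Second pass only when the declared encoding disagrees.
    if declared and declared.lower() != default_encoding:
        soup = BeautifulSoup(raw, fromEncoding=declared)
    return soup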
@@ -97,49 +119,11 @@ class NYTimes(BasicNewsRecipe):
 
     def parse_index(self):
         articles = {}
+        ans = []
 
-        if self.flatPeriodical :
-            self.feed = key = 'All Top Stories'
-            articles[key] = []
-            self.ans.append(key)
-        else :
-            key = None
+        feed = key = 'All Top Stories'
+        articles[key] = []
+        ans.append(key)
 
-        '''
-        def feed_title(div):
-            return ''.join(div.findAll(text=True, recursive=False)).strip()
-        '''
-
-        sections = {
-             'arts'             : 'Arts',
-             'business'         : 'Business',
-             'editorials'       : 'Editorials',
-             'health'           : 'Health',
-             'magazine'         : 'Magazine',
-             'mediaadvertising' : 'Media & Advertising',
-             'newyorkregion'    : 'New York/Region',
-             'oped'             : 'Op-Ed',
-             'politics'         : 'Politics',
-             'science'          : 'Science',
-             'sports'           : 'Sports',
-             'technology'       : 'Technology',
-             'topstories'       : 'Top Stories',
-             'travel'           : 'Travel',
-             'us'               : 'U.S.',
-             'world'            : 'World'
-             }
-
-        '''
-        excludeSectionKeywords = ['Arts','Business','Editorials','Health','Magazine','Media',
-                                  'New York','Op-Ed','Politics','Science','Sports','Technology',
-                                  'Top Stories','Travel','U.S.','World']
-        '''
-        excludeSectionKeywords = ['Arts','Business','Editorials','Health','Magazine','Media',
-                                  'New York','Politics','Science','Sports','Technology',
-                                  'Top Stories','Travel','U.S.','World']
-
-        #excludeSectionKeywords = []
-
         soup = self.index_to_soup('http://www.nytimes.com/pages/todaysheadlines/')
 
@@ -152,35 +136,25 @@ class NYTimes(BasicNewsRecipe):
         while True :
             table = table.find('table')
             if table.find(text=re.compile('top stories start')) :
-                if self.verbose > 2 : self.log( "*********** dropping one level deeper **************")
                 previousTable = table
                 continue
             else :
-                if self.verbose > 2 : self.log( "found table with top stories")
                 table = previousTable
-                if self.verbose > 2 : self.log( "lowest table containing 'top stories start:\n%s" % table)
                 break
 
         # There are multiple subtables, find the one containing the stories
         for block in table.findAll('table') :
             if block.find(text=re.compile('top stories start')) :
-                if self.verbose > 2 : self.log( "found subtable with top stories")
                 table = block
-                if self.verbose > 2 : self.log( "lowest subtable containing 'top stories start:\n%s" % table)
                 break
             else :
-                if self.verbose > 2 : self.log( "trying next subtable")
                 continue
 
         # Again there are multiple subtables, find the one containing the stories
         for storyblock in table.findAll('table') :
             if storyblock.find(text=re.compile('top stories start')) :
-                if self.verbose > 2 : self.log( "found subsubtable with top stories\n" )
-                # table = storyblock
-                if self.verbose > 2 : self.log( "\nlowest subsubtable containing 'top stories start:\n%s" % storyblock)
                 break
             else :
-                if self.verbose > 2 : self.log( "trying next subsubtable")
                 continue
 
         skipThisSection = False
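The descent logic that survives the logging cleanup can be read as one rule: keep stepping into nested tables while the 'top stories start' marker is still inside, then back up to the deepest table that held it. A compact restatement, illustrative rather than the recipe's exact code:

import re
from BeautifulSoup import BeautifulSoup

MARKER = re.compile('top stories start')

def deepest_table_with_marker(table):
    # Walk down the nested <table> tags; stop one level above the
    # first table that no longer contains the marker text.
    previous = table
    while True:
        table = table.find('table')
        if table is not None and table.find(text=MARKER):
            previous = table
            continue
        return previous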
@@ -192,7 +166,6 @@ class NYTimes(BasicNewsRecipe):
             sectionblock = tr.find(True, attrs={'face':['times new roman, times,sans serif',
                                                         'times new roman,times, sans serif',
                                                         'times new roman, times, sans serif']})
-            if self.verbose > 2 : self.log( "----------- new tr ----------------")
             section = None
             bylines = []
             descriptions = []
@@ -205,26 +178,20 @@ class NYTimes(BasicNewsRecipe):
                 if ('Comment' in str(i.__class__)) :
                     if 'start(name=' in i :
                         section = i[i.find('=')+1:-2]
-                        if self.verbose > 2 : self.log( "sectionTitle: %s" % sections[section])
 
-                        if not sections.has_key(section) :
-                            self.log( "Unrecognized section id: %s, skipping" % section )
+                        if not self.sections.has_key(section) :
                             skipThisSection = True
                             break
 
                         # Check for excluded section
-                        if len(excludeSectionKeywords):
-                            key = sections[section]
-                            excluded = re.compile('|'.join(excludeSectionKeywords))
+                        if len(self.excludeSectionKeywords):
+                            key = self.sections[section]
+                            excluded = re.compile('|'.join(self.excludeSectionKeywords))
                             if excluded.search(key) or articles.has_key(key):
-                                if self.verbose > 2 : self.log("Skipping section %s" % key)
+                                if self.verbose : self.log("Skipping section %s" % key)
                                 skipThisSection = True
                                 break
 
-                            if not self.flatPeriodical :
-                                articles[key] = []
-                                self.ans.append(key)
-
         # Get the bylines and descriptions
         if not skipThisSection :
             for (x,i) in enumerate(sectionblock.contents) :
@@ -248,31 +215,26 @@ class NYTimes(BasicNewsRecipe):
                     #continue
                 url = re.sub(r'\?.*', '', a['href'])
                 url += '?pagewanted=all'
 
                 title = self.tag_to_string(a, use_alt=True)
-                if self.flatPeriodical :
-                    # prepend the section name
-                    title = sections[section] + " · " + title
+                # prepend the section name
+                title = self.sections[section] + " · " + title
                 if not isinstance(title, unicode):
                     title = title.decode('utf-8', 'replace')
 
                 description = descriptions[i]
 
                 if len(bylines) == articleCount :
                     author = bylines[i]
                 else :
                     author = None
 
-                if self.verbose > 2 : self.log( " title: %s" % title)
-                if self.verbose > 2 : self.log( " url: %s" % url)
-                if self.verbose > 2 : self.log( " author: %s" % author)
-                if self.verbose > 2 : self.log( "description: %s" % description)
-
-                if not self.flatPeriodical :
-                    self.feed = key
 
                 # Check for duplicates
                 duplicateFound = False
-                if self.flatPeriodical and len(articles[self.feed]) > 1:
-                    #print articles[self.feed]
-                    for article in articles[self.feed] :
+                if len(articles[feed]) > 1:
+                    #print articles[feed]
+                    for article in articles[feed] :
                         #print "comparing %s\n %s\n" % (url, article['url'])
                         if url == article['url'] :
                             duplicateFound = True
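With the flatPeriodical flag gone, the duplicate check reduces to a linear scan of the current feed's article list, keyed on the normalized URL:

def is_duplicate(url, feed_articles):
    # True when this URL was already collected for the feed.
    for article in feed_articles:
        if url == article['url']:
            return True
    return False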
@@ -280,23 +242,18 @@ class NYTimes(BasicNewsRecipe):
                                 #print
 
                 if duplicateFound:
-                    # Continue fetching, don't add this article
-                    print " skipping duplicate %s" % article['url']
                     continue
 
-                if not articles.has_key(self.feed):
-                    if self.verbose > 2 : self.log( "adding %s to articles[]" % self.feed)
-                    articles[self.feed] = []
-                if self.verbose > 2 : self.log( " adding: %s to articles[%s]\n" % (title, self.feed))
-                articles[self.feed].append(
+                if not articles.has_key(feed):
+                    articles[feed] = []
+                articles[feed].append(
                     dict(title=title, url=url, date=pubdate,
                         description=description, author=author, content=''))
 
-        self.ans = self.sort_index_by(self.ans, {'Top Stories':-1})
-        self.ans = [(key, articles[key]) for key in self.ans if articles.has_key(key)]
-        #sys.exit(1)
+        ans = self.sort_index_by(ans, {'Top Stories':-1})
+        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
 
-        return self.ans
+        return ans
 
     def preprocess_html(self, soup):
         refresh = soup.find('meta', {'http-equiv':'refresh'})
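With the self.ans and self.feed instance state gone, parse_index() builds and returns a plain local value in the shape calibre expects from a recipe: a list of (feed_title, articles) pairs, each article a dict like the one appended above. An illustrative value with dummy fields:

# Illustrative return value of parse_index(); all field values are dummies.
ans = [('All Top Stories', [
    dict(title='World · Example headline',
         url='http://www.nytimes.com/example.html?pagewanted=all',
         date='', description='Example description', author=None,
         content=''),
])]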
@@ -307,12 +264,9 @@ class NYTimes(BasicNewsRecipe):
             return BeautifulSoup(raw.decode('cp1252', 'replace'))
 
     def postprocess_html(self,soup, True):
-        if self.verbose > 2 : self.log(" ********** recipe.postprocess_html ********** ")
         # Change class="kicker" to <h3>
         kicker = soup.find(True, {'class':'kicker'})
         if kicker is not None :
-            print "changing kicker to <h3>"
-            print kicker
             h3Tag = Tag(soup, "h3")
             h3Tag.insert(0, kicker.contents[0])
             kicker.replaceWith(h3Tag)
@@ -345,13 +299,7 @@ class NYTimes(BasicNewsRecipe):
             tag = Tag(soup, "h3")
             tag.insert(0, masthead.contents[0])
             soup.h1.replaceWith(tag)
-        '''
-        # Change subheads to <h3>
-        for subhead in soup.findAll(True, {'class':'bold'}) :
-            h3Tag = Tag(soup, "h3")
-            h3Tag.insert(0, subhead.contents[0])
-            subhead.replaceWith(h3Tag)
-        '''
+
         # Change <span class="bold"> to <b>
         for subhead in soup.findAll(True, {'class':'bold'}) :
             bTag = Tag(soup, "b")
@@ -359,4 +307,3 @@ class NYTimes(BasicNewsRecipe):
             subhead.replaceWith(bTag)
 
         return soup
-