Bug fixes to iTunes driver and updated NYTimes recipes

This commit is contained in:
Kovid Goyal 2010-06-18 09:00:47 -06:00
commit 1f98ff1e57
3 changed files with 96 additions and 86 deletions

View File

@ -64,6 +64,7 @@ class NYTimes(BasicNewsRecipe):
timefmt = ''
needs_subscription = True
masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
cover_margins = (18,18,'grey99')
remove_tags_before = dict(id='article')
remove_tags_after = dict(id='article')
@ -183,6 +184,16 @@ class NYTimes(BasicNewsRecipe):
self.log("\nFailed to login")
return br
def skip_ad_pages(self, soup):
    """Follow the "skip" link on an ad page served before the actual article.

    Searches ``soup`` for any tag whose ``name`` attribute is ``skip`` and
    reads the forwarding link from the ``href`` of that tag's parent.

    :param soup: parsed page to inspect.
    :return: the result of ``self.index_to_soup(url, raw=True)`` for the
        forwarding URL (query string replaced by ``?pagewanted=all``), or
        ``None`` when the page carries no usable skip link.
    """
    skip_tag = soup.find(True, {'name': 'skip'})
    if skip_tag is None:
        return None

    # Read the href once, and tolerate a parent without one: a malformed
    # ad page should fall through to normal processing, not raise KeyError.
    parent = skip_tag.parent
    href = parent.get('href') if parent is not None else None
    if not href:
        self.log.warn("Found skip tag without a forwarding link")
        return None

    self.log.warn("Found forwarding link: %s" % href)
    # Drop the original query string before asking for the full article.
    url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', href)
    url += '?pagewanted=all'
    self.log.warn("Skipping ad to article at '%s'" % url)
    return self.index_to_soup(url, raw=True)
def get_cover_url(self):
cover = None
st = time.localtime()
@ -391,14 +402,6 @@ class NYTimes(BasicNewsRecipe):
return ans
def preprocess_html(self, soup):
# Skip ad pages served before actual article
skip_tag = soup.find(True, {'name':'skip'})
if skip_tag is not None:
self.log.error("Found forwarding link: %s" % skip_tag.parent['href'])
url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href'])
url += '?pagewanted=all'
self.log.error("Skipping ad to article at '%s'" % url)
soup = self.index_to_soup(url)
return self.strip_anchors(soup)
def postprocess_html(self,soup, True):

View File

@ -103,6 +103,7 @@ class NYTimes(BasicNewsRecipe):
]),
dict(name=['script', 'noscript', 'style'])]
masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
cover_margins = (18,18,'grey99')
no_stylesheets = True
extra_css = '.headline {text-align: left;}\n \
.byline {font-family: monospace; \
@ -158,7 +159,7 @@ class NYTimes(BasicNewsRecipe):
return cover
def get_masthead_title(self):
return 'NYTimes GR Version'
return self.title
def dump_ans(self, ans):
total_article_count = 0
@ -279,15 +280,17 @@ class NYTimes(BasicNewsRecipe):
self.dump_ans(ans)
return ans
def preprocess_html(self, soup):
def skip_ad_pages(self, soup):
# Skip ad pages served before actual article
skip_tag = soup.find(True, {'name':'skip'})
if skip_tag is not None:
self.log.error("Found forwarding link: %s" % skip_tag.parent['href'])
self.log.warn("Found forwarding link: %s" % skip_tag.parent['href'])
url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href'])
url += '?pagewanted=all'
self.log.error("Skipping ad to article at '%s'" % url)
soup = self.index_to_soup(url)
self.log.warn("Skipping ad to article at '%s'" % url)
return self.index_to_soup(url, raw=True)
def preprocess_html(self, soup):
return self.strip_anchors(soup)
def postprocess_html(self,soup, True):

View File

@ -160,6 +160,7 @@ class ITUNES(DevicePlugin):
sources = None
update_msg = None
update_needed = False
use_series_data = True
# Public methods
def add_books_to_metadata(self, locations, metadata, booklists):
@ -398,7 +399,7 @@ class ITUNES(DevicePlugin):
attempts -= 1
time.sleep(0.5)
if DEBUG:
self.log.warning(" waiting for identified iPad, attempt #%d" % (10 - attempts))
self.log.warning(" waiting for connected iPad, attempt #%d" % (10 - attempts))
else:
if DEBUG:
self.log.info(' found connected iPad')
@ -474,7 +475,7 @@ class ITUNES(DevicePlugin):
attempts -= 1
time.sleep(0.5)
if DEBUG:
self.log.warning(" waiting for identified iPad, attempt #%d" % (10 - attempts))
self.log.warning(" waiting for connected iPad, attempt #%d" % (10 - attempts))
else:
if DEBUG:
self.log.info(' found connected iPad in iTunes')
@ -693,6 +694,8 @@ class ITUNES(DevicePlugin):
# Purge the booklist, self.cached_books
for i,bl_book in enumerate(booklists[0]):
if False:
self.log.info(" evaluating '%s'" % bl_book.uuid)
if bl_book.uuid == self.cached_books[path]['uuid']:
# Remove from booklists[0]
booklists[0].pop(i)
@ -703,6 +706,10 @@ class ITUNES(DevicePlugin):
break
break
if False:
self._dump_booklist(booklists[0], indent = 2)
self._dump_cached_books(indent=2)
def reset(self, key='-1', log_packets=False, report_progress=None,
detected_device=None) :
"""
@ -1061,7 +1068,7 @@ class ITUNES(DevicePlugin):
except:
if DEBUG:
self.log.warning(" iTunes automation interface reported an error"
" when adding artwork to '%s'" % metadata.title)
" when adding artwork to '%s' on the iDevice" % metadata.title)
#import traceback
#traceback.print_exc()
#from calibre import ipython
@ -1264,18 +1271,18 @@ class ITUNES(DevicePlugin):
def _dump_cached_book(self, cached_book, header=None,indent=0):
'''
'''
if header:
msg = '%s%s' % (' '*indent,header)
self.log.info(msg)
self.log.info( "%s%s" % (' '*indent, '-' * len(msg)))
if isosx:
self.log.info("%s%-40.40s %-30.30s %-10.10s %-10.10s %s" %
(' '*indent,
'title',
'author',
'lib_book',
'dev_book',
'uuid'))
if header:
msg = '%s%s' % (' '*indent,header)
self.log.info(msg)
self.log.info( "%s%s" % (' '*indent, '-' * len(msg)))
self.log.info("%s%-40.40s %-30.30s %-10.10s %-10.10s %s" %
(' '*indent,
'title',
'author',
'lib_book',
'dev_book',
'uuid'))
self.log.info("%s%-40.40s %-30.30s %-10.10s %-10.10s %s" %
(' '*indent,
cached_book['title'],
@ -1284,14 +1291,17 @@ class ITUNES(DevicePlugin):
str(cached_book['dev_book'])[-9:],
cached_book['uuid']))
elif iswindows:
if header:
msg = '%s%s' % (' '*indent,header)
self.log.info(msg)
self.log.info( "%s%s" % (' '*indent, '-' * len(msg)))
self.log.info("%s%-40.40s %-30.30s %s" %
(' '*indent,
cached_book['title'],
cached_book['author'],
cached_book['uuid']))
self.log.info()
def _dump_cached_books(self, header=None, indent=0):
'''
'''
@ -1415,19 +1425,21 @@ class ITUNES(DevicePlugin):
(search['uuid'], search['title'], search['author']))
attempts = 9
while attempts:
# Try by uuid
# Try by uuid - only one hit
hits = dev_books.Search(search['uuid'],self.SearchField.index('Albums'))
if hits:
hit = hits[0]
self.log.info(" found '%s' by %s (%s)" % (hit.Name, hit.Artist, hit.Album))
return hit
# Try by author
# Try by author - there could be multiple hits
hits = dev_books.Search(search['author'],self.SearchField.index('Artists'))
if hits:
hit = hits[0]
self.log.info(" found '%s' by %s" % (hit.Name, hit.Artist))
return hit
for hit in hits:
if hit.Name == search['title']:
if DEBUG:
self.log.info(" found '%s' by %s (%s)" % (hit.Name, hit.Artist, hit.Album))
return hit
attempts -= 1
time.sleep(0.5)
@ -1438,19 +1450,19 @@ class ITUNES(DevicePlugin):
self.log.error(" no hits")
return None
def _find_library_book(self, cached_book):
def _find_library_book(self, search):
'''
Windows-only method to get a handle to a library book in the current pythoncom session
'''
if iswindows:
if DEBUG:
self.log.info(" ITUNES._find_library_book()")
if 'uuid' in cached_book:
if 'uuid' in search:
self.log.info(" looking for '%s' by %s (%s)" %
(cached_book['title'], cached_book['author'], cached_book['uuid']))
(search['title'], search['author'], search['uuid']))
else:
self.log.info(" looking for '%s' by %s" %
(cached_book['title'], cached_book['author']))
(search['title'], search['author']))
for source in self.iTunes.sources:
if source.Kind == self.Sources.index('Library'):
@ -1477,22 +1489,26 @@ class ITUNES(DevicePlugin):
attempts = 9
while attempts:
# Find book whose Album field = cached_book['uuid']
if 'uuid' in cached_book:
hits = lib_books.Search(cached_book['uuid'],self.SearchField.index('Albums'))
# Find book whose Album field = search['uuid']
if 'uuid' in search:
if DEBUG:
self.log.info(" searching by uuid '%s' ..." % search['uuid'])
hits = lib_books.Search(search['uuid'],self.SearchField.index('Albums'))
if hits:
hit = hits[0]
if DEBUG:
self.log.info(" found '%s' by %s (%s)" % (hit.Name, hit.Artist, hit.Album))
return hit
hits = lib_books.Search(cached_book['author'],self.SearchField.index('Artists'))
if DEBUG:
self.log.info(" searching by author '%s' ..." % search['author'])
hits = lib_books.Search(search['author'],self.SearchField.index('Artists'))
if hits:
hit = hits[0]
if hit.Name == cached_book['title']:
if DEBUG:
self.log.info(" found '%s' by %s (%s)" % (hit.Name, hit.Artist, hit.Album))
return hit
for hit in hits:
if hit.Name == search['title']:
if DEBUG:
self.log.info(" found '%s' by %s (%s)" % (hit.Name, hit.Artist, hit.Album))
return hit
attempts -= 1
time.sleep(0.5)
@ -1500,7 +1516,7 @@ class ITUNES(DevicePlugin):
self.log.warning(" attempt #%d" % (10 - attempts))
if DEBUG:
self.log.error(" search for '%s' yielded no hits" % cached_book['title'])
self.log.error(" search for '%s' yielded no hits" % search['title'])
return None
def _generate_thumbnail(self, book_path, book):
@ -1617,7 +1633,7 @@ class ITUNES(DevicePlugin):
self.log.info(" ignoring '%s' of type '%s'" % (book.name(), book.kind()))
else:
if DEBUG:
self.log.info(" %-30.30s %-30.30s %s [%s]" %
self.log.info(" %-30.30s %-30.30s %-40.40s [%s]" %
(book.name(), book.artist(), book.album(), book.kind()))
device_books.append(book)
if DEBUG:
@ -1649,7 +1665,7 @@ class ITUNES(DevicePlugin):
self.log.info(" ignoring '%s' of type '%s'" % (book.Name, book.KindAsString))
else:
if DEBUG:
self.log.info(" %-30.30s %-30.30s %s [%s]" % (book.Name, book.Artist, book.Album, book.KindAsString))
self.log.info(" %-30.30s %-30.30s %-40.40s [%s]" % (book.Name, book.Artist, book.Album, book.KindAsString))
device_books.append(book)
if DEBUG:
self.log.info()
@ -1663,8 +1679,6 @@ class ITUNES(DevicePlugin):
'''
assumes pythoncom wrapper
'''
# if DEBUG:
# self.log.info(" ITUNES._get_device_books_playlist()")
if iswindows:
if 'iPod' in self.sources:
pl = None
@ -1707,11 +1721,6 @@ class ITUNES(DevicePlugin):
if update_md:
self._update_epub_metadata(fpath, metadata)
# if DEBUG:
# self.log.info(" metadata before rewrite: '{0[0]}' '{0[1]}' '{0[2]}'".format(self._dump_epub_metadata(fpath)))
# self._update_epub_metadata(fpath, metadata)
# if DEBUG:
# self.log.info(" metadata after rewrite: '{0[0]}' '{0[1]}' '{0[2]}'".format(self._dump_epub_metadata(fpath)))
return fpath
def _get_library_books(self):
@ -1766,7 +1775,7 @@ class ITUNES(DevicePlugin):
library_books[path] = book
if DEBUG:
self.log.info(" %-30.30s %-30.30s %s [%s]" % (book.name(), book.artist(), book.album(), book.kind()))
self.log.info(" %-30.30s %-30.30s %-40.40s [%s]" % (book.name(), book.artist(), book.album(), book.kind()))
else:
if DEBUG:
self.log.info(' no Library playlists')
@ -1819,7 +1828,7 @@ class ITUNES(DevicePlugin):
library_books[path] = book
if DEBUG:
self.log.info(" %-30.30s %-30.30s %s [%s]" % (book.Name, book.Artist, book.Album, book.KindAsString))
self.log.info(" %-30.30s %-30.30s %-40.40s [%s]" % (book.Name, book.Artist, book.Album, book.KindAsString))
except:
if DEBUG:
self.log.info(" no books in library")
@ -1852,8 +1861,12 @@ class ITUNES(DevicePlugin):
Check for >1 iPod device connected to iTunes
'''
if isosx:
names = [s.name() for s in self.iTunes.sources()]
kinds = [str(s.kind()).rpartition('.')[2] for s in self.iTunes.sources()]
try:
names = [s.name() for s in self.iTunes.sources()]
kinds = [str(s.kind()).rpartition('.')[2] for s in self.iTunes.sources()]
except:
# User probably quit iTunes
return {}
elif iswindows:
# Assumes a pythoncom wrapper
it_sources = ['Unknown','Library','iPod','AudioCD','MP3CD','Device','RadioTuner','SharedLibrary']
@ -2130,21 +2143,6 @@ class ITUNES(DevicePlugin):
# Refresh epub metadata
with open(fpath,'r+b') as zfo:
'''
# Touch the timestamp to force a recache
if metadata.timestamp:
if DEBUG:
self.log.info(" old timestamp: %s" % metadata.timestamp)
old_ts = metadata.timestamp
metadata.timestamp = datetime.datetime(old_ts.year, old_ts.month, old_ts.day, old_ts.hour,
old_ts.minute, old_ts.second, old_ts.microsecond+1, old_ts.tzinfo)
if DEBUG:
self.log.info(" new timestamp: %s" % metadata.timestamp)
else:
metadata.timestamp = isoformat(now())
if DEBUG:
self.log.info(" add timestamp: %s" % metadata.timestamp)
'''
# Touch the OPF timestamp
zf_opf = ZipFile(fpath,'r')
fnames = zf_opf.namelist()
@ -2273,16 +2271,20 @@ class ITUNES(DevicePlugin):
pass
# Set genre from series if available, else first alpha tag
# Otherwise iTunes grabs the first dc:subject from the opf metadata,
if metadata.series:
# Otherwise iTunes grabs the first dc:subject from the opf metadata
if self.use_series_data and metadata.series:
if lb_added:
lb_added.sort_name.set("%s %03d" % (metadata.series, metadata.series_index))
lb_added.genre.set(metadata.series)
lb_added.episode_ID.set(metadata.series)
lb_added.episode_number.set(metadata.series_index)
if db_added:
db_added.sort_name.set("%s %03d" % (metadata.series, metadata.series_index))
db_added.genre.set(metadata.series)
db_added.episode_ID.set(metadata.series)
db_added.episode_number.set(metadata.series_index)
elif metadata.tags:
for tag in metadata.tags:
if self._is_alpha(tag[0]):
@ -2323,36 +2325,38 @@ class ITUNES(DevicePlugin):
except:
if DEBUG:
self.log.warning(" iTunes automation interface reported an error"
" setting AlbumRating")
" setting AlbumRating on iDevice")
# Set Category from first alpha tag, overwrite with series if available
# Set Genre from first alpha tag, overwrite with series if available
# Otherwise iBooks uses first <dc:subject> from opf
# iTunes balks on setting EpisodeNumber, but it sticks (9.1.1.12)
if metadata.series:
if self.use_series_data and metadata.series:
if lb_added:
lb_added.Category = metadata.series
lb_added.SortName = "%s %03d" % (metadata.series, metadata.series_index)
lb_added.Genre = metadata.series
lb_added.EpisodeID = metadata.series
try:
lb_added.EpisodeNumber = metadata.series_index
except:
pass
if db_added:
db_added.Category = metadata.series
db_added.SortName = "%s %03d" % (metadata.series, metadata.series_index)
db_added.Genre = metadata.series
db_added.EpisodeID = metadata.series
try:
db_added.EpisodeNumber = metadata.series_index
except:
if DEBUG:
self.log.warning(" iTunes automation interface reported an error"
" setting EpisodeNumber")
" setting EpisodeNumber on iDevice")
elif metadata.tags:
for tag in metadata.tags:
if self._is_alpha(tag[0]):
if lb_added:
lb_added.Category = tag
lb_added.Genre = tag
if db_added:
db_added.Category = tag
db_added.Genre = tag
break