Bug fixes to itunes driver and updated NYTimes recipes

This commit is contained in:
Kovid Goyal 2010-06-18 09:00:47 -06:00
commit 1f98ff1e57
3 changed files with 96 additions and 86 deletions

View File

@ -64,6 +64,7 @@ class NYTimes(BasicNewsRecipe):
timefmt = '' timefmt = ''
needs_subscription = True needs_subscription = True
masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif' masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
cover_margins = (18,18,'grey99')
remove_tags_before = dict(id='article') remove_tags_before = dict(id='article')
remove_tags_after = dict(id='article') remove_tags_after = dict(id='article')
@ -183,6 +184,16 @@ class NYTimes(BasicNewsRecipe):
self.log("\nFailed to login") self.log("\nFailed to login")
return br return br
# Detect an interstitial ad page and, if one is found, fetch the real article.
#
# `soup` is the parsed page to inspect (presumably a BeautifulSoup tree --
# confirm against the caller). When an element carrying name="skip" is found,
# the method returns whatever self.index_to_soup(url, raw=True) yields for the
# rebuilt article URL. When no such element exists, execution falls off the
# end of the method and it implicitly returns None.
def skip_ad_pages(self, soup):
# Skip ad pages served before actual article
# Passing True as the first argument places no restriction on the tag type;
# only the name="skip" attribute is matched.
skip_tag = soup.find(True, {'name':'skip'})
if skip_tag is not None:
# The forwarding URL is read from the href of the element that encloses
# the skip marker (presumably an <a> link -- verify against live markup).
self.log.warn("Found forwarding link: %s" % skip_tag.parent['href'])
# Drop any existing query string, then prefix the site root; this assumes
# the href is site-relative (NOTE(review): an absolute href would yield a
# malformed URL -- confirm).
url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href'])
# Request the single-page view of the article.
url += '?pagewanted=all'
self.log.warn("Skipping ad to article at '%s'" % url)
# raw=True presumably asks for the undecoded page source rather than a
# parsed tree -- confirm against index_to_soup.
return self.index_to_soup(url, raw=True)
def get_cover_url(self): def get_cover_url(self):
cover = None cover = None
st = time.localtime() st = time.localtime()
@ -391,14 +402,6 @@ class NYTimes(BasicNewsRecipe):
return ans return ans
def preprocess_html(self, soup): def preprocess_html(self, soup):
# Skip ad pages served before actual article
skip_tag = soup.find(True, {'name':'skip'})
if skip_tag is not None:
self.log.error("Found forwarding link: %s" % skip_tag.parent['href'])
url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href'])
url += '?pagewanted=all'
self.log.error("Skipping ad to article at '%s'" % url)
soup = self.index_to_soup(url)
return self.strip_anchors(soup) return self.strip_anchors(soup)
def postprocess_html(self,soup, True): def postprocess_html(self,soup, True):

View File

@ -103,6 +103,7 @@ class NYTimes(BasicNewsRecipe):
]), ]),
dict(name=['script', 'noscript', 'style'])] dict(name=['script', 'noscript', 'style'])]
masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif' masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
cover_margins = (18,18,'grey99')
no_stylesheets = True no_stylesheets = True
extra_css = '.headline {text-align: left;}\n \ extra_css = '.headline {text-align: left;}\n \
.byline {font-family: monospace; \ .byline {font-family: monospace; \
@ -158,7 +159,7 @@ class NYTimes(BasicNewsRecipe):
return cover return cover
def get_masthead_title(self): def get_masthead_title(self):
return 'NYTimes GR Version' return self.title
def dump_ans(self, ans): def dump_ans(self, ans):
total_article_count = 0 total_article_count = 0
@ -279,15 +280,17 @@ class NYTimes(BasicNewsRecipe):
self.dump_ans(ans) self.dump_ans(ans)
return ans return ans
def preprocess_html(self, soup): def skip_ad_pages(self, soup):
# Skip ad pages served before actual article # Skip ad pages served before actual article
skip_tag = soup.find(True, {'name':'skip'}) skip_tag = soup.find(True, {'name':'skip'})
if skip_tag is not None: if skip_tag is not None:
self.log.error("Found forwarding link: %s" % skip_tag.parent['href']) self.log.warn("Found forwarding link: %s" % skip_tag.parent['href'])
url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href']) url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href'])
url += '?pagewanted=all' url += '?pagewanted=all'
self.log.error("Skipping ad to article at '%s'" % url) self.log.warn("Skipping ad to article at '%s'" % url)
soup = self.index_to_soup(url) return self.index_to_soup(url, raw=True)
def preprocess_html(self, soup):
return self.strip_anchors(soup) return self.strip_anchors(soup)
def postprocess_html(self,soup, True): def postprocess_html(self,soup, True):

View File

@ -160,6 +160,7 @@ class ITUNES(DevicePlugin):
sources = None sources = None
update_msg = None update_msg = None
update_needed = False update_needed = False
use_series_data = True
# Public methods # Public methods
def add_books_to_metadata(self, locations, metadata, booklists): def add_books_to_metadata(self, locations, metadata, booklists):
@ -398,7 +399,7 @@ class ITUNES(DevicePlugin):
attempts -= 1 attempts -= 1
time.sleep(0.5) time.sleep(0.5)
if DEBUG: if DEBUG:
self.log.warning(" waiting for identified iPad, attempt #%d" % (10 - attempts)) self.log.warning(" waiting for connected iPad, attempt #%d" % (10 - attempts))
else: else:
if DEBUG: if DEBUG:
self.log.info(' found connected iPad') self.log.info(' found connected iPad')
@ -474,7 +475,7 @@ class ITUNES(DevicePlugin):
attempts -= 1 attempts -= 1
time.sleep(0.5) time.sleep(0.5)
if DEBUG: if DEBUG:
self.log.warning(" waiting for identified iPad, attempt #%d" % (10 - attempts)) self.log.warning(" waiting for connected iPad, attempt #%d" % (10 - attempts))
else: else:
if DEBUG: if DEBUG:
self.log.info(' found connected iPad in iTunes') self.log.info(' found connected iPad in iTunes')
@ -693,6 +694,8 @@ class ITUNES(DevicePlugin):
# Purge the booklist, self.cached_books # Purge the booklist, self.cached_books
for i,bl_book in enumerate(booklists[0]): for i,bl_book in enumerate(booklists[0]):
if False:
self.log.info(" evaluating '%s'" % bl_book.uuid)
if bl_book.uuid == self.cached_books[path]['uuid']: if bl_book.uuid == self.cached_books[path]['uuid']:
# Remove from booklists[0] # Remove from booklists[0]
booklists[0].pop(i) booklists[0].pop(i)
@ -703,6 +706,10 @@ class ITUNES(DevicePlugin):
break break
break break
if False:
self._dump_booklist(booklists[0], indent = 2)
self._dump_cached_books(indent=2)
def reset(self, key='-1', log_packets=False, report_progress=None, def reset(self, key='-1', log_packets=False, report_progress=None,
detected_device=None) : detected_device=None) :
""" """
@ -1061,7 +1068,7 @@ class ITUNES(DevicePlugin):
except: except:
if DEBUG: if DEBUG:
self.log.warning(" iTunes automation interface reported an error" self.log.warning(" iTunes automation interface reported an error"
" when adding artwork to '%s'" % metadata.title) " when adding artwork to '%s' on the iDevice" % metadata.title)
#import traceback #import traceback
#traceback.print_exc() #traceback.print_exc()
#from calibre import ipython #from calibre import ipython
@ -1264,18 +1271,18 @@ class ITUNES(DevicePlugin):
def _dump_cached_book(self, cached_book, header=None,indent=0): def _dump_cached_book(self, cached_book, header=None,indent=0):
''' '''
''' '''
if header:
msg = '%s%s' % (' '*indent,header)
self.log.info(msg)
self.log.info( "%s%s" % (' '*indent, '-' * len(msg)))
if isosx: if isosx:
self.log.info("%s%-40.40s %-30.30s %-10.10s %-10.10s %s" % if header:
(' '*indent, msg = '%s%s' % (' '*indent,header)
'title', self.log.info(msg)
'author', self.log.info( "%s%s" % (' '*indent, '-' * len(msg)))
'lib_book', self.log.info("%s%-40.40s %-30.30s %-10.10s %-10.10s %s" %
'dev_book', (' '*indent,
'uuid')) 'title',
'author',
'lib_book',
'dev_book',
'uuid'))
self.log.info("%s%-40.40s %-30.30s %-10.10s %-10.10s %s" % self.log.info("%s%-40.40s %-30.30s %-10.10s %-10.10s %s" %
(' '*indent, (' '*indent,
cached_book['title'], cached_book['title'],
@ -1284,14 +1291,17 @@ class ITUNES(DevicePlugin):
str(cached_book['dev_book'])[-9:], str(cached_book['dev_book'])[-9:],
cached_book['uuid'])) cached_book['uuid']))
elif iswindows: elif iswindows:
if header:
msg = '%s%s' % (' '*indent,header)
self.log.info(msg)
self.log.info( "%s%s" % (' '*indent, '-' * len(msg)))
self.log.info("%s%-40.40s %-30.30s %s" % self.log.info("%s%-40.40s %-30.30s %s" %
(' '*indent, (' '*indent,
cached_book['title'], cached_book['title'],
cached_book['author'], cached_book['author'],
cached_book['uuid'])) cached_book['uuid']))
self.log.info()
def _dump_cached_books(self, header=None, indent=0): def _dump_cached_books(self, header=None, indent=0):
''' '''
''' '''
@ -1415,19 +1425,21 @@ class ITUNES(DevicePlugin):
(search['uuid'], search['title'], search['author'])) (search['uuid'], search['title'], search['author']))
attempts = 9 attempts = 9
while attempts: while attempts:
# Try by uuid # Try by uuid - only one hit
hits = dev_books.Search(search['uuid'],self.SearchField.index('Albums')) hits = dev_books.Search(search['uuid'],self.SearchField.index('Albums'))
if hits: if hits:
hit = hits[0] hit = hits[0]
self.log.info(" found '%s' by %s (%s)" % (hit.Name, hit.Artist, hit.Album)) self.log.info(" found '%s' by %s (%s)" % (hit.Name, hit.Artist, hit.Album))
return hit return hit
# Try by author # Try by author - there could be multiple hits
hits = dev_books.Search(search['author'],self.SearchField.index('Artists')) hits = dev_books.Search(search['author'],self.SearchField.index('Artists'))
if hits: if hits:
hit = hits[0] for hit in hits:
self.log.info(" found '%s' by %s" % (hit.Name, hit.Artist)) if hit.Name == search['title']:
return hit if DEBUG:
self.log.info(" found '%s' by %s (%s)" % (hit.Name, hit.Artist, hit.Album))
return hit
attempts -= 1 attempts -= 1
time.sleep(0.5) time.sleep(0.5)
@ -1438,19 +1450,19 @@ class ITUNES(DevicePlugin):
self.log.error(" no hits") self.log.error(" no hits")
return None return None
def _find_library_book(self, cached_book): def _find_library_book(self, search):
''' '''
Windows-only method to get a handle to a library book in the current pythoncom session Windows-only method to get a handle to a library book in the current pythoncom session
''' '''
if iswindows: if iswindows:
if DEBUG: if DEBUG:
self.log.info(" ITUNES._find_library_book()") self.log.info(" ITUNES._find_library_book()")
if 'uuid' in cached_book: if 'uuid' in search:
self.log.info(" looking for '%s' by %s (%s)" % self.log.info(" looking for '%s' by %s (%s)" %
(cached_book['title'], cached_book['author'], cached_book['uuid'])) (search['title'], search['author'], search['uuid']))
else: else:
self.log.info(" looking for '%s' by %s" % self.log.info(" looking for '%s' by %s" %
(cached_book['title'], cached_book['author'])) (search['title'], search['author']))
for source in self.iTunes.sources: for source in self.iTunes.sources:
if source.Kind == self.Sources.index('Library'): if source.Kind == self.Sources.index('Library'):
@ -1477,22 +1489,26 @@ class ITUNES(DevicePlugin):
attempts = 9 attempts = 9
while attempts: while attempts:
# Find book whose Album field = cached_book['uuid'] # Find book whose Album field = search['uuid']
if 'uuid' in cached_book: if 'uuid' in search:
hits = lib_books.Search(cached_book['uuid'],self.SearchField.index('Albums')) if DEBUG:
self.log.info(" searching by uuid '%s' ..." % search['uuid'])
hits = lib_books.Search(search['uuid'],self.SearchField.index('Albums'))
if hits: if hits:
hit = hits[0] hit = hits[0]
if DEBUG: if DEBUG:
self.log.info(" found '%s' by %s (%s)" % (hit.Name, hit.Artist, hit.Album)) self.log.info(" found '%s' by %s (%s)" % (hit.Name, hit.Artist, hit.Album))
return hit return hit
hits = lib_books.Search(cached_book['author'],self.SearchField.index('Artists')) if DEBUG:
self.log.info(" searching by author '%s' ..." % search['author'])
hits = lib_books.Search(search['author'],self.SearchField.index('Artists'))
if hits: if hits:
hit = hits[0] for hit in hits:
if hit.Name == cached_book['title']: if hit.Name == search['title']:
if DEBUG: if DEBUG:
self.log.info(" found '%s' by %s (%s)" % (hit.Name, hit.Artist, hit.Album)) self.log.info(" found '%s' by %s (%s)" % (hit.Name, hit.Artist, hit.Album))
return hit return hit
attempts -= 1 attempts -= 1
time.sleep(0.5) time.sleep(0.5)
@ -1500,7 +1516,7 @@ class ITUNES(DevicePlugin):
self.log.warning(" attempt #%d" % (10 - attempts)) self.log.warning(" attempt #%d" % (10 - attempts))
if DEBUG: if DEBUG:
self.log.error(" search for '%s' yielded no hits" % cached_book['title']) self.log.error(" search for '%s' yielded no hits" % search['title'])
return None return None
def _generate_thumbnail(self, book_path, book): def _generate_thumbnail(self, book_path, book):
@ -1617,7 +1633,7 @@ class ITUNES(DevicePlugin):
self.log.info(" ignoring '%s' of type '%s'" % (book.name(), book.kind())) self.log.info(" ignoring '%s' of type '%s'" % (book.name(), book.kind()))
else: else:
if DEBUG: if DEBUG:
self.log.info(" %-30.30s %-30.30s %s [%s]" % self.log.info(" %-30.30s %-30.30s %-40.40s [%s]" %
(book.name(), book.artist(), book.album(), book.kind())) (book.name(), book.artist(), book.album(), book.kind()))
device_books.append(book) device_books.append(book)
if DEBUG: if DEBUG:
@ -1649,7 +1665,7 @@ class ITUNES(DevicePlugin):
self.log.info(" ignoring '%s' of type '%s'" % (book.Name, book.KindAsString)) self.log.info(" ignoring '%s' of type '%s'" % (book.Name, book.KindAsString))
else: else:
if DEBUG: if DEBUG:
self.log.info(" %-30.30s %-30.30s %s [%s]" % (book.Name, book.Artist, book.Album, book.KindAsString)) self.log.info(" %-30.30s %-30.30s %-40.40s [%s]" % (book.Name, book.Artist, book.Album, book.KindAsString))
device_books.append(book) device_books.append(book)
if DEBUG: if DEBUG:
self.log.info() self.log.info()
@ -1663,8 +1679,6 @@ class ITUNES(DevicePlugin):
''' '''
assumes pythoncom wrapper assumes pythoncom wrapper
''' '''
# if DEBUG:
# self.log.info(" ITUNES._get_device_books_playlist()")
if iswindows: if iswindows:
if 'iPod' in self.sources: if 'iPod' in self.sources:
pl = None pl = None
@ -1707,11 +1721,6 @@ class ITUNES(DevicePlugin):
if update_md: if update_md:
self._update_epub_metadata(fpath, metadata) self._update_epub_metadata(fpath, metadata)
# if DEBUG:
# self.log.info(" metadata before rewrite: '{0[0]}' '{0[1]}' '{0[2]}'".format(self._dump_epub_metadata(fpath)))
# self._update_epub_metadata(fpath, metadata)
# if DEBUG:
# self.log.info(" metadata after rewrite: '{0[0]}' '{0[1]}' '{0[2]}'".format(self._dump_epub_metadata(fpath)))
return fpath return fpath
def _get_library_books(self): def _get_library_books(self):
@ -1766,7 +1775,7 @@ class ITUNES(DevicePlugin):
library_books[path] = book library_books[path] = book
if DEBUG: if DEBUG:
self.log.info(" %-30.30s %-30.30s %s [%s]" % (book.name(), book.artist(), book.album(), book.kind())) self.log.info(" %-30.30s %-30.30s %-40.40s [%s]" % (book.name(), book.artist(), book.album(), book.kind()))
else: else:
if DEBUG: if DEBUG:
self.log.info(' no Library playlists') self.log.info(' no Library playlists')
@ -1819,7 +1828,7 @@ class ITUNES(DevicePlugin):
library_books[path] = book library_books[path] = book
if DEBUG: if DEBUG:
self.log.info(" %-30.30s %-30.30s %s [%s]" % (book.Name, book.Artist, book.Album, book.KindAsString)) self.log.info(" %-30.30s %-30.30s %-40.40s [%s]" % (book.Name, book.Artist, book.Album, book.KindAsString))
except: except:
if DEBUG: if DEBUG:
self.log.info(" no books in library") self.log.info(" no books in library")
@ -1852,8 +1861,12 @@ class ITUNES(DevicePlugin):
Check for >1 iPod device connected to iTunes Check for >1 iPod device connected to iTunes
''' '''
if isosx: if isosx:
names = [s.name() for s in self.iTunes.sources()] try:
kinds = [str(s.kind()).rpartition('.')[2] for s in self.iTunes.sources()] names = [s.name() for s in self.iTunes.sources()]
kinds = [str(s.kind()).rpartition('.')[2] for s in self.iTunes.sources()]
except:
# User probably quit iTunes
return {}
elif iswindows: elif iswindows:
# Assumes a pythoncom wrapper # Assumes a pythoncom wrapper
it_sources = ['Unknown','Library','iPod','AudioCD','MP3CD','Device','RadioTuner','SharedLibrary'] it_sources = ['Unknown','Library','iPod','AudioCD','MP3CD','Device','RadioTuner','SharedLibrary']
@ -2130,21 +2143,6 @@ class ITUNES(DevicePlugin):
# Refresh epub metadata # Refresh epub metadata
with open(fpath,'r+b') as zfo: with open(fpath,'r+b') as zfo:
'''
# Touch the timestamp to force a recache
if metadata.timestamp:
if DEBUG:
self.log.info(" old timestamp: %s" % metadata.timestamp)
old_ts = metadata.timestamp
metadata.timestamp = datetime.datetime(old_ts.year, old_ts.month, old_ts.day, old_ts.hour,
old_ts.minute, old_ts.second, old_ts.microsecond+1, old_ts.tzinfo)
if DEBUG:
self.log.info(" new timestamp: %s" % metadata.timestamp)
else:
metadata.timestamp = isoformat(now())
if DEBUG:
self.log.info(" add timestamp: %s" % metadata.timestamp)
'''
# Touch the OPF timestamp # Touch the OPF timestamp
zf_opf = ZipFile(fpath,'r') zf_opf = ZipFile(fpath,'r')
fnames = zf_opf.namelist() fnames = zf_opf.namelist()
@ -2273,16 +2271,20 @@ class ITUNES(DevicePlugin):
pass pass
# Set genre from series if available, else first alpha tag # Set genre from series if available, else first alpha tag
# Otherwise iTunes grabs the first dc:subject from the opf metadata, # Otherwise iTunes grabs the first dc:subject from the opf metadata
if metadata.series: if self.use_series_data and metadata.series:
if lb_added: if lb_added:
lb_added.sort_name.set("%s %03d" % (metadata.series, metadata.series_index))
lb_added.genre.set(metadata.series) lb_added.genre.set(metadata.series)
lb_added.episode_ID.set(metadata.series) lb_added.episode_ID.set(metadata.series)
lb_added.episode_number.set(metadata.series_index) lb_added.episode_number.set(metadata.series_index)
if db_added: if db_added:
db_added.sort_name.set("%s %03d" % (metadata.series, metadata.series_index))
db_added.genre.set(metadata.series) db_added.genre.set(metadata.series)
db_added.episode_ID.set(metadata.series) db_added.episode_ID.set(metadata.series)
db_added.episode_number.set(metadata.series_index) db_added.episode_number.set(metadata.series_index)
elif metadata.tags: elif metadata.tags:
for tag in metadata.tags: for tag in metadata.tags:
if self._is_alpha(tag[0]): if self._is_alpha(tag[0]):
@ -2323,36 +2325,38 @@ class ITUNES(DevicePlugin):
except: except:
if DEBUG: if DEBUG:
self.log.warning(" iTunes automation interface reported an error" self.log.warning(" iTunes automation interface reported an error"
" setting AlbumRating") " setting AlbumRating on iDevice")
# Set Category from first alpha tag, overwrite with series if available # Set Genre from first alpha tag, overwrite with series if available
# Otherwise iBooks uses first <dc:subject> from opf # Otherwise iBooks uses first <dc:subject> from opf
# iTunes balks on setting EpisodeNumber, but it sticks (9.1.1.12) # iTunes balks on setting EpisodeNumber, but it sticks (9.1.1.12)
if metadata.series: if self.use_series_data and metadata.series:
if lb_added: if lb_added:
lb_added.Category = metadata.series lb_added.SortName = "%s %03d" % (metadata.series, metadata.series_index)
lb_added.Genre = metadata.series
lb_added.EpisodeID = metadata.series lb_added.EpisodeID = metadata.series
try: try:
lb_added.EpisodeNumber = metadata.series_index lb_added.EpisodeNumber = metadata.series_index
except: except:
pass pass
if db_added: if db_added:
db_added.Category = metadata.series db_added.SortName = "%s %03d" % (metadata.series, metadata.series_index)
db_added.Genre = metadata.series
db_added.EpisodeID = metadata.series db_added.EpisodeID = metadata.series
try: try:
db_added.EpisodeNumber = metadata.series_index db_added.EpisodeNumber = metadata.series_index
except: except:
if DEBUG: if DEBUG:
self.log.warning(" iTunes automation interface reported an error" self.log.warning(" iTunes automation interface reported an error"
" setting EpisodeNumber") " setting EpisodeNumber on iDevice")
elif metadata.tags: elif metadata.tags:
for tag in metadata.tags: for tag in metadata.tags:
if self._is_alpha(tag[0]): if self._is_alpha(tag[0]):
if lb_added: if lb_added:
lb_added.Category = tag lb_added.Genre = tag
if db_added: if db_added:
db_added.Category = tag db_added.Genre = tag
break break