Merge from trunk

This commit is contained in:
Charles Haley 2010-06-11 20:30:57 +01:00
commit d4090fbd57
29 changed files with 24092 additions and 10779 deletions

Binary file not shown.

Before

Width:  |  Height:  |  Size: 983 B

After

Width:  |  Height:  |  Size: 983 B

View File

@ -1,5 +1,5 @@
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.boston.com
'''
@ -7,10 +7,10 @@ www.boston.com
from calibre.web.feeds.recipes import BasicNewsRecipe
class BusinessStandard(BasicNewsRecipe):
title = 'Boston'
title = 'The Boston Globe'
__author__ = 'Darko Miletic'
description = 'News from Boston'
oldest_article = 7
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
delay = 1
@ -19,6 +19,9 @@ class BusinessStandard(BasicNewsRecipe):
publisher = 'Boston'
category = 'news, boston, usa, world'
language = 'en'
publication_type = 'newspaper'
masthead_url = 'http://cache.boston.com/images/globe/grslider/the_boston_globe.gif'
extra_css = ' body{font-family: Georgia, serif} div#articleBodyTop{display:block} '
conversion_options = {
'comments' : description
@ -27,8 +30,11 @@ class BusinessStandard(BasicNewsRecipe):
,'publisher' : publisher
}
keep_only_tags = [dict(name='div', attrs={'class':'story'})]
remove_tags = [dict(name=['object','link','script','iframe'])]
keep_only_tags = [dict(attrs={'id':['INDblogEntry','blogEntry','articleHeader','articleGraphs','galleryShell']})]
remove_tags = [
dict(name=['object','link','script','iframe'])
,dict(attrs={'id':['blogheadTools','bdc_emailWidget','tools','relatedContent']})
]
feeds = [
(u'Top Stories' , u'http://feeds.boston.com/boston/topstories' )
@ -38,12 +44,9 @@ class BusinessStandard(BasicNewsRecipe):
]
def print_version(self, url):
return url + '?mode=PF'
return url + '?page=full'
def get_article_url(self, article):
rawarticle = article.get('pheedo_origlink', None)
artls, sep, rsep = rawarticle.rpartition('/?')
if artls == '':
artls = rawarticle.rpartition('?')[0]
return artls
rawarticle = article.get('guid', None)
return rawarticle.rpartition('?')[0]

View File

@ -88,7 +88,9 @@ class Economist(BasicNewsRecipe):
continue
a = tag.find('a', href=True)
if a is not None:
url=a['href'].split('?')[0]+'/print'
url=a['href']
id_ = re.search(r'story_id=(\d+)', url).group(1)
url = 'http://www.economist.com/node/%s/print'%id_
if url.startswith('Printer'):
url = '/'+url
if url.startswith('/'):

View File

@ -1,96 +0,0 @@
#!/usr/bin/env python
# -*- coding: cp1252 -*-
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
honoluluadvertiser.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Honoluluadvertiser(BasicNewsRecipe):
    '''
    Recipe for honoluluadvertiser.com: pulls six RSS sections, keeps the
    article containers listed in keep_only_tags and strips tools, ads and
    comment widgets listed in remove_tags.
    '''

    # --- descriptive metadata ---------------------------------------------
    title       = 'Honolulu Advertiser'
    __author__  = 'Darko Miletic and Sujata Raman'
    description = "Latest national and local Hawaii sports news from The Honolulu Advertiser."
    publisher   = 'Honolulu Advertiser'
    category    = 'news, Honolulu, Hawaii'
    language    = 'en'
    cover_url   = 'http://www.honoluluadvertiser.com/graphics/frontpage/frontpage.jpg'

    # --- download behaviour -----------------------------------------------
    oldest_article        = 2
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'cp1252'
    remove_javascript     = True

    # --- output options, derived from the metadata above -------------------
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
    ]

    html2epub_options = 'publisher="%s"\ncomments="%s"\ntags="%s"' % (
        publisher, description, category)

    # --- content selection --------------------------------------------------
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': ["hon_article_top", "article-bodytext",
                                            "hon_article_photo", "storyphoto", "article"]}},
        {'name': 'div', 'attrs': {'id': ["storycontentleft", "article"]}},
    ]

    remove_tags = [
        {'name': ['object', 'link', 'embed']},
        {'name': 'div', 'attrs': {'class': ["article-tools", "titleBar", "invisiblespacer",
                                            "articleflex-container", "hon_newslist",
                                            "categoryheader", "columnframe", "subHeadline",
                                            "poster-container"]}},
        {'name': 'div', 'attrs': {'align': ["right"]}},
        {'name': 'div', 'attrs': {'id': ["pluckcomments"]}},
        {'name': 'td', 'attrs': {'class': ["prepsfacts"]}},
        {'name': 'img', 'attrs': {'height': ["1"]}},
        {'name': 'img', 'attrs': {'alt': ["Advertisement"]}},
        {'name': 'img', 'attrs': {'src': ["/gcicommonfiles/sr/graphics/common/adlabel_horz.gif",
                                          "/gcicommonfiles/sr/graphics/common/icon_whatsthis.gif"]}},
    ]

    extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-size:large; color:#000000; }
.hon_article_timestamp{font-family:Arial,Helvetica,sans-serif; font-size:70%; }
.postedStoryDate{font-family:Arial,Helvetica,sans-serif; font-size:30%; }
.postedDate{font-family:Arial,Helvetica,sans-serif; font-size:30%; }
.credit{font-family:Arial,Helvetica,sans-serif; font-size:30%; }
.hon_article_top{font-family:Arial,Helvetica,sans-serif; color:#666666; font-size:30%; font-weight:bold;}
.grayBackground{font-family:Arial,Helvetica,sans-serif; color:#666666; font-size:30%;}
.hon_photocaption{font-family:Arial,Helvetica,sans-serif; font-size:30%; }
.photoCaption{font-family:Arial,Helvetica,sans-serif; font-size:30%; }
.hon_photocredit{font-family:Arial,Helvetica,sans-serif; font-size:30%; color:#666666;}
.storyphoto{font-family:Arial,Helvetica,sans-serif; font-size:30%; color:#666666;}
.article-bodytext{font-family:Arial,Helvetica,sans-serif; font-size:xx-small; }
.storycontentleft{font-family:Arial,Helvetica,sans-serif; font-size:xx-small; }
#article{font-family:Arial,Helvetica,sans-serif; font-size:xx-small; }
.contentarea{font-family:Arial,Helvetica,sans-serif; font-size:xx-small; }
.storytext{font-family:Verdana,Arial,Helvetica,sans-serif; font-size:xx-small;}
.storyHeadline{font-family:Arial,Helvetica,sans-serif; font-size:large; color:#000000; font-weight:bold;}
.source{font-family:Arial,Helvetica,sans-serif; color:#333333; font-style: italic; font-weight:bold; }
'''

    # --- feeds: (section title, RSS url) -----------------------------------
    feeds = [
        (u'Breaking news', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS01&MIME=XML'),
        (u'Local news', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS02&MIME=XML'),
        (u'Sports', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS03&MIME=XML'),
        (u'Island life', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS05&MIME=XML'),
        (u'Entertainment', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS06&MIME=XML'),
        (u'Business', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS04&MIME=XML'),
    ]

    def preprocess_html(self, soup):
        '''
        Clean a downloaded page before conversion and return the same soup.

        Drops every inline style attribute, puts a Content-Language meta
        tag at the start of <head>, and renames span/table/font elements
        to div.
        '''
        for styled in soup.findAll(style=True):
            del styled['style']
        soup.head.insert(0, '\n<meta http-equiv="Content-Language" content="en"/>\n')
        for node in soup.findAll(name=['span','table','font']):
            node.name = 'div'
        return soup
# def print_version(self, url):
# ubody, sep, rest = url.rpartition('/-1/')
# root, sep2, article_id = ubody.partition('/article/')
# return u'http://www.honoluluadvertiser.com/apps/pbcs.dll/article?AID=/' + article_id + '&template=printart'

View File

@ -51,6 +51,7 @@ class LeMondeDiplomatiqueEn(BasicNewsRecipe):
, dict(name='div',attrs={'class':'notes surlignable'})
]
remove_tags = [dict(name=['object','link','script','iframe','base'])]
remove_attributes = ['height','width']
def parse_index(self):
articles = []
@ -72,5 +73,5 @@ class LeMondeDiplomatiqueEn(BasicNewsRecipe):
,'url' :url
,'description':description
})
return [(soup.head.title.string, articles)]
return [(self.title, articles)]

View File

@ -0,0 +1,47 @@
__license__ = 'GPL v3'
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
staradvertiser.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Starbulletin(BasicNewsRecipe):
    '''
    Recipe for staradvertiser.com (Honolulu Star-Advertiser).

    Articles come from the site's RSS feeds; each page is trimmed to the
    region between the element with id "storyTitle" and the "storytext"
    div, and inline story images are removed.
    '''
    title                 = 'Honolulu Star Advertiser'
    __author__            = 'Darko Miletic'
    description           = "Latest national and local Hawaii sports news"
    publisher             = 'Honolulu Star-Advertiser'
    category              = 'news, Honolulu, Hawaii'
    oldest_article        = 2
    max_articles_per_feed = 100
    language              = 'en'
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'utf8'
    publication_type      = 'newspaper'
    extra_css             = ' body{font-family: Verdana,Arial,Helvetica,sans-serif} h1,.brown,.postCredit{color: #663300} .storyDeck{font-size: 1.2em; font-weight: bold} '

    # Keys are conversion option names. The description goes under
    # 'comments' (plural), matching the other recipes; 'comment' is not a
    # recognised option name.
    conversion_options = {
                          'comments'         : description
                        , 'tags'             : category
                        , 'publisher'        : publisher
                        , 'language'         : language
                        , 'linearize_tables' : True
                        }

    # Everything before the title element and after the story text is dropped.
    remove_tags_before = dict(attrs={'id':'storyTitle'})
    remove_tags_after  = dict(name='div', attrs={'class':'storytext'})

    remove_tags = [
                     dict(name=['object','link'])
                    ,dict(attrs={'class':'insideStoryImage'})
                  ]

    # (section title, RSS url)
    feeds = [
              (u'Headlines' , u'http://www.staradvertiser.com/staradvertiser_headlines.rss' )
             ,(u'News' , u'http://www.staradvertiser.com/news/index.rss' )
             ,(u'Sports' , u'http://www.staradvertiser.com/sports/index.rss' )
             ,(u'Features' , u'http://www.staradvertiser.com/features/index.rss' )
             ,(u'Editorials', u'http://www.staradvertiser.com/editorials/index.rss' )
             ,(u'Business' , u'http://www.staradvertiser.com/business/index.rss' )
             ,(u'Travel' , u'http://www.staradvertiser.com/travel/index.rss' )
            ]

View File

@ -1,60 +0,0 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
starbulletin.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Starbulletin(BasicNewsRecipe):
    '''
    Recipe for starbulletin.com (Honolulu Star-Bulletin): reads the
    site's RSS feeds and keeps only the story column of each page.
    '''

    # --- descriptive metadata ---------------------------------------------
    title       = 'Honolulu Star-Bulletin'
    __author__  = 'Darko Miletic'
    description = "Latest national and local Hawaii sports news"
    publisher   = 'Honolulu Star-Bulletin'
    category    = 'news, Honolulu, Hawaii'
    language    = 'en'
    cover_url   = 'http://media.starbulletin.com/designimages/spacer.gif'

    # --- download behaviour -----------------------------------------------
    oldest_article        = 2
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'utf8'
    remove_javascript     = True

    # --- output options, derived from the metadata above -------------------
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
    ]

    html2epub_options = 'publisher="%s"\ncomments="%s"\ntags="%s"' % (
        publisher, description, category)

    # --- content selection --------------------------------------------------
    keep_only_tags = [{'name': 'div', 'attrs': {'id': 'storyColoumn'}}]

    remove_tags = [
        {'name': ['object', 'link']},
        {'name': 'span', 'attrs': {'id': 'printdesc'}},
        {'name': 'div', 'attrs': {'class': 'lightGreyBox storyTools clearAll'}},
        {'name': 'div', 'attrs': {'id': 'breadcrumbs'}},
    ]

    # --- feeds: (section title, RSS url) -----------------------------------
    feeds = [
        (u'Headlines', u'http://www.starbulletin.com/starbulletin_headlines.rss'),
        (u'News', u'http://www.starbulletin.com/news/index.rss'),
        (u'Sports', u'http://www.starbulletin.com/sports/index.rss'),
        (u'Features', u'http://www.starbulletin.com/features/index.rss'),
        (u'Editorials', u'http://www.starbulletin.com/editorials/index.rss'),
        (u'Business', u'http://www.starbulletin.com/business/index.rss'),
        (u'Travel', u'http://www.starbulletin.com/travel/index.rss'),
    ]

    def preprocess_html(self, soup):
        '''
        Strip every inline style attribute, put a Content-Language meta
        tag at the start of <head>, and return the same soup.
        '''
        for styled in soup.findAll(style=True):
            del styled['style']
        soup.head.insert(0, '\n<meta http-equiv="Content-Language" content="en"/>\n')
        return soup

View File

@ -147,6 +147,7 @@ class ITUNES(DevicePlugin):
ejected = False
iTunes= None
iTunes_media = None
library_orphans = None
log = Log()
manual_sync_mode = False
path_template = 'iTunes/%s - %s.epub'
@ -244,14 +245,13 @@ class ITUNES(DevicePlugin):
# Fetch a list of books from iPod device connected to iTunes
# Fetch Library|Books
library_books = self._get_library_books()
if 'iPod' in self.sources:
booklist = BookList(self.log)
cached_books = {}
if isosx:
library_books = self._get_library_books()
device_books = self._get_device_books()
book_count = float(len(device_books))
for (i,book) in enumerate(device_books):
@ -281,11 +281,13 @@ class ITUNES(DevicePlugin):
if self.report_progress is not None:
self.report_progress(i+1/book_count, _('%d of %d') % (i+1, book_count))
self._purge_orphans(cached_books)
elif iswindows:
try:
pythoncom.CoInitialize()
self.iTunes = win32com.client.Dispatch("iTunes.Application")
library_books = self._get_library_books()
device_books = self._get_device_books()
book_count = float(len(device_books))
for (i,book) in enumerate(device_books):
@ -315,6 +317,7 @@ class ITUNES(DevicePlugin):
if self.report_progress is not None:
self.report_progress(i+1/book_count,
_('%d of %d') % (i+1, book_count))
self._purge_orphans(cached_books)
finally:
pythoncom.CoUninitialize()
@ -1671,11 +1674,13 @@ class ITUNES(DevicePlugin):
def _get_library_books(self):
'''
Populate a dict of paths from iTunes Library|Books
Windows assumes pythoncom wrapper
'''
if DEBUG:
self.log.info("\n ITUNES._get_library_books()")
library_books = {}
library_orphans = {}
lib = None
if isosx:
@ -1708,15 +1713,14 @@ class ITUNES(DevicePlugin):
if DEBUG:
self.log.info(" ignoring '%s' of type '%s'" % (book.name(), book.kind()))
else:
# Remove calibre orphans
# Collect calibre orphans - remnants of recipe uploads
path = self.path_template % (book.name(), book.artist())
if str(book.description()).startswith(self.description_prefix):
if book.location() == appscript.k.missing_value:
library_orphans[path] = book
if DEBUG:
self.log.info(" found calibre orphan '%s' in Library|Books" % book.name())
#book.delete()
#continue
path = self.path_template % (book.name(), book.artist())
library_books[path] = book
if DEBUG:
self.log.info(" adding %-30.30s [%s]" % (book.name(), book.kind()))
@ -1729,59 +1733,59 @@ class ITUNES(DevicePlugin):
elif iswindows:
lib = None
try:
pythoncom.CoInitialize()
self.iTunes = win32com.client.Dispatch("iTunes.Application")
for source in self.iTunes.sources:
if source.Kind == self.Sources.index('Library'):
lib = source
self.log.info(" Library source: '%s' kind: %s" % (lib.Name, self.Sources[lib.Kind]))
break
else:
self.log.error(" Library source not found")
# try:
# pythoncom.CoInitialize()
# self.iTunes = win32com.client.Dispatch("iTunes.Application")
for source in self.iTunes.sources:
if source.Kind == self.Sources.index('Library'):
lib = source
self.log.info(" Library source: '%s' kind: %s" % (lib.Name, self.Sources[lib.Kind]))
break
else:
self.log.error(" Library source not found")
if lib is not None:
lib_books = None
if lib.Playlists is not None:
for pl in lib.Playlists:
if pl.Kind == self.PlaylistKind.index('User') and \
pl.SpecialKind == self.PlaylistSpecialKind.index('Books'):
if DEBUG:
self.log.info(" Books playlist: '%s'" % (pl.Name))
lib_books = pl.Tracks
break
else:
if lib is not None:
lib_books = None
if lib.Playlists is not None:
for pl in lib.Playlists:
if pl.Kind == self.PlaylistKind.index('User') and \
pl.SpecialKind == self.PlaylistSpecialKind.index('Books'):
if DEBUG:
self.log.error(" no Library|Books playlist found")
self.log.info(" Books playlist: '%s'" % (pl.Name))
lib_books = pl.Tracks
break
else:
if DEBUG:
self.log.error(" no Library playlists found")
self.log.error(" no Library|Books playlist found")
else:
if DEBUG:
self.log.error(" no Library playlists found")
try:
for book in lib_books:
# This may need additional entries for international iTunes users
if book.KindAsString in ['MPEG audio file']:
if DEBUG:
self.log.info(" ignoring %-30.30s of type '%s'" % (book.Name, book.KindAsString))
else:
# Remove calibre orphans
if book.Description.startswith(self.description_prefix):
if not book.Location:
if DEBUG:
self.log.info(" found calibre orphan '%s' in Library|Books" % book.Name)
#book.Delete()
#continue
try:
for book in lib_books:
# This may need additional entries for international iTunes users
if book.KindAsString in ['MPEG audio file']:
if DEBUG:
self.log.info(" ignoring %-30.30s of type '%s'" % (book.Name, book.KindAsString))
else:
path = self.path_template % (book.Name, book.Artist)
path = self.path_template % (book.Name, book.Artist)
library_books[path] = book
if DEBUG:
self.log.info(" adding %-30.30s [%s]" % (book.Name, book.KindAsString))
except:
if DEBUG:
self.log.info(" no books in library")
finally:
pythoncom.CoUninitialize()
# Collect calibre orphans
if book.Description.startswith(self.description_prefix):
if not book.Location:
library_orphans[path] = book
if DEBUG:
self.log.info(" found calibre orphan '%s' in Library|Books" % book.Name)
library_books[path] = book
if DEBUG:
self.log.info(" adding %-30.30s [%s]" % (book.Name, book.KindAsString))
except:
if DEBUG:
self.log.info(" no books in library")
# finally:
# pythoncom.CoUninitialize()
self.library_orphans = library_orphans
return library_books
def _get_purchased_book_ids(self):
@ -1904,6 +1908,45 @@ class ITUNES(DevicePlugin):
self.version[0],self.version[1],self.version[2]))
self.log.info(" iTunes_media: %s" % self.iTunes_media)
def _purge_orphans(self,cached_books):
    '''
    Remove from iTunes every library orphan that is not on the device.

    self.library_orphans maps paths to iTunes book objects. A path is
    rebuilt for each orphan from self.path_template and the book's
    name/artist; any rebuilt path that is not a key of cached_books is
    deleted from iTunes.

    This occurs when recipes are uploaded in a previous session
    and the book has since been deleted on the device.

    cached_books: container of device book paths, tested with `in`.
    Does nothing on platforms other than OS X and Windows.
    '''
    if DEBUG:
        self.log.info(" ITUNES._purge_orphans")
        #self.log.info(" cached_books:\n %s" % "\n ".join(cached_books.keys()))

    # library_orphans is None at class level and only becomes a dict once
    # it is assigned; with nothing collected there is nothing to purge.
    if not self.library_orphans:
        return

    # The two platforms expose the same data through different APIs:
    # appscript uses method calls, the COM wrapper uses properties.
    if isosx:
        def path_for(book):
            return self.path_template % (book.name(), book.artist())
        def remove(book):
            self.iTunes.delete(book)
    elif iswindows:
        def path_for(book):
            return self.path_template % (book.Name, book.Artist)
        def remove(book):
            book.Delete()
    else:
        return

    orphan_paths = {}
    for book in self.library_orphans.values():
        orphan_paths[path_for(book)] = book

    # Scan orphan_paths for paths not found in cached_books
    for path, book in orphan_paths.items():
        if path not in cached_books:
            if DEBUG:
                self.log.info(" '%s' not found on device, removing from iTunes" % path)
            remove(book)
def _remove_existing_copies(self,path,file,metadata):
'''
'''

View File

@ -12,7 +12,7 @@ from uuid import uuid4
from lxml import etree
from calibre import prints, guess_type, iswindows
from calibre import prints, guess_type, iswindows, islinux
from calibre.devices.errors import DeviceError
from calibre.devices.usbms.driver import debug_print
from calibre.constants import DEBUG
@ -47,7 +47,10 @@ def strptime(src):
src[2] = str(MONTH_MAP[src[2]])
return time.strptime(' '.join(src), '%w, %d %m %Y %H:%M:%S %Z')
def strftime(epoch, zone=time.gmtime):
def strftime(epoch, zone=None):
zone = time.gmtime
if islinux:
zone = time.localtime
src = time.strftime("%w, %d %m %Y %H:%M:%S GMT", zone(epoch)).split()
src[0] = INVERSE_DAY_MAP[int(src[0][:-1])]+','
src[2] = INVERSE_MONTH_MAP[int(src[2])]

View File

@ -53,7 +53,24 @@ def init_qt(args):
app.setWindowIcon(QIcon(I('library.png')))
return app, opts, args, actions
def get_library_path():
def get_default_library_path():
    '''
    Return the directory to offer as the default location for a new library.

    The preferred location is a folder named by the translated string
    'Calibre Library' inside the user's home directory. It is created if
    it does not exist; if creation fails the home directory itself is
    returned instead.
    '''
    fname = _('Calibre Library')
    if isinstance(fname, unicode):
        try:
            fname = fname.encode(filesystem_encoding)
        except Exception:
            # The translated name cannot be encoded for the filesystem;
            # fall back to the untranslated ASCII name.
            fname = 'Calibre Library'
    x = os.path.expanduser('~'+os.sep+fname)
    if not os.path.exists(x):
        try:
            os.makedirs(x)
        except Exception:
            # Best effort only: Exception (rather than a bare except) keeps
            # the fallback but lets KeyboardInterrupt/SystemExit propagate.
            x = os.path.expanduser('~')
    return x
def get_library_path(parent=None):
library_path = prefs['library_path']
if library_path is None: # Need to migrate to new database layout
base = os.path.expanduser('~')
@ -73,10 +90,12 @@ def get_library_path():
try:
os.makedirs(library_path)
except:
error_dialog(None, _('Failed to create library'),
_('Failed to create calibre library at: %r. Aborting.')%library_path,
error_dialog(parent, _('Failed to create library'),
_('Failed to create calibre library at: %r.')%library_path,
det_msg=traceback.format_exc(), show=True)
library_path = None
library_path = choose_dir(parent, 'choose calibre library',
_('Choose a location for your new calibre e-book library'),
default_dir=get_default_library_path())
return library_path
class DBRepair(QThread):
@ -159,22 +178,9 @@ class GuiRunner(QObject):
'a new empty library.'),
det_msg=tb, show=True)
if db is None:
fname = _('Calibre Library')
if isinstance(fname, unicode):
try:
fname = fname.encode(filesystem_encoding)
except:
fname = 'Calibre Library'
x = os.path.expanduser('~'+os.sep+fname)
if not os.path.exists(x):
try:
os.makedirs(x)
except:
x = os.path.expanduser('~')
candidate = choose_dir(self.splash_screen, 'choose calibre library',
_('Choose a location for your new calibre e-book library'),
default_dir=x)
default_dir=get_default_library_path())
if not candidate:
self.initialization_failed()
@ -236,8 +242,8 @@ class GuiRunner(QObject):
if gprefs.get('show_splash_screen', True):
self.show_splash_screen()
self.library_path = get_library_path()
if self.library_path is None:
self.library_path = get_library_path(parent=self.splash_screen)
if not self.library_path:
self.initialization_failed()
self.initialize_db()

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff