Sync to trunk.

John Schember 2009-07-13 23:21:05 -04:00
commit 2e6baa0cd7
6 changed files with 100 additions and 12 deletions

View File

@@ -10,9 +10,11 @@ for a particular device.
 import os
 import fnmatch
 import shutil
+from math import ceil
 from itertools import cycle
 from calibre import sanitize_file_name as sanitize
+from calibre.constants import iswindows
 from calibre.ebooks.metadata import authors_to_string
 from calibre.devices.usbms.cli import CLI
 from calibre.devices.usbms.device import Device
@@ -127,6 +129,7 @@ class USBMS(CLI, Device):
         for i, infile in enumerate(files):
             newpath = path
+            resizable = []
             if self.SUPPORTS_SUB_DIRS:
                 mdata = metadata.next()
@@ -135,23 +138,54 @@ class USBMS(CLI, Device):
                     for tag in mdata['tags']:
                         if tag.startswith(_('News')):
                             newpath = os.path.join(newpath, 'news')
-                            newpath = os.path.join(newpath, sanitize(mdata.get('title', '')))
-                            newpath = os.path.join(newpath, sanitize(mdata.get('timestamp', '')))
+                            c = sanitize(mdata.get('title', ''))
+                            if c:
+                                newpath = os.path.join(newpath, c)
+                                resizable.append(c)
+                            c = sanitize(mdata.get('timestamp', ''))
+                            if c:
+                                newpath = os.path.join(newpath, c)
+                                resizable.append(c)
                             break
                         elif tag.startswith('/'):
-                            newpath += tag
-                            newpath = os.path.normpath(newpath)
+                            for c in tag.split('/'):
+                                c = sanitize(c)
+                                if not c: continue
+                                newpath = os.path.join(newpath, c)
+                                resizable.append(c)
                             break
             if newpath == path:
-                newpath = os.path.join(newpath,
-                        sanitize(mdata.get('authors', _('Unknown'))),
-                        sanitize(mdata.get('title', _('Unknown'))))
+                c = sanitize(mdata.get('authors', _('Unknown')))
+                if c:
+                    newpath = os.path.join(newpath, c)
+                    resizable.append(c)
+                c = sanitize(mdata.get('title', _('Unknown')))
+                if c:
+                    newpath = os.path.join(newpath, c)
+                    resizable.append(c)
+            newpath = os.path.abspath(newpath)
+            fname = sanitize(names.next())
+            resizable.append(fname)
+            filepath = os.path.join(newpath, fname)
+            if iswindows and len(filepath) > 250:
+                extra = len(filepath) - 250
+                delta = int(ceil(extra/float(len(resizable))))
+                for x in resizable:
+                    if delta > len(x):
+                        r = ''
+                    else:
+                        r = x[:-delta]
+                    filepath = filepath.replace(os.sep+x+os.sep, os.sep+r+os.sep)
+                filepath = filepath.replace(os.sep+os.sep, os.sep)
+                newpath = os.path.dirname(filepath)
             if not os.path.exists(newpath):
                 os.makedirs(newpath)
-            filepath = os.path.join(newpath, sanitize(names.next()))
             paths.append(filepath)
             if hasattr(infile, 'read'):
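
The new block budgets the 250-character cap (just under Windows' 260-character MAX_PATH) across every path component recorded in resizable: the overage is split evenly, rounded up, and each component loses that many trailing characters, or disappears entirely if it is shorter than its share. Note that the final filename is also appended to resizable, but the os.sep-delimited replace pattern can only match directory components (the filename has no trailing separator), so as written only directory names are actually shortened. A standalone sketch of the same scheme, with a hypothetical shorten_path() helper (not the calibre method itself):

    import os
    from math import ceil

    def shorten_path(filepath, resizable, limit=250):
        # Trim each resizable directory component by an equal share of
        # the overage; a component shorter than its share is dropped.
        if len(filepath) <= limit or not resizable:
            return filepath
        extra = len(filepath) - limit
        delta = int(ceil(extra / float(len(resizable))))
        for x in resizable:
            r = '' if delta > len(x) else x[:-delta]
            filepath = filepath.replace(os.sep + x + os.sep,
                                        os.sep + r + os.sep)
        # Collapse the empty segments left by fully-dropped components.
        while os.sep + os.sep in filepath:
            filepath = filepath.replace(os.sep + os.sep, os.sep)
        return filepath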

View File

@@ -1957,6 +1957,17 @@ class MobiWriter(object):
                     # Test to see if this child's offset is the same as the previous child's
                     # offset, skip it
                     h = child.href
+                    first = False
+
+                    if h is None:
+                        self._oeb.logger.warn('  Ignoring TOC entry with no href:',
+                                child.title)
+                        continue
+                    if h not in self._id_offsets:
+                        self._oeb.logger.warn('  Ignoring missing TOC entry:',
+                                child)
+                        continue
+
                     currentOffset = self._id_offsets[h]
                     # print "_generate_ctoc: child offset: 0x%X" % currentOffset
@@ -1967,7 +1978,6 @@ class MobiWriter(object):
                     else :
                         self._oeb.logger.warn("  Ignoring redundant href: %s in '%s'" % (h, child.title))
-                        first = False
                 else :
                     if self.opts.verbose > 2 :
                         self._oeb.logger.info("skipping class: %s depth %d at position %d" % \

View File

@@ -41,9 +41,12 @@ class SpineItem(unicode):
     def __new__(cls, *args):
         args = list(args)
-        args[0] = args[0].partition('#')[0]
-        obj = super(SpineItem, cls).__new__(cls, *args)
         path = args[0]
+        ppath = path.partition('#')[0]
+        if not os.path.exists(path) and os.path.exists(ppath):
+            path = ppath
+        args[0] = path
+        obj = super(SpineItem, cls).__new__(cls, *args)
         raw = open(path, 'rb').read()
         raw, obj.encoding = xml_to_unicode(raw)
         obj.character_count = character_count(raw)
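
The rewritten __new__ no longer strips a '#fragment' unconditionally: the argument is kept as given when it names an existing file, and the fragment is dropped only when doing so yields a path that does exist. That way spine files whose names genuinely contain '#' are not mangled. The rule, as a standalone sketch:

    import os

    def resolve_spine_path(path):
        # Prefer the path as given; fall back to stripping a fragment
        # only if that produces a file that actually exists.
        ppath = path.partition('#')[0]
        if not os.path.exists(path) and os.path.exists(ppath):
            return ppath
        return path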

Binary file not shown (new image, 994 B).

View File

@@ -47,7 +47,7 @@ recipe_modules = ['recipe_' + r for r in (
         'climate_progress', 'carta', 'slashdot', 'publico',
         'the_budget_fashionista', 'elperiodico_catalan',
         'elperiodico_spanish', 'expansion_spanish', 'lavanguardia',
-        'marca', 'kellog_faculty', 'kellog_insight',
+        'marca', 'kellog_faculty', 'kellog_insight', 'noaa',
         'theeconomictimes_india', '7dias', 'buenosaireseconomico',
         'diagonales', 'miradasalsur', 'newsweek_argentina', 'veintitres',
         'gva_be', 'hln', 'tijd', 'degentenaar', 'inquirer_net', 'uncrate',

View File

@@ -0,0 +1,41 @@
+#!/usr/bin/env python
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+'''
+noaa.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import Tag
+
+class NOAA(BasicNewsRecipe):
+    title                  = 'NOAA Online'
+    __author__             = 'Darko Miletic'
+    description            = 'NOAA'
+    publisher              = 'NOAA'
+    category               = 'news, science, US, ocean'
+    oldest_article         = 15
+    max_articles_per_feed  = 100
+    no_stylesheets         = True
+    use_embedded_content   = False
+    simultaneous_downloads = 1
+    encoding               = 'utf-8'
+    lang                   = 'en-US'
+    language               = _('English')
+
+    remove_tags = [dict(name=['embed','object'])]
+
+    keep_only_tags = [dict(name='div', attrs={'id':'contentArea'})]
+
+    feeds = [(u'NOAA articles', u'http://www.rss.noaa.gov/noaarss.xml')]
+
+    def preprocess_html(self, soup):
+        soup.html['xml:lang'] = self.lang
+        soup.html['lang']     = self.lang
+        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
+        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
+        soup.head.insert(0,mlang)
+        soup.head.insert(1,mcharset)
+        return self.adeify_images(soup)
+
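
The new recipe is a standard BasicNewsRecipe: it pulls the single NOAA RSS feed, keeps only the div with id="contentArea" from each article page, and strips embed/object tags. preprocess_html then stamps an explicit language and charset onto every fetched page before handing it to adeify_images. A quick illustration (BeautifulSoup 3 style, as bundled with calibre at the time; the sample markup is made up) of what one of those meta inserts produces:

    from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag

    soup = BeautifulSoup('<html><head></head><body></body></html>')
    # BS3 Tag takes the parser, the tag name, and attributes as (name, value) pairs.
    mlang = Tag(soup, 'meta', [('http-equiv', 'Content-Language'),
                               ('content', 'en-US')])
    soup.head.insert(0, mlang)
    # soup.head now renders as:
    # <head><meta http-equiv="Content-Language" content="en-US" /></head>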