Sync to trunk.

John Schember 2009-07-21 17:29:01 -04:00
commit 405485f126
53 changed files with 27463 additions and 22962 deletions


@@ -47,6 +47,8 @@ def freeze():
                 '/usr/lib/libexslt.so.0',
                 '/usr/lib/libMagickWand.so',
                 '/usr/lib/libMagickCore.so',
+                '/usr/lib/libgcrypt.so.11',
+                '/usr/lib/libgpg-error.so.0',
                 ]
     binary_includes += [os.path.join(QTDIR, 'lib%s.so.4'%x) for x in QTDLLS]


@@ -6,7 +6,7 @@ __docformat__ = 'restructuredtext en'
 '''
 Freeze app into executable using py2exe.
 '''
-QT_DIR = 'C:\\Qt\\4.5.1'
+QT_DIR = 'C:\\Qt\\4.5.2'
 LIBUSB_DIR = 'C:\\libusb'
 LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll'
 PDFTOHTML = 'C:\\cygwin\\home\\kovid\\poppler-0.10.6\\rel\\pdftohtml.exe'


@@ -93,21 +93,36 @@ def sanitize_file_name(name, substitute='_', as_unicode=False):
 def prints(*args, **kwargs):
     '''
     Print unicode arguments safely by encoding them to preferred_encoding
-    Has the same signature as the print function from Python 3.
+    Has the same signature as the print function from Python 3, except for the
+    additional keyword argument safe_encode, which if set to True will cause the
+    function to use repr when encoding fails.
     '''
     file = kwargs.get('file', sys.stdout)
     sep = kwargs.get('sep', ' ')
     end = kwargs.get('end', '\n')
     enc = preferred_encoding
+    safe_encode = kwargs.get('safe_encode', False)
     if 'CALIBRE_WORKER' in os.environ:
         enc = 'utf-8'
     for i, arg in enumerate(args):
         if isinstance(arg, unicode):
-            arg = arg.encode(enc)
+            try:
+                arg = arg.encode(enc)
+            except UnicodeEncodeError:
+                if not safe_encode:
+                    raise
+                arg = repr(arg)
         if not isinstance(arg, str):
             arg = str(arg)
         if not isinstance(arg, unicode):
-            arg = arg.decode(preferred_encoding, 'replace').encode(enc)
+            arg = arg.decode(preferred_encoding, 'replace')
+            try:
+                arg = arg.encode(enc)
+            except UnicodeEncodeError:
+                if not safe_encode:
+                    raise
+                arg = repr(arg)
         file.write(arg)
         if i != len(args)-1:
             file.write(sep)
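
The hunk above gives prints() an optional safe_encode keyword. A minimal usage sketch, assuming prints is imported from the calibre package as elsewhere in the code base; the sample string is illustrative, not from this commit:

    # -*- coding: utf-8 -*-
    from calibre import prints

    title = u'caf\xe9 \u2014 \u76ee\u6b21'
    # Default behaviour: a UnicodeEncodeError propagates if the target
    # encoding cannot represent the text.
    # prints(title)
    # With safe_encode=True the offending argument is written as its repr()
    # instead of raising.
    prints(title, safe_encode=True)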


@@ -2,7 +2,7 @@ __license__ = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 __appname__ = 'calibre'
-__version__ = '0.6.0b14'
+__version__ = '0.6.0b16'
 __author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
 import re


@@ -175,8 +175,8 @@ def add_pipeline_options(parser, plumber):
         if rec.level < rec.HIGH:
             option_recommendation_to_cli_option(add_option, rec)

-    option_recommendation_to_cli_option(parser.add_option,
-            plumber.get_option_by_name('list_recipes'))
+    parser.add_option('--list-recipes', default=False, action='store_true',
+            help=_('List builtin recipes'))

 def option_parser():
     return OptionParser(usage=USAGE)
@@ -193,6 +193,22 @@ class ProgressBar(object):
         self.log('%d%% %s'%(percent, msg))

 def create_option_parser(args, log):
+    if '--version' in args:
+        from calibre.constants import __appname__, __version__, __author__
+        log(os.path.basename(args[0]), '('+__appname__, __version__+')')
+        log('Created by:', __author__)
+        raise SystemExit(0)
+    if '--list-recipes' in args:
+        from calibre.web.feeds.recipes import titles
+        log('Available recipes:')
+        for title in sorted(titles):
+            try:
+                log('\t'+title)
+            except:
+                log('\t'+repr(title))
+        log('%d recipes available'%len(titles))
+        raise SystemExit(0)
+
     parser = option_parser()
     if len(args) < 3:
         print_help(parser, log)


@@ -406,9 +406,6 @@ OptionRecommendation(name='language',
             recommended_value=None, level=OptionRecommendation.LOW,
             help=_('Set the language.')),

-OptionRecommendation(name='list_recipes',
-            recommended_value=False, help=_('List available recipes.')),
-
         ]

         input_fmt = os.path.splitext(self.input)[1]
@@ -611,13 +608,6 @@ OptionRecommendation(name='list_recipes',
         self.setup_options()
         if self.opts.verbose:
             self.log.filter_level = self.log.DEBUG
-        if self.opts.list_recipes:
-            from calibre.web.feeds.recipes import titles
-            self.log('Available recipes:')
-            for title in sorted(titles):
-                self.log('\t'+title)
-            self.log('%d recipes available'%len(titles))
-            raise SystemExit(0)
         self.flush()

         # Run any preprocess plugins


@@ -180,9 +180,9 @@ def main(args=sys.argv):
     if opts.to_opf is not None:
         from calibre.ebooks.metadata.opf2 import OPFCreator
         opf = OPFCreator(os.getcwdu(), mi)
-        with open(opts.opf, 'wb') as f:
+        with open(opts.to_opf, 'wb') as f:
             opf.render(f)
-        prints(_('OPF created in'), opts.opf)
+        prints(_('OPF created in'), opts.to_opf)

     if opts.get_cover is not None:
         if mi.cover_data and mi.cover_data[1]:


@@ -15,6 +15,7 @@
         <dc:description py:if="mi.comments">${mi.comments}</dc:description>
         <dc:publisher py:if="mi.publisher">${mi.publisher}</dc:publisher>
         <dc:identifier opf:scheme="ISBN" py:if="mi.isbn">${mi.isbn}</dc:identifier>
+        <dc:rights py:if="mi.rights">${mi.rights}</dc:rights>
         <meta py:if="mi.series is not None" name="calibre:series" content="${mi.series}"/>
         <meta py:if="mi.series_index is not None" name="calibre:series_index" content="${mi.format_series_index()}"/>
         <meta py:if="mi.rating is not None" name="calibre:rating" content="${mi.rating}"/>


@@ -439,7 +439,7 @@ class OPF(object):
     publisher   = MetadataField('publisher')
    language    = MetadataField('language')
    comments    = MetadataField('description')
-    category    = MetadataField('category')
+    category    = MetadataField('type')
    rights      = MetadataField('rights')
    series      = MetadataField('series', is_dc=False)
    series_index = MetadataField('series_index', is_dc=False, formatter=float, none_is=1)
@@ -967,6 +967,130 @@ class OPFCreator(MetaInformation):
         ncx_stream.flush()

+def metadata_to_opf(mi, as_string=True):
+    from lxml import etree
+    import textwrap
+    from calibre.ebooks.oeb.base import OPF, DC
+
+    if not mi.application_id:
+        mi.application_id = str(uuid.uuid4())
+
+    if not mi.book_producer:
+        mi.book_producer = __appname__ + ' (%s) '%__version__ + \
+            '[http://calibre-ebook.com]'
+
+    if not mi.language:
+        mi.language = 'UND'
+
+    root = etree.fromstring(textwrap.dedent(
+    '''
+    <package xmlns="http://www.idpf.org/2007/opf" unique-identifier="%(a)s_id">
+        <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">
+            <dc:identifier opf:scheme="%(a)s" id="%(a)s_id">%(id)s</dc:identifier>
+        </metadata>
+        <guide/>
+    </package>
+    '''%dict(a=__appname__, id=mi.application_id)))
+    metadata = root[0]
+    guide = root[1]
+    metadata[0].tail = '\n'+(' '*8)
+
+    def factory(tag, text=None, sort=None, role=None, scheme=None, name=None,
+            content=None):
+        attrib = {}
+        if sort:
+            attrib[OPF('file-as')] = sort
+        if role:
+            attrib[OPF('role')] = role
+        if scheme:
+            attrib[OPF('scheme')] = scheme
+        if name:
+            attrib['name'] = name
+        if content:
+            attrib['content'] = content
+        elem = metadata.makeelement(tag, attrib=attrib)
+        elem.tail = '\n'+(' '*8)
+        if text:
+            elem.text = text.strip()
+        metadata.append(elem)
+
+    factory(DC('title'), mi.title, mi.title_sort)
+    for au in mi.authors:
+        factory(DC('creator'), au, mi.author_sort, 'aut')
+    factory(DC('contributor'), mi.book_producer, __appname__, 'bkp')
+    if hasattr(mi.pubdate, 'isoformat'):
+        factory(DC('date'), mi.pubdate.isoformat())
+    factory(DC('language'), mi.language)
+    if mi.category:
+        factory(DC('type'), mi.category)
+    if mi.comments:
+        factory(DC('description'), mi.comments)
+    if mi.publisher:
+        factory(DC('publisher'), mi.publisher)
+    if mi.isbn:
+        factory(DC('identifier'), mi.isbn, scheme='ISBN')
+    if mi.rights:
+        factory(DC('rights'), mi.rights)
+    if mi.tags:
+        for tag in mi.tags:
+            factory(DC('subject'), tag)
+    meta = lambda n, c: factory('meta', name='calibre:'+n, content=c)
+    if mi.series:
+        meta('series', mi.series)
+    if mi.series_index is not None:
+        meta('series_index', mi.format_series_index())
+    if mi.rating is not None:
+        meta('rating', str(mi.rating))
+    if hasattr(mi.timestamp, 'isoformat'):
+        meta('timestamp', mi.timestamp.isoformat())
+    if mi.publication_type:
+        meta('publication_type', mi.publication_type)
+
+    metadata[-1].tail = '\n' +(' '*4)
+
+    if mi.cover:
+        guide.text = '\n'+(' '*8)
+        r = guide.makeelement(OPF('reference'),
+                attrib={'type':'cover', 'title':_('Cover'), 'href':mi.cover})
+        r.tail = '\n' +(' '*4)
+        guide.append(r)
+
+    return etree.tostring(root, pretty_print=True, encoding='utf-8',
+            xml_declaration=True) if as_string else root
+
+def test_m2o():
+    from datetime import datetime
+    from cStringIO import StringIO
+    mi = MetaInformation('test & title', ['a"1', "a'2"])
+    mi.title_sort = 'a\'"b'
+    mi.author_sort = 'author sort'
+    mi.pubdate = datetime.now()
+    mi.language = 'en'
+    mi.category = 'test'
+    mi.comments = 'what a fun book\n\n'
+    mi.publisher = 'publisher'
+    mi.isbn = 'boooo'
+    mi.tags = ['a', 'b']
+    mi.series = 's"c\'l&<>'
+    mi.series_index = 3.34
+    mi.rating = 3
+    mi.timestamp = datetime.now()
+    mi.publication_type = 'ooooo'
+    mi.rights = 'yes'
+    mi.cover = 'asd.jpg'
+    opf = metadata_to_opf(mi)
+    print opf
+    newmi = MetaInformation(OPF(StringIO(opf)))
+    for attr in ('author_sort', 'title_sort', 'comments', 'category',
+            'publisher', 'series', 'series_index', 'rating',
+            'isbn', 'tags', 'cover_data', 'application_id',
+            'language', 'cover',
+            'book_producer', 'timestamp', 'lccn', 'lcc', 'ddc',
+            'pubdate', 'rights', 'publication_type'):
+        o, n = getattr(mi, attr), getattr(newmi, attr)
+        if o != n and o.strip() != n.strip():
+            print 'FAILED:', attr, getattr(mi, attr), '!=', getattr(newmi, attr)
+
 class OPFTest(unittest.TestCase):

     def setUp(self):


@@ -22,7 +22,7 @@ def debug(*args):
 def read_metadata_(task, tdir, notification=lambda x,y:x):
     from calibre.ebooks.metadata.meta import metadata_from_formats
-    from calibre.ebooks.metadata.opf2 import OPFCreator
+    from calibre.ebooks.metadata.opf2 import metadata_to_opf
     for x in task:
         try:
             id, formats = x
@@ -33,9 +33,8 @@ def read_metadata_(task, tdir, notification=lambda x,y:x):
             if mi.cover_data:
                 cdata = mi.cover_data[-1]
                 mi.cover_data = None
-            opf = OPFCreator(tdir, mi)
             with open(os.path.join(tdir, '%s.opf'%id), 'wb') as f:
-                opf.render(f)
+                f.write(metadata_to_opf(mi))
             if cdata:
                 with open(os.path.join(tdir, str(id)), 'wb') as f:
                     f.write(cdata)
@@ -116,7 +115,10 @@ class ReadMetadata(Thread):
             if job.failed:
                 prints(job.details)
             if os.path.exists(job.log_path):
-                os.remove(job.log_path)
+                try:
+                    os.remove(job.log_path)
+                except:
+                    pass

 def read_metadata(paths, result_queue, chunk=50, spare_server=None):
@@ -191,7 +193,10 @@ class SaveWorker(Thread):
                 prints(job.details)
                 self.error = job.details
             if os.path.exists(job.log_path):
-                os.remove(job.log_path)
+                try:
+                    os.remove(job.log_path)
+                except:
+                    pass

 def save_book(task, library_path, path, single_dir, single_format,


@@ -5,6 +5,9 @@ __docformat__ = 'restructuredtext en'

 from struct import pack

+lang_codes = {
+    }
+
 main_language = {
     0 : "NEUTRAL",
     54 : "AFRIKAANS",
@@ -314,7 +317,7 @@ def iana2mobi(icode):
         if lang in IANA_MOBI:
             langdict = IANA_MOBI[lang]
             break
     mcode = langdict[None]
     while len(subtags) > 0:
         subtag = subtags.pop(0)
@@ -326,3 +329,21 @@ def iana2mobi(icode):
             mcode = langdict[subtag]
             break
     return pack('>HBB', 0, mcode[1], mcode[0])
+
+def mobi2iana(langcode, sublangcode):
+    prefix = suffix = None
+    for code, d in IANA_MOBI.items():
+        for subcode, t in d.items():
+            cc, cl = t
+            if cc == langcode:
+                prefix = code
+                if cl == sublangcode:
+                    suffix = subcode.lower() if subcode else None
+                break
+        if prefix is not None:
+            break
+    if prefix is None:
+        return 'und'
+    if suffix is None:
+        return prefix
+    return prefix + '-' + suffix
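
A hedged sketch of how the new mobi2iana() pairs up with the existing iana2mobi(); the byte layout and language tags below are inferred from the two functions in this hunk, not from separate documentation:

    from calibre.ebooks.mobi.langcodes import iana2mobi, mobi2iana

    packed = iana2mobi('en-US')          # 4 bytes, pack('>HBB', 0, sublang, lang)
    langid, sublangid = ord(packed[3]), ord(packed[2])
    print mobi2iana(langid, sublangid)   # expected to round-trip to something like 'en-us'
    print mobi2iana(0xFF, 0xFF)          # codes not in IANA_MOBI fall back to 'und'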


@@ -67,31 +67,65 @@ class MOBIOutput(OutputFormatPlugin):
             self.oeb.manifest.add(id, href, 'image/gif', data=raw)
             self.oeb.guide.add('masthead', 'Masthead Image', href)

+    def dump_toc(self, toc) :
+        self.log( "\n >>> TOC contents <<<")
+        self.log( " toc.title: %s" % toc.title)
+        self.log( " toc.href: %s" % toc.href)
+        for periodical in toc.nodes :
+            self.log( "\tperiodical title: %s" % periodical.title)
+            self.log( "\t href: %s" % periodical.href)
+            for section in periodical :
+                self.log( "\t\tsection title: %s" % section.title)
+                self.log( "\t\tfirst article: %s" % section.href)
+                for article in section :
+                    self.log( "\t\t\tarticle title: %s" % repr(article.title))
+                    self.log( "\t\t\t href: %s" % article.href)
+
+    def dump_manifest(self) :
+        self.log( "\n >>> Manifest entries <<<")
+        for href in self.oeb.manifest.hrefs :
+            self.log ("\t%s" % href)
+
     def periodicalize_toc(self):
         from calibre.ebooks.oeb.base import TOC
         toc = self.oeb.toc
+        if not toc or len(self.oeb.spine) < 3:
+            return
         if toc and toc[0].klass != 'periodical':
-            start_href = self.oeb.spine[0].href
+            one, two = self.oeb.spine[0], self.oeb.spine[1]
             self.log('Converting TOC for MOBI periodical indexing...')
             articles = {}
             if toc.depth() < 3:
+                # single section periodical
+                self.oeb.manifest.remove(one)
+                self.oeb.manifest.remove(two)
                 sections = [TOC(klass='section', title=_('All articles'),
-                    href=start_href)]
+                    href=self.oeb.spine[0].href)]
                 for x in toc:
                     sections[0].nodes.append(x)
             else:
+                # multi-section periodical
+                self.oeb.manifest.remove(one)
                 sections = list(toc)
-                for x in sections:
+                for i,x in enumerate(sections):
                     x.klass = 'section'
+                    articles_ = list(x)
+                    if articles_:
+                        self.oeb.manifest.remove(self.oeb.manifest.hrefs[x.href])
+                        x.href = articles_[0].href
+
             for sec in sections:
                 articles[id(sec)] = []
                 for a in list(sec):
                     a.klass = 'article'
                     articles[id(sec)].append(a)
                     sec.nodes.remove(a)
-            root = TOC(klass='periodical', href=start_href,
+
+            root = TOC(klass='periodical', href=self.oeb.spine[0].href,
                     title=unicode(self.oeb.metadata.title[0]))
+
             for s in sections:
                 if articles[id(s)]:
                     for a in articles[id(s)]:
@@ -103,6 +137,13 @@ class MOBIOutput(OutputFormatPlugin):
             toc.nodes.append(root)

+            # Fix up the periodical href to point to first section href
+            toc.nodes[0].href = toc.nodes[0].nodes[0].href
+
+        # GR diagnostics
+        #self.dump_toc(toc)
+        #self.dump_manifest()
+
     def convert(self, oeb, output_path, input_plugin, opts, log):
         self.log, self.opts, self.oeb = log, opts, oeb


@@ -27,7 +27,7 @@ from calibre.ebooks import DRMError
 from calibre.ebooks.chardet import ENCODING_PATS
 from calibre.ebooks.mobi import MobiError
 from calibre.ebooks.mobi.huffcdic import HuffReader
-from calibre.ebooks.mobi.langcodes import main_language, sub_language
+from calibre.ebooks.mobi.langcodes import main_language, sub_language, mobi2iana
 from calibre.ebooks.compression.palmdoc import decompress_doc
 from calibre.ebooks.metadata import MetaInformation
 from calibre.ebooks.metadata.opf2 import OPFCreator, OPF
@@ -163,7 +163,11 @@ class BookHeader(object):
         if self.exth_flag & 0x40:
             self.exth = EXTHHeader(raw[16 + self.length:], self.codec, self.title)
             self.exth.mi.uid = self.unique_id
-            self.exth.mi.language = self.language
+            try:
+                self.exth.mi.language = mobi2iana(langid, sublangid)
+            except:
+                import traceback
+                traceback.print_exc()

 class MetadataHeader(BookHeader):
@@ -290,7 +294,7 @@ class MobiReader(object):
         for pat in ENCODING_PATS:
             self.processed_html = pat.sub('', self.processed_html)
         e2u = functools.partial(entity_to_unicode,
-            exceptions=['lt', 'gt', 'amp', 'apos', 'quot'])
+            exceptions=['lt', 'gt', 'amp', 'apos', 'quot', '#60', '#62'])
         self.processed_html = re.sub(r'&(\S+?);', e2u,
                 self.processed_html)
         self.extract_images(processed_records, output_dir)


@@ -31,6 +31,7 @@ from calibre.ebooks.compression.palmdoc import compress_doc

 INDEXING = True
 FCIS_FLIS = True
+WRITE_PBREAKS = True

 # TODO:
 #    - Optionally rasterize tables
@@ -189,25 +190,21 @@ class Serializer(object):
             path = urldefrag(ref.href)[0]
             if hrefs[path].media_type not in OEB_DOCS:
                 continue

-            if ref.type == 'other.start' :
-                # Kindle-specific 'Start Reading' directive
-                buffer.write('<reference title="Startup Page" ')
-                buffer.write('type="start" ')
-                self.serialize_href(ref.href)
-                # Space required or won't work, I kid you not
-                buffer.write(' />')
-            else:
-                buffer.write('<reference type="')
+            buffer.write('<reference type="')
+            if ref.type.startswith('other.') :
+                self.serialize_text(ref.type.replace('other.',''), quot=True)
+            else :
                 self.serialize_text(ref.type, quot=True)
-                buffer.write('" ')
-                if ref.title is not None:
-                    buffer.write('title="')
-                    self.serialize_text(ref.title, quot=True)
-                    buffer.write('" ')
-                self.serialize_href(ref.href)
-                # Space required or won't work, I kid you not
-                buffer.write(' />')
+            buffer.write('" ')
+            if ref.title is not None:
+                buffer.write('title="')
+                self.serialize_text(ref.title, quot=True)
+                buffer.write('" ')
+            self.serialize_href(ref.href)
+            # Space required or won't work, I kid you not
+            buffer.write(' />')
         buffer.write('</guide>')

     def serialize_href(self, href, base=None):
@@ -653,23 +650,19 @@ class MobiWriter(object):
                     # *** This should check currentSectionNumber, because content could start late
                     if thisRecord > 0:
-                        # If next article falls into a later record, bump thisRecord
-                        thisRecordPrime = thisRecord
-                        if (offset + length) // RECORD_SIZE > thisRecord :
-                            thisRecordPrime = (offset + length) // RECORD_SIZE
                         sectionChangesInThisRecord = True
-                        sectionChangedInRecordNumber = thisRecordPrime
-                        self._currentSectionIndex += 1      # <<<
-                        self._HTMLRecords[thisRecordPrime].nextSectionNumber = self._currentSectionIndex
-                        # The following article node opens the nextSection
-                        self._HTMLRecords[thisRecordPrime].nextSectionOpeningNode = myIndex
+                        sectionChangesInRecordNumber = thisRecord
+                        self._currentSectionIndex += 1
+                        self._HTMLRecords[thisRecord].nextSectionNumber = self._currentSectionIndex
+                        # The following node opens the nextSection
+                        self._HTMLRecords[thisRecord].nextSectionOpeningNode = myIndex
                         continue
                     else :
                         continue

                 # If no one has taken the openingNode slot, it must be us
                 # This could happen before detecting a section change
                 if self._HTMLRecords[thisRecord].openingNode == -1 :
                     self._HTMLRecords[thisRecord].openingNode = myIndex
                     self._HTMLRecords[thisRecord].openingNodeParent = self._currentSectionIndex
@@ -1267,30 +1260,28 @@ class MobiWriter(object):
             record.write(data)

             # Marshall's utf-8 break code.
-            record.write(overlap)
-            record.write(pack('>B', len(overlap)))
-            nextra = 0
-            pbreak = 0
-            running = offset
+            if WRITE_PBREAKS :
+                record.write(overlap)
+                record.write(pack('>B', len(overlap)))
+                nextra = 0
+                pbreak = 0
+                running = offset
                 while breaks and (breaks[0] - offset) < RECORD_SIZE:
                     # .pop returns item, removes it from list
                     pbreak = (breaks.pop(0) - running) >> 3
                     if self.opts.verbose > 2 :
                         self._oeb.logger.info('pbreak = 0x%X at 0x%X' % (pbreak, record.tell()) )
                     encoded = decint(pbreak, DECINT_FORWARD)
                     record.write(encoded)
                     running += pbreak << 3
                     nextra += len(encoded)
-            lsize = 1
-            while True:
-                size = decint(nextra + lsize, DECINT_BACKWARD)
-                if len(size) == lsize:
-                    break
-                lsize += 1
-            record.write(size)
+                lsize = 1
+                while True:
+                    size = decint(nextra + lsize, DECINT_BACKWARD)
+                    if len(size) == lsize:
+                        break
+                    lsize += 1
+                record.write(size)

             # Write Trailing Byte Sequence
             if INDEXING and self._indexable:
@@ -1305,15 +1296,6 @@ class MobiWriter(object):
                 else :
                     raise NotImplementedError('Indexing for mobitype 0x%X not implemented' % booktype)

-                # Dump the current HTML Record Data / TBS
-                # GR diagnostics
-                if False :
-                    self._HTMLRecords[nrecords].dumpData(nrecords, self._oeb)
-                    outstr = ''
-                    for eachbyte in self._tbSequence:
-                        outstr += '0x%02X ' % ord(eachbyte)
-                    self._oeb.logger.info('  Trailing Byte Sequence: %s\n' % outstr)
-
                 # Write the sequence
                 record.write(self._tbSequence)
@@ -1370,8 +1352,13 @@ class MobiWriter(object):
         metadata = self._oeb.metadata
         exth = self._build_exth()
         last_content_record = len(self._records) - 1

+        '''
         if INDEXING and self._indexable:
             self._generate_end_records()
+        '''
+        self._generate_end_records()
+
         record0 = StringIO()
         # The PalmDOC Header
         record0.write(pack('>HHIHHHH', self._compression, 0,
@@ -1501,16 +1488,19 @@ class MobiWriter(object):
         record0.write(pack('>IIII', 0xffffffff, 0, 0xffffffff, 0xffffffff))

         # 0xe0 - 0xe3 : Extra record data
-        # The '5' is a bitmask of extra record data at the end:
+        # Extra record data flags:
         #   - 0x1: <extra multibyte bytes><size> (?)
         #   - 0x2: <TBS indexing description of this HTML record><size> GR
         #   - 0x4: <uncrossable breaks><size>
-        # Of course, the formats aren't quite the same.
         # GR: Use 7 for indexed files, 5 for unindexed
-        if INDEXING and self._indexable :
-            record0.write(pack('>I', 7))
-        else:
-            record0.write(pack('>I', 5))
+        # Setting bit 2 (0x4) disables <guide><reference type="start"> functionality
+        trailingDataFlags = 1
+        if self._indexable :
+            trailingDataFlags |= 2
+        if WRITE_PBREAKS :
+            trailingDataFlags |= 4
+        record0.write(pack('>I', trailingDataFlags))

         # 0xe4 - 0xe7 : Primary index record
         record0.write(pack('>I', 0xffffffff if self._primary_index_record is
@@ -1681,6 +1671,8 @@ class MobiWriter(object):
         header.write(pack('>I', 0))

         # 0x10 - 0x13 : Generator ID
+        # This value may impact the position of flagBits written in
+        # write_article_node(). Change with caution.
         header.write(pack('>I', 6))

         # 0x14 - 0x17 : IDXT offset
@@ -1959,7 +1951,7 @@ class MobiWriter(object):
         self._oeb.logger.info('Generating flat CTOC ...')
         previousOffset = -1
         currentOffset = 0
-        for (i, child) in enumerate(toc.iter()):
+        for (i, child) in enumerate(toc.iterdescendants()):
             # Only add chapters or articles at depth==1
             # no class defaults to 'chapter'
             if child.klass is None : child.klass = 'chapter'
@@ -2077,31 +2069,17 @@ class MobiWriter(object):
             hasAuthor = True if self._ctoc_map[index]['authorOffset'] else False
             hasDescription = True if self._ctoc_map[index]['descriptionOffset'] else False

-            initialOffset = offset
-
-            if hasAuthor :
-                if offset < 0x4000 :
-                    # Set bit 17
-                    offset += 0x00010000
-                else :
-                    # Set bit 24
-                    offset += 0x00800000
-
-            if hasDescription :
-                if initialOffset < 0x4000 :
-                    # Set bit 16
-                    offset += 0x00008000
-                else :
-                    # Set bit 23
-                    offset += 0x00400000
-
-            # If we didn't set any flags, write an extra zero in the stream
-            # Seems unnecessary, but matching Mobigen
-            if initialOffset == offset:
-                indxt.write(chr(0))
-
+            # flagBits may be dependent upon the generatorID written at 0x10 in generate_index().
+            # in INDX0. Mobigen uses a generatorID of 2 and writes these bits at positions 1 & 2;
+            # calibre uses a generatorID of 6 and writes the bits at positions 2 & 3.
+            flagBits = 0
+            if hasAuthor : flagBits |= 0x4
+            if hasDescription : flagBits |= 0x2
+            indxt.write(pack('>B',flagBits))                            # Author/description flags
             indxt.write(decint(offset, DECINT_FORWARD))                 # offset
             indxt.write(decint(length, DECINT_FORWARD))                 # length
             indxt.write(decint(self._ctoc_map[index]['titleOffset'], DECINT_FORWARD))  # vwi title offset in CNCX


@@ -199,19 +199,21 @@ class CSSFlattener(object):
         if node.tag == XHTML('font'):
             node.tag = XHTML('span')
             if 'size' in node.attrib:
+                def force_int(raw):
+                    return int(re.search(r'([0-9+-]+)', raw).group(1))
                 size = node.attrib['size'].strip()
                 if size:
                     fnums = self.context.source.fnums
                     if size[0] in ('+', '-'):
                         # Oh, the warcrimes
-                        esize = 3 + int(size)
+                        esize = 3 + force_int(size)
                         if esize < 1:
                             esize = 1
                         if esize > 7:
                             esize = 7
                         cssdict['font-size'] = fnums[esize]
                     else:
-                        cssdict['font-size'] = fnums[int(size)]
+                        cssdict['font-size'] = fnums[force_int(size)]
                 del node.attrib['size']
         if 'color' in node.attrib:
             cssdict['color'] = node.attrib['color']
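
The force_int() helper added above tolerates the sloppy values that show up in <font size="..."> attributes. A small illustration of its behaviour; the inputs are made-up examples, not values from this commit:

    import re

    def force_int(raw):
        return int(re.search(r'([0-9+-]+)', raw).group(1))

    print force_int('+2')    # 2, so esize = 3 + 2
    print force_int('-1')    # -1
    print force_int('3px')   # 3, trailing junk is ignored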


@@ -38,4 +38,3 @@ class Clean(object):
                 'title-page', 'copyright-page', 'start'):
             self.oeb.guide.remove(x)
-


@@ -94,12 +94,17 @@ class MergeMetadata(object):
             cdata = open(mi.cover, 'rb').read()
         elif mi.cover_data and mi.cover_data[-1]:
             cdata = mi.cover_data[1]
-        id = None
-        old_cover = self.oeb.guide.remove('cover')
-        self.oeb.guide.remove('titlepage')
+        id = old_cover = None
+        if 'cover' in self.oeb.guide:
+            old_cover = self.oeb.guide['cover']
+        if cdata:
+            self.oeb.guide.remove('cover')
+            self.oeb.guide.remove('titlepage')
         if old_cover is not None:
             if old_cover.href in self.oeb.manifest.hrefs:
                 item = self.oeb.manifest.hrefs[old_cover.href]
+                if not cdata:
+                    return item.id
                 self.oeb.manifest.remove(item)
         if cdata:
             id, href = self.oeb.manifest.generate('cover', 'cover.jpg')


@@ -230,7 +230,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
         self.cpixmap = None
         self.cover.setAcceptDrops(True)
         self.pubdate.setMinimumDate(QDate(100,1,1))
-        self.connect(self.cover, SIGNAL('cover_changed()'), self.cover_dropped)
+        self.connect(self.cover, SIGNAL('cover_changed(PyQt_PyObject)'), self.cover_dropped)
         QObject.connect(self.cover_button, SIGNAL("clicked(bool)"), \
                 self.select_cover)
         QObject.connect(self.add_format_button, SIGNAL("clicked(bool)"), \
@@ -288,7 +288,10 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
         si = self.db.series_index(row)
         if si is None:
             si = 1.0
-        self.series_index.setValue(si)
+        try:
+            self.series_index.setValue(float(si))
+        except:
+            self.series_index.setValue(1.0)
         QObject.connect(self.series, SIGNAL('currentIndexChanged(int)'), self.enable_series_index)
         QObject.connect(self.series, SIGNAL('editTextChanged(QString)'), self.enable_series_index)
@@ -321,7 +324,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
             self.authors.setText(title)
             self.author_sort.setText('')

-    def cover_dropped(self):
+    def cover_dropped(self, paths):
         self.cover_changed = True
         self.cover_data = self.cover.cover_data


@@ -142,7 +142,7 @@ class ImageView(QLabel):
                 self.setPixmap(pmap)
                 event.accept()
                 self.cover_data = open(path, 'rb').read()
-                self.emit(SIGNAL('cover_changed()'), paths, Qt.QueuedConnection)
+                self.emit(SIGNAL('cover_changed(PyQt_PyObject)'), paths)
                 break

     def dragMoveEvent(self, event):


@@ -29,7 +29,7 @@ from calibre.ebooks.metadata import string_to_authors, authors_to_string, \
         MetaInformation, authors_to_sort_string
 from calibre.ebooks.metadata.meta import get_metadata, set_metadata, \
         metadata_from_formats
-from calibre.ebooks.metadata.opf2 import OPFCreator
+from calibre.ebooks.metadata.opf2 import metadata_to_opf
 from calibre.constants import preferred_encoding, iswindows, isosx, filesystem_encoding
 from calibre.ptempfile import PersistentTemporaryFile
 from calibre.customize.ui import run_plugins_on_import
@@ -1611,13 +1611,12 @@ books_series_link feeds
             id = idx if index_is_id else self.id(idx)
             id = str(id)
             if not single_dir and not os.path.exists(tpath):
-                os.mkdir(tpath)
+                os.makedirs(tpath)
             name = au + ' - ' + title if byauthor else title + ' - ' + au
             name += '_'+id
             base = dir if single_dir else tpath
             mi = self.get_metadata(idx, index_is_id=index_is_id, get_cover=True)
-            f = open(os.path.join(base, sanitize_file_name(name)+'.opf'), 'wb')
             if not mi.authors:
                 mi.authors = [_('Unknown')]
             cdata = self.cover(int(id), index_is_id=True)
@@ -1625,9 +1624,9 @@ books_series_link feeds
                 cname = sanitize_file_name(name)+'.jpg'
                 open(os.path.join(base, cname), 'wb').write(cdata)
                 mi.cover = cname
-            opf = OPFCreator(base, mi)
-            opf.render(f)
-            f.close()
+            with open(os.path.join(base, sanitize_file_name(name)+'.opf'),
+                    'wb') as f:
+                f.write(metadata_to_opf(mi))
             fmts = self.formats(idx, index_is_id=index_is_id)
             if not fmts:


@@ -1,589 +0,0 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
'''
Keep track of donations to calibre.
'''
import sys, cStringIO, textwrap, traceback, re, os, time, calendar
from datetime import date, timedelta
from math import sqrt
os.environ['HOME'] = '/tmp'
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import cherrypy
from lxml import etree
def range_for_month(year, month):
ty, tm = date.today().year, date.today().month
min = max = date(year=year, month=month, day=1)
x = date.today().day if ty == year and tm == month else 31
while x > 1:
try:
max = min.replace(day=x)
break
except ValueError:
x -= 1
return min, max
def range_for_year(year):
return date(year=year, month=1, day=1), date(year=year, month=12, day=31)
def days_in_month(year, month):
c = calendar.Calendar()
ans = 0
for x in c.itermonthdays(year, month):
if x != 0: ans += 1
return ans
def rationalize_country(country):
if not country:
return 'Unknown'
if re.match('(?i)(US|USA|America)', country):
country = 'USA'
elif re.match('(?i)(UK|Britain|england)', country):
country = 'UK'
elif re.match('(?i)italy', country):
country = 'Italy'
elif re.match('(?i)germany', country):
country = 'Germany'
elif re.match('(?i)france', country):
country = 'France'
elif re.match('(?i)ireland', country):
country = 'Ireland'
elif re.match('(?i)norway', country):
country = 'Norway'
elif re.match('(?i)canada', country):
country = 'Canada'
elif re.match(r'(?i)new\s*zealand', country):
country = 'New Zealand'
elif re.match('(?i)jamaica', country):
country = 'Jamaica'
elif re.match('(?i)australia', country):
country = 'Australia'
elif re.match('(?i)Netherlands', country):
country = 'Netherlands'
elif re.match('(?i)spain', country):
country = 'Spain'
elif re.match('(?i)colombia', country):
country = 'Colombia'
return country
class Record(object):
def __init__(self, email, country, amount, date, name):
self.email = email
self.country = country
self.amount = amount
self.date = date
self.name = name
def __str__(self):
return '<donation email="%s" country="%s" amount="%.2f" date="%s" %s />'%\
(self.email, self.country, self.amount, self.date.isoformat(), 'name="%s"'%self.name if self.name else '')
class Country(list):
def __init__(self, name):
list.__init__(self)
self.name = name
self.total = 0.
self.percent = 0.
def append(self, r):
self.total += r.amount
list.append(self, r)
def __str__(self):
return self.name + ': %.2f%%'%self.percent
def __cmp__(self, other):
return cmp(self.total, other.total)
class Stats:
def get_deviation(self, amounts):
l = float(len(amounts))
if l == 0:
return 0
mean = sum(amounts)/l
return sqrt( sum([i**2 for i in amounts])/l - mean**2 )
def __init__(self, records, start, end):
self.total = sum([r.amount for r in records])
self.days = {}
l, rg = date.max, date.min
self.totals = []
for r in records:
self.totals.append(r.amount)
l, rg = min(l, r.date), max(rg, r.date)
if r.date not in self.days.keys():
self.days[r.date] = []
self.days[r.date].append(r)
self.min, self.max = start, end
self.period = (self.max - self.min) + timedelta(days=1)
daily_totals = []
day = self.min
while day <= self.max:
x = self.days.get(day, [])
daily_totals.append(sum([y.amount for y in x]))
day += timedelta(days=1)
self.daily_average = self.total/self.period.days
self.daily_deviation = self.get_deviation(daily_totals)
self.average = self.total/len(records) if len(records) else 0.
self.average_deviation = self.get_deviation(self.totals)
self.countries = {}
self.daily_totals = daily_totals
for r in records:
if r.country not in self.countries.keys():
self.countries[r.country] = Country(r.country)
self.countries[r.country].append(r)
for country in self.countries.values():
country.percent = (100 * country.total/self.total) if self.total else 0.
def get_daily_averages(self):
month_buckets, month_order = {}, []
x = self.min
for t in self.daily_totals:
month = (x.year, x.month)
if month not in month_buckets:
month_buckets[month] = 0.
month_order.append(month)
month_buckets[month] += t
x += timedelta(days=1)
c = calendar.Calendar()
month_days = [days_in_month(*x) for x in month_order]
month_averages = [month_buckets[x]/float(y) for x, y in zip(month_order, month_days)]
return month_order, month_averages
def __str__(self):
buf = cStringIO.StringIO()
print >>buf, '\tTotal: %.2f'%self.total
print >>buf, '\tDaily Average: %.2f'%self.daily_average
print >>buf, '\tAverage contribution: %.2f'%self.average
print >>buf, '\tCountry breakup:'
for c in self.countries.values():
print >>buf, '\t\t', c
return buf.getvalue()
def to_html(self, num_of_countries=sys.maxint):
countries = sorted(self.countries.values(), cmp=cmp, reverse=True)[:num_of_countries]
crows = ['<tr><td>%s</td><td class="country_percent">%.2f %%</td></tr>'%(c.name, c.percent) for c in countries]
ctable = '<table>\n<tr><th>Country</th><th>Contribution</th></tr>\n%s</table>'%('\n'.join(crows))
if num_of_countries < sys.maxint:
ctable = '<p>Top %d countries</p>'%num_of_countries + ctable
return textwrap.dedent('''
<div class="stats">
<p style="font-weight: bold">Donations in %(period)d days [%(min)s &mdash; %(max)s]:</p>
<table style="border-left: 4em">
<tr><td>Total</td><td class="money">$%(total).2f (%(num)d)</td></tr>
<tr><td>Daily average</td><td class="money">$%(da).2f &plusmn; %(dd).2f</td></tr>
<tr><td>Average contribution</td><td class="money">$%(ac).2f &plusmn; %(ad).2f</td></tr>
<tr><td>Donors per day</td><td class="money">%(dpd).2f</td></tr>
</table>
<br />
%(ctable)s
</div>
''')%dict(total=self.total, da=self.daily_average, ac=self.average,
ctable=ctable, period=self.period.days, num=len(self.totals),
dd=self.daily_deviation, ad=self.average_deviation,
dpd=len(self.totals)/float(self.period.days),
min=self.min.isoformat(), max=self.max.isoformat())
def expose(func):
def do(self, *args, **kwargs):
dict.update(cherrypy.response.headers, {'Server':'Donations_server/1.0'})
return func(self, *args, **kwargs)
return cherrypy.expose(do)
class Server(object):
TRENDS = '/tmp/donations_trend.png'
MONTH_TRENDS = '/tmp/donations_month_trend.png'
AVERAGES = '/tmp/donations_averages.png'
def __init__(self, apache=False, root='/', data_file='/tmp/donations.xml'):
self.apache = apache
self.document_root = root
self.data_file = data_file
self.read_records()
def calculate_daily_averages(self):
stats = self.get_slice(self.earliest, self.latest)
fig = plt.figure(2, (10, 4), 96)#, facecolor, edgecolor, frameon, FigureClass)
fig.clear()
ax = fig.add_subplot(111)
month_order, month_averages = stats.get_daily_averages()
x = [date(y, m, 1) for y, m in month_order[:-1]]
ax.plot(x, month_averages[:-1])
ax.set_xlabel('Month')
ax.set_ylabel('Daily average ($)')
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=2))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%m/%y'))
fig.savefig(self.AVERAGES)
def calculate_month_trend(self, days=31):
stats = self.get_slice(date.today()-timedelta(days=days-1), date.today())
fig = plt.figure(2, (10, 4), 96)#, facecolor, edgecolor, frameon, FigureClass)
fig.clear()
ax = fig.add_subplot(111)
x = list(range(days-1, -1, -1))
y = stats.daily_totals
ax.plot(x, y)#, align='center', width=20, color='g')
ax.set_xlabel('Days ago')
ax.set_ylabel('Income ($)')
ax.hlines([stats.daily_average], 0, days-1)
ax.hlines([stats.daily_average+stats.daily_deviation,
stats.daily_average-stats.daily_deviation], 0, days-1,
linestyle=':',color='r')
ax.set_xlim([0, days-1])
text = u'''\
Total: $%(total).2f
Daily average: $%(da).2f \u00b1 %(dd).2f
Average contribution: $%(ac).2f \u00b1 %(ad).2f
Donors per day: %(dpd).2f
'''%dict(total=stats.total, da=stats.daily_average,
dd=stats.daily_deviation, ac=stats.average,
ad=stats.average_deviation,
dpd=len(stats.totals)/float(stats.period.days),
)
text = ax.annotate(text, (0.5, 0.65), textcoords='axes fraction')
fig.savefig(self.MONTH_TRENDS)
def calculate_trend(self):
def months(start, end):
pos = range_for_month(start.year, start.month)[0]
while pos <= end:
yield (pos.year, pos.month)
if pos.month == 12:
pos = pos.replace(year = pos.year+1)
pos = pos.replace(month = 1)
else:
pos = pos.replace(month = pos.month + 1)
_months = list(months(self.earliest, self.latest))[:-1][-12:]
_months = [range_for_month(*m) for m in _months]
_months = [self.get_slice(*m) for m in _months]
x = [m.min for m in _months]
y = [m.total for m in _months]
ml = mdates.MonthLocator() # every month
fig = plt.figure(1, (8, 4), 96)#, facecolor, edgecolor, frameon, FigureClass)
fig.clear()
ax = fig.add_subplot(111)
average = sum(y)/len(y)
ax.bar(x, y, align='center', width=20, color='g')
ax.hlines([average], x[0], x[-1])
ax.xaxis.set_major_locator(ml)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %y'))
ax.set_xlim(_months[0].min-timedelta(days=15), _months[-1].min+timedelta(days=15))
ax.set_xlabel('Month')
ax.set_ylabel('Income ($)')
fig.autofmt_xdate()
fig.savefig(self.TRENDS)
#plt.show()
def read_records(self):
self.tree = etree.parse(self.data_file)
self.last_read_time = time.time()
self.root = self.tree.getroot()
self.records = []
min_date, max_date = date.today(), date.fromordinal(1)
for x in self.root.xpath('//donation'):
d = list(map(int, x.get('date').split('-')))
d = date(*d)
self.records.append(Record(x.get('email'), x.get('country'), float(x.get('amount')), d, x.get('name')))
min_date = min(min_date, d)
max_date = max(max_date, d)
self.earliest, self.latest = min_date, max_date
self.calculate_trend()
self.calculate_month_trend()
self.calculate_daily_averages()
def get_slice(self, start_date, end_date):
stats = Stats([r for r in self.records if r.date >= start_date and r.date <= end_date],
start_date, end_date)
return stats
def month(self, year, month):
return self.get_slice(*range_for_month(year, month))
def year(self, year):
return self.get_slice(*range_for_year(year))
def range_to_date(self, raw):
return date(*map(int, raw.split('-')))
def build_page(self, period_type, data):
if os.stat(self.data_file).st_mtime >= self.last_read_time:
self.read_records()
month = date.today().month
year = date.today().year
mm = data[1] if period_type == 'month' else month
my = data[0] if period_type == 'month' else year
yy = data if period_type == 'year' else year
rl = data[0] if period_type == 'range' else ''
rr = data[1] if period_type == 'range' else ''
def build_month_list(current):
months = []
for i in range(1, 13):
month = date(2000, i, 1).strftime('%b')
sel = 'selected="selected"' if i == current else ''
months.append('<option value="%d" %s>%s</option>'%(i, sel, month))
return months
def build_year_list(current):
all_years = sorted(range(self.earliest.year, self.latest.year+1, 1))
if current not in all_years:
current = all_years[0]
years = []
for year in all_years:
sel = 'selected="selected"' if year == current else ''
years.append('<option value="%d" %s>%d</option>'%(year, sel, year))
return years
mmlist = '<select name="month_month">\n%s</select>'%('\n'.join(build_month_list(mm)))
mylist = '<select name="month_year">\n%s</select>'%('\n'.join(build_year_list(my)))
yylist = '<select name="year_year">\n%s</select>'%('\n'.join(build_year_list(yy)))
if period_type == 'month':
range_stats = range_for_month(my, mm)
elif period_type == 'year':
range_stats = range_for_year(yy)
else:
try:
range_stats = list(map(self.range_to_date, (rl, rr)))
err = None
except:
range_stats = None
err = traceback.format_exc()
if range_stats is None:
range_stats = '<pre>Invalid input:\n%s</pre>'%err
else:
range_stats = self.get_slice(*range_stats).to_html(num_of_countries=10)
today = self.get_slice(date.today(), date.today())
return textwrap.dedent('''\
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" version="XHTML 1.1" xml:lang="en">
<head>
<title>Calibre donations</title>
<link rel="icon" href="http://calibre.kovidgoyal.net/chrome/site/favicon.ico" type="image/x-icon" />
<style type="text/css">
body { background-color: white }
.country_percent { text-align: right; font-family: monospace; }
.money { text-align: right; font-family: monospace; padding-left:2em;}
.period_box { padding-left: 60px; border-bottom: 10px; }
#banner {font-size: xx-large; font-family: cursive; text-align: center}
#stats_container td { vertical-align: top }
</style>
<script type="text/javascript">
String.prototype.trim = function() {
return this.replace(/^\s+|\s+$/g,"");
}
function test_date(date) {
var valid_format = /\d{4}-\d{1,2}-\d{1,2}/;
if (!valid_format.test(date)) return false;
var yearfield = date.split('-')[0];
var monthfield = date.split('-')[1];
var dayfield = date.split('-')[2];
var dayobj = new Date(yearfield, monthfield-1, dayfield)
if ((dayobj.getMonth()+1!=monthfield)||(dayobj.getDate()!=dayfield)||(dayobj.getFullYear()!=yearfield)) return false;
return true;
}
function check_period_form(form) {
if (form.period_type[2].checked) {
if (!test_date(form.range_left.value)) {
form.range_left.focus();
alert("Left Range date invalid!");
return false;
}
if (!test_date(form.range_right.value)) {
form.range_right.focus();
alert("Right Range date invalid!");
return false;
}
}
return true;
}
function is_empty(val) {
return val.trim().length == 0
}
function check_add_form(form) {
var test_amount = /[\.0-9]+/;
if (is_empty(form.email.value)) {
form.email.focus();
alert("Email must be filled!");
return false;
}
if (is_empty(form.country.value)) {
form.country.focus();
alert("Country must be filled!");
return false;
}
if (!test_amount.test(form.amount.value)) {
form.amount.focus();
alert("Amount " + form.amount.value + " is not a valid number!");
return false;
}
if (!test_date(form.date.value)) {
form.date.focus();
alert("Date " + form.date.value +" is invalid!");
return false;
}
return true;
}
function rationalize_periods() {
var form = document.forms[0];
var disabled = !form.period_type[0].checked;
form.month_month.disabled = disabled;
form.month_year.disabled = disabled;
disabled = !form.period_type[1].checked;
form.year_year.disabled = disabled;
disabled = !form.period_type[2].checked;
form.range_left.disabled = disabled;
form.range_right.disabled = disabled;
}
</script>
</head>
<body onload="rationalize_periods()">
<table id="banner" style="width: 100%%">
<tr>
<td style="text-align:left; width:150px"><a style="border:0pt" href="http://calibre.kovidgoyal.net"><img style="vertical-align: middle;border:0pt" alt="calibre" src="http://calibre.kovidgoyal.net/chrome/site/calibre_banner.png" /></a></td>
<td>Calibre donations</td>
</tr>
</table>
<hr />
<table id="stats_container" style="width:100%%">
<tr>
<td id="left">
<h3>Donations to date</h3>
%(todate)s
</td>
<td id="right">
<h3>Donations in period</h3>
<fieldset>
<legend>Choose a period</legend>
<form method="post" action="%(root)sshow" onsubmit="return check_period_form(this);">
<input type="radio" name="period_type" value="month" %(mc)s onclick="rationalize_periods()"/>
Month:&nbsp;%(month_month)s&nbsp;%(month_year)s
<br /><br />
<input type="radio" name="period_type" value="year" %(yc)s onclick="rationalize_periods()" />
Year:&nbsp;%(year_year)s
<br /><br />
<input type="radio" name="period_type" value="range" %(rc)s onclick="rationalize_periods()" />
Range (YYYY-MM-DD):&nbsp;<input size="10" maxlength="10" type="text" name="range_left" value="%(rl)s" />&nbsp;to&nbsp;<input size="10" maxlength="10" type="text" name="range_right" value="%(rr)s"/>
<br /><br />
<input type="submit" value="Update" />
</form>
</fieldset>
<b>Donations today: $%(today).2f</b><br />
%(range_stats)s
</td>
</tr>
</table>
<hr />
<div style="text-align:center">
<img src="%(root)strend.png" alt="Income trends" />
<h3>Income trends for the last year</h3>
<img src="%(root)smonth_trend.png" alt="Month income trend" />
<h3>Income trends for the last 31 days</h3>
<img src="%(root)saverage_trend.png" alt="Daily average
income trend" />
<h3>Income trends since records started</h3>
</div>
</body>
</html>
''')%dict(
todate=self.get_slice(self.earliest, self.latest).to_html(),
mc = 'checked="checked"' if period_type=="month" else '',
yc = 'checked="checked"' if period_type=="year" else '',
rc = 'checked="checked"' if period_type=="range" else '',
month_month=mmlist, month_year=mylist, year_year=yylist,
rl=rl, rr=rr, range_stats=range_stats, root=self.document_root,
today=today.total
)
@expose
def index(self):
month = date.today().month
year = date.today().year
cherrypy.response.headers['Content-Type'] = 'application/xhtml+xml'
return self.build_page('month', (year, month))
@expose
def trend_png(self):
cherrypy.response.headers['Content-Type'] = 'image/png'
return open(self.TRENDS, 'rb').read()
@expose
def month_trend_png(self):
cherrypy.response.headers['Content-Type'] = 'image/png'
return open(self.MONTH_TRENDS, 'rb').read()
@expose
def average_trend_png(self):
cherrypy.response.headers['Content-Type'] = 'image/png'
return open(self.AVERAGES, 'rb').read()
@expose
def show(self, period_type='month', month_month='', month_year='',
year_year='', range_left='', range_right=''):
if period_type == 'month':
mm = int(month_month) if month_month else date.today().month
my = int(month_year) if month_year else date.today().year
data = (my, mm)
elif period_type == 'year':
data = int(year_year) if year_year else date.today().year
else:
data = (range_left, range_right)
cherrypy.response.headers['Content-Type'] = 'application/xhtml+xml'
return self.build_page(period_type, data)
def config():
config = {
'global': {
'tools.gzip.on' : True,
'tools.gzip.mime_types': ['text/html', 'text/plain', 'text/xml', 'text/javascript', 'text/css', 'application/xhtml+xml'],
}
}
return config
def apache_start():
cherrypy.config.update({
'log.screen' : False,
#'log.error_file' : '/tmp/donations.log',
'environment' : 'production',
'show_tracebacks' : False,
})
cherrypy.tree.mount(Server(apache=True, root='/donations/', data_file='/var/www/calibre.kovidgoyal.net/donations.xml'),
'/donations', config=config())
def main(args=sys.argv):
server = Server()
cherrypy.quickstart(server, config=config())
return 0
if __name__ == '__main__':
sys.exit(main())

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -5,8 +5,8 @@
 msgid ""
 msgstr ""
 "Project-Id-Version: calibre 0.6.0b14\n"
-"POT-Creation-Date: 2009-07-17 13:17+MDT\n"
-"PO-Revision-Date: 2009-07-17 13:17+MDT\n"
+"POT-Creation-Date: 2009-07-19 12:31+MDT\n"
+"PO-Revision-Date: 2009-07-19 12:31+MDT\n"
 "Last-Translator: Automatically generated\n"
 "Language-Team: LANGUAGE\n"
 "MIME-Version: 1.0\n"
@@ -59,8 +59,8 @@ msgstr ""
 #: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:78
 #: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:117
 #: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:150
-#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:548
-#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:732
+#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:552
+#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:736
 #: /home/kovid/work/calibre/src/calibre/ebooks/odt/input.py:44
 #: /home/kovid/work/calibre/src/calibre/ebooks/odt/input.py:46
 #: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:857
@@ -665,7 +665,11 @@ msgstr ""
 msgid "Options to help with debugging the conversion"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/ebooks/conversion/cli.py:229
+#: /home/kovid/work/calibre/src/calibre/ebooks/conversion/cli.py:179
+msgid "List builtin recipes"
+msgstr ""
+
+#: /home/kovid/work/calibre/src/calibre/ebooks/conversion/cli.py:245
 msgid "Output saved to"
 msgstr ""
@@ -880,23 +884,19 @@ msgstr ""
 msgid "Set the language."
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/ebooks/conversion/plumber.py:410
-msgid "List available recipes."
-msgstr ""
-
-#: /home/kovid/work/calibre/src/calibre/ebooks/conversion/plumber.py:501
+#: /home/kovid/work/calibre/src/calibre/ebooks/conversion/plumber.py:498
 msgid "Could not find an ebook inside the archive"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/ebooks/conversion/plumber.py:639
+#: /home/kovid/work/calibre/src/calibre/ebooks/conversion/plumber.py:629
 msgid "Converting input to HTML..."
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/ebooks/conversion/plumber.py:654
+#: /home/kovid/work/calibre/src/calibre/ebooks/conversion/plumber.py:644
 msgid "Running transforms on ebook..."
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/ebooks/conversion/plumber.py:729
+#: /home/kovid/work/calibre/src/calibre/ebooks/conversion/plumber.py:719
 msgid "Creating"
 msgstr ""
@@ -1405,7 +1405,7 @@ msgstr ""
 msgid "Disable compression of the file contents."
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/output.py:79
+#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/output.py:101
 msgid "All articles"
 msgstr ""
@@ -6086,15 +6086,14 @@ msgstr ""
 msgid "Failed to download the following articles:"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/web/feeds/news.py:582
-#: /home/kovid/work/calibre/src/calibre/web/feeds/news.py:588
-msgid " from "
-msgstr ""
-
 #: /home/kovid/work/calibre/src/calibre/web/feeds/news.py:586
 msgid "Failed to download parts of the following articles:"
 msgstr ""

+#: /home/kovid/work/calibre/src/calibre/web/feeds/news.py:588
+msgid " from "
+msgstr ""
+
 #: /home/kovid/work/calibre/src/calibre/web/feeds/news.py:590
 msgid "\tFailed links:"
 msgstr ""
@@ -6135,26 +6134,15 @@ msgstr ""
 msgid "Untitled Article"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/web/feeds/news.py:971
-msgid ""
-"\n"
-"Downloaded article %s from %s"
-msgstr ""
-
-#: /home/kovid/work/calibre/src/calibre/web/feeds/news.py:977
+#: /home/kovid/work/calibre/src/calibre/web/feeds/news.py:978
 msgid "Article downloaded: %s"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/web/feeds/news.py:983
-msgid ""
-"Failed to download article: %s from %s\n"
-msgstr ""
-
-#: /home/kovid/work/calibre/src/calibre/web/feeds/news.py:986
+#: /home/kovid/work/calibre/src/calibre/web/feeds/news.py:989
 msgid "Article download failed: %s"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/web/feeds/news.py:1001
+#: /home/kovid/work/calibre/src/calibre/web/feeds/news.py:1004
 #: /home/kovid/work/calibre/src/calibre/web/feeds/recipes/recipe_borba.py:78
 #: /home/kovid/work/calibre/src/calibre/web/feeds/recipes/recipe_glas_srpske.py:76
 #: /home/kovid/work/calibre/src/calibre/web/feeds/recipes/recipe_instapaper.py:56

File diff suppressed because it is too large (24 files)

View File

@ -20,7 +20,7 @@ class Stream(object):
def __init__(self, stream): def __init__(self, stream):
from calibre import prints from calibre import prints
self._prints = prints self._prints = partial(prints, safe_encode=True)
self.stream = stream self.stream = stream
def flush(self): def flush(self):
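
The change above binds the safe_encode keyword once, via functools.partial, so every call made through self._prints passes safe_encode=True to prints() without repeating it at each call site. A minimal sketch of that binding, with an illustrative stand-in for prints():

    from functools import partial

    def emit(*args, **kwargs):
        # Stand-in for calibre's prints(); shows that the keyword is always present.
        print(kwargs.get('safe_encode'))

    safe_emit = partial(emit, safe_encode=True)
    safe_emit('title', 'from', 'feed')   # prints: True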

View File

@ -579,7 +579,7 @@ class BasicNewsRecipe(Recipe):
if self.failed_downloads: if self.failed_downloads:
self.log.warning(_('Failed to download the following articles:')) self.log.warning(_('Failed to download the following articles:'))
for feed, article, debug in self.failed_downloads: for feed, article, debug in self.failed_downloads:
self.log.warning(article.title+_(' from ')+feed.title) self.log.warning(article.title, 'from', feed.title)
self.log.debug(article.url) self.log.debug(article.url)
self.log.debug(debug) self.log.debug(debug)
if self.partial_failures: if self.partial_failures:
@ -968,22 +968,25 @@ class BasicNewsRecipe(Recipe):
a = request.requestID[1] a = request.requestID[1]
article = request.article article = request.article
self.log.debug(_(u'\nDownloaded article %s from %s')%(article.title, article.url)) self.log.debug('Downloaded article:', article.title, 'from', article.url)
article.orig_url = article.url article.orig_url = article.url
article.url = 'article_%d/index.html'%a article.url = 'article_%d/index.html'%a
article.downloaded = True article.downloaded = True
article.sub_pages = result[1][1:] article.sub_pages = result[1][1:]
self.jobs_done += 1 self.jobs_done += 1
self.report_progress(float(self.jobs_done)/len(self.jobs), _(u'Article downloaded: %s')%article.title) self.report_progress(float(self.jobs_done)/len(self.jobs),
_(u'Article downloaded: %s')%repr(article.title))
if result[2]: if result[2]:
self.partial_failures.append((request.feed.title, article.title, article.url, result[2])) self.partial_failures.append((request.feed.title, article.title, article.url, result[2]))
def error_in_article_download(self, request, traceback): def error_in_article_download(self, request, traceback):
self.jobs_done += 1 self.jobs_done += 1
self.log.error(_(u'Failed to download article: %s from %s\n')%(request.article.title, request.article.url)) self.log.error('Failed to download article:', request.article.title,
'from', request.article.url)
self.log.debug(traceback) self.log.debug(traceback)
self.log.debug('\n') self.log.debug('\n')
self.report_progress(float(self.jobs_done)/len(self.jobs), _('Article download failed: %s')%request.article.title) self.report_progress(float(self.jobs_done)/len(self.jobs),
_('Article download failed: %s')%repr(request.article.title))
self.failed_downloads.append((request.feed, request.article, traceback)) self.failed_downloads.append((request.feed, request.article, traceback))
def parse_feeds(self): def parse_feeds(self):
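
The logging calls above now pass the title, 'from' and the URL as separate arguments instead of %-formatting them into one translated string, and the progress messages wrap the article title in repr(). A small illustration, assumed rather than taken from calibre, of the Python 2 failure mode this avoids: encoding a unicode title to a narrow console codec can raise, while repr() of a unicode string is always plain ASCII.

    title = u'Caf\xe9 society'
    try:
        print(title.encode('ascii'))   # stand-in for writing to a non-UTF-8 console
    except UnicodeEncodeError:
        print(repr(title))             # u'Caf\xe9 society' -- safe to write anywhere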

View File

@ -16,56 +16,5 @@ class CraigsList(BasicNewsRecipe):
__author__ = 'kiodane' __author__ = 'kiodane'
feeds = [(u'Best of craigslist', feeds = [(u'Best of craigslist',
u'http://www.craigslist.org/about/best/all/index.rss'), (u'Ann Arbor', u'http://www.craigslist.org/about/best/all/index.rss'), ]
u'http://www.craigslist.org/about/best/aaa/index.rss'), (u'Asheville',
u'http://www.craigslist.org/about/best/ash/index.rss'), (u'Austin',
u'http://www.craigslist.org/about/best/aus/index.rss'), (u'Baltimore',
u'http://www.craigslist.org/about/best/bal/index.rss'), (u'Birmingham',
u'http://www.craigslist.org/about/best/bhm/index.rss'), (u'Boston',
u'http://www.craigslist.org/about/best/bos/index.rss'), (u'Vermont',
u'http://www.craigslist.org/about/best/brl/index.rss'), (u'Columbia',
u'http://www.craigslist.org/about/best/cae/index.rss'), (u'Charlotte',
u'http://www.craigslist.org/about/best/cha/index.rss'), (u'Chico',
u'http://www.craigslist.org/about/best/chc/index.rss'), (u'Chicago',
u'http://www.craigslist.org/about/best/chi/index.rss'), (u'Charleston',
u'http://www.craigslist.org/about/best/chs/index.rss'), (u'Cleveland',
u'http://www.craigslist.org/about/best/cle/index.rss'), (u'Calgary',
u'http://www.craigslist.org/about/best/clg/index.rss'),
(u'Colorado Springs', u'http://www.craigslist.org/about/best/cos/index.rss'),
(u'Dallas', u'http://www.craigslist.org/about/best/dal/index.rss'),
(u'Denver', u'http://www.craigslist.org/about/best/den/index.rss'),
(u'Detroit Metro', u'http://www.craigslist.org/about/best/det/index.rss'),
(u'Des Moines', u'http://www.craigslist.org/about/best/dsm/index.rss'),
(u'Eau Claire', u'http://www.craigslist.org/about/best/eau/index.rss'),
(u'Grand Rapids', u'http://www.craigslist.org/about/best/grr/index.rss'),
(u'Hawaii', u'http://www.craigslist.org/about/best/hnl/index.rss'),
(u'Jacksonville', u'http://www.craigslist.org/about/best/jax/index.rss'),
(u'Knoxville', u'http://www.craigslist.org/about/best/knx/index.rss'),
(u'Kansas City', u'http://www.craigslist.org/about/best/ksc/index.rss'),
(u'South Florida', u'http://www.craigslist.org/about/best/mia/index.rss'),
(u'Minneapolis', u'http://www.craigslist.org/about/best/min/index.rss'),
(u'Maine', u'http://www.craigslist.org/about/best/mne/index.rss'),
(u'Montreal', u'http://www.craigslist.org/about/best/mon/index.rss'),
(u'Nashville', u'http://www.craigslist.org/about/best/nsh/index.rss'),
(u'New York', u'http://www.craigslist.org/about/best/nyc/index.rss'),
(u'Orange County', u'http://www.craigslist.org/about/best/orc/index.rss'),
(u'Portland', u'http://www.craigslist.org/about/best/pdx/index.rss'),
(u'Phoenix', u'http://www.craigslist.org/about/best/phx/index.rss'),
(u'Pittsburgh', u'http://www.craigslist.org/about/best/pit/index.rss'),
(u'Rhode Island', u'http://www.craigslist.org/about/best/prv/index.rss'),
(u'Raleigh', u'http://www.craigslist.org/about/best/ral/index.rss'),
(u'Rochester', u'http://www.craigslist.org/about/best/rcs/index.rss'),
(u'San Antonio', u'http://www.craigslist.org/about/best/sat/index.rss'),
(u'Santa Barbara', u'http://www.craigslist.org/about/best/sba/index.rss'),
(u'San Diego', u'http://www.craigslist.org/about/best/sdo/index.rss'),
(u'Seattle-Tacoma', u'http://www.craigslist.org/about/best/sea/index.rss'),
(u'Sf Bay Area', u'http://www.craigslist.org/about/best/sfo/index.rss'),
(u'Salt Lake City',
u'http://www.craigslist.org/about/best/slc/index.rss'), (u'Spokane',
u'http://www.craigslist.org/about/best/spk/index.rss'), (u'St Louis',
u'http://www.craigslist.org/about/best/stl/index.rss'), (u'Sydney',
u'http://www.craigslist.org/about/best/syd/index.rss'), (u'Toronto',
u'http://www.craigslist.org/about/best/tor/index.rss'), (u'Vancouver BC',
u'http://www.craigslist.org/about/best/van/index.rss'), (u'Washington DC',
u'http://www.craigslist.org/about/best/wdc/index.rss')]

View File

@ -42,11 +42,12 @@ class NYTimes(BasicNewsRecipe):
# By default, no sections are skipped. # By default, no sections are skipped.
excludeSectionKeywords = [] excludeSectionKeywords = []
# Add section keywords from the right column above to skip that section # To skip sections containing the word 'Sports' or 'Dining', use:
# For example, to skip sections containing the word 'Sports' or 'Dining', use:
# excludeSectionKeywords = ['Sports', 'Dining'] # excludeSectionKeywords = ['Sports', 'Dining']
# Fetch only Business and Technology # Fetch only Business and Technology
#excludeSectionKeywords = ['Arts','Dining','Editorials','Health','Magazine','Media','Region','Op-Ed','Politics','Science','Sports','Top Stories','Travel','U.S.','World'] #excludeSectionKeywords = ['Arts','Dining','Editorials','Health','Magazine','Media','Region','Op-Ed','Politics','Science','Sports','Top Stories','Travel','U.S.','World']
# Fetch only Top Stories # Fetch only Top Stories
#excludeSectionKeywords = ['Arts','Business','Dining','Editorials','Health','Magazine','Media','Region','Op-Ed','Politics','Science','Sports','Technology','Travel','U.S.','World'] #excludeSectionKeywords = ['Arts','Business','Dining','Editorials','Health','Magazine','Media','Region','Op-Ed','Politics','Science','Sports','Technology','Travel','U.S.','World']
@ -56,11 +57,11 @@ class NYTimes(BasicNewsRecipe):
timefmt = '' timefmt = ''
needs_subscription = True needs_subscription = True
remove_tags_after = dict(attrs={'id':['comments']}) remove_tags_after = dict(attrs={'id':['comments']})
remove_tags = [dict(attrs={'class':['articleTools', 'post-tools', 'side_tool', 'nextArticleLink', remove_tags = [dict(attrs={'class':['articleTools', 'post-tools', 'side_tool', 'nextArticleLink',
'clearfix', 'nextArticleLink clearfix','inlineSearchControl', 'clearfix', 'nextArticleLink clearfix','inlineSearchControl',
'columnGroup','entry-meta','entry-response module','jumpLink','nav', 'columnGroup','entry-meta','entry-response module','jumpLink','nav',
'columnGroup advertisementColumnGroup', 'kicker entry-category']}), 'columnGroup advertisementColumnGroup', 'kicker entry-category']}),
dict(id=['footer', 'toolsRight', 'articleInline', 'navigation', 'archive', dict(id=['footer', 'toolsRight', 'articleInline', 'navigation', 'archive',
'side_search', 'blog_sidebar', 'side_tool', 'side_index', 'login', 'side_search', 'blog_sidebar', 'side_tool', 'side_index', 'login',
'blog-header','searchForm','NYTLogo','insideNYTimes','adxToolSponsor', 'blog-header','searchForm','NYTLogo','insideNYTimes','adxToolSponsor',
'adxLeaderboard']), 'adxLeaderboard']),
@ -70,7 +71,7 @@ class NYTimes(BasicNewsRecipe):
extra_css = '.headline {text-align:left;}\n\ extra_css = '.headline {text-align:left;}\n\
.byline {font:monospace; margin-bottom:0px;}\n\ .byline {font:monospace; margin-bottom:0px;}\n\
.source {align:left;}\n\ .source {align:left;}\n\
.credit {align:right;}\n' .credit {text-align:right;font-size:smaller;}\n'
def get_browser(self): def get_browser(self):
br = BasicNewsRecipe.get_browser() br = BasicNewsRecipe.get_browser()
@ -113,7 +114,7 @@ class NYTimes(BasicNewsRecipe):
docEncoding = self.encoding docEncoding = self.encoding
if docEncoding != self.encoding : if docEncoding != self.encoding :
soup = get_the_soup(docEncoding, url_or_raw) soup = get_the_soup(docEncoding, url_or_raw)
return soup return soup
@ -268,7 +269,7 @@ class NYTimes(BasicNewsRecipe):
kicker = soup.find(True, {'class':'kicker'}) kicker = soup.find(True, {'class':'kicker'})
if kicker is not None : if kicker is not None :
h3Tag = Tag(soup, "h3") h3Tag = Tag(soup, "h3")
h3Tag.insert(0, kicker.contents[0]) h3Tag.insert(0, self.tag_to_string(kicker))
kicker.replaceWith(h3Tag) kicker.replaceWith(h3Tag)
# Change captions to italic -1 # Change captions to italic -1
@ -277,7 +278,7 @@ class NYTimes(BasicNewsRecipe):
emTag = Tag(soup, "em") emTag = Tag(soup, "em")
#emTag['class'] = "caption" #emTag['class'] = "caption"
#emTag['font-size-adjust'] = "-1" #emTag['font-size-adjust'] = "-1"
emTag.insert(0, caption.contents[0]) emTag.insert(0, self.tag_to_string(caption))
hrTag = Tag(soup, 'hr') hrTag = Tag(soup, 'hr')
emTag.insert(1, hrTag) emTag.insert(1, hrTag)
caption.replaceWith(emTag) caption.replaceWith(emTag)
@ -285,10 +286,10 @@ class NYTimes(BasicNewsRecipe):
# Change <nyt_headline> to <h2> # Change <nyt_headline> to <h2>
headline = soup.find("nyt_headline") headline = soup.find("nyt_headline")
if headline is not None : if headline is not None :
tag = Tag(soup, "h2") h2tag = Tag(soup, "h2")
tag['class'] = "headline" h2tag['class'] = "headline"
tag.insert(0, headline.contents[0]) h2tag.insert(0, self.tag_to_string(headline))
soup.h1.replaceWith(tag) headline.replaceWith(h2tag)
# Change <h1> to <h3> - used in editorial blogs # Change <h1> to <h3> - used in editorial blogs
masthead = soup.find("h1") masthead = soup.find("h1")
@ -296,14 +297,14 @@ class NYTimes(BasicNewsRecipe):
# Nuke the href # Nuke the href
if masthead.a is not None : if masthead.a is not None :
del(masthead.a['href']) del(masthead.a['href'])
tag = Tag(soup, "h3") h3tag = Tag(soup, "h3")
tag.insert(0, masthead.contents[0]) h3tag.insert(0, self.tag_to_string(masthead))
soup.h1.replaceWith(tag) masthead.replaceWith(h3tag)
# Change <span class="bold"> to <b> # Change <span class="bold"> to <b>
for subhead in soup.findAll(True, {'class':'bold'}) : for subhead in soup.findAll(True, {'class':'bold'}) :
bTag = Tag(soup, "b") bTag = Tag(soup, "b")
bTag.insert(0, subhead.contents[0]) bTag.insert(0, self.tag_to_string(subhead))
subhead.replaceWith(bTag) subhead.replaceWith(bTag)
return soup return soup
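
Several replacements above change tag.contents[0] to self.tag_to_string(tag): contents[0] is only the first child node, which may itself be a tag or omit trailing text, whereas tag_to_string is used here to obtain the tag's full text. A hypothetical illustration of the difference, using the BeautifulSoup 3 API these recipes are written against:

    from BeautifulSoup import BeautifulSoup

    soup = BeautifulSoup('<h6 class="kicker"><a href="#">Op-Ed</a> Columnist</h6>')
    kicker = soup.find(True, {'class': 'kicker'})
    print(kicker.contents[0])                   # <a href="#">Op-Ed</a> -- first child only
    print(''.join(kicker.findAll(text=True)))   # Op-Ed Columnist -- full flattened text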