Pull from trunk

This commit is contained in:
Kovid Goyal 2009-03-03 17:29:39 -08:00
commit 5be5277f32
33 changed files with 13918 additions and 7267 deletions

View File

@ -465,7 +465,3 @@ if isosx:
except:
import traceback
traceback.print_exc()
# Migrate from QSettings based config system
from calibre.utils.config import migrate
migrate()

View File

@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = 'calibre'
__version__ = '0.4.141'
__version__ = '0.4.142'
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
'''
Various run time constants.

View File

@ -845,7 +845,12 @@ class Processor(Parser):
except:
size = '3'
if size and size.strip() and size.strip()[0] in ('+', '-'):
size = 3 + float(size) # Hack assumes basefont=3
size = re.search(r'[+-]{0,1}[\d\.]+', size)
try:
size = float(size.group())
except:
size = 0
size += 3 # Hack assumes basefont=3
try:
setting = 'font-size: %d%%;'%int((float(size)/3) * 100)
except ValueError:

View File

@ -122,11 +122,15 @@ class UnBinary(object):
OPEN_ANGLE_RE = re.compile(r'<<(?![!]--)')
CLOSE_ANGLE_RE = re.compile(r'(?<!--)>>(?=>>|[^>])')
DOUBLE_ANGLE_RE = re.compile(r'([<>])\1')
EMPTY_ATOMS = ({},{})
def __init__(self, bin, path, manifest={}, map=HTML_MAP):
def __init__(self, bin, path, manifest={}, map=HTML_MAP, atoms=EMPTY_ATOMS):
self.manifest = manifest
self.tag_map, self.attr_map, self.tag_to_attr_map = map
self.is_html = map is HTML_MAP
self.tag_atoms, self.attr_atoms = atoms
self.opf = map is OPF_MAP
self.bin = bin
self.dir = os.path.dirname(path)
buf = StringIO()
self.binary_to_text(bin, buf)
@ -205,7 +209,10 @@ class UnBinary(object):
state = 'get custom length'
continue
if flags & FLAG_ATOM:
raise LitError('TODO: Atoms not yet implemented')
if not self.tag_atoms or tag not in self.tag_atoms:
raise LitError("atom tag %d not in atom tag list" % tag)
tag_name = self.tag_atoms[tag]
current_map = self.attr_atoms
elif tag < len(self.tag_map):
tag_name = self.tag_map[tag]
current_map = self.tag_to_attr_map[tag]
@ -804,6 +811,54 @@ class LitFile(object):
raise LitError("Failed to completely decompress section")
return ''.join(result)
def get_atoms(self, entry):
    """Read the custom tag and attribute name tables (atoms) of ``entry``.

    The tables are loaded from ``/data/<entry.internal>/atom``. The tag
    table comes first: a count read with ``u32`` followed by records made
    of a one-byte length and that many bytes of name. The attribute table
    follows with the same layout, except that every record length is read
    with ``u32`` (four bytes). Records are numbered from 1 in stored order.

    Returns a ``(tags, attrs)`` tuple of dicts mapping record number to
    the raw name. Both dicts are empty when the entry has no atom file.
    A table that ends early is returned as far as it could be read and a
    warning is issued through ``self._warn``.
    """
    name = '/'.join(('/data', entry.internal, 'atom'))
    if name not in self.entries:
        return ({}, {})
    data = self.get_file(name)
    if len(data) < 4:
        # Too short to hold even the tag count. Report it like any other
        # truncated table instead of handing u32 an incomplete field
        # (the attribute table below already guards the same way).
        self._warn("damaged or invalid atoms tag table")
        return ({}, {})
    nentries, data = u32(data), data[4:]
    tags = {}
    for i in xrange(1, nentries + 1):
        # A record needs its length byte plus at least one byte of name.
        if len(data) <= 1:
            break
        size, data = ord(data[0]), data[1:]
        if size == 0 or len(data) < size:
            break
        tags[i], data = data[:size], data[size:]
    if len(tags) != nentries:
        self._warn("damaged or invalid atoms tag table")
    if len(data) < 4:
        # Nothing left that could hold an attribute count.
        return (tags, {})
    attrs = {}
    nentries, data = u32(data), data[4:]
    for i in xrange(1, nentries + 1):
        # A record needs its four-byte length plus at least one byte of name.
        if len(data) <= 4:
            break
        size, data = u32(data), data[4:]
        if size == 0 or len(data) < size:
            break
        attrs[i], data = data[:size], data[size:]
    if len(attrs) != nentries:
        self._warn("damaged or invalid atoms attributes table")
    return (tags, attrs)
def get_entry_content(self, entry, pretty_print=False):
    """Return the content stored for a manifest ``entry``.

    Entries whose ``state`` contains ``'spine'`` are read from
    ``/data/<entry.internal>/content``, converted to markup with
    ``UnBinary`` (using the entry's atom tables), optionally passed
    through ``self._pretty_print`` and returned encoded as UTF-8.
    Any other entry is returned exactly as ``self.get_file`` yields it
    from ``/data/<entry.internal>``; ``pretty_print`` is ignored then.
    """
    if 'spine' not in entry.state:
        # Read through this object's own get_file, as the spine branch
        # and get_atoms do; this class is the file reader itself, not a
        # wrapper holding one in ``_litfile``.
        return self.get_file('/'.join(('/data', entry.internal)))
    name = '/'.join(('/data', entry.internal, 'content'))
    raw = self.get_file(name)
    # NOTE(review): name always starts with '/data/', so the OPF
    # alternative is never selected here -- confirm whether '/meta'
    # is meant to be handled by this method at all.
    decl, maps = (OPF_DECL, OPF_MAP) \
        if name == '/meta' else (HTML_DECL, HTML_MAP)
    atoms = self.get_atoms(entry)
    content = decl + unicode(
        UnBinary(raw, entry.path, self.manifest, maps, atoms))
    if pretty_print:
        content = self._pretty_print(content)
    return content.encode('utf-8')
class LitContainer(object):
"""Simple Container-interface, read-only accessor for LIT files."""
@ -826,11 +881,7 @@ class LitContainer(object):
raw = self._litfile.get_file(internal)
unbin = UnBinary(raw, name, self._litfile.manifest, HTML_MAP)
content = HTML_DECL + str(unbin)
else:
internal = '/'.join(('/data', entry.internal))
content = self._litfile.get_file(internal)
return content
def _read_meta(self):
path = 'content.opf'
raw = self._litfile.get_file('/meta')

View File

@ -39,13 +39,13 @@ def metadata_from_formats(formats):
return mi2
for path, ext in zip(formats, extensions):
stream = open(path, 'rb')
try:
mi.smart_update(get_metadata(stream, stream_type=ext, use_libprs_metadata=True))
except:
continue
if getattr(mi, 'application_id', None) is not None:
return mi
with open(path, 'rb') as stream:
try:
mi.smart_update(get_metadata(stream, stream_type=ext, use_libprs_metadata=True))
except:
continue
if getattr(mi, 'application_id', None) is not None:
return mi
if not mi.title:
mi.title = _('Unknown')

View File

@ -227,7 +227,7 @@ class CSSFlattener(object):
items.sort()
css = u';\n'.join(u'%s: %s' % (key, val) for key, val in items)
classes = node.get('class', None) or 'calibre'
klass = STRIPNUM.sub('', classes.split()[0])
klass = STRIPNUM.sub('', classes.split()[0].replace('_', ''))
if css in styles:
match = styles[css]
else:

Binary file not shown.

After

Width:  |  Height:  |  Size: 878 B

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -32,7 +32,8 @@ recipe_modules = ['recipe_' + r for r in (
'hindu', 'cincinnati_enquirer', 'physics_world', 'pressonline',
'la_republica', 'physics_today', 'chicago_tribune', 'e_novine',
'al_jazeera', 'winsupersite', 'borba', 'courrierinternational',
'lamujerdemivida', 'soldiers', 'theonion',
'lamujerdemivida', 'soldiers', 'theonion', 'news_times',
'el_universal',
)]
import re, imp, inspect, time, os

View File

@ -0,0 +1,65 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
eluniversal.com.mx
'''
from calibre.web.feeds.news import BasicNewsRecipe
class ElUniversal(BasicNewsRecipe):
    """Recipe for the RSS feeds published by eluniversal.com.mx."""

    # Descriptive metadata
    title = 'El Universal'
    __author__ = 'Darko Miletic'
    description = 'News from Mexico'
    publisher = 'El Universal'
    category = 'news, politics, Mexico'
    language = _('Spanish')

    # Download behaviour
    oldest_article = 1
    max_articles_per_feed = 100
    encoding = 'cp1252'
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True

    # Options handed to the LRF and EPUB converters
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
        '--ignore-tables',
    ]
    html2epub_options = (
        'publisher="%s"\ncomments="%s"\ntags="%s"\nlinearize_tables=True'
        % (publisher, description, category)
    )

    remove_tags = [dict(name='link')]

    feeds = [
        (u'Minuto por Minuto', u'http://www.eluniversal.com.mx/rss/universalmxm.xml'),
        (u'Mundo', u'http://www.eluniversal.com.mx/rss/mundo.xml'),
        (u'Mexico', u'http://www.eluniversal.com.mx/rss/mexico.xml'),
        (u'Estados', u'http://www.eluniversal.com.mx/rss/estados.xml'),
        (u'Finanzas', u'http://www.eluniversal.com.mx/rss/finanzas.xml'),
        (u'Deportes', u'http://www.eluniversal.com.mx/rss/deportes.xml'),
        (u'Espectaculos', u'http://www.eluniversal.com.mx/rss/espectaculos.xml'),
        (u'Cultura', u'http://www.eluniversal.com.mx/rss/cultura.xml'),
        (u'Ciencia', u'http://www.eluniversal.com.mx/rss/ciencia.xml'),
        (u'Computacion', u'http://www.eluniversal.com.mx/rss/computo.xml'),
        (u'Sociedad', u'http://www.eluniversal.com.mx/rss/sociedad.xml'),
    ]

    def print_version(self, url):
        """Return ``url`` with '/notas/' replaced by '/notas/vi_'."""
        return url.replace('/notas/', '/notas/vi_')

    def preprocess_html(self, soup):
        """Insert language/charset meta tags and drop presentational attributes.

        The meta markup is inserted as the first child of ``soup.head``;
        afterwards every ``style``, ``font``, ``face`` and ``helvetica``
        attribute found in the document is deleted. Returns ``soup``.
        """
        mtag = '<meta http-equiv="Content-Language" content="es-MX"/><meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
        soup.head.insert(0, mtag)
        # Same attributes, in the same order, as one findAll pass each.
        for attribute in ('style', 'font', 'face', 'helvetica'):
            for item in soup.findAll(**{attribute: True}):
                del item[attribute]
        return soup

View File

@ -0,0 +1,28 @@
from calibre.web.feeds.news import BasicNewsRecipe
class NewsTimes(BasicNewsRecipe):
    """Recipe for the newstimes.com 'Latest news' RSS feed."""

    # Descriptive metadata
    title = 'Newstimes'
    __author__ = 'Darko Miletic'
    description = 'news from USA'
    language = _('English')

    # Download behaviour
    oldest_article = 1
    max_articles_per_feed = 100
    encoding = 'utf-8'
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True

    # Keep only the headline plus the byline, date and body containers.
    keep_only_tags = [
        dict(name='h1', attrs={'id':'articleTitle'}),
        dict(name='div', attrs={'id':['articleByline','articleDate','articleBody']}),
    ]

    # Remove embedded objects, <link> elements and the in-article ad box.
    remove_tags = [
        dict(name=['object','link']),
        dict(name='div', attrs={'class':'articleEmbeddedAdBox'}),
    ]

    feeds = [
        (u'Latest news', u'http://feeds.newstimes.com/mngi/rss/CustomRssServlet/3/201071.xml'),
    ]