mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Pull from trunk
This commit is contained in:
commit
5be5277f32
@ -465,7 +465,3 @@ if isosx:
|
||||
except:
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
|
||||
# Migrate from QSettings based config system
|
||||
from calibre.utils.config import migrate
|
||||
migrate()
|
||||
|
@ -2,7 +2,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
__appname__ = 'calibre'
|
||||
__version__ = '0.4.141'
|
||||
__version__ = '0.4.142'
|
||||
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
|
||||
'''
|
||||
Various run time constants.
|
||||
|
@ -845,7 +845,12 @@ class Processor(Parser):
|
||||
except:
|
||||
size = '3'
|
||||
if size and size.strip() and size.strip()[0] in ('+', '-'):
|
||||
size = 3 + float(size) # Hack assumes basefont=3
|
||||
size = re.search(r'[+-]{0,1}[\d\.]+', size)
|
||||
try:
|
||||
size = float(size.group())
|
||||
except:
|
||||
size = 0
|
||||
size += 3 # Hack assumes basefont=3
|
||||
try:
|
||||
setting = 'font-size: %d%%;'%int((float(size)/3) * 100)
|
||||
except ValueError:
|
||||
|
@ -122,11 +122,15 @@ class UnBinary(object):
|
||||
OPEN_ANGLE_RE = re.compile(r'<<(?![!]--)')
|
||||
CLOSE_ANGLE_RE = re.compile(r'(?<!--)>>(?=>>|[^>])')
|
||||
DOUBLE_ANGLE_RE = re.compile(r'([<>])\1')
|
||||
EMPTY_ATOMS = ({},{})
|
||||
|
||||
def __init__(self, bin, path, manifest={}, map=HTML_MAP):
|
||||
def __init__(self, bin, path, manifest={}, map=HTML_MAP, atoms=EMPTY_ATOMS):
|
||||
self.manifest = manifest
|
||||
self.tag_map, self.attr_map, self.tag_to_attr_map = map
|
||||
self.is_html = map is HTML_MAP
|
||||
self.tag_atoms, self.attr_atoms = atoms
|
||||
self.opf = map is OPF_MAP
|
||||
self.bin = bin
|
||||
self.dir = os.path.dirname(path)
|
||||
buf = StringIO()
|
||||
self.binary_to_text(bin, buf)
|
||||
@ -205,7 +209,10 @@ class UnBinary(object):
|
||||
state = 'get custom length'
|
||||
continue
|
||||
if flags & FLAG_ATOM:
|
||||
raise LitError('TODO: Atoms not yet implemented')
|
||||
if not self.tag_atoms or tag not in self.tag_atoms:
|
||||
raise LitError("atom tag %d not in atom tag list" % tag)
|
||||
tag_name = self.tag_atoms[tag]
|
||||
current_map = self.attr_atoms
|
||||
elif tag < len(self.tag_map):
|
||||
tag_name = self.tag_map[tag]
|
||||
current_map = self.tag_to_attr_map[tag]
|
||||
@ -804,6 +811,54 @@ class LitFile(object):
|
||||
raise LitError("Failed to completely decompress section")
|
||||
return ''.join(result)
|
||||
|
||||
def get_atoms(self, entry):
    """Read the custom tag/attribute atom tables stored for ``entry``.

    The tables live in the file ``/data/<entry.internal>/atom``.  If that
    file is not listed in ``self.entries`` two empty dicts are returned.

    Returns a ``(tags, attrs)`` tuple of dicts.  Both are keyed by the
    1-based position of the record in its table and map to the raw name
    sliced out of the file.  A table that ends early or contains a
    zero-length record is reported through ``self._warn`` and whatever was
    parsed up to that point is returned.
    """
    atom_path = '/'.join(('/data', entry.internal, 'atom'))
    if atom_path not in self.entries:
        return ({}, {})
    remaining = self.get_file(atom_path)

    # Tag table: a count read with u32 from the first four bytes, then one
    # record per tag made of a single length byte followed by the name.
    tag_count = u32(remaining)
    remaining = remaining[4:]
    tags = {}
    for index in xrange(1, tag_count + 1):
        if len(remaining) <= 1:
            break
        length = ord(remaining[0])
        remaining = remaining[1:]
        if length == 0 or length > len(remaining):
            break
        tags[index] = remaining[:length]
        remaining = remaining[length:]
    if len(tags) != tag_count:
        self._warn("damaged or invalid atoms tag table")

    # Not enough bytes left for the attribute count: tags only.
    if len(remaining) < 4:
        return (tags, {})

    # Attribute table: same layout, except each record's length is itself
    # read with u32 from four bytes instead of from a single byte.
    attr_count = u32(remaining)
    remaining = remaining[4:]
    attrs = {}
    for index in xrange(1, attr_count + 1):
        if len(remaining) <= 4:
            break
        length = u32(remaining)
        remaining = remaining[4:]
        if length == 0 or length > len(remaining):
            break
        attrs[index] = remaining[:length]
        remaining = remaining[length:]
    if len(attrs) != attr_count:
        self._warn("damaged or invalid atoms attributes table")

    return (tags, attrs)
|
||||
|
||||
def get_entry_content(self, entry, pretty_print=False):
    """Return the content stored for a manifest ``entry``.

    Entries whose ``state`` contains ``'spine'`` are stored in binary form
    under ``/data/<entry.internal>/content``.  They are converted to markup
    with ``UnBinary`` (using the entry's atom tables), prefixed with the
    matching declaration, optionally passed through ``self._pretty_print``
    and returned encoded as UTF-8.

    All other entries are returned exactly as ``self.get_file`` yields them
    for ``/data/<entry.internal>``.

    :param entry: manifest entry providing ``state``, ``internal`` and
        ``path``.
    :param pretty_print: when true, spine content is run through
        ``self._pretty_print`` before encoding.  Ignored for non-spine
        entries.
    """
    if 'spine' not in entry.state:
        # This method reads through ``self`` everywhere else, so the raw
        # branch must as well; it previously went through ``self._litfile``,
        # an attribute that belongs to the LitContainer wrapper.
        return self.get_file('/'.join(('/data', entry.internal)))

    name = '/'.join(('/data', entry.internal, 'content'))
    path = entry.path
    raw = self.get_file(name)
    # NOTE(review): ``name`` always starts with '/data/' at this point, so
    # the OPF branch looks unreachable -- kept as in the original; confirm.
    if name == '/meta':
        decl, tag_maps = OPF_DECL, OPF_MAP
    else:
        decl, tag_maps = HTML_DECL, HTML_MAP
    atoms = self.get_atoms(entry)
    content = decl + unicode(
        UnBinary(raw, path, self.manifest, tag_maps, atoms))
    if pretty_print:
        content = self._pretty_print(content)
    return content.encode('utf-8')
|
||||
|
||||
|
||||
class LitContainer(object):
|
||||
"""Simple Container-interface, read-only accessor for LIT files."""
|
||||
@ -826,11 +881,7 @@ class LitContainer(object):
|
||||
raw = self._litfile.get_file(internal)
|
||||
unbin = UnBinary(raw, name, self._litfile.manifest, HTML_MAP)
|
||||
content = HTML_DECL + str(unbin)
|
||||
else:
|
||||
internal = '/'.join(('/data', entry.internal))
|
||||
content = self._litfile.get_file(internal)
|
||||
return content
|
||||
|
||||
|
||||
def _read_meta(self):
|
||||
path = 'content.opf'
|
||||
raw = self._litfile.get_file('/meta')
|
||||
|
@ -39,13 +39,13 @@ def metadata_from_formats(formats):
|
||||
return mi2
|
||||
|
||||
for path, ext in zip(formats, extensions):
|
||||
stream = open(path, 'rb')
|
||||
try:
|
||||
mi.smart_update(get_metadata(stream, stream_type=ext, use_libprs_metadata=True))
|
||||
except:
|
||||
continue
|
||||
if getattr(mi, 'application_id', None) is not None:
|
||||
return mi
|
||||
with open(path, 'rb') as stream:
|
||||
try:
|
||||
mi.smart_update(get_metadata(stream, stream_type=ext, use_libprs_metadata=True))
|
||||
except:
|
||||
continue
|
||||
if getattr(mi, 'application_id', None) is not None:
|
||||
return mi
|
||||
|
||||
if not mi.title:
|
||||
mi.title = _('Unknown')
|
||||
|
@ -227,7 +227,7 @@ class CSSFlattener(object):
|
||||
items.sort()
|
||||
css = u';\n'.join(u'%s: %s' % (key, val) for key, val in items)
|
||||
classes = node.get('class', None) or 'calibre'
|
||||
klass = STRIPNUM.sub('', classes.split()[0])
|
||||
klass = STRIPNUM.sub('', classes.split()[0].replace('_', ''))
|
||||
if css in styles:
|
||||
match = styles[css]
|
||||
else:
|
||||
|
BIN
src/calibre/gui2/images/news/el_universal.png
Normal file
BIN
src/calibre/gui2/images/news/el_universal.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 878 B |
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
5738
src/calibre/translations/he.po
Normal file
5738
src/calibre/translations/he.po
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -32,7 +32,8 @@ recipe_modules = ['recipe_' + r for r in (
|
||||
'hindu', 'cincinnati_enquirer', 'physics_world', 'pressonline',
|
||||
'la_republica', 'physics_today', 'chicago_tribune', 'e_novine',
|
||||
'al_jazeera', 'winsupersite', 'borba', 'courrierinternational',
|
||||
'lamujerdemivida', 'soldiers', 'theonion',
|
||||
'lamujerdemivida', 'soldiers', 'theonion', 'news_times',
|
||||
'el_universal',
|
||||
)]
|
||||
|
||||
import re, imp, inspect, time, os
|
||||
|
65
src/calibre/web/feeds/recipes/recipe_el_universal.py
Normal file
65
src/calibre/web/feeds/recipes/recipe_el_universal.py
Normal file
@ -0,0 +1,65 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
eluniversal.com.mx
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class ElUniversal(BasicNewsRecipe):
    """Recipe for the RSS feeds published by eluniversal.com.mx."""

    title = 'El Universal'
    __author__ = 'Darko Miletic'
    description = 'News from Mexico'
    publisher = 'El Universal'
    category = 'news, politics, Mexico'
    language = _('Spanish')
    encoding = 'cp1252'

    oldest_article = 1
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True

    # Conversion options, built from the metadata declared above.
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
        '--ignore-tables',
    ]

    html2epub_options = '\n'.join([
        'publisher="' + publisher + '"',
        'comments="' + description + '"',
        'tags="' + category + '"',
        'linearize_tables=True',
    ])

    remove_tags = [dict(name='link')]

    feeds = [
        (u'Minuto por Minuto', u'http://www.eluniversal.com.mx/rss/universalmxm.xml'),
        (u'Mundo', u'http://www.eluniversal.com.mx/rss/mundo.xml'),
        (u'Mexico', u'http://www.eluniversal.com.mx/rss/mexico.xml'),
        (u'Estados', u'http://www.eluniversal.com.mx/rss/estados.xml'),
        (u'Finanzas', u'http://www.eluniversal.com.mx/rss/finanzas.xml'),
        (u'Deportes', u'http://www.eluniversal.com.mx/rss/deportes.xml'),
        (u'Espectaculos', u'http://www.eluniversal.com.mx/rss/espectaculos.xml'),
        (u'Cultura', u'http://www.eluniversal.com.mx/rss/cultura.xml'),
        (u'Ciencia', u'http://www.eluniversal.com.mx/rss/ciencia.xml'),
        (u'Computacion', u'http://www.eluniversal.com.mx/rss/computo.xml'),
        (u'Sociedad', u'http://www.eluniversal.com.mx/rss/sociedad.xml'),
    ]

    def print_version(self, url):
        """Return ``url`` with '/notas/' replaced by '/notas/vi_'."""
        return url.replace('/notas/', '/notas/vi_')

    def preprocess_html(self, soup):
        """Insert language/charset meta markup and strip styling attributes.

        The meta markup is inserted at the start of ``soup.head``; then the
        ``style``, ``font``, ``face`` and ``helvetica`` attributes are
        deleted from every tag that carries them.  Returns the same
        ``soup`` object.
        """
        meta_markup = (
            '<meta http-equiv="Content-Language" content="es-MX"/>'
            '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
        )
        soup.head.insert(0, meta_markup)
        for attr_name in ('style', 'font', 'face', 'helvetica'):
            for tag in soup.findAll(**{attr_name: True}):
                del tag[attr_name]
        return soup
|
||||
|
28
src/calibre/web/feeds/recipes/recipe_news_times.py
Normal file
28
src/calibre/web/feeds/recipes/recipe_news_times.py
Normal file
@ -0,0 +1,28 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class NewsTimes(BasicNewsRecipe):
    """Recipe for the "Latest news" RSS feed of newstimes.com."""

    title = 'Newstimes'
    __author__ = 'Darko Miletic'
    description = 'news from USA'
    language = _('English')
    encoding = 'utf-8'

    oldest_article = 1
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True

    # Elements selected for keeping: the headline plus the byline, date and
    # body containers, matched by their ids.
    keep_only_tags = [
        dict(name='h1', attrs={'id': 'articleTitle'}),
        dict(name='div',
             attrs={'id': ['articleByline', 'articleDate', 'articleBody']}),
    ]

    # Elements selected for removal: object/link tags and the div with
    # class 'articleEmbeddedAdBox'.
    remove_tags = [
        dict(name=['object', 'link']),
        dict(name='div', attrs={'class': 'articleEmbeddedAdBox'}),
    ]

    feeds = [
        (u'Latest news',
         u'http://feeds.newstimes.com/mngi/rss/CustomRssServlet/3/201071.xml'),
    ]
|
||||
|
Loading…
x
Reference in New Issue
Block a user