mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Sync to trunk
This commit is contained in:
commit
4d21dd9fe2
@ -36,6 +36,7 @@ def freeze():
|
|||||||
'/lib/libbz2.so.1',
|
'/lib/libbz2.so.1',
|
||||||
'/usr/lib/libpoppler.so.4',
|
'/usr/lib/libpoppler.so.4',
|
||||||
'/usr/lib/libxml2.so.2',
|
'/usr/lib/libxml2.so.2',
|
||||||
|
'/usr/lib/libdbus-1.so.3',
|
||||||
'/usr/lib/libxslt.so.1',
|
'/usr/lib/libxslt.so.1',
|
||||||
'/usr/lib/libxslt.so.1',
|
'/usr/lib/libxslt.so.1',
|
||||||
'/usr/lib/libgthread-2.0.so.0',
|
'/usr/lib/libgthread-2.0.so.0',
|
||||||
|
@ -2,7 +2,7 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
__appname__ = 'calibre'
|
__appname__ = 'calibre'
|
||||||
__version__ = '0.4.140'
|
__version__ = '0.4.141'
|
||||||
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
|
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
|
||||||
'''
|
'''
|
||||||
Various run time constants.
|
Various run time constants.
|
||||||
|
@ -276,8 +276,11 @@ class UnBinary(object):
|
|||||||
state = 'get attr'
|
state = 'get attr'
|
||||||
elif count > 0:
|
elif count > 0:
|
||||||
if not in_censorship:
|
if not in_censorship:
|
||||||
self.buf.write(c.encode(
|
if c == '"':
|
||||||
'ascii', 'xmlcharrefreplace'))
|
c = '&quot;'
|
||||||
|
elif c == '<':
|
||||||
|
c = '&lt;'
|
||||||
|
self.buf.write(c.encode('ascii', 'xmlcharrefreplace'))
|
||||||
count -= 1
|
count -= 1
|
||||||
if count == 0:
|
if count == 0:
|
||||||
if not in_censorship:
|
if not in_censorship:
|
||||||
|
@ -3,6 +3,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
|||||||
''''''
|
''''''
|
||||||
|
|
||||||
import sys, os, subprocess, logging
|
import sys, os, subprocess, logging
|
||||||
|
import errno
|
||||||
from functools import partial
|
from functools import partial
|
||||||
from calibre import isosx, setup_cli_handlers, filename_to_utf8, iswindows, islinux
|
from calibre import isosx, setup_cli_handlers, filename_to_utf8, iswindows, islinux
|
||||||
from calibre.ebooks import ConversionError, DRMError
|
from calibre.ebooks import ConversionError, DRMError
|
||||||
@ -41,14 +42,26 @@ def generate_html(pathtopdf, tdir):
|
|||||||
try:
|
try:
|
||||||
os.chdir(tdir)
|
os.chdir(tdir)
|
||||||
try:
|
try:
|
||||||
p = popen(cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
|
p = popen(cmd, stderr=subprocess.PIPE)
|
||||||
except OSError, err:
|
except OSError, err:
|
||||||
if err.errno == 2:
|
if err.errno == 2:
|
||||||
raise ConversionError(_('Could not find pdftohtml, check it is in your PATH'), True)
|
raise ConversionError(_('Could not find pdftohtml, check it is in your PATH'), True)
|
||||||
else:
|
else:
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
'''
|
||||||
print p.stdout.read()
|
print p.stdout.read()
|
||||||
ret = p.wait()
|
'''
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
ret = p.wait()
|
||||||
|
break
|
||||||
|
except OSError, e:
|
||||||
|
if e.errno == errno.EINTR:
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
raise
|
||||||
|
|
||||||
if ret != 0:
|
if ret != 0:
|
||||||
err = p.stderr.read()
|
err = p.stderr.read()
|
||||||
raise ConversionError, err
|
raise ConversionError, err
|
||||||
|
@ -81,6 +81,7 @@ class MetadataUpdater(object):
|
|||||||
type = self.type = data[60:68]
|
type = self.type = data[60:68]
|
||||||
self.nrecs, = unpack('>H', data[76:78])
|
self.nrecs, = unpack('>H', data[76:78])
|
||||||
record0 = self.record0 = self.record(0)
|
record0 = self.record0 = self.record(0)
|
||||||
|
self.encryption_type, = unpack('>H', record0[12:14])
|
||||||
codepage, = unpack('>I', record0[28:32])
|
codepage, = unpack('>I', record0[28:32])
|
||||||
self.codec = 'utf-8' if codepage == 65001 else 'cp1252'
|
self.codec = 'utf-8' if codepage == 65001 else 'cp1252'
|
||||||
image_base, = unpack('>I', record0[108:112])
|
image_base, = unpack('>I', record0[108:112])
|
||||||
@ -134,6 +135,8 @@ class MetadataUpdater(object):
|
|||||||
if self.thumbnail_record is not None:
|
if self.thumbnail_record is not None:
|
||||||
recs.append((202, pack('>I', self.thumbnail_rindex)))
|
recs.append((202, pack('>I', self.thumbnail_rindex)))
|
||||||
exth = StringIO()
|
exth = StringIO()
|
||||||
|
if getattr(self, 'encryption_type', -1) != 0:
|
||||||
|
raise MobiError('Setting metadata in DRMed MOBI files is not supported.')
|
||||||
for code, data in recs:
|
for code, data in recs:
|
||||||
exth.write(pack('>II', code, len(data) + 8))
|
exth.write(pack('>II', code, len(data) + 8))
|
||||||
exth.write(data)
|
exth.write(data)
|
||||||
|
@ -37,6 +37,8 @@ class KeyMapper(object):
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def relate(size, base):
|
def relate(size, base):
|
||||||
|
if size == 0:
|
||||||
|
return base
|
||||||
size = float(size)
|
size = float(size)
|
||||||
base = float(base)
|
base = float(base)
|
||||||
if abs(size - base) < 0.1: return 0
|
if abs(size - base) < 0.1: return 0
|
||||||
@ -48,6 +50,7 @@ class KeyMapper(object):
|
|||||||
return result
|
return result
|
||||||
|
|
||||||
def __getitem__(self, ssize):
|
def __getitem__(self, ssize):
|
||||||
|
ssize = asfloat(ssize, 0)
|
||||||
if ssize in self.cache:
|
if ssize in self.cache:
|
||||||
return self.cache[ssize]
|
return self.cache[ssize]
|
||||||
dsize = self.map(ssize)
|
dsize = self.map(ssize)
|
||||||
@ -66,6 +69,7 @@ class ScaleMapper(object):
|
|||||||
self.dscale = float(dbase) / float(sbase)
|
self.dscale = float(dbase) / float(sbase)
|
||||||
|
|
||||||
def __getitem__(self, ssize):
|
def __getitem__(self, ssize):
|
||||||
|
ssize = asfloat(ssize, 0)
|
||||||
dsize = ssize * self.dscale
|
dsize = ssize * self.dscale
|
||||||
return dsize
|
return dsize
|
||||||
|
|
||||||
|
@ -90,11 +90,11 @@ class DateDelegate(QStyledItemDelegate):
|
|||||||
def displayText(self, val, locale):
|
def displayText(self, val, locale):
|
||||||
d = val.toDate()
|
d = val.toDate()
|
||||||
return d.toString('dd MMM yyyy')
|
return d.toString('dd MMM yyyy')
|
||||||
if d.isNull():
|
|
||||||
return ''
|
|
||||||
d = datetime(d.year(), d.month(), d.day())
|
|
||||||
return strftime(BooksView.TIME_FMT, d.timetuple())
|
|
||||||
|
|
||||||
|
def createEditor(self, parent, option, index):
|
||||||
|
qde = QStyledItemDelegate.createEditor(self, parent, option, index)
|
||||||
|
qde.setDisplayFormat('MM/dd/yyyy')
|
||||||
|
return qde
|
||||||
|
|
||||||
class BooksModel(QAbstractTableModel):
|
class BooksModel(QAbstractTableModel):
|
||||||
coding = zip(
|
coding = zip(
|
||||||
|
@ -44,7 +44,6 @@ from calibre.gui2.dialogs.search import SearchDialog
|
|||||||
from calibre.gui2.dialogs.choose_format import ChooseFormatDialog
|
from calibre.gui2.dialogs.choose_format import ChooseFormatDialog
|
||||||
from calibre.gui2.dialogs.book_info import BookInfo
|
from calibre.gui2.dialogs.book_info import BookInfo
|
||||||
from calibre.ebooks.metadata.meta import set_metadata
|
from calibre.ebooks.metadata.meta import set_metadata
|
||||||
from calibre.ebooks.metadata import MetaInformation
|
|
||||||
from calibre.ebooks import BOOK_EXTENSIONS
|
from calibre.ebooks import BOOK_EXTENSIONS
|
||||||
from calibre.library.database2 import LibraryDatabase2, CoverCache
|
from calibre.library.database2 import LibraryDatabase2, CoverCache
|
||||||
from calibre.parallel import JobKilled
|
from calibre.parallel import JobKilled
|
||||||
@ -399,7 +398,7 @@ class Main(MainWindow, Ui_MainWindow):
|
|||||||
def change_output_format(self, x):
|
def change_output_format(self, x):
|
||||||
of = unicode(x).strip()
|
of = unicode(x).strip()
|
||||||
if of != prefs['output_format']:
|
if of != prefs['output_format']:
|
||||||
if of not in ('LRF', 'EPUB'):
|
if of not in ('LRF', 'EPUB', 'MOBI'):
|
||||||
warning_dialog(self, 'Warning',
|
warning_dialog(self, 'Warning',
|
||||||
'<p>%s support is still in beta. If you find bugs, please report them by opening a <a href="http://calibre.kovidgoyal.net">ticket</a>.'%of).exec_()
|
'<p>%s support is still in beta. If you find bugs, please report them by opening a <a href="http://calibre.kovidgoyal.net">ticket</a>.'%of).exec_()
|
||||||
prefs.set('output_format', of)
|
prefs.set('output_format', of)
|
||||||
|
@ -31,7 +31,8 @@ recipe_modules = ['recipe_' + r for r in (
|
|||||||
'pobjeda', 'chicago_breaking_news', 'glasgow_herald', 'linuxdevices',
|
'pobjeda', 'chicago_breaking_news', 'glasgow_herald', 'linuxdevices',
|
||||||
'hindu', 'cincinnati_enquirer', 'physics_world', 'pressonline',
|
'hindu', 'cincinnati_enquirer', 'physics_world', 'pressonline',
|
||||||
'la_republica', 'physics_today', 'chicago_tribune', 'e_novine',
|
'la_republica', 'physics_today', 'chicago_tribune', 'e_novine',
|
||||||
'al_jazeera', 'winsupersite', 'borba',
|
'al_jazeera', 'winsupersite', 'borba', 'courrierinternational',
|
||||||
|
'lamujerdemivida',
|
||||||
)]
|
)]
|
||||||
|
|
||||||
import re, imp, inspect, time, os
|
import re, imp, inspect, time, os
|
||||||
|
@ -0,0 +1,41 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2009, Mathieu Godlewski <mathieu at godlewski.fr>'
|
||||||
|
'''
|
||||||
|
Courrier International
|
||||||
|
'''
|
||||||
|
|
||||||
|
import re
|
||||||
|
from datetime import date
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class CourrierInternational(BasicNewsRecipe):
|
||||||
|
title = 'Courrier International'
|
||||||
|
__author__ = 'Mathieu Godlewski <mathieu at godlewski.fr>'
|
||||||
|
description = 'Global news in french from international newspapers'
|
||||||
|
oldest_article = 7
|
||||||
|
language = _('French')
|
||||||
|
max_articles_per_feed = 50
|
||||||
|
no_stylesheets = True
|
||||||
|
|
||||||
|
html2lrf_options = ['--base-font-size', '10']
|
||||||
|
|
||||||
|
feeds = [
|
||||||
|
# Some articles requiring subscription fails on download.
|
||||||
|
('A la Une', 'http://www.courrierinternational.com/rss/rss_a_la_une.xml'),
|
||||||
|
]
|
||||||
|
|
||||||
|
preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE|re.DOTALL), i[1]) for i in
|
||||||
|
[
|
||||||
|
#Handle Depeches
|
||||||
|
(r'.*<td [^>]*>([0-9][0-9]/.*</p>)</td>.*', lambda match : '<html><body><table><tr><td>'+match.group(1)+'</td></tr></table></body></html>'),
|
||||||
|
#Handle Articles
|
||||||
|
(r'.*<td [^>]*>(Courrier international.*?) <td width="10"><img src="/img/espaceur.gif"></td>.*', lambda match : '<html><body><table><tr><td>'+match.group(1)+'</body></html>'),
|
||||||
|
]
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def print_version(self, url):
|
||||||
|
return re.sub('/[a-zA-Z]+\.asp','/imprimer.asp' ,url)
|
||||||
|
|
76
src/calibre/web/feeds/recipes/recipe_lamujerdemivida.py
Normal file
76
src/calibre/web/feeds/recipes/recipe_lamujerdemivida.py
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
lamujerdemivida.com.ar
|
||||||
|
'''
|
||||||
|
from calibre import strftime
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class LaMujerDeMiVida(BasicNewsRecipe):
|
||||||
|
title = 'La Mujer de mi Vida'
|
||||||
|
__author__ = 'Darko Miletic'
|
||||||
|
description = 'Cultura de otra manera'
|
||||||
|
oldest_article = 90
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
no_stylesheets = True
|
||||||
|
use_embedded_content = False
|
||||||
|
encoding = 'cp1252'
|
||||||
|
publisher = 'La Mujer de mi Vida'
|
||||||
|
category = 'literatura, critica, arte, ensayos'
|
||||||
|
language = _('Spanish')
|
||||||
|
INDEX = 'http://www.lamujerdemivida.com.ar/'
|
||||||
|
html2lrf_options = [
|
||||||
|
'--comment', description
|
||||||
|
, '--category', category
|
||||||
|
, '--publisher', publisher
|
||||||
|
, '--ignore-tables'
|
||||||
|
]
|
||||||
|
|
||||||
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||||
|
|
||||||
|
keep_only_tags = [dict(name='table', attrs={'width':'570'})]
|
||||||
|
|
||||||
|
feeds = [(u'Articulos', u'http://www.lamujerdemivida.com.ar/index.php')]
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
soup.html['xml:lang'] = 'es-AR'
|
||||||
|
soup.html['lang'] = 'es-AR'
|
||||||
|
mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
|
||||||
|
soup.head.insert(0,mtag)
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
return soup
|
||||||
|
|
||||||
|
def get_cover_url(self):
|
||||||
|
cover_url = None
|
||||||
|
soup = self.index_to_soup(self.INDEX)
|
||||||
|
cover_item = soup.find('img',attrs={'alt':'Lamujerdemivida.'})
|
||||||
|
if cover_item:
|
||||||
|
cover_url = self.INDEX + cover_item['src']
|
||||||
|
return cover_url
|
||||||
|
|
||||||
|
def parse_index(self):
|
||||||
|
totalfeeds = []
|
||||||
|
lfeeds = self.get_feeds()
|
||||||
|
for feedobj in lfeeds:
|
||||||
|
feedtitle, feedurl = feedobj
|
||||||
|
self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
|
||||||
|
articles = []
|
||||||
|
soup = self.index_to_soup(feedurl)
|
||||||
|
for item in soup.findAll('td', attrs={'width':'390'}):
|
||||||
|
atag = item.find('a',href=True)
|
||||||
|
if atag:
|
||||||
|
url = atag['href']
|
||||||
|
title = self.tag_to_string(atag)
|
||||||
|
date = strftime(self.timefmt)
|
||||||
|
articles.append({
|
||||||
|
'title' :title
|
||||||
|
,'date' :date
|
||||||
|
,'url' :url
|
||||||
|
,'description':''
|
||||||
|
})
|
||||||
|
totalfeeds.append((feedtitle, articles))
|
||||||
|
return totalfeeds
|
||||||
|
|
@ -7,7 +7,7 @@ lemonde.fr
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
from datetime import date
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
@ -15,10 +15,14 @@ class LeMonde(BasicNewsRecipe):
|
|||||||
title = 'LeMonde.fr'
|
title = 'LeMonde.fr'
|
||||||
__author__ = 'Mathieu Godlewski <mathieu at godlewski.fr>'
|
__author__ = 'Mathieu Godlewski <mathieu at godlewski.fr>'
|
||||||
description = 'Global news in french'
|
description = 'Global news in french'
|
||||||
oldest_article = 7
|
oldest_article = 3
|
||||||
language = _('French')
|
language = _('French')
|
||||||
max_articles_per_feed = 20
|
max_articles_per_feed = 30
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
cover_url='http://abonnes.lemonde.fr/titresdumonde/'+date.today().strftime("%y%m%d")+'/1.jpg'
|
||||||
|
|
||||||
|
|
||||||
|
html2lrf_options = ['--base-font-size', '10']
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
('A la Une', 'http://www.lemonde.fr/rss/une.xml'),
|
('A la Une', 'http://www.lemonde.fr/rss/une.xml'),
|
||||||
@ -47,16 +51,48 @@ class LeMonde(BasicNewsRecipe):
|
|||||||
|
|
||||||
extra_css = '.ar-tit {font-size: x-large;} \n .dt {font-size: x-small;}'
|
extra_css = '.ar-tit {font-size: x-large;} \n .dt {font-size: x-small;}'
|
||||||
|
|
||||||
filter_regexps = [r'xiti\.com']
|
preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE|re.DOTALL), i[1]) for i in
|
||||||
|
|
||||||
preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
|
|
||||||
[
|
[
|
||||||
|
(r'<html.*(<div class="post".*?>.*?</div>.*?<div class="entry">.*?</div>).*You can start editing here.*</html>', lambda match : '<html><body>'+match.group(1)+'</body></html>'),
|
||||||
(r'<p>&nbsp;</p>', lambda match : ''),
|
(r'<p>&nbsp;</p>', lambda match : ''),
|
||||||
(r'<img src="http://medias\.lemonde\.fr/mmpub/img/let/(.)\.gif"[^>]*><div class=ar-txt>', lambda match : '<div class=ar-txt>'+match.group(1).upper()),
|
(r'<img src="http://medias\.lemonde\.fr/mmpub/img/let/(.)\.gif"[^>]*><div class=ar-txt>', lambda match : '<div class=ar-txt>'+match.group(1).upper()),
|
||||||
|
(r'<img src="http://medias\.lemonde\.fr/mmpub/img/let/q(.)\.gif"[^>]*><div class=ar-txt>', lambda match : '<div class=ar-txt>"'+match.group(1).upper()),
|
||||||
(r'(<div class=desc><b>.*</b></div>).*</body>', lambda match : match.group(1)),
|
(r'(<div class=desc><b>.*</b></div>).*</body>', lambda match : match.group(1)),
|
||||||
]
|
]
|
||||||
]
|
]
|
||||||
|
|
||||||
def print_version(self, url):
|
article_match_regexps = [ (re.compile(i)) for i in
|
||||||
return re.sub('http:.*_([0-9]+)_[0-9]+\.html.*','http://www.lemonde.fr/web/imprimer_element/0,40-0,50-\\1,0.html' ,url)
|
[
|
||||||
|
(r'http://www\.lemonde\.fr/\S+/article/.*'),
|
||||||
|
(r'http://www\.lemonde\.fr/\S+/portfolio/.*'),
|
||||||
|
(r'http://www\.lemonde\.fr/\S+/article_interactif/.*'),
|
||||||
|
(r'http://\S+\.blog\.lemonde\.fr/.*'),
|
||||||
|
]
|
||||||
|
]
|
||||||
|
|
||||||
|
def print_version(self, url):
|
||||||
|
return re.sub('http://www\.lemonde\.fr/.*_([0-9]+)_[0-9]+\.html.*','http://www.lemonde.fr/web/imprimer_element/0,40-0,50-\\1,0.html' ,url)
|
||||||
|
|
||||||
|
# Used to filter duplicated articles
|
||||||
|
articles_list = []
|
||||||
|
|
||||||
|
def get_article_url(self, article):
|
||||||
|
url=article.get('link', None)
|
||||||
|
url=url[0:url.find("#")]
|
||||||
|
if url in self.articles_list:
|
||||||
|
self.log_debug(_('Skipping duplicated article: %s')%url)
|
||||||
|
return False
|
||||||
|
if self.is_article_wanted(url):
|
||||||
|
self.articles_list.append(url)
|
||||||
|
return url
|
||||||
|
self.log_debug(_('Skipping filtered article: %s')%url)
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def is_article_wanted(self, url):
|
||||||
|
if self.article_match_regexps:
|
||||||
|
for m in self.article_match_regexps:
|
||||||
|
if m.search(url):
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
return False
|
||||||
|
Loading…
x
Reference in New Issue
Block a user