From f1ba77d1725212864d14091eab0e9e741fa7d9e2 Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Tue, 24 Feb 2009 23:43:54 -0500 Subject: [PATCH 01/11] Fix #1886. Handle invalid non-numeric font-size values. --- src/calibre/ebooks/oeb/transforms/flatcss.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/calibre/ebooks/oeb/transforms/flatcss.py b/src/calibre/ebooks/oeb/transforms/flatcss.py index ee7d7fa0b2..63466ac551 100644 --- a/src/calibre/ebooks/oeb/transforms/flatcss.py +++ b/src/calibre/ebooks/oeb/transforms/flatcss.py @@ -37,6 +37,8 @@ class KeyMapper(object): @staticmethod def relate(size, base): + if size == 0: + return base size = float(size) base = float(base) if abs(size - base) < 0.1: return 0 @@ -48,6 +50,7 @@ class KeyMapper(object): return result def __getitem__(self, ssize): + ssize = asfloat(ssize, 0) if ssize in self.cache: return self.cache[ssize] dsize = self.map(ssize) @@ -66,6 +69,7 @@ class ScaleMapper(object): self.dscale = float(dbase) / float(sbase) def __getitem__(self, ssize): + ssize = asfloat(ssize, 0) dsize = ssize * self.dscale return dsize From df4b4707a2eedcdf73b6f3604efa8f400c8ce342 Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Wed, 25 Feb 2009 00:14:11 -0500 Subject: [PATCH 02/11] #1887. Escape characters which are not allowed to appear in attribute values. --- src/calibre/ebooks/lit/reader.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/lit/reader.py b/src/calibre/ebooks/lit/reader.py index 06f5ebdabb..407aedae1f 100644 --- a/src/calibre/ebooks/lit/reader.py +++ b/src/calibre/ebooks/lit/reader.py @@ -276,8 +276,11 @@ class UnBinary(object): state = 'get attr' elif count > 0: if not in_censorship: - self.buf.write(c.encode( - 'ascii', 'xmlcharrefreplace')) + if c == '"': + c = '"' + elif c == '<': + c = '<' + self.buf.write(c.encode('ascii', 'xmlcharrefreplace')) count -= 1 if count == 0: if not in_censorship: From 670f450d78c29d5e476f2b9fe0d0b9aca1fc2788 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 24 Feb 2009 22:43:32 -0800 Subject: [PATCH 03/11] Make PDF conversion on OSX more robust. Fixes #1927 (PDF->MOBI traceback) --- src/calibre/ebooks/lrf/pdf/convert_from.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/lrf/pdf/convert_from.py b/src/calibre/ebooks/lrf/pdf/convert_from.py index ac9c4b3ade..cc8ebd64b8 100644 --- a/src/calibre/ebooks/lrf/pdf/convert_from.py +++ b/src/calibre/ebooks/lrf/pdf/convert_from.py @@ -3,6 +3,7 @@ __copyright__ = '2008, Kovid Goyal ' '''''' import sys, os, subprocess, logging +import errno from functools import partial from calibre import isosx, setup_cli_handlers, filename_to_utf8, iswindows, islinux from calibre.ebooks import ConversionError, DRMError @@ -41,14 +42,26 @@ def generate_html(pathtopdf, tdir): try: os.chdir(tdir) try: - p = popen(cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE) + p = popen(cmd, stderr=subprocess.PIPE) except OSError, err: if err.errno == 2: raise ConversionError(_('Could not find pdftohtml, check it is in your PATH'), True) else: raise + + ''' print p.stdout.read() - ret = p.wait() + ''' + while True: + try: + ret = p.wait() + break + except OSError, e: + if e.errno == errno.EINTR: + continue + else: + raise + if ret != 0: err = p.stderr.read() raise ConversionError, err From abcf40b21f97410dcf0dd3595a3c33a2917a7026 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 24 Feb 2009 22:53:31 -0800 Subject: [PATCH 04/11] Move MOBI output out of beta status, since it's pretty stable --- src/calibre/gui2/main.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/calibre/gui2/main.py b/src/calibre/gui2/main.py index 3972e00bdd..264b5149fb 100644 --- a/src/calibre/gui2/main.py +++ b/src/calibre/gui2/main.py @@ -44,7 +44,6 @@ from calibre.gui2.dialogs.search import SearchDialog from calibre.gui2.dialogs.choose_format import ChooseFormatDialog from calibre.gui2.dialogs.book_info import BookInfo from calibre.ebooks.metadata.meta import set_metadata -from calibre.ebooks.metadata import MetaInformation from calibre.ebooks import BOOK_EXTENSIONS from calibre.library.database2 import LibraryDatabase2, CoverCache from calibre.parallel import JobKilled @@ -399,7 +398,7 @@ class Main(MainWindow, Ui_MainWindow): def change_output_format(self, x): of = unicode(x).strip() if of != prefs['output_format']: - if of not in ('LRF', 'EPUB'): + if of not in ('LRF', 'EPUB', 'MOBI'): warning_dialog(self, 'Warning', '

%s support is still in beta. If you find bugs, please report them by opening a ticket.'%of).exec_() prefs.set('output_format', of) From 0a493da7e8bb29f313e9b3faa5e880ce0a2a044f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 25 Feb 2009 10:00:46 -0800 Subject: [PATCH 05/11] IGN:Add dbus library to linux binary build --- installer/linux/freeze.py | 1 + 1 file changed, 1 insertion(+) diff --git a/installer/linux/freeze.py b/installer/linux/freeze.py index 97bf8061e3..b836e48de1 100644 --- a/installer/linux/freeze.py +++ b/installer/linux/freeze.py @@ -36,6 +36,7 @@ def freeze(): '/lib/libbz2.so.1', '/usr/lib/libpoppler.so.4', '/usr/lib/libxml2.so.2', + '/usr/lib/libdbus-1.so.3', '/usr/lib/libxslt.so.1', '/usr/lib/libxslt.so.1', '/usr/lib/libgthread-2.0.so.0', From 18c39c6e2016215a0ec707422df6fc38bfa22fc0 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 25 Feb 2009 11:08:44 -0800 Subject: [PATCH 06/11] New recipe for Courrier International and updated Le Monde by Mathieu Godlewski --- src/calibre/web/feeds/recipes/__init__.py | 2 +- .../recipes/recipe_courrierinternational.py | 41 ++++++++++++++ .../web/feeds/recipes/recipe_le_monde.py | 56 +++++++++++++++---- 3 files changed, 88 insertions(+), 11 deletions(-) create mode 100644 src/calibre/web/feeds/recipes/recipe_courrierinternational.py diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py index 8277338e18..7ae997f90d 100644 --- a/src/calibre/web/feeds/recipes/__init__.py +++ b/src/calibre/web/feeds/recipes/__init__.py @@ -31,7 +31,7 @@ recipe_modules = ['recipe_' + r for r in ( 'pobjeda', 'chicago_breaking_news', 'glasgow_herald', 'linuxdevices', 'hindu', 'cincinnati_enquirer', 'physics_world', 'pressonline', 'la_republica', 'physics_today', 'chicago_tribune', 'e_novine', - 'al_jazeera', 'winsupersite', 'borba', + 'al_jazeera', 'winsupersite', 'borba', 'courrierinternational', )] import re, imp, inspect, time, os diff --git a/src/calibre/web/feeds/recipes/recipe_courrierinternational.py b/src/calibre/web/feeds/recipes/recipe_courrierinternational.py new file mode 100644 index 0000000000..153896d4e0 --- /dev/null +++ b/src/calibre/web/feeds/recipes/recipe_courrierinternational.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2009, Mathieu Godlewski ' +''' +Courrier International +''' + +import re +from datetime import date +from calibre.web.feeds.news import BasicNewsRecipe + +class CourrierInternational(BasicNewsRecipe): + title = 'Courrier International' + __author__ = 'Mathieu Godlewski ' + description = 'Global news in french from international newspapers' + oldest_article = 7 + language = _('French') + max_articles_per_feed = 50 + no_stylesheets = True + + html2lrf_options = ['--base-font-size', '10'] + + feeds = [ + # Some articles requiring subscription fails on download. + ('A la Une', 'http://www.courrierinternational.com/rss/rss_a_la_une.xml'), + ] + + preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE|re.DOTALL), i[1]) for i in + [ + #Handle Depeches + (r'.*]*>([0-9][0-9]/.*

).*', lambda match : '
'+match.group(1)+'
'), + #Handle Articles + (r'.*]*>(Courrier international.*?) .*', lambda match : '
'+match.group(1)+''), + ] + ] + + + def print_version(self, url): + return re.sub('/[a-zA-Z]+\.asp','/imprimer.asp' ,url) + diff --git a/src/calibre/web/feeds/recipes/recipe_le_monde.py b/src/calibre/web/feeds/recipes/recipe_le_monde.py index b543650200..0fc05b3aa5 100644 --- a/src/calibre/web/feeds/recipes/recipe_le_monde.py +++ b/src/calibre/web/feeds/recipes/recipe_le_monde.py @@ -7,7 +7,7 @@ lemonde.fr ''' import re - +from datetime import date from calibre.web.feeds.news import BasicNewsRecipe @@ -15,11 +15,15 @@ class LeMonde(BasicNewsRecipe): title = 'LeMonde.fr' __author__ = 'Mathieu Godlewski ' description = 'Global news in french' - oldest_article = 7 + oldest_article = 3 language = _('French') - max_articles_per_feed = 20 + max_articles_per_feed = 30 no_stylesheets = True + cover_url='http://abonnes.lemonde.fr/titresdumonde/'+date.today().strftime("%y%m%d")+'/1.jpg' + + html2lrf_options = ['--base-font-size', '10'] + feeds = [ ('A la Une', 'http://www.lemonde.fr/rss/une.xml'), ('International', 'http://www.lemonde.fr/rss/sequence/0,2-3210,1-0,0.xml'), @@ -38,25 +42,57 @@ class LeMonde(BasicNewsRecipe): ('Examens', 'http://www.lemonde.fr/rss/sequence/0,2-3404,1-0,0.xml'), ('Opinions', 'http://www.lemonde.fr/rss/sequence/0,2-3232,1-0,0.xml') ] - + remove_tags = [dict(name='img', attrs={'src':'http://medias.lemonde.fr/mmpub/img/lgo/lemondefr_pet.gif'}), dict(name='div', attrs={'id':'xiti-logo-noscript'}), dict(name='br', attrs={}), dict(name='iframe', attrs={}), ] - + extra_css = '.ar-tit {font-size: x-large;} \n .dt {font-size: x-small;}' - filter_regexps = [r'xiti\.com'] - - preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in + preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE|re.DOTALL), i[1]) for i in [ + (r'.*?.*?
.*?
).*You can start editing here.*', lambda match : ''+match.group(1)+''), (r'

 

', lambda match : ''), (r']*>
', lambda match : '
'+match.group(1).upper()), + (r']*>
', lambda match : '
"'+match.group(1).upper()), (r'(
.*
).*', lambda match : match.group(1)), ] ] - + + article_match_regexps = [ (re.compile(i)) for i in + [ + (r'http://www\.lemonde\.fr/\S+/article/.*'), + (r'http://www\.lemonde\.fr/\S+/portfolio/.*'), + (r'http://www\.lemonde\.fr/\S+/article_interactif/.*'), + (r'http://\S+\.blog\.lemonde\.fr/.*'), + ] + ] + def print_version(self, url): - return re.sub('http:.*_([0-9]+)_[0-9]+\.html.*','http://www.lemonde.fr/web/imprimer_element/0,40-0,50-\\1,0.html' ,url) + return re.sub('http://www\.lemonde\.fr/.*_([0-9]+)_[0-9]+\.html.*','http://www.lemonde.fr/web/imprimer_element/0,40-0,50-\\1,0.html' ,url) + # Used to filter duplicated articles + articles_list = [] + + def get_article_url(self, article): + url=article.get('link', None) + url=url[0:url.find("#")] + if url in self.articles_list: + self.log_debug(_('Skipping duplicated article: %s')%url) + return False + if self.is_article_wanted(url): + self.articles_list.append(url) + return url + self.log_debug(_('Skipping filtered article: %s')%url) + return False + + + def is_article_wanted(self, url): + if self.article_match_regexps: + for m in self.article_match_regexps: + if m.search(url): + return True + return False + return False From 9c3062a4b585c35f272e6f9363a907b6a9527b64 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 25 Feb 2009 12:12:56 -0800 Subject: [PATCH 07/11] Fix #1934 (Would like to assign a year (not a full date) to a title) --- src/calibre/gui2/library.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/calibre/gui2/library.py b/src/calibre/gui2/library.py index 38652ad971..c0f8eac796 100644 --- a/src/calibre/gui2/library.py +++ b/src/calibre/gui2/library.py @@ -90,11 +90,11 @@ class DateDelegate(QStyledItemDelegate): def displayText(self, val, locale): d = val.toDate() return d.toString('dd MMM yyyy') - if d.isNull(): - return '' - d = datetime(d.year(), d.month(), d.day()) - return strftime(BooksView.TIME_FMT, d.timetuple()) + def createEditor(self, parent, option, index): + qde = QStyledItemDelegate.createEditor(self, parent, option, index) + qde.setDisplayFormat('MM/dd/yyyy') + return qde class BooksModel(QAbstractTableModel): coding = zip( From 3ea924b1d39c58eed52ae5268004b06a7aaacf7d Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 25 Feb 2009 16:15:33 -0800 Subject: [PATCH 08/11] Refuse to set metadata in DRMed MOBI files as this can lead to files that dont work on the device --- src/calibre/ebooks/metadata/mobi.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/calibre/ebooks/metadata/mobi.py b/src/calibre/ebooks/metadata/mobi.py index b464b6553f..b6ec1fb721 100644 --- a/src/calibre/ebooks/metadata/mobi.py +++ b/src/calibre/ebooks/metadata/mobi.py @@ -81,6 +81,7 @@ class MetadataUpdater(object): type = self.type = data[60:68] self.nrecs, = unpack('>H', data[76:78]) record0 = self.record0 = self.record(0) + self.encryption_type, = unpack('>H', record0[12:14]) codepage, = unpack('>I', record0[28:32]) self.codec = 'utf-8' if codepage == 65001 else 'cp1252' image_base, = unpack('>I', record0[108:112]) @@ -134,6 +135,8 @@ class MetadataUpdater(object): if self.thumbnail_record is not None: recs.append((202, pack('>I', self.thumbnail_rindex))) exth = StringIO() + if getattr(self, 'encryption_type', -1) != 0: + raise MobiError('Setting metadata in DRMed MOBI files is not supported.') for code, data in recs: exth.write(pack('>II', code, len(data) + 8)) exth.write(data) From 0426b3150f68228131884b556cc72441c7fd1f8a Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 25 Feb 2009 16:20:26 -0800 Subject: [PATCH 09/11] New recipe for La Mujer De mi Vida by Darko Miletic --- src/calibre/web/feeds/recipes/__init__.py | 1 + .../feeds/recipes/recipe_lamujerdemivida.py | 76 +++++++++++++++++++ 2 files changed, 77 insertions(+) create mode 100644 src/calibre/web/feeds/recipes/recipe_lamujerdemivida.py diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py index 7ae997f90d..5c4976eb27 100644 --- a/src/calibre/web/feeds/recipes/__init__.py +++ b/src/calibre/web/feeds/recipes/__init__.py @@ -32,6 +32,7 @@ recipe_modules = ['recipe_' + r for r in ( 'hindu', 'cincinnati_enquirer', 'physics_world', 'pressonline', 'la_republica', 'physics_today', 'chicago_tribune', 'e_novine', 'al_jazeera', 'winsupersite', 'borba', 'courrierinternational', + 'lamujerdemivida', )] import re, imp, inspect, time, os diff --git a/src/calibre/web/feeds/recipes/recipe_lamujerdemivida.py b/src/calibre/web/feeds/recipes/recipe_lamujerdemivida.py new file mode 100644 index 0000000000..a99be8f955 --- /dev/null +++ b/src/calibre/web/feeds/recipes/recipe_lamujerdemivida.py @@ -0,0 +1,76 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2009, Darko Miletic ' +''' +lamujerdemivida.com.ar +''' +from calibre import strftime +from calibre.web.feeds.news import BasicNewsRecipe + +class LaMujerDeMiVida(BasicNewsRecipe): + title = 'La Mujer de mi Vida' + __author__ = 'Darko Miletic' + description = 'Cultura de otra manera' + oldest_article = 90 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + encoding = 'cp1252' + publisher = 'La Mujer de mi Vida' + category = 'literatura, critica, arte, ensayos' + language = _('Spanish') + INDEX = 'http://www.lamujerdemivida.com.ar/' + html2lrf_options = [ + '--comment', description + , '--category', category + , '--publisher', publisher + , '--ignore-tables' + ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' + + keep_only_tags = [dict(name='table', attrs={'width':'570'})] + + feeds = [(u'Articulos', u'http://www.lamujerdemivida.com.ar/index.php')] + + def preprocess_html(self, soup): + soup.html['xml:lang'] = 'es-AR' + soup.html['lang'] = 'es-AR' + mtag = '' + soup.head.insert(0,mtag) + for item in soup.findAll(style=True): + del item['style'] + return soup + + def get_cover_url(self): + cover_url = None + soup = self.index_to_soup(self.INDEX) + cover_item = soup.find('img',attrs={'alt':'Lamujerdemivida.'}) + if cover_item: + cover_url = self.INDEX + cover_item['src'] + return cover_url + + def parse_index(self): + totalfeeds = [] + lfeeds = self.get_feeds() + for feedobj in lfeeds: + feedtitle, feedurl = feedobj + self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl)) + articles = [] + soup = self.index_to_soup(feedurl) + for item in soup.findAll('td', attrs={'width':'390'}): + atag = item.find('a',href=True) + if atag: + url = atag['href'] + title = self.tag_to_string(atag) + date = strftime(self.timefmt) + articles.append({ + 'title' :title + ,'date' :date + ,'url' :url + ,'description':'' + }) + totalfeeds.append((feedtitle, articles)) + return totalfeeds + From 52d13c5a0e07765e6fa60f2f2c823c51bc14dea1 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 25 Feb 2009 19:22:01 -0800 Subject: [PATCH 10/11] version 0.4.141 --- src/calibre/constants.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/constants.py b/src/calibre/constants.py index ef83336740..d9d5ee0cfc 100644 --- a/src/calibre/constants.py +++ b/src/calibre/constants.py @@ -2,7 +2,7 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __docformat__ = 'restructuredtext en' __appname__ = 'calibre' -__version__ = '0.4.140' +__version__ = '0.4.141' __author__ = "Kovid Goyal " ''' Various run time constants. From 46c7759e8891ff00dd26696ce6c9c281fe6c0c9b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 25 Feb 2009 19:38:52 -0800 Subject: [PATCH 11/11] IGN:Tag release