mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
GwR utf-8 fix for Topaz metadata
This commit is contained in:
commit
4fce3578ff
BIN
resources/images/news/elsevier.png
Normal file
BIN
resources/images/news/elsevier.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 568 B |
65
resources/recipes/elsevier.recipe
Normal file
65
resources/recipes/elsevier.recipe
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
elsevier.nl
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Pagina12(BasicNewsRecipe):
    """Calibre news recipe that builds an e-book from the elsevier.nl RSS feeds.

    Articles are fetched through the site's print view (see
    ``print_version``) and reduced to the article container before
    conversion.

    NOTE(review): the class name does not match the publication and looks
    like a leftover from another recipe; it is kept unchanged so anything
    that refers to the class by name keeps working.
    """

    title = 'Elsevier.nl'
    __author__ = 'Darko Miletic'
    # elsevier.nl is a Dutch publication (language 'nl', country 'NL'),
    # so the descriptive metadata names the Netherlands.
    description = 'News from the Netherlands'
    publisher = 'elsevier.nl'
    category = 'news, politics, Netherlands'
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    language = 'nl'
    country = 'NL'
    remove_empty_feeds = True
    masthead_url = 'http://www.elsevier.nl/static/elsevier/stdimg/logo.gif'
    extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} '

    # Metadata handed to the conversion pipeline; mirrors the attributes above.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # keep_only_tags is documented as a *list* of tag specifications, so the
    # single spec is wrapped in a list rather than passed as a bare dict.
    keep_only_tags = [dict(attrs={'id': 'artikel_container'})]
    remove_tags_before = dict(attrs={'id': 'breadcrumb_container'})
    remove_tags_after = dict(attrs={'class': 'author_link'})
    remove_tags = [
        dict(attrs={'id': 'breadcrumb_container'}),
        dict(name='div', attrs={'class': 'pullout_vak'}),
    ]
    remove_attributes = ['width', 'height']

    # (feed title, feed URL) pairs.
    feeds = [
        (u'Laatste nieuws', u'http://www.elsevier.nl/web/RSS/Homepage-RSS.htm?output=xml'),
        (u'Nederland', u'http://www.elsevier.nl/web/RSS/Nederland-RSS.htm?output=xml'),
        (u'Politiek', u'http://www.elsevier.nl/web/RSS/Politiek-RSS.htm?output=xml'),
        (u'Europese Unie', u'http://www.elsevier.nl/web/RSS/Europese-Unie-RSS.htm?output=xml'),
        (u'Buitenland', u'http://www.elsevier.nl/web/RSS/Buitenland-RSS.htm?output=xml'),
        (u'Economie', u'http://www.elsevier.nl/web/RSS/Economie-RSS.htm?output=xml'),
        (u'Wetenschap', u'http://www.elsevier.nl/web/RSS/Wetenschap-RSS.htm?output=xml'),
        (u'Cultuur & Televisie', u'http://www.elsevier.nl/web/RSS/Cultuur-Televisie-RSS.htm?output=xml'),
        (u'Society', u'http://www.elsevier.nl/web/RSS/Society-RSS.htm?output=xml'),
        (u'Internet & Gadgets', u'http://www.elsevier.nl/web/RSS/Internet-Gadgets-RSS.htm?output=xml'),
        (u'Commentaren', u'http://www.elsevier.nl/web/RSS/Commentaren-RSS.htm?output=xml'),
    ]

    def print_version(self, url):
        """Return the print-view URL for ``url`` by appending ``?print=true``.

        Relies on ``get_article_url`` having stripped any query string, so
        the appended ``?`` starts the only query component.
        """
        return url + '?print=true'

    def get_article_url(self, article):
        """Return the article URL taken from the feed entry's ``guid``.

        The query string (everything from the last ``?``) is dropped. A guid
        without a query string is returned unchanged, and ``None`` is
        returned when the entry carries no guid at all.
        """
        guid = article.get('guid', None)
        if not guid:
            # No guid on this entry: nothing to build a URL from.
            return None
        if '?' not in guid:
            # rpartition() would put the whole string in the last slot and
            # yield '' here, so hand the guid back untouched instead.
            return guid
        return guid.rpartition('?')[0]

    def preprocess_html(self, soup):
        """Strip every inline ``style`` attribute from ``soup`` and return it."""
        for item in soup.findAll(style=True):
            del item['style']
        return soup
|
62
resources/recipes/taz.recipe
Normal file
62
resources/recipes/taz.recipe
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Lars Jacob jacob.lars at gmail.com'
|
||||||
|
__docformat__ = 'restructuredtext de'
|
||||||
|
|
||||||
|
'''
|
||||||
|
www.taz.de/digiabo
|
||||||
|
'''
|
||||||
|
import os, urllib2, zipfile, tempfile
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class TazDigiabo(BasicNewsRecipe):
    """Calibre recipe that downloads the taz "DigiAbo" EPUB issue.

    Instead of assembling articles from feeds, ``build_index`` fetches a
    ready-made EPUB with the subscriber's credentials and unpacks it into
    the recipe's output directory.
    """

    title = u'Taz Digiabo'
    description = u'Das EPUB DigiAbo der Taz'
    language = 'de'
    lang = 'de-DE'

    __author__ = 'Lars Jacob'
    needs_subscription = True

    conversion_options = {
        'no_default_epub_cover': True,
    }

    def build_index(self):
        """Download the current issue and unpack it into ``self.output_dir``.

        Returns the path of ``content.opf`` inside ``self.output_dir`` on
        success. Returns ``None`` when no username/password is configured
        or when the server rejects the request with an HTTP error.
        """
        if self.username is None or self.password is None:
            # Without credentials there is nothing to download.
            return None

        domain = "http://www.taz.de"
        url = domain + "/epub/"

        # HTTP basic auth against the subscriber realm.
        auth_handler = urllib2.HTTPBasicAuthHandler()
        auth_handler.add_password(realm='TAZ-ABO',
                                  uri=url,
                                  user=self.username,
                                  passwd=self.password)
        opener = urllib2.build_opener(auth_handler)
        # install_opener makes this the default opener used by urlopen().
        urllib2.install_opener(opener)

        try:
            response = urllib2.urlopen(url)
        except urllib2.HTTPError:
            self.report_progress(0,_('Can\'t login to download issue'))
            return None

        # try/finally (rather than ``with``) keeps this usable on old
        # Python 2 releases while still guaranteeing that the response,
        # the temporary file and the archive are closed on every path.
        try:
            tmp = tempfile.TemporaryFile()
            try:
                self.report_progress(0,_('downloading epub'))
                tmp.write(response.read())

                # An EPUB is a zip archive; ZipFile locates the central
                # directory itself, so no explicit rewind is needed.
                zfile = zipfile.ZipFile(tmp, 'r')
                try:
                    self.report_progress(0,_('extracting epub'))
                    zfile.extractall(self.output_dir)
                finally:
                    zfile.close()
            finally:
                tmp.close()
        finally:
            response.close()

        index = os.path.join(self.output_dir, 'content.opf')

        self.report_progress(1,_('epub downloaded and extracted'))

        return index
|
@ -115,7 +115,7 @@ class BOOX(HANLINV3):
|
|||||||
supported_platforms = ['windows', 'osx', 'linux']
|
supported_platforms = ['windows', 'osx', 'linux']
|
||||||
|
|
||||||
# Ordered list of supported formats
|
# Ordered list of supported formats
|
||||||
FORMATS = ['epub', 'fb2', 'pdf', 'html', 'txt', 'rtf', 'mobi', 'prc', 'chm']
|
FORMATS = ['epub', 'fb2', 'djvu', 'pdf', 'html', 'txt', 'rtf', 'mobi', 'prc', 'chm']
|
||||||
|
|
||||||
VENDOR_ID = [0x0525]
|
VENDOR_ID = [0x0525]
|
||||||
PRODUCT_ID = [0xa4a5]
|
PRODUCT_ID = [0xa4a5]
|
||||||
|
@ -32,6 +32,7 @@ class IRIVER_STORY(USBMS):
|
|||||||
|
|
||||||
MAIN_MEMORY_VOLUME_LABEL = 'Story Main Memory'
|
MAIN_MEMORY_VOLUME_LABEL = 'Story Main Memory'
|
||||||
STORAGE_CARD_VOLUME_LABEL = 'Story Storage Card'
|
STORAGE_CARD_VOLUME_LABEL = 'Story Storage Card'
|
||||||
|
EBOOK_DIR_MAIN = 'Book'
|
||||||
|
|
||||||
SUPPORTS_SUB_DIRS = True
|
SUPPORTS_SUB_DIRS = True
|
||||||
|
|
||||||
|
@ -19,6 +19,8 @@ def _clean(s):
|
|||||||
|
|
||||||
def _detag(tag):
|
def _detag(tag):
|
||||||
str = u""
|
str = u""
|
||||||
|
if tag is None:
|
||||||
|
return str
|
||||||
for elem in tag:
|
for elem in tag:
|
||||||
if hasattr(elem, "contents"):
|
if hasattr(elem, "contents"):
|
||||||
str += _detag(elem)
|
str += _detag(elem)
|
||||||
@ -34,7 +36,7 @@ def _metadata_from_table(soup, searchfor):
|
|||||||
td = td.parent
|
td = td.parent
|
||||||
# there appears to be multiple ways of structuring the metadata
|
# there appears to be multiple ways of structuring the metadata
|
||||||
# on the home page. cue some nasty special-case hacks...
|
# on the home page. cue some nasty special-case hacks...
|
||||||
if re.match(r'^\s*'+searchfor+r'\s*$', td.renderContents(), flags=re.I):
|
if re.match(r'^\s*'+searchfor+r'\s*$', td.renderContents(None), flags=re.I):
|
||||||
meta = _detag(td.findNextSibling('td'))
|
meta = _detag(td.findNextSibling('td'))
|
||||||
return re.sub('^:', '', meta).strip()
|
return re.sub('^:', '', meta).strip()
|
||||||
else:
|
else:
|
||||||
@ -46,7 +48,7 @@ def _metadata_from_span(soup, searchfor):
|
|||||||
if span is None:
|
if span is None:
|
||||||
return None
|
return None
|
||||||
# this metadata might need some cleaning up still :/
|
# this metadata might need some cleaning up still :/
|
||||||
return _detag(span.renderContents().strip())
|
return _detag(span.renderContents(None).strip())
|
||||||
|
|
||||||
def _get_authors(soup):
|
def _get_authors(soup):
|
||||||
aut = (_metadata_from_span(soup, r'author')
|
aut = (_metadata_from_span(soup, r'author')
|
||||||
|
@ -32,7 +32,7 @@ def get_metadata(stream):
|
|||||||
if stream_type:
|
if stream_type:
|
||||||
stream_type = stream_type[1:]
|
stream_type = stream_type[1:]
|
||||||
if stream_type in ('lit', 'opf', 'prc', 'mobi', 'fb2', 'epub',
|
if stream_type in ('lit', 'opf', 'prc', 'mobi', 'fb2', 'epub',
|
||||||
'rb', 'imp', 'pdf', 'lrf'):
|
'rb', 'imp', 'pdf', 'lrf', 'azw'):
|
||||||
with TemporaryDirectory() as tdir:
|
with TemporaryDirectory() as tdir:
|
||||||
with CurrentDir(tdir):
|
with CurrentDir(tdir):
|
||||||
stream = extract_member(path, match=None, name=f,
|
stream = extract_member(path, match=None, name=f,
|
||||||
|
@ -272,20 +272,19 @@ class MetadataUpdater(object):
|
|||||||
def generate_metadata_stream(self):
|
def generate_metadata_stream(self):
|
||||||
ms = StringIO.StringIO()
|
ms = StringIO.StringIO()
|
||||||
# Generate the header
|
# Generate the header
|
||||||
ms.write(self.encode_vwi(len(self.md_header['tag'])))
|
ms.write(self.encode_vwi(len(self.md_header['tag'])).encode('iso-8859-1'))
|
||||||
ms.write(self.md_header['tag'])
|
ms.write(self.md_header['tag'])
|
||||||
ms.write(chr(self.md_header['flags']))
|
ms.write(chr(self.md_header['flags']))
|
||||||
ms.write(chr(len(self.metadata)))
|
ms.write(chr(len(self.metadata)))
|
||||||
|
|
||||||
# Add the metadata fields
|
# Add the metadata fields.
|
||||||
for item in self.metadata:
|
for item in self.metadata:
|
||||||
ms.write(self.encode_vwi(len(self.metadata[item]['tag'])))
|
ms.write(self.encode_vwi(len(self.metadata[item]['tag'])).encode('iso-8859-1'))
|
||||||
ms.write(self.metadata[item]['tag'])
|
ms.write(self.metadata[item]['tag'])
|
||||||
ms.write(self.encode_vwi(len(self.metadata[item]['metadata'])))
|
ms.write(self.encode_vwi(len(self.metadata[item]['metadata'])).encode('iso-8859-1'))
|
||||||
ms.write(self.metadata[item]['metadata'])
|
ms.write(self.metadata[item]['metadata'])
|
||||||
|
|
||||||
return ms.getvalue().encode('iso-8859-1')
|
return ms.getvalue()
|
||||||
#return ms.getvalue().encode('utf-8')
|
|
||||||
|
|
||||||
def get_md_header(self,offset):
|
def get_md_header(self,offset):
|
||||||
md_header = {}
|
md_header = {}
|
||||||
@ -344,11 +343,11 @@ class MetadataUpdater(object):
|
|||||||
|
|
||||||
if mi.author_sort and pas:
|
if mi.author_sort and pas:
|
||||||
authors = mi.author_sort
|
authors = mi.author_sort
|
||||||
update_metadata('Authors',authors)
|
update_metadata('Authors',authors.encode('utf-8'))
|
||||||
elif mi.authors:
|
elif mi.authors:
|
||||||
authors = '; '.join(mi.authors)
|
authors = '; '.join(mi.authors)
|
||||||
update_metadata('Authors',authors)
|
update_metadata('Authors',authors)
|
||||||
update_metadata('Title',mi.title)
|
update_metadata('Title',mi.title.encode('utf-8'))
|
||||||
|
|
||||||
updated_metadata = self.generate_metadata_stream()
|
updated_metadata = self.generate_metadata_stream()
|
||||||
head = self.fixup_topaz_headers(len(updated_metadata) - self.orig_md_len)
|
head = self.fixup_topaz_headers(len(updated_metadata) - self.orig_md_len)
|
||||||
|
@ -23,7 +23,7 @@ def get_metadata(stream):
|
|||||||
if stream_type:
|
if stream_type:
|
||||||
stream_type = stream_type[1:]
|
stream_type = stream_type[1:]
|
||||||
if stream_type in ('lit', 'opf', 'prc', 'mobi', 'fb2', 'epub',
|
if stream_type in ('lit', 'opf', 'prc', 'mobi', 'fb2', 'epub',
|
||||||
'rb', 'imp', 'pdf', 'lrf'):
|
'rb', 'imp', 'pdf', 'lrf', 'azw'):
|
||||||
with TemporaryDirectory() as tdir:
|
with TemporaryDirectory() as tdir:
|
||||||
with CurrentDir(tdir):
|
with CurrentDir(tdir):
|
||||||
path = zf.extract(f)
|
path = zf.extract(f)
|
||||||
|
@ -102,7 +102,7 @@ class CSV_XML(CatalogPlugin):
|
|||||||
if field == 'formats':
|
if field == 'formats':
|
||||||
fmt_list = []
|
fmt_list = []
|
||||||
for format in item:
|
for format in item:
|
||||||
fmt_list.append(format.partition('.')[2])
|
fmt_list.append(format.rpartition('.')[2].lower())
|
||||||
item = ', '.join(fmt_list)
|
item = ', '.join(fmt_list)
|
||||||
elif field in ['authors','tags']:
|
elif field in ['authors','tags']:
|
||||||
item = ', '.join(item)
|
item = ', '.join(item)
|
||||||
@ -3869,9 +3869,7 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
elem.extract()
|
elem.extract()
|
||||||
|
|
||||||
# Reconstruct comments w/o <div>s
|
# Reconstruct comments w/o <div>s
|
||||||
comments = soup.renderContents()
|
comments = soup.renderContents(None)
|
||||||
if not isinstance(comments, unicode):
|
|
||||||
comments = comments.decode('utf-8', 'replace')
|
|
||||||
|
|
||||||
# Convert \n\n to <p>s
|
# Convert \n\n to <p>s
|
||||||
if re.search('\n\n', comments):
|
if re.search('\n\n', comments):
|
||||||
@ -3883,7 +3881,7 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
pTag.insert(0,p)
|
pTag.insert(0,p)
|
||||||
soup.insert(tsc,pTag)
|
soup.insert(tsc,pTag)
|
||||||
tsc += 1
|
tsc += 1
|
||||||
comments = soup.renderContents()
|
comments = soup.renderContents(None)
|
||||||
|
|
||||||
# Convert solo returns to <br />
|
# Convert solo returns to <br />
|
||||||
comments = re.sub('[\r\n]','<br />', comments)
|
comments = re.sub('[\r\n]','<br />', comments)
|
||||||
|
@ -313,3 +313,9 @@ I want some feature added to |app|. What can I do?
|
|||||||
You have two choices:
|
You have two choices:
|
||||||
1. Create a patch by hacking on |app| and send it to me for review and inclusion. See `Development <http://calibre-ebook.com/get-involved>`_.
|
1. Create a patch by hacking on |app| and send it to me for review and inclusion. See `Development <http://calibre-ebook.com/get-involved>`_.
|
||||||
2. `Open a ticket <http://bugs.calibre-ebook.com/newticket>`_ (you have to register and login first) and hopefully I will find the time to implement your feature.
|
2. `Open a ticket <http://bugs.calibre-ebook.com/newticket>`_ (you have to register and login first) and hopefully I will find the time to implement your feature.
|
||||||
|
|
||||||
|
Can I include |app| on a CD to be distributed with my product/magazine?
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|app| is licensed under the GNU General Public License v3 (an open source license). This means that you are free to redistribute |app| as long as you make the source code available. So if you want to put |app| on a CD with your product, you must also put the |app| source code on the CD. The source code is available for download `here <http://code.google.com/p/calibre-ebook/downloads/list>`_.
|
||||||
|
|
||||||
|
|
||||||
|
@ -6492,7 +6492,7 @@ msgstr ""
|
|||||||
" <ol>\n"
|
" <ol>\n"
|
||||||
" <li>Отключите устройство. Дождитесь завершения создания "
|
" <li>Отключите устройство. Дождитесь завершения создания "
|
||||||
"базы данных (т.е. дождитесь ее готовности к использованию). Подключите "
|
"базы данных (т.е. дождитесь ее готовности к использованию). Подключите "
|
||||||
"устройство. Теперь должно все заработать в приложении %(apps)s. Если этого "
|
"устройство. Теперь должно все заработать в приложении %(app)s. Если этого "
|
||||||
"не произошло, то переходите к следующему пункту.</li>\n"
|
"не произошло, то переходите к следующему пункту.</li>\n"
|
||||||
" <li>Закройте %(app)s приложение. Найдите файл media.xml в "
|
" <li>Закройте %(app)s приложение. Найдите файл media.xml в "
|
||||||
"основной памяти устройства. Удалите его. Отключите устройство. Подождите "
|
"основной памяти устройства. Удалите его. Отключите устройство. Подождите "
|
||||||
|
@ -135,7 +135,7 @@ def option_parser():
|
|||||||
'to SMTP server.'))
|
'to SMTP server.'))
|
||||||
r=parser.add_option_group('SMTP RELAY',
|
r=parser.add_option_group('SMTP RELAY',
|
||||||
'Options to use an SMTP relay server to send mail. '
|
'Options to use an SMTP relay server to send mail. '
|
||||||
'%prog will try to send the email directly unless --relay is '
|
'calibre will try to send the email directly unless --relay is '
|
||||||
'specified.').add_option
|
'specified.').add_option
|
||||||
r('-r', '--relay', help=('An SMTP relay server to use to send mail.'))
|
r('-r', '--relay', help=('An SMTP relay server to use to send mail.'))
|
||||||
r('-p', '--port', default=-1,
|
r('-p', '--port', default=-1,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user