merging 0.6.42

This commit is contained in:
James Ralston 2010-02-21 10:37:59 -08:00
commit ba051d2480
47 changed files with 17448 additions and 7002 deletions

View File

@ -4,6 +4,80 @@
# for important features/bug fixes. # for important features/bug fixes.
# Also, each release can have new and improved recipes. # Also, each release can have new and improved recipes.
- version: 0.6.42
date: 2010-02-20
bug fixes:
- title: "Fix regression that broke catalog generation from the Graphical User Interface in 0.6.41"
- title: "Fix right edge of comics like Dilbert and xkcd getting cut off on the SONY reader. More generally, take page margins into account when rescaling images to fit in the selected output profile."
- version: 0.6.41
date: 2010-02-19
new features:
- title: "Make calibre timezone aware. This required lots of internal changes, so I may have broken something"
type: major
- title: "Allow editing of metadata in DRMed MOBI files"
type: major
- title: "ebook-convert: Allow passing URLs as argument to --cover"
tickets: [4909]
- title: "OS X/linux driver for EB511"
- title: "ebook-meta: Allow changing of published date"
- title: "Make replacing of files in ZIP archives faster and (hopefully) more robust"
- title: "Speed optimization for viewing large EPUB files"
- title: "Speed up parsing of OPF files"
tickets: [4908]
bug fixes:
- title: "Fix drag and drop of multiple books to OS X dock icon"
tickets: [4849]
- title: "MOBI Output: Encode titles as UTF-8 in the PalmDoc header as well as the EXTH header, since there are apparently MOBI readers that use the title from the PalmDoc header in preference to the title from the EXTH header."
- title: "MOBI Output: Remove soft hyphens as the Kindle doesn't support them."
tickets: [4887]
- title: "Fix Boox main mem and SD card swapped on windows"
- title: "Fix sending large ebook files to devices"
tickets: [4896]
- title: "EPUB Output: Strip invalid anchors from NCX TOC as Adobe Digital Editions cries when it sees one"
tickets: [4907]
- title: "EPUB metadata: Don't set title_sort as a file_as attribute, as the brain-dead OPF spec doesn't allow this"
- title: "Make publishing the content server via mDNS a little more robust"
- title: "Content server: Use new exact matching for greater precision when generating OPDS catalogs. Also fix regression that broke browsing by Tags on Stanza."
- title: "Proper fix for breakage in LRF viewer caused by API change in QGraphicsItem in Qt 4.6"
new recipes:
- title: Various Polish news sources
author: Tomasz Dlugosz
- title: Que Leer, Wired UK
author: Darko Miletic
- title: Kathimerini and Ta Nea
author: Pan
- title: Winter Olympics
author: Starson17
improved recipes:
- Wired Magazine
- version: 0.6.40 - version: 0.6.40
date: 2010-02-12 date: 2010-02-12

Binary file not shown.

After

Width:  |  Height:  |  Size: 647 B

View File

@ -0,0 +1,60 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'Mori'
__version__ = 'v. 0.5'
'''
di.com.pl
'''
from calibre.web.feeds.news import BasicNewsRecipe
import re
class DziennikInternautowRecipe(BasicNewsRecipe):
    """Recipe for Dziennik Internautow (di.com.pl), a Polish internet/IT news daily."""

    __author__ = 'Mori'
    language = 'pl'

    title = u'Dziennik Internautow'
    # BUG FIX: publisher/description previously held double-encoded UTF-8 byte
    # sequences inside unicode literals (e.g. u'\xc3\xb3' instead of u'\xf3'),
    # so they rendered as mojibake; replaced with the proper code points.
    publisher = u'Dziennik Internaut\xf3w Sp. z o.o.'
    description = u'Internet w \u017cyciu i biznesie. Porady, wywiady, interwencje, bezpiecze\u0144stwo w Sieci, technologia.'

    max_articles_per_feed = 100
    oldest_article = 7
    cover_url = 'http://di.com.pl/pic/logo_di_norm.gif'

    no_stylesheets = True
    remove_javascript = True
    encoding = 'utf-8'

    extra_css = '''
        .fotodesc{font-size: 75%;}
        .pub_data{font-size: 75%;}
        .fotonews{clear: both; padding-top: 10px; padding-bottom: 10px;}
        #pub_foto{font-size: 75%; float: left; padding-right: 10px;}
    '''

    feeds = [
        (u'Dziennik Internaut\xf3w', u'http://feeds.feedburner.com/glowny-di')
    ]

    # Keep only the article header and body containers.
    keep_only_tags = [
        dict(name='div', attrs={'id': 'pub_head'}),
        dict(name='div', attrs={'id': 'pub_content'})
    ]

    remove_tags = [
        dict(name='div', attrs={'class': 'poradniki_context'}),
        dict(name='div', attrs={'class': 'uniBox'}),
        dict(name='object', attrs={}),
        dict(name='h3', attrs={})
    ]

    preprocess_regexps = [
        (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
        [
            # Drop the trailing comments-link block of the article div.
            (r', <a href="http://di.com.pl/komentarze,.*?</div>', lambda match: '</div>'),
            # Strip extra attributes from the photo container.
            (r'<div class="fotonews".*?">', lambda match: '<div class="fotonews">'),
            # Swap thumbnail image URLs for the full-size originals.
            (r'http://di.com.pl/pic/photo/mini/', lambda match: 'http://di.com.pl/pic/photo/oryginal/'),
            # Collapse whitespace before closing tags.
            (r'\s*</', lambda match: '</'),
        ]
    ]

View File

@ -0,0 +1,49 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'Mori'
__version__ = 'v. 0.1'
'''
blog.eclicto.pl
'''
from calibre.web.feeds.news import BasicNewsRecipe
import re
class BlogeClictoRecipe(BasicNewsRecipe):
    """Download recipe for 'Blog eClicto', a Polish blog about e-paper and e-books."""

    __author__ = 'Mori'
    language = 'pl'

    title = u'Blog eClicto'
    publisher = u'Blog eClicto'
    description = u'Blog o e-papierze i e-bookach'

    max_articles_per_feed = 100
    cover_url = 'http://blog.eclicto.pl/wordpress/wp-content/themes/blog_eclicto/g/logo.gif'

    no_stylesheets = True
    remove_javascript = True
    encoding = 'utf-8'

    # Float images left of the surrounding text with a little breathing room.
    extra_css = '''
        img{float: left; padding-right: 10px; padding-bottom: 5px;}
    '''

    feeds = [
        (u'Blog eClicto', u'http://blog.eclicto.pl/feed/'),
    ]

    # Strip the tag cloud; keep content only up to the end of the post body.
    remove_tags = [dict(name='span', attrs={'id': 'tags'})]
    remove_tags_after = [dict(name='div', attrs={'class': 'post'})]

    # Collapse whitespace that precedes a closing tag.
    preprocess_regexps = [
        (re.compile(r'\s*</', re.IGNORECASE | re.DOTALL), lambda m: '</'),
    ]

View File

@ -1,6 +1,6 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v2' __license__ = 'GPL v3'
__copyright__ = u'2010, Tomasz Dlugosz <tomek3d@gmail.com>' __copyright__ = u'2010, Tomasz Dlugosz <tomek3d@gmail.com>'
''' '''
eksiazki.org eksiazki.org
@ -10,14 +10,14 @@ from calibre.web.feeds.news import BasicNewsRecipe
class eksiazki(BasicNewsRecipe): class eksiazki(BasicNewsRecipe):
title = u'eksiazki.org' title = u'eKsiazki.org'
desciption = u'Twoje centrum wiedzy o epapierze i ebookach' desciption = u'Twoje centrum wiedzy o ePapierze i eBookach'
language = 'pl' language = 'pl'
__author__ = u'Tomasz D\u0142ugosz' __author__ = u'Tomasz D\u0142ugosz'
no_stylesheets = True no_stylesheets = True
remove_javascript = True remove_javascript = True
feeds = [(u'wpisy', u'http://www.eksiazki.org/feed/')] feeds = [(u'eKsiazki.org', u'http://www.eksiazki.org/feed/')]
keep_only_tags = [dict(name='div', attrs={'id':'content-body'})] keep_only_tags = [dict(name='div', attrs={'id':'content-body'})]
remove_tags = [ remove_tags = [

View File

@ -0,0 +1,38 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2010, Tomasz Dlugosz <tomek3d@gmail.com>'
'''
fakty.interia.pl
'''
from calibre.web.feeds.news import BasicNewsRecipe
class InteriaFakty(BasicNewsRecipe):
    """News recipe for the 'Fakty' (news) section of the Polish portal Interia.pl."""

    title = u'Interia.pl - Fakty'
    language = 'pl'
    __author__ = u'Tomasz D\u0142ugosz'

    oldest_article = 7
    max_articles_per_feed = 100
    simultaneous_downloads = 2
    no_stylesheets = True
    remove_javascript = True

    feeds = [
        (u'Kraj', u'http://kanaly.rss.interia.pl/kraj.xml'),
        (u'\u015awiat', u'http://kanaly.rss.interia.pl/swiat.xml'),
        (u'Wiadomo\u015bci dnia', u'http://kanaly.rss.interia.pl/fakty.xml'),
        (u'Przegl\u0105d prasy', u'http://kanaly.rss.interia.pl/przeglad_prasy.xml'),
        (u'Wywiady', u'http://kanaly.rss.interia.pl/wywiady.xml'),
        (u'Ciekawostki', u'http://kanaly.rss.interia.pl/ciekawostki.xml'),
    ]

    # Keep only the article container and drop the page chrome inside it.
    keep_only_tags = [dict(name='div', attrs={'id': 'article'})]
    remove_tags = [
        dict(name='div', attrs={'class': 'box fontSizeSwitch'}),
        dict(name='div', attrs={'class': 'clear'}),
        dict(name='div', attrs={'class': 'embed embedLeft articleEmbedArticleList articleEmbedArticleListTitle'}),
        dict(name='span', attrs={'class': 'keywords'}),
    ]

    extra_css = '''
        h2 { font-size: 1.2em; }
    '''

View File

@ -0,0 +1,71 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2010, Tomasz Dlugosz <tomek3d@gmail.com>'
'''
sport.interia.pl
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class InteriaSport(BasicNewsRecipe):
    """News recipe for the sport section of the Polish portal Interia.pl."""

    title = u'Interia.pl - Sport'
    language = 'pl'
    __author__ = u'Tomasz D\u0142ugosz'

    oldest_article = 7
    max_articles_per_feed = 100
    simultaneous_downloads = 3
    no_stylesheets = True
    remove_javascript = True

    feeds = [
        (u'Wydarzenia sportowe', u'http://kanaly.rss.interia.pl/sport.xml'),
        (u'Pi\u0142ka no\u017cna', u'http://kanaly.rss.interia.pl/pilka_nozna.xml'),
        (u'Siatk\xf3wka', u'http://kanaly.rss.interia.pl/siatkowka.xml'),
        (u'Koszyk\xf3wka', u'http://kanaly.rss.interia.pl/koszykowka.xml'),
        (u'NBA', u'http://kanaly.rss.interia.pl/nba.xml'),
        (u'Kolarstwo', u'http://kanaly.rss.interia.pl/kolarstwo.xml'),
        (u'\u017bu\u017cel', u'http://kanaly.rss.interia.pl/zuzel.xml'),
        (u'Tenis', u'http://kanaly.rss.interia.pl/tenis.xml'),
    ]

    keep_only_tags = [dict(name='div', attrs={'id': 'article'})]
    remove_tags = [dict(name='div', attrs={'class': 'object gallery'})]

    # BUG FIX: 'text-indent: 0' and 'margin-top: 1em' were missing trailing
    # semicolons, so CSS parsers treated the following 'color: black;' line as
    # part of an invalid value and dropped both declarations.
    extra_css = '''
        .articleDate {
            font-size: 0.5em;
            color: black;
        }
        .articleFoto {
            display: block;
            font-family: sans;
            font-size: 0.5em;
            text-indent: 0;
            color: black;
        }
        .articleText {
            display: block;
            margin-bottom: 1em;
            margin-left: 0;
            margin-right: 0;
            margin-top: 1em;
            color: black;
        }
        .articleLead {
            font-size: 1.2em;
        }
    '''

    preprocess_regexps = [
        (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
        [
            # Drop paragraphs that contain only a link.
            (r'<p><a href.*?</a></p>', lambda match: ''),
            # FIXME
            #(r'(<div id="newsAddContent">)(.*?)(<a href=".*">)(.*?)(</a>)', lambda match: '\1\2\4'),
            # Cut the trailing "ZOBACZ/CZYTAJ ..." (see-also) block.
            (r'<p>(<i>)?<b>(ZOBACZ|CZYTAJ) T.*?</div>', lambda match: '</div>')
        ]
    ]

View File

@ -0,0 +1,43 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'Mori'
__version__ = 'v. 0.1'
'''
olgierd.bblog.pl
'''
from calibre.web.feeds.news import BasicNewsRecipe
class LegeArtisRecipe(BasicNewsRecipe):
    """Recipe for 'Lege Artis' (olgierd.bblog.pl), Olgierd Rudak's Polish law blog."""

    __author__ = 'Mori'
    language = 'pl'

    title = u'Lege Artis'
    publisher = u'Olgierd Rudak'
    # BUG FIX: the description held double-encoded UTF-8 byte sequences inside
    # a unicode literal (e.g. u'\xc5\x9b' instead of u'\u015b'), rendering as
    # mojibake; replaced with the proper code points.
    description = u'Wszystko, co chcieliby\u015bcie wiedzie\u0107 o prawie, ale wstydzicie si\u0119 zapyta\u0107'

    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript = True

    extra_css = '''
        img{clear: both;}
    '''

    feeds = [
        (u'Lege Artis', u'http://olgierd.bblog.pl/rss/rss20.xml')
    ]

    # Keep the post title, date and body; drop tools and comments.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'post_title'}),
        dict(name='div', attrs={'class': 'post_date'}),
        dict(name='div', attrs={'class': 'post_content'})
    ]

    remove_tags = [
        dict(name='div', attrs={'id': 'bb_tools'}),
        dict(name='div', attrs={'class': 'post_comments'}),
        dict(name='object', attrs={})
    ]

View File

@ -0,0 +1,49 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2010, Tomasz Dlugosz <tomek3d@gmail.com>'
'''
legitymizm.org
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Legitymizm(BasicNewsRecipe):
    # Recipe for legitymizm.org, the site of the Organization of Polish
    # Monarchists (Organizacja Monarchistow Polskich).
    title = u'Organizacja Monarchist\xf3w Polskich'
    language = 'pl'
    oldest_article = 7
    __author__ = u'Tomasz D\u0142ugosz'
    max_articles_per_feed = 100
    cover_url = 'http://www.legitymizm.org/img_omp/logo.gif'
    no_stylesheets = True
    feeds = [(u'Aktualno\u015bci i publicystyka', u'http://www.legitymizm.org/rss.php')]
    # Article body lives in the wide column; strip the end-of-content and
    # "return" footer markers.
    keep_only_tags = [dict(name='div', attrs={'id':'szeroka_kolumna'})]
    remove_tags = [dict(name = 'div', attrs = {'class' : 'koniec_tresci_wlasciwej'}),
        dict(name = 'div', attrs = {'class' : 'return'})]
    # Styling copied from the site's own stylesheet so converted articles keep
    # their typographic look (headings, list styles, quote box, image frames).
    extra_css = '''
        body { font-family: Georgia, 'Times New Roman', Times, serif; }
        h1 { color: #898981; font-weight: normal; font-size: 26px; letter-spacing: -1px; line-height: 23px; text-align: left; }
        h2, h3 { font-weight: normal; font-size: 20px; line-height: 23px; letter-spacing: -1px; margin: 0 0 3px 0; text-align: left; }
        #szeroka_kolumna { float: left; line-height: 20px; }
        #szeroka_kolumna ul.wykaz { list-style-type: none; margin: 0 0 1.2em 0; padding: 0; }
        #szeroka_kolumna ul.wykaz li.wykaz_2 { font-weight: bold; margin: 0.6em 0 0 0; }
        #szeroka_kolumna ul.wykaz a { text-decoration: none; }
        #szeroka_kolumna ul.wykaz li.wykaz_1, #szeroka_kolumna ul.wykaz li.wykaz_2 ul li { list-style-type: square; color: #898981; text-transform: none; font-weight: normal; padding: 0; }
        #szeroka_kolumna ul.wykaz li.wykaz_1 { margin: 0 0 0 1.3em; }
        #szeroka_kolumna ul.wykaz li.wykaz_2 ul { margin: 0; padding: 0 0 0 1.3em; }
        #szeroka_kolumna h3.autor { background-color: #898981; color: #f9f9f8; margin: -25px 0px 30px 0; text-align: left; padding: 0 0 0 2px; }
        .tresc_wlasciwa { border-top: 1px solid #898981; padding: 30px 0px 0px 0px; position: relative; }
        #cytat { font-size: 11px; line-height: 19px; font-style: italic; text-align: justify; }
        #cytat img { width: 100px; height: 105px; float: right; margin: 3px 0 0 10px; }
        .duzy_cytat { padding: 20px 20px 10px 20px; margin: 0 0 1.2em 0; }
        #szeroka_kolumna img, #szeroka_kolumna object { padding: 3px; border: 1px solid #898981; }
        #szeroka_kolumna img.ilustracja { margin: 0px 10px 0 0; float: left; }
        p { margin: 0 0 1.2em 0; }
        #cytat p.sentencja { margin: 0; }
        #cytat p.sentencja:first-letter { font-size: 44px; line-height: 33px; margin: 0 2px 0 0; font-style: normal; float: left; display: block; }
        p.autor { text-transform: uppercase; color: #898981; font-style: normal; text-align: left; }
    '''

View File

@ -0,0 +1,26 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2010, Tomasz Dlugosz <tomek3d@gmail.com>'
'''
michalkiewicz.pl
'''
from calibre.web.feeds.news import BasicNewsRecipe
#
class michalkiewicz(BasicNewsRecipe):
    """Recipe for the personal site of Stanislaw Michalkiewicz (columns, articles)."""

    title = u'Stanis\u0142aw Michalkiewicz'
    # BUG FIX: the attribute was misspelled 'desciption', so the value was
    # never picked up by the framework; renamed to the correct 'description'.
    description = u'Strona autorska * felietony * artyku\u0142y * komentarze'
    __author__ = u'Tomasz D\u0142ugosz'
    language = 'pl'

    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True

    # Keep the central content module; strip the navigation menu.
    keep_only_tags = [dict(name='div', attrs={'class': 'modul_srodek'})]
    remove_tags = [dict(name='ul', attrs={'class': 'menu'})]

    feeds = [(u'Teksty', u'http://www.michalkiewicz.pl/rss.xml')]

View File

@ -0,0 +1,35 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2010, Tomasz Dlugosz <tomek3d@gmail.com>'
'''
nczas.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
#
class NCzas(BasicNewsRecipe):
    """Recipe for the online edition of 'Najwyzszy Czas!', a Polish weekly."""

    title = u'Najwy\u017cszy Czas!'
    # BUG FIX: the attribute was misspelled 'desciption', so the value was
    # never picked up by the framework; renamed to the correct 'description'.
    description = u'Najwy\u017cszy Czas!\nwydanie internetowe'
    __author__ = u'Tomasz D\u0142ugosz'
    language = 'pl'

    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    cover_url = 'http://nczas.com/wp-content/themes/default/grafika/logo.png'

    keep_only_tags = [dict(name='div', attrs={'class': 'trescartykulu'})]

    feeds = [(u'Najwy\u017cszy Czas!', u'http://nczas.com/feed/')]

    def postprocess_html(self, soup, first):
        """Drop decorative images (empty alt text) and right-alignment attributes."""
        for tag in soup.findAll(name='img', alt=""):
            tag.extract()
        for item in soup.findAll(align="right"):
            del item['align']
        return soup

View File

@ -0,0 +1,56 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.que-leer.com
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class QueLeer(BasicNewsRecipe):
    """Recipe for Que Leer (www.que-leer.com): Spanish book news and reviews."""

    title = 'Que Leer'
    __author__ = 'Darko Miletic'
    description = 'Libros, Novedades en libros, Criticas, Noticias libro'
    publisher = 'MC Ediciones, S.A.'
    category = 'news, books, criticas, libros'
    language = 'es'

    oldest_article = 7
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    remove_empty_feeds = True
    masthead_url = 'http://www.que-leer.com/wp-content/themes/queleer/images/backgrounds/que-leer.jpg'
    extra_css = ' body{font-family: Arial,sans-serif } img{margin-bottom: 0.4em} '

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # Everything from the "izq" heading to the end of the document is chrome.
    preprocess_regexps = [
        (re.compile(r'<h2 class="izq">.*?</body>', re.DOTALL | re.IGNORECASE),
         lambda match: ''),
    ]

    keep_only_tags = [dict(attrs={'class': 'post'})]
    remove_tags = [
        dict(attrs={'class': ['post-ratings', 'post-ratings-loading', 'sociable', 'toc-anchor']}),
        dict(name=['object', 'embed', 'iframe', 'link']),
        dict(attrs={'id': 'left'}),
    ]
    remove_tags_after = dict(attrs={'class': 'sociable'})
    remove_attributes = ['width', 'height']

    feeds = [(u'Articulos', u'http://www.que-leer.com/feed')]

    def preprocess_html(self, soup):
        """Strip inline styles and the store-promo link block, then adeify images."""
        for styled in soup.findAll(style=True):
            del styled['style']
        promo = soup.find(
            'a',
            href='http://www.que-leer.com/comprar-libros-tienda-que-leer/libros-recomendados')
        if promo:
            promo.parent.extract()
        return self.adeify_images(soup)

View File

@ -0,0 +1,74 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.wired.co.uk
'''
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class Wired_UK(BasicNewsRecipe):
    """Recipe for the UK edition of Wired magazine (www.wired.co.uk)."""

    title = 'Wired Magazine - UK edition'
    __author__ = 'Darko Miletic'
    description = 'Gaming news'
    publisher = 'Conde Nast Digital'
    category = 'news, games, IT, gadgets'
    language = 'en_GB'

    oldest_article = 32
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    masthead_url = 'http://www.wired.co.uk/_/media/wired-logo_UK.gif'
    extra_css = ' body{font-family: Palatino,"Palatino Linotype","Times New Roman",Times,serif} img{margin-bottom: 0.8em } .img-descr{font-family: Tahoma,Arial,Helvetica,sans-serif; font-size: 0.6875em; display: block} '

    # Landing page of the current issue; articles are scraped from it.
    index = 'http://www.wired.co.uk/wired-magazine.aspx'

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    keep_only_tags = [dict(name='div', attrs={'class': 'article-box'})]
    remove_tags = [
        dict(name=['object', 'embed', 'iframe', 'link']),
        dict(attrs={'class': ['opts', 'comment', 'stories']}),
    ]
    remove_tags_after = dict(name='div', attrs={'class': 'stories'})
    remove_attributes = ['height', 'width']

    def parse_index(self):
        """Build the single 'Articles' feed from the 'most wired' box on the issue page."""
        articles = []
        soup = self.index_to_soup(self.index)
        main = soup.find('div', attrs={'class': 'main-content'})
        if main:
            box = main.find(attrs={'class': 'most-wired-box'})
            if box:
                for link in box.findAll('a', href=True):
                    articles.append({
                        'title': self.tag_to_string(link),
                        'date': strftime(self.timefmt),
                        'url': 'http://www.wired.co.uk' + link['href'],
                        'description': '',
                    })
        return [('Articles', articles)]

    def get_cover_url(self):
        """Return the cover image URL scraped from the issue page, or None."""
        soup = self.index_to_soup(self.index)
        holder = soup.find('span', attrs={'class': 'cover'})
        if holder:
            return holder.img['src']
        return None

    def print_version(self, url):
        # Request the single-page version of the article.
        return url + '?page=all'

View File

@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
__appname__ = 'calibre' __appname__ = 'calibre'
__version__ = '0.6.40' __version__ = '0.6.42'
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>" __author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
import re import re

View File

@ -139,12 +139,11 @@ class FileTypePlugin(Plugin):
#: to the database #: to the database
on_import = False on_import = False
#: If True, this plugin is run whenever an any2* tool #: If True, this plugin is run just before a conversion
#: is used, on the file passed to the any2* tool.
on_preprocess = False on_preprocess = False
#: If True, this plugin is run after an any2* tool is #: If True, this plugin is run after conversion
#: used, on the final file produced by the tool. #: on the final file produced by the conversion output plugin.
on_postprocess = False on_postprocess = False
type = _('File type') type = _('File type')
@ -249,6 +248,7 @@ class CatalogPlugin(Plugin):
#: dest = 'catalog_title', #: dest = 'catalog_title',
#: help = (_('Title of generated catalog. \nDefault:') + " '" + #: help = (_('Title of generated catalog. \nDefault:') + " '" +
#: '%default' + "'"))] #: '%default' + "'"))]
#: cli_options parsed in library.cli:catalog_option_parser()
cli_options = [] cli_options = []
@ -275,7 +275,8 @@ class CatalogPlugin(Plugin):
def get_output_fields(self, opts): def get_output_fields(self, opts):
# Return a list of requested fields, with opts.sort_by first # Return a list of requested fields, with opts.sort_by first
all_fields = set( all_fields = set(
['author_sort','authors','comments','cover','formats', 'id','isbn','pubdate','publisher','rating', ['author_sort','authors','comments','cover','formats',
'id','isbn','pubdate','publisher','rating',
'series_index','series','size','tags','timestamp', 'series_index','series','size','tags','timestamp',
'title','uuid']) 'title','uuid'])

View File

@ -4,27 +4,21 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>,' \ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>,' \
' and Alex Bramley <a.bramley at gmail.com>.' ' and Alex Bramley <a.bramley at gmail.com>.'
import sys, logging, os, re, shutil, subprocess, uuid import os, shutil, uuid
from shutil import rmtree
from tempfile import mkdtemp from tempfile import mkdtemp
from mimetypes import guess_type as guess_mimetype from mimetypes import guess_type as guess_mimetype
from htmlentitydefs import name2codepoint
from pprint import PrettyPrinter
from BeautifulSoup import BeautifulSoup, NavigableString from BeautifulSoup import BeautifulSoup
from lxml import html, etree from lxml import html
from pychm.chm import CHMFile from pychm.chm import CHMFile
from pychm.chmlib import ( from pychm.chmlib import (
CHM_RESOLVE_SUCCESS, CHM_ENUMERATE_NORMAL, CHM_RESOLVE_SUCCESS, CHM_ENUMERATE_NORMAL,
chm_enumerate, chm_retrieve_object, chm_enumerate,
) )
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
from calibre.utils.config import OptionParser from calibre.utils.config import OptionParser
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.opf2 import OPFCreator, Guide
from calibre.ebooks.metadata.toc import TOC from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.lrf.html.convert_from import process_file as html_process_file
from calibre.utils.localization import get_lang from calibre.utils.localization import get_lang
from calibre.utils.filenames import ascii_filename from calibre.utils.filenames import ascii_filename
@ -35,17 +29,6 @@ def match_string(s1, s2_already_lowered):
return True return True
return False return False
def check_all_prev_empty(tag):
if tag is None:
return True
if tag.__class__ == NavigableString and not check_empty(tag):
return False
return check_all_prev_empty(tag.previousSibling)
def check_empty(s, rex = re.compile(r'\S')):
return rex.search(s) is None
def option_parser(): def option_parser():
parser = OptionParser(usage=_('%prog [options] mybook.chm')) parser = OptionParser(usage=_('%prog [options] mybook.chm'))
parser.add_option('--output-dir', '-d', default='.', help=_('Output directory. Defaults to current directory'), dest='output') parser.add_option('--output-dir', '-d', default='.', help=_('Output directory. Defaults to current directory'), dest='output')
@ -150,18 +133,18 @@ class CHMReader(CHMFile):
def _reformat(self, data): def _reformat(self, data):
try: try:
html = BeautifulSoup(data) soup = BeautifulSoup(data)
except UnicodeEncodeError: except UnicodeEncodeError:
# hit some strange encoding problems... # hit some strange encoding problems...
print "Unable to parse html for cleaning, leaving it :(" print "Unable to parse html for cleaning, leaving it :("
return data return data
# nuke javascript... # nuke javascript...
[s.extract() for s in html('script')] [s.extract() for s in soup('script')]
# remove forward and back nav bars from the top/bottom of each page # remove forward and back nav bars from the top/bottom of each page
# cos they really fuck with the flow of things and generally waste space # cos they really fuck with the flow of things and generally waste space
# since we can't use [a,b] syntax to select arbitrary items from a list # since we can't use [a,b] syntax to select arbitrary items from a list
# we'll have to do this manually... # we'll have to do this manually...
t = html('table') t = soup('table')
if t: if t:
if (t[0].previousSibling is None if (t[0].previousSibling is None
or t[0].previousSibling.previousSibling is None): or t[0].previousSibling.previousSibling is None):
@ -172,14 +155,8 @@ class CHMReader(CHMFile):
# for some very odd reason each page's content appears to be in a table # for some very odd reason each page's content appears to be in a table
# too. and this table has sub-tables for random asides... grr. # too. and this table has sub-tables for random asides... grr.
# remove br at top of page if present after nav bars removed
br = html('br')
if br:
if check_all_prev_empty(br[0].previousSibling):
br[0].extract()
# some images seem to be broken in some chm's :/ # some images seem to be broken in some chm's :/
for img in html('img'): for img in soup('img'):
try: try:
# some are supposedly "relative"... lies. # some are supposedly "relative"... lies.
while img['src'].startswith('../'): img['src'] = img['src'][3:] while img['src'].startswith('../'): img['src'] = img['src'][3:]
@ -189,7 +166,7 @@ class CHMReader(CHMFile):
# and some don't even have a src= ?! # and some don't even have a src= ?!
pass pass
# now give back some pretty html. # now give back some pretty html.
return html.prettify() return soup.prettify()
def Contents(self): def Contents(self):
if self._contents is not None: if self._contents is not None:
@ -257,7 +234,6 @@ class CHMInput(InputFormatPlugin):
metadata = get_metadata_(tdir) metadata = get_metadata_(tdir)
cwd = os.getcwdu()
odi = options.debug_pipeline odi = options.debug_pipeline
options.debug_pipeline = None options.debug_pipeline = None
# try a custom conversion: # try a custom conversion:
@ -281,11 +257,7 @@ class CHMInput(InputFormatPlugin):
def _create_oebbook(self, hhcpath, basedir, opts, log, mi): def _create_oebbook(self, hhcpath, basedir, opts, log, mi):
from calibre.ebooks.conversion.plumber import create_oebbook from calibre.ebooks.conversion.plumber import create_oebbook
from calibre.ebooks.oeb.base import DirContainer, \ from calibre.ebooks.oeb.base import DirContainer
rewrite_links, urlnormalize, urldefrag, BINARY_MIME, OEB_STYLES, \
xpath
from calibre import guess_type
import cssutils
oeb = create_oebbook(log, None, opts, self, oeb = create_oebbook(log, None, opts, self,
encoding=opts.input_encoding, populate=False) encoding=opts.input_encoding, populate=False)
self.oeb = oeb self.oeb = oeb
@ -305,10 +277,10 @@ class CHMInput(InputFormatPlugin):
metadata.add('language', get_lang()) metadata.add('language', get_lang())
if not metadata.creator: if not metadata.creator:
oeb.logger.warn('Creator not specified') oeb.logger.warn('Creator not specified')
metadata.add('creator', self.oeb.translate(__('Unknown'))) metadata.add('creator', _('Unknown'))
if not metadata.title: if not metadata.title:
oeb.logger.warn('Title not specified') oeb.logger.warn('Title not specified')
metadata.add('title', self.oeb.translate(__('Unknown'))) metadata.add('title', _('Unknown'))
bookid = str(uuid.uuid4()) bookid = str(uuid.uuid4())
metadata.add('identifier', bookid, id='uuid_id', scheme='uuid') metadata.add('identifier', bookid, id='uuid_id', scheme='uuid')

View File

@ -233,14 +233,18 @@ def create_option_parser(args, log):
return parser, plumber return parser, plumber
def abspath(x):
if x.startswith('http:') or x.startswith('https:'):
return x
return os.path.abspath(os.path.expanduser(x))
def main(args=sys.argv): def main(args=sys.argv):
log = Log() log = Log()
parser, plumber = create_option_parser(args, log) parser, plumber = create_option_parser(args, log)
opts = parser.parse_args(args)[0] opts = parser.parse_args(args)[0]
y = lambda q : os.path.abspath(os.path.expanduser(q))
for x in ('read_metadata_from_opf', 'cover'): for x in ('read_metadata_from_opf', 'cover'):
if getattr(opts, x, None) is not None: if getattr(opts, x, None) is not None:
setattr(opts, x, y(getattr(opts, x))) setattr(opts, x, abspath(getattr(opts, x)))
recommendations = [(n.dest, getattr(opts, n.dest), recommendations = [(n.dest, getattr(opts, n.dest),
OptionRecommendation.HIGH) \ OptionRecommendation.HIGH) \
for n in parser.options_iter() for n in parser.options_iter()

View File

@ -424,7 +424,7 @@ OptionRecommendation(name='author_sort',
OptionRecommendation(name='cover', OptionRecommendation(name='cover',
recommended_value=None, level=OptionRecommendation.LOW, recommended_value=None, level=OptionRecommendation.LOW,
help=_('Set the cover to the specified file.')), help=_('Set the cover to the specified file or URL')),
OptionRecommendation(name='comments', OptionRecommendation(name='comments',
recommended_value=None, level=OptionRecommendation.LOW, recommended_value=None, level=OptionRecommendation.LOW,
@ -638,6 +638,20 @@ OptionRecommendation(name='timestamp',
continue continue
setattr(mi, x, val) setattr(mi, x, val)
def download_cover(self, url):
from calibre import browser
from PIL import Image
from cStringIO import StringIO
from calibre.ptempfile import PersistentTemporaryFile
self.log('Downloading cover from %r'%url)
br = browser()
raw = br.open_novisit(url).read()
buf = StringIO(raw)
pt = PersistentTemporaryFile('.jpg')
pt.close()
img = Image.open(buf)
img.convert('RGB').save(pt.name)
return pt.name
def read_user_metadata(self): def read_user_metadata(self):
''' '''
@ -655,6 +669,8 @@ OptionRecommendation(name='timestamp',
mi = MetaInformation(opf) mi = MetaInformation(opf)
self.opts_to_mi(mi) self.opts_to_mi(mi)
if mi.cover: if mi.cover:
if mi.cover.startswith('http:') or mi.cover.startswith('https:'):
mi.cover = self.download_cover(mi.cover)
mi.cover_data = ('', open(mi.cover, 'rb').read()) mi.cover_data = ('', open(mi.cover, 'rb').read())
mi.cover = None mi.cover = None
self.user_metadata = mi self.user_metadata = mi
@ -770,6 +786,7 @@ OptionRecommendation(name='timestamp',
self.oeb = create_oebbook(self.log, self.oeb, self.opts, self.oeb = create_oebbook(self.log, self.oeb, self.opts,
self.input_plugin) self.input_plugin)
self.input_plugin.postprocess_book(self.oeb, self.opts, self.log) self.input_plugin.postprocess_book(self.oeb, self.opts, self.log)
self.opts.is_image_collection = self.input_plugin.is_image_collection
pr = CompositeProgressReporter(0.34, 0.67, self.ui_reporter) pr = CompositeProgressReporter(0.34, 0.67, self.ui_reporter)
self.flush() self.flush()
if self.opts.debug_pipeline is not None: if self.opts.debug_pipeline is not None:

View File

@ -85,6 +85,8 @@ class StreamSlicer(object):
self._stream.truncate(value) self._stream.truncate(value)
class MetadataUpdater(object): class MetadataUpdater(object):
DRM_KEY_SIZE = 48
def __init__(self, stream): def __init__(self, stream):
self.stream = stream self.stream = stream
data = self.data = StreamSlicer(stream) data = self.data = StreamSlicer(stream)
@ -105,6 +107,13 @@ class MetadataUpdater(object):
self.timestamp = None self.timestamp = None
self.pdbrecords = self.get_pdbrecords() self.pdbrecords = self.get_pdbrecords()
self.drm_block = None
if self.encryption_type != 0:
if self.have_exth:
self.drm_block = self.fetchDRMdata()
else:
raise MobiError('Unable to set metadata on DRM file without EXTH header')
self.original_exth_records = {} self.original_exth_records = {}
if not have_exth: if not have_exth:
self.create_exth() self.create_exth()
@ -112,6 +121,16 @@ class MetadataUpdater(object):
# Fetch timestamp, cover_record, thumbnail_record # Fetch timestamp, cover_record, thumbnail_record
self.fetchEXTHFields() self.fetchEXTHFields()
def fetchDRMdata(self):
''' Fetch the DRM keys '''
drm_offset = int(unpack('>I', self.record0[0xa8:0xac])[0])
self.drm_key_count = int(unpack('>I', self.record0[0xac:0xb0])[0])
drm_keys = ''
for x in range(self.drm_key_count):
base_addr = drm_offset + (x * self.DRM_KEY_SIZE)
drm_keys += self.record0[base_addr:base_addr + self.DRM_KEY_SIZE]
return drm_keys
def fetchEXTHFields(self): def fetchEXTHFields(self):
stream = self.stream stream = self.stream
record0 = self.record0 record0 = self.record0
@ -186,7 +205,8 @@ class MetadataUpdater(object):
def create_exth(self, new_title=None, exth=None): def create_exth(self, new_title=None, exth=None):
# Add an EXTH block to record 0, rewrite the stream # Add an EXTH block to record 0, rewrite the stream
# self.hexdump(self.record0) if isinstance(new_title, unicode):
new_title = new_title.encode(self.codec, 'replace')
# Fetch the existing title # Fetch the existing title
title_offset, = unpack('>L', self.record0[0x54:0x58]) title_offset, = unpack('>L', self.record0[0x54:0x58])
@ -210,8 +230,14 @@ class MetadataUpdater(object):
exth = ['EXTH', pack('>II', 12, 0), pad] exth = ['EXTH', pack('>II', 12, 0), pad]
exth = ''.join(exth) exth = ''.join(exth)
# Update title_offset, title_len if new_title # Update drm_offset(0xa8), title_offset(0x54)
if self.encryption_type != 0:
self.record0[0xa8:0xac] = pack('>L', 0x10 + mobi_header_length + len(exth))
self.record0[0xb0:0xb4] = pack('>L', len(self.drm_block))
self.record0[0x54:0x58] = pack('>L', 0x10 + mobi_header_length + len(exth) + len(self.drm_block))
else:
self.record0[0x54:0x58] = pack('>L', 0x10 + mobi_header_length + len(exth)) self.record0[0x54:0x58] = pack('>L', 0x10 + mobi_header_length + len(exth))
if new_title: if new_title:
self.record0[0x58:0x5c] = pack('>L', len(new_title)) self.record0[0x58:0x5c] = pack('>L', len(new_title))
@ -219,20 +245,15 @@ class MetadataUpdater(object):
new_record0 = StringIO() new_record0 = StringIO()
new_record0.write(self.record0[:0x10 + mobi_header_length]) new_record0.write(self.record0[:0x10 + mobi_header_length])
new_record0.write(exth) new_record0.write(exth)
if new_title: if self.encryption_type != 0:
#new_record0.write(new_title.encode(self.codec, 'replace')) new_record0.write(self.drm_block)
new_title = (new_title or _('Unknown')).encode(self.codec, 'replace') new_record0.write(new_title if new_title else title_in_file)
new_record0.write(new_title)
else:
new_record0.write(title_in_file)
# Pad to a 4-byte boundary # Pad to a 4-byte boundary
trail = len(new_record0.getvalue()) % 4 trail = len(new_record0.getvalue()) % 4
pad = '\0' * (4 - trail) # Always pad w/ at least 1 byte pad = '\0' * (4 - trail) # Always pad w/ at least 1 byte
new_record0.write(pad) new_record0.write(pad)
#self.hexdump(new_record0.getvalue())
# Rebuild the stream, update the pdbrecords pointers # Rebuild the stream, update the pdbrecords pointers
self.patchSection(0,new_record0.getvalue()) self.patchSection(0,new_record0.getvalue())
@ -342,10 +363,7 @@ class MetadataUpdater(object):
recs.append((202, pack('>I', self.thumbnail_rindex))) recs.append((202, pack('>I', self.thumbnail_rindex)))
pop_exth_record(202) pop_exth_record(202)
if getattr(self, 'encryption_type', -1) != 0: # Restore any original EXTH fields that weren't updated
raise MobiError('Setting metadata in DRMed MOBI files is not supported.')
# Restore any original EXTH fields that weren't modified/updated
for id in sorted(self.original_exth_records): for id in sorted(self.original_exth_records):
recs.append((id, self.original_exth_records[id])) recs.append((id, self.original_exth_records[id]))
recs = sorted(recs, key=lambda x:(x[0],x[0])) recs = sorted(recs, key=lambda x:(x[0],x[0]))

View File

@ -1376,7 +1376,7 @@ class MobiWriter(object):
self._text_length, self._text_length,
self._text_nrecords-1, RECORD_SIZE, 0, 0)) # 0 - 15 (0x0 - 0xf) self._text_nrecords-1, RECORD_SIZE, 0, 0)) # 0 - 15 (0x0 - 0xf)
uid = random.randint(0, 0xffffffff) uid = random.randint(0, 0xffffffff)
title = str(metadata.title[0]) title = unicode(metadata.title[0]).encode('utf-8')
# The MOBI Header # The MOBI Header
# 0x0 - 0x3 # 0x0 - 0x3

View File

@ -29,6 +29,9 @@ class RescaleImages(object):
page_width, page_height = self.opts.dest.width, self.opts.dest.height page_width, page_height = self.opts.dest.width, self.opts.dest.height
if not self.opts.is_image_collection:
page_width -= (self.opts.margin_left + self.opts.margin_right) * self.opts.dest.dpi/72.
page_height -= (self.opts.margin_top + self.opts.margin_bottom) * self.opts.dest.dpi/72.
for item in self.oeb.manifest: for item in self.oeb.manifest:
if item.media_type.startswith('image'): if item.media_type.startswith('image'):
raw = item.data raw = item.data
@ -53,7 +56,8 @@ class RescaleImages(object):
scaled, new_width, new_height = fit_image(width, height, scaled, new_width, new_height = fit_image(width, height,
page_width, page_height) page_width, page_height)
if scaled: if scaled:
self.log('Rescaling image', item.href) self.log('Rescaling image from %dx%d to %dx%d'%(
width, height, new_width, new_height), item.href)
if qt: if qt:
img = img.scaled(new_width, new_height, img = img.scaled(new_width, new_height,
Qt.IgnoreAspectRatio, Qt.SmoothTransformation) Qt.IgnoreAspectRatio, Qt.SmoothTransformation)

View File

@ -2,9 +2,11 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
""" The GUI """ """ The GUI """
import os import os
from threading import RLock
from PyQt4.QtCore import QVariant, QFileInfo, QObject, SIGNAL, QBuffer, Qt, QSize, \ from PyQt4.QtCore import QVariant, QFileInfo, QObject, SIGNAL, QBuffer, Qt, QSize, \
QByteArray, QTranslator, QCoreApplication, QThread, \ QByteArray, QTranslator, QCoreApplication, QThread, \
QEvent QEvent, QTimer, pyqtSignal
from PyQt4.QtGui import QFileDialog, QMessageBox, QPixmap, QFileIconProvider, \ from PyQt4.QtGui import QFileDialog, QMessageBox, QPixmap, QFileIconProvider, \
QIcon, QTableView, QApplication, QDialog, QPushButton QIcon, QTableView, QApplication, QDialog, QPushButton
@ -234,16 +236,17 @@ def human_readable(size):
return size + " " + suffix return size + " " + suffix
class Dispatcher(QObject): class Dispatcher(QObject):
'''Convenience class to ensure that a function call always happens in the GUI thread''' '''Convenience class to ensure that a function call always happens in the
SIGNAL = SIGNAL('dispatcher(PyQt_PyObject,PyQt_PyObject)') thread the reciver was created in.'''
dispatch_signal = pyqtSignal(object, object)
def __init__(self, func): def __init__(self, func):
QObject.__init__(self) QObject.__init__(self)
self.func = func self.func = func
self.connect(self, self.SIGNAL, self.dispatch, Qt.QueuedConnection) self.dispatch_signal.connect(self.dispatch, type=Qt.QueuedConnection)
def __call__(self, *args, **kwargs): def __call__(self, *args, **kwargs):
self.emit(self.SIGNAL, args, kwargs) self.dispatch_signal.emit(args, kwargs)
def dispatch(self, args, kwargs): def dispatch(self, args, kwargs):
self.func(*args, **kwargs) self.func(*args, **kwargs)
@ -533,6 +536,8 @@ class Application(QApplication):
self._translator = None self._translator = None
self.load_translations() self.load_translations()
qt_app = self qt_app = self
self._file_open_paths = []
self._file_open_lock = RLock()
if islinux: if islinux:
self.setStyleSheet(''' self.setStyleSheet('''
@ -545,6 +550,12 @@ class Application(QApplication):
} }
''') ''')
def _send_file_open_events(self):
with self._file_open_lock:
if self._file_open_paths:
self.file_event_hook(self._file_open_paths)
self._file_open_paths = []
def load_translations(self): def load_translations(self):
if self._translator is not None: if self._translator is not None:
@ -557,7 +568,9 @@ class Application(QApplication):
if callable(self.file_event_hook) and e.type() == QEvent.FileOpen: if callable(self.file_event_hook) and e.type() == QEvent.FileOpen:
path = unicode(e.file()) path = unicode(e.file())
if os.access(path, os.R_OK): if os.access(path, os.R_OK):
self.file_event_hook(path) with self._file_open_lock:
self._file_open_paths.append(path)
QTimer.singleShot(1000, self._send_file_open_events)
return True return True
else: else:
return QApplication.event(self, e) return QApplication.event(self, e)

View File

@ -601,6 +601,8 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
if dynamic.get('tag_view_visible', False): if dynamic.get('tag_view_visible', False):
self.status_bar.tag_view_button.toggle() self.status_bar.tag_view_button.toggle()
self._add_filesystem_book = Dispatcher(self.__add_filesystem_book)
def resizeEvent(self, ev): def resizeEvent(self, ev):
MainWindow.resizeEvent(self, ev) MainWindow.resizeEvent(self, ev)
@ -988,15 +990,24 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
self.cover_cache.refresh([cid]) self.cover_cache.refresh([cid])
self.library_view.model().current_changed(current_idx, current_idx) self.library_view.model().current_changed(current_idx, current_idx)
def add_filesystem_book(self, path, allow_device=True): def __add_filesystem_book(self, paths, allow_device=True):
if os.access(path, os.R_OK): print 222, paths
books = [os.path.abspath(path)] if isinstance(paths, basestring):
paths = [paths]
books = [path for path in map(os.path.abspath, paths) if os.access(path,
os.R_OK)]
if books:
to_device = allow_device and self.stack.currentIndex() != 0 to_device = allow_device and self.stack.currentIndex() != 0
self._add_books(books, to_device) self._add_books(books, to_device)
if to_device: if to_device:
self.status_bar.showMessage(\ self.status_bar.showMessage(\
_('Uploading books to device.'), 2000) _('Uploading books to device.'), 2000)
def add_filesystem_book(self, paths, allow_device=True):
self._add_filesystem_book(paths, allow_device=allow_device)
def add_books(self, checked): def add_books(self, checked):
''' '''
Add books from the local filesystem to either the library or the device. Add books from the local filesystem to either the library or the device.
@ -1042,20 +1053,22 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
infos, on_card=on_card) infos, on_card=on_card)
self.status_bar.showMessage( self.status_bar.showMessage(
_('Uploading books to device.'), 2000) _('Uploading books to device.'), 2000)
if self._adder.number_of_books_added > 0: if getattr(self._adder, 'number_of_books_added', 0) > 0:
self.library_view.model().books_added(self._adder.number_of_books_added) self.library_view.model().books_added(self._adder.number_of_books_added)
if hasattr(self, 'db_images'): if hasattr(self, 'db_images'):
self.db_images.reset() self.db_images.reset()
if self._adder.critical: if getattr(self._adder, 'critical', None):
det_msg = [] det_msg = []
for name, log in self._adder.critical.items(): for name, log in self._adder.critical.items():
if isinstance(name, str): if isinstance(name, str):
name = name.decode(filesystem_encoding, 'replace') name = name.decode(filesystem_encoding, 'replace')
det_msg.append(name+'\n'+log) det_msg.append(name+'\n'+log)
warning_dialog(self, _('Failed to read metadata'), warning_dialog(self, _('Failed to read metadata'),
_('Failed to read metadata from the following')+':', _('Failed to read metadata from the following')+':',
det_msg='\n\n'.join(det_msg), show=True) det_msg='\n\n'.join(det_msg), show=True)
if hasattr(self._adder, 'cleanup'):
self._adder.cleanup() self._adder.cleanup()
self._adder = None self._adder = None

View File

@ -1,4 +1,4 @@
import datetime, htmlentitydefs, os, re, shutil, time import datetime, htmlentitydefs, os, re, shutil
from collections import namedtuple from collections import namedtuple
from copy import deepcopy from copy import deepcopy
@ -11,7 +11,7 @@ from calibre.customize.conversion import OptionRecommendation, DummyReporter
from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, Tag, NavigableString from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, Tag, NavigableString
from calibre.ptempfile import PersistentTemporaryDirectory from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.utils.logging import Log from calibre.utils.logging import Log
from calibre.utils.date import isoformat from calibre.utils.date import isoformat, now as nowf
FIELDS = ['all', 'author_sort', 'authors', 'comments', FIELDS = ['all', 'author_sort', 'authors', 'comments',
'cover', 'formats', 'id', 'isbn', 'pubdate', 'publisher', 'rating', 'cover', 'formats', 'id', 'isbn', 'pubdate', 'publisher', 'rating',
@ -21,7 +21,7 @@ FIELDS = ['all', 'author_sort', 'authors', 'comments',
class CSV_XML(CatalogPlugin): class CSV_XML(CatalogPlugin):
'CSV/XML catalog generator' 'CSV/XML catalog generator'
Option = namedtuple('Option', 'option, default, dest, help') Option = namedtuple('Option', 'option, default, dest, action, help')
name = 'Catalog_CSV_XML' name = 'Catalog_CSV_XML'
description = 'CSV/XML catalog generator' description = 'CSV/XML catalog generator'
@ -34,6 +34,7 @@ class CSV_XML(CatalogPlugin):
Option('--fields', Option('--fields',
default = 'all', default = 'all',
dest = 'fields', dest = 'fields',
action = None,
help = _('The fields to output when cataloging books in the ' help = _('The fields to output when cataloging books in the '
'database. Should be a comma-separated list of fields.\n' 'database. Should be a comma-separated list of fields.\n'
'Available fields: %s.\n' 'Available fields: %s.\n'
@ -43,6 +44,7 @@ class CSV_XML(CatalogPlugin):
Option('--sort-by', Option('--sort-by',
default = 'id', default = 'id',
dest = 'sort_by', dest = 'sort_by',
action = None,
help = _('Output field to sort on.\n' help = _('Output field to sort on.\n'
'Available fields: author_sort, id, rating, size, timestamp, title.\n' 'Available fields: author_sort, id, rating, size, timestamp, title.\n'
"Default: '%default'\n" "Default: '%default'\n"
@ -241,7 +243,7 @@ class CSV_XML(CatalogPlugin):
class EPUB_MOBI(CatalogPlugin): class EPUB_MOBI(CatalogPlugin):
'ePub catalog generator' 'ePub catalog generator'
Option = namedtuple('Option', 'option, default, dest, help') Option = namedtuple('Option', 'option, default, dest, action, help')
name = 'Catalog_EPUB_MOBI' name = 'Catalog_EPUB_MOBI'
description = 'EPUB/MOBI catalog generator' description = 'EPUB/MOBI catalog generator'
@ -254,12 +256,14 @@ class EPUB_MOBI(CatalogPlugin):
cli_options = [Option('--catalog-title', cli_options = [Option('--catalog-title',
default = 'My Books', default = 'My Books',
dest = 'catalog_title', dest = 'catalog_title',
action = None,
help = _('Title of generated catalog used as title in metadata.\n' help = _('Title of generated catalog used as title in metadata.\n'
"Default: '%default'\n" "Default: '%default'\n"
"Applies to: ePub, MOBI output formats")), "Applies to: ePub, MOBI output formats")),
Option('--debug-pipeline', Option('--debug-pipeline',
default=None, default=None,
dest='debug_pipeline', dest='debug_pipeline',
action = None,
help=_("Save the output from different stages of the conversion " help=_("Save the output from different stages of the conversion "
"pipeline to the specified " "pipeline to the specified "
"directory. Useful if you are unsure at which stage " "directory. Useful if you are unsure at which stage "
@ -269,48 +273,56 @@ class EPUB_MOBI(CatalogPlugin):
Option('--exclude-genre', Option('--exclude-genre',
default='\[[\w ]*\]', default='\[[\w ]*\]',
dest='exclude_genre', dest='exclude_genre',
action = None,
help=_("Regex describing tags to exclude as genres.\n" "Default: '%default' excludes bracketed tags, e.g. '[<tag>]'\n" help=_("Regex describing tags to exclude as genres.\n" "Default: '%default' excludes bracketed tags, e.g. '[<tag>]'\n"
"Applies to: ePub, MOBI output formats")), "Applies to: ePub, MOBI output formats")),
Option('--exclude-tags', Option('--exclude-tags',
default=('~,'+_('Catalog')), default=('~,'+_('Catalog')),
dest='exclude_tags', dest='exclude_tags',
action = None,
help=_("Comma-separated list of tag words indicating book should be excluded from output. Case-insensitive.\n" help=_("Comma-separated list of tag words indicating book should be excluded from output. Case-insensitive.\n"
"--exclude-tags=skip will match 'skip this book' and 'Skip will like this'.\n" "--exclude-tags=skip will match 'skip this book' and 'Skip will like this'.\n"
"Default: '%default'\n" "Default: '%default'\n"
"Applies to: ePub, MOBI output formats")), "Applies to: ePub, MOBI output formats")),
Option('--generate-titles', Option('--generate-titles',
default=True, default=False,
dest='generate_titles', dest='generate_titles',
action = 'store_true',
help=_("Include 'Titles' section in catalog.\n" help=_("Include 'Titles' section in catalog.\n"
"Default: '%default'\n" "Default: '%default'\n"
"Applies to: ePub, MOBI output formats")), "Applies to: ePub, MOBI output formats")),
Option('--generate-recently-added', Option('--generate-recently-added',
default=True, default=False,
dest='generate_recently_added', dest='generate_recently_added',
action = 'store_true',
help=_("Include 'Recently Added' section in catalog.\n" help=_("Include 'Recently Added' section in catalog.\n"
"Default: '%default'\n" "Default: '%default'\n"
"Applies to: ePub, MOBI output formats")), "Applies to: ePub, MOBI output formats")),
Option('--note-tag', Option('--note-tag',
default='*', default='*',
dest='note_tag', dest='note_tag',
action = None,
help=_("Tag prefix for user notes, e.g. '*Jeff might enjoy reading this'.\n" help=_("Tag prefix for user notes, e.g. '*Jeff might enjoy reading this'.\n"
"Default: '%default'\n" "Default: '%default'\n"
"Applies to: ePub, MOBI output formats")), "Applies to: ePub, MOBI output formats")),
Option('--numbers-as-text', Option('--numbers-as-text',
default=False, default=False,
dest='numbers_as_text', dest='numbers_as_text',
action = None,
help=_("Sort titles with leading numbers as text, e.g.,\n'2001: A Space Odyssey' sorts as \n'Two Thousand One: A Space Odyssey'.\n" help=_("Sort titles with leading numbers as text, e.g.,\n'2001: A Space Odyssey' sorts as \n'Two Thousand One: A Space Odyssey'.\n"
"Default: '%default'\n" "Default: '%default'\n"
"Applies to: ePub, MOBI output formats")), "Applies to: ePub, MOBI output formats")),
Option('--output-profile', Option('--output-profile',
default=None, default=None,
dest='output_profile', dest='output_profile',
action = None,
help=_("Specifies the output profile. In some cases, an output profile is required to optimize the catalog for the device. For example, 'kindle' or 'kindle_dx' creates a structured Table of Contents with Sections and Articles.\n" help=_("Specifies the output profile. In some cases, an output profile is required to optimize the catalog for the device. For example, 'kindle' or 'kindle_dx' creates a structured Table of Contents with Sections and Articles.\n"
"Default: '%default'\n" "Default: '%default'\n"
"Applies to: ePub, MOBI output formats")), "Applies to: ePub, MOBI output formats")),
Option('--read-tag', Option('--read-tag',
default='+', default='+',
dest='read_tag', dest='read_tag',
action = None,
help=_("Tag indicating book has been read.\n" "Default: '%default'\n" help=_("Tag indicating book has been read.\n" "Default: '%default'\n"
"Applies to: ePub, MOBI output formats")), "Applies to: ePub, MOBI output formats")),
] ]
@ -1749,9 +1761,8 @@ class EPUB_MOBI(CatalogPlugin):
book['title_sort'] = self.generateSortTitle(book['title']) book['title_sort'] = self.generateSortTitle(book['title'])
self.booksByDateRange = sorted(nspt, key=lambda x:(x['timestamp'], x['timestamp']),reverse=True) self.booksByDateRange = sorted(nspt, key=lambda x:(x['timestamp'], x['timestamp']),reverse=True)
today = datetime.datetime.now()
date_range_list = [] date_range_list = []
today_time = datetime.datetime(today.year, today.month, today.day) today_time = nowf().replace(hour=23, minute=59, second=59)
books_added_in_date_range = False books_added_in_date_range = False
for (i, date) in enumerate(self.DATE_RANGE): for (i, date) in enumerate(self.DATE_RANGE):
date_range_limit = self.DATE_RANGE[i] date_range_limit = self.DATE_RANGE[i]
@ -1759,14 +1770,16 @@ class EPUB_MOBI(CatalogPlugin):
date_range = '%d to %d days ago' % (self.DATE_RANGE[i-1], self.DATE_RANGE[i]) date_range = '%d to %d days ago' % (self.DATE_RANGE[i-1], self.DATE_RANGE[i])
else: else:
date_range = 'Last %d days' % (self.DATE_RANGE[i]) date_range = 'Last %d days' % (self.DATE_RANGE[i])
for book in self.booksByDateRange: for book in self.booksByDateRange:
book_time = datetime.datetime(book['timestamp'].year, book['timestamp'].month, book['timestamp'].day) book_time = book['timestamp']
if (today_time-book_time).days <= date_range_limit: delta = today_time-book_time
#print "generateHTMLByDateAdded: %s added %d days ago" % (book['title'], (today_time-book_time).days) if delta.days <= date_range_limit:
date_range_list.append(book) date_range_list.append(book)
books_added_in_date_range = True books_added_in_date_range = True
else: else:
break break
dtc = add_books_to_HTML_by_date_range(date_range_list, date_range, dtc) dtc = add_books_to_HTML_by_date_range(date_range_list, date_range, dtc)
date_range_list = [book] date_range_list = [book]
@ -3412,13 +3425,12 @@ class EPUB_MOBI(CatalogPlugin):
def run(self, path_to_output, opts, db, notification=DummyReporter()): def run(self, path_to_output, opts, db, notification=DummyReporter()):
opts.log = log = Log() opts.log = log = Log()
opts.fmt = self.fmt = path_to_output.rpartition('.')[2] opts.fmt = self.fmt = path_to_output.rpartition('.')[2]
self.opts = opts
# Add local options # Add local options
opts.creator = "calibre" opts.creator = "calibre"
# Finalize output_profile # Finalize output_profile
op = self.opts.output_profile op = opts.output_profile
if op is None: if op is None:
op = 'default' op = 'default'
if opts.connected_device['name'] and 'kindle' in opts.connected_device['name'].lower(): if opts.connected_device['name'] and 'kindle' in opts.connected_device['name'].lower():
@ -3428,20 +3440,37 @@ class EPUB_MOBI(CatalogPlugin):
op = "kindle" op = "kindle"
opts.descriptionClip = 380 if op.endswith('dx') or 'kindle' not in op else 100 opts.descriptionClip = 380 if op.endswith('dx') or 'kindle' not in op else 100
opts.authorClip = 100 if op.endswith('dx') or 'kindle' not in op else 60 opts.authorClip = 100 if op.endswith('dx') or 'kindle' not in op else 60
self.opts.output_profile = op opts.output_profile = op
opts.basename = "Catalog" opts.basename = "Catalog"
opts.cli_environment = not hasattr(opts,'sync') opts.cli_environment = not hasattr(opts,'sync')
# GwR *** hardwired to sort by author, could be an option if passed in opts
opts.sort_descriptions_by_author = True opts.sort_descriptions_by_author = True
if opts.verbose: build_log = []
# If exclude_genre is blank, assume user wants all genre tags included
if opts.exclude_genre.strip() == '':
opts.exclude_genre = '\[^.\]'
build_log.append(" converting empty exclude_genre to '\[^.\]'")
if opts.connected_device['name']:
if opts.connected_device['serial']:
build_log.append(" connected_device: '%s' #%s%s " % \
(opts.connected_device['name'],
opts.connected_device['serial'][0:4],
'x' * (len(opts.connected_device['serial']) - 4)))
else:
build_log.append(" connected_device: '%s'" % opts.connected_device['name'])
for storage in opts.connected_device['storage']:
if storage:
build_log.append(" mount point: %s" % storage)
opts_dict = vars(opts) opts_dict = vars(opts)
log(u"%s(): Generating %s %sin %s environment" % build_log.append(u"%s(): Generating %s %sin %s environment" %
(self.name,self.fmt,'for %s ' % opts.output_profile if opts.output_profile else '', (self.name,self.fmt,'for %s ' % opts.output_profile if opts.output_profile else '',
'CLI' if opts.cli_environment else 'GUI')) 'CLI' if opts.cli_environment else 'GUI'))
if opts_dict['ids']: if opts_dict['ids']:
log(" Book count: %d" % len(opts_dict['ids'])) build_log.append(" Book count: %d" % len(opts_dict['ids']))
sections_list = ['Descriptions','Authors'] sections_list = ['Descriptions','Authors']
if opts.generate_titles: if opts.generate_titles:
@ -3450,37 +3479,22 @@ class EPUB_MOBI(CatalogPlugin):
sections_list.append('Recently Added') sections_list.append('Recently Added')
if not opts.exclude_genre.strip() == '.': if not opts.exclude_genre.strip() == '.':
sections_list.append('Genres') sections_list.append('Genres')
log(u"Creating Sections for %s" % ', '.join(sections_list)) build_log.append(u"Creating Sections for %s" % ', '.join(sections_list))
# If exclude_genre is blank, assume user wants all genre tags included
if opts.exclude_genre.strip() == '':
opts.exclude_genre = '\[^.\]'
log(" converting empty exclude_genre to '\[^.\]'")
if opts.connected_device['name']:
if opts.connected_device['serial']:
log(" connected_device: '%s' #%s%s " % \
(opts.connected_device['name'],
opts.connected_device['serial'][0:4],
'x' * (len(opts.connected_device['serial']) - 4)))
else:
log(" connected_device: '%s'" % opts.connected_device['name'])
for storage in opts.connected_device['storage']:
if storage:
log(" mount point: %s" % storage)
# for book in opts.connected_device['books']:
# log("%s: %s" % (book.title, book.path))
# Display opts # Display opts
keys = opts_dict.keys() keys = opts_dict.keys()
keys.sort() keys.sort()
log(" opts:") build_log.append(" opts:")
for key in keys: for key in keys:
if key in ['catalog_title','authorClip','descriptionClip','exclude_genre','exclude_tags', if key in ['catalog_title','authorClip','descriptionClip','exclude_genre','exclude_tags',
'note_tag','numbers_as_text','read_tag', 'note_tag','numbers_as_text','read_tag',
'search_text','sort_by','sort_descriptions_by_author','sync']: 'search_text','sort_by','sort_descriptions_by_author','sync']:
log(" %s: %s" % (key, opts_dict[key])) build_log.append(" %s: %s" % (key, opts_dict[key]))
if opts.verbose:
log('\n'.join(line for line in build_log))
self.opts = opts
# Launch the Catalog builder # Launch the Catalog builder
catalog = self.CatalogBuilder(db, opts, self, report_progress=notification) catalog = self.CatalogBuilder(db, opts, self, report_progress=notification)
@ -3498,7 +3512,8 @@ class EPUB_MOBI(CatalogPlugin):
if catalog_source_built: if catalog_source_built:
recommendations = [] recommendations = []
# recommendations.append(('cover', I('catalog.svg'), OptionRecommendation.HIGH)) recommendations.append(('comments', '\n'.join(line for line in build_log),
OptionRecommendation.HIGH))
dp = getattr(opts, 'debug_pipeline', None) dp = getattr(opts, 'debug_pipeline', None)
if dp is not None: if dp is not None:
@ -3519,7 +3534,6 @@ class EPUB_MOBI(CatalogPlugin):
opts.basename + '.opf'), path_to_output, log, report_progress=notification, opts.basename + '.opf'), path_to_output, log, report_progress=notification,
abort_after_input_dump=False) abort_after_input_dump=False)
plumber.merge_ui_recommendations(recommendations) plumber.merge_ui_recommendations(recommendations)
plumber.run() plumber.run()
return 0 return 0
else: else:

View File

@ -587,9 +587,6 @@ def command_export(args, dbpath):
do_export(get_db(dbpath, opts), ids, dir, opts) do_export(get_db(dbpath, opts), ids, dir, opts)
return 0 return 0
# GR additions
def catalog_option_parser(args): def catalog_option_parser(args):
from calibre.customize.ui import available_catalog_formats, plugin_for_catalog_format from calibre.customize.ui import available_catalog_formats, plugin_for_catalog_format
from calibre.utils.logging import Log from calibre.utils.logging import Log
@ -599,6 +596,13 @@ def catalog_option_parser(args):
# Fetch the extension-specific CLI options from the plugin # Fetch the extension-specific CLI options from the plugin
plugin = plugin_for_catalog_format(fmt) plugin = plugin_for_catalog_format(fmt)
for option in plugin.cli_options: for option in plugin.cli_options:
if option.action:
parser.add_option(option.option,
default=option.default,
dest=option.dest,
action=option.action,
help=option.help)
else:
parser.add_option(option.option, parser.add_option(option.option,
default=option.default, default=option.default,
dest=option.dest, dest=option.dest,

View File

@ -1458,16 +1458,14 @@ class LibraryDatabase2(LibraryDatabase):
def add_catalog(self, path, title): def add_catalog(self, path, title):
format = os.path.splitext(path)[1][1:].lower() format = os.path.splitext(path)[1][1:].lower()
stream = path if hasattr(path, 'read') else open(path, 'rb') with open(path, 'rb') as stream:
stream.seek(0) matches = self.data.get_matches('title', '='+title)
matches = self.data.get_matches('title', title)
if matches: if matches:
tag_matches = self.data.get_matches('tags', _('Catalog')) tag_matches = self.data.get_matches('tags', '='+_('Catalog'))
matches = matches.intersection(tag_matches) matches = matches.intersection(tag_matches)
db_id, existing = None, False db_id = None
if matches: if matches:
db_id = list(matches)[0] db_id = list(matches)[0]
existing = True
if db_id is None: if db_id is None:
obj = self.conn.execute('INSERT INTO books(title, author_sort) VALUES (?, ?)', obj = self.conn.execute('INSERT INTO books(title, author_sort) VALUES (?, ?)',
(title, 'calibre')) (title, 'calibre'))
@ -1475,18 +1473,20 @@ class LibraryDatabase2(LibraryDatabase):
self.data.books_added([db_id], self) self.data.books_added([db_id], self)
self.set_path(db_id, index_is_id=True) self.set_path(db_id, index_is_id=True)
self.conn.commit() self.conn.commit()
try:
mi = get_metadata(stream, format)
except:
mi = MetaInformation(title, ['calibre']) mi = MetaInformation(title, ['calibre'])
stream.seek(0)
mi.title, mi.authors = title, ['calibre']
mi.tags = [_('Catalog')] mi.tags = [_('Catalog')]
mi.pubdate = mi.timestamp = utcnow()
if format == 'mobi':
mi.cover, mi.cover_data = None, (None, None)
self.set_metadata(db_id, mi) self.set_metadata(db_id, mi)
self.add_format(db_id, format, stream, index_is_id=True) self.add_format(db_id, format, stream, index_is_id=True)
if not hasattr(path, 'read'):
stream.close()
self.conn.commit() self.conn.commit()
if existing:
t = utcnow()
self.set_timestamp(db_id, t, notify=False)
self.set_pubdate(db_id, t, notify=False)
self.data.refresh_ids(self, [db_id]) # Needed to update format list and size self.data.refresh_ids(self, [db_id]) # Needed to update format list and size
return db_id return db_id
@ -1509,6 +1509,10 @@ class LibraryDatabase2(LibraryDatabase):
self.data.books_added([id], self) self.data.books_added([id], self)
self.set_path(id, index_is_id=True) self.set_path(id, index_is_id=True)
self.conn.commit() self.conn.commit()
if mi.pubdate is None:
mi.pubdate = utcnow()
if mi.timestamp is None:
mi.timestamp = utcnow()
self.set_metadata(id, mi) self.set_metadata(id, mi)
self.add_format(id, format, stream, index_is_id=True) self.add_format(id, format, stream, index_is_id=True)
@ -1546,6 +1550,10 @@ class LibraryDatabase2(LibraryDatabase):
self.data.books_added([id], self) self.data.books_added([id], self)
self.set_path(id, True) self.set_path(id, True)
self.conn.commit() self.conn.commit()
if mi.timestamp is None:
mi.timestamp = utcnow()
if mi.pubdate is None:
mi.pubdate = utcnow()
self.set_metadata(id, mi) self.set_metadata(id, mi)
if cover is not None: if cover is not None:
self.set_cover(id, cover) self.set_cover(id, cover)
@ -1581,7 +1589,9 @@ class LibraryDatabase2(LibraryDatabase):
self.set_path(id, True) self.set_path(id, True)
self.conn.commit() self.conn.commit()
if mi.timestamp is None: if mi.timestamp is None:
mi.timestamp = nowf() mi.timestamp = utcnow()
if mi.pubdate is None:
mi.pubdate = utcnow()
self.set_metadata(id, mi) self.set_metadata(id, mi)
npath = self.run_import_plugins(path, format) npath = self.run_import_plugins(path, format)
format = os.path.splitext(npath)[-1].lower().replace('.', '').upper() format = os.path.splitext(npath)[-1].lower().replace('.', '').upper()
@ -1614,7 +1624,9 @@ class LibraryDatabase2(LibraryDatabase):
self.data.books_added([id], self) self.data.books_added([id], self)
self.set_path(id, True) self.set_path(id, True)
if mi.timestamp is None: if mi.timestamp is None:
mi.timestamp = nowf() mi.timestamp = utcnow()
if mi.pubdate is None:
mi.pubdate = utcnow()
self.set_metadata(id, mi, ignore_errors=True) self.set_metadata(id, mi, ignore_errors=True)
for path in formats: for path in formats:
ext = os.path.splitext(path)[1][1:].lower() ext = os.path.splitext(path)[1][1:].lower()

View File

@ -23,7 +23,7 @@ def convert_timestamp(val):
return parse_date(val, as_utc=False) return parse_date(val, as_utc=False)
def adapt_datetime(dt): def adapt_datetime(dt):
return isoformat(dt) return isoformat(dt, sep=' ')
sqlite.register_adapter(datetime, adapt_datetime) sqlite.register_adapter(datetime, adapt_datetime)
sqlite.register_converter('timestamp', convert_timestamp) sqlite.register_converter('timestamp', convert_timestamp)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -72,7 +72,7 @@ def qt_to_dt(qdate_or_qdatetime, as_utc=True):
return dt.astimezone(_utc_tz if as_utc else _local_tz) return dt.astimezone(_utc_tz if as_utc else _local_tz)
def fromtimestamp(ctime, as_utc=True): def fromtimestamp(ctime, as_utc=True):
dt = datetime.utcfromtimestamp().replace(tzinfo=_utc_tz) dt = datetime.utcfromtimestamp(ctime).replace(tzinfo=_utc_tz)
if not as_utc: if not as_utc:
dt = dt.astimezone(_local_tz) dt = dt.astimezone(_local_tz)
return dt return dt