diff --git a/src/calibre/devices/jetbook/driver.py b/src/calibre/devices/jetbook/driver.py index 0d815640b1..2af1b03bf7 100644 --- a/src/calibre/devices/jetbook/driver.py +++ b/src/calibre/devices/jetbook/driver.py @@ -9,6 +9,7 @@ from itertools import cycle from calibre.devices.usbms.driver import USBMS from calibre import sanitize_file_name as sanitize +from calibre.ebooks.metadata import string_to_authors class JETBOOK(USBMS): name = 'Ectaco JetBook Device Interface' @@ -118,7 +119,7 @@ class JETBOOK(USBMS): match = cls.JETBOOK_FILE_NAME_PATTERN.match(fn) if match is not None: mi.title = check_unicode(match.group('title')) - authors = match.group('authors').split('&') + authors = string_to_authors(match.group('authors')) mi.authors = map(check_unicode, authors) return mi diff --git a/src/calibre/ebooks/conversion/cli.py b/src/calibre/ebooks/conversion/cli.py index dbf86c4d94..3a34fa8675 100644 --- a/src/calibre/ebooks/conversion/cli.py +++ b/src/calibre/ebooks/conversion/cli.py @@ -70,6 +70,8 @@ def option_recommendation_to_cli_option(add_option, rec): switches.append('--'+opt.long_switch) attrs = dict(dest=opt.name, help=opt.help, choices=opt.choices, default=rec.recommended_value) + if opt.long_switch == 'verbose': + attrs['action'] = 'count' if isinstance(rec.recommended_value, type(True)): attrs['action'] = 'store_false' if rec.recommended_value else \ 'store_true' diff --git a/src/calibre/ebooks/lrf/meta.py b/src/calibre/ebooks/lrf/meta.py index 6ec87892d6..af1fad128f 100644 --- a/src/calibre/ebooks/lrf/meta.py +++ b/src/calibre/ebooks/lrf/meta.py @@ -19,7 +19,7 @@ import xml.dom.minidom as dom from functools import wraps from calibre.devices.prs500.prstypes import field -from calibre.ebooks.metadata import MetaInformation +from calibre.ebooks.metadata import MetaInformation, string_to_authors BYTE = ">sys.stderr, msg.encode('utf8') diff --git a/src/calibre/ebooks/metadata/rtf.py b/src/calibre/ebooks/metadata/rtf.py index bd8b1098c1..b1ee453218 100644 --- a/src/calibre/ebooks/metadata/rtf.py +++ b/src/calibre/ebooks/metadata/rtf.py @@ -5,7 +5,7 @@ Edit metadata in RTF files. """ import re, cStringIO, sys -from calibre.ebooks.metadata import MetaInformation +from calibre.ebooks.metadata import MetaInformation, string_to_authors title_pat = re.compile(r'\{\\info.*?\{\\title(.*?)(?'+data+'') - if barename(data.tag) != 'html': - raise NotHTML( - 'File %r does not appear to be (X)HTML' % self.href) + self.log.warn('File %r does not appear to be (X)HTML'%self.href) + nroot = etree.fromstring('') + for child in list(data): + child.getparent.remove(child) + nroot.append(child) + data = nroot elif not namespace(data.tag): data.attrib['xmlns'] = XHTML_NS data = etree.tostring(data, encoding=unicode) @@ -799,10 +804,11 @@ class Manifest(object): try: data = etree.fromstring(data) except etree.XMLSyntaxError: - self.oeb.logger.warn('Stripping comments from %s'% + self.oeb.logger.warn('Stripping comments and meta tags from %s'% self.href) data = re.compile(r'', re.DOTALL).sub('', data) + data = re.sub(r']+?>', '', data) data = etree.fromstring(data) elif namespace(data.tag) != XHTML_NS: # OEB_DOC_NS, but possibly others @@ -1371,9 +1377,11 @@ class TOC(object): :attr:`href`: Book-internal URL referenced by this node. :attr:`klass`: Optional semantic class referenced by this node. :attr:`id`: Option unique identifier for this node. + :attr:`author`: Optional author attribution for periodicals + :attr:`description`: Optional description attribute for periodicals """ def __init__(self, title=None, href=None, klass=None, id=None, - play_order=None): + play_order=None, author=None, description=None): self.title = title self.href = urlnormalize(href) if href else href self.klass = klass @@ -1383,10 +1391,12 @@ class TOC(object): if play_order is None: play_order = self.next_play_order() self.play_order = play_order + self.author = author + self.description = description - def add(self, title, href, klass=None, id=None, play_order=0): + def add(self, title, href, klass=None, id=None, play_order=0, author=None, description=None): """Create and return a new sub-node of this node.""" - node = TOC(title, href, klass, id, play_order) + node = TOC(title, href, klass, id, play_order, author, description) self.nodes.append(node) return node diff --git a/src/calibre/ebooks/oeb/reader.py b/src/calibre/ebooks/oeb/reader.py index 4ccc1eeed1..75d92f1815 100644 --- a/src/calibre/ebooks/oeb/reader.py +++ b/src/calibre/ebooks/oeb/reader.py @@ -351,9 +351,27 @@ class OEBReader(object): self.logger.warn('TOC reference %r not found' % href) continue id = child.get('id') - klass = child.get('class') + klass = child.get('class', 'chapter') + po = int(child.get('playOrder', self.oeb.toc.next_play_order())) - node = toc.add(title, href, id=id, klass=klass, play_order=po) + + authorElement = xpath(child, + 'descendant::mbp:meta[@name = "author"]') + if authorElement : + author = authorElement[0].text + else : + author = None + + descriptionElement = xpath(child, + 'descendant::mbp:meta[@name = "description"]') + if descriptionElement : + description = descriptionElement[0].text + else : + description = None + + node = toc.add(title, href, id=id, klass=klass, + play_order=po, description=description, author=author) + self._toc_from_navpoint(item, node, child) def _toc_from_ncx(self, item): diff --git a/src/calibre/ebooks/oeb/transforms/jacket.py b/src/calibre/ebooks/oeb/transforms/jacket.py index 14b19716df..b3eefc46fa 100644 --- a/src/calibre/ebooks/oeb/transforms/jacket.py +++ b/src/calibre/ebooks/oeb/transforms/jacket.py @@ -10,7 +10,7 @@ import textwrap from lxml import etree -from calibre.ebooks.oeb.base import XPNSMAP +from calibre.ebooks.oeb.base import XPath, XPNSMAP from calibre import guess_type class Jacket(object): @@ -41,10 +41,11 @@ class Jacket(object): ''') def remove_first_image(self): + path = XPath('//h:img[@src]') for i, item in enumerate(self.oeb.spine): if i > 2: break - for img in item.data.xpath('//h:img[@src]', namespace=XPNSMAP): - href = item.abshref(img.get('src')) + for img in path(item.data): + href = item.abshref(img.get('src')) image = self.oeb.manifest.hrefs.get(href, None) if image is not None: self.log('Removing first image', img.get('src')) diff --git a/src/calibre/gui2/convert/xpath_wizard.py b/src/calibre/gui2/convert/xpath_wizard.py index d2a0d55a48..9b8e44ddaa 100644 --- a/src/calibre/gui2/convert/xpath_wizard.py +++ b/src/calibre/gui2/convert/xpath_wizard.py @@ -31,6 +31,8 @@ class WizardWidget(QWidget, Ui_Form): q = '[re:test(@%s, "%s", "i")]'%(attr, val) else: q = '[@%s]'%attr + elif val: + q = '[re:test(., "%s", "i")]'%(val) expr = '//'+tag + q return expr diff --git a/src/calibre/gui2/dialogs/fetch_metadata.py b/src/calibre/gui2/dialogs/fetch_metadata.py index aab564e05d..ea076b42a1 100644 --- a/src/calibre/gui2/dialogs/fetch_metadata.py +++ b/src/calibre/gui2/dialogs/fetch_metadata.py @@ -14,6 +14,7 @@ from calibre.gui2.dialogs.fetch_metadata_ui import Ui_FetchMetadata from calibre.gui2 import error_dialog, NONE, info_dialog from calibre.gui2.widgets import ProgressIndicator from calibre.utils.config import prefs +from calibre import strftime class Fetcher(QThread): @@ -45,7 +46,7 @@ class Matches(QAbstractTableModel): return len(self.matches) def columnCount(self, *args): - return 5 + return 6 def headerData(self, section, orientation, role): if role != Qt.DisplayRole: @@ -57,6 +58,7 @@ class Matches(QAbstractTableModel): elif section == 2: text = _("Author Sort") elif section == 3: text = _("Publisher") elif section == 4: text = _("ISBN") + elif section == 5: text = _("Published") return QVariant(text) else: @@ -80,6 +82,9 @@ class Matches(QAbstractTableModel): res = book.publisher elif col == 4: res = book.isbn + elif col == 5: + if hasattr(book.pubdate, 'timetuple'): + res = strftime('%b %Y', book.pubdate.timetuple()) if not res: return NONE return QVariant(res) @@ -126,7 +131,7 @@ class FetchMetadata(QDialog, Ui_FetchMetadata): prefs['isbndb_com_key'] = key else: key = None - title = author = publisher = isbn = None + title = author = publisher = isbn = pubdate = None if self.isbn: isbn = self.isbn if self.title: diff --git a/src/calibre/gui2/dialogs/metadata_single.py b/src/calibre/gui2/dialogs/metadata_single.py index bf153ba932..d25d0609c8 100644 --- a/src/calibre/gui2/dialogs/metadata_single.py +++ b/src/calibre/gui2/dialogs/metadata_single.py @@ -10,8 +10,9 @@ import os import re import time import traceback +from datetime import datetime -from PyQt4.QtCore import SIGNAL, QObject, QCoreApplication, Qt, QTimer, QThread +from PyQt4.QtCore import SIGNAL, QObject, QCoreApplication, Qt, QTimer, QThread, QDate from PyQt4.QtGui import QPixmap, QListWidgetItem, QErrorMessage, QDialog, QCompleter from calibre.gui2 import qstring_to_unicode, error_dialog, file_icon_provider, \ @@ -234,6 +235,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog): self.cover.setAcceptDrops(True) self._author_completer = AuthorCompleter(self.db) self.authors.setCompleter(self._author_completer) + self.pubdate.setMinimumDate(QDate(100,1,1)) self.connect(self.cover, SIGNAL('cover_changed()'), self.cover_dropped) QObject.connect(self.cover_button, SIGNAL("clicked(bool)"), \ self.select_cover) @@ -279,6 +281,9 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog): comments = self.db.comments(row) self.comments.setPlainText(comments if comments else '') cover = self.db.cover(row) + pubdate = db.pubdate(self.id, index_is_id=True) + self.pubdate.setDate(QDate(pubdate.year, pubdate.month, + pubdate.day)) exts = self.db.formats(row) if exts: @@ -441,6 +446,9 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog): if book.author_sort: self.author_sort.setText(book.author_sort) if book.publisher: self.publisher.setEditText(book.publisher) if book.isbn: self.isbn.setText(book.isbn) + if book.pubdate: + d = book.pubdate + self.pubdate.setDate(QDate(d.year, d.month, d.day)) summ = book.comments if summ: prefix = qstring_to_unicode(self.comments.toPlainText()) @@ -485,6 +493,9 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog): self.db.set_series(self.id, qstring_to_unicode(self.series.currentText()), notify=False) self.db.set_series_index(self.id, self.series_index.value(), notify=False) self.db.set_comment(self.id, qstring_to_unicode(self.comments.toPlainText()), notify=False) + d = self.pubdate.date() + self.db.set_pubdate(self.id, datetime(d.year(), d.month(), d.day())) + if self.cover_changed: self.db.set_cover(self.id, pixmap_to_data(self.cover.pixmap())) QDialog.accept(self) diff --git a/src/calibre/gui2/dialogs/metadata_single.ui b/src/calibre/gui2/dialogs/metadata_single.ui index 4163c51583..3f44b28d1c 100644 --- a/src/calibre/gui2/dialogs/metadata_single.ui +++ b/src/calibre/gui2/dialogs/metadata_single.ui @@ -325,6 +325,19 @@ + + + + Publishe&d: + + + Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter + + + pubdate + + + @@ -348,6 +361,16 @@ + + + + MMM yyyy + + + true + + + @@ -632,6 +655,7 @@ tag_editor_button remove_series_button isbn + pubdate comments fetch_metadata_button fetch_cover_button diff --git a/src/calibre/gui2/main.py b/src/calibre/gui2/main.py index 4940f53dd7..e0e8f7c337 100644 --- a/src/calibre/gui2/main.py +++ b/src/calibre/gui2/main.py @@ -314,7 +314,10 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI): QObject.connect(self.action_convert, SIGNAL('triggered(bool)'), self.convert_single) self.convert_menu = cm + pm = QMenu() + ap = self.action_preferences + pm.addAction(ap.icon(), ap.text()) pm.addAction(self.preferences_action) pm.addAction(_('Run welcome wizard')) self.connect(pm.actions()[1], SIGNAL('triggered(bool)'), diff --git a/src/calibre/library/database.py b/src/calibre/library/database.py index 7261aed7ad..72b629db0b 100644 --- a/src/calibre/library/database.py +++ b/src/calibre/library/database.py @@ -9,6 +9,7 @@ from zlib import compress, decompress from calibre.ebooks.metadata import MetaInformation from calibre.web.feeds.recipes import migrate_automatic_profile_to_automatic_recipe +from calibre.ebooks.metadata import string_to_authors class Concatenate(object): '''String concatenation aggregator for sqlite''' @@ -97,7 +98,7 @@ class LibraryDatabase(object): obj = conn.execute('INSERT INTO books(title, timestamp, author_sort) VALUES (?,?,?)', (book['title'], book['timestamp'], authors)) id = obj.lastrowid - authors = authors.split('&') + authors = string_to_authors(authors) for a in authors: author = conn.execute('SELECT id from authors WHERE name=?', (a,)).fetchone() if author: @@ -1103,7 +1104,7 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE; item[col] = val break if column == 'authors': - val = val.split('&,') + val = string_to_authors(val) self.set_authors(id, val) elif column == 'title': self.set_title(id, val) @@ -1266,7 +1267,7 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE; mi.authors = ['Unknown'] authors = [] for a in mi.authors: - authors += a.split('&') + authors += string_to_authors(a) self.set_authors(id, authors) if mi.author_sort: self.set_author_sort(id, mi.author_sort) diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index e1bbe06ec0..0c5013880f 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -993,7 +993,7 @@ class LibraryDatabase2(LibraryDatabase): mi.authors = [_('Unknown')] authors = [] for a in mi.authors: - authors += a.split('&') + authors += string_to_authors(a) self.set_authors(id, authors, notify=False) if mi.author_sort: self.set_author_sort(id, mi.author_sort, notify=False) diff --git a/src/calibre/web/feeds/recipes/recipe_globe_and_mail.py b/src/calibre/web/feeds/recipes/recipe_globe_and_mail.py index 6214fa0578..1126990e5b 100644 --- a/src/calibre/web/feeds/recipes/recipe_globe_and_mail.py +++ b/src/calibre/web/feeds/recipes/recipe_globe_and_mail.py @@ -1,44 +1,69 @@ -#!/usr/bin/env python -__license__ = 'GPL v3' -__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' -__docformat__ = 'restructuredtext en' - -''' -globeandmail.com -''' - -from calibre.web.feeds.news import BasicNewsRecipe - -class GlobeAndMail(BasicNewsRecipe): - - title = 'Globe and Mail' - __author__ = 'Kovid Goyal' - language = _('English') - oldest_article = 2.0 - no_stylesheets = True - description = 'Canada\'s national newspaper' - remove_tags_before = dict(id="article-top") - remove_tags = [ - {'id':['util', 'article-tabs', 'comments', 'article-relations', - 'gallery-controls', 'video', 'galleryLoading']}, - ] - remove_tags_after = dict(id='article-content') - - feeds = [ - ('Latest headlines', 'http://www.theglobeandmail.com/?service=rss'), - ('Top stories', 'http://www.theglobeandmail.com/?service=rss&feed=topstories'), - ('National', 'http://www.theglobeandmail.com/news/national/?service=rss'), - ('Politics', 'http://www.theglobeandmail.com/news/politics/?service=rss'), - ('World', 'http://www.theglobeandmail.com/news/world/?service=rss'), - ('Business', 'http://www.theglobeandmail.com/report-on-business/?service=rss'), - ('Opinions', 'http://www.theglobeandmail.com/news/opinions/?service=rss'), - ('Columnists', 'http://www.theglobeandmail.com/news/opinions/columnists/?service=rss'), - ('Globe Investor', 'http://www.theglobeandmail.com/globe-investor/?service=rss'), - ('Sports', 'http://www.theglobeandmail.com/sports/?service=rss'), - ('Technology', 'http://www.theglobeandmail.com/news/technology/?service=rss'), - ('Arts', 'http://www.theglobeandmail.com/news/arts/?service=rss'), - ('Life', 'http://www.theglobeandmail.com/life/?service=rss'), - ('Blogs', 'http://www.theglobeandmail.com/blogs/?service=rss'), - ('Real Estate', 'http://www.theglobeandmail.com/real-estate/?service=rss'), - ('Auto', 'http://www.theglobeandmail.com/auto/?service=rss'), - ] +#!/usr/bin/env python +__license__ = 'GPL v3' + +__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' +__docformat__ = 'restructuredtext en' + +''' +globeandmail.com +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class GlobeAndMail(BasicNewsRecipe): + title = u'Globe and Mail' + language = _('English') + __author__ = 'Kovid Goyal' + oldest_article = 2 + max_articles_per_feed = 10 + no_stylesheets = True + extra_css = ''' + h3 {font-size: 22pt; font-weight:bold; margin:0px; padding:0px 0px 8pt 0px;} + h4 {margin-top: 0px;} + #byline { font-family: monospace; font-weight:bold; } + #placeline {font-weight:bold;} + #credit {margin-top:0px;} + .tag {font-size: 22pt;}''' + description = 'Canada\'s national newspaper' + remove_tags_before = dict(id="article-top") + remove_tags = [ + {'id':['util', 'article-tabs', 'comments', 'article-relations', + 'gallery-controls', 'video', 'galleryLoading','deck','header'] }, + {'class':['credit','inline-img-caption','tab-pointer'] }, + dict(name='div', attrs={'id':'lead-photo'}), + dict(name='div', attrs={'class':'right'}), + dict(name='div', attrs={'id':'footer'}), + dict(name='div', attrs={'id':'beta-msg'}), + dict(name='img', attrs={'class':'headshot'}), + dict(name='div', attrs={'class':'brand'}), + dict(name='div', attrs={'id':'nav-wrap'}), + dict(name='div', attrs={'id':'featureTopics'}), + dict(name='div', attrs={'id':'videoNav'}), + dict(name='div', attrs={'id':'blog-header'}), + dict(name='div', attrs={'id':'right-rail'}), + dict(name='div', attrs={'id':'group-footer-container'}), + dict(name=['iframe','img']) + ] + remove_tags_after = [{'id':['article-content']}, + {'class':['pull','inline-img'] }, + dict(name='img', attrs={'class':'inline-media-embed'}), + ] + feeds = [ + (u'Latest headlines', u'http://www.theglobeandmail.com/?service=rss'), + (u'Top stories', u'http://www.theglobeandmail.com/?service=rss&feed=topstories'), + (u'National', u'http://www.theglobeandmail.com/news/national/?service=rss'), + (u'Politics', u'http://www.theglobeandmail.com/news/politics/?service=rss'), + (u'World', u'http://www.theglobeandmail.com/news/world/?service=rss'), + (u'Business', u'http://www.theglobeandmail.com/report-on-business/?service=rss'), + (u'Opinions', u'http://www.theglobeandmail.com/news/opinions/?service=rss'), + (u'Columnists', u'http://www.theglobeandmail.com/news/opinions/columnists/?service=rss'), + (u'Globe Investor', u'http://www.theglobeandmail.com/globe-investor/?service=rss'), + (u'Sports', u'http://www.theglobeandmail.com/sports/?service=rss'), + (u'Technology', u'http://www.theglobeandmail.com/news/technology/?service=rss'), + (u'Arts', u'http://www.theglobeandmail.com/news/arts/?service=rss'), + (u'Life', u'http://www.theglobeandmail.com/life/?service=rss'), + (u'Blogs', u'http://www.theglobeandmail.com/blogs/?service=rss'), + (u'Real Estate', u'http://www.theglobeandmail.com/real-estate/?service=rss'), + (u'Auto', u'http://www.theglobeandmail.com/auto/?service=rss') + ] +