diff --git a/resources/recipes/eksiazki.recipe b/resources/recipes/eksiazki.recipe new file mode 100644 index 0000000000..cc576f4dc6 --- /dev/null +++ b/resources/recipes/eksiazki.recipe @@ -0,0 +1,26 @@ +#!/usr/bin/env python + +__license__ = 'GPL v2' +__copyright__ = u'2010, Tomasz Dlugosz ' +''' +eksiazki.org +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class eksiazki(BasicNewsRecipe): + + title = u'eksiazki.org' + description = u'Twoje centrum wiedzy o epapierze i ebookach' + language = 'pl' + __author__ = u'Tomasz D\u0142ugosz' + no_stylesheets = True + remove_javascript = True + + feeds = [(u'wpisy', u'http://www.eksiazki.org/feed/')] + + keep_only_tags = [dict(name='div', attrs={'id':'content-body'})] + remove_tags = [ + dict(name='span', attrs={'class':'nr_comm'}), + dict(name='div', attrs={'id':'tabsContainer'}), + dict(name='div', attrs={'class':'next_previous_links'})] diff --git a/resources/recipes/sueddeutschezeitung.recipe b/resources/recipes/sueddeutschezeitung.recipe index 260c5a012a..8b731e2c4f 100644 --- a/resources/recipes/sueddeutschezeitung.recipe +++ b/resources/recipes/sueddeutschezeitung.recipe @@ -25,7 +25,7 @@ class SueddeutcheZeitung(BasicNewsRecipe): LOGIN = PREFIX + '/app/lbox/index.html' use_embedded_content = False masthead_url = 'http://pix.sueddeutsche.de/img/g_.gif' - language = 'de_DE' + language = 'de' extra_css = ' body{font-family: Arial,Helvetica,sans-serif} ' conversion_options = { diff --git a/resources/recipes/winter_olympics.recipe b/resources/recipes/winter_olympics.recipe new file mode 100644 index 0000000000..a7062d7930 --- /dev/null +++ b/resources/recipes/winter_olympics.recipe @@ -0,0 +1,95 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2010, Starson17' +''' +www.nbcolympics.com +''' +from calibre.web.feeds.news import BasicNewsRecipe + +class Olympics_2010(BasicNewsRecipe): + title = u'NBC Olympics 2010' + __author__ = 'Starson17' + description = 'Olympics 2010' + cover_url = 
'http://www.digitaljournal.com/img/1/1/2/1/i/4/7/6/o/WinterOlympics2010-logo.jpg' + publisher = 'Olympics 2010' + tags = 'Olympics news' + language = 'en' + use_embedded_content = False + no_stylesheets = True + remove_javascript = True + # recursions = 3 + oldest_article = 7 + max_articles_per_feed = 10 + + keep_only_tags = [dict(name='div', attrs={'class':['Article ','ArticleGallery']}), + ] + + remove_tags = [dict(name='div', attrs={'id':['RelatedTagsBox','ShareBox']}), + dict(name='div', attrs={'class':['DateUtilities','PhotoGallery BoxRight','Frame','ToolBox']}), + ] + + # RSS feeds are at: http://www.nbcolympics.com/rss/index.html + feeds = [ + ('NBCOlympics.com - News', 'http://www.nbcolympics.com/rss/newscenter/mostpopular.xml'), + ('NBCOlympics.com - News - Top Stories', 'http://www.nbcolympics.com/rss/newscenter/topstories.xml'), + ('NBCOlympics.com - News - Latest Headlines', 'http://www.nbcolympics.com/rss/newscenter/latestnews.xml'), + # ('NBCOlympics.com - Photos', 'http://www.nbcolympics.com/rss/photos/mostpopular.xml'), + # ('NBCOlympics.com - Photos - Editorial Picks', 'http://www.nbcolympics.com/rss/photos/editorialpicks.xml'), + # ('NBCOlympics.com - Photos - Latest Slideshows', 'http://www.nbcolympics.com/rss/photos/latestslideshows.xml'), + ('NBCOlympics.com - Team USA - Latest news', 'http://www.nbcolympics.com/rss/countries/team-usa/index.xml'), + # ('NBCOlympics.com - Team USA - Latest Slideshows', 'http://www.nbcolympics.com/rss/countries/team-usa/photos/index.xml'), + # ('NBCOlympics.com - Team USA - Video', 'http://www.nbcolympics.com/rss/countries/team-usa/video/index.xml'), + # ('NBCOlympics.com - Alpine Skiing - Most Popular News', 'http://www.nbcolympics.com/rss/sport=AS/mostpopular.xml'), + # ('NBCOlympics.com - Alpine Skiing - Top News', 'http://www.nbcolympics.com/rss/sport=AS/topnews.xml'), + ('NBCOlympics.com - Alpine Skiing - Latest News', 'http://www.nbcolympics.com/rss/sport=AS/latestnews.xml'), + # ('NBCOlympics.com - 
Biathlon - Most Popular News', 'http://www.nbcolympics.com/rss/sport=BT/mostpopular.xml'), + # ('NBCOlympics.com - Biathlon - Top News', 'http://www.nbcolympics.com/rss/sport=BT/topnews.xml'), + ('NBCOlympics.com - Biathlon - Latest News', 'http://www.nbcolympics.com/rss/sport=BT/latestnews.xml'), + # ('NBCOlympics.com - Bobsled - Most Popular News', 'http://www.nbcolympics.com/rss/sport=BS/mostpopular.xml'), + # ('NBCOlympics.com - Bobsled - Top News', 'http://www.nbcolympics.com/rss/sport=BS/topnews.xml'), + ('NBCOlympics.com - Bobsled - Latest News', 'http://www.nbcolympics.com/rss/sport=BS/latestnews.xml'), + # ('NBCOlympics.com - Cross-Country - Most Popular News', 'http://www.nbcolympics.com/rss/sport=CC/mostpopular.xml'), + # ('NBCOlympics.com - Cross-Country - Top News', 'http://www.nbcolympics.com/rss/sport=CC/topnews.xml'), + ('NBCOlympics.com - Cross-Country - Latest News', 'http://www.nbcolympics.com/rss/sport=CC/latestnews.xml'), + # ('NBCOlympics.com - Curling - Most Popular News', 'http://www.nbcolympics.com/rss/sport=CU/mostpopular.xml'), + # ('NBCOlympics.com - Curling - Top News', 'http://www.nbcolympics.com/rss/sport=CU/topnews.xml'), + ('NBCOlympics.com - Curling - Latest News', 'http://www.nbcolympics.com/rss/sport=CU/latestnews.xml'), + # ('NBCOlympics.com - Figure Skating - Most Popular News', 'http://www.nbcolympics.com/rss/sport=FS/mostpopular.xml'), + # ('NBCOlympics.com - Figure Skating - Top News', 'http://www.nbcolympics.com/rss/sport=FS/topnews.xml'), + ('NBCOlympics.com - Figure Skating - Latest News', 'http://www.nbcolympics.com/rss/sport=FS/latestnews.xml'), + # ('NBCOlympics.com - Freestyle Skiing - Most Popular News', 'http://www.nbcolympics.com/rss/sport=FR/mostpopular.xml'), + # ('NBCOlympics.com - Freestyle Skiing - Top News', 'http://www.nbcolympics.com/rss/sport=FR/topnews.xml'), + ('NBCOlympics.com - Freestyle Skiing - Latest News', 'http://www.nbcolympics.com/rss/sport=FR/latestnews.xml'), + # ('NBCOlympics.com - Hockey - 
Most Popular News', 'http://www.nbcolympics.com/rss/sport=IH/mostpopular.xml'), + # ('NBCOlympics.com - Hockey - Top News', 'http://www.nbcolympics.com/rss/sport=IH/topnews.xml'), + ('NBCOlympics.com - Hockey - Latest News', 'http://www.nbcolympics.com/rss/sport=IH/latestnews.xml'), + # ('NBCOlympics.com - Luge - Most Popular News', 'http://www.nbcolympics.com/rss/sport=LG/mostpopular.xml'), + # ('NBCOlympics.com - Luge - Top News', 'http://www.nbcolympics.com/rss/sport=LG/topnews.xml'), + ('NBCOlympics.com - Luge - Latest News', 'http://www.nbcolympics.com/rss/sport=LG/latestnews.xml'), + # ('NBCOlympics.com - Nordic Combined - Most Popular News', 'http://www.nbcolympics.com/rss/sport=NC/mostpopular.xml'), + # ('NBCOlympics.com - Nordic Combined - Top News', 'http://www.nbcolympics.com/rss/sport=NC/topnews.xml'), + ('NBCOlympics.com - Nordic Combined - Latest News', 'http://www.nbcolympics.com/rss/sport=NC/latestnews.xml'), + # ('NBCOlympics.com - Short Track - Most Popular News', 'http://www.nbcolympics.com/rss/sport=ST/mostpopular.xml'), + # ('NBCOlympics.com - Short Track - Top News', 'http://www.nbcolympics.com/rss/sport=ST/topnews.xml'), + ('NBCOlympics.com - Short Track - Latest News', 'http://www.nbcolympics.com/rss/sport=ST/latestnews.xml'), + # ('NBCOlympics.com - Skeleton - Most Popular News', 'http://www.nbcolympics.com/rss/sport=SN/mostpopular.xml'), + # ('NBCOlympics.com - Skeleton - Top News', 'http://www.nbcolympics.com/rss/sport=SN/topnews.xml'), + ('NBCOlympics.com - Skeleton - Latest News', 'http://www.nbcolympics.com/rss/sport=SN/latestnews.xml'), + # ('NBCOlympics.com - Ski Jumping - Most Popular News', 'http://www.nbcolympics.com/rss/sport=SJ/mostpopular.xml'), + # ('NBCOlympics.com - Ski Jumping - Top News', 'http://www.nbcolympics.com/rss/sport=SJ/topnews.xml'), + ('NBCOlympics.com - Ski Jumping - Latest News', 'http://www.nbcolympics.com/rss/sport=SJ/latestnews.xml'), + # ('NBCOlympics.com - Snowboarding - Most Popular News', 
'http://www.nbcolympics.com/rss/sport=SB/mostpopular.xml'), + # ('NBCOlympics.com - Snowboarding - Top News', 'http://www.nbcolympics.com/rss/sport=SB/topnews.xml'), + ('NBCOlympics.com - Snowboarding - Latest News', 'http://www.nbcolympics.com/rss/sport=SB/latestnews.xml'), + # ('NBCOlympics.com - Speed Skating - Most Popular News', 'http://www.nbcolympics.com/rss/sport=AS/mostpopular.xml'), + # ('NBCOlympics.com - Speed Skating - Top News', 'http://www.nbcolympics.com/rss/sport=AS/topnews.xml'), + ('NBCOlympics.com - Speed Skating - Latest News', 'http://www.nbcolympics.com/rss/sport=AS/latestnews.xml'), + ] + + extra_css = ''' + h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;} + h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;} + p{font-family:Arial,Helvetica,sans-serif;font-size:small;} + body{font-family:Helvetica,Arial,sans-serif;font-size:small;} + ''' diff --git a/resources/recipes/wired.recipe b/resources/recipes/wired.recipe index e7395a9ada..33577447cc 100644 --- a/resources/recipes/wired.recipe +++ b/resources/recipes/wired.recipe @@ -20,6 +20,7 @@ class Wired(BasicNewsRecipe): no_stylesheets = True encoding = 'utf-8' use_embedded_content = False + masthead_url = 'http://www.wired.com/images/home/wired_logo.gif' language = 'en' extra_css = ' body{font-family: sans-serif} .entryDescription li {display: inline; list-style-type: none} ' index = 'http://www.wired.com/magazine/' @@ -38,14 +39,34 @@ class Wired(BasicNewsRecipe): dict(name=['object','embed','iframe','link']) ,dict(name='div', attrs={'class':['podcast_storyboard','tweetmeme_button']}) ] + remove_attributes = ['height','width'] - #feeds = [(u'Articles' , u'http://www.wired.com/magazine/feed/' )] - def parse_index(self): totalfeeds = [] - soup = self.index_to_soup(self.index) + soup = self.index_to_soup(self.index) + majorf = soup.find('div',attrs={'class':'index'}) + if majorf: + pfarticles = [] + firsta = 
majorf.find(attrs={'class':'spread-header'}) + if firsta: + pfarticles.append({ + 'title' :self.tag_to_string(firsta.a) + ,'date' :strftime(self.timefmt) + ,'url' :'http://www.wired.com' + firsta.a['href'] + ,'description':'' + }) + for itt in majorf.findAll('li'): + itema = itt.find('a',href=True) + if itema: + pfarticles.append({ + 'title' :self.tag_to_string(itema) + ,'date' :strftime(self.timefmt) + ,'url' :'http://www.wired.com' + itema['href'] + ,'description':'' + }) + totalfeeds.append(('Cover', pfarticles)) features = soup.find('div',attrs={'id':'my-glider'}) if features: farticles = [] diff --git a/src/calibre/devices/android/driver.py b/src/calibre/devices/android/driver.py index fd89675d91..aa4500d87b 100644 --- a/src/calibre/devices/android/driver.py +++ b/src/calibre/devices/android/driver.py @@ -20,7 +20,7 @@ class ANDROID(USBMS): VENDOR_ID = { 0x0bb4 : { 0x0c02 : [0x100], 0x0c01 : [0x100]}, 0x22b8 : { 0x41d9 : [0x216]}, - 0x18d1 : { 0x4e11 : [0x0100]}, + 0x18d1 : { 0x4e11 : [0x0100], 0x4e12: [0x0100]}, } EBOOK_DIR_MAIN = ['wordplayer/calibretransfer', 'eBooks/import', 'Books'] EXTRA_CUSTOMIZATION_MESSAGE = _('Comma separated list of directories to ' diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index 55a46da8fe..9bb3a71c03 100644 --- a/src/calibre/ebooks/conversion/plumber.py +++ b/src/calibre/ebooks/conversion/plumber.py @@ -12,6 +12,7 @@ from calibre.customize.ui import input_profiles, output_profiles, \ run_plugins_on_preprocess, run_plugins_on_postprocess from calibre.ebooks.conversion.preprocess import HTMLPreProcessor from calibre.ptempfile import PersistentTemporaryDirectory +from calibre.utils.date import parse_date from calibre import extract, walk DEBUG_README=u''' @@ -65,7 +66,7 @@ class Plumber(object): metadata_option_names = [ 'title', 'authors', 'title_sort', 'author_sort', 'cover', 'comments', 'publisher', 'series', 'series_index', 'rating', 'isbn', - 'tags', 'book_producer', 
'language' + 'tags', 'book_producer', 'language', 'pubdate', 'timestamp' ] def __init__(self, input, output, log, report_progress=DummyReporter(), @@ -461,6 +462,14 @@ OptionRecommendation(name='language', recommended_value=None, level=OptionRecommendation.LOW, help=_('Set the language.')), +OptionRecommendation(name='pubdate', + recommended_value=None, level=OptionRecommendation.LOW, + help=_('Set the publication date.')), + +OptionRecommendation(name='timestamp', + recommended_value=None, level=OptionRecommendation.LOW, + help=_('Set the book timestamp (used by the date column in calibre).')), + ] input_fmt = os.path.splitext(self.input)[1] @@ -619,6 +628,14 @@ OptionRecommendation(name='language', except ValueError: self.log.warn(_('Values of series index and rating must' ' be numbers. Ignoring'), val) + continue + elif x in ('timestamp', 'pubdate'): + try: + val = parse_date(val, assume_utc=x=='pubdate') + except: + self.log.exception(_('Failed to parse date/time') + ' ' + + unicode(val)) + continue setattr(mi, x, val) diff --git a/src/calibre/ebooks/epub/input.py b/src/calibre/ebooks/epub/input.py index fd1e4733e1..cf903c0a5d 100644 --- a/src/calibre/ebooks/epub/input.py +++ b/src/calibre/ebooks/epub/input.py @@ -132,6 +132,8 @@ class EPUBInput(InputFormatPlugin): self.rationalize_cover(opf, log) + self.optimize_opf_parsing = opf + with open('content.opf', 'wb') as nopf: nopf.write(opf.render()) diff --git a/src/calibre/ebooks/epub/output.py b/src/calibre/ebooks/epub/output.py index 6e381d5237..5cc62e8260 100644 --- a/src/calibre/ebooks/epub/output.py +++ b/src/calibre/ebooks/epub/output.py @@ -256,7 +256,20 @@ class EPUBOutput(OutputFormatPlugin): Perform various markup transforms to get the output to render correctly in the quirky ADE. 
''' - from calibre.ebooks.oeb.base import XPath, XHTML, OEB_STYLES, barename + from calibre.ebooks.oeb.base import XPath, XHTML, OEB_STYLES, barename, urlunquote + + # ADE cries big wet tears when it encounters an invalid fragment + # identifier in the NCX toc. + frag_pat = re.compile(r'[-A-Za-z0-9_:.]+$') + for node in self.oeb.toc.iter(): + href = getattr(node, 'href', None) + if hasattr(href, 'partition'): + base, _, frag = href.partition('#') + frag = urlunquote(frag) + if frag and frag_pat.match(frag) is None: + self.log.warn( + 'Removing invalid fragment identifier %r from TOC'%frag) + node.href = base for x in self.oeb.spine: root = x.data diff --git a/src/calibre/ebooks/lrf/pylrs/pylrs.py b/src/calibre/ebooks/lrf/pylrs/pylrs.py index 5e0c296807..53a768c073 100644 --- a/src/calibre/ebooks/lrf/pylrs/pylrs.py +++ b/src/calibre/ebooks/lrf/pylrs/pylrs.py @@ -50,6 +50,7 @@ from pylrf import (LrfWriter, LrfObject, LrfTag, LrfToc, STREAM_COMPRESSED, LrfTagStream, LrfStreamBase, IMAGE_TYPE_ENCODING, BINDING_DIRECTION_ENCODING, LINE_TYPE_ENCODING, LrfFileStream, STREAM_FORCE_COMPRESSED) +from calibre.utils.date import isoformat DEFAULT_SOURCE_ENCODING = "cp1252" # defualt is us-windows character set DEFAULT_GENREADING = "fs" # default is yes to both lrf and lrs @@ -852,7 +853,7 @@ class DocInfo(object): self.thumbnail = None self.language = "en" self.creator = None - self.creationdate = date.today().isoformat() + self.creationdate = str(isoformat(date.today())) self.producer = "%s v%s"%(__appname__, __version__) self.numberofpages = "0" diff --git a/src/calibre/ebooks/metadata/__init__.py b/src/calibre/ebooks/metadata/__init__.py index 3af486352a..f741d2201d 100644 --- a/src/calibre/ebooks/metadata/__init__.py +++ b/src/calibre/ebooks/metadata/__init__.py @@ -13,6 +13,7 @@ from urlparse import urlparse from calibre import relpath from calibre.utils.config import tweaks +from calibre.utils.date import isoformat _author_pat = re.compile(',?\s+(and|with)\s+', 
re.IGNORECASE) def string_to_authors(raw): @@ -344,9 +345,9 @@ class MetaInformation(object): if self.rating is not None: fmt('Rating', self.rating) if self.timestamp is not None: - fmt('Timestamp', self.timestamp.isoformat(' ')) + fmt('Timestamp', isoformat(self.timestamp)) if self.pubdate is not None: - fmt('Published', self.pubdate.isoformat(' ')) + fmt('Published', isoformat(self.pubdate)) if self.rights is not None: fmt('Rights', unicode(self.rights)) if self.lccn: diff --git a/src/calibre/ebooks/metadata/amazon.py b/src/calibre/ebooks/metadata/amazon.py index 616185d5a6..d1473be8f0 100644 --- a/src/calibre/ebooks/metadata/amazon.py +++ b/src/calibre/ebooks/metadata/amazon.py @@ -7,12 +7,11 @@ __docformat__ = 'restructuredtext en' Fetch metadata using Amazon AWS ''' import sys, re -from datetime import datetime from lxml import etree -from dateutil import parser from calibre import browser +from calibre.utils.date import parse_date, utcnow from calibre.ebooks.metadata import MetaInformation, string_to_authors AWS_NS = 'http://webservices.amazon.com/AWSECommerceService/2005-10-05' @@ -44,9 +43,8 @@ def get_social_metadata(title, authors, publisher, isbn): try: d = root.findtext('.//'+AWS('PublicationDate')) if d: - default = datetime.utcnow() - default = datetime(default.year, default.month, 15) - d = parser.parse(d[0].text, default=default) + default = utcnow().replace(day=15) + d = parse_date(d[0].text, assume_utc=True, default=default) mi.pubdate = d except: pass diff --git a/src/calibre/ebooks/metadata/cli.py b/src/calibre/ebooks/metadata/cli.py index 5de8b76c43..780d3febcf 100644 --- a/src/calibre/ebooks/metadata/cli.py +++ b/src/calibre/ebooks/metadata/cli.py @@ -15,6 +15,7 @@ from calibre.ebooks.metadata import string_to_authors, authors_to_sort_string, \ title_sort, MetaInformation from calibre.ebooks.lrf.meta import LRFMetaFile from calibre import prints +from calibre.utils.date import parse_date USAGE='%%prog ebook_file [' + _('options') + ']\n' + \ 
_(''' @@ -69,6 +70,8 @@ def config(): help=_('Set the book producer.')) c.add_opt('language', ['-l', '--language'], help=_('Set the language.')) + c.add_opt('pubdate', ['-d', '--date'], + help=_('Set the published date.')) c.add_opt('get_cover', ['--get-cover'], help=_('Get the cover from the ebook and save it at as the ' @@ -132,6 +135,8 @@ def do_set_metadata(opts, mi, stream, stream_type): mi.series = opts.series.strip() if getattr(opts, 'series_index', None) is not None: mi.series_index = float(opts.series_index.strip()) + if getattr(opts, 'pubdate', None) is not None: + mi.pubdate = parse_date(opts.pubdate, assume_utc=False, as_utc=False) if getattr(opts, 'cover', None) is not None: ext = os.path.splitext(opts.cover)[1].replace('.', '').upper() diff --git a/src/calibre/ebooks/metadata/epub.py b/src/calibre/ebooks/metadata/epub.py index 29f8978cbc..b063d3ef52 100644 --- a/src/calibre/ebooks/metadata/epub.py +++ b/src/calibre/ebooks/metadata/epub.py @@ -69,7 +69,7 @@ class OCFReader(OCF): self.opf_path = self.container[OPF.MIMETYPE] try: with closing(self.open(self.opf_path)) as f: - self.opf = OPF(f, self.root) + self.opf = OPF(f, self.root, populate_spine=False) except KeyError: raise EPubException("missing OPF package file") @@ -101,10 +101,9 @@ class OCFDirReader(OCFReader): def get_cover(opf, opf_path, stream): from calibre.ebooks import render_html_svg_workaround from calibre.utils.logging import default_log - spine = list(opf.spine_items()) - if not spine: + cpage = opf.first_spine_item() + if not cpage: return - cpage = spine[0] with TemporaryDirectory('_epub_meta') as tdir: with CurrentDir(tdir): stream.seek(0) diff --git a/src/calibre/ebooks/metadata/google_books.py b/src/calibre/ebooks/metadata/google_books.py index 2705e3554e..2087b7c489 100644 --- a/src/calibre/ebooks/metadata/google_books.py +++ b/src/calibre/ebooks/metadata/google_books.py @@ -6,14 +6,13 @@ __docformat__ = 'restructuredtext en' import sys, textwrap from urllib import urlencode 
from functools import partial -from datetime import datetime from lxml import etree -from dateutil import parser from calibre import browser, preferred_encoding from calibre.ebooks.metadata import MetaInformation from calibre.utils.config import OptionParser +from calibre.utils.date import parse_date, utcnow NAMESPACES = { 'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/', @@ -156,9 +155,8 @@ class ResultList(list): try: d = date(entry) if d: - default = datetime.utcnow() - default = datetime(default.year, default.month, 15) - d = parser.parse(d[0].text, default=default) + default = utcnow().replace(day=15) + d = parse_date(d[0].text, assume_utc=True, default=default) else: d = None except: diff --git a/src/calibre/ebooks/metadata/mobi.py b/src/calibre/ebooks/metadata/mobi.py index a83ac2fb9b..1de73d7dd4 100644 --- a/src/calibre/ebooks/metadata/mobi.py +++ b/src/calibre/ebooks/metadata/mobi.py @@ -11,11 +11,11 @@ __docformat__ = 'restructuredtext en' from struct import pack, unpack from cStringIO import StringIO -from datetime import datetime from calibre.ebooks.mobi import MobiError from calibre.ebooks.mobi.writer import rescale_image, MAX_THUMB_DIMEN from calibre.ebooks.mobi.langcodes import iana2mobi +from calibre.utils.date import now as nowf class StreamSlicer(object): @@ -331,7 +331,7 @@ class MetadataUpdater(object): recs.append((106, self.timestamp)) pop_exth_record(106) else: - recs.append((106, str(datetime.now()).encode(self.codec, 'replace'))) + recs.append((106, nowf().isoformat().encode(self.codec, 'replace'))) pop_exth_record(106) if self.cover_record is not None: recs.append((201, pack('>I', self.cover_rindex))) diff --git a/src/calibre/ebooks/metadata/opf2.py b/src/calibre/ebooks/metadata/opf2.py index c2244fd892..5e57b0b515 100644 --- a/src/calibre/ebooks/metadata/opf2.py +++ b/src/calibre/ebooks/metadata/opf2.py @@ -12,12 +12,12 @@ from urllib import unquote from urlparse import urlparse from lxml import etree -from dateutil import parser 
from calibre.ebooks.chardet import xml_to_unicode from calibre.constants import __appname__, __version__, filesystem_encoding from calibre.ebooks.metadata.toc import TOC from calibre.ebooks.metadata import MetaInformation, string_to_authors +from calibre.utils.date import parse_date, isoformat class Resource(object): @@ -272,6 +272,10 @@ class Spine(ResourceCollection): self.id = idfunc(self.path) self.idref = None + def __repr__(self): + return 'Spine.Item(path=%r, id=%s, is_linear=%s)' % \ + (self.path, self.id, self.is_linear) + @staticmethod def from_opf_spine_element(itemrefs, manifest): s = Spine(manifest) @@ -280,7 +284,7 @@ class Spine(ResourceCollection): if idref is not None: path = s.manifest.path_for_id(idref) if path: - r = Spine.Item(s.manifest.id_for_path, path, is_path=True) + r = Spine.Item(lambda x:idref, path, is_path=True) r.is_linear = itemref.get('linear', 'yes') == 'yes' r.idref = idref s.append(r) @@ -441,6 +445,8 @@ class OPF(object): guide_path = XPath('descendant::*[re:match(name(), "guide", "i")]/*[re:match(name(), "reference", "i")]') title = MetadataField('title', formatter=lambda x: re.sub(r'\s+', ' ', x)) + title_sort = MetadataField('title_sort', formatter=lambda x: + re.sub(r'\s+', ' ', x), is_dc=False) publisher = MetadataField('publisher') language = MetadataField('language') comments = MetadataField('description') @@ -449,12 +455,14 @@ class OPF(object): series = MetadataField('series', is_dc=False) series_index = MetadataField('series_index', is_dc=False, formatter=float, none_is=1) rating = MetadataField('rating', is_dc=False, formatter=int) - pubdate = MetadataField('date', formatter=parser.parse) + pubdate = MetadataField('date', formatter=parse_date) publication_type = MetadataField('publication_type', is_dc=False) - timestamp = MetadataField('timestamp', is_dc=False, formatter=parser.parse) + timestamp = MetadataField('timestamp', is_dc=False, + formatter=parse_date) - def __init__(self, stream, basedir=os.getcwdu(), 
unquote_urls=True): + def __init__(self, stream, basedir=os.getcwdu(), unquote_urls=True, + populate_spine=True): if not hasattr(stream, 'read'): stream = open(stream, 'rb') raw = stream.read() @@ -477,7 +485,7 @@ class OPF(object): self.manifest = Manifest.from_opf_manifest_element(m, basedir) self.spine = None s = self.spine_path(self.root) - if s: + if populate_spine and s: self.spine = Spine.from_opf_spine_element(s, self.manifest) self.guide = None guide = self.guide_path(self.root) @@ -584,6 +592,15 @@ class OPF(object): if x.get('id', None) == idref: yield x.get('href', '') + def first_spine_item(self): + items = self.iterspine() + if not items: + return None + idref = items[0].get('idref', '') + for x in self.itermanifest(): + if x.get('id', None) == idref: + return x.get('href', None) + def create_spine_item(self, idref): ans = etree.Element('{%s}itemref'%self.NAMESPACES['opf'], idref=idref) ans.tail = '\n\t\t' @@ -675,29 +692,6 @@ class OPF(object): return property(fget=fget, fset=fset) - @dynamic_property - def title_sort(self): - - def fget(self): - matches = self.title_path(self.metadata) - if matches: - for match in matches: - ans = match.get('{%s}file-as'%self.NAMESPACES['opf'], None) - if not ans: - ans = match.get('file-as', None) - if ans: - return ans - - def fset(self, val): - matches = self.title_path(self.metadata) - if matches: - for key in matches[0].attrib: - if key.endswith('file-as'): - matches[0].attrib.pop(key) - matches[0].set('file-as', unicode(val)) - - return property(fget=fget, fset=fset) - @dynamic_property def tags(self): @@ -869,7 +863,8 @@ class OPF(object): def smart_update(self, mi): for attr in ('title', 'authors', 'author_sort', 'title_sort', 'publisher', 'series', 'series_index', 'rating', - 'isbn', 'language', 'tags', 'category', 'comments'): + 'isbn', 'language', 'tags', 'category', 'comments', + 'pubdate'): val = getattr(mi, attr, None) if val is not None and val != [] and val != (None, None): setattr(self, attr, val) 
@@ -1041,12 +1036,12 @@ def metadata_to_opf(mi, as_string=True): elem.text = text.strip() metadata.append(elem) - factory(DC('title'), mi.title, mi.title_sort) + factory(DC('title'), mi.title) for au in mi.authors: factory(DC('creator'), au, mi.author_sort, 'aut') factory(DC('contributor'), mi.book_producer, __appname__, 'bkp') if hasattr(mi.pubdate, 'isoformat'): - factory(DC('date'), mi.pubdate.isoformat()) + factory(DC('date'), isoformat(mi.pubdate)) factory(DC('language'), mi.language) if mi.category: factory(DC('type'), mi.category) @@ -1069,9 +1064,11 @@ def metadata_to_opf(mi, as_string=True): if mi.rating is not None: meta('rating', str(mi.rating)) if hasattr(mi.timestamp, 'isoformat'): - meta('timestamp', mi.timestamp.isoformat()) + meta('timestamp', isoformat(mi.timestamp)) if mi.publication_type: meta('publication_type', mi.publication_type) + if mi.title_sort: + meta('title_sort', mi.title_sort) metadata[-1].tail = '\n' +(' '*4) @@ -1088,12 +1085,12 @@ def metadata_to_opf(mi, as_string=True): def test_m2o(): - from datetime import datetime + from calibre.utils.date import now as nowf from cStringIO import StringIO mi = MetaInformation('test & title', ['a"1', "a'2"]) mi.title_sort = 'a\'"b' mi.author_sort = 'author sort' - mi.pubdate = datetime.now() + mi.pubdate = nowf() mi.language = 'en' mi.category = 'test' mi.comments = 'what a fun book\n\n' @@ -1103,7 +1100,7 @@ def test_m2o(): mi.series = 's"c\'l&<>' mi.series_index = 3.34 mi.rating = 3 - mi.timestamp = datetime.now() + mi.timestamp = nowf() mi.publication_type = 'ooooo' mi.rights = 'yes' mi.cover = 'asd.jpg' diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py index 4aac84e599..88396b4346 100644 --- a/src/calibre/ebooks/mobi/reader.py +++ b/src/calibre/ebooks/mobi/reader.py @@ -4,13 +4,11 @@ __copyright__ = '2008, Kovid Goyal ' Read data from .mobi files ''' -import datetime import functools import os import re import struct import textwrap - import cStringIO try: @@ 
-23,6 +21,7 @@ from lxml import html, etree from calibre import entity_to_unicode, CurrentDir from calibre.utils.filenames import ascii_filename +from calibre.utils.date import parse_date from calibre.ptempfile import TemporaryDirectory from calibre.ebooks import DRMError from calibre.ebooks.chardet import ENCODING_PATS @@ -68,7 +67,10 @@ class EXTHHeader(object): pass elif id == 503: # Long title if not title or title == _('Unknown'): - title = content + try: + title = content.decode(codec) + except: + pass #else: # print 'unknown record', id, repr(content) if title: @@ -96,8 +98,7 @@ class EXTHHeader(object): self.mi.tags = list(set(self.mi.tags)) elif id == 106: try: - self.mi.publish_date = datetime.datetime.strptime( - content, '%Y-%m-%d', ).date() + self.mi.pubdate = parse_date(content, as_utc=False) except: pass elif id == 108: diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index b885f08962..0d8eed4692 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -1578,14 +1578,17 @@ class TOC(object): parent = etree.Element(NCX('navMap')) for node in self.nodes: id = node.id or unicode(uuid.uuid4()) - attrib = {'id': id, 'playOrder': str(node.play_order)} + po = node.play_order + if po == 0: + po = 1 + attrib = {'id': id, 'playOrder': str(po)} if node.klass: attrib['class'] = node.klass point = element(parent, NCX('navPoint'), attrib=attrib) label = etree.SubElement(point, NCX('navLabel')) title = node.title if title: - title = re.sub(r'\s', ' ', title) + title = re.sub(r'\s+', ' ', title) element(label, NCX('text')).text = title element(point, NCX('content'), src=urlunquote(node.href)) node.to_ncx(point) diff --git a/src/calibre/ebooks/oeb/iterator.py b/src/calibre/ebooks/oeb/iterator.py index 8f8668b215..8959d62fac 100644 --- a/src/calibre/ebooks/oeb/iterator.py +++ b/src/calibre/ebooks/oeb/iterator.py @@ -184,8 +184,9 @@ class EbookIterator(object): if hasattr(self.pathtoopf, 'manifest'): self.pathtoopf 
= write_oebbook(self.pathtoopf, self.base) - - self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf)) + self.opf = getattr(plumber.input_plugin, 'optimize_opf_parsing', None) + if self.opf is None: + self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf)) self.language = self.opf.language if self.language: self.language = self.language.lower() diff --git a/src/calibre/ebooks/oeb/transforms/metadata.py b/src/calibre/ebooks/oeb/transforms/metadata.py index bb621c9412..97bad07a58 100644 --- a/src/calibre/ebooks/oeb/transforms/metadata.py +++ b/src/calibre/ebooks/oeb/transforms/metadata.py @@ -7,7 +7,7 @@ __copyright__ = '2009, Kovid Goyal ' __docformat__ = 'restructuredtext en' import os -from datetime import datetime +from calibre.utils.date import isoformat, now def meta_info_to_oeb_metadata(mi, m, log): from calibre.ebooks.oeb.base import OPF @@ -60,10 +60,10 @@ def meta_info_to_oeb_metadata(mi, m, log): m.add('subject', t) if mi.pubdate is not None: m.clear('date') - m.add('date', mi.pubdate.isoformat()) + m.add('date', isoformat(mi.pubdate)) if mi.timestamp is not None: m.clear('timestamp') - m.add('timestamp', mi.timestamp.isoformat()) + m.add('timestamp', isoformat(mi.timestamp)) if mi.rights is not None: m.clear('rights') m.add('rights', mi.rights) @@ -71,7 +71,7 @@ def meta_info_to_oeb_metadata(mi, m, log): m.clear('publication_type') m.add('publication_type', mi.publication_type) if not m.timestamp: - m.add('timestamp', datetime.now().isoformat()) + m.add('timestamp', isoformat(now())) class MergeMetadata(object): diff --git a/src/calibre/ebooks/pdf/reflow.py b/src/calibre/ebooks/pdf/reflow.py index a904ec3d8b..584d631d0b 100644 --- a/src/calibre/ebooks/pdf/reflow.py +++ b/src/calibre/ebooks/pdf/reflow.py @@ -6,7 +6,7 @@ __license__ = 'GPL v3' __copyright__ = '2009, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import sys +import sys, os from lxml import etree @@ -47,6 +47,10 @@ class Image(Element): return '' % \ (self.src, 
int(self.width), int(self.height)) + def dump(self, f): + f.write(self.to_html()) + f.write('\n') + class Text(Element): @@ -91,6 +95,10 @@ class Text(Element): def to_html(self): return self.raw + def dump(self, f): + f.write(self.to_html().encode('utf-8')) + f.write('\n') + class FontSizeStats(dict): def __init__(self, stats): @@ -143,6 +151,14 @@ class Column(object): def add(self, elem): if elem in self.elements: return self.elements.append(elem) + self._post_add() + + def prepend(self, elem): + if elem in self.elements: return + self.elements.insert(0, elem) + self._post_add() + + def _post_add(self): self.elements.sort(cmp=lambda x,y:cmp(x.bottom,y.bottom)) self.top = self.elements[0].top self.bottom = self.elements[-1].bottom @@ -183,6 +199,11 @@ class Column(object): return None return self.elements[idx-1] + def dump(self, f, num): + f.write('******** Column %d\n\n'%num) + for elem in self.elements: + elem.dump(f) + class Box(list): @@ -282,7 +303,6 @@ class Region(object): mc = self.columns[0] return mc - print for c in singleton.columns: for elem in c: col = most_suitable_column(elem) @@ -308,16 +328,45 @@ class Region(object): self.absorb_region(region, at) def absorb_region(self, region, at): - src_iter = lambda x:x if at == 'bottom' else reversed - if len(region.columns) == len(self.columns): - for src, dest in zip(region.columns, self.columns): - for elem in src_iter(src): - if at == 'bottom': - dest.append(elem) - else: - dest.insert(0, elem) + if len(region.columns) <= len(self.columns): + for i in range(len(region.columns)): + src, dest = region.columns[i], self.columns[i] + if at != 'bottom': + src = reversed(list(iter(src))) + for elem in src: + func = dest.add if at == 'bottom' else dest.prepend + func(elem) + else: - pass + col_map = {} + for i, col in enumerate(region.columns): + max_overlap, max_overlap_index = 0, 0 + for j, dcol in enumerate(self.columns): + sint = Interval(col.left, col.right) + dint = Interval(dcol.left, dcol.right) + 
width = sint.intersection(dint).width + if width > max_overlap: + max_overlap = width + max_overlap_index = j + col_map[i] = max_overlap_index + lines = max(map(len, region.columns)) + if at == 'bottom': + lines = range(lines) + else: + lines = range(lines-1, -1, -1) + for i in lines: + for j, src in enumerate(region.columns): + dest = self.columns[col_map[j]] + if i < len(src): + func = dest.add if at == 'bottom' else dest.prepend + func(src.elements[i]) + + def dump(self, f): + f.write('############################################################\n') + f.write('########## Region (%d columns) ###############\n'%len(self.columns)) + f.write('############################################################\n\n') + for i, col in enumerate(self.columns): + col.dump(f, i) def linearize(self): self.elements = [] @@ -391,7 +440,8 @@ class Page(object): self.font_size_stats[t.font_size] = 0 self.font_size_stats[t.font_size] += len(t.text_as_string) self.average_text_height += t.height - self.average_text_height /= len(self.texts) + if len(self.texts): + self.average_text_height /= len(self.texts) self.font_size_stats = FontSizeStats(self.font_size_stats) @@ -446,7 +496,20 @@ class Page(object): if not current_region.is_empty: self.regions.append(current_region) + if self.opts.verbose > 2: + self.debug_dir = 'page-%d'%self.number + os.mkdir(self.debug_dir) + self.dump_regions('pre-coalesce') + self.coalesce_regions() + self.dump_regions('post-coalesce') + + def dump_regions(self, fname): + fname = 'regions-'+fname+'.txt' + with open(os.path.join(self.debug_dir, fname), 'wb') as f: + f.write('Page #%d\n\n'%self.number) + for region in self.regions: + region.dump(f) def coalesce_regions(self): # find contiguous sets of small regions @@ -455,19 +518,25 @@ class Page(object): # region) found = True absorbed = set([]) + processed = set([]) while found: found = False for i, region in enumerate(self.regions): - if region.is_small: + if region in absorbed: + continue + if 
region.is_small and region not in processed: found = True + processed.add(region) regions = [region] + end = i+1 for j in range(i+1, len(self.regions)): + end = j if self.regions[j].is_small: regions.append(self.regions[j]) else: break prev_region = None if i == 0 else i-1 - next_region = j if self.regions[j] not in regions else None + next_region = end if end < len(self.regions) and self.regions[end] not in regions else None absorb_at = 'bottom' if prev_region is None and next_region is not None: absorb_into = next_region @@ -476,29 +545,30 @@ class Page(object): absorb_into = prev_region elif prev_region is None and next_region is None: if len(regions) > 1: - absorb_into = regions[0] + absorb_into = i regions = regions[1:] else: absorb_into = None else: absorb_into = prev_region - if next_region.line_count >= prev_region.line_count: + if self.regions[next_region].line_count >= \ + self.regions[prev_region].line_count: avg_column_count = sum([len(r.columns) for r in regions])/float(len(regions)) - if next_region.line_count > prev_region.line_count \ - or abs(avg_column_count - len(prev_region.columns)) \ - > abs(avg_column_count - len(next_region.columns)): + if self.regions[next_region].line_count > \ + self.regions[prev_region].line_count \ + or abs(avg_column_count - + len(self.regions[prev_region].columns)) \ + > abs(avg_column_count - + len(self.regions[next_region].columns)): absorb_into = next_region absorb_at = 'top' if absorb_into is not None: - absorb_into.absorb_regions(regions, absorb_at) + self.regions[absorb_into].absorb_regions(regions, absorb_at) absorbed.update(regions) - i = j for region in absorbed: self.regions.remove(region) - - def sort_into_columns(self, elem, neighbors): neighbors.add(elem) neighbors = sorted(neighbors, cmp=lambda x,y:cmp(x.left, y.left)) @@ -617,8 +687,9 @@ class PDFDocument(object): for elem in self.elements: html.extend(elem.to_html()) html += ['', ''] + raw = (u'\n'.join(html)).replace('', '') with open('index.html', 
'wb') as f: - f.write((u'\n'.join(html)).encode('utf-8')) + f.write(raw.encode('utf-8')) diff --git a/src/calibre/gui2/dialogs/metadata_single.py b/src/calibre/gui2/dialogs/metadata_single.py index 212445cba3..5d698f88f9 100644 --- a/src/calibre/gui2/dialogs/metadata_single.py +++ b/src/calibre/gui2/dialogs/metadata_single.py @@ -10,7 +10,6 @@ import os import re import time import traceback -from datetime import datetime, timedelta from PyQt4.Qt import SIGNAL, QObject, QCoreApplication, Qt, QTimer, QThread, QDate, \ QPixmap, QListWidgetItem, QDialog @@ -29,6 +28,7 @@ from calibre.ebooks.metadata.library_thing import cover_from_isbn from calibre import islinux from calibre.ebooks.metadata.meta import get_metadata from calibre.utils.config import prefs, tweaks +from calibre.utils.date import qt_to_dt from calibre.customize.ui import run_plugins_on_import, get_isbndb_key from calibre.gui2.dialogs.config.social import SocialMetadata @@ -354,12 +354,9 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog): self.comments.setPlainText(comments if comments else '') cover = self.db.cover(row) pubdate = db.pubdate(self.id, index_is_id=True) - self.local_timezone_offset = timedelta(seconds=time.timezone) - timedelta(hours=time.daylight) - pubdate = pubdate - self.local_timezone_offset self.pubdate.setDate(QDate(pubdate.year, pubdate.month, pubdate.day)) timestamp = db.timestamp(self.id, index_is_id=True) - timestamp = timestamp - self.local_timezone_offset self.date.setDate(QDate(timestamp.year, timestamp.month, timestamp.day)) @@ -583,7 +580,6 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog): if book.isbn: self.isbn.setText(book.isbn) if book.pubdate: d = book.pubdate - d = d - self.local_timezone_offset self.pubdate.setDate(QDate(d.year, d.month, d.day)) summ = book.comments if summ: @@ -656,12 +652,10 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog): self.db.set_series_index(self.id, self.series_index.value(), 
notify=False) self.db.set_comment(self.id, qstring_to_unicode(self.comments.toPlainText()), notify=False) d = self.pubdate.date() - d = datetime(d.year(), d.month(), d.day()) - d = d + self.local_timezone_offset + d = qt_to_dt(d) self.db.set_pubdate(self.id, d) d = self.date.date() - d = datetime(d.year(), d.month(), d.day()) - d = d + self.local_timezone_offset + d = qt_to_dt(d) self.db.set_timestamp(self.id, d) if self.cover_changed: diff --git a/src/calibre/gui2/dialogs/scheduler.py b/src/calibre/gui2/dialogs/scheduler.py index 8702322b7e..5aee71d7c6 100644 --- a/src/calibre/gui2/dialogs/scheduler.py +++ b/src/calibre/gui2/dialogs/scheduler.py @@ -7,7 +7,7 @@ __docformat__ = 'restructuredtext en' Scheduler for automated recipe downloads ''' -from datetime import datetime, timedelta +from datetime import timedelta from PyQt4.Qt import QDialog, SIGNAL, Qt, QTime, QObject, QMenu, \ QAction, QIcon, QMutex, QTimer @@ -17,6 +17,7 @@ from calibre.gui2.search_box import SearchBox2 from calibre.gui2 import config as gconf, error_dialog from calibre.web.feeds.recipes.model import RecipeModel from calibre.ptempfile import PersistentTemporaryFile +from calibre.utils.date import utcnow class SchedulerDialog(QDialog, Ui_Dialog): @@ -185,7 +186,7 @@ class SchedulerDialog(QDialog, Ui_Dialog): self.day.setCurrentIndex(day+1) self.time.setTime(QTime(hour, minute)) - d = datetime.utcnow() - last_downloaded + d = utcnow() - last_downloaded def hm(x): return (x-x%3600)//3600, (x%3600 - (x%3600)%60)//60 hours, minutes = hm(d.seconds) tm = _('%d days, %d hours and %d minutes ago')%(d.days, hours, minutes) diff --git a/src/calibre/gui2/library.py b/src/calibre/gui2/library.py index fe8eca8ead..6b3e80c955 100644 --- a/src/calibre/gui2/library.py +++ b/src/calibre/gui2/library.py @@ -1,8 +1,7 @@ from calibre.ebooks.metadata import authors_to_string __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' -import os, textwrap, traceback, time, re -from datetime import timedelta, 
datetime +import os, textwrap, traceback, re from operator import attrgetter from math import cos, sin, pi @@ -25,6 +24,7 @@ from calibre.utils.search_query_parser import SearchQueryParser from calibre.ebooks.metadata.meta import set_metadata as _set_metadata from calibre.ebooks.metadata import string_to_authors, fmt_sidx from calibre.utils.config import tweaks +from calibre.utils.date import dt_factory, qt_to_dt, isoformat class LibraryDelegate(QItemDelegate): COLOR = QColor("blue") @@ -567,13 +567,11 @@ class BooksModel(QAbstractTableModel): def timestamp(r): dt = self.db.data[r][tmdx] if dt: - dt = dt - timedelta(seconds=time.timezone) + timedelta(hours=time.daylight) return QDate(dt.year, dt.month, dt.day) def pubdate(r): dt = self.db.data[r][pddx] if dt: - dt = dt - timedelta(seconds=time.timezone) + timedelta(hours=time.daylight) return QDate(dt.year, dt.month, dt.day) def rating(r): @@ -670,13 +668,11 @@ class BooksModel(QAbstractTableModel): elif column == 'timestamp': if val.isNull() or not val.isValid(): return False - dt = datetime(val.year(), val.month(), val.day()) + timedelta(seconds=time.timezone) - timedelta(hours=time.daylight) - self.db.set_timestamp(id, dt) + self.db.set_timestamp(id, qt_to_dt(val, as_utc=False)) elif column == 'pubdate': if val.isNull() or not val.isValid(): return False - dt = datetime(val.year(), val.month(), val.day()) + timedelta(seconds=time.timezone) - timedelta(hours=time.daylight) - self.db.set_pubdate(id, dt) + self.db.set_pubdate(id, qt_to_dt(val, as_utc=False)) else: self.db.set(row, column, val) self.emit(SIGNAL("dataChanged(QModelIndex, QModelIndex)"), \ @@ -1032,7 +1028,8 @@ class DeviceBooksModel(BooksModel): def datecmp(x, y): x = self.db[x].datetime y = self.db[y].datetime - return cmp(datetime(*x[0:6]), datetime(*y[0:6])) + return cmp(dt_factory(x, assume_utc=True), dt_factory(y, + assume_utc=True)) def sizecmp(x, y): x, y = int(self.db[x].size), int(self.db[y].size) return cmp(x, y) @@ -1081,10 +1078,8 @@ 
class DeviceBooksModel(BooksModel): type = ext[1:].lower() data[_('Format')] = type data[_('Path')] = item.path - dt = item.datetime - dt = datetime(*dt[0:6]) - dt = dt - timedelta(seconds=time.timezone) + timedelta(hours=time.daylight) - data[_('Timestamp')] = strftime('%a %b %d %H:%M:%S %Y', dt.timetuple()) + dt = dt_factory(item.datetime, assume_utc=True) + data[_('Timestamp')] = isoformat(dt, sep=' ', as_utc=False) data[_('Tags')] = ', '.join(item.tags) self.emit(SIGNAL('new_bookdisplay_data(PyQt_PyObject)'), data) @@ -1119,8 +1114,7 @@ class DeviceBooksModel(BooksModel): return QVariant(BooksView.human_readable(size)) elif col == 3: dt = self.db[self.map[row]].datetime - dt = datetime(*dt[0:6]) - dt = dt - timedelta(seconds=time.timezone) + timedelta(hours=time.daylight) + dt = dt_factory(dt, assume_utc=True, as_utc=False) return QVariant(strftime(BooksView.TIME_FMT, dt.timetuple())) elif col == 4: tags = self.db[self.map[row]].tags diff --git a/src/calibre/gui2/lrf_renderer/document.py b/src/calibre/gui2/lrf_renderer/document.py index 13407d45bf..f0c53b12f9 100644 --- a/src/calibre/gui2/lrf_renderer/document.py +++ b/src/calibre/gui2/lrf_renderer/document.py @@ -79,6 +79,8 @@ class _Canvas(QGraphicsRectItem): pen = QPen() pen.setStyle(Qt.NoPen) self.setPen(pen) + if not hasattr(self, 'children'): + self.children = self.childItems def layout_block(self, block, x, y): if isinstance(block, TextBlock): diff --git a/src/calibre/gui2/lrf_renderer/text.py b/src/calibre/gui2/lrf_renderer/text.py index 05e87532f4..b6a2788353 100644 --- a/src/calibre/gui2/lrf_renderer/text.py +++ b/src/calibre/gui2/lrf_renderer/text.py @@ -358,6 +358,8 @@ class Line(QGraphicsItem): self.links = collections.deque() self.current_link = None self.valign = None + if not hasattr(self, 'children'): + self.children = self.childItems def start_link(self, refobj, slot): self.current_link = [self.current_width, sys.maxint, refobj, slot] @@ -481,10 +483,9 @@ class Line(QGraphicsItem): 
painter.restore() painter.save() painter.setPen(QPen(Qt.NoPen)) - if hasattr(self, 'children'): - for c in self.children(): - painter.setBrush(c.brush) - painter.drawRect(c.boundingRect()) + for c in self.children(): + painter.setBrush(c.brush) + painter.drawRect(c.boundingRect()) painter.restore() painter.save() for tok in self.tokens: diff --git a/src/calibre/gui2/tools.py b/src/calibre/gui2/tools.py index 6525000781..90686f7d86 100644 --- a/src/calibre/gui2/tools.py +++ b/src/calibre/gui2/tools.py @@ -252,17 +252,20 @@ def generate_catalog(parent, dbspec, ids, device): # Parallel initialization in calibre.library.cli:command_catalog() connected_device = { 'storage':None,'serial':None,'name':None} if device: - storage = [] - if device._main_prefix: - storage.append(os.path.join(device._main_prefix, device.EBOOK_DIR_MAIN)) - if device._card_a_prefix: - storage.append(os.path.join(device._card_a_prefix, device.EBOOK_DIR_CARD_A)) - if device._card_b_prefix: - storage.append(os.path.join(device._card_b_prefix, device.EBOOK_DIR_CARD_B)) - connected_device = {'storage': storage, - 'serial': device.detected_device.serial if \ - hasattr(device.detected_device,'serial') else None, - 'name': device.gui_name} + try: + storage = [] + if device._main_prefix: + storage.append(os.path.join(device._main_prefix, device.EBOOK_DIR_MAIN)) + if device._card_a_prefix: + storage.append(os.path.join(device._card_a_prefix, device.EBOOK_DIR_CARD_A)) + if device._card_b_prefix: + storage.append(os.path.join(device._card_b_prefix, device.EBOOK_DIR_CARD_B)) + connected_device = {'storage': storage, + 'serial': device.detected_device.serial if \ + hasattr(device.detected_device,'serial') else None, + 'name': device.gui_name} + except: + pass # These args are passed inline to gui2.convert.gui_conversion:gui_catalog args = [ diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py index d48bb3d949..b7ece0bc78 100644 --- a/src/calibre/library/catalog.py +++ 
b/src/calibre/library/catalog.py @@ -11,6 +11,7 @@ from calibre.customize.conversion import OptionRecommendation, DummyReporter from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, Tag, NavigableString from calibre.ptempfile import PersistentTemporaryDirectory from calibre.utils.logging import Log +from calibre.utils.date import isoformat FIELDS = ['all', 'author_sort', 'authors', 'comments', 'cover', 'formats', 'id', 'isbn', 'pubdate', 'publisher', 'rating', @@ -103,7 +104,9 @@ class CSV_XML(CatalogPlugin): item = ', '.join(item) elif field == 'isbn': # Could be 9, 10 or 13 digits - field = u'%s' % re.sub(r'[\D]','',field) + item = u'%s' % re.sub(r'[\D]', '', item) + elif field in ['pubdate', 'timestamp']: + item = isoformat(item) if x < len(fields) - 1: if item is not None: @@ -164,12 +167,12 @@ class CSV_XML(CatalogPlugin): if 'date' in fields: record_child = etree.SubElement(record, 'date') record_child.set(PY + "if", "record['date']") - record_child.text = "${record['date']}" + record_child.text = "${record['date'].isoformat()}" if 'pubdate' in fields: record_child = etree.SubElement(record, 'pubdate') record_child.set(PY + "if", "record['pubdate']") - record_child.text = "${record['pubdate']}" + record_child.text = "${record['pubdate'].isoformat()}" if 'size' in fields: record_child = etree.SubElement(record, 'size') diff --git a/src/calibre/library/cli.py b/src/calibre/library/cli.py index 9dfa2c17a6..f474590f7d 100644 --- a/src/calibre/library/cli.py +++ b/src/calibre/library/cli.py @@ -17,6 +17,7 @@ from calibre.ebooks.metadata.meta import get_metadata from calibre.library.database2 import LibraryDatabase2 from calibre.ebooks.metadata.opf2 import OPFCreator, OPF from calibre.utils.genshi.template import MarkupTemplate +from calibre.utils.date import isoformat FIELDS = set(['title', 'authors', 'author_sort', 'publisher', 'rating', 'timestamp', 'size', 'tags', 'comments', 'series', 'series_index', @@ -37,8 +38,8 @@ XML_TEMPLATE = '''\ 
${record['publisher']} ${record['rating']} - ${record['timestamp']} - ${record['pubdate']} + ${record['timestamp'].isoformat()} + ${record['pubdate'].isoformat()} ${record['size']} @@ -68,7 +69,7 @@ STANZA_TEMPLATE='''\ http://calibre-ebook.com $id - ${updated.strftime('%Y-%m-%dT%H:%M:%SZ')} + ${updated.isoformat()} ${subtitle} @@ -77,7 +78,7 @@ STANZA_TEMPLATE='''\ ${record['title']} urn:calibre:${record['uuid']} ${record['author_sort']} - ${record['timestamp'].strftime('%Y-%m-%dT%H:%M:%SZ')} + ${record['timestamp'].isoformat()} @@ -144,7 +145,10 @@ def do_list(db, fields, sort_by, ascending, search_text, line_width, separator, widths = list(map(lambda x : 0, fields)) for record in data: for f in record.keys(): - record[f] = unicode(record[f]) + if hasattr(record[f], 'isoformat'): + record[f] = isoformat(record[f], as_utc=False) + else: + record[f] = unicode(record[f]) record[f] = record[f].replace('\n', ' ') for i in data: for j, field in enumerate(fields): diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index d1a0c24cef..ed902c8ea4 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -9,7 +9,6 @@ The database used to store ebook metadata import os, re, sys, shutil, cStringIO, glob, collections, textwrap, \ itertools, functools, traceback from itertools import repeat -from datetime import datetime from math import floor from PyQt4.QtCore import QThread, QReadWriteLock @@ -34,6 +33,7 @@ from calibre.ptempfile import PersistentTemporaryFile from calibre.customize.ui import run_plugins_on_import from calibre.utils.filenames import ascii_filename +from calibre.utils.date import utcnow, now as nowf, utcfromtimestamp from calibre.ebooks import BOOK_EXTENSIONS if iswindows: @@ -241,6 +241,7 @@ class ResultCache(SearchQueryParser): for x in all: MAP[x] = FIELD_MAP[x] EXCLUDE_FIELDS = [MAP['rating'], MAP['cover']] + SPLITABLE_FIELDS = [MAP['authors'], MAP['tags'], MAP['formats']] location = [location] if 
location != 'all' else list(MAP.keys()) for i, loc in enumerate(location): location[i] = MAP[loc] @@ -275,14 +276,14 @@ class ResultCache(SearchQueryParser): matches.add(item[0]) continue if loc not in EXCLUDE_FIELDS: - if loc == MAP['tags'] or loc == MAP['authors']: - vals = item[loc].split(',') ### check individual tags/authors, not the long string + if loc in SPLITABLE_FIELDS: + vals = item[loc].split(',') ### check individual tags/authors/formats, not the long string else: vals = [item[loc]] ### make into list to make _match happy if _match(q, vals, matchkind): matches.add(item[0]) continue - return matches + return matches def remove(self, id): self._data[id] = None @@ -714,12 +715,12 @@ class LibraryDatabase2(LibraryDatabase): def last_modified(self): ''' Return last modified time as a UTC datetime object''' - return datetime.utcfromtimestamp(os.stat(self.dbpath).st_mtime) + return utcfromtimestamp(os.stat(self.dbpath).st_mtime) def check_if_modified(self): if self.last_modified() > self.last_update_check: self.refresh() - self.last_update_check = datetime.utcnow() + self.last_update_check = utcnow() def path(self, index, index_is_id=False): 'Return the relative path to the directory containing this books files as a unicode string.' 
@@ -1122,7 +1123,7 @@ class LibraryDatabase2(LibraryDatabase): def tags_older_than(self, tag, delta): tag = tag.lower().strip() - now = datetime.now() + now = nowf() for r in self.data._data: if r is not None: if (now - r[FIELD_MAP['timestamp']]) > delta: @@ -1483,7 +1484,7 @@ class LibraryDatabase2(LibraryDatabase): stream.close() self.conn.commit() if existing: - t = datetime.utcnow() + t = utcnow() self.set_timestamp(db_id, t, notify=False) self.set_pubdate(db_id, t, notify=False) self.data.refresh_ids(self, [db_id]) # Needed to update format list and size diff --git a/src/calibre/library/server.py b/src/calibre/library/server.py index a1c8aec0bd..50c5ccafbd 100644 --- a/src/calibre/library/server.py +++ b/src/calibre/library/server.py @@ -11,7 +11,6 @@ import sys, textwrap, operator, os, re, logging, cStringIO import __builtin__ from itertools import repeat from logging.handlers import RotatingFileHandler -from datetime import datetime from threading import Thread import cherrypy @@ -31,15 +30,16 @@ from calibre.utils.config import config_dir from calibre.utils.mdns import publish as publish_zeroconf, \ stop_server as stop_zeroconf from calibre.ebooks.metadata import fmt_sidx, title_sort +from calibre.utils.date import now as nowf, fromtimestamp def strftime(fmt='%Y/%m/%d %H:%M:%S', dt=None): if not hasattr(dt, 'timetuple'): - dt = datetime.now() + dt = nowf() dt = dt.timetuple() try: return _strftime(fmt, dt) except: - return _strftime(fmt, datetime.now().timetuple()) + return _strftime(fmt, nowf().timetuple()) def expose(func): @@ -351,7 +351,7 @@ class LibraryServer(object): map(int, self.opts.max_cover.split('x')) self.max_stanza_items = opts.max_opds_items path = P('content_server') - self.build_time = datetime.fromtimestamp(os.stat(path).st_mtime) + self.build_time = fromtimestamp(os.stat(path).st_mtime) self.default_cover = open(P('content_server/default_cover.jpg'), 'rb').read() cherrypy.config.update({ @@ -429,7 +429,7 @@ class LibraryServer(object): 
cherrypy.response.headers['Content-Type'] = 'image/jpeg' cherrypy.response.timeout = 3600 path = getattr(cover, 'name', False) - updated = datetime.utcfromtimestamp(os.stat(path).st_mtime) if path and \ + updated = fromtimestamp(os.stat(path).st_mtime) if path and \ os.access(path, os.R_OK) else self.build_time cherrypy.response.headers['Last-Modified'] = self.last_modified(updated) try: @@ -476,7 +476,7 @@ class LibraryServer(object): cherrypy.response.timeout = 3600 path = getattr(fmt, 'name', None) if path and os.path.exists(path): - updated = datetime.utcfromtimestamp(os.stat(path).st_mtime) + updated = fromtimestamp(os.stat(path).st_mtime) cherrypy.response.headers['Last-Modified'] = self.last_modified(updated) return fmt.read() @@ -517,8 +517,8 @@ class LibraryServer(object): def get_matches(self, location, query): base = self.db.data.get_matches(location, query) - epub = self.db.data.get_matches('format', 'epub') - pdb = self.db.data.get_matches('format', 'pdb') + epub = self.db.data.get_matches('format', '=epub') + pdb = self.db.data.get_matches('format', '=pdb') return base.intersection(epub.union(pdb)) def stanza_sortby_subcategory(self, updated, sortby, offset): @@ -540,15 +540,15 @@ class LibraryServer(object): what, subtitle = sortby[2:], '' if sortby == 'byseries': data = self.db.all_series() - data = [(x[0], x[1], len(self.get_matches('series', x[1]))) for x in data] + data = [(x[0], x[1], len(self.get_matches('series', '='+x[1]))) for x in data] subtitle = 'Books by series' elif sortby == 'byauthor': data = self.db.all_authors() - data = [(x[0], x[1], len(self.get_matches('authors', x[1]))) for x in data] + data = [(x[0], x[1], len(self.get_matches('authors', '='+x[1]))) for x in data] subtitle = 'Books by author' elif sortby == 'bytag': data = self.db.all_tags2() - data = [(x[0], x[1], len(self.get_matches('tags', x[1]))) for x in data] + data = [(x[0], x[1], len(self.get_matches('tags', '='+x[1]))) for x in data] subtitle = 'Books by tag' fcmp = 
author_cmp if sortby == 'byauthor' else cmp data = [x for x in data if x[2] > 0] @@ -841,7 +841,7 @@ class LibraryServer(object): if not os.path.exists(path): raise cherrypy.HTTPError(404, '%s not found'%name) if self.opts.develop: - lm = datetime.fromtimestamp(os.stat(path).st_mtime) + lm = fromtimestamp(os.stat(path).st_mtime) cherrypy.response.headers['Last-Modified'] = self.last_modified(lm) return open(path, 'rb').read() diff --git a/src/calibre/library/sqlite.py b/src/calibre/library/sqlite.py index ae9ab181f2..498d00005a 100644 --- a/src/calibre/library/sqlite.py +++ b/src/calibre/library/sqlite.py @@ -12,46 +12,18 @@ from sqlite3 import IntegrityError, OperationalError from threading import Thread from Queue import Queue from threading import RLock -from datetime import tzinfo, datetime, timedelta +from datetime import datetime from calibre.ebooks.metadata import title_sort +from calibre.utils.date import parse_date, isoformat global_lock = RLock() def convert_timestamp(val): - datepart, timepart = val.split(' ') - tz, mult = None, 1 - x = timepart.split('+') - if len(x) > 1: - timepart, tz = x - else: - x = timepart.split('-') - if len(x) > 1: - timepart, tz = x - mult = -1 - - year, month, day = map(int, datepart.split("-")) - timepart_full = timepart.split(".") - hours, minutes, seconds = map(int, timepart_full[0].split(":")) - if len(timepart_full) == 2: - microseconds = int(timepart_full[1]) - else: - microseconds = 0 - if tz is not None: - h, m = map(int, tz.split(':')) - delta = timedelta(minutes=mult*(60*h + m)) - tz = type('CustomTZ', (tzinfo,), {'utcoffset':lambda self, dt:delta, - 'dst':lambda self,dt:timedelta(0)})() - - val = datetime(year, month, day, hours, minutes, seconds, microseconds, - tzinfo=tz) - if tz is not None: - val = datetime(*(val.utctimetuple()[:6])) - return val + return parse_date(val, as_utc=False) def adapt_datetime(dt): - dt = datetime(*(dt.utctimetuple()[:6])) - return dt.isoformat(' ') + return isoformat(dt) 
sqlite.register_adapter(datetime, adapt_datetime) sqlite.register_converter('timestamp', convert_timestamp) diff --git a/src/calibre/manual/faq.rst b/src/calibre/manual/faq.rst index a3c5bd32c4..bafc13f388 100644 --- a/src/calibre/manual/faq.rst +++ b/src/calibre/manual/faq.rst @@ -146,6 +146,8 @@ Now you should be able to access your books on your iPhone by opening Stanza. Go Replace ``192.168.1.2`` with the local IP address of the computer running |app|. If you have changed the port the |app| content server is running on, you will have to change ``8080`` as well to the new port. The local IP address is the IP address you computer is assigned on your home network. A quick Google search will tell you how to find out your local IP address. Now click "Save" and you are done. +If you get timeout errors while browsing the calibre catalog in Stanza, try increasing the connection timeout value in the stanza settings. Go to Info->Settings and increase the value of Download Timeout. + How do I use |app| with my Android phone? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/src/calibre/utils/date.py b/src/calibre/utils/date.py new file mode 100644 index 0000000000..8b26ab02bc --- /dev/null +++ b/src/calibre/utils/date.py @@ -0,0 +1,87 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import with_statement + +__license__ = 'GPL v3' +__copyright__ = '2010, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + +from datetime import datetime + +from dateutil.parser import parse +from dateutil.tz import tzlocal, tzutc + +utc_tz = _utc_tz = tzutc() +local_tz = _local_tz = tzlocal() + +def parse_date(date_string, assume_utc=False, as_utc=True, default=None): + ''' + Parse a date/time string into a timezone aware datetime object. The timezone + is always either UTC or the local timezone. + + :param assume_utc: If True and date_string does not specify a timezone, + assume UTC, otherwise assume local timezone. 
+ + :param as_utc: If True, return a UTC datetime + + :param default: Missing fields are filled in from default. If None, the + current date is used. + ''' + if default is None: + func = datetime.utcnow if assume_utc else datetime.now + default = func().replace(hour=0, minute=0, second=0, microsecond=0, + tzinfo=_utc_tz if assume_utc else _local_tz) + dt = parse(date_string, default=default) + if dt.tzinfo is None: + dt = dt.replace(tzinfo=_utc_tz if assume_utc else _local_tz) + return dt.astimezone(_utc_tz if as_utc else _local_tz) + +def strptime(val, fmt, assume_utc=False, as_utc=True): + dt = datetime.strptime(val, fmt) + if dt.tzinfo is None: + dt = dt.replace(tzinfo=_utc_tz if assume_utc else _local_tz) + return dt.astimezone(_utc_tz if as_utc else _local_tz) + +def dt_factory(time_t, assume_utc=False, as_utc=True): + dt = datetime(*(time_t[0:6])) + if dt.tzinfo is None: + dt = dt.replace(tzinfo=_utc_tz if assume_utc else _local_tz) + return dt.astimezone(_utc_tz if as_utc else _local_tz) + +def qt_to_dt(qdate_or_qdatetime, as_utc=True): + from PyQt4.Qt import Qt + o = qdate_or_qdatetime + if hasattr(o, 'toUTC'): + # QDateTime + o = unicode(o.toUTC().toString(Qt.ISODate)) + return parse_date(o, assume_utc=True, as_utc=as_utc) + dt = datetime(o.year(), o.month(), o.day()).replace(tzinfo=_local_tz) + return dt.astimezone(_utc_tz if as_utc else _local_tz) + +def fromtimestamp(ctime, as_utc=True): + dt = datetime.utcfromtimestamp().replace(tzinfo=_utc_tz) + if not as_utc: + dt = dt.astimezone(_local_tz) + return dt + +def fromordinal(day, as_utc=True): + return datetime.fromordinal(day).replace( + tzinfo=_utc_tz if as_utc else _local_tz) + +def isoformat(date_time, assume_utc=False, as_utc=True, sep='T'): + if not hasattr(date_time, 'tzinfo'): + return unicode(date_time.isoformat()) + if date_time.tzinfo is None: + date_time = date_time.replace(tzinfo=_utc_tz if assume_utc else + _local_tz) + date_time = date_time.astimezone(_utc_tz if as_utc else _local_tz) + 
return unicode(date_time.isoformat(sep)) + +def now(): + return datetime.now().replace(tzinfo=_local_tz) + +def utcnow(): + return datetime.utcnow().replace(tzinfo=_utc_tz) + +def utcfromtimestamp(stamp): + return datetime.utcfromtimestamp(stamp).replace(tzinfo=_utc_tz) diff --git a/src/calibre/utils/mdns.py b/src/calibre/utils/mdns.py index 033b903e11..74547b9573 100644 --- a/src/calibre/utils/mdns.py +++ b/src/calibre/utils/mdns.py @@ -3,46 +3,66 @@ __license__ = 'GPL 3' __copyright__ = '2009, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import socket +import socket, time, atexit _server = None -def get_external_ip(): +def _get_external_ip(): 'Get IP address of interface used to connect to the outside world' try: ipaddr = socket.gethostbyname(socket.gethostname()) except: ipaddr = '127.0.0.1' if ipaddr == '127.0.0.1': - try: - s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) - s.connect(('google.com', 0)) - ipaddr = s.getsockname()[0] - except: - pass + for addr in ('192.0.2.0', '198.51.100.0', 'google.com'): + try: + s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + s.connect((addr, 0)) + ipaddr = s.getsockname()[0] + if ipaddr != '127.0.0.1': + return ipaddr + except: + time.sleep(0.3) return ipaddr +_ext_ip = None +def get_external_ip(): + global _ext_ip + if _ext_ip is None: + _ext_ip = _get_external_ip() + return _ext_ip + def start_server(): global _server if _server is None: from calibre.utils.Zeroconf import Zeroconf - _server = Zeroconf() + try: + _server = Zeroconf() + except: + time.sleep(0.2) + _server = Zeroconf() + + atexit.register(stop_server) + return _server def publish(desc, type, port, properties=None, add_hostname=True): ''' Publish a service. - + :param desc: Description of service :param type: Name and type of service. For example _stanza._tcp :param port: Port the service listens on - :param properties: An optional dictionary whose keys and values will be put - into the TXT record. 
+ :param properties: An optional dictionary whose keys and values will be put + into the TXT record. ''' port = int(port) server = start_server() - hostname = socket.gethostname().partition('.')[0] if add_hostname: + try: + hostname = socket.gethostname().partition('.')[0] + except: + hostname = 'Unknown' desc += ' (on %s)'%hostname local_ip = get_external_ip() type = type+'.local.' @@ -53,8 +73,11 @@ def publish(desc, type, port, properties=None, add_hostname=True): properties=properties, server=hostname+'.local.') server.registerService(service) - + def stop_server(): global _server if _server is not None: - _server.close() + try: + _server.close() + finally: + _server = None diff --git a/src/calibre/utils/zipfile.py b/src/calibre/utils/zipfile.py index 6bf6f2f83c..9943119968 100644 --- a/src/calibre/utils/zipfile.py +++ b/src/calibre/utils/zipfile.py @@ -6,8 +6,8 @@ from __future__ import with_statement import struct, os, time, sys, shutil import binascii, cStringIO from contextlib import closing +from tempfile import SpooledTemporaryFile -from calibre.ptempfile import TemporaryDirectory from calibre import sanitize_file_name from calibre.constants import filesystem_encoding from calibre.ebooks.chardet import detect @@ -467,6 +467,7 @@ class ZipExtFile: def __init__(self, fileobj, zipinfo, decrypt=None): self.fileobj = fileobj + self.orig_pos = fileobj.tell() self.decrypter = decrypt self.bytes_read = 0L self.rawbuffer = '' @@ -582,6 +583,20 @@ class ZipExtFile: result.append(line) return result + def read_raw(self): + pos = self.fileobj.tell() + self.fileobj.seek(self.orig_pos) + bytes_to_read = self.compress_size + if self.decrypter is not None: + bytes_to_read -= 12 + raw = b'' + + if bytes_to_read > 0: + raw = self.fileobj.read(bytes_to_read) + self.fileobj.seek(pos) + return raw + + def read(self, size = None): # act like file() obj and return empty string if size is 0 if size == 0: @@ -925,6 +940,11 @@ class ZipFile: """Return file bytes (as a string) for 
name.""" return self.open(name, "r", pwd).read() + def read_raw(self, name, mode="r", pwd=None): + """Return the raw bytes in the zipfile corresponding to name.""" + zef = self.open(name, mode=mode, pwd=pwd) + return zef.read_raw() + def open(self, name, mode="r", pwd=None): """Return file-like object for 'name'.""" if mode not in ("r", "U", "rU"): @@ -1159,10 +1179,13 @@ class ZipFile: self.filelist.append(zinfo) self.NameToInfo[zinfo.filename] = zinfo - def writestr(self, zinfo_or_arcname, bytes, permissions=0600, compression=ZIP_DEFLATED): + def writestr(self, zinfo_or_arcname, bytes, permissions=0600, + compression=ZIP_DEFLATED, raw_bytes=False): """Write a file into the archive. The contents is the string 'bytes'. 'zinfo_or_arcname' is either a ZipInfo instance or the name of the file in the archive.""" + assert not raw_bytes or (raw_bytes and + isinstance(zinfo_or_arcname, ZipInfo)) if not isinstance(zinfo_or_arcname, ZipInfo): if isinstance(zinfo_or_arcname, unicode): zinfo_or_arcname = zinfo_or_arcname.encode('utf-8') @@ -1177,18 +1200,20 @@ class ZipFile: raise RuntimeError( "Attempt to write to ZIP archive that was already closed") - zinfo.file_size = len(bytes) # Uncompressed size + if not raw_bytes: + zinfo.file_size = len(bytes) # Uncompressed size zinfo.header_offset = self.fp.tell() # Start of header bytes self._writecheck(zinfo) self._didModify = True - zinfo.CRC = crc32(bytes) & 0xffffffff # CRC-32 checksum - if zinfo.compress_type == ZIP_DEFLATED: - co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, - zlib.DEFLATED, -15) - bytes = co.compress(bytes) + co.flush() - zinfo.compress_size = len(bytes) # Compressed size - else: - zinfo.compress_size = zinfo.file_size + if not raw_bytes: + zinfo.CRC = crc32(bytes) & 0xffffffff # CRC-32 checksum + if zinfo.compress_type == ZIP_DEFLATED: + co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, + zlib.DEFLATED, -15) + bytes = co.compress(bytes) + co.flush() + zinfo.compress_size = len(bytes) # Compressed size + 
else: + zinfo.compress_size = zinfo.file_size zinfo.header_offset = self.fp.tell() # Start of header bytes self.fp.write(zinfo.FileHeader()) self.fp.write(bytes) @@ -1332,7 +1357,7 @@ class ZipFile: def safe_replace(zipstream, name, datastream): ''' Replace a file in a zip file in a safe manner. This proceeds by extracting - and re-creating the zipfile. This is neccessary because :method:`ZipFile.replace` + and re-creating the zipfile. This is necessary because :method:`ZipFile.replace` sometimes created corrupted zip files. :param zipstream: Stream from a zip file @@ -1340,21 +1365,20 @@ def safe_replace(zipstream, name, datastream): :param datastream: The data to replace the file with. ''' z = ZipFile(zipstream, 'r') - names = z.infolist() - with TemporaryDirectory('_zipfile_replace') as tdir: - z.extractall(path=tdir) - mapping = z.extract_mapping - path = os.path.join(tdir, *name.split('/')) - shutil.copyfileobj(datastream, open(path, 'wb')) + with SpooledTemporaryFile(max_size=100*1024*1024) as temp: + ztemp = ZipFile(temp, 'w') + for obj in z.infolist(): + if obj.filename == name: + ztemp.writestr(obj, datastream.read()) + else: + ztemp.writestr(obj, z.read_raw(obj), raw_bytes=True) + ztemp.close() + z.close() + temp.seek(0) zipstream.seek(0) zipstream.truncate() - with closing(ZipFile(zipstream, 'w')) as z: - for info in names: - current = mapping[info.filename] - if os.path.isdir(current): - z.writestr(info.filename+'/', '', 0700) - else: - z.write(current, info.filename, compress_type=info.compress_type) + shutil.copyfileobj(temp, zipstream) + zipstream.flush() class PyZipFile(ZipFile): """Class to create ZIP archives with Python library files and packages.""" diff --git a/src/calibre/web/feeds/__init__.py b/src/calibre/web/feeds/__init__.py index 886a825846..bf2c72be1a 100644 --- a/src/calibre/web/feeds/__init__.py +++ b/src/calibre/web/feeds/__init__.py @@ -6,17 +6,16 @@ __copyright__ = '2008, Kovid Goyal ' Contains the logic for parsing feeds. 
''' import time, traceback, copy, re -from datetime import datetime + +from lxml import html from calibre.web.feeds.feedparser import parse from calibre.utils.logging import default_log from calibre import entity_to_unicode -from lxml import html +from calibre.utils.date import dt_factory, utcnow, local_tz class Article(object): - time_offset = datetime.now() - datetime.utcnow() - def __init__(self, id, title, url, author, summary, published, content): self.downloaded = False self.id = id @@ -48,8 +47,8 @@ class Article(object): self.author = author self.content = content self.date = published - self.utctime = datetime(*self.date[:6]) - self.localtime = self.utctime + self.time_offset + self.utctime = dt_factory(self.date, assume_utc=True, as_utc=True) + self.localtime = self.utctime.astimezone(local_tz) @dynamic_property def title(self): @@ -146,7 +145,7 @@ class Feed(object): content = item.get('content', '') author = item.get('author', '') article = Article(id, title, link, author, description, published, content) - delta = datetime.utcnow() - article.utctime + delta = utcnow() - article.utctime if delta.days*24*3600 + delta.seconds <= 24*3600*self.oldest_article: self.articles.append(article) else: @@ -183,7 +182,7 @@ class Feed(object): if not link and not content: return article = Article(id, title, link, author, description, published, content) - delta = datetime.utcnow() - article.utctime + delta = utcnow() - article.utctime if delta.days*24*3600 + delta.seconds <= 24*3600*self.oldest_article: self.articles.append(article) else: diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index 540f7cd93a..6e9c72de26 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -11,7 +11,6 @@ import os, time, traceback, re, urlparse, sys from collections import defaultdict from functools import partial from contextlib import nested, closing -from datetime import datetime from calibre import browser, __appname__, iswindows, 
\ @@ -29,7 +28,7 @@ from calibre.web.fetch.simple import RecursiveFetcher from calibre.utils.threadpool import WorkRequest, ThreadPool, NoResultsPending from calibre.ptempfile import PersistentTemporaryFile, \ PersistentTemporaryDirectory - +from calibre.utils.date import now as nowf class BasicNewsRecipe(Recipe): ''' @@ -1080,11 +1079,11 @@ class BasicNewsRecipe(Recipe): mi.publisher = __appname__ mi.author_sort = __appname__ mi.publication_type = 'periodical:'+self.publication_type - mi.timestamp = datetime.now() + mi.timestamp = nowf() mi.comments = self.description if not isinstance(mi.comments, unicode): mi.comments = mi.comments.decode('utf-8', 'replace') - mi.pubdate = datetime.now() + mi.pubdate = nowf() opf_path = os.path.join(dir, 'index.opf') ncx_path = os.path.join(dir, 'index.ncx') diff --git a/src/calibre/web/feeds/recipes/collection.py b/src/calibre/web/feeds/recipes/collection.py index 478947fcd9..b2b01b00e5 100644 --- a/src/calibre/web/feeds/recipes/collection.py +++ b/src/calibre/web/feeds/recipes/collection.py @@ -8,13 +8,14 @@ __docformat__ = 'restructuredtext en' import os, calendar from threading import RLock -from datetime import datetime, timedelta +from datetime import timedelta from lxml import etree from lxml.builder import ElementMaker -from dateutil import parser from calibre import browser +from calibre.utils.date import parse_date, now as nowf, utcnow, tzlocal, \ + isoformat, fromordinal NS = 'http://calibre-ebook.com/recipe_collection' E = ElementMaker(namespace=NS, nsmap={None:NS}) @@ -125,7 +126,12 @@ class SchedulerConfig(object): self.lock = RLock() if os.access(self.conf_path, os.R_OK): with ExclusiveFile(self.conf_path) as f: - self.root = etree.fromstring(f.read()) + try: + self.root = etree.fromstring(f.read()) + except: + print 'Failed to read recipe scheduler config' + import traceback + traceback.print_exc() elif os.path.exists(old_conf_path): self.migrate_old_conf(old_conf_path) @@ -151,17 +157,17 @@ class 
SchedulerConfig(object): ld = x.get('last_downloaded', None) if ld and last_downloaded is None: try: - last_downloaded = parser.parse(ld) + last_downloaded = parse_date(ld) except: pass self.root.remove(x) break if last_downloaded is None: - last_downloaded = datetime.fromordinal(1) + last_downloaded = fromordinal(1) sr = E.scheduled_recipe({ 'id' : recipe.get('id'), 'title': recipe.get('title'), - 'last_downloaded':last_downloaded.isoformat(), + 'last_downloaded':isoformat(last_downloaded), }, self.serialize_schedule(schedule_type, schedule)) self.root.append(sr) self.write_scheduler_file() @@ -189,7 +195,7 @@ class SchedulerConfig(object): def update_last_downloaded(self, recipe_id): with self.lock: - now = datetime.utcnow() + now = utcnow() for x in self.iter_recipes(): if x.get('id', False) == recipe_id: typ, sch, last_downloaded = self.un_serialize_schedule(x) @@ -199,7 +205,7 @@ class SchedulerConfig(object): if abs(actual_interval - nominal_interval) < \ timedelta(hours=1): now = last_downloaded + nominal_interval - x.set('last_downloaded', now.isoformat()) + x.set('last_downloaded', isoformat(now)) break self.write_scheduler_file() @@ -243,20 +249,18 @@ class SchedulerConfig(object): sch = float(sch) elif typ == 'day/time': sch = list(map(int, sch.split(':'))) - return typ, sch, parser.parse(recipe.get('last_downloaded')) + return typ, sch, parse_date(recipe.get('last_downloaded')) def recipe_needs_to_be_downloaded(self, recipe): try: typ, sch, ld = self.un_serialize_schedule(recipe) except: return False - utcnow = datetime.utcnow() if typ == 'interval': - return utcnow - ld > timedelta(sch) + return utcnow() - ld > timedelta(sch) elif typ == 'day/time': - now = datetime.now() - offset = now - utcnow - ld_local = ld + offset + now = nowf() + ld_local = ld.astimezone(tzlocal()) day, hour, minute = sch is_today = day < 0 or day > 6 or \