diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py index d7c5f0b223..1c2d780412 100644 --- a/src/calibre/__init__.py +++ b/src/calibre/__init__.py @@ -308,14 +308,25 @@ def walk(dir): yield os.path.join(record[0], f) def strftime(fmt, t=None): - ''' A version of strtime that returns unicode strings. ''' + ''' A version of strtime that returns unicode strings and tries to handle dates + before 1900 ''' if t is None: t = time.localtime() + early_year = t[0] < 1900 + if early_year: + fmt = fmt.replace('%Y', '_early year hack##') + t = list(t) + orig_year = t[0] + t[0] = 1900 + ans = None if iswindows: if isinstance(fmt, unicode): fmt = fmt.encode('mbcs') - return plugins['winutil'][0].strftime(fmt, t) - return time.strftime(fmt, t).decode(preferred_encoding, 'replace') + ans = plugins['winutil'][0].strftime(fmt, t) + ans = time.strftime(fmt, t).decode(preferred_encoding, 'replace') + if early_year: + ans = ans.replace('_early year hack##', str(orig_year)) + return ans def my_unichr(num): try: diff --git a/src/calibre/customize/profiles.py b/src/calibre/customize/profiles.py index 9114dc9eee..69afd77a24 100644 --- a/src/calibre/customize/profiles.py +++ b/src/calibre/customize/profiles.py @@ -155,6 +155,9 @@ class OutputProfile(Plugin): # The image size for comics comic_screen_size = (584, 754) + # If True the MOBI renderer on the device supports MOBI indexing + supports_mobi_indexing = False + @classmethod def tags_to_string(cls, tags): return ', '.join(tags) @@ -254,6 +257,7 @@ class KindleOutput(OutputProfile): dpi = 168.451 fbase = 16 fsizes = [12, 12, 14, 16, 18, 20, 22, 24] + supports_mobi_indexing = True @classmethod def tags_to_string(cls, tags): @@ -269,6 +273,7 @@ class KindleDXOutput(OutputProfile): screen_size = (744, 1022) dpi = 150.0 comic_screen_size = (741, 1022) + supports_mobi_indexing = True @classmethod def tags_to_string(cls, tags): diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index fa807eb24f..dc6c0f8b52 100644 --- a/src/calibre/ebooks/conversion/plumber.py +++ b/src/calibre/ebooks/conversion/plumber.py @@ -563,6 +563,8 @@ OptionRecommendation(name='list_recipes', break self.read_user_metadata() + self.opts.no_inline_navbars = self.opts.output_profile.supports_mobi_indexing \ + and self.output_fmt == 'mobi' def flush(self): try: diff --git a/src/calibre/ebooks/epub/output.py b/src/calibre/ebooks/epub/output.py index 160676137e..be096eece3 100644 --- a/src/calibre/ebooks/epub/output.py +++ b/src/calibre/ebooks/epub/output.py @@ -80,6 +80,8 @@ class EPUBOutput(OutputFormatPlugin): ]) + recommendations = set([('pretty_print', True, OptionRecommendation.HIGH)]) + TITLEPAGE_COVER = '''\ @@ -134,6 +136,21 @@ class EPUBOutput(OutputFormatPlugin): ''' + def workaround_webkit_quirks(self): + from calibre.ebooks.oeb.base import XPath + for x in self.oeb.spine: + root = x.data + body = XPath('//h:body')(root) + if body: + body = body[0] + + if not hasattr(body, 'xpath'): + continue + + for pre in XPath('//h:pre')(body): + if not pre.text and len(pre) == 0: + pre.tag = 'div' + def convert(self, oeb, output_path, input_plugin, opts, log): self.log, self.opts, self.oeb = log, opts, oeb @@ -146,6 +163,7 @@ class EPUBOutput(OutputFormatPlugin): self.workaround_ade_quirks() + self.workaround_webkit_quirks() from calibre.ebooks.oeb.transforms.rescale import RescaleImages RescaleImages()(oeb, opts) diff --git a/src/calibre/ebooks/lrf/output.py b/src/calibre/ebooks/lrf/output.py index 6ca27ba9a4..e88317c402 100644 --- a/src/calibre/ebooks/lrf/output.py +++ b/src/calibre/ebooks/lrf/output.py @@ -29,7 +29,7 @@ class LRFOptions(object): self.use_metadata_cover = True self.output = output self.ignore_tables = opts.linearize_tables - self.base_font_size = 0 + self.base_font_size = opts.base_font_size self.blank_after_para = opts.insert_blank_line self.use_spine = True self.font_delta = 0 diff --git a/src/calibre/ebooks/metadata/__init__.py b/src/calibre/ebooks/metadata/__init__.py index 5f575eb2a9..e5619bee63 100644 --- a/src/calibre/ebooks/metadata/__init__.py +++ b/src/calibre/ebooks/metadata/__init__.py @@ -367,7 +367,7 @@ class MetaInformation(object): if self.pubdate is not None: ans += [(_('Published'), unicode(self.pubdate.isoformat(' ')))] if self.rights is not None: - ans += [(_('Rights'), unicode(self.rights.isoformat(' ')))] + ans += [(_('Rights'), unicode(self.rights))] for i, x in enumerate(ans): ans[i] = u'%s%s'%x return u'%s
'%u'\n'.join(ans) diff --git a/src/calibre/ebooks/metadata/meta.py b/src/calibre/ebooks/metadata/meta.py index a239933710..e74ce5757d 100644 --- a/src/calibre/ebooks/metadata/meta.py +++ b/src/calibre/ebooks/metadata/meta.py @@ -31,7 +31,7 @@ def metadata_from_formats(formats): try: return _metadata_from_formats(formats) except: - mi = metadata_from_filename(formats[0]) + mi = metadata_from_filename(list(formats)[0]) if not mi.authors: mi.authors = [_('Unknown')] @@ -126,14 +126,10 @@ def metadata_from_filename(name, pat=None): mi.title = match.group('title') except IndexError: pass - try: - mi.authors = [match.group('author')] - except IndexError: - pass try: au = match.group('authors') aus = string_to_authors(au) - mi.authors = authors + mi.authors = aus except IndexError: pass try: diff --git a/src/calibre/ebooks/metadata/opf2.py b/src/calibre/ebooks/metadata/opf2.py index c147c2b748..4571ac1d6f 100644 --- a/src/calibre/ebooks/metadata/opf2.py +++ b/src/calibre/ebooks/metadata/opf2.py @@ -452,9 +452,12 @@ class OPF(object): def __init__(self, stream, basedir=os.getcwdu(), unquote_urls=True): if not hasattr(stream, 'read'): stream = open(stream, 'rb') + raw = stream.read() + if not raw: + raise ValueError('Empty file: '+getattr(stream, 'name', 'stream')) self.basedir = self.base_dir = basedir self.path_to_html_toc = self.html_toc_fragment = None - raw, self.encoding = xml_to_unicode(stream.read(), strip_encoding_pats=True, resolve_entities=True) + raw, self.encoding = xml_to_unicode(raw, strip_encoding_pats=True, resolve_entities=True) raw = raw[raw.find('<'):] self.root = etree.fromstring(raw, self.PARSER) self.metadata = self.metadata_path(self.root) diff --git a/src/calibre/ebooks/mobi/output.py b/src/calibre/ebooks/mobi/output.py index c8fe87a161..4de346c0af 100644 --- a/src/calibre/ebooks/mobi/output.py +++ b/src/calibre/ebooks/mobi/output.py @@ -48,11 +48,7 @@ class MOBIOutput(OutputFormatPlugin): self.opts.mobi_periodical = False def check_for_masthead(self): - found = False - for typ in self.oeb.guide: - if type == 'masthead': - found = True - break + found = 'masthead' in self.oeb.guide if not found: self.oeb.log.debug('No masthead found, generating default one...') from calibre.resources import server_resources @@ -76,12 +72,14 @@ class MOBIOutput(OutputFormatPlugin): from calibre.ebooks.oeb.base import TOC toc = self.oeb.toc if toc and toc[0].klass != 'periodical': + start_href = self.oeb.spine[0].href self.log('Converting TOC for MOBI periodical indexing...') articles = {} - if toc.depth < 3: - sections = [TOC(klass='section')] + if toc.depth() < 3: + sections = [TOC(klass='section', title=_('All articles'), + href=start_href)] for x in toc: - sections[0].append(x) + sections[0].nodes.append(x) else: sections = list(toc) for x in sections: @@ -92,13 +90,13 @@ class MOBIOutput(OutputFormatPlugin): a.klass = 'article' articles[id(sec)].append(a) sec.nodes.remove(a) - root = TOC(klass='periodical', + root = TOC(klass='periodical', href=start_href, title=unicode(self.oeb.metadata.title[0])) for s in sections: if articles[id(s)]: for a in articles[id(s)]: s.nodes.append(a) - root.nodes.append(s) + root.nodes.append(s) for x in list(toc.nodes): toc.nodes.remove(x) diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py index 8dc8d31150..ac7619cbb6 100644 --- a/src/calibre/ebooks/mobi/reader.py +++ b/src/calibre/ebooks/mobi/reader.py @@ -301,7 +301,7 @@ class MobiReader(object): root = html.fromstring(self.processed_html) if root.xpath('descendant::p/descendant::p'): from lxml.html import soupparser - self.log.warning('Malformed markup, parsing using BeatifulSoup') + self.log.warning('Malformed markup, parsing using BeautifulSoup') root = soupparser.fromstring(self.processed_html) if root.tag != 'html': @@ -439,7 +439,12 @@ class MobiReader(object): self.processed_html = '

' + self.processed_html.replace('\n\n', '

') + '' self.processed_html = self.processed_html.replace('\r\n', '\n') self.processed_html = self.processed_html.replace('> <', '>\n<') - self.processed_html = re.sub('\x14|\x15', '', self.processed_html) + self.processed_html = re.sub('\x14|\x15|\x1c|\x1d', '', self.processed_html) + + def ensure_unit(self, raw, unit='px'): + if re.search(r'\d+$', raw) is not None: + raw += unit + return raw def upshift_markup(self, root): self.log.debug('Converting style information to CSS...') @@ -469,13 +474,13 @@ class MobiReader(object): if attrib.has_key('height'): height = attrib.pop('height').strip() if height: - styles.append('margin-top: %s' % height) + styles.append('margin-top: %s' % self.ensure_unit(height)) if attrib.has_key('width'): width = attrib.pop('width').strip() if width: - styles.append('text-indent: %s' % width) + styles.append('text-indent: %s' % self.ensure_unit(width)) if width.startswith('-'): - styles.append('margin-left: %s' % (width[1:])) + styles.append('margin-left: %s' % self.ensure_unit(width[1:])) if attrib.has_key('align'): align = attrib.pop('align').strip() if align: diff --git a/src/calibre/ebooks/mobi/writer.py b/src/calibre/ebooks/mobi/writer.py index b8b5c8f796..8e8cff2aff 100644 --- a/src/calibre/ebooks/mobi/writer.py +++ b/src/calibre/ebooks/mobi/writer.py @@ -379,7 +379,7 @@ class MobiWriter(object): try: self._generate_index() except: - self.oeb.log.exception('Failed to generate index') + self._oeb.log.exception('Failed to generate index') self._generate_images() @@ -461,7 +461,7 @@ class MobiWriter(object): h = child.href if h not in self._id_offsets: - self._oeb.log.warning('Could not find TOC entry "%s", aborting indexing ...'% child.title) + self._oeb.log.warning(' Could not find TOC entry "%s", aborting indexing ...'% child.title) return False offset = self._id_offsets[h] @@ -573,7 +573,7 @@ class MobiWriter(object): # Entries continues with a stream of section+articles, section+articles ... h = child.href if h not in self._id_offsets: - self._oeb.log.warning('Could not find TOC entry "%s", aborting indexing ...'% child.title) + self._oeb.log.warning(' Could not find TOC entry "%s", aborting indexing ...'% child.title) return False offset = self._id_offsets[h] @@ -1178,40 +1178,29 @@ class MobiWriter(object): ''' toc = self._oeb.toc nodes = list(toc.iter())[1:] + toc_conforms = True for (i, child) in enumerate(nodes) : - if self.opts.verbose > 3 : - self._oeb.logger.info(" : %-25.25s \tklass=%-15.15s \tdepth:%d playOrder=%03d" % \ - (child.title, child.klass, child.depth(), child.play_order) ) + if child.klass == "periodical" and child.depth() != 3 or \ + child.klass == "section" and child.depth() != 2 or \ + child.klass == "article" and child.depth() != 1 : - if child.klass == "periodical" and child.depth() != 3 : - self._oeb.logger.info('<navPoint class="periodical"> found at depth %d, nonconforming TOC' % \ - child.depth() ) - return False - - if child.klass == "section" and child.depth() != 2 : - self._oeb.logger.info('<navPoint class="section"> found at depth %d, nonconforming TOC' % \ - child.depth() ) - return False - - if child.klass == "article" and child.depth() != 1 : - self._oeb.logger.info('<navPoint class="article"> found at depth %d, nonconforming TOC' % \ - child.depth() ) - return False + self._oeb.logger.warn('Nonconforming TOC entry: "%s" found at depth %d' % \ + (child.klass, child.depth()) ) + self._oeb.logger.warn(" <title>: '%-25.25s...' \t\tklass=%-15.15s \tdepth:%d \tplayOrder=%03d" % \ + (child.title, child.klass, child.depth(), child.play_order) ) + toc_conforms = False # We also need to know that we have a pubdate or timestamp in the metadata, which the Kindle needs if self._oeb.metadata['date'] == [] and self._oeb.metadata['timestamp'] == [] : - self._oeb.logger.info('metadata missing timestamp needed for periodical') - return False + self._oeb.logger.info('metadata missing date/timestamp') + toc_conforms = False - # Periodicals also need a mastheadImage in the manifest - has_mastheadImage = 'masthead' in self._oeb.guide + if not 'masthead' in self._oeb.guide : + self._oeb.logger.info('mastheadImage missing from manifest') + toc_conforms = False - if not has_mastheadImage : - self._oeb.logger.info('mastheadImage missing from manifest, aborting periodical indexing') - return False - - self._oeb.logger.info('TOC structure and pubdate verified') - return True + self._oeb.logger.info("%s" % " TOC structure conforms" if toc_conforms else " TOC structure non-conforming") + return toc_conforms def _generate_text(self): @@ -1231,12 +1220,12 @@ class MobiWriter(object): offset = 0 if self._compression != UNCOMPRESSED: - self._oeb.logger.info('Compressing markup content...') + self._oeb.logger.info(' Compressing markup content...') data, overlap = self._read_text_record(text) # Evaluate toc for conformance if self.opts.mobi_periodical : - self._oeb.logger.info('--mobi-periodical specified, evaluating TOC for periodical conformance ...') + self._oeb.logger.info(' MOBI periodical specified, evaluating TOC for periodical conformance ...') self._conforming_periodical_toc = self._evaluate_periodical_toc() # This routine decides whether to build flat or structured based on self._conforming_periodical_toc @@ -1249,11 +1238,11 @@ class MobiWriter(object): if len(entries) : self._indexable = self._generate_indexed_navpoints() else : - self._oeb.logger.info('No entries found in TOC ...') + self._oeb.logger.info(' No entries found in TOC ...') self._indexable = False if not self._indexable : - self._oeb.logger.info('Writing unindexed mobi ...') + self._oeb.logger.info(' Writing unindexed mobi ...') while len(data) > 0: if self._compression == PALMDOC: @@ -1271,7 +1260,8 @@ class MobiWriter(object): while breaks and (breaks[0] - offset) < RECORD_SIZE: # .pop returns item, removes it from list pbreak = (breaks.pop(0) - running) >> 3 - self._oeb.logger.info('pbreak = 0x%X at 0x%X' % (pbreak, record.tell()) ) + if self.opts.verbose > 2 : + self._oeb.logger.info('pbreak = 0x%X at 0x%X' % (pbreak, record.tell()) ) encoded = decint(pbreak, DECINT_FORWARD) record.write(encoded) running += pbreak << 3 @@ -1384,7 +1374,7 @@ class MobiWriter(object): # 0x002 MOBI book (chapter - chapter navigation) # 0x101 News - Hierarchical navigation with sections and articles # 0x102 News feed - Flat navigation - # 0x103 News magazine - same as 1x101 + # 0x103 News magazine - same as 0x101 # 0xC - 0xF : Text encoding (65001 is utf-8) # 0x10 - 0x13 : UID # 0x14 - 0x17 : Generator version @@ -1545,7 +1535,7 @@ class MobiWriter(object): exth.write(data) nrecs += 1 if term == 'rights' : - rights = unicode(oeb.metadata.rights[0]) + rights = unicode(oeb.metadata.rights[0]).encode('utf-8') exth.write(pack('>II', EXTH_CODES['rights'], len(rights) + 8)) exth.write(rights) @@ -1614,7 +1604,7 @@ class MobiWriter(object): self._write(record) def _generate_index(self): - self._oeb.log('Generating primary index ...') + self._oeb.log('Generating INDX ...') self._primary_index_record = None # Build the NCXEntries and INDX @@ -1917,18 +1907,18 @@ class MobiWriter(object): self._ctoc_map.append(ctoc_name_map) def _generate_ctoc(self): - # Generate the compiled TOC strings - # Each node has 1-4 CTOC entries: - # Periodical (0xDF) - # title, class - # Section (0xFF) - # title, class - # Article (0x3F) - # title, class, description, author - # Chapter (0x0F) - # title, class - # nb: Chapters don't actually have @class, so we synthesize it - # in reader._toc_from_navpoint + # Generate the compiled TOC strings + # Each node has 1-4 CTOC entries: + # Periodical (0xDF) + # title, class + # Section (0xFF) + # title, class + # Article (0x3F) + # title, class, description, author + # Chapter (0x0F) + # title, class + # nb: Chapters don't actually have @class, so we synthesize it + # in reader._toc_from_navpoint toc = self._oeb.toc reduced_toc = [] @@ -1953,6 +1943,8 @@ class MobiWriter(object): first = False else : self._oeb.logger.info('Generating flat CTOC ...') + previousOffset = -1 + currentOffset = 0 for (i, child) in enumerate(toc.iter()): # Only add chapters or articles at depth==1 # no class defaults to 'chapter' @@ -1961,8 +1953,20 @@ class MobiWriter(object): if self.opts.verbose > 2 : self._oeb.logger.info("adding (klass:%s depth:%d) %s to flat ctoc" % \ (child.klass, child.depth(), child) ) - self._add_flat_ctoc_node(child, ctoc) - reduced_toc.append(child) + + # Test to see if this child's offset is the same as the previous child's + # offset, skip it + h = child.href + currentOffset = self._id_offsets[h] + # print "_generate_ctoc: child offset: 0x%X" % currentOffset + + if currentOffset != previousOffset : + self._add_flat_ctoc_node(child, ctoc) + reduced_toc.append(child) + previousOffset = currentOffset + else : + self._oeb.logger.warn(" Ignoring redundant href: %s in '%s'" % (h, child.title)) + first = False else : if self.opts.verbose > 2 : @@ -2027,7 +2031,7 @@ class MobiWriter(object): indices.write(pack('>H', pos)) # Save the offset for IDXTIndices name = "%04X"%count indxt.write(chr(len(name)) + name) # Write the name - indxt.write(INDXT['section']) # entryType [0x0F | 0xDF | 0xFF | 0x3F] + indxt.write(INDXT['section']) # entryType [0x0F | 0xDF | 0xFF | 0x3F] indxt.write(chr(0)) # subType 0 indxt.write(decint(offset, DECINT_FORWARD)) # offset indxt.write(decint(length, DECINT_FORWARD)) # length @@ -2045,7 +2049,7 @@ class MobiWriter(object): indices.write(pack('>H', pos)) # Save the offset for IDXTIndices name = "%04X"%count indxt.write(chr(len(name)) + name) # Write the name - indxt.write(INDXT['article']) # entryType [0x0F | 0xDF | 0xFF | 0x3F] + indxt.write(INDXT['article']) # entryType [0x0F | 0xDF | 0xFF | 0x3F] hasAuthor = True if self._ctoc_map[index]['authorOffset'] else False hasDescription = True if self._ctoc_map[index]['descriptionOffset'] else False diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index acf95df502..ba4ebbc598 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -1468,7 +1468,9 @@ class TOC(object): node.to_opf1(tour) return tour - def to_ncx(self, parent): + def to_ncx(self, parent=None): + if parent is None: + parent = etree.Element(NCX('navMap')) for node in self.nodes: id = node.id or unicode(uuid.uuid4()) attrib = {'id': id, 'playOrder': str(node.play_order)} diff --git a/src/calibre/ebooks/oeb/output.py b/src/calibre/ebooks/oeb/output.py index 18c20f334d..4df8c0f679 100644 --- a/src/calibre/ebooks/oeb/output.py +++ b/src/calibre/ebooks/oeb/output.py @@ -9,6 +9,8 @@ from lxml import etree from calibre.customize.conversion import OutputFormatPlugin from calibre import CurrentDir +from calibre.customize.conversion import OptionRecommendation + from urllib import unquote class OEBOutput(OutputFormatPlugin): @@ -17,6 +19,9 @@ class OEBOutput(OutputFormatPlugin): author = 'Kovid Goyal' file_type = 'oeb' + recommendations = set([('pretty_print', True, OptionRecommendation.HIGH)]) + + def convert(self, oeb_book, output_path, input_plugin, opts, log): self.log, self.opts = log, opts if not os.path.exists(output_path): diff --git a/src/calibre/ebooks/oeb/stylizer.py b/src/calibre/ebooks/oeb/stylizer.py index 5fcc7e3fac..18c16e44d5 100644 --- a/src/calibre/ebooks/oeb/stylizer.py +++ b/src/calibre/ebooks/oeb/stylizer.py @@ -169,7 +169,8 @@ class Stylizer(object): if not matches and class_sel_pat.match(text): found = False for x in tree.xpath('//*[@class]'): - if text.lower().endswith('.'+x.get('class').lower()): + if text.lower().endswith('.'+x.get('class').lower()) and \ + text.lower() != text: matches.append(x) found = True if found: diff --git a/src/calibre/ebooks/oeb/transforms/guide.py b/src/calibre/ebooks/oeb/transforms/guide.py index aaeba67d80..c1f0dd6669 100644 --- a/src/calibre/ebooks/oeb/transforms/guide.py +++ b/src/calibre/ebooks/oeb/transforms/guide.py @@ -34,7 +34,8 @@ class Clean(object): for x in list(self.oeb.guide): href = urldefrag(self.oeb.guide[x].href)[0] - if x.lower() not in ('cover', 'titlepage'): + if x.lower() not in ('cover', 'titlepage', 'masthead', 'toc', + 'title-page', 'copyright-page'): self.oeb.guide.remove(x) diff --git a/src/calibre/ebooks/oeb/transforms/split.py b/src/calibre/ebooks/oeb/transforms/split.py index 1fba7ffa64..d4b60e3a59 100644 --- a/src/calibre/ebooks/oeb/transforms/split.py +++ b/src/calibre/ebooks/oeb/transforms/split.py @@ -16,7 +16,7 @@ from lxml import etree from lxml.cssselect import CSSSelector from calibre.ebooks.oeb.base import OEB_STYLES, XPNSMAP as NAMESPACES, \ - urldefrag, rewrite_links, urlunquote, barename + urldefrag, rewrite_links, urlunquote, barename, XHTML from calibre.ebooks.epub import rules XPath = functools.partial(_XPath, namespaces=NAMESPACES) @@ -216,7 +216,25 @@ class FlowSplitter(object): self.trees.append(before) tree = after self.trees.append(tree) - self.trees = [t for t in self.trees if not self.is_page_empty(t.getroot())] + trees, ids = [], set([]) + for tree in self.trees: + root = tree.getroot() + if self.is_page_empty(root): + discarded_ids = root.xpath('//*[@id]') + for x in discarded_ids: + x = x.get('id') + if not x.startswith('calibre_'): + ids.add(x) + else: + if ids: + body = self.get_body(root) + if body is not None: + for x in ids: + body.insert(0, body.makeelement(XHTML('div'), + id=x, style='height:0pt')) + ids = set([]) + trees.append(tree) + self.trees = trees def get_body(self, root): body = root.xpath('//h:body', namespaces=NAMESPACES) diff --git a/src/calibre/gui2/add.py b/src/calibre/gui2/add.py index ec253e5ae0..b5572e34d6 100644 --- a/src/calibre/gui2/add.py +++ b/src/calibre/gui2/add.py @@ -107,8 +107,6 @@ class Adder(QObject): self.callback(self.paths, self.names, self.infos) self.callback_called = True - - def update(self): if not self.ids: self.timer.stop() diff --git a/src/calibre/gui2/images/news/eltiempo_hn.png b/src/calibre/gui2/images/news/eltiempo_hn.png new file mode 100644 index 0000000000..56bba04b0a Binary files /dev/null and b/src/calibre/gui2/images/news/eltiempo_hn.png differ diff --git a/src/calibre/gui2/images/news/laprensa_hn.png b/src/calibre/gui2/images/news/laprensa_hn.png new file mode 100644 index 0000000000..99be6dee67 Binary files /dev/null and b/src/calibre/gui2/images/news/laprensa_hn.png differ diff --git a/src/calibre/gui2/images/news/latribuna.png b/src/calibre/gui2/images/news/latribuna.png new file mode 100644 index 0000000000..06c9ebeef1 Binary files /dev/null and b/src/calibre/gui2/images/news/latribuna.png differ diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index 43bd6e6434..41e4387e1e 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -657,6 +657,8 @@ class LibraryDatabase2(LibraryDatabase): author = sanitize_file_name(authors.split(',')[0][:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace') title = sanitize_file_name(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace') name = title + ' - ' + author + while name.endswith('.'): + name = name[:-1] return name def rmtree(self, path): @@ -1074,6 +1076,8 @@ class LibraryDatabase2(LibraryDatabase): self.set_isbn(id, mi.isbn, notify=False) if mi.series_index: self.set_series_index(id, mi.series_index, notify=False) + if mi.pubdate: + self.set_pubdate(id, mi.pubdate, notify=False) if getattr(mi, 'timestamp', None) is not None: self.set_timestamp(id, mi.timestamp, notify=False) self.set_path(id, True) @@ -1734,7 +1738,7 @@ books_series_link feeds formats = self.find_books_in_directory(dirpath, True) if not formats: return - + formats = list(formats) mi = metadata_from_formats(formats) if mi.title is None: return diff --git a/src/calibre/library/server.py b/src/calibre/library/server.py index eab159bc95..0a13800f75 100644 --- a/src/calibre/library/server.py +++ b/src/calibre/library/server.py @@ -366,10 +366,9 @@ class LibraryServer(object): @expose def index(self, **kwargs): 'The / URL' - stanza = cherrypy.request.headers.get('Stanza-Device-Name', 919) - if stanza == 919: - return self.static('index.html') - return self.stanza() + want_opds = cherrypy.request.headers.get('Stanza-Device-Name', 919) != \ + 919 or cherrypy.request.headers.get('Want-OPDS-Catalog', 919) != 919 + return self.stanza() if want_opds else self.static('index.html') @expose diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index 17bff315d4..9f74b6263f 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -469,6 +469,7 @@ class BasicNewsRecipe(Recipe): self.username = options.username self.password = options.password self.lrf = options.lrf + self.include_navbars = not options.no_inline_navbars self.output_dir = os.path.abspath(self.output_dir) if options.test: @@ -539,7 +540,7 @@ class BasicNewsRecipe(Recipe): if first_fetch and job_info: url, f, a, feed_len = job_info body = soup.find('body') - if body is not None: + if body is not None and self.include_navbars: templ = self.navbar.generate(False, f, a, feed_len, not self.has_single_feed, url, __appname__, @@ -907,12 +908,13 @@ class BasicNewsRecipe(Recipe): body = soup.find('body') if body is not None: prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last)))) - templ = self.navbar.generate(True, num, j, len(f), - not self.has_single_feed, - a.orig_url, __appname__, prefix=prefix, - center=self.center_navbar) - elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div') - body.insert(len(body.contents), elem) + if self.include_navbars: + templ = self.navbar.generate(True, num, j, len(f), + not self.has_single_feed, + a.orig_url, __appname__, prefix=prefix, + center=self.center_navbar) + elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div') + body.insert(len(body.contents), elem) with open(last, 'wb') as fi: fi.write(unicode(soup).encode('utf-8')) @@ -923,7 +925,7 @@ class BasicNewsRecipe(Recipe): if po is None: self.play_order_counter += 1 po = self.play_order_counter - desc = f.description + desc = getattr(f, 'description', None) if not desc: desc = None feed_index(i, toc.add_item('feed_%d/index.html'%i, None, diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py index 44fb9bd46e..51f0000605 100644 --- a/src/calibre/web/feeds/recipes/__init__.py +++ b/src/calibre/web/feeds/recipes/__init__.py @@ -51,9 +51,11 @@ recipe_modules = ['recipe_' + r for r in ( 'theeconomictimes_india', '7dias', 'buenosaireseconomico', 'diagonales', 'miradasalsur', 'newsweek_argentina', 'veintitres', 'gva_be', 'hln', 'tijd', 'degentenaar', 'inquirer_net', 'uncrate', - 'fastcompany', 'accountancyage', + 'fastcompany', 'accountancyage', 'laprensa_hn', 'latribuna', + 'eltiempo_hn', )] + import re, imp, inspect, time, os from calibre.web.feeds.news import BasicNewsRecipe, CustomIndexRecipe, AutomaticNewsRecipe from calibre.ebooks.BeautifulSoup import BeautifulSoup diff --git a/src/calibre/web/feeds/recipes/recipe_eltiempo_hn.py b/src/calibre/web/feeds/recipes/recipe_eltiempo_hn.py new file mode 100644 index 0000000000..e7fd23b797 --- /dev/null +++ b/src/calibre/web/feeds/recipes/recipe_eltiempo_hn.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>' +''' +www.tiempo.hn +''' + +from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag + +class ElTiempoHn(BasicNewsRecipe): + title = 'El Tiempo - Honduras' + __author__ = 'Darko Miletic' + description = 'Noticias de Honduras y mundo' + publisher = 'El Tiempo' + category = 'news, politics, Honduras' + oldest_article = 2 + max_articles_per_feed = 100 + use_embedded_content = False + no_stylesheets = True + remove_javascript = True + encoding = 'utf-8' + language = _('Spanish') + lang = 'es-HN' + direction = 'ltr' + + html2lrf_options = [ + '--comment', description + , '--category', category + , '--publisher', publisher + , '--ignore-tables' + ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True\npretty_print=True\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} img {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em}"' + + remove_tags = [dict(name=['form','object','embed','base'])] + + keep_only_tags = [dict(name='td' , attrs={'id':'mainbodycont'})] + + feeds = [(u'Noticias', u'http://www.tiempo.hn/index.php?format=feed&type=rss')] + + def preprocess_html(self, soup): + soup.html['lang'] = self.lang + soup.html['dir' ] = self.direction + mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)]) + mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")]) + soup.head.insert(0,mlang) + soup.head.insert(1,mcharset) + for item in soup.findAll(style=True): + del item['style'] + return self.adeify_images(soup) diff --git a/src/calibre/web/feeds/recipes/recipe_laprensa_hn.py b/src/calibre/web/feeds/recipes/recipe_laprensa_hn.py new file mode 100644 index 0000000000..b34f158400 --- /dev/null +++ b/src/calibre/web/feeds/recipes/recipe_laprensa_hn.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>' +''' +www.laprensahn.com +''' + +from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag + +class LaPrensaHn(BasicNewsRecipe): + title = 'La Prensa - Honduras' + __author__ = 'Darko Miletic' + description = 'Noticias de Honduras y mundo' + publisher = 'La Prensa' + category = 'news, politics, Honduras' + oldest_article = 2 + max_articles_per_feed = 100 + use_embedded_content = False + no_stylesheets = True + remove_javascript = True + encoding = 'utf-8' + language = _('Spanish') + lang = 'es-HN' + direction = 'ltr' + + html2lrf_options = [ + '--comment', description + , '--category', category + , '--publisher', publisher + ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\npretty_print=True\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "' + + remove_tags = [dict(name=['form','object','embed'])] + + keep_only_tags = [ + dict(name='h1' , attrs={'class':'titulo1'}) + ,dict(name='div', attrs={'class':['sumario11','hora','texto']}) + ] + + feeds = [(u'Noticias', u'http://feeds.feedburner.com/laprensa_titulares')] + + def preprocess_html(self, soup): + soup.html['lang'] = self.lang + soup.html['dir' ] = self.direction + mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)]) + mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")]) + soup.head.insert(0,mlang) + soup.head.insert(1,mcharset) + for item in soup.findAll(style=True): + del item['style'] + return soup diff --git a/src/calibre/web/feeds/recipes/recipe_latribuna.py b/src/calibre/web/feeds/recipes/recipe_latribuna.py new file mode 100644 index 0000000000..d3a9a333cb --- /dev/null +++ b/src/calibre/web/feeds/recipes/recipe_latribuna.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>' +''' +www.latribuna.hn +''' + +from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag + +class LaTribuna(BasicNewsRecipe): + title = 'La Tribuna - Honduras' + __author__ = 'Darko Miletic' + description = 'Noticias de Honduras y mundo' + publisher = 'La Tribuna' + category = 'news, politics, Honduras' + oldest_article = 2 + max_articles_per_feed = 100 + use_embedded_content = False + no_stylesheets = True + remove_javascript = True + encoding = 'utf-8' + language = _('Spanish') + lang = 'es-HN' + direction = 'ltr' + + html2lrf_options = [ + '--comment', description + , '--category', category + , '--publisher', publisher + ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\npretty_print=True\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "' + + remove_tags = [dict(name=['form','object','embed'])] + + keep_only_tags = [ + dict(name='p', attrs={'id':['BlogTitle','BlogDate']}) + ,dict(name='div', attrs={'id':'BlogContent'}) + ] + + feeds = [(u'Noticias', u'http://www.latribuna.hn/web2.0/?feed=rss')] + + def print_version(self, url): + return url + '&print=1' + + def preprocess_html(self, soup): + soup.html['lang'] = self.lang + soup.html['dir' ] = self.direction + mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)]) + mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")]) + soup.head.insert(0,mlang) + soup.head.insert(1,mcharset) + for item in soup.findAll(style=True): + del item['style'] + return soup + + def get_cover_url(self): + cover_url = None + soup = self.index_to_soup('http://www.latribuna.hn/web2.0/') + cover_item = soup.find('div',attrs={'class':'portada_impresa'}) + if cover_item: + cover_url = cover_item.a.img['src'] + return cover_url diff --git a/src/calibre/web/feeds/recipes/recipe_nytimes_sub.py b/src/calibre/web/feeds/recipes/recipe_nytimes_sub.py index 5d91dbae38..4449ba1aa2 100644 --- a/src/calibre/web/feeds/recipes/recipe_nytimes_sub.py +++ b/src/calibre/web/feeds/recipes/recipe_nytimes_sub.py @@ -11,7 +11,7 @@ from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import BeautifulSoup class NYTimes(BasicNewsRecipe): - + title = 'The New York Times (subscription)' __author__ = 'Kovid Goyal' language = _('English') @@ -20,13 +20,13 @@ class NYTimes(BasicNewsRecipe): needs_subscription = True remove_tags_before = dict(id='article') remove_tags_after = dict(id='article') - remove_tags = [dict(attrs={'class':['articleTools', 'post-tools', 'side_tool', 'nextArticleLink clearfix']}), - dict(id=['footer', 'toolsRight', 'articleInline', 'navigation', 'archive', 'side_search', 'blog_sidebar', 'side_tool', 'side_index']), + remove_tags = [dict(attrs={'class':['articleTools', 'post-tools', 'side_tool', 'nextArticleLink clearfix']}), + dict(id=['footer', 'toolsRight', 'articleInline', 'navigation', 'archive', 'side_search', 'blog_sidebar', 'side_tool', 'side_index']), dict(name=['script', 'noscript', 'style'])] encoding = 'cp1252' no_stylesheets = True extra_css = 'h1 {font: sans-serif large;}\n.byline {font:monospace;}' - + def get_browser(self): br = BasicNewsRecipe.get_browser() if self.username is not None and self.password is not None: @@ -36,24 +36,24 @@ class NYTimes(BasicNewsRecipe): br['PASSWORD'] = self.password br.submit() return br - + def parse_index(self): soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html') - + def feed_title(div): return ''.join(div.findAll(text=True, recursive=False)).strip() - + articles = {} key = None ans = [] - for div in soup.findAll(True, + for div in soup.findAll(True, attrs={'class':['section-headline', 'story', 'story headline']}): - + if div['class'] == 'section-headline': key = string.capwords(feed_title(div)) articles[key] = [] ans.append(key) - + elif div['class'] in ['story', 'story headline']: a = div.find('a', href=True) if not a: @@ -66,21 +66,21 @@ class NYTimes(BasicNewsRecipe): summary = div.find(True, attrs={'class':'summary'}) if summary: description = self.tag_to_string(summary, use_alt=False) - + feed = key if key is not None else 'Uncategorized' if not articles.has_key(feed): articles[feed] = [] if not 'podcasts' in url: articles[feed].append( - dict(title=title, url=url, date=pubdate, + dict(title=title, url=url, date=pubdate, description=description, content='')) - ans = self.sort_index_by(ans, {'The Front Page':-1, - 'Dining In, Dining Out':1, + ans = self.sort_index_by(ans, {'The Front Page':-1, + 'Dining In, Dining Out':1, 'Obituaries':2}) ans = [(key, articles[key]) for key in ans if articles.has_key(key)] return ans - + def preprocess_html(self, soup): refresh = soup.find('meta', {'http-equiv':'refresh'}) if refresh is None: diff --git a/src/calibre/web/feeds/recipes/recipe_wsj.py b/src/calibre/web/feeds/recipes/recipe_wsj.py index 67211d75dc..962f7cb30b 100644 --- a/src/calibre/web/feeds/recipes/recipe_wsj.py +++ b/src/calibre/web/feeds/recipes/recipe_wsj.py @@ -53,6 +53,10 @@ class WallStreetJournal(BasicNewsRecipe): def postprocess_html(self, soup, first): for tag in soup.findAll(name=['table', 'tr', 'td']): tag.name = 'div' + + for tag in soup.findAll('div', dict(id=["articleImage_1", "articleImage_2", "articleImage_3", "articleImage_4", "articleImage_5", "articleImage_6", "articleImage_7"])): + tag.extract() + return soup def get_article_url(self, article): @@ -70,7 +74,7 @@ class WallStreetJournal(BasicNewsRecipe): #('Most Emailed - Month', 'http://online.wsj.com/xml/rss/3_7254.xml'), (' Most Viewed - Day', 'http://online.wsj.com/xml/rss/3_7198.xml'), (' Most Viewed - Week', 'http://online.wsj.com/xml/rss/3_7251.xml'), - # ('Most Viewed - Month', 'http://online.wsj.com/xml/rss/3_7252.xml'), + #('Most Viewed - Month', 'http://online.wsj.com/xml/rss/3_7252.xml'), ('Today\'s Newspaper - Page One', 'http://online.wsj.com/xml/rss/3_7205.xml'), ('Today\'s Newspaper - Marketplace', 'http://online.wsj.com/xml/rss/3_7206.xml'), ('Today\'s Newspaper - Money & Investing', 'http://online.wsj.com/xml/rss/3_7207.xml'),