diff --git a/Changelog.yaml b/Changelog.yaml index d0c3478ba3..d40031ad9f 100644 --- a/Changelog.yaml +++ b/Changelog.yaml @@ -4,6 +4,164 @@ # for important features/bug fixes. # Also, each release can have new and improved recipes. +- version: 0.7.26 + date: 2010-10-30 + + new features: + - title: "Check library: Allow wildcards in ignore names field" + + bug fixes: + - title: "Fix regression in 0.7.25 that broke reading metadata from filenames." + + - title: "Fix regression in 0.7.25 that caused original files to be mistakenly removed when adding books recursively" + + - title: "Fix long series/publisher causing edit metadata in bulk dialog to become very large" + tickets: [7332] + + - title: "Only add SONY periodical code to downloaded news if output profile is set to one of the SONY reader profiles. This is needed because the ever delightful Stanza crashes and burns when an EPUB has the periodical code" + + improved recipes: + - El Periodico + - New Zealand Herald + + new recipes: + - title: "Taggeschau.de" + author: "Florian Andreas Pfaff" + + - title: "Gamespot Reviews" + author: "Marc Tonsing" + +- version: 0.7.25 + date: 2010-10-29 + + new features: + - title: "Add support for the SONY periodical format." + description: "This means that news downloaded by calibre and sent to a newer SONY device (350/650/900) should appear in the Periodicals section and have the special periodicals navigation user interface" + type: major + + - title: "Content server: Make the new browsing interface the default. The old interface can be accessed at /old" + + - title: "Content server: Allow running of content server as a WSGI application within another server. Add tutorial for this to the User Manual." + + - title: "Support for the Pico Life reader, Kobo Wifi and HTC Aria" + + - title: "Content server: Add a new --url-prefix command line option to ease the use of the server with a reverse proxy" + + - title: "New social metadata plugin for Amazon that does not rely on AWS. 
Since Amazon broke AWS, it is recommended you upgrade to this version if you use metadata from Amazon" + + - title: "Add a tweak to specify the fonts used when generating the default cover" + + - title: "Add an output profile for generic Tablet devices" + tickets: [7289] + + - title: "SONY driver: Allow sorting of collections by arbitrary field via a new tweak." + + - title: "Content server: Make /mobile a little prettier" + + - title: "Add button to 'Library Check' to automatically delete spurious files and folders" + + bug fixes: + - title: "FB2 Input: Lots of love. Handle stylesheets and style attributes. Make parsing malformed FB2 files more robust." + tickets: [7219, 7230] + + - title: "Fix auto send of news to device with multiple calibre libraries. The fix means that if you have any pending news to be sent, it will be ignored after the update. Future news downloads will once again be automatically sent to the device." + + - title: "MOBI Output: Conversion of super/sub scripts now handles nested tags." + tickets: [7264] + + - title: "Conversion pipeline: Fix parsing of XML encoding declarations." 
+ tickets: [7328] + + - title: "Pandigital (Kobo): Upload thumbnails to correct location" + tickets: [7165] + + - title: "Fix auto emailed news with non ascii characters in title not being delivered to Kindle" + tickets: [7322] + + - title: "Read metadata only after on import plugins have run when adding books to GUI" + tickets: [7245] + + - title: "Various fixes for bugs caused by non ascii temporary paths on windows with non UTF-8 filesystem encodings" + tickets: [7288] + + - title: "Various fixes/enhancements to SNB Output" + + - title: "Allow Tag editor in edit metadata dialog to be used even if tags have been changed" + tickets: [7298] + + - title: "Fix crash on some OS X machines when Preferences->Conversion->Output is clicked" + + - title: "MOBI indexing: Fix last entry missing sometimes" + tickets: [6595] + + - title: "Fix regression causing books to be deselected after sending to device" + tickets: [7271] + + - title: "Conversion pipeline: Fix rescaling of GIF images not working" + tickets: [7306] + + - title: "Update PDF metadata/conversion libraries in windows build" + + - title: "Fix timezone bug when searching on date fields" + tickets: [7300] + + - title: "Fix regression that caused the viewer to crash if the main application is closed" + tickets: [7276] + + - title: "Fix bug causing a spurious metadata.opf file to be written at the root of the calibre library when adding books" + + - title: "Use the same title casing algorithm in all places" + + - title: "Fix bulk edit of dual state boolean custom columns" + + - title: "Increase image size for comics in Kindle DX profile for better conversion of comics to PDF" + + - title: "Fix restore db to not die when conflicting custom columns are encountered and report conflicting columns errors. Fix exceptions when referencing invalid _index fields." 
+ + - title: "Fix auto merge books not respecting article sort tweak" + tickets: [7147] + + - title: "Linux device drivers: Fix udisks based ejecting for devices with multiple nodes" + + - title: "Linux device mounting: Mount the drive with the lowest kernel name as main memory" + + - title: "Fix use of numeric fields in templates" + + - title: "EPUB Input: Handle EPUB files with multiple OPF files." + tickets: [7229] + + - title: "Setting EPUB metadata: Fix date format. Fix language being overwritten by und when unspecified. Fix empty ISBN identifier being created" + + - title: "Fix cannot delete a Series listing from List view also dismiss fetch metadata dialog when no metadata found automatically" + tickets: [7221, 7220] + + - title: "Content server: Handle switch library in GUI gracefully" + + - title: "calibre-server: Use cherrypy implementation of --pidfile and --daemonize" + + new recipes: + - title: "Ming Pao" + author: "Eddie Lau" + + - title: "lenta.ru" + author: "Nikolai Kotchetkov" + + - title: "frazpc.pl" + author: "Tomasz Dlugosz" + + - title: "Perfil and The Economic Collapse Blog" + author: "Darko Miletic" + + - title: "STNN" + author: "Larry Chan" + + improved recipes: + - CubaDebate + - El Pais + - Fox News + - New Scientist + - The Economic Times of India + - version: 0.7.24 date: 2010-10-17 diff --git a/resources/content_server/browse/browse.css b/resources/content_server/browse/browse.css index 92ed4c3ce6..1243795e55 100644 --- a/resources/content_server/browse/browse.css +++ b/resources/content_server/browse/browse.css @@ -208,6 +208,8 @@ h2.library_name { } +.toplevel li a { text-decoration: none; } + .toplevel li img { vertical-align: middle; margin-right: 1em; @@ -261,9 +263,16 @@ h2.library_name { } -.category div.category-item span.href { display: none } +.category div.category-item a { text-decoration: none; color: inherit; } -#groups span.load_href { display: none } +#groups a.load_href { + text-decoration: none; + color: inherit; + 
font-size: medium; + font-weight: normal; + padding: 0; + padding-left: 0.5em; +} #groups h3 { font-weight: bold; diff --git a/resources/content_server/browse/browse.html b/resources/content_server/browse/browse.html index 4acc15f3ea..ef312334d9 100644 --- a/resources/content_server/browse/browse.html +++ b/resources/content_server/browse/browse.html @@ -8,24 +8,25 @@ - - - + + + - - + + + src="{prefix}/static/jquery_ui/js/jquery-ui-1.8.5.custom.min.js"> + src="{prefix}/static/jquery.multiselect.min.js"> - + - - + + + + +
- Show first set of books Show previous set of books              Show next set of books Show last set of books + Show first set of books Show previous set of books              Show next set of books Show last set of books
@@ -38,7 +39,7 @@
- Loading... Loading… + Loading... Loading…
diff --git a/resources/content_server/mobile.css b/resources/content_server/mobile.css index 0022b2a134..28d12bb6db 100644 --- a/resources/content_server/mobile.css +++ b/resources/content_server/mobile.css @@ -1,5 +1,9 @@ /* CSS for the mobile version of the content server webpage */ +.body { + font-family: sans-serif; +} + .navigation table.buttons { width: 100%; } @@ -53,6 +57,7 @@ div.navigation { } #listing td { padding: 0.25em; + vertical-align: middle; } #listing td.thumbnail { @@ -73,6 +78,7 @@ div.navigation { overflow: hidden; text-align: center; text-decoration: none; + vertical-align: middle; } #logo { @@ -83,4 +89,17 @@ div.navigation { clear: both; } +.data-container { + display: inline-block; + vertical-align: middle; +} +.first-line { + font-size: larger; + font-weight: bold; +} + +.second-line { + margin-top: 0.75ex; + display: block; +} diff --git a/resources/content_server/read/monocle.js b/resources/content_server/read/monocle.js new file mode 100644 index 0000000000..c0642743f7 --- /dev/null +++ b/resources/content_server/read/monocle.js @@ -0,0 +1,3385 @@
+// Root namespace object; everything else in this file hangs off `Monocle`.
+Monocle = {
+  VERSION: "1.0.0"
+};
+
+
+// Reports that a named piece of the library has been evaluated. If a global
+// function `onMonoclePiece` exists it is called with the piece name;
+// otherwise this is a no-op.
+Monocle.pieceLoaded = function (piece) {
+  if (typeof onMonoclePiece == 'function') {
+    onMonoclePiece(piece);
+  }
+}
+
+
+// Schedules `fn` via setTimeout after `time` milliseconds (0 when `time` is
+// omitted or falsy) and returns the timer id. When `fn` is not a function
+// nothing is scheduled and the result is undefined.
+Monocle.defer = function (fn, time) {
+  if (fn && typeof fn == "function") {
+    return setTimeout(fn, time || 0);
+  }
+}
+
+
+// `engine` starts out as 'W3C' and may be overwritten while the
+// `Monocle.Browser.is` literal below is being evaluated.
+Monocle.Browser = { engine: 'W3C' }
+
+// User-agent sniffing flags. Each of IE/Opera/WebKit/Gecko is written as
+// `test && (Monocle.Browser.engine = "Name")`, so a passing test both sets
+// Monocle.Browser.engine as a side effect and leaves the (truthy) engine
+// name string as the flag value; a failing test leaves `false`.
+// Properties are evaluated top to bottom, so when several tests pass the
+// last matching one decides the final value of Monocle.Browser.engine.
+Monocle.Browser.is = {
+  IE: (!!(window.attachEvent && navigator.userAgent.indexOf('Opera') === -1)) &&
+    (Monocle.Browser.engine = "IE"),
+  Opera: navigator.userAgent.indexOf('Opera') > -1 &&
+    (Monocle.Browser.engine = "Opera"),
+  WebKit: navigator.userAgent.indexOf('AppleWebKit/') > -1 &&
+    (Monocle.Browser.engine = "WebKit"),
+  Gecko: navigator.userAgent.indexOf('Gecko') > -1 &&
+    navigator.userAgent.indexOf('KHTML') === -1 &&
+    (Monocle.Browser.engine = "Gecko"),
+  // Plain boolean; does not touch Monocle.Browser.engine.
+  MobileSafari: !!navigator.userAgent.match(/AppleWebKit.*Mobile/)
+} // ... with thanks to PrototypeJS.
+ + +Monocle.Browser.on = { + iPhone: navigator.userAgent.indexOf("iPhone") != -1, + iPad: navigator.userAgent.indexOf("iPad") != -1, + BlackBerry: navigator.userAgent.indexOf("BlackBerry") != -1, + Android: navigator.userAgent.indexOf('Android') != -1, + Kindle3: navigator.userAgent.match(/Kindle\/3/) +} + + +if (Monocle.Browser.is.MobileSafari) { + (function () { + var ver = navigator.userAgent.match(/ OS ([\d_]+)/); + if (ver) { + Monocle.Browser.iOSVersion = ver[1].replace(/_/g, '.'); + } else { + console.warn("Unknown MobileSafari user agent: "+navigator.userAgent); + } + })(); +} +Monocle.Browser.iOSVersionBelow = function (strOrNum) { + return Monocle.Browser.iOSVersion && Monocle.Browser.iOSVersion < strOrNum; +} + + +Monocle.Browser.CSSProps = { + engines: ["W3C", "WebKit", "Gecko", "Opera", "IE", "Konqueror"], + prefixes: ["", "-webkit-", "-moz-", "-o-", "-ms-", "-khtml-"], + domprefixes: ["", "Webkit", "Moz", "O", "ms", "Khtml"], + guineapig: document.createElement('div') +} + + +Monocle.Browser.CSSProps.capStr = function (wd) { + return wd ? 
wd.charAt(0).toUpperCase() + wd.substr(1) : ""; +} + + +Monocle.Browser.CSSProps.toDOMProps = function (prop, prefix) { + var parts = prop.split('-'); + for (var i = parts.length; i > 0; --i) { + parts[i] = Monocle.Browser.CSSProps.capStr(parts[i]); + } + + if (typeof(prefix) != 'undefined' && prefix != null) { + if (prefix) { + parts[0] = Monocle.Browser.CSSProps.capStr(parts[0]); + return prefix+parts.join(''); + } else { + return parts.join(''); + } + } + + var props = [parts.join('')]; + parts[0] = Monocle.Browser.CSSProps.capStr(parts[0]); + for (i = 0; i < Monocle.Browser.CSSProps.prefixes.length; ++i) { + var pf = Monocle.Browser.CSSProps.domprefixes[i]; + if (!pf) { continue; } + props.push(pf+parts.join('')); + } + return props; +} + + +Monocle.Browser.CSSProps.toDOMProp = function (prop) { + return Monocle.Browser.CSSProps.toDOMProps( + prop, + Monocle.Browser.CSSProps.domprefixes[ + Monocle.Browser.CSSProps.engines.indexOf(Monocle.Browser.engine) + ] + ); +} + + +Monocle.Browser.CSSProps.isSupported = function (props) { + for (var i in props) { + if (Monocle.Browser.CSSProps.guineapig.style[props[i]] !== undefined) { + return true; + } + } + return false; +} // Thanks modernizr! + + +Monocle.Browser.CSSProps.isSupportedForAnyPrefix = function (prop) { + return Monocle.Browser.CSSProps.isSupported( + Monocle.Browser.CSSProps.toDOMProps(prop) + ); +} + + +Monocle.Browser.CSSProps.supportsMediaQuery = function (query) { + var gpid = "monocle_guineapig"; + var div = Monocle.Browser.CSSProps.guineapig; + div.id = gpid; + var st = document.createElement('style'); + st.textContent = query+'{#'+gpid+'{height:3px}}'; + (document.head || document.getElementsByTagName('head')[0]).appendChild(st); + document.documentElement.appendChild(div); + + var result = Monocle.Browser.CSSProps.guineapig.offsetHeight === 3; + + st.parentNode.removeChild(st); + div.parentNode.removeChild(div); + + return result; +} // Thanks modernizr! 
+ + +Monocle.Browser.CSSProps.supportsMediaQueryProperty = function (prop) { + return Monocle.Browser.CSSProps.supportsMediaQuery( + '@media ('+Monocle.Browser.CSSProps.prefixes.join(prop+'),(')+'monocle__)' + ); +} + + + +Monocle.Browser.has = {} +Monocle.Browser.has.touch = ('ontouchstart' in window) || + Monocle.Browser.CSSProps.supportsMediaQueryProperty('touch-enabled'); +Monocle.Browser.has.columns = Monocle.Browser.CSSProps.isSupportedForAnyPrefix( + 'column-width' +); +Monocle.Browser.has.transform3d = Monocle.Browser.CSSProps.isSupported([ + 'perspectiveProperty', + 'WebkitPerspective', + 'MozPerspective', + 'OPerspective', + 'msPerspective' +]) && Monocle.Browser.CSSProps.supportsMediaQueryProperty('transform-3d'); +Monocle.Browser.has.iframeTouchBug = Monocle.Browser.iOSVersionBelow("4.2"); +Monocle.Browser.has.selectThruBug = Monocle.Browser.iOSVersionBelow("4.2"); +Monocle.Browser.has.mustScrollSheaf = Monocle.Browser.is.MobileSafari; +Monocle.Browser.has.iframeDoubleWidthBug = Monocle.Browser.has.mustScrollSheaf; +Monocle.Browser.has.floatColumnBug = Monocle.Browser.is.WebKit; + + +if (typeof window.console == "undefined") { + window.console = { + messages: [], + log: function (msg) { + this.messages.push(msg); + } + } +} + + +window.console.compatDir = function (obj) { + var stringify = function (o) { + var parts = []; + for (x in o) { + parts.push(x + ": " + o[x]); + } + return parts.join("; "); + } + + window.console.log(stringify(obj)); +} + + +if (!Array.prototype.indexOf) { + Array.prototype.indexOf = function(elt /*, from*/) { + var len = this.length >>> 0; + + var from = Number(arguments[1]) || 0; + from = (from < 0) + ? 
Math.ceil(from) + : Math.floor(from); + if (from < 0) { + from += len; + } + + for (; from < len; from++) { + if (from in this && this[from] === elt) { + return from; + } + } + return -1; + }; +} + + +Monocle.pieceLoaded('compat'); +Monocle.Factory = function (element, label, index, reader) { + + var API = { constructor: Monocle.Factory }; + var k = API.constants = API.constructor; + var p = API.properties = { + element: element, + label: label, + index: index, + reader: reader, + prefix: reader.properties.classPrefix || '' + } + + + function initialize() { + var node = p.reader.properties.graph; + node[p.label] = node[p.label] || []; + if (typeof p.index == 'undefined' && node[p.label][p.index]) { + throw('Element already exists in graph: '+p.label+'['+p.index+']'); + } else { + p.index = p.index || node[p.label].length; + } + node[p.label][p.index] = p.element; + + addClass(p.label); + } + + + function find(oLabel, oIndex) { + if (!p.reader.properties.graph[oLabel]) { + return null; + } + return p.reader.properties.graph[oLabel][oIndex || 0]; + } + + + function claim(oElement, oLabel, oIndex) { + return oElement.dom = new Monocle.Factory( + oElement, + oLabel, + oIndex, + p.reader + ); + } + + + function make(tagName, oLabel, index_or_options, or_options) { + var oIndex, options; + if (arguments.length == 2) { + oIndex = 0; + options = {}; + } else if (arguments.length == 4) { + oIndex = arguments[2]; + options = arguments[3]; + } else if (arguments.length == 3) { + var lastArg = arguments[arguments.length - 1]; + if (typeof lastArg == "number") { + oIndex = lastArg; + options = {}; + } else { + oIndex = 0; + options = lastArg; + } + } + + var oElement = document.createElement(tagName); + claim(oElement, oLabel, oIndex); + if (options['class']) { + oElement.className += " "+p.prefix+options['class']; + } + if (options['html']) { + oElement.innerHTML = options['html']; + } + if (options['text']) { + oElement.appendChild(document.createTextNode(options['text'])); + 
} + + return oElement; + } + + + function append(tagName, oLabel, index_or_options, or_options) { + var oElement = make.apply(this, arguments); + p.element.appendChild(oElement); + return oElement; + } + + + function address() { + return [p.label, p.index, p.reader]; + } + + + function setStyles(rules) { + return Monocle.Styles.applyRules(p.element, rules); + } + + + function setBetaStyle(property, value) { + return Monocle.Styles.affix(p.element, property, value); + } + + + + function hasClass(name) { + name = p.prefix + name; + var klass = p.element.className; + if (!klass) { return false; } + if (klass == name) { return true; } + return new RegExp("(^|\\s)"+name+"(\\s|$)").test(klass); + } + + + function addClass(name) { + if (hasClass(name)) { return; } + var gap = p.element.className ? ' ' : ''; + return p.element.className += gap+p.prefix+name; + } + + + function removeClass(name) { + var reName = new RegExp("(^|\\s+)"+p.prefix+name+"(\\s+|$)"); + var reTrim = /^\s+|\s+$/g; + var klass = p.element.className; + p.element.className = klass.replace(reName, ' ').replace(reTrim, ''); + return p.element.className; + } + + + API.find = find; + API.claim = claim; + API.make = make; + API.append = append; + API.address = address; + + API.setStyles = setStyles; + API.setBetaStyle = setBetaStyle; + API.hasClass = hasClass; + API.addClass = addClass; + API.removeClass = removeClass; + + initialize(); + + return API; +} + +Monocle.pieceLoaded('factory'); +Monocle.Events = {} + + +Monocle.Events.listen = function (elem, evtType, fn, useCapture) { + if (elem.addEventListener) { + return elem.addEventListener(evtType, fn, useCapture || false); + } else if (elem.attachEvent) { + return elem.attachEvent('on'+evtType, fn); + } +} + + +Monocle.Events.deafen = function (elem, evtType, fn, useCapture) { + if (elem.removeEventListener) { + return elem.removeEventListener(evtType, fn, useCapture || false); + } else if (elem.detachEvent) { + try { + return 
elem.detachEvent('on'+evtType, fn); + } catch(e) {} + } +} + + +Monocle.Events.listenForContact = function (elem, fns, options) { + var listeners = {}; + + var cursorInfo = function (evt, ci) { + evt.m = { + pageX: ci.pageX, + pageY: ci.pageY + }; + + var target = evt.target || window.srcElement; + while (target.nodeType != 1 && target.parentNode) { + target = target.parentNode; + } + + var offset = offsetFor(evt, target); + evt.m.offsetX = offset[0]; + evt.m.offsetY = offset[1]; + + if (evt.currentTarget) { + offset = offsetFor(evt, evt.currentTarget); + evt.m.registrantX = offset[0]; + evt.m.registrantY = offset[1]; + } + + return evt; + } + + + var offsetFor = function (evt, elem) { + var r; + if (elem.getBoundingClientRect) { + var er = elem.getBoundingClientRect(); + var dr = document.body.getBoundingClientRect(); + r = { left: er.left - dr.left, top: er.top - dr.top }; + } else { + r = { left: elem.offsetLeft, top: elem.offsetTop } + while (elem = elem.parentNode) { + if (elem.offsetLeft || elem.offsetTop) { + r.left += elem.offsetLeft; + r.top += elem.offsetTop; + } + } + } + return [evt.m.pageX - r.left, evt.m.pageY - r.top]; + } + + + var capture = (options && options.useCapture) || false; + + if (!Monocle.Browser.has.touch) { + if (fns.start) { + listeners.mousedown = function (evt) { + if (evt.button != 0) { return; } + fns.start(cursorInfo(evt, evt)); + } + Monocle.Events.listen(elem, 'mousedown', listeners.mousedown, capture); + } + if (fns.move) { + listeners.mousemove = function (evt) { + fns.move(cursorInfo(evt, evt)); + } + Monocle.Events.listen(elem, 'mousemove', listeners.mousemove, capture); + } + if (fns.end) { + listeners.mouseup = function (evt) { + fns.end(cursorInfo(evt, evt)); + } + Monocle.Events.listen(elem, 'mouseup', listeners.mouseup, capture); + } + if (fns.cancel) { + listeners.mouseout = function (evt) { + obj = evt.relatedTarget || evt.fromElement; + while (obj && (obj = obj.parentNode)) { + if (obj == elem) { return; } + } + 
fns.cancel(cursorInfo(evt, evt)); + } + Monocle.Events.listen(elem, 'mouseout', listeners.mouseout, capture); + } + } else { + if (fns.start) { + listeners.start = function (evt) { + if (evt.touches.length > 1) { return; } + fns.start(cursorInfo(evt, evt.targetTouches[0])); + } + } + if (fns.move) { + listeners.move = function (evt) { + if (evt.touches.length > 1) { return; } + fns.move(cursorInfo(evt, evt.targetTouches[0])); + } + } + if (fns.end) { + listeners.end = function (evt) { + fns.end(cursorInfo(evt, evt.changedTouches[0])); + evt.preventDefault(); + } + } + if (fns.cancel) { + listeners.cancel = function (evt) { + fns.cancel(cursorInfo(evt, evt.changedTouches[0])); + } + } + + if (Monocle.Browser.has.iframeTouchBug) { + Monocle.Events.tMonitor = Monocle.Events.tMonitor || + new Monocle.Events.TouchMonitor(); + Monocle.Events.tMonitor.listen(elem, listeners, options); + } else { + for (etype in listeners) { + Monocle.Events.listen(elem, 'touch'+etype, listeners[etype], capture); + } + } + } + + return listeners; +} + + +Monocle.Events.deafenForContact = function (elem, listeners) { + var prefix = ""; + if (Monocle.Browser.has.touch) { + prefix = Monocle.Browser.has.iframeTouchBug ? 
"contact" : "touch"; + } + + for (evtType in listeners) { + Monocle.Events.deafen(elem, prefix + evtType, listeners[evtType]); + } +} + + +Monocle.Events.listenForTap = function (elem, fn) { + var startPos; + + if (Monocle.Browser.on.Kindle3) { + Monocle.Events.listen(elem, 'click', function () {}); + } + + var annulIfOutOfBounds = function (evt) { + if (evt.type.match(/^mouse/)) { + return; + } + if (Monocle.Browser.is.MobileSafari && Monocle.Browser.iOSVersion < "3.2") { + return; + } + if ( + evt.m.registrantX < 0 || evt.m.registrantX > elem.offsetWidth || + evt.m.registrantY < 0 || evt.m.registrantY > elem.offsetHeight + ) { + startPos = null; + } else { + evt.preventDefault(); + } + } + + return Monocle.Events.listenForContact( + elem, + { + start: function (evt) { + startPos = [evt.m.pageX, evt.m.pageY]; + evt.preventDefault(); + }, + move: annulIfOutOfBounds, + end: function (evt) { + annulIfOutOfBounds(evt); + if (startPos) { + evt.m.startOffset = startPos; + fn(evt); + } + }, + cancel: function (evt) { + startPos = null; + } + }, + { + useCapture: false + } + ); +} + + +Monocle.Events.deafenForTap = Monocle.Events.deafenForContact; + + +Monocle.Events.TouchMonitor = function () { + if (Monocle.Events == this) { + return new Monocle.Events.TouchMonitor(); + } + + var API = { constructor: Monocle.Events.TouchMonitor } + var k = API.constants = API.constructor; + var p = API.properties = { + touching: null, + edataPrev: null, + originator: null, + brokenModel_4_1: navigator.userAgent.match(/ OS 4_1/) + } + + + function listenOnIframe(iframe) { + if (iframe.contentDocument) { + enableTouchProxy(iframe.contentDocument); + iframe.contentDocument.isTouchFrame = true; + } + + if (p.brokenModel_4_1) { + enableTouchProxy(iframe); + } + } + + + function listen(element, fns, useCapture) { + for (etype in fns) { + Monocle.Events.listen(element, 'contact'+etype, fns[etype], useCapture); + } + enableTouchProxy(element, useCapture); + } + + + function 
enableTouchProxy(element, useCapture) { + if (element.monocleTouchProxy) { + return; + } + element.monocleTouchProxy = true; + + var fn = function (evt) { touchProxyHandler(element, evt) } + Monocle.Events.listen(element, "touchstart", fn, useCapture); + Monocle.Events.listen(element, "touchmove", fn, useCapture); + Monocle.Events.listen(element, "touchend", fn, useCapture); + Monocle.Events.listen(element, "touchcancel", fn, useCapture); + } + + + function touchProxyHandler(element, evt) { + var edata = { + start: evt.type == "touchstart", + move: evt.type == "touchmove", + end: evt.type == "touchend" || evt.type == "touchcancel", + time: new Date().getTime(), + frame: element.isTouchFrame + } + + if (!p.touching) { + p.originator = element; + } + + var target = element; + var touch = evt.touches[0] || evt.changedTouches[0]; + target = document.elementFromPoint(touch.screenX, touch.screenY); + + if (target) { + translateTouchEvent(element, target, evt, edata); + } + } + + + function translateTouchEvent(element, target, evt, edata) { + if ( + p.brokenModel_4_1 && + !edata.frame && + !p.touching && + edata.start && + p.edataPrev && + p.edataPrev.end && + (edata.time - p.edataPrev.time) < 30 + ) { + evt.preventDefault(); + return; + } + + if (!p.touching && !edata.end) { + return fireStart(evt, target, edata); + } + + if (edata.move && p.touching) { + return fireMove(evt, edata); + } + + if (p.brokenModel_4_1) { + if (p.touching && !edata.frame) { + return fireProvisionalEnd(evt, edata); + } + } else { + if (edata.end && p.touching) { + return fireProvisionalEnd(evt, edata); + } + } + + if ( + p.brokenModel_4_1 && + p.originator != element && + edata.frame && + edata.end + ) { + evt.preventDefault(); + return; + } + + if (edata.frame && edata.end && p.touching) { + return fireProvisionalEnd(evt, edata); + } + } + + + function fireStart(evt, target, edata) { + p.touching = target; + p.edataPrev = edata; + return fireTouchEvent(p.touching, 'start', evt); + } + + + 
function fireMove(evt, edata) { + clearProvisionalEnd(); + p.edataPrev = edata; + return fireTouchEvent(p.touching, 'move', evt); + } + + + function fireEnd(evt, edata) { + var result = fireTouchEvent(p.touching, 'end', evt); + p.edataPrev = edata; + p.touching = null; + return result; + } + + + function fireProvisionalEnd(evt, edata) { + clearProvisionalEnd(); + var mimicEvt = mimicTouchEvent(p.touching, 'end', evt); + p.edataPrev = edata; + + p.provisionalEnd = setTimeout( + function() { + if (p.touching) { + p.touching.dispatchEvent(mimicEvt); + p.touching = null; + } + }, + 30 + ); + } + + + function clearProvisionalEnd() { + if (p.provisionalEnd) { + clearTimeout(p.provisionalEnd); + p.provisionalEnd = null; + } + } + + + function mimicTouchEvent(target, newtype, evt) { + var cloneTouch = function (t) { + return document.createTouch( + document.defaultView, + target, + t.identifier, + t.screenX, + t.screenY, + t.screenX, + t.screenY + ); + } + + var findTouch = function (id) { + for (var i = 0; i < touches.all.length; ++i) { + if (touches.all[i].identifier == id) { + return touches.all[i]; + } + } + } + + var touches = { all: [], target: [], changed: [] }; + for (var i = 0; i < evt.touches.length; ++i) { + touches.all.push(cloneTouch(evt.touches[i])); + } + for (var i = 0; i < evt.targetTouches.length; ++i) { + touches.target.push( + findTouch(evt.targetTouches[i].identifier) || + cloneTouch(evt.targetTouches[i]) + ); + } + for (var i = 0; i < evt.changedTouches.length; ++i) { + touches.changed.push( + findTouch(evt.changedTouches[i].identifier) || + cloneTouch(evt.changedTouches[i]) + ); + } + + var mimicEvt = document.createEvent('TouchEvent'); + mimicEvt.initTouchEvent( + "contact"+newtype, + true, + true, + document.defaultView, + evt.detail, + evt.screenX, + evt.screenY, + evt.screenX, + evt.screenY, + evt.ctrlKey, + evt.altKey, + evt.shiftKey, + evt.metaKey, + document.createTouchList.apply(document, touches.all), + 
document.createTouchList.apply(document, touches.target), + document.createTouchList.apply(document, touches.changed), + evt.scale, + evt.rotation + ); + + return mimicEvt; + } + + + function fireTouchEvent(target, newtype, evt) { + var mimicEvt = mimicTouchEvent(target, newtype, evt); + var result = target.dispatchEvent(mimicEvt); + if (!result) { + evt.preventDefault(); + } + return result; + } + + + API.listen = listen; + API.listenOnIframe = listenOnIframe; + + return API; +} + + +Monocle.Events.listenOnIframe = function (frame) { + if (!Monocle.Browser.has.iframeTouchBug) { + return; + } + Monocle.Events.tMonitor = Monocle.Events.tMonitor || + new Monocle.Events.TouchMonitor(); + Monocle.Events.tMonitor.listenOnIframe(frame); +} + +Monocle.pieceLoaded('events'); +Monocle.Styles = { + applyRules: function (elem, rules) { + if (typeof rules != 'string') { + var parts = []; + for (var declaration in rules) { + parts.push(declaration+": "+rules[declaration]+";") + } + rules = parts.join(" "); + } + elem.style.cssText += ';'+rules; + return elem.style.cssText; + }, + + affix: function (elem, property, value) { + var target = elem.style ? 
elem.style : elem; + target[Monocle.Browser.CSSProps.toDOMProp(property)] = value; + }, + + setX: function (elem, x) { + var s = elem.style; + if (typeof x == "number") { x += "px"; } + if (Monocle.Browser.has.transform3d) { + s.webkitTransform = "translate3d("+x+", 0, 0)"; + } else { + s.webkitTransform = "translateX("+x+")"; + } + s.MozTransform = s.OTransform = s.transform = "translateX("+x+")"; + return x; + }, + + setY: function (elem, y) { + var s = elem.style; + if (typeof y == "number") { y += "px"; } + if (Monocle.Browser.has.transform3d) { + s.webkitTransform = "translate3d(0, "+y+", 0)"; + } else { + s.webkitTransform = "translateY("+y+")"; + } + s.MozTransform = s.OTransform = s.transform = "translateY("+y+")"; + return y; + } +} + + +Monocle.Styles.container = { + "position": "absolute", + "top": "0", + "left": "0", + "bottom": "0", + "right": "0" +} + +Monocle.Styles.page = { + "position": "absolute", + "z-index": "1", + "-webkit-user-select": "none", + "-moz-user-select": "none", + "user-select": "none", + "-webkit-transform": "translate3d(0,0,0)" + + /* + "background": "white", + "top": "0", + "left": "0", + "bottom": "0", + "right": "0" + */ +} + +Monocle.Styles.sheaf = { + "position": "absolute", + "overflow": "hidden" // Required by MobileSafari to constrain inner iFrame. 
// READER
//
// Builds the reader inside `node`: wraps the box element in a Monocle.Factory,
// creates one page/sheaf/iframe triple per flipper page, installs the default
// page stylesheet, then loads the book and fires "monocle:loaded".
//
//   node           - the box element, or the id of one.
//   bookData       - data source handed to Monocle.Book; when falsy the
//                    current contents of the box become the book.
//   options        - optional: systemId, flipper, panels, place, primeURL.
//   onLoadCallback - optional; called with the reader API once the book
//                    has been placed on every page.
//
// Returns the reader API object.
Monocle.Reader = function (node, bookData, options, onLoadCallback) {
  // Allow calling without `new`.
  if (Monocle == this) {
    return new Monocle.Reader(node, bookData, options, onLoadCallback);
  }

  var API = { constructor: Monocle.Reader }
  var k = API.constants = API.constructor;
  var p = API.properties = {
    // Set to true once the book has been placed on all pages.
    initialized: false,

    // The Monocle.Book currently displayed.
    book: null,

    graph: {},

    // Rule sets added via addPageStyles; the array index is the sheet id.
    pageStylesheets: [],

    // Assigned as the id of each component document's root element.
    systemId: (options ? options.systemId : null) || k.DEFAULT_SYSTEM_ID,

    classPrefix: k.DEFAULT_CLASS_PREFIX,

    // One record per control registered through addControl.
    controls: [],

    // Timer handle used to debounce resized().
    resizeTimer: null
  }

  // The Factory wrapper around the box element (also exposed as API.dom).
  var dom;


  // Sets up the DOM, flipper and frames, then loads the book.
  function initialize(node, bookData, options, onLoadCallback) {
    var box = typeof(node) == "string" ? document.getElementById(node) : node;
    dom = API.dom = box.dom = new Monocle.Factory(box, 'box', 0, API);

    options = options || {}

    dispatchEvent("monocle:initializing");

    var bk;
    if (bookData) {
      bk = new Monocle.Book(bookData);
    } else {
      // No data source: the box's own markup is the book.
      bk = Monocle.Book.fromNodes([box.cloneNode(true)]);
    }
    box.innerHTML = "";

    positionBox();

    attachFlipper(options.flipper);
    if (!p.flipper) {
      // attachFlipper has shown the abort message; every later step
      // dereferences p.flipper, so stop here instead of throwing.
      return;
    }

    createReaderElements();

    p.defaultStyles = addPageStyles(k.DEFAULT_STYLE_RULES, false);

    primeFrames(options.primeURL, function () {
      applyStyles();

      listen('monocle:componentchange', persistPageStylesOnComponentChange);

      p.flipper.listenForInteraction(options.panels);

      setBook(bk, options.place, function () {
        p.initialized = true;
        if (onLoadCallback) { onLoadCallback(API); }
        dispatchEvent("monocle:loaded");
      });
    });
  }


  // Forces the box to be a positioning context unless it already is
  // absolutely or relatively positioned.
  function positionBox() {
    var currPosVal;
    var box = dom.find('box');
    if (document.defaultView) {
      var currStyle = document.defaultView.getComputedStyle(box, null);
      currPosVal = currStyle.getPropertyValue('position');
    } else if (box.currentStyle) {
      currPosVal = box.currentStyle.position
    }
    if (["absolute", "relative"].indexOf(currPosVal) == -1) {
      box.style.position = "relative";
    }
  }


  // Instantiates the flipper. Without CSS column support the legacy
  // flipper is forced; if that is not loaded either, an abort message
  // is appended to the box and p.flipper stays unset.
  function attachFlipper(flipperClass) {
    if (!Monocle.Browser.has.columns) {
      flipperClass = Monocle.Flippers[k.FLIPPER_LEGACY_CLASS];
      if (!flipperClass) {
        return dom.append(
          'div',
          'abortMsg',
          { 'class': k.abortMessage.CLASSNAME, 'html': k.abortMessage.TEXT }
        );
      }
    } else if (!flipperClass) {
      flipperClass = Monocle.Flippers[k.FLIPPER_DEFAULT_CLASS];
      if (!flipperClass) {
        throw("No flipper class");
      }
    }
    // NOTE(review): p.readerOptions is never assigned in this function
    // body, so the flipper receives undefined here - confirm intent.
    p.flipper = new flipperClass(API, null, p.readerOptions);
  }


  // Creates container > page > sheaf > iframe for each flipper page,
  // plus the overlay used by modal/popover controls.
  function createReaderElements() {
    var cntr = dom.append('div', 'container');
    for (var i = 0; i < p.flipper.pageCount; ++i) {
      var page = cntr.dom.append('div', 'page', i);
      page.m = { reader: API, pageIndex: i, place: null }
      page.m.sheafDiv = page.dom.append('div', 'sheaf', i);
      page.m.activeFrame = page.m.sheafDiv.dom.append('iframe', 'component', i);
      page.m.activeFrame.m = { 'pageDiv': page }
      p.flipper.addPage(page);
      Monocle.Events.listenOnIframe(page.m.activeFrame);
    }
    dom.append('div', 'overlay');
    dispatchEvent("monocle:loading");
  }


  // Points every page iframe at `url` (default about:blank) and defers
  // `callback` until all of them have fired their load event.
  function primeFrames(url, callback) {
    url = url || "about:blank";

    var pageMax = p.flipper.pageCount;
    var pageCount = 0;

    var cb = function (evt) {
      var frame = evt.target || evt.srcElement;
      Monocle.Events.deafen(frame, 'load', cb);
      if (Monocle.Browser.is.WebKit) {
        frame.contentDocument.documentElement.style.overflow = "hidden";
      }
      if ((pageCount += 1) == pageMax) {
        Monocle.defer(callback);
      }
    }

    for (var i = 0; i < pageMax; ++i) {
      var page = dom.find('page', i);
      page.m.activeFrame.style.visibility = "hidden";
      page.m.activeFrame.setAttribute('frameBorder', 0);
      page.m.activeFrame.setAttribute('scrolling', 'no');
      Monocle.Events.listen(page.m.activeFrame, 'load', cb);
      page.m.activeFrame.src = url;
    }
  }


  // Applies the Monocle.Styles rule sets to the elements created above.
  function applyStyles() {
    dom.find('container').dom.setStyles(Monocle.Styles.container);
    for (var i = 0; i < p.flipper.pageCount; ++i) {
      var page = dom.find('page', i);
      page.dom.setStyles(Monocle.Styles.page);
      dom.find('sheaf', i).dom.setStyles(Monocle.Styles.sheaf);
      var cmpt = dom.find('component', i)
      cmpt.dom.setStyles(Monocle.Styles.component);
      Monocle.Styles.applyRules(cmpt.contentDocument.body, Monocle.Styles.body);
    }
    dom.find('overlay').dom.setStyles(Monocle.Styles.overlay);
    dispatchEvent('monocle:styles');
  }


  // Installs the book and moves to `place` (default: page 1). `callback`
  // fires after one componentchange event per flipper page.
  function setBook(bk, place, callback) {
    p.book = bk;
    var pageCount = 0;
    if (typeof callback == 'function') {
      var watcher = function (evt) {
        if ((pageCount += 1) == p.flipper.pageCount) {
          deafen('monocle:componentchange', watcher);
          callback();
        }
      }
      listen('monocle:componentchange', watcher);
    }
    p.flipper.moveTo(place || { page: 1 });
  }


  function getBook() {
    return p.book;
  }


  // To be called when the box changes size. Dispatches the cancelable
  // "monocle:resizing" event, then (debounced by RESIZE_DELAY) re-seats
  // the current page and dispatches "monocle:resize".
  function resized() {
    if (!p.initialized) {
      console.warn('Attempt to resize book before initialization.');
    }
    if (!dispatchEvent("monocle:resizing", {}, true)) {
      return;
    }
    clearTimeout(p.resizeTimer);
    p.resizeTimer = setTimeout(
      function () {
        p.flipper.moveTo({ page: pageNumber() });
        dispatchEvent("monocle:resize");
      },
      k.durations.RESIZE_DELAY
    );
  }


  // Page number of the place on pageDiv, falling back to 1.
  function pageNumber(pageDiv) {
    var place = getPlace(pageDiv);
    return place ? (place.pageNumber() || 1) : 1;
  }


  // The flipper's place for pageDiv.
  function getPlace(pageDiv) {
    if (!p.initialized) {
      console.warn('Attempt to access place before initialization.');
    }
    return p.flipper.getPlace(pageDiv);
  }


  // Moves to `locus`. A locus without a direction is treated as a jump
  // and is bracketed by "monocle:jumping" / "monocle:jump" events.
  function moveTo(locus, callback) {
    if (!p.initialized) {
      console.warn('Attempt to move place before initialization.');
    }
    var fn = callback;
    if (!locus.direction) {
      dispatchEvent('monocle:jumping', { locus: locus });
      fn = function () {
        dispatchEvent('monocle:jump', { locus: locus });
        if (callback) { callback(); }
      }
    }
    p.flipper.moveTo(locus, fn);
  }


  // Moves to the chapter identified by `src`. Returns true on success;
  // otherwise dispatches "monocle:notfound" and returns false.
  function skipToChapter(src) {
    var locus = p.book.locusOfChapter(src);
    if (locus) {
      moveTo(locus);
      return true;
    } else {
      dispatchEvent("monocle:notfound", { href: src });
      return false;
    }
  }


  // Registers a control and inserts its elements. cType is one of
  // "standard" (default), "page", "modal", "popover" or "invisible".
  // options.hidden leaves the control hidden. Returns the control, or
  // undefined if it was already registered.
  function addControl(ctrl, cType, options) {
    for (var i = 0; i < p.controls.length; ++i) {
      if (p.controls[i].control == ctrl) {
        console.warn("Already added control: " + ctrl);
        return;
      }
    }

    options = options || {};

    var ctrlData = {
      control: ctrl,
      elements: [],
      controlType: cType
    }
    p.controls.push(ctrlData);

    var ctrlElem;
    var cntr = dom.find('container'), overlay = dom.find('overlay');
    if (!cType || cType == "standard") {
      ctrlElem = ctrl.createControlElements(cntr);
      cntr.appendChild(ctrlElem);
      ctrlData.elements.push(ctrlElem);
    } else if (cType == "page") {
      // One element per page.
      for (var i = 0; i < p.flipper.pageCount; ++i) {
        var page = dom.find('page', i);
        var runner = ctrl.createControlElements(page);
        page.appendChild(runner);
        ctrlData.elements.push(runner);
      }
    } else if (cType == "modal" || cType == "popover") {
      ctrlElem = ctrl.createControlElements(overlay);
      overlay.appendChild(ctrlElem);
      ctrlData.elements.push(ctrlElem);
      ctrlData.usesOverlay = true;
    } else if (cType == "invisible") {
      // Elements are optional for invisible controls.
      if (
        typeof(ctrl.createControlElements) == "function" &&
        (ctrlElem = ctrl.createControlElements(cntr))
      ) {
        cntr.appendChild(ctrlElem);
        ctrlData.elements.push(ctrlElem);
      }
    } else {
      console.warn("Unknown control type: " + cType);
    }

    for (var i = 0; i < ctrlData.elements.length; ++i) {
      Monocle.Styles.applyRules(ctrlData.elements[i], Monocle.Styles.control);
    }

    if (options.hidden) {
      hideControl(ctrl);
    } else {
      showControl(ctrl);
    }

    if (typeof ctrl.assignToReader == 'function') {
      ctrl.assignToReader(API);
    }

    return ctrl;
  }


  // The registration record for ctrl, or undefined.
  function dataForControl(ctrl) {
    for (var i = 0; i < p.controls.length; ++i) {
      if (p.controls[i].control == ctrl) {
        return p.controls[i];
      }
    }
  }


  // Hides the control's elements (and the overlay, if it uses one) and
  // dispatches "controlhide".
  function hideControl(ctrl) {
    var controlData = dataForControl(ctrl);
    if (!controlData) {
      console.warn("No data for control: " + ctrl);
      return;
    }
    if (controlData.hidden) {
      return;
    }
    for (var i = 0; i < controlData.elements.length; ++i) {
      controlData.elements[i].style.display = "none";
    }
    if (controlData.usesOverlay) {
      var overlay = dom.find('overlay');
      overlay.style.display = "none";
      Monocle.Events.deafenForContact(overlay, overlay.listeners);
    }
    controlData.hidden = true;
    if (ctrl.properties) {
      ctrl.properties.hidden = true;
    }
    dispatchEvent('controlhide', ctrl, false);
  }


  // Shows the control's elements and dispatches "controlshow". For a
  // popover, contact on the overlay outside the control hides it again.
  function showControl(ctrl) {
    var controlData = dataForControl(ctrl);
    if (!controlData) {
      console.warn("No data for control: " + ctrl);
      return;
    }
    // `hidden` starts out undefined, so only an explicit false means
    // "already shown".
    if (controlData.hidden == false) {
      return;
    }
    for (var i = 0; i < controlData.elements.length; ++i) {
      controlData.elements[i].style.display = "block";
    }
    var overlay = dom.find('overlay');
    if (controlData.usesOverlay) {
      overlay.style.display = "block";
    }
    if (controlData.controlType == "popover") {
      overlay.listeners = Monocle.Events.listenForContact(
        overlay,
        {
          start: function (evt) {
            // Declared locally: previously this leaked a global `obj`.
            var obj = evt.target || window.event.srcElement;
            // Walk up from the contact target; contact inside the
            // control's own element leaves it open.
            do {
              if (obj == controlData.elements[0]) { return true; }
            } while (obj && (obj = obj.parentNode));
            hideControl(ctrl);
          },
          move: function (evt) {
            evt.preventDefault();
          }
        }
      );
    }
    controlData.hidden = false;
    if (ctrl.properties) {
      ctrl.properties.hidden = false;
    }
    dispatchEvent('controlshow', ctrl, false);
  }


  // Dispatches a non-bubbling DOM event on the box with `data` attached
  // as evt.m. Returns the result of the native dispatchEvent, true when
  // document.createEvent is unavailable, or false if dispatching threw.
  function dispatchEvent(evtType, data, cancelable) {
    if (!document.createEvent) {
      return true;
    }
    var evt = document.createEvent("Events");
    evt.initEvent(evtType, false, cancelable || false);
    evt.m = data;
    try {
      return dom.find('box').dispatchEvent(evt);
    } catch(e) {
      console.warn("Failed to dispatch event: " + evtType);
      return false;
    }
  }


  // Listens for an event on the box.
  function listen(evtType, fn, useCapture) {
    Monocle.Events.listen(dom.find('box'), evtType, fn, useCapture);
  }


  // Stops listening for an event on the box.
  function deafen(evtType, fn) {
    Monocle.Events.deafen(dom.find('box'), evtType, fn);
  }


  /* PAGE STYLESHEETS */

  // Adds a stylesheet (string or array of rule strings) to every page
  // document. Returns the sheet index to use with update/remove.
  function addPageStyles(styleRules, restorePlace) {
    return changingStylesheet(function () {
      p.pageStylesheets.push(styleRules);
      var sheetIndex = p.pageStylesheets.length - 1;

      for (var i = 0; i < p.flipper.pageCount; ++i) {
        var doc = dom.find('component', i).contentDocument;
        addPageStylesheet(doc, sheetIndex);
      }
      return sheetIndex;
    }, restorePlace);
  }


  // Replaces the rules of an existing stylesheet in every page document.
  function updatePageStyles(sheetIndex, styleRules, restorePlace) {
    return changingStylesheet(function () {
      p.pageStylesheets[sheetIndex] = styleRules;
      if (typeof styleRules.join == "function") {
        styleRules = styleRules.join("\n");
      }
      for (var i = 0; i < p.flipper.pageCount; ++i) {
        var doc = dom.find('component', i).contentDocument;
        var styleTag = doc.getElementById('monStylesheet'+sheetIndex);
        if (!styleTag) {
          console.warn('No such stylesheet: ' + sheetIndex);
          return;
        }
        if (styleTag.styleSheet) {
          styleTag.styleSheet.cssText = styleRules;
        } else {
          styleTag.replaceChild(
            doc.createTextNode(styleRules),
            styleTag.firstChild
          );
        }
      }
    }, restorePlace);
  }


  // Removes a stylesheet from every page document and forgets its rules.
  function removePageStyles(sheetIndex, restorePlace) {
    return changingStylesheet(function () {
      p.pageStylesheets[sheetIndex] = null;
      for (var i = 0; i < p.flipper.pageCount; ++i) {
        var doc = dom.find('component', i).contentDocument;
        var styleTag = doc.getElementById('monStylesheet'+sheetIndex);
        // A document may lack the tag (unknown index, or already
        // removed); skip it rather than throwing on null.
        if (!styleTag || !styleTag.parentNode) {
          console.warn('No such stylesheet: ' + sheetIndex);
          continue;
        }
        styleTag.parentNode.removeChild(styleTag);
      }
    }, restorePlace);
  }


  // A freshly loaded component document has none of our stylesheets:
  // tag its root element and re-add every live sheet.
  function persistPageStylesOnComponentChange(evt) {
    var doc = evt.m['document'];
    doc.documentElement.id = p.systemId;
    for (var i = 0; i < p.pageStylesheets.length; ++i) {
      if (p.pageStylesheets[i]) {
        addPageStylesheet(doc, i);
      }
    }
  }


  // Runs `callback` and returns its result. Unless restorePlace is
  // exactly false, the change is bracketed by stylesheetchanging /
  // stylesheetchange events and the current page is re-seated.
  function changingStylesheet(callback, restorePlace) {
    restorePlace = (restorePlace === false) ? false : true;
    if (restorePlace) {
      dispatchEvent("monocle:stylesheetchanging", {});
    }
    var result = callback();
    if (restorePlace) {
      p.flipper.moveTo({ page: pageNumber() });
      Monocle.defer(
        function () { dispatchEvent("monocle:stylesheetchange", {}); }
      );
    }
    return result;
  }


  // Appends <style id="monStylesheetN"> for sheet N to doc's head,
  // creating the head if necessary. Returns the style element, or
  // undefined if the sheet has no rules or doc has no root element.
  function addPageStylesheet(doc, sheetIndex) {
    var styleRules = p.pageStylesheets[sheetIndex];

    if (!styleRules) {
      return;
    }

    var head = doc.getElementsByTagName('head')[0];
    if (!head) {
      if (!doc.documentElement) { return; } // FIXME: IE doesn't like docElem.
      head = doc.createElement('head');
      doc.documentElement.appendChild(head);
    }

    if (typeof styleRules.join == "function") {
      styleRules = styleRules.join("\n");
    }

    var styleTag = doc.createElement('style');
    styleTag.type = 'text/css';
    styleTag.id = "monStylesheet"+sheetIndex;
    if (styleTag.styleSheet) {
      styleTag.styleSheet.cssText = styleRules;
    } else {
      styleTag.appendChild(doc.createTextNode(styleRules));
    }

    head.appendChild(styleTag);

    return styleTag;
  }


  // The pages the flipper reports as visible, or the first page when
  // the flipper does not implement visiblePages.
  function visiblePages() {
    return p.flipper.visiblePages ? p.flipper.visiblePages() : [dom.find('page')];
  }


  API.getBook = getBook;
  API.getPlace = getPlace;
  API.moveTo = moveTo;
  API.skipToChapter = skipToChapter;
  API.resized = resized;
  API.addControl = addControl;
  API.hideControl = hideControl;
  API.showControl = showControl;
  API.dispatchEvent = dispatchEvent;
  API.listen = listen;
  API.deafen = deafen;
  API.addPageStyles = addPageStyles;
  API.updatePageStyles = updatePageStyles;
  API.removePageStyles = removePageStyles;
  API.visiblePages = visiblePages;

  initialize(node, bookData, options, onLoadCallback);

  return API;
}
"float: none !important;" : "") + + "}" + + "body {" + + "margin: 0 !important;" + + "padding: 0 !important;" + + "-webkit-text-size-adjust: none;" + + "}" + + "table, img {" + + "max-width: 100% !important;" + + "max-height: 90% !important;" + + "}" +] + + +Monocle.pieceLoaded('reader'); +/* BOOK */ + +/* The Book handles movement through the content by the reader page elements. + * + * It's responsible for instantiating components as they are required, + * and for calculating which component and page number to move to (based on + * requests from the Reader). + * + * It should set and know the place of each page element too. + * + */ +Monocle.Book = function (dataSource) { + if (Monocle == this) { return new Monocle.Book(dataSource); } + + var API = { constructor: Monocle.Book } + var k = API.constants = API.constructor; + var p = API.properties = { + dataSource: dataSource, + components: [], + chapters: {} // flat arrays of chapters per component + } + + + function initialize() { + p.componentIds = dataSource.getComponents(); + p.contents = dataSource.getContents(); + p.lastCIndex = p.componentIds.length - 1; + } + + + function pageNumberAt(pageDiv, locus) { + locus.load = false; + var currComponent = pageDiv.m.activeFrame ? 
+ pageDiv.m.activeFrame.m.component : + null; + var component = null; + var cIndex = p.componentIds.indexOf(locus.componentId); + if (cIndex < 0 && !currComponent) { + locus.load = true; + locus.componentId = p.componentIds[0]; + return locus; + } else if (cIndex < 0) { + component = currComponent; + locus.componentId = pageDiv.m.activeFrame.m.component.properties.id; + cIndex = p.componentIds.indexOf(locus.componentId); + } else if (!p.components[cIndex] || p.components[cIndex] != currComponent) { + locus.load = true; + return locus; + } else { + component = currComponent; + } + + var result = { load: false, componentId: locus.componentId, page: 1 } + + var lastPageNum = { 'old': component.lastPageNumber() } + var changedDims = component.updateDimensions(pageDiv); + lastPageNum['new'] = component.lastPageNumber(); + + if (typeof(locus.page) == "number") { + result.page = locus.page; + } else if (typeof(locus.pagesBack) == "number") { + result.page = lastPageNum['new'] + locus.pagesBack; + } else if (typeof(locus.percent) == "number") { + var place = new Monocle.Place(); + place.setPlace(component, 1); + result.page = place.pageAtPercentageThrough(locus.percent); + } else if (typeof(locus.direction) == "number") { + if (!pageDiv.m.place) { + console.warn("Can't move in a direction if pageDiv has no place."); + } + result.page = pageDiv.m.place.pageNumber(); + result.page += locus.direction; + } else if (typeof(locus.anchor) == "string") { + result.page = component.pageForChapter(locus.anchor, pageDiv); + } else if (typeof(locus.position) == "string") { + if (locus.position == "start") { + result.page = 1; + } else if (locus.position == "end") { + result.page = lastPageNum['new']; + } + } else { + console.warn("Unrecognised locus: " + locus); + } + + if (changedDims && lastPageNum['old']) { + result.page = Math.round( + lastPageNum['new'] * (result.page / lastPageNum['old']) + ); + } + + if (result.page < 1) { + if (cIndex == 0) { + result.page = 1; + } else { + 
result.load = true; + result.componentId = p.componentIds[cIndex - 1]; + result.pagesBack = result.page; + result.page = null; + } + } else if (result.page > lastPageNum['new']) { + if (cIndex == p.lastCIndex) { + result.page = lastPageNum['new']; + } else { + result.load = true; + result.componentId = p.componentIds[cIndex + 1]; + result.page -= lastPageNum['new']; + } + } + + return result; + } + + + function setPageAt(pageDiv, locus) { + locus = pageNumberAt(pageDiv, locus); + if (!locus.load) { + var component = p.components[p.componentIds.indexOf(locus.componentId)]; + pageDiv.m.place = pageDiv.m.place || new Monocle.Place(); + pageDiv.m.place.setPlace(component, locus.page); + + var evtData = { + page: pageDiv, + locus: locus, + pageNumber: pageDiv.m.place.pageNumber(), + componentId: locus.componentId + } + pageDiv.m.reader.dispatchEvent("monocle:pagechange", evtData); + } + return locus; + } + + + function loadPageAt(pageDiv, locus, callback, progressCallback) { + var cIndex = p.componentIds.indexOf(locus.componentId); + if (!locus.load || cIndex < 0) { + locus = pageNumberAt(pageDiv, locus); + } + + if (!locus.load) { + callback(locus); + return; + } + + var findPageNumber = function () { + locus = setPageAt(pageDiv, locus); + if (locus.load) { + loadPageAt(pageDiv, locus, callback, progressCallback) + } else { + callback(locus); + } + } + + var pgFindPageNumber = function () { + progressCallback ? progressCallback(findPageNumber) : findPageNumber(); + } + + var applyComponent = function (component) { + component.applyTo(pageDiv, pgFindPageNumber); + } + + var pgApplyComponent = function (component) { + progressCallback ? 
// Passes the Monocle.Component at `index` to `callback`, creating it first
// if it has not been loaded yet.
//
// The data source's getComponent(id, fn) may either return the component
// source directly or deliver it later by calling fn; both are handled, and
// the component is only ever constructed once.
//
// When pageDiv is given, "monocle:componentloading" and
// "monocle:componentloaded" are dispatched on its reader around the load.
function loadComponent(index, callback, pageDiv) {
  if (p.components[index]) {
    return callback(p.components[index]);
  }
  var cmptId = p.componentIds[index];
  if (pageDiv) {
    var evtData = { 'page': pageDiv, 'component': cmptId, 'index': index };
    pageDiv.m.reader.dispatchEvent('monocle:componentloading', evtData);
  }
  var fn = function (cmptSource) {
    if (pageDiv) {
      evtData['source'] = cmptSource;
      pageDiv.m.reader.dispatchEvent('monocle:componentloaded', evtData);
      // The original also did `html = evtData['html']` here: a write to
      // an undeclared (global) variable that nothing in view reads.
      // Dropped so no global is created.
    }
    p.components[index] = new Monocle.Component(
      API,
      cmptId,
      index,
      chaptersForComponent(cmptId),
      cmptSource
    );
    callback(p.components[index]);
  }
  var cmptSource = p.dataSource.getComponent(cmptId, fn);
  // Synchronous data source: it returned the source instead of (or before)
  // calling fn. Skip if fn has already built the component.
  if (cmptSource && !p.components[index]) {
    fn(cmptSource);
  }
}
// PLACE
//
// A position within a component, stored as the component plus a fraction
// (`percent`) of the way through it. Page numbers are derived from the
// fraction and the component's current page count.
Monocle.Place = function () {

  var API = { constructor: Monocle.Place }
  var k = API.constants = API.constructor;
  var p = API.properties = {
    component: null,
    percent: null
  }


  // Sets the place to page `pageN` of `cmpt`.
  function setPlace(cmpt, pageN) {
    p.component = cmpt;
    p.percent = pageN / cmpt.lastPageNumber();
    p.chapter = null;
  }


  // Sets the place to the given fraction of the way through `cmpt`.
  function setPercentageThrough(cmpt, percent) {
    p.component = cmpt;
    p.percent = percent;
    p.chapter = null;
  }


  function componentId() {
    return p.component.properties.id;
  }


  function percentageThrough() {
    return p.percent;
  }


  // The page number (never less than 1) at fraction `pc` of the
  // current component.
  function pageAtPercentageThrough(pc) {
    return Math.max(Math.round(p.component.lastPageNumber() * pc), 1);
  }


  function pageNumber() {
    return pageAtPercentageThrough(p.percent);
  }


  // The chapter containing this place; cached until the place is reset.
  function chapterInfo() {
    if (p.chapter) {
      return p.chapter;
    }
    return p.chapter = p.component.chapterForPage(pageNumber());
  }


  function chapterTitle() {
    var chp = chapterInfo();
    return chp ? chp.title : null;
  }


  // The component id, with "#fragment" appended when the current
  // chapter has one.
  function chapterSrc() {
    var src = componentId();
    var cinfo = chapterInfo();
    if (cinfo && cinfo.fragment) {
      src += "#" + cinfo.fragment;
    }
    return src;
  }


  // A { page, componentId } locus for this place, with the page offset
  // by options.direction when given.
  function getLocus(options) {
    options = options || {};
    var locus = {
      page: pageNumber(),
      componentId: componentId()
    }
    if (options.direction) {
      locus.page += options.direction;
    }
    return locus;
  }


  // Fraction of the way through the whole book, giving every component
  // an equal share.
  function percentageOfBook() {
    // Both were assigned without `var`, leaking two globals.
    var componentIds = p.component.properties.book.properties.componentIds;
    var componentSize = 1.0 / componentIds.length;
    var pc = componentIds.indexOf(componentId()) * componentSize;
    pc += componentSize * p.percent;
    return pc;
  }


  function onFirstPageOfBook() {
    return p.component.properties.index == 0 && pageNumber() == 1;
  }


  function onLastPageOfBook() {
    return (
      p.component.properties.index ==
        p.component.properties.book.properties.lastCIndex &&
      pageNumber() == p.component.lastPageNumber()
    );
  }


  API.setPlace = setPlace;
  API.setPercentageThrough = setPercentageThrough;
  API.componentId = componentId;
  API.percentageThrough = percentageThrough;
  API.pageAtPercentageThrough = pageAtPercentageThrough;
  API.pageNumber = pageNumber;
  API.chapterInfo = chapterInfo;
  API.chapterTitle = chapterTitle;
  API.chapterSrc = chapterSrc;
  API.getLocus = getLocus;
  API.percentageOfBook = percentageOfBook;
  API.onFirstPageOfBook = onFirstPageOfBook;
  API.onLastPageOfBook = onLastPageOfBook;

  return API;
}


// Convenience factory: a Place at page `pageNumber` of `component`.
Monocle.Place.FromPageNumber = function (component, pageNumber) {
  var place = new Monocle.Place();
  place.setPlace(component, pageNumber);
  return place;
}

// Convenience factory: a Place at fraction `percent` of `component`.
Monocle.Place.FromPercentageThrough = function (component, percent) {
  var place = new Monocle.Place();
  place.setPercentageThrough(component, percent);
  return place;
}

Monocle.pieceLoaded('place');
// Loads an HTML string into `frame` by assigning a `javascript:` URL whose
// body is a single-quoted string literal containing the markup. `callback`
// is deferred until the frame's load event fires.
function loadFrameFromHTML(src, frame, callback) {
  // The markup becomes one JS string literal, so collapse all whitespace
  // (including newlines) to single spaces.
  src = src.replace(/\s+/g, ' ');

  // Escape backslashes before quotes. Otherwise a backslash already in the
  // markup would combine with the quote escaping below (\' -> \\') and
  // terminate the literal early.
  src = src.replace(/\\/g, '\\\\');
  src = src.replace(/\'/g, '\\\'');

  if (Monocle.Browser.is.Gecko) {
    // Strip the doctype declaration for Gecko. The pattern in the file was
    // "]*>", which removes the first ">" of the document instead; it looks
    // like a tag-stripped "<!DOCTYPE[^>]*>" - NOTE(review): confirm
    // against upstream Monocle.
    var doctypeFragment = "<!DOCTYPE[^>]*>";
    src = src.replace(new RegExp(doctypeFragment, 'im'), '');
  }

  src = "javascript: '" + src + "';";

  frame.onload = function () {
    frame.onload = null;
    Monocle.defer(callback);
  }
  frame.src = src;
}
destDoc.importNode(nodes[i], true); + destBdy.appendChild(node); + } + + var oldHead = destDoc.getElementsByTagName('head')[0]; + if (oldHead) { + destDoc.documentElement.replaceChild(destHd, oldHead); + } else { + destDoc.documentElement.appendChild(destHd); + } + if (destDoc.body) { + destDoc.documentElement.replaceChild(destBdy, destDoc.body); + } else { + destDoc.documentElement.appendChild(destBdy); + } + + if (callback) { callback(); } + } + + + function loadFrameFromDocument(srcDoc, frame, callback) { + var destDoc = frame.contentDocument; + + var srcBases = srcDoc.getElementsByTagName('base'); + if (srcBases[0]) { + var head = destDoc.getElementsByTagName('head')[0]; + if (!head) { + try { + head = destDoc.createElement('head'); + if (destDoc.body) { + destDoc.insertBefore(head, destDoc.body); + } else { + destDoc.appendChild(head); + } + } catch (e) { + head = destDoc.body; + } + } + var bases = destDoc.getElementsByTagName('base'); + var base = bases[0] ? bases[0] : destDoc.createElement('base'); + base.setAttribute('href', srcBases[0].getAttribute('href')); + head.appendChild(base); + } + + destDoc.replaceChild( + destDoc.importNode(srcDoc.documentElement, true), + destDoc.documentElement + ); + + + Monocle.defer(callback); + } + + + function setupFrame(pageDiv, frame) { + Monocle.Events.listenOnIframe(frame); + + var evtData = { + 'page': pageDiv, + 'document': frame.contentDocument, + 'component': API + }; + pageDiv.m.reader.dispatchEvent('monocle:componentchange', evtData); + + var doc = frame.contentDocument; + var win = doc.defaultView; + var currStyle = win.getComputedStyle(doc.body, null); + var lh = parseFloat(currStyle.getPropertyValue('line-height')); + var fs = parseFloat(currStyle.getPropertyValue('font-size')); + doc.body.style.lineHeight = lh / fs; + + p.pageLength = pageDiv.m.dimensions.measure(); + frame.style.visibility = "visible"; + + locateChapters(pageDiv); + } + + + function updateDimensions(pageDiv) { + if 
// PANEL CONTROL
//
// An invisible strip laid over the reader that turns contact events
// (start / move / end / cancel) into calls on a set of callbacks supplied
// through listenTo(). Each callback receives (panelAPI, offsetX, offsetY).
Monocle.Controls.Panel = function () {

  var API = { constructor: Monocle.Controls.Panel }
  var k = API.constants = API.constructor;
  var p = API.properties = {
    evtCallbacks: {}
  }


  // Builds the panel div inside `cntr` and wires up the contact handlers.
  function createControlElements(cntr) {
    var handlers = {
      'start': start,
      'move': move,
      'end': end,
      'cancel': cancel
    };
    p.div = cntr.dom.make('div', k.CLS.panel);
    p.div.dom.setStyles(k.DEFAULT_STYLES);
    Monocle.Events.listenForContact(p.div, handlers, { useCapture: false });
    return p.div;
  }


  // Replaces the callback set.
  function listenTo(evtCallbacks) {
    p.evtCallbacks = evtCallbacks;
  }


  // Drops all callbacks.
  function deafen() {
    p.evtCallbacks = {}
  }


  // Contact began: make offsets relative to the panel's parent, expand,
  // and notify.
  function start(evt) {
    p.contact = true;
    evt.m.offsetX += p.div.offsetLeft;
    evt.m.offsetY += p.div.offsetTop;
    expand();
    invoke('start', evt);
  }


  function move(evt) {
    if (p.contact) {
      invoke('move', evt);
    }
  }


  function end(evt) {
    finishContact('end', evt);
  }


  function cancel(evt) {
    finishContact('cancel', evt);
  }


  // Shared tail of end/cancel; ignored unless a contact is in progress.
  function finishContact(evtType, evt) {
    if (!p.contact) {
      return;
    }
    // NOTE(review): p.listeners is never assigned in this block, so this
    // call receives undefined - confirm what deafenForContact does then.
    Monocle.Events.deafenForContact(p.div, p.listeners);
    contract();
    p.contact = false;
    invoke(evtType, evt);
  }


  // Calls the registered callback for evtType (if any), then always
  // suppresses the event's default action.
  function invoke(evtType, evt) {
    var callbacks = p.evtCallbacks;
    if (callbacks[evtType]) {
      callbacks[evtType](API, evt.m.offsetX, evt.m.offsetY);
    }
    evt.preventDefault();
  }


  // Adds the "expanded" class (once).
  function expand() {
    if (!p.expanded) {
      p.div.dom.addClass(k.CLS.expanded);
      p.expanded = true;
    }
  }


  // Removes the "expanded" class (once).
  function contract(evt) {
    if (p.expanded) {
      p.div.dom.removeClass(k.CLS.expanded);
      p.expanded = false;
    }
  }


  API.createControlElements = createControlElements;
  API.listenTo = listenTo;
  API.deafen = deafen;
  API.expand = expand;
  API.contract = contract;

  return API;
}


Monocle.Controls.Panel.CLS = {
  panel: 'panel',
  expanded: 'controls_panel_expanded'
}
Monocle.Controls.Panel.DEFAULT_STYLES = {
  position: 'absolute',
  height: '100%'
}


Monocle.pieceLoaded('controls/panel');
// VERTICAL DIMENSIONS
//
// Measures a page whose component scrolls vertically: the component body is
// treated as a stack of "pages", each one frame-height (minus GUTTER) tall.
Monocle.Dimensions.Vert = function (pageDiv) {

  var API = { constructor: Monocle.Dimensions.Vert }
  var k = API.constants = API.constructor;
  var p = API.properties = {
    page: pageDiv,
    reader: pageDiv.m.reader
  }


  function initialize() {
    p.reader.listen('monocle:componentchange', componentChanged);
  }


  // True when the body or page height differs from the last measure().
  function hasChanged() {
    // getPageHeight was previously compared without being called, so the
    // function object never equalled the stored number and this always
    // returned true.
    return getBodyHeight() != p.bodyHeight || getPageHeight() != p.pageHeight;
  }


  // Records the current heights and returns the number of pages.
  function measure() {
    p.bodyHeight = getBodyHeight();
    p.pageHeight = getPageHeight();
    p.length = Math.ceil(p.bodyHeight / p.pageHeight);
    return p.length;
  }


  // Page count from the last measure().
  function pages() {
    return p.length;
  }


  function getBodyHeight() {
    return p.page.m.activeFrame.contentDocument.body.scrollHeight;
  }


  function getPageHeight() {
    return p.page.m.activeFrame.offsetHeight - k.GUTTER;
  }


  // Vertical offset of the element with the given id, as a fraction of
  // the measured body height. Returns 0 when no such element exists.
  function percentageThroughOfId(id) {
    var doc = p.page.m.activeFrame.contentDocument;
    var target = doc.getElementById(id);
    if (!target) {
      // Unknown anchor: treat it as the top of the component rather
      // than throwing on null.
      return 0;
    }
    var offset = 0;
    if (target.getBoundingClientRect) {
      offset = target.getBoundingClientRect().top;
      offset -= doc.body.getBoundingClientRect().top;
    } else {
      // Fallback: scroll the target into view, read where we ended up,
      // then restore the previous scroll position.
      var oldScrollTop = doc.body.scrollTop;
      target.scrollIntoView();
      offset = doc.body.scrollTop;
      doc.body.scrollLeft = 0;
      doc.body.scrollTop = oldScrollTop;
    }

    var percent = offset / p.bodyHeight;
    return percent;
  }


  // Restyles this page's sheaf/frame and resets scrolling whenever a new
  // component is loaded into it.
  function componentChanged(evt) {
    if (evt.m['page'] != p.page) { return; }
    var sheaf = p.page.m.sheafDiv;
    var cmpt = p.page.m.activeFrame;
    sheaf.dom.setStyles(k.SHEAF_STYLES);
    cmpt.dom.setStyles(k.COMPONENT_STYLES);
    var doc = evt.m['document'];
    doc.documentElement.style.overflow = 'hidden';
    // "10px !important" assigned through style.marginRight is not a valid
    // value and is discarded; the priority has to go through setProperty.
    var bodyStyle = doc.body.style;
    if (typeof bodyStyle.setProperty == "function") {
      bodyStyle.setProperty('margin-right', '10px', 'important');
    } else {
      bodyStyle.marginRight = '10px';
    }
    cmpt.contentWindow.scrollTo(0,0);
  }


  // Scroll offset of the top of locus.page (1-based).
  function locusToOffset(locus) {
    return p.pageHeight * (locus.page - 1);
  }


  API.hasChanged = hasChanged;
  API.measure = measure;
  API.pages = pages;
  API.percentageThroughOfId = percentageThroughOfId;
  API.locusToOffset = locusToOffset;

  initialize();

  return API;
}

// Pixels subtracted from the frame height when computing the page height.
Monocle.Dimensions.Vert.GUTTER = 10;
dims.locusToOffset(locus); + + if (Math.abs(destY - srcY) > pageHeight) { + return win.scrollTo(0, destY); + } + + showIndicator(win, srcY < destY ? srcY + pageHeight : srcY); + Monocle.defer( + function () { smoothScroll(win, srcY, destY, 300, scrollingFinished); }, + 150 + ); + } + + + function scrollPos(win) { + if (win.pageYOffset) { + return win.pageYOffset; + } + if (win.document.documentElement && win.document.documentElement.scrollTop) { + return win.document.documentElement.scrollTop; + } + if (win.document.body.scrollTop) { + return win.document.body.scrollTop; + } + return 0; + } + + + function smoothScroll(win, currY, finalY, duration, callback) { + clearTimeout(win.smoothScrollInterval); + var stamp = (new Date()).getTime(); + var frameRate = 40; + var step = (finalY - currY) * (frameRate / duration); + var stepFn = function () { + var destY = currY + step; + if ( + (new Date()).getTime() - stamp > duration || + Math.abs(currY - finalY) < Math.abs((currY + step) - finalY) + ) { + clearTimeout(win.smoothScrollInterval); + win.scrollTo(0, finalY); + if (callback) { callback(); } + } else { + win.scrollTo(0, destY); + currY = destY; + } + } + win.smoothScrollInterval = setInterval(stepFn, frameRate); + } + + + function scrollingFinished() { + hideIndicator(page().m.activeFrame.contentWindow); + p.reader.dispatchEvent('monocle:turn'); + } + + + function showIndicator(win, pos) { + if (p.hideTO) { clearTimeout(p.hideTO); } + + var doc = win.document; + if (!doc.body.indicator) { + doc.body.indicator = createIndicator(doc); + doc.body.appendChild(doc.body.indicator); + } + doc.body.indicator.line.style.display = "block"; + doc.body.indicator.style.opacity = 1; + positionIndicator(pos); + } + + + function hideIndicator(win) { + var doc = win.document; + p.hideTO = Monocle.defer( + function () { + if (!doc.body.indicator) { + doc.body.indicator = createIndicator(doc); + doc.body.appendChild(doc.body.indicator); + } + var dims = page().m.dimensions; + 
positionIndicator( + dims.locusToOffset(getPlace().getLocus()) + dims.properties.pageHeight + ) + doc.body.indicator.line.style.display = "none"; + doc.body.indicator.style.opacity = 0.5; + }, + 600 + ); + } + + + function createIndicator(doc) { + var iBox = doc.createElement('div'); + doc.body.appendChild(iBox); + Monocle.Styles.applyRules(iBox, k.STYLES.iBox); + + iBox.arrow = doc.createElement('div'); + iBox.appendChild(iBox.arrow); + Monocle.Styles.applyRules(iBox.arrow, k.STYLES.arrow); + + iBox.line = doc.createElement('div'); + iBox.appendChild(iBox.line); + Monocle.Styles.applyRules(iBox.line, k.STYLES.line); + + return iBox; + } + + + function positionIndicator(y) { + var p = page(); + var doc = p.m.activeFrame.contentDocument; + var maxHeight = p.m.dimensions.properties.bodyHeight; + maxHeight -= doc.body.indicator.offsetHeight; + if (y > maxHeight) { + y = maxHeight; + } + doc.body.indicator.style.top = y + "px"; + } + + + API.pageCount = p.pageCount; + API.addPage = addPage; + API.getPlace = getPlace; + API.moveTo = moveTo; + API.listenForInteraction = listenForInteraction; + + initialize(); + + return API; +} + +Monocle.Flippers.Legacy.FORWARDS = 1; +Monocle.Flippers.Legacy.BACKWARDS = -1; +Monocle.Flippers.Legacy.DEFAULT_PANELS_CLASS = Monocle.Panels.TwoPane; + +Monocle.Flippers.Legacy.STYLES = { + iBox: { + 'position': 'absolute', + 'right': 0, + 'left': 0, + 'height': '10px' + }, + arrow: { + 'position': 'absolute', + 'right': 0, + 'height': '10px', + 'width': '10px', + 'background': '#333', + 'border-radius': '6px' + }, + line: { + 'width': '100%', + 'border-top': '2px dotted #333', + 'margin-top': '5px' + } +} + +Monocle.pieceLoaded('flippers/legacy'); +Monocle.Dimensions.Columns = function (pageDiv) { + + var API = { constructor: Monocle.Dimensions.Columns } + var k = API.constants = API.constructor; + var p = API.properties = { + page: pageDiv, + reader: pageDiv.m.reader, + dirty: true + } + + + function initialize() { + 
p.reader.listen('monocle:componentchange', componentChanged); + } + + + function hasChanged() { + if (p.dirty) { return true; } + var newMeasurements = rawMeasurements(); + return ( + (!p.measurements) || + (p.measurements.width != newMeasurements.width) || + (p.measurements.height != newMeasurements.height) || + (p.measurements.scrollWidth != newMeasurements.scrollWidth) || + (p.measurements.fontSize != newMeasurements.fontSize) + ); + } + + + function measure() { + setColumnWidth(); + p.measurements = rawMeasurements(); + + if ( + Monocle.Browser.has.iframeDoubleWidthBug && + p.measurements.scrollWidth == p.measurements.width * 2 + ) { + var doc = p.page.m.activeFrame.contentDocument; + var lc; + for (var i = doc.body.childNodes.length - 1; i >= 0; --i) { + lc = doc.body.childNodes[i]; + if (lc.getBoundingClientRect) { break; } + } + if (!lc || !lc.getBoundingClientRect) { + console.warn('Empty document for page['+p.page.m.pageIndex+']'); + p.measurements.scrollWidth = p.measurements.width; + } else if (lc.getBoundingClientRect().bottom > p.measurements.height) { + p.measurements.scrollWidth = p.measurements.width * 2; + } else { + p.measurements.scrollWidth = p.measurements.width; + } + } + + p.length = Math.ceil(p.measurements.scrollWidth / p.measurements.width); + p.dirty = false; + return p.length; + } + + + function pages() { + if (p.dirty) { + console.warn('Accessing pages() when dimensions are dirty.') + return 0; + } + return p.length; + } + + + function percentageThroughOfId(id) { + var doc = p.page.m.activeFrame.contentDocument; + var target = doc.getElementById(id); + if (!target) { + return 0; + } + var offset = 0; + if (target.getBoundingClientRect) { + offset = target.getBoundingClientRect().left; + offset -= doc.body.getBoundingClientRect().left; + } else { + var scroller = scrollerElement(); + var oldScrollLeft = scroller.scrollLeft; + target.scrollIntoView(); + offset = scroller.scrollLeft; + scroller.scrollTop = 0; + scroller.scrollLeft = 
oldScrollLeft; + } + + var percent = offset / p.measurements.scrollWidth; + return percent; + } + + + function componentChanged(evt) { + if (evt.m['page'] != p.page) { return; } + var doc = evt.m['document']; + Monocle.Styles.applyRules(doc.body, k.BODY_STYLES); + + if (Monocle.Browser.is.WebKit) { + doc.documentElement.style.overflow = 'hidden'; + } + p.dirty = true; + } + + + function setColumnWidth() { + var cw = p.page.m.sheafDiv.clientWidth; + var doc = p.page.m.activeFrame.contentDocument; + if (currBodyStyleValue('column-width') != cw+"px") { + Monocle.Styles.affix(doc.body, 'column-width', cw+"px"); + p.dirty = true; + } + } + + + function rawMeasurements() { + var sheaf = p.page.m.sheafDiv; + return { + width: sheaf.clientWidth, + height: sheaf.clientHeight, + scrollWidth: scrollerWidth(), + fontSize: currBodyStyleValue('font-size') + } + } + + + function scrollerElement() { + if (Monocle.Browser.has.mustScrollSheaf) { + return p.page.m.sheafDiv; + } else { + return p.page.m.activeFrame.contentDocument.body; + } + } + + + function scrollerWidth() { + var bdy = p.page.m.activeFrame.contentDocument.body; + if (Monocle.Browser.has.iframeDoubleWidthBug) { + if (Monocle.Browser.iOSVersion < "4.1") { + var hbw = bdy.scrollWidth / 2; + var sew = scrollerElement().scrollWidth; + return Math.max(sew, hbw); + } else { + bdy.scrollWidth; // Throw one away. Nuts. 
+ var hbw = bdy.scrollWidth / 2; + return hbw; + } + } else if (Monocle.Browser.is.Gecko) { + var lc = bdy.lastChild; + while (lc && lc.nodeType != 1) { + lc = lc.previousSibling; + } + if (lc && lc.getBoundingClientRect) { + return lc.getBoundingClientRect().right; + } + } + return scrollerElement().scrollWidth; + } + + + function currBodyStyleValue(property) { + var win = p.page.m.activeFrame.contentWindow; + var doc = win.document; + if (!doc.body) { return null; } + var currStyle = win.getComputedStyle(doc.body, null); + return currStyle.getPropertyValue(property); + } + + + function locusToOffset(locus) { + return 0 - (p.measurements.width * (locus.page - 1)); + } + + + function translateToLocus(locus) { + var offset = locusToOffset(locus); + var bdy = p.page.m.activeFrame.contentDocument.body; + Monocle.Styles.affix(bdy, "transform", "translateX("+offset+"px)"); + return offset; + } + + + API.hasChanged = hasChanged; + API.measure = measure; + API.pages = pages; + API.percentageThroughOfId = percentageThroughOfId; + + API.locusToOffset = locusToOffset; + API.translateToLocus = translateToLocus; + + initialize(); + + return API; +} + + +Monocle.Dimensions.Columns.BODY_STYLES = { + "position": "absolute", + "height": "100%", + "-webkit-column-gap": "0", + "-webkit-column-fill": "auto", + "-moz-column-gap": "0", + "-moz-column-fill": "auto", + "column-gap": "0", + "column-fill": "auto" +} + +if (Monocle.Browser.has.iframeDoubleWidthBug) { + Monocle.Dimensions.Columns.BODY_STYLES["min-width"] = "200%"; +} else { + Monocle.Dimensions.Columns.BODY_STYLES["width"] = "100%"; +} +Monocle.Flippers.Slider = function (reader) { + if (Monocle.Flippers == this) { + return new Monocle.Flippers.Slider(reader); + } + + var API = { constructor: Monocle.Flippers.Slider } + var k = API.constants = API.constructor; + var p = API.properties = { + pageCount: 2, + activeIndex: 1, + turnData: {} + } + + + function initialize() { + p.reader = reader; + } + + + function 
addPage(pageDiv) { + pageDiv.m.dimensions = new Monocle.Dimensions.Columns(pageDiv); + } + + + function visiblePages() { + return [upperPage()]; + } + + + function listenForInteraction(panelClass) { + interactiveMode(true); + interactiveMode(false); + + if (typeof panelClass != "function") { + panelClass = k.DEFAULT_PANELS_CLASS; + if (!panelClass) { + console.warn("Invalid panel class.") + } + } + var q = function (action, panel, x) { + var dir = panel.properties.direction; + if (action == "lift") { + lift(dir, x); + } else if (action == "release") { + release(dir, x); + } + } + p.panels = new panelClass( + API, + { + 'start': function (panel, x) { q('lift', panel, x); }, + 'move': function (panel, x) { turning(panel.properties.direction, x); }, + 'end': function (panel, x) { q('release', panel, x); }, + 'cancel': function (panel, x) { q('release', panel, x); } + } + ); + } + + + function interactiveMode(bState) { + if (!Monocle.Browser.has.selectThruBug) { + return; + } + if (p.interactive = bState) { + if (p.activeIndex != 0) { + var place = getPlace(); + if (place) { + setPage( + p.reader.dom.find('page', 0), + place.getLocus(), + function () { + flipPages(); + prepareNextPage(); + } + ); + } else { + flipPages(); + } + } + } + } + + + function getPlace(pageDiv) { + pageDiv = pageDiv || upperPage(); + return pageDiv.m ? 
pageDiv.m.place : null; + } + + + function moveTo(locus, callback) { + var fn = function () { + prepareNextPage(announceTurn); + if (typeof callback == "function") { + callback(); + } + } + setPage(upperPage(), locus, fn); + } + + + function setPage(pageDiv, locus, callback) { + p.reader.getBook().setOrLoadPageAt( + pageDiv, + locus, + function (locus) { + pageDiv.m.dimensions.translateToLocus(locus); + if (callback) { callback(); } + } + ); + } + + + function upperPage() { + return p.reader.dom.find('page', p.activeIndex); + } + + + function lowerPage() { + return p.reader.dom.find('page', (p.activeIndex + 1) % 2); + } + + + function flipPages() { + upperPage().style.zIndex = 1; + lowerPage().style.zIndex = 2; + return p.activeIndex = (p.activeIndex + 1) % 2; + } + + + function lift(dir, boxPointX) { + if (p.turnData.lifting || p.turnData.releasing) { return; } + + p.turnData.points = { + start: boxPointX, + min: boxPointX, + max: boxPointX + } + p.turnData.lifting = true; + + if (dir == k.FORWARDS) { + if (getPlace().onLastPageOfBook()) { + resetTurnData(); + return; + } + onGoingForward(boxPointX); + } else if (dir == k.BACKWARDS) { + if (getPlace().onFirstPageOfBook()) { + resetTurnData(); + return; + } + onGoingBackward(boxPointX); + } else { + console.warn("Invalid direction: " + dir); + } + } + + + function turning(dir, boxPointX) { + if (!p.turnData.points) { return; } + if (p.turnData.lifting || p.turnData.releasing) { return; } + checkPoint(boxPointX); + slideToCursor(boxPointX, null, "0"); + } + + + function release(dir, boxPointX) { + if (!p.turnData.points) { + return; + } + if (p.turnData.lifting) { + p.turnData.releaseArgs = [dir, boxPointX]; + return; + } + if (p.turnData.releasing) { + return; + } + + checkPoint(boxPointX); + + p.turnData.releasing = true; + + if (dir == k.FORWARDS) { + if ( + p.turnData.points.tap || + p.turnData.points.start - boxPointX > 60 || + p.turnData.points.min >= boxPointX + ) { + slideOut(afterGoingForward); + } else { + 
slideIn(afterCancellingForward); + } + } else if (dir == k.BACKWARDS) { + if ( + p.turnData.points.tap || + boxPointX - p.turnData.points.start > 60 || + p.turnData.points.max <= boxPointX + ) { + slideIn(afterGoingBackward); + } else { + slideOut(afterCancellingBackward); + } + } else { + console.warn("Invalid direction: " + dir); + } + } + + + function checkPoint(boxPointX) { + p.turnData.points.min = Math.min(p.turnData.points.min, boxPointX); + p.turnData.points.max = Math.max(p.turnData.points.max, boxPointX); + p.turnData.points.tap = p.turnData.points.max - p.turnData.points.min < 10; + } + + + function onGoingForward(x) { + lifted(x); + } + + + function onGoingBackward(x) { + var lp = lowerPage(); + jumpOut(lp, // move lower page off-screen + function () { + flipPages(); // flip lower to upper + setPage( // set upper page to previous + lp, + getPlace(lowerPage()).getLocus({ direction: k.BACKWARDS }), + function () { lifted(x); } + ); + } + ); + } + + + function afterGoingForward() { + var up = upperPage(); + if (p.interactive) { + setPage( // set upper (off screen) to current + up, + getPlace().getLocus({ direction: k.FORWARDS }), + function () { + jumpIn(up, function () { prepareNextPage(announceTurn); }); + } + ); + } else { + flipPages(); + jumpIn(up, function () { prepareNextPage(announceTurn); }); + } + } + + + function afterGoingBackward() { + if (p.interactive) { + setPage( // set lower page to current + lowerPage(), + getPlace().getLocus(), + function () { + flipPages(); // flip lower to upper + prepareNextPage(announceTurn); + } + ); + } else { + announceTurn(); + } + } + + + function afterCancellingForward() { + resetTurnData(); + } + + + function afterCancellingBackward() { + flipPages(); // flip upper to lower + jumpIn( // move lower back onto screen + lowerPage(), + function () { prepareNextPage(resetTurnData); } + ); + } + + + function prepareNextPage(callback) { + setPage( + lowerPage(), + getPlace().getLocus({ direction: k.FORWARDS }), + 
callback
+    );
+  }
+
+  // Marks the lift as finished, then replays a release that arrived during it, if any.
+  function lifted(x) {
+    p.turnData.lifting = false;
+    var releaseArgs = p.turnData.releaseArgs;
+    if (releaseArgs) {
+      p.turnData.releaseArgs = null;
+      release(releaseArgs[0], releaseArgs[1]);
+    } else if (x) {
+      slideToCursor(x);
+    }
+  }
+
+  // Dispatches 'monocle:turn' on the reader and clears the turn state.
+  function announceTurn() {
+    p.reader.dispatchEvent('monocle:turn');
+    resetTurnData();
+  }
+
+
+  function resetTurnData() {
+    p.turnData = {};
+  }
+
+  // Moves elem to x: WebKit transition if available, else a timer-stepped animation, else a direct set.
+  function setX(elem, x, options, callback) {
+    var duration, transition;
+
+    if (!options.duration) {
+      duration = 0;
+    } else {
+      duration = parseInt(options['duration']);
+    }
+
+    if (typeof(x) == "number") { x = x + "px"; }
+
+    if (typeof WebKitTransitionEvent != "undefined") {
+      if (duration) {
+        transition = '-webkit-transform';
+        transition += ' ' + duration + "ms";
+        transition += ' ' + (options['timing'] || 'linear');
+        transition += ' ' + (options['delay'] || 0) + 'ms';
+      } else {
+        transition = 'none';
+      }
+      elem.style.webkitTransition = transition;
+      if (Monocle.Browser.has.transform3d) {
+        elem.style.webkitTransform = "translate3d("+x+",0,0)";
+      } else {
+        elem.style.webkitTransform = "translateX("+x+")";
+      }
+
+    } else if (duration > 0) {
+      clearInterval(elem.setXTransitionInterval)
+
+      var stamp = (new Date()).getTime();
+      var frameRate = 40;
+      var finalX = parseInt(x);
+      var currX = getX(elem);
+      var step = (finalX - currX) * (frameRate / duration);
+      var stepFn = function () {
+        var destX = currX + step;
+        if (
+          (new Date()).getTime() - stamp > duration ||
+          Math.abs(currX - finalX) <= Math.abs((currX + step) - finalX)
+        ) {
+          clearInterval(elem.setXTransitionInterval)
+          Monocle.Styles.setX(elem, finalX);
+          if (elem.setXTCB) {
+            elem.setXTCB();
+          }
+        } else {
+          Monocle.Styles.setX(elem, destX);
+          currX = destX;
+        }
+      }
+
+      elem.setXTransitionInterval = setInterval(stepFn, frameRate);
+    } else {
+      Monocle.Styles.setX(elem, x);
+    }
+
+    if (elem.setXTCB) {
+      Monocle.Events.deafen(elem, 'webkitTransitionEnd', elem.setXTCB);
+      elem.setXTCB = null;
+    }
+
+
elem.setXTCB = function () { + if (callback) { callback(); } + } + + var sX = getX(elem); + if (!duration || sX == parseInt(x)) { + elem.setXTCB(); + } else { + Monocle.Events.listen(elem, 'webkitTransitionEnd', elem.setXTCB); + } + } + + + /* + function setX(elem, x, options, callback) { + var duration, transition; + + if (!Monocle.Browser.has.transitions) { + duration = 0; + } else if (!options.duration) { + duration = 0; + } else { + duration = parseInt(options['duration']); + } + + if (typeof(x) == "number") { x = x + "px"; } + + if (duration) { + transition = duration + "ms"; + transition += ' ' + (options['timing'] || 'linear'); + transition += ' ' + (options['delay'] || 0) + 'ms'; + } else { + transition = "none"; + } + + if (elem.setXTCB) { + Monocle.Events.deafen(elem, 'webkitTransitionEnd', elem.setXTCB); + Monocle.Events.deafen(elem, 'transitionend', elem.setXTCB); + elem.setXTCB = null; + } + + elem.setXTCB = function () { + if (callback) { callback(); } + } + + elem.dom.setBetaStyle('transition', transition); + if (Monocle.Browser.has.transform3d) { + elem.dom.setBetaStyle('transform', 'translate3d('+x+',0,0)'); + } else { + elem.dom.setBetaStyle('transform', 'translateX('+x+')'); + } + + if (!duration) { + elem.setXTCB(); + } else { + Monocle.Events.listen(elem, 'webkitTransitionEnd', elem.setXTCB); + Monocle.Events.listen(elem, 'transitionend', elem.setXTCB); + } + } + */ + + + function getX(elem) { + if (typeof WebKitCSSMatrix == "object") { + var matrix = window.getComputedStyle(elem).webkitTransform; + matrix = new WebKitCSSMatrix(matrix); + return matrix.m41; + } else { + var prop = elem.style.MozTransform; + if (!prop || prop == "") { return 0; } + return parseFloat((/translateX\((\-?.*)px\)/).exec(prop)[1]) || 0; + } + } + + + function jumpIn(pageDiv, callback) { + setX(pageDiv, 0, { duration: 1 }, callback); + } + + + function jumpOut(pageDiv, callback) { + setX(pageDiv, 0 - pageDiv.offsetWidth, { duration: 1 }, callback); + } + + + + function 
slideIn(callback) { + var slideOpts = { + duration: k.durations.SLIDE, + timing: 'ease-in' + }; + setX(upperPage(), 0, slideOpts, callback); + } + + + function slideOut(callback) { + var slideOpts = { + duration: k.durations.SLIDE, + timing: 'ease-in' + }; + setX(upperPage(), 0 - upperPage().offsetWidth, slideOpts, callback); + } + + + function slideToCursor(cursorX, callback, duration) { + setX( + upperPage(), + Math.min(0, cursorX - upperPage().offsetWidth), + { duration: duration || k.durations.FOLLOW_CURSOR }, + callback + ); + } + + + API.pageCount = p.pageCount; + API.addPage = addPage; + API.getPlace = getPlace; + API.moveTo = moveTo; + API.listenForInteraction = listenForInteraction; + + API.visiblePages = visiblePages; + API.interactiveMode = interactiveMode; + + initialize(); + + return API; +} + + +Monocle.Flippers.Slider.DEFAULT_PANELS_CLASS = Monocle.Panels.TwoPane; +Monocle.Flippers.Slider.FORWARDS = 1; +Monocle.Flippers.Slider.BACKWARDS = -1; +Monocle.Flippers.Slider.durations = { + SLIDE: 220, + FOLLOW_CURSOR: 100 +} + +Monocle.pieceLoaded('flippers/slider'); + +Monocle.pieceLoaded('monocle'); diff --git a/resources/default_tweaks.py b/resources/default_tweaks.py index 86921886ad..0f570bab40 100644 --- a/resources/default_tweaks.py +++ b/resources/default_tweaks.py @@ -106,7 +106,8 @@ title_sort_articles=r'^(A|The|An)\s+' auto_connect_to_folder = '' -# Specify renaming rules for sony collections. Collections on Sonys are named +# Specify renaming rules for sony collections. This tweak is only applicable if +# metadata management is set to automatic. Collections on Sonys are named # depending upon whether the field is standard or custom. A collection derived # from a standard field is named for the value in that field. For example, if # the standard 'series' column contains the name 'Darkover', then the series @@ -137,6 +138,24 @@ auto_connect_to_folder = '' sony_collection_renaming_rules={} +# Specify how sony collections are sorted. 
This tweak is only applicable if +# metadata management is set to automatic. You can indicate which metadata is to +# be used to sort on a collection-by-collection basis. The format of the tweak +# is a list of metadata fields from which collections are made, followed by the +# name of the metadata field containing the sort value. +# Example: The following indicates that collections built from pubdate and tags +# are to be sorted by the value in the custom column '#mydate', that collections +# built from 'series' are to be sorted by 'series_index', and that all other +# collections are to be sorted by title. If a collection metadata field is not +# named, then if it is a series-based collection it is sorted by series order, +# otherwise it is sorted by title order. +# [(['pubdate', 'tags'],'#mydate'), (['series'],'series_index'), (['*'], 'title')] +# Note that the bracketing and parentheses are required. The syntax is +# [ ( [list of fields], sort field ) , ( [ list of fields ] , sort field ) ] +# Default: empty (no rules), so no collection attributes are named. +sony_collection_sorting_rules = [] + + +# Create search terms to apply a query across several built-in search terms. # Syntax: {'new term':['existing term 1', 'term 2', ...], 'new':['old'...] ...} # Example: create the term 'myseries' that when used as myseries:foo would @@ -184,3 +203,11 @@ content_server_wont_display = [''] # level sorts, and if you are seeing a slowdown, reduce the value of this tweak. maximum_resort_levels = 5 +# Absolute path to a TTF font file to use as the font for the title and author +# when generating a default cover. Useful if the default font (Liberation +# Serif) does not contain glyphs for the language of the books in your library.
+generate_cover_title_font = None + +# Absolute path to a TTF font file to use as the font for the footer in the +# default cover +generate_cover_foot_font = None diff --git a/resources/images/news/perfil.png b/resources/images/news/perfil.png new file mode 100644 index 0000000000..54c8159e48 Binary files /dev/null and b/resources/images/news/perfil.png differ diff --git a/resources/images/news/theecocolapse.png b/resources/images/news/theecocolapse.png new file mode 100644 index 0000000000..1c45ec14bf Binary files /dev/null and b/resources/images/news/theecocolapse.png differ diff --git a/resources/recipes/atlantic.recipe b/resources/recipes/atlantic.recipe index a41a931e37..5ae0f7d993 100644 --- a/resources/recipes/atlantic.recipe +++ b/resources/recipes/atlantic.recipe @@ -71,7 +71,9 @@ class TheAtlantic(BasicNewsRecipe): for poem in soup.findAll('div', attrs={'class':'poem'}): title = self.tag_to_string(poem.find('h4')) desc = self.tag_to_string(poem.find(attrs={'class':'author'})) - url = 'http://www.theatlantic.com'+poem.find('a')['href'] + url = poem.find('a')['href'] + if url.startswith('/'): + url = 'http://www.theatlantic.com' + url self.log('\tFound article:', title, 'at', url) self.log('\t\t', desc) poems.append({'title':title, 'url':url, 'description':desc, @@ -83,7 +85,9 @@ class TheAtlantic(BasicNewsRecipe): if div is not None: self.log('Found section: Advice') title = self.tag_to_string(div.find('h4')) - url = 'http://www.theatlantic.com'+div.find('a')['href'] + url = div.find('a')['href'] + if url.startswith('/'): + url = 'http://www.theatlantic.com' + url desc = self.tag_to_string(div.find('p')) self.log('\tFound article:', title, 'at', url) self.log('\t\t', desc) diff --git a/resources/recipes/cacm.recipe b/resources/recipes/cacm.recipe index 1618bae742..e4af9d2024 100644 --- a/resources/recipes/cacm.recipe +++ b/resources/recipes/cacm.recipe @@ -1,37 +1,37 @@ -import datetime -from calibre.web.feeds.news import BasicNewsRecipe - -class 
AdvancedUserRecipe1286242553(BasicNewsRecipe): - title = u'CACM' - oldest_article = 7 - max_articles_per_feed = 100 - needs_subscription = True - feeds = [(u'CACM', u'http://cacm.acm.org/magazine.rss')] - language = 'en' - __author__ = 'jonmisurda' - no_stylesheets = True - remove_tags = [ - dict(name='div', attrs={'class':['FeatureBox', 'ArticleComments', 'SideColumn', \ - 'LeftColumn', 'RightColumn', 'SiteSearch', 'MainNavBar','more', 'SubMenu', 'inner']}) - ] - cover_url_pattern = 'http://cacm.acm.org/magazines/%d/%d' - - def get_browser(self): - br = BasicNewsRecipe.get_browser() - if self.username is not None and self.password is not None: - br.open('https://cacm.acm.org/login') - br.select_form(nr=1) - br['current_member[user]'] = self.username - br['current_member[passwd]'] = self.password - br.submit() - return br - - def get_cover_url(self): - now = datetime.datetime.now() - - cover_url = None - soup = self.index_to_soup(self.cover_url_pattern % (now.year, now.month)) - cover_item = soup.find('img',attrs={'alt':'magazine cover image'}) - if cover_item: - cover_url = cover_item['src'] - return cover_url +import datetime +from calibre.web.feeds.news import BasicNewsRecipe + +class AdvancedUserRecipe1286242553(BasicNewsRecipe): + title = u'CACM' + oldest_article = 7 + max_articles_per_feed = 100 + needs_subscription = True + feeds = [(u'CACM', u'http://cacm.acm.org/magazine.rss')] + language = 'en' + __author__ = 'jonmisurda' + no_stylesheets = True + remove_tags = [ + dict(name='div', attrs={'class':['FeatureBox', 'ArticleComments', 'SideColumn', \ + 'LeftColumn', 'RightColumn', 'SiteSearch', 'MainNavBar','more', 'SubMenu', 'inner']}) + ] + cover_url_pattern = 'http://cacm.acm.org/magazines/%d/%d' + + def get_browser(self): + br = BasicNewsRecipe.get_browser() + if self.username is not None and self.password is not None: + br.open('https://cacm.acm.org/login') + br.select_form(nr=1) + br['current_member[user]'] = self.username + br['current_member[passwd]'] 
= self.password + br.submit() + return br + + def get_cover_url(self): + now = datetime.datetime.now() + + cover_url = None + soup = self.index_to_soup(self.cover_url_pattern % (now.year, now.month)) + cover_item = soup.find('img',attrs={'alt':'magazine cover image'}) + if cover_item: + cover_url = cover_item['src'] + return cover_url diff --git a/resources/recipes/calcalist.recipe b/resources/recipes/calcalist.recipe new file mode 100644 index 0000000000..267924d72c --- /dev/null +++ b/resources/recipes/calcalist.recipe @@ -0,0 +1,43 @@ +from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import re + +class AdvancedUserRecipe1283848012(BasicNewsRecipe): + description = 'This is a recipe of Calcalist.co.il. The recipe downloads the article page to not hurt the sites advertising income.' + cover_url = 'http://ftp5.bizportal.co.il/web/giflib/news/calcalist.JPG' + title = u'Calcalist' + language = 'he' + __author__ = 'marbs' + extra_css='img {max-width:100%;} body{direction: rtl;},title{direction: rtl; } ,article_description{direction: rtl; }, a.article{direction: rtl; } ,calibre_feed_description{direction: rtl; }' + simultaneous_downloads = 5 + remove_javascript = True + timefmt = '[%a, %d %b, %Y]' + oldest_article = 1 + max_articles_per_feed = 100 + remove_attributes = ['width'] + simultaneous_downloads = 5 + keep_only_tags =dict(name='div', attrs={'id':'articleContainer'}) + remove_tags = [dict(name='p', attrs={'text':[' ']})] + max_articles_per_feed = 100 + preprocess_regexps = [ + (re.compile(r'

 

', re.DOTALL|re.IGNORECASE), lambda match: '') + ] + + + feeds = [(u'\u05d3\u05e3 \u05d4\u05d1\u05d9\u05ea', u'http://www.calcalist.co.il/integration/StoryRss8.xml'), + (u'24/7', u'http://www.calcalist.co.il/integration/StoryRss3674.xml'), + (u'\u05d1\u05d0\u05d6\u05d6', u'http://www.calcalist.co.il/integration/StoryRss3674.xml'), + (u'\u05de\u05d1\u05d6\u05e7\u05d9\u05dd', u'http://www.calcalist.co.il/integration/StoryRss184.xml'), + (u'\u05d4\u05e9\u05d5\u05e7', u'http://www.calcalist.co.il/integration/StoryRss2.xml'), + (u'\u05d1\u05d0\u05e8\u05e5', u'http://www.calcalist.co.il/integration/StoryRss14.xml'), + (u'\u05d4\u05db\u05e1\u05e3', u'http://www.calcalist.co.il/integration/StoryRss9.xml'), + (u'\u05e0\u05d3\u05dc"\u05df', u'http://www.calcalist.co.il/integration/StoryRss7.xml'), + (u'\u05e2\u05d5\u05dc\u05dd', u'http://www.calcalist.co.il/integration/StoryRss13.xml'), + (u'\u05e4\u05e8\u05e1\u05d5\u05dd \u05d5\u05e9\u05d9\u05d5\u05d5\u05e7', u'http://www.calcalist.co.il/integration/StoryRss5.xml'), + (u'\u05e4\u05e0\u05d0\u05d9', u'http://www.calcalist.co.il/integration/StoryRss3.xml'), + (u'\u05d8\u05db\u05e0\u05d5\u05dc\u05d5\u05d2\u05d9', u'http://www.calcalist.co.il/integration/StoryRss4.xml'), + (u'\u05e2\u05e1\u05e7\u05d9 \u05e1\u05e4\u05d5\u05e8\u05d8', u'http://www.calcalist.co.il/integration/StoryRss18.xml')] + + def print_version(self, url): + split1 = url.split("-") + print_url = 'http://www.calcalist.co.il/Ext/Comp/ArticleLayout/CdaArticlePrintPreview/1,2506,L-' + split1[1] + return print_url diff --git a/resources/recipes/clic_rbs.recipe b/resources/recipes/clic_rbs.recipe new file mode 100644 index 0000000000..559dfa2000 --- /dev/null +++ b/resources/recipes/clic_rbs.recipe @@ -0,0 +1,50 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class ClicRBS(BasicNewsRecipe): + title = u'ClicRBS' + language = 'pt' + __author__ = 'arvoredo' + oldest_article = 3 + max_articles_per_feed = 9 + cover_url = 
'http://www.publicidade.clicrbs.com.br/clicrbs/imgs/logo_clic.gif' + + remove_tags = [ + dict(name='div', attrs={'class':['clic-barra-inner', 'botao-versao-mobile ']}) + ] + + remove_tags_before = dict(name='div ', attrs={'class':'descricao'}) + remove_tags_before = dict(name='div', attrs={'id':'glb-corpo'}) + remove_tags_before = dict(name='div', attrs={'class':'descricao'}) + remove_tags_before = dict(name='div', attrs={'class':'coluna'}) + remove_tags_after = dict(name='div', attrs={'class':'extra'}) + remove_tags_after = dict(name='div', attrs={'id':'links-patrocinados'}) + remove_tags_after = dict(name='h4', attrs={'class':'tipo-c comente'}) + remove_tags_after = dict(name='ul', attrs={'class':'lista'}) + + feeds = [ + (u'zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?uf=1&local=1&channel=13') + , (u'diariocatarinense.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?uf=2&local=18&channel=67') + , (u'Concursos e Emprego', u'http://g1.globo.com/Rss2/0,,AS0-9654,00.xml') + , (u'Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?channel=87&uf=1&local=1') + , (u'Economia, zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=801&uf=1&local=1&channel=13') + , (u'Esportes, zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=802&uf=1&local=1&channel=13') + , (u'Economia, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1180&channel=87&uf=1&local=1') + , (u'Política, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1185&channel=87&uf=1&local=1') + , (u'Mundo, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1184&channel=87&uf=1&local=1') + , (u'Catarinense, Esportes, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=&theme=371&uf=2&channel=2') + , (u'Geral, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1183&channel=87&uf=1&local=1') + , (u'Estilo 
from calibre.web.feeds.news import BasicNewsRecipe


class CMJornal_pt(BasicNewsRecipe):
    '''
    Recipe for Correio da Manha (Portugal).

    Downloads the RSS channels listed in ``feeds`` and fetches the print
    version of every article (see ``print_version``).
    '''

    # -- Metadata ---------------------------------------------------------
    title = 'Correio da Manha - Portugal'
    __author__ = 'jmst'
    description = 'As noticias de Portugal e do Mundo'
    publisher = 'Cofina Media'
    category = ''
    language = 'pt'

    # -- Download behaviour -----------------------------------------------
    oldest_article = 1
    encoding = 'utf-8'
    no_stylesheets = True
    use_embedded_content = False

    # Adjacent literals are concatenated into one stylesheet string.
    extra_css = (
        ' .publish{font-style: italic; line-height: 1.2em;'
        ' border-bottom: 1px dotted; padding: 5px 0}'
        ' .entity{line-height: 1.2em}'
        ' .overview{line-height:1.2em} '
    )

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # Keep the headline tags and the article container only.
    keep_only_tags = [
        dict(name=['h2', 'h1']),
        dict(name='div', attrs={'class': ['news']}),
    ]

    # Drop embedded media and anchors whose href is just '#'.
    remove_tags = [
        dict(name=['object', 'embed', 'iframe']),
        dict(name='a', attrs={'href': ['#']}),
    ]

    feeds = [
        (u'Actualidade',
         u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000009-0000-0000-0000-000000000009'),
        (u'Portugal',
         u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000010-0000-0000-0000-000000000010'),
        (u'Economia',
         u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000011-0000-0000-0000-000000000011'),
        (u'Mundo',
         u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000091-0000-0000-0000-000000000091'),
        (u'Desporto',
         u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000012-0000-0000-0000-000000000012'),
        (u'TV & Media',
         u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000092-0000-0000-0000-000000000092'),
    ]

    def print_version(self, url):
        '''Return the print-page URL for the article page ``url``.'''
        article_page = 'noticia.aspx'
        print_page = 'Imprimir.aspx'
        return url.replace(article_page, print_page)
from calibre.web.feeds.news import BasicNewsRecipe


class ElFaroDeVigo(BasicNewsRecipe):
    '''
    Recipe for the Spanish newspaper El Faro de Vigo.

    Articles come from the RSS channels in ``feeds``; only the
    ``div.noticias`` container of each page is kept.
    '''

    title = u'El Faro de Vigo'
    oldest_article = 1
    max_articles_per_feed = 100
    __author__ = 'Jefferson Frantz'
    description = 'Noticias de Vigo'
    timefmt = ' [%d %b, %Y]'
    language = 'es'
    encoding = 'cp1252'
    no_stylesheets = True
    remove_javascript = True

    # Channels prefixed with '##' are disabled; uncomment to enable them.
    feeds = [
##        (u'Vigo', u'http://www.farodevigo.es/elementosInt/rss/1'),
##        (u'Gran Vigo', u'http://www.farodevigo.es/elementosInt/rss/2'),
        (u'Galicia', u'http://www.farodevigo.es/elementosInt/rss/4'),
        (u'España', u'http://www.farodevigo.es/elementosInt/rss/6'),
        (u'Mundo', u'http://www.farodevigo.es/elementosInt/rss/7'),
##        (u'Opinión', u'http://www.farodevigo.es/elementosInt/rss/5'),
        (u'Economía', u'http://www.farodevigo.es/elementosInt/rss/10'),
        (u'Sociedad y Cultura', u'http://www.farodevigo.es/elementosInt/rss/8'),
        (u'Sucesos', u'http://www.farodevigo.es/elementosInt/rss/9'),
        (u'Deportes', u'http://www.farodevigo.es/elementosInt/rss/11'),
        (u'Agenda', u'http://www.farodevigo.es/elementosInt/rss/21'),
        (u'Gente', u'http://www.farodevigo.es/elementosInt/rss/24'),
        (u'Televisión', u'http://www.farodevigo.es/elementosInt/rss/25'),
        (u'Ciencia y Tecnología', u'http://www.farodevigo.es/elementosInt/rss/26'),
    ]

    extra_css = '''.noticia_texto{ font-family: sans-serif; font-size: medium; text-align: justify }
                 h1{font-family: serif; font-size: x-large; font-weight: bold; color: #000000; text-align: center}
                 h2{font-family: serif; font-size: medium; font-weight: bold; color: #000000; text-align: left}
                 .enlacenegrita10{font-family: serif; font-size: small; font-weight: bold; color: #000000; text-align: left}
                 .noticia_titular{font-family: serif; font-size: x-large; font-weight: bold; color: #000000; text-align: center}'''

    keep_only_tags = [dict(name='div', attrs={'class': ['noticias']})]

    remove_tags = [
        dict(name=['object', 'link', 'script', 'ul', 'iframe', 'ol']),
        dict(name='div', attrs={'class': ['noticiadd2', 'cintillo2', 'noticiadd']}),
        dict(name='div', attrs={'class': ['imagen_derecha', 'noticiadd3', 'extraHTML']}),
    ]

    # The "share" icon is served from three static hosts; the element that
    # contains it is removed from every article.
    share_icon_urls = [
        'http://estaticos%s.farodevigo.es//elementosWeb/mediaweb/images/compartir/barrapunto.gif' % host
        for host in ('00', '01', '02')
    ]

    def preprocess_html(self, soup):
        '''
        Strip inline styles and the share-icon container from ``soup``.

        Returns the result of ``self.adeify_images(soup)``.
        '''
        for item in soup.findAll(style=True):
            del item['style']

        for icon_url in self.share_icon_urls:
            icon = soup.find('img', src=icon_url)
            if icon is not None:
                # Drop the whole parent element, not just the image.
                icon.parent.extract()

        return self.adeify_images(soup)

    def postprocess_html(self, soup, first_fetch):
        '''Left-align every element of class ``enlacenegrita10``; return ``soup``.'''
        for tag in soup.findAll(True, {'class': 'enlacenegrita10'}):
            tag['align'] = 'left'
        return soup
a/resources/recipes/el_pais.recipe b/resources/recipes/el_pais.recipe index 1e2164b2af..2e358060b8 100644 --- a/resources/recipes/el_pais.recipe +++ b/resources/recipes/el_pais.recipe @@ -2,7 +2,7 @@ __license__ = 'GPL v3' __author__ = 'Jordi Balcells, based on an earlier version by Lorenzo Vigentini & Kovid Goyal' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' -description = 'Main daily newspaper from Spain - v1.03 (03, September 2010)' +description = 'Main daily newspaper from Spain - v1.04 (19, October 2010)' __docformat__ = 'restructuredtext en' ''' @@ -32,19 +32,16 @@ class ElPais(BasicNewsRecipe): remove_javascript = True no_stylesheets = True - keep_only_tags = [ dict(name='div', attrs={'class':['cabecera_noticia','cabecera_noticia_reportaje','cabecera_noticia_opinion','contenido_noticia','caja_despiece','presentacion']})] - - extra_css = ''' - p{style:normal size:12 serif} + keep_only_tags = [ dict(name='div', attrs={'class':['cabecera_noticia_reportaje estirar','cabecera_noticia_opinion estirar','cabecera_noticia estirar','contenido_noticia','caja_despiece']})] - ''' + extra_css = ' p{text-align: justify; font-size: 100%} body{ text-align: left; font-family: serif; font-size: 100% } h1{ font-family: sans-serif; font-size:200%; font-weight: bolder; text-align: justify; } h2{ font-family: sans-serif; font-size:150%; font-weight: 500; text-align: justify } h3{ font-family: sans-serif; font-size:125%; font-weight: 500; text-align: justify } img{margin-bottom: 0.4em} ' remove_tags = [ dict(name='div', attrs={'class':['zona_superior','pie_enlaces_inferiores','contorno_f','ampliar']}), - dict(name='div', attrs={'class':['limpiar','mod_apoyo','borde_sup','votos','info_complementa','info_relacionada','buscador_m','nav_ant_sig']}), + dict(name='div', attrs={'class':['limpiar','mod_apoyo','borde_sup','votos estirar','info_complementa','info_relacionada','buscador_m','nav_ant_sig']}), dict(name='div', attrs={'id':['suscribirse 
suscrito','google_noticia','utilidades','coment','foros_not','pie','lomas','calendar']}), dict(name='p', attrs={'class':'nav_meses'}), - dict(attrs={'class':['enlaces_m','miniaturas_m']}) + dict(attrs={'class':['enlaces_m','miniaturas_m','nav_miniaturas_m']}) ] feeds = [ diff --git a/resources/recipes/elperiodico_catalan.recipe b/resources/recipes/elperiodico_catalan.recipe index e2bcb738b7..6b78f923cb 100644 --- a/resources/recipes/elperiodico_catalan.recipe +++ b/resources/recipes/elperiodico_catalan.recipe @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- __license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic ' +__copyright__ = '30 October 2010, Jordi Balcells based on an earlier recipe by Darko Miletic ' ''' elperiodico.cat ''' @@ -12,8 +12,8 @@ from calibre.ebooks.BeautifulSoup import Tag class ElPeriodico_cat(BasicNewsRecipe): title = 'El Periodico de Catalunya' - __author__ = 'Darko Miletic' - description = 'Noticias desde Catalunya' + __author__ = 'Jordi Balcells/Darko Miletic' + description = 'Noticies des de Catalunya' publisher = 'elperiodico.cat' category = 'news, politics, Spain, Catalunya' oldest_article = 2 @@ -33,15 +33,25 @@ class ElPeriodico_cat(BasicNewsRecipe): html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' - feeds = [(u"Tota l'edició", u'http://www.elperiodico.cat/rss.asp?id=46')] + feeds = [(u'Portada', u'http://www.elperiodico.cat/ca/rss/rss_portada.xml'), + (u'Internacional', u'http://www.elperiodico.cat/ca/rss/internacional/rss.xml'), + (u'Societat', u'http://www.elperiodico.cat/ca/rss/societat/rss.xml'), + (u'Ci\xe8ncia i tecnologia', u'http://www.elperiodico.cat/ca/rss/ciencia-i-tecnologia/rss.xml'), + (u'Esports', u'http://www.elperiodico.cat/ca/rss/esports/rss.xml'), + (u'Gent', u'http://www.elperiodico.cat/ca/rss/gent/rss.xml'), + (u'Opini\xf3', u'http://www.elperiodico.cat/ca/rss/opinio/rss.xml'), + (u'Pol\xedtica', u'http://www.elperiodico.cat/ca/rss/politica/rss.xml'), + 
(u'Barcelona', u'http://www.elperiodico.cat/ca/rss/barcelona/rss.xml'), + (u'Economia', u'http://www.elperiodico.cat/ca/rss/economia/rss.xml'), + (u'Cultura i espectacles', u'http://www.elperiodico.cat/ca/rss/cultura-i-espectacles/rss.xml'), + (u'Tele', u'http://www.elperiodico.cat/ca/rss/tele/rss.xml')] - keep_only_tags = [dict(name='div', attrs={'id':'noticia'})] + keep_only_tags = [dict(name='div', attrs={'class':'titularnoticia'}), + dict(name='div', attrs={'class':'noticia_completa'})] - remove_tags = [ - dict(name=['object','link','script']) - ,dict(name='ul',attrs={'class':'herramientasDeNoticia'}) - ,dict(name='div', attrs={'id':'inferiores'}) + remove_tags = [dict(name='div', attrs={'class':['opcionb','opcionb last','columna_noticia']}), + dict(name='span', attrs={'class':'opcionesnoticia'}) ] def print_version(self, url): diff --git a/resources/recipes/elperiodico_spanish.recipe b/resources/recipes/elperiodico_spanish.recipe index 073863fa15..d19adc5e58 100644 --- a/resources/recipes/elperiodico_spanish.recipe +++ b/resources/recipes/elperiodico_spanish.recipe @@ -2,17 +2,17 @@ # -*- coding: utf-8 -*- __license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic ' +__copyright__ = '30 October 2010, Jordi Balcells based on an earlier recipe by Darko Miletic ' ''' -elperiodico.com +elperiodico.cat ''' from calibre.web.feeds.news import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import Tag -class ElPeriodico_esp(BasicNewsRecipe): +class ElPeriodico_cat(BasicNewsRecipe): title = 'El Periodico de Catalunya' - __author__ = 'Darko Miletic' + __author__ = 'Jordi Balcells/Darko Miletic' description = 'Noticias desde Catalunya' publisher = 'elperiodico.com' category = 'news, politics, Spain, Catalunya' @@ -33,15 +33,25 @@ class ElPeriodico_esp(BasicNewsRecipe): html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' - feeds = [(u"Toda la edición", u'http://www.elperiodico.com/rss.asp?id=46')] + feeds = 
[(u'Portada', u'http://www.elperiodico.com/es/rss/rss_portada.xml'), + (u'Internacional', u'http://elperiodico.com/es/rss/internacional/rss.xml'), + (u'Sociedad', u'http://elperiodico.com/es/rss/sociedad/rss.xml'), + (u'Ciencia y Tecnolog\xeda', u'http://elperiodico.com/es/rss/ciencia-y-tecnologia/rss.xml'), + (u'Deportes', u'http://elperiodico.com/es/rss/deportes/rss.xml'), + (u'Gente', u'http://elperiodico.com/es/rss/gente/rss.xml'), + (u'Opini\xf3n', u'http://elperiodico.com/es/rss/opinion/rss.xml'), + (u'Pol\xedtica', u'http://elperiodico.com/es/rss/politica/rss.xml'), + (u'Barcelona', u'http://elperiodico.com/es/rss/barcelona/rss.xml'), + (u'Econom\xeda', u'http://elperiodico.com/es/rss/economia/rss.xml'), + (u'Cultura y espect\xe1culos', u'http://elperiodico.com/es/rss/cultura-y-espectaculos/rss.xml'), + (u'Tele', u'http://elperiodico.com/es/rss/cultura-y-espectaculos/rss.xml')] - keep_only_tags = [dict(name='div', attrs={'id':'noticia'})] + keep_only_tags = [dict(name='div', attrs={'class':'titularnoticia'}), + dict(name='div', attrs={'class':'noticia_completa'})] - remove_tags = [ - dict(name=['object','link','script']) - ,dict(name='ul',attrs={'class':'herramientasDeNoticia'}) - ,dict(name='div', attrs={'id':'inferiores'}) + remove_tags = [dict(name='div', attrs={'class':['opcionb','opcionb last','columna_noticia']}), + dict(name='span', attrs={'class':'opcionesnoticia'}) ] def print_version(self, url): diff --git a/resources/recipes/foxnews.recipe b/resources/recipes/foxnews.recipe index e7e76390b5..916bd28ad2 100644 --- a/resources/recipes/foxnews.recipe +++ b/resources/recipes/foxnews.recipe @@ -4,7 +4,6 @@ __copyright__ = '2010, Darko Miletic ' foxnews.com ''' -import re from calibre.web.feeds.news import BasicNewsRecipe class FoxNews(BasicNewsRecipe): @@ -21,11 +20,10 @@ class FoxNews(BasicNewsRecipe): language = 'en' publication_type = 'newsportal' remove_empty_feeds = True - extra_css = ' body{font-family: Arial,sans-serif } img{margin-bottom: 
0.4em} .caption{font-size: x-small} ' - - preprocess_regexps = [ - (re.compile(r'.*?', re.DOTALL|re.IGNORECASE),lambda match: '') - ] + extra_css = """ + body{font-family: Arial,sans-serif } + .caption{font-size: x-small} + """ conversion_options = { 'comment' : description @@ -34,27 +32,15 @@ class FoxNews(BasicNewsRecipe): , 'language' : language } - remove_attributes = ['xmlns'] - - keep_only_tags = [ - dict(name='div', attrs={'id' :['story','browse-story-content']}) - ,dict(name='div', attrs={'class':['posts articles','slideshow']}) - ,dict(name='h4' , attrs={'class':'storyDate'}) - ,dict(name='h1' , attrs={'xmlns:functx':'http://www.functx.com'}) - ,dict(name='div', attrs={'class':'authInfo'}) - ,dict(name='div', attrs={'id':'articleCont'}) - ] + remove_attributes = ['xmlns','lang'] remove_tags = [ - dict(name='div', attrs={'class':['share-links','quigo quigo2','share-text','storyControls','socShare','btm-links']}) - ,dict(name='div', attrs={'id' :['otherMedia','loomia_display','img-all-path','story-vcmId','story-url','pane-browse-story-comments','story_related']}) - ,dict(name='ul' , attrs={'class':['tools','tools alt','tools alt2','tabs']}) - ,dict(name='a' , attrs={'class':'join-discussion'}) - ,dict(name='ul' , attrs={'class':['tools','tools alt','tools alt2']}) - ,dict(name='p' , attrs={'class':'see_fullarchive'}) - ,dict(name=['object','embed','link','script']) + dict(name=['object','embed','link','script','iframe','meta','base']) + ,dict(attrs={'class':['user-control','url-description','ad-context']}) ] + remove_tags_before=dict(name='h1') + remove_tags_after =dict(attrs={'class':'url-description'}) feeds = [ (u'Latest Headlines', u'http://feeds.foxnews.com/foxnews/latest' ) @@ -67,8 +53,5 @@ class FoxNews(BasicNewsRecipe): ,(u'Entertainment' , u'http://feeds.foxnews.com/foxnews/entertainment' ) ] - def preprocess_html(self, soup): - for item in soup.findAll(style=True): - del item['style'] - return self.adeify_images(soup) - + def print_version(self, 
__license__ = 'GPL v3'
__author__ = u'Marc T\xf6nsing'

from calibre.web.feeds.news import BasicNewsRecipe


class GamespotCom(BasicNewsRecipe):
    '''
    Recipe for the review feeds of gamespot.com.

    Each article is fetched through its print view (``?print=1``).
    '''

    title = u'Gamespot.com Reviews'
    description = 'review articles from gamespot.com'
    language = 'en'
    __author__ = u'Marc T\xf6nsing'

    oldest_article = 7
    max_articles_per_feed = 40
    remove_empty_feeds = True
    no_stylesheets = True
    # The original set ``no_javascript``, which is not a recipe option;
    # ``remove_javascript`` is the attribute that strips <script> tags.
    remove_javascript = True

    feeds = [
        ('PC Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=5'),
        ('XBOX 360 Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=1029'),
        ('Wii Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=1031'),
        ('PlayStation 3 Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=1028'),
        ('PlayStation 2 Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=7'),
        ('PlayStation Portable Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=1024'),
        ('Nintendo DS Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=1026'),
        ('iPhone Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=1049'),
    ]

    remove_tags = [
        dict(name='div', attrs={'class': 'top_bar'}),
        dict(name='div', attrs={'class': 'video_embed'}),
    ]

    def get_cover_url(self):
        '''Return the URL of the static Gamespot logo used as the cover.'''
        return 'http://image.gamespotcdn.net/gamespot/shared/gs5/gslogo_bw.gif'

    def get_article_url(self, article):
        '''
        Return the print-view URL of a feed entry.

        Returns ``None`` when the entry carries no link, instead of raising
        ``TypeError`` on ``None + str`` as the original did.
        '''
        link = article.get('link')
        if not link:
            return None
        return link + '?print=1'
'''
Lenta.ru
'''

import re

from calibre.ebooks.BeautifulSoup import Tag
from calibre.web.feeds.feedparser import parse
from calibre.web.feeds.news import BasicNewsRecipe


class LentaRURecipe(BasicNewsRecipe):
    '''
    Recipe for lenta.ru.

    The site publishes a single RSS feed; ``parse_index`` splits it into
    "virtual" feeds, one per tag term found on the entries.
    '''

    title = u'Lenta.ru: \u041d\u043e\u0432\u043e\u0441\u0442\u0438'
    __author__ = 'Nikolai Kotchetkov'
    publisher = 'lenta.ru'
    category = 'news, Russia'
    description = u'Ежедневная интернет-газета. Новости со всего мира на русском языке'
    oldest_article = 3
    max_articles_per_feed = 100

    masthead_url = u'http://img.lenta.ru/i/logowrambler.gif'
    cover_url = u'http://img.lenta.ru/i/logowrambler.gif'

    # Add feed names if you want them to be sorted
    # (feeds of this list appear first, in this order).
    sortOrder = [u'_default', u'В России', u'б.СССР', u'В мире']

    encoding = 'cp1251'
    language = 'ru'
    no_stylesheets = True
    remove_javascript = True
    recursions = 0

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    keep_only_tags = [dict(name='td', attrs={'class': ['statya', 'content']})]

    remove_tags_after = [
        dict(name='p', attrs={'class': 'links'}),
        dict(name='div', attrs={'id': 'readers-block'}),
    ]

    remove_tags = [
        dict(name='table', attrs={'class': ['vrezka', 'content']}),
        dict(name='div', attrs={'class': 'b240'}),
        dict(name='div', attrs={'id': 'readers-block'}),
        dict(name='p', attrs={'class': 'links'}),
    ]

    feeds = [u'http://lenta.ru/rss/']

    extra_css = 'h1 {font-size: 1.2em; margin: 0em 0em 0em 0em;} h2 {font-size: 1.0em; margin: 0em 0em 0em 0em;} h3 {font-size: 0.8em; margin: 0em 0em 0em 0em;}'

    def parse_index(self):
        '''
        Split the single RSS feed into virtual feeds keyed by tag term.

        Returns a list of ``(feed name, article list)`` tuples; feeds named
        in ``sortOrder`` come first, the rest follow in arbitrary order.
        Entries without tags, or with an empty tag term, go to ``_default``.

        Raises NotImplementedError after logging when anything fails.
        NOTE(review): presumably this makes calibre fall back to its
        default feed handling -- confirm against BasicNewsRecipe.
        '''
        try:
            feed_data = parse(self.feeds[0])
            if not feed_data:
                raise NotImplementedError
            self.log("parse_index: Feed loaded successfully.")
            if 'title' in feed_data.feed:
                self.title = feed_data.feed.title
                self.log("parse_index: Title updated to: ", self.title)
            if 'image' in feed_data.feed:
                self.log("HAS IMAGE!!!!")

            feeds = {}

            def get_virtual_feed_articles(feed):
                # Create the virtual feed on first use.
                if feed not in feeds:
                    self.log("Adding new feed: ", feed)
                    feeds[feed] = (feed, [])
                return feeds[feed][1]

            # Iterate feed items and distribute articles using tags
            for item in feed_data.entries:
                link = item.get('link', '')
                title = item.get('title', '')
                if not link or not title:
                    continue
                article = {
                    'title': title,
                    'url': link,
                    'description': item.get('description', ''),
                    'date': item.get('date', ''),
                    'content': '',
                }

                tags = item.get('tags') or []
                # An entry with no tags at all belongs to the default feed.
                to_default = not tags
                terms = []
                for tag in tags:
                    term = tag.get('term', '')
                    if not term:
                        to_default = True
                    elif term not in terms:
                        terms.append(term)

                # Each article is added at most once per feed.
                if to_default:
                    get_virtual_feed_articles('_default').append(article)
                for term in terms:
                    get_virtual_feed_articles(term).append(article)

            # Get feed list: sorted feeds first, then everything else.
            result = []
            for feed_name in self.sortOrder:
                if feed_name in feeds:
                    result.append(feeds.pop(feed_name))
            result.extend(feeds.values())
            return result

        except Exception as err:
            self.log(err)
            raise NotImplementedError

    def preprocess_html(self, soup):
        '''Return ``self.adeify_images(soup)``.'''
        return self.adeify_images(soup)

    def postprocess_html(self, soup, first_fetch):
        '''
        Rebuild the article body as: header, date, picture, text.

        Everything from the first h1/h2 onwards is moved into a new div,
        the date div and the first image are inserted after the header, and
        the new div replaces the original ``td.statya``/``td.content``.
        Returns ``soup``.
        '''
        contents = Tag(soup, 'div')

        # Remove the readers block if it survived the earlier clean-up.
        for garbage in soup.findAll('div', {'id': 'readers-block'}):
            garbage.extract()

        # Find article text using header and move the header and all of
        # its following siblings into contents.
        element = soup.find(['h1', 'h2'])
        if element:
            element.name = 'h1'
        while element:
            next_element = element.nextSibling
            element.extract()
            contents.insert(len(contents.contents), element)
            element = next_element

        # Place article date after header
        date_pattern = re.compile(r'\d{2}\.\d{2}\.\d{4}, \d{2}:\d{2}:\d{2}')
        for date in soup.findAll(text=date_pattern):
            parent = date.parent
            # .get() avoids a KeyError on a div without a class attribute.
            if (parent and isinstance(parent, Tag) and 'div' == parent.name
                    and 'dt' == parent.get('class')):
                # Date div found
                parent.extract()
                parent['style'] = 'font-size: 0.5em; color: gray; font-family: monospace;'
                contents.insert(1, parent)

        # Place article picture after date
        pic = soup.find('img')
        if pic:
            pic_div = Tag(soup, 'div')
            pic_div['style'] = 'width: 100%; text-align: center;'
            pic.extract()
            pic_div.insert(0, pic)
            title = pic.get('title', None)
            if title:
                # Use the image title as a caption below the picture.
                title_div = Tag(soup, 'div')
                title_div['style'] = 'font-size: 0.5em;'
                title_div.insert(0, title)
                pic_div.insert(1, title_div)
            contents.insert(2, pic_div)

        body = soup.find('td', {'class': ['statya', 'content']})
        if body:
            body.replaceWith(contents)

        return soup
'Mediapart' - __author__ = 'Mathieu Godlewski ' + __author__ = 'Mathieu Godlewski' description = 'Global news in french from online newspapers' oldest_article = 7 language = 'fr' + needs_subscription = True max_articles_per_feed = 50 no_stylesheets = True - html2lrf_options = ['--base-font-size', '10'] + cover_url = 'http://www.mediapart.fr/sites/all/themes/mediapart/mediapart/images/annonce.jpg' feeds = [ ('Les articles', 'http://www.mediapart.fr/articles/feed'), ] - preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE|re.DOTALL), i[1]) for i in - [ - (r'', lambda match : '

'+match.group(1)+'

'), - (r'

Mediapart\.fr

', lambda match : ''), - (r']*>[\s]*

', lambda match : ''), - (r'

[^>]*

', lambda match : ''), +# -- print-version has poor quality on this website, better do the conversion ourselves +# +# preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE|re.DOTALL), i[1]) for i in +# [ +# (r'', lambda match : '

'+match.group(1)+'

'), +# (r'[^>]+]*>([^<]*)[^<]*', +# lambda match : ''+match.group(1)+''), +# (r'\'', lambda match: '’'), +# ] +# ] +# +# remove_tags = [ dict(name='div', attrs={'class':'print-source_url'}), +# dict(name='div', attrs={'class':'print-links'}), +# dict(name='img', attrs={'src':'entete_article.png'}), +# dict(name='br') ] +# +# def print_version(self, url): +# raw = self.browser.open(url).read() +# soup = BeautifulSoup(raw.decode('utf8', 'replace')) +# div = soup.find('div', {'id':re.compile('node-\d+')}) +# if div is None: +# return None +# article_id = string.replace(div['id'], 'node-', '') +# if article_id is None: +# return None +# return 'http://www.mediapart.fr/print/'+article_id + +# -- Non-print version [dict(name='div', attrs={'class':'advert'})] + + keep_only_tags = [ + dict(name='h1', attrs={'class':'title'}), + dict(name='div', attrs={'class':'page_papier_detail'}), ] - ] - remove_tags = [ dict(name='div', attrs={'class':'print-source_url'}), - dict(name='div', attrs={'class':'print-links'}), - dict(name='img', attrs={'src':'entete_article.png'}), - ] + def preprocess_html(self,soup): + for title in soup.findAll('div', {'class':'titre'}): + tag = Tag(soup, 'h3') + title.replaceWith(tag) + tag.insert(0,title) + return soup +# -- Handle login + + def get_browser(self): + br = BasicNewsRecipe.get_browser() + if self.username is not None and self.password is not None: + br.open('http://www.mediapart.fr/') + br.select_form(nr=1) + br['name'] = self.username + br['pass'] = self.password + br.submit() + return br - def print_version(self, url): - raw = self.browser.open(url).read() - soup = BeautifulSoup(raw.decode('utf8', 'replace')) - div = soup.find('div', {'class':'node node-type-article'}) - if div is None: - return None - article_id = string.replace(div['id'], 'node-', '') - if article_id is None: - return None - return 'http://www.mediapart.fr/print/'+article_id diff --git a/resources/recipes/ming_pao.recipe b/resources/recipes/ming_pao.recipe new file 
__license__ = 'GPL v3'
__copyright__ = '2010, Eddie Lau'
'''
modified from Singtao Toronto calibre recipe by rty
Change Log:
2010/10/31: skip repeated articles in section pages
'''

import datetime

from calibre.web.feeds.recipes import BasicNewsRecipe


class AdvancedUserRecipe1278063072(BasicNewsRecipe):
    '''
    Recipe for Ming Pao (Hong Kong).

    The index is scraped from the dated section pages under
    ``http://news.mingpao.com/<YYYYMMDD>/`` rather than from RSS feeds.
    '''

    title = 'Ming Pao - Hong Kong'
    oldest_article = 1
    max_articles_per_feed = 100
    __author__ = 'Eddie Lau'
    description = 'Hong Kong Chinese Newspaper'
    publisher = 'news.mingpao.com'
    category = 'Chinese, News, Hong Kong'
    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    language = 'zh'
    encoding = 'Big5-HKSCS'
    recursions = 0
    conversion_options = {'linearize_tables': True}
    masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
    keep_only_tags = [
        dict(name='h1'),
        dict(attrs={'id': ['newscontent01', 'newscontent02']}),
    ]

    def get_fetchdate(self):
        '''
        Return the edition date to fetch, formatted as ``YYYYMMDD``.

        All news are available at around 5.30am Hong Kong time.  Adding
        2.5 hours to UTC makes the date roll over at that moment
        (HKT is UTC+8, and 8h - 5.5h = 2.5h).
        '''
        dt_utc = datetime.datetime.utcnow()
        dt_local = dt_utc + datetime.timedelta(hours=2.5)
        return dt_local.strftime("%Y%m%d")

    def parse_index(self):
        '''
        Return a list of ``(section title, article list)`` tuples.

        Sections for which ``parse_section`` finds no articles are omitted.
        '''
        base_url = 'http://news.mingpao.com/' + self.get_fetchdate() + '/'
        sections = [
            (u'\u8981\u805e Headline', 'gaindex.htm'),
            (u'\u6559\u80b2 Education', 'gfindex.htm'),
            (u'\u6e2f\u805e Local', 'gbindex.htm'),
            (u'\u793e\u8a55\u2027\u7b46\u9663 Editorial', 'mrindex.htm'),
            (u'\u8ad6\u58c7 Forum', 'faindex.htm'),
            (u'\u4e2d\u570b China', 'caindex.htm'),
            (u'\u570b\u969b World', 'taindex.htm'),
            ('Tech News', 'naindex.htm'),
            (u'\u9ad4\u80b2 Sport', 'spindex.htm'),
            (u'\u526f\u520a Supplement', 'jaindex.htm'),
        ]
        feeds = []
        for title, page in sections:
            articles = self.parse_section(base_url + page)
            if articles:
                feeds.append((title, articles))
        return feeds

    def parse_section(self, url):
        '''
        Return the articles linked from the section index page ``url``.

        Each article is a dict with ``title``, ``url`` and ``description``
        keys.  Repeated links are skipped, as are bullets without a link.
        '''
        # Build article URLs from the directory of the index page itself,
        # so they always carry the same date as the page that was fetched
        # (the original recomputed the date and could disagree with it).
        base_url = url.rsplit('/', 1)[0] + '/'
        soup = self.index_to_soup(url)
        current_articles = []
        included_urls = set()
        for bullet in soup.findAll(attrs={'class': ['bullet']}):
            a = bullet.find('a', href=True)
            if a is None:
                continue
            article_url = base_url + a['href']
            if article_url in included_urls:
                continue
            included_urls.add(article_url)
            current_articles.append({
                'title': self.tag_to_string(a),
                'url': article_url,
                'description': '',
            })
        return current_articles
category = 'science news, science articles, science jobs, drugs, cancer, depression, computer software' oldest_article = 7 max_articles_per_feed = 100 @@ -21,7 +21,12 @@ class NewScientist(BasicNewsRecipe): cover_url = 'http://www.newscientist.com/currentcover.jpg' masthead_url = 'http://www.newscientist.com/img/misc/ns_logo.jpg' encoding = 'utf-8' - extra_css = ' body{font-family: Arial,sans-serif} img{margin-bottom: 0.8em} ' + needs_subscription = 'optional' + extra_css = """ + body{font-family: Arial,sans-serif} + img{margin-bottom: 0.8em} + .quotebx{font-size: x-large; font-weight: bold; margin-right: 2em; margin-left: 2em} + """ conversion_options = { 'comment' : description @@ -33,15 +38,27 @@ class NewScientist(BasicNewsRecipe): keep_only_tags = [dict(name='div', attrs={'id':['pgtop','maincol','blgmaincol','nsblgposts','hldgalcols']})] + def get_browser(self): + br = BasicNewsRecipe.get_browser() + br.open('http://www.newscientist.com/') + if self.username is not None and self.password is not None: + br.open('https://www.newscientist.com/user/login?redirectURL=') + br.select_form(nr=2) + br['loginId' ] = self.username + br['password'] = self.password + br.submit() + return br + remove_tags = [ dict(name='div' , attrs={'class':['hldBd','adline','pnl','infotext' ]}) ,dict(name='div' , attrs={'id' :['compnl','artIssueInfo','artTools','comments','blgsocial','sharebtns']}) ,dict(name='p' , attrs={'class':['marker','infotext' ]}) ,dict(name='meta' , attrs={'name' :'description' }) - ,dict(name='a' , attrs={'rel' :'tag' }) + ,dict(name='a' , attrs={'rel' :'tag' }) + ,dict(name=['link','base','meta','iframe','object','embed']) ] remove_tags_after = dict(attrs={'class':['nbpcopy','comments']}) - remove_attributes = ['height','width'] + remove_attributes = ['height','width','lang'] feeds = [ (u'Latest Headlines' , u'http://feeds.newscientist.com/science-news' ) @@ -62,6 +79,8 @@ class NewScientist(BasicNewsRecipe): return url + '?full=true&print=true' def 
preprocess_html(self, soup): + for item in soup.findAll(['quote','quotetext']): + item.name='p' for tg in soup.findAll('a'): if tg.string == 'Home': tg.parent.extract() diff --git a/resources/recipes/nytimes.recipe b/resources/recipes/nytimes.recipe index a2d5135045..c656450990 100644 --- a/resources/recipes/nytimes.recipe +++ b/resources/recipes/nytimes.recipe @@ -5,62 +5,59 @@ __copyright__ = '2008, Kovid Goyal ' ''' nytimes.com ''' -import re -import time -from calibre import entity_to_unicode +import re, string, time +from calibre import entity_to_unicode, strftime from calibre.web.feeds.recipes import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString, \ -Comment, BeautifulStoneSoup +from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, BeautifulStoneSoup class NYTimes(BasicNewsRecipe): - title = 'New York Times Top Stories' - __author__ = 'GRiker' - language = 'en' - requires_version = (0, 7, 5) - description = 'Top Stories from the New York Times' + # set headlinesOnly to True for the headlines-only version + headlinesOnly = True - # List of sections typically included in Top Stories. Use a keyword from the - # right column in the excludeSectionKeywords[] list to skip downloading that section - sections = { - 'arts' : 'Arts', - 'business' : 'Business', - 'diningwine' : 'Dining & Wine', - 'editorials' : 'Editorials', - 'health' : 'Health', - 'magazine' : 'Magazine', - 'mediaadvertising' : 'Media & Advertising', - 'newyorkregion' : 'New York/Region', - 'oped' : 'Op-Ed', - 'politics' : 'Politics', - 'science' : 'Science', - 'sports' : 'Sports', - 'technology' : 'Technology', - 'topstories' : 'Top Stories', - 'travel' : 'Travel', - 'us' : 'U.S.', - 'world' : 'World' - } + # includeSections: List of sections to include. If empty, all sections found will be included. + # Otherwise, only the sections named will be included. 
For example, + # + # includeSections = ['Politics','Sports'] + # + # would cause only the Politics and Sports sections to be included. - # Add section keywords from the right column above to skip that section - # For example, to skip sections containing the word 'Sports' or 'Dining', use: - # excludeSectionKeywords = ['Sports', 'Dining'] - # Fetch only Business and Technology - # excludeSectionKeywords = ['Arts','Dining','Editorials','Health','Magazine','Media','Region','Op-Ed','Politics','Science','Sports','Top Stories','Travel','U.S.','World'] - # Fetch only Top Stories - # excludeSectionKeywords = ['Arts','Business','Dining','Editorials','Health','Magazine','Media','Region','Op-Ed','Politics','Science','Sports','Technology','Travel','U.S.','World'] - # By default, no sections are skipped. - excludeSectionKeywords = [] + includeSections = [] # by default, all sections included + + # excludeSections: List of sections to exclude. If empty, all sections found will be included. + # Otherwise, the sections named will be excluded. For example, + # + # excludeSections = ['Politics','Sports'] + # + # would cause the Politics and Sports sections to be excluded. This parameter can be used + # in conjuction with includeSections although in most cases using one or the other, but + # not both, is sufficient. + + excludeSections = [] # one_picture_per_article specifies that calibre should only use the first image # from an article (if one exists). If one_picture_per_article = True, the image # will be moved to a location between the headline and the byline. # If one_picture_per_article = False, all images from the article will be included + # and shown in their original location. 
one_picture_per_article = True # The maximum number of articles that will be downloaded - max_articles_per_feed = 40 + max_articles_per_feed = 100 + + + if headlinesOnly: + title='New York Times Headlines' + description = 'Headlines from the New York Times' + else: + title='New York Times' + description = 'Today\'s New York Times' + + __author__ = 'GRiker/Kovid Goyal/Nick Redding' + language = 'en' + requires_version = (0, 7, 5) + timefmt = '' needs_subscription = True @@ -82,6 +79,7 @@ class NYTimes(BasicNewsRecipe): 'entry-response module', 'icon enlargeThis', 'leftNavTabs', + 'metaFootnote', 'module box nav', 'nextArticleLink', 'nextArticleLink clearfix', @@ -89,12 +87,13 @@ class NYTimes(BasicNewsRecipe): 'relatedSearchesModule', 'side_tool', 'singleAd', - 'subNavigation clearfix', - 'subNavigation tabContent active', - 'subNavigation tabContent active clearfix', + re.compile('^subNavigation'), + re.compile('^leaderboard'), + re.compile('^module'), ]}), dict(id=[ 'adxLeaderboard', + 'adxSponLink', 'archive', 'articleExtras', 'articleInline', @@ -105,87 +104,98 @@ class NYTimes(BasicNewsRecipe): 'footer', 'header', 'header_search', + 'inlineBox', 'login', 'masthead', 'masthead-nav', 'memberTools', 'navigation', 'portfolioInline', + 'readerReviews', + 'readerReviewsCount', 'relatedArticles', + 'relatedTopics', 'respond', 'side_search', 'side_index', 'side_tool', 'toolsRight', ]), - dict(name=['script', 'noscript', 'style'])] - + dict(name=['script', 'noscript', 'style','form','hr'])] no_stylesheets = True - extra_css = '.headline {text-align: left;}\n \ - .byline {font-family: monospace; \ - text-align: left; \ - margin-top: 0px; \ - margin-bottom: 0px;}\n \ - .dateline {font-size: small; \ - margin-top: 0px; \ - margin-bottom: 0px;}\n \ - .timestamp {font-size: small; \ - margin-top: 0px; \ - margin-bottom: 0px;}\n \ - .source {text-align: left;}\n \ - .image {text-align: center;}\n \ - .credit {text-align: right; \ - font-size: small; \ - margin-top: 0px; \ - 
margin-bottom: 0px;}\n \ - .articleBody {text-align: left;}\n \ - .authorId {text-align: left; \ - font-style: italic;}\n ' + extra_css = ''' + .articleHeadline { text-align: left; margin-top:0.5em; margin-bottom:0.25em; } + .credit { text-align: right; font-size: small; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; } + .byline { text-align: left; font-size: small; line-height:1em; margin-top:10px; margin-left:0; margin-right:0; margin-bottom: 0; } + .dateline { text-align: left; font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; } + .kicker { font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; } + .timestamp { text-align: left; font-size: small; } + .caption { font-size: small; font-style:italic; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; } + a:link {text-decoration: none; } + .articleBody { } + .authorId {text-align: left; } + .image {text-align: center;} + .source {text-align: left; }''' - def dump_ans(self, ans) : + def filter_ans(self, ans) : total_article_count = 0 - for section in ans : + idx = 0 + idx_max = len(ans)-1 + while idx <= idx_max: + if self.includeSections != []: + if ans[idx][0] not in self.includeSections: + print "SECTION NOT INCLUDED: ",ans[idx][0] + del ans[idx] + idx_max = idx_max-1 + continue + if ans[idx][0] in self.excludeSections: + print "SECTION EXCLUDED: ",ans[idx][0] + del ans[idx] + idx_max = idx_max-1 + continue if self.verbose: - self.log("section %s: %d articles" % (section[0], len(section[1])) ) - for article in section[1]: + self.log("Section %s: %d articles" % (ans[idx][0], len(ans[idx][1])) ) + for article in ans[idx][1]: total_article_count += 1 if self.verbose: self.log("\t%-40.40s... \t%-60.60s..." 
% (article['title'].encode('cp1252','replace'), article['url'].encode('cp1252','replace'))) + idx = idx+1 + self.log( "Queued %d articles" % total_article_count ) + return ans def fixChars(self,string): # Replace lsquo (\x91) - fixed = re.sub("\x91","‘",string) + fixed = re.sub("\x91","‘",string) # Replace rsquo (\x92) - fixed = re.sub("\x92","’",fixed) + fixed = re.sub("\x92","’",fixed) # Replace ldquo (\x93) - fixed = re.sub("\x93","“",fixed) + fixed = re.sub("\x93","“",fixed) # Replace rdquo (\x94) - fixed = re.sub("\x94","”",fixed) + fixed = re.sub("\x94","”",fixed) # Replace ndash (\x96) - fixed = re.sub("\x96","–",fixed) + fixed = re.sub("\x96","–",fixed) # Replace mdash (\x97) - fixed = re.sub("\x97","—",fixed) + fixed = re.sub("\x97","—",fixed) return fixed def get_browser(self): br = BasicNewsRecipe.get_browser() if self.username is not None and self.password is not None: - try: - br.open('http://www.nytimes.com/auth/login') - br.select_form(name='login') - br['USERID'] = self.username - br['PASSWORD'] = self.password - br.submit() - except: - self.log("\nFailed to login") + br.open('http://www.nytimes.com/auth/login') + br.select_form(name='login') + br['USERID'] = self.username + br['PASSWORD'] = self.password + raw = br.submit().read() + if 'Please try again' in raw: + raise Exception('Your username and password are incorrect') return br def skip_ad_pages(self, soup): @@ -213,6 +223,9 @@ class NYTimes(BasicNewsRecipe): cover = None return cover + def short_title(self): + return self.title + def index_to_soup(self, url_or_raw, raw=False): ''' OVERRIDE of class method @@ -255,157 +268,184 @@ class NYTimes(BasicNewsRecipe): # Kindle TOC descriptions won't render certain characters if description: massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES)) - # Replace '&' with '&' - massaged = re.sub("&","&", massaged) + # Replace '&' with '&' + massaged = re.sub("&","&", massaged) return self.fixChars(massaged) 
else: return description - def parse_index(self): + def parse_todays_index(self): + + def feed_title(div): + return ''.join(div.findAll(text=True, recursive=True)).strip() + + articles = {} + key = None + ans = [] + url_list = [] + + def handle_article(div): + a = div.find('a', href=True) + if not a: + return + url = re.sub(r'\?.*', '', a['href']) + if not url.startswith("http"): + return + if not url.endswith(".html"): + return + if 'podcast' in url: + return + if '/video/' in url: + return + url += '?pagewanted=all' + if url in url_list: + return + url_list.append(url) + title = self.tag_to_string(a, use_alt=True).strip() + description = '' + pubdate = strftime('%a, %d %b') + summary = div.find(True, attrs={'class':'summary'}) + if summary: + description = self.tag_to_string(summary, use_alt=False) + author = '' + authorAttribution = div.find(True, attrs={'class':'byline'}) + if authorAttribution: + author = self.tag_to_string(authorAttribution, use_alt=False) + else: + authorAttribution = div.find(True, attrs={'class':'byline'}) + if authorAttribution: + author = self.tag_to_string(authorAttribution, use_alt=False) + feed = key if key is not None else 'Uncategorized' + if not articles.has_key(feed): + ans.append(feed) + articles[feed] = [] + articles[feed].append( + dict(title=title, url=url, date=pubdate, + description=description, author=author, + content='')) + + + soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html') + + + # Find each article + for div in soup.findAll(True, + attrs={'class':['section-headline', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}): + + if div['class'] in ['section-headline','sectionHeader']: + key = string.capwords(feed_title(div)) + key = key.replace('Op-ed','Op-Ed') + key = key.replace('U.s.','U.S.') + elif div['class'] in ['story', 'story headline'] : + handle_article(div) + elif div['class'] == 'headlinesOnly multiline flush': + for lidiv in div.findAll('li'): + 
handle_article(lidiv) + + ans = [(key, articles[key]) for key in ans if articles.has_key(key)] + return self.filter_ans(ans) + + def parse_headline_index(self): + articles = {} ans = [] - - feed = key = 'All Top Stories' - articles[key] = [] - ans.append(key) - self.log("Scanning 1 section ...") + url_list = [] soup = self.index_to_soup('http://www.nytimes.com/pages/todaysheadlines/') - # Fetch the outer table - table = soup.find('table') - previousTable = table + # Fetch the content table + content_table = soup.find('table',{'id':'content'}) + if content_table is None: + self.log("FATAL ERROR: CANNOT FIND CONTENT TABLE") + return None - # Find the deepest table containing the stories - while True : - table = table.find('table') - if table.find(text=re.compile('top stories start')) : - previousTable = table - continue - else : - table = previousTable - break + # Within this table are entries, each containing one or more h6 tags which represent sections - # There are multiple subtables, find the one containing the stories - for block in table.findAll('table') : - if block.find(text=re.compile('top stories start')) : - table = block - break - else : - continue + for td_col in content_table.findAll('td', {'id' : re.compile('Column')}): + for div_sec in td_col.findAll('div',recursive=False): + for h6_sec_name in div_sec.findAll('h6',{'style' : re.compile('text-transform: *uppercase')}): + section_name = self.tag_to_string(h6_sec_name,use_alt=False) + section_name = re.sub(r'^ *$','',section_name) + if section_name == '': + continue + section_name=string.capwords(section_name) + if section_name == 'U.s.': + section_name = 'U.S.' 
+ elif section_name == 'Op-ed': + section_name = 'Op-Ed' + pubdate = strftime('%a, %d %b') - # Again there are multiple subtables, find the one containing the stories - for storyblock in table.findAll('table') : - if storyblock.find(text=re.compile('top stories start')) : - break - else : - continue - - skipThisSection = False - todays_article_count = 0 - # Within this table are entries - self.log("Fetching feed Top Stories") - for tr in storyblock.findAllNext('tr'): - if tr.find('span') is not None : - - sectionblock = tr.find(True, attrs={'face':['times new roman, times,sans serif', - 'times new roman,times, sans serif', - 'times new roman, times, sans serif']}) - section = None - bylines = [] - descriptions = [] - pubdate = None - - # Get the Section title - for (x,i) in enumerate(sectionblock.contents) : - skipThisSection = False - # Extract the section title - if ('Comment' in str(i.__class__)) : - if 'start(name=' in i : - section = i[i.find('=')+1:-2] - - if not self.sections.has_key(section) : - skipThisSection = True + search_div = div_sec + for next_tag in h6_sec_name.findNextSiblings(True): + if next_tag.__class__.__name__ == 'Tag': + if next_tag.name == 'div': + search_div = next_tag break - # Check for excluded section - if len(self.excludeSectionKeywords): - key = self.sections[section] - excluded = re.compile('|'.join(self.excludeSectionKeywords)) - if excluded.search(key) or articles.has_key(key): - skipThisSection = True - break - - # Get the bylines and descriptions - if not skipThisSection : - lines = sectionblock.contents - contentStrings = [] - - for line in lines: - if not isinstance(line, Comment) and line.strip and line.strip() > "": - contentStrings.append(line.strip()) - - # Gather the byline/description pairs - bylines = [] - descriptions = [] - for contentString in contentStrings: - if contentString[0:3] == 'By ' and contentString[3].isupper() : - bylines.append(contentString) + # Get the articles + for h3_item in 
search_div.findAll('h3'): + byline = h3_item.h6 + if byline is not None: + author = self.tag_to_string(byline,usa_alt=False) else: - descriptions.append(contentString) - - # Fetch the article titles and URLs - articleCount = len(sectionblock.findAll('span')) - todays_article_count += articleCount - for (i,span) in enumerate(sectionblock.findAll(attrs={'class':'headlineWrapper'})) : - a = span.find('a', href=True) + author = '' + a = h3_item.find('a', href=True) + if not a: + continue url = re.sub(r'\?.*', '', a['href']) + if not url.startswith("http"): + continue + if not url.endswith(".html"): + continue + if 'podcast' in url: + continue + if 'video' in url: + continue url += '?pagewanted=all' + if url in url_list: + continue + url_list.append(url) + self.log("URL %s" % url) + title = self.tag_to_string(a, use_alt=True).strip() + desc = h3_item.find('p') + if desc is not None: + description = self.tag_to_string(desc,use_alt=False) + else: + description = '' + if not articles.has_key(section_name): + ans.append(section_name) + articles[section_name] = [] + articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content='')) - title = self.tag_to_string(a, use_alt=True) - # prepend the section name - title = self.sections[section] + " · " + title - if not isinstance(title, unicode): - title = title.decode('utf-8', 'replace') - - # Allow for unattributed, undescribed entries "Editor's Note" - if i >= len(descriptions) : - description = None - else : - description = descriptions[i] - - if len(bylines) == articleCount : - author = bylines[i] - else : - author = None - - # Check for duplicates - duplicateFound = False - if len(articles[feed]) > 1: - for article in articles[feed] : - if url == article['url'] : - duplicateFound = True - break - - if duplicateFound: - # Continue fetching, don't add this article - todays_article_count -= 1 - continue - - if not articles.has_key(feed): - articles[feed] = [] - 
articles[feed].append( - dict(title=title, url=url, date=pubdate, - description=description, author=author, content='')) -# self.log("Queuing %d articles from %s" % (todays_article_count, "Top Stories")) - - ans = self.sort_index_by(ans, {'Top Stories':-1}) ans = [(key, articles[key]) for key in ans if articles.has_key(key)] - self.dump_ans(ans) - return ans + return self.filter_ans(ans) + + def parse_index(self): + if self.headlinesOnly: + return self.parse_headline_index() + else: + return self.parse_todays_index() + + def strip_anchors(self,soup): + paras = soup.findAll(True) + for para in paras: + aTags = para.findAll('a') + for a in aTags: + if a.img is None: + a.replaceWith(a.renderContents().decode('cp1252','replace')) + return soup + def preprocess_html(self, soup): + + kicker_tag = soup.find(attrs={'class':'kicker'}) + if kicker_tag: # remove Op_Ed author head shots + tagline = self.tag_to_string(kicker_tag) + if tagline=='Op-Ed Columnist': + img_div = soup.find('div','inlineImage module') + if img_div: + img_div.extract() return self.strip_anchors(soup) def postprocess_html(self,soup, True): @@ -422,8 +462,9 @@ class NYTimes(BasicNewsRecipe): firstImg = inlineImgs[0] for inlineImg in inlineImgs[1:]: inlineImg.extract() - # Move firstImg after headline - cgFirst = soup.find(True, {'class':'columnGroup first'}) + # Move firstImg before article body + #article_body = soup.find(True, {'id':'articleBody'}) + cgFirst = soup.find(True, {'class':re.compile('columnGroup *first')}) if cgFirst: # Strip all sibling NavigableStrings: noise navstrings = cgFirst.findAll(text=True, recursive=False) @@ -443,30 +484,18 @@ class NYTimes(BasicNewsRecipe): if headline_found: cgFirst.insert(insertLoc,firstImg) else: - self.log(">>> No class:'columnGroup first' found <<<") - # Change class="kicker" to

- kicker = soup.find(True, {'class':'kicker'}) - if kicker and kicker.contents[0]: - h3Tag = Tag(soup, "h3") - h3Tag.insert(0, self.fixChars(self.tag_to_string(kicker, - use_alt=False))) - kicker.replaceWith(h3Tag) + self.log(">>> No class:'columnGroup first' found <<<") - # Change captions to italic -1 + # Change captions to italic for caption in soup.findAll(True, {'class':'caption'}) : if caption and caption.contents[0]: - emTag = Tag(soup, "em") + cTag = Tag(soup, "p", [("class", "caption")]) c = self.fixChars(self.tag_to_string(caption,use_alt=False)).strip() mp_off = c.find("More Photos") if mp_off >= 0: c = c[:mp_off] - emTag.insert(0, c) - #hrTag = Tag(soup, 'hr') - #hrTag['class'] = 'caption_divider' - hrTag = Tag(soup, 'div') - hrTag['class'] = 'divider' - emTag.insert(1, hrTag) - caption.replaceWith(emTag) + cTag.insert(0, c) + caption.replaceWith(cTag) # Change to

h1 = soup.find('h1') @@ -506,17 +535,6 @@ class NYTimes(BasicNewsRecipe): bTag.insert(0, subhead.contents[0]) subhead.replaceWith(bTag) - # Synthesize a section header - dsk = soup.find('meta', attrs={'name':'dsk'}) - if dsk and dsk.has_key('content'): - hTag = Tag(soup,'h3') - hTag['class'] = 'section' - hTag.insert(0,NavigableString(dsk['content'])) - articleTag = soup.find(True, attrs={'id':'article'}) - if articleTag: - articleTag.insert(0,hTag) - - # Add class="articleBody" to
so we can format with CSS divTag = soup.find('div',attrs={'id':'articleBody'}) if divTag: divTag['class'] = divTag['id'] @@ -532,11 +550,3 @@ class NYTimes(BasicNewsRecipe): return soup - def strip_anchors(self,soup): - paras = soup.findAll(True) - for para in paras: - aTags = para.findAll('a') - for a in aTags: - if a.img is None: - a.replaceWith(a.renderContents().decode('cp1252','replace')) - return soup diff --git a/resources/recipes/nytimes_sub.recipe b/resources/recipes/nytimes_sub.recipe index 1814132667..ed1ba75f0f 100644 --- a/resources/recipes/nytimes_sub.recipe +++ b/resources/recipes/nytimes_sub.recipe @@ -4,56 +4,66 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' ''' nytimes.com -V5 - One picture per article, moved to top: -Headline -Image -Byline -Story ''' import re, string, time -from calibre import strftime +from calibre import entity_to_unicode, strftime from calibre.web.feeds.recipes import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, NavigableString, Tag +from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, BeautifulStoneSoup class NYTimes(BasicNewsRecipe): - title = 'The New York Times' - __author__ = 'GRiker' - language = 'en' - requires_version = (0, 7, 5) + # set headlinesOnly to True for the headlines-only version + headlinesOnly = False - description = 'Daily news from the New York Times (subscription version)' - allSectionKeywords = ['The Front Page', 'International','National','Obituaries','Editorials', - 'New York','Business Day','Science Times','Sports','Dining','Arts', - 'Home','Styles','Sunday Business','Week In Review','Travel','Magazine', - 'Book Review','Weddings','Real Estate','Automobiles',"T Men's Fashion", - "T Women's Fashion"] + # includeSections: List of sections to include. If empty, all sections found will be included. + # Otherwise, only the sections named will be included. 
For example, + # + # includeSections = ['Politics','Sports'] + # + # would cause only the Politics and Sports sections to be included. - # List of sections to exclude - # To add a section, copy the section name from the allSectionKeywords list above - # For example, to exclude 'Dining' and 'Weddings': - #excludeSectionKeywords = ['Dining','Weddings'] - excludeSectionKeywords = [] + includeSections = [] # by default, all sections included - # List of sections to include (test and debug only) - # By default, any sections in today's paper that are not listed in excludeSectionKeywords - # are downloaded. fetch_only specifies that only certain sections are to be downloaded. - # This should only be used for testing and debugging. - # For example, to download only 'The Front Page' section: - # fetch_only = set(['The Front Page']) - fetch_only = set([]) - if fetch_only: - excludeSectionKeywords = list(set(allSectionKeywords) ^ fetch_only) + # excludeSections: List of sections to exclude. If empty, all sections found will be included. + # Otherwise, the sections named will be excluded. For example, + # + # excludeSections = ['Politics','Sports'] + # + # would cause the Politics and Sports sections to be excluded. This parameter can be used + # in conjuction with includeSections although in most cases using one or the other, but + # not both, is sufficient. + + excludeSections = [] # one_picture_per_article specifies that calibre should only use the first image # from an article (if one exists). If one_picture_per_article = True, the image # will be moved to a location between the headline and the byline. # If one_picture_per_article = False, all images from the article will be included + # and shown in their original location. 
one_picture_per_article = True + # The maximum number of articles that will be downloaded + max_articles_per_feed = 100 + + + if headlinesOnly: + title='New York Times Headlines' + description = 'Headlines from the New York Times' + else: + title='New York Times' + description = 'Today\'s New York Times' + + __author__ = 'GRiker/Kovid Goyal/Nick Redding' + language = 'en' + requires_version = (0, 7, 5) + + timefmt = '' needs_subscription = True + masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif' + cover_margins = (18,18,'grey99') + remove_tags_before = dict(id='article') remove_tags_after = dict(id='article') remove_tags = [dict(attrs={'class':[ @@ -69,6 +79,7 @@ class NYTimes(BasicNewsRecipe): 'entry-response module', 'icon enlargeThis', 'leftNavTabs', + 'metaFootnote', 'module box nav', 'nextArticleLink', 'nextArticleLink clearfix', @@ -76,12 +87,13 @@ class NYTimes(BasicNewsRecipe): 'relatedSearchesModule', 'side_tool', 'singleAd', - 'subNavigation clearfix', - 'subNavigation tabContent active', - 'subNavigation tabContent active clearfix', + re.compile('^subNavigation'), + re.compile('^leaderboard'), + re.compile('^module'), ]}), dict(id=[ 'adxLeaderboard', + 'adxSponLink', 'archive', 'articleExtras', 'articleInline', @@ -92,61 +104,110 @@ class NYTimes(BasicNewsRecipe): 'footer', 'header', 'header_search', + 'inlineBox', 'login', 'masthead', 'masthead-nav', 'memberTools', 'navigation', 'portfolioInline', + 'readerReviews', + 'readerReviewsCount', 'relatedArticles', + 'relatedTopics', 'respond', 'side_search', 'side_index', 'side_tool', 'toolsRight', ]), - dict(name=['script', 'noscript', 'style'])] - masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif' - cover_margins = (18,18,'grey99') + dict(name=['script', 'noscript', 'style','form','hr'])] no_stylesheets = True - extra_css = '.headline {text-align: left;}\n \ - .byline {font-family: monospace; \ - text-align: left; \ - margin-top: 0px; \ - margin-bottom: 
0px;}\n \ - .dateline {font-size: small; \ - margin-top: 0px; \ - margin-bottom: 0px;}\n \ - .timestamp {font-size: small; \ - margin-top: 0px; \ - margin-bottom: 0px;}\n \ - .source {text-align: left;}\n \ - .image {text-align: center;}\n \ - .credit {text-align: right; \ - font-size: small; \ - margin-top: 0px; \ - margin-bottom: 0px;}\n \ - .articleBody {text-align: left;}\n \ - .authorId {text-align: left; \ - font-style: italic;}\n ' + extra_css = ''' + .articleHeadline { text-align: left; margin-top:0.5em; margin-bottom:0.25em; } + .credit { text-align: right; font-size: small; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; } + .byline { text-align: left; font-size: small; line-height:1em; margin-top:10px; margin-left:0; margin-right:0; margin-bottom: 0; } + .dateline { text-align: left; font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; } + .kicker { font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; } + .timestamp { text-align: left; font-size: small; } + .caption { font-size: small; font-style:italic; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; } + a:link {text-decoration: none; } + .articleBody { } + .authorId {text-align: left; } + .image {text-align: center;} + .source {text-align: left; }''' + + def filter_ans(self, ans) : + total_article_count = 0 + idx = 0 + idx_max = len(ans)-1 + while idx <= idx_max: + if self.includeSections != []: + if ans[idx][0] not in self.includeSections: + print "SECTION NOT INCLUDED: ",ans[idx][0] + del ans[idx] + idx_max = idx_max-1 + continue + if ans[idx][0] in self.excludeSections: + print "SECTION EXCLUDED: ",ans[idx][0] + del ans[idx] + idx_max = idx_max-1 + continue + if self.verbose: + self.log("Section %s: %d articles" % (ans[idx][0], len(ans[idx][1])) ) + for article in ans[idx][1]: + total_article_count += 1 + if self.verbose: + 
self.log("\t%-40.40s... \t%-60.60s..." % (article['title'].encode('cp1252','replace'), + article['url'].encode('cp1252','replace'))) + idx = idx+1 + + self.log( "Queued %d articles" % total_article_count ) + return ans + + def fixChars(self,string): + # Replace lsquo (\x91) + fixed = re.sub("\x91","‘",string) + + # Replace rsquo (\x92) + fixed = re.sub("\x92","’",fixed) + + # Replace ldquo (\x93) + fixed = re.sub("\x93","“",fixed) + + # Replace rdquo (\x94) + fixed = re.sub("\x94","”",fixed) + + # Replace ndash (\x96) + fixed = re.sub("\x96","–",fixed) + + # Replace mdash (\x97) + fixed = re.sub("\x97","—",fixed) + + return fixed def get_browser(self): br = BasicNewsRecipe.get_browser() if self.username is not None and self.password is not None: - try: - br.open('http://www.nytimes.com/auth/login') - br.select_form(name='login') - br['USERID'] = self.username - br['PASSWORD'] = self.password - raw = br.submit().read() - if 'Sorry, we could not find the combination you entered. Please try again.' 
in raw: - raise Exception('Your username and password are incorrect') - #open('/t/log.html', 'wb').write(raw) - except: - self.log("\nFailed to login") - + br.open('http://www.nytimes.com/auth/login') + br.select_form(name='login') + br['USERID'] = self.username + br['PASSWORD'] = self.password + raw = br.submit().read() + if 'Please try again' in raw: + raise Exception('Your username and password are incorrect') return br + def skip_ad_pages(self, soup): + # Skip ad pages served before actual article + skip_tag = soup.find(True, {'name':'skip'}) + if skip_tag is not None: + self.log.warn("Found forwarding link: %s" % skip_tag.parent['href']) + url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href']) + url += '?pagewanted=all' + self.log.warn("Skipping ad to article at '%s'" % url) + return self.index_to_soup(url, raw=True) + def get_cover_url(self): cover = None st = time.localtime() @@ -162,143 +223,232 @@ class NYTimes(BasicNewsRecipe): cover = None return cover - def get_masthead_title(self): + def short_title(self): return self.title - def dump_ans(self, ans): - total_article_count = 0 - for section in ans : - if self.verbose: - self.log("section %s: %d articles" % (section[0], len(section[1])) ) - for article in section[1]: - total_article_count += 1 - if self.verbose: - self.log("\t%-40.40s... \t%-60.60s..." 
% (article['title'].encode('mac-roman','replace'), - article['url'].encode('mac-roman','replace'))) - self.log( "Queued %d articles" % total_article_count ) + def index_to_soup(self, url_or_raw, raw=False): + ''' + OVERRIDE of class method + deals with various page encodings between index and articles + ''' + def get_the_soup(docEncoding, url_or_raw, raw=False) : + if re.match(r'\w+://', url_or_raw): + f = self.browser.open(url_or_raw) + _raw = f.read() + f.close() + if not _raw: + raise RuntimeError('Could not fetch index from %s'%url_or_raw) + else: + _raw = url_or_raw + if raw: + return _raw - def dump_hex(self, src, length=16): - ''' Diagnostic ''' - FILTER=''.join([(len(repr(chr(x)))==3) and chr(x) or '.' for x in range(256)]) - N=0; result='' - while src: - s,src = src[:length],src[length:] - hexa = ' '.join(["%02X"%ord(x) for x in s]) - s = s.translate(FILTER) - result += "%04X %-*s %s\n" % (N, length*3, hexa, s) - N+=length - print result + if not isinstance(_raw, unicode) and self.encoding: + _raw = _raw.decode(docEncoding, 'replace') + massage = list(BeautifulSoup.MARKUP_MASSAGE) + massage.append((re.compile(r'&(\S+?);'), lambda match: entity_to_unicode(match, encoding=self.encoding))) + return BeautifulSoup(_raw, markupMassage=massage) - def fixChars(self,string): - # Replace lsquo (\x91) - fixed = re.sub("\x91","‘",string) + # Entry point + print "index_to_soup()" + soup = get_the_soup( self.encoding, url_or_raw ) + contentType = soup.find(True,attrs={'http-equiv':'Content-Type'}) + docEncoding = str(contentType)[str(contentType).find('charset=') + len('charset='):str(contentType).rfind('"')] + if docEncoding == '' : + docEncoding = self.encoding - # Replace rsquo (\x92) - fixed = re.sub("\x92","’",fixed) + if self.verbose > 2: + self.log( " document encoding: '%s'" % docEncoding) + if docEncoding != self.encoding : + soup = get_the_soup(docEncoding, url_or_raw) - # Replace ldquo (\x93) - fixed = re.sub("\x93","“",fixed) - - # Replace rdquo (\x94) - 
fixed = re.sub("\x94","”",fixed) - - # Replace ndash (\x96) - fixed = re.sub("\x96","–",fixed) - - # Replace mdash (\x97) - fixed = re.sub("\x97","—",fixed) - - return fixed + return soup def massageNCXText(self, description): # Kindle TOC descriptions won't render certain characters if description: massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES)) - # Replace '&' with '&' - massaged = re.sub("&","&", massaged) + # Replace '&' with '&' + massaged = re.sub("&","&", massaged) return self.fixChars(massaged) else: return description - def parse_index(self): - soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html') + def parse_todays_index(self): def feed_title(div): - return ''.join(div.findAll(text=True, recursive=False)).strip() + return ''.join(div.findAll(text=True, recursive=True)).strip() articles = {} key = None ans = [] - # Find each instance of class="section-headline", class="story", class="story headline" - for div in soup.findAll(True, - attrs={'class':['section-headline', 'story', 'story headline']}): + url_list = [] - if div['class'] == 'section-headline': - key = string.capwords(feed_title(div)) - if self.excludeSectionKeywords: - excluded = re.compile('|'.join(self.excludeSectionKeywords)) - if excluded.search(key): - self.log("Skipping section %s" % key) - continue - articles[key] = [] - ans.append(key) - - elif div['class'] in ['story', 'story headline'] : - a = div.find('a', href=True) - if not a: - continue - url = re.sub(r'\?.*', '', a['href']) - url += '?pagewanted=all' - - title = self.massageNCXText(self.tag_to_string(a, use_alt=True).strip()) - - description = '' - pubdate = strftime('%a, %d %b') - summary = div.find(True, attrs={'class':'summary'}) - if summary: - description = self.massageNCXText(self.tag_to_string(summary, use_alt=False)) - - author = '' - authorAttribution = div.find(True, attrs={'class':'storyheadline-author'}) + def handle_article(div): + a = 
div.find('a', href=True) + if not a: + return + url = re.sub(r'\?.*', '', a['href']) + if not url.startswith("http"): + return + if not url.endswith(".html"): + return + if 'podcast' in url: + return + if '/video/' in url: + return + url += '?pagewanted=all' + if url in url_list: + return + url_list.append(url) + title = self.tag_to_string(a, use_alt=True).strip() + description = '' + pubdate = strftime('%a, %d %b') + summary = div.find(True, attrs={'class':'summary'}) + if summary: + description = self.tag_to_string(summary, use_alt=False) + author = '' + authorAttribution = div.find(True, attrs={'class':'byline'}) + if authorAttribution: + author = self.tag_to_string(authorAttribution, use_alt=False) + else: + authorAttribution = div.find(True, attrs={'class':'byline'}) if authorAttribution: author = self.tag_to_string(authorAttribution, use_alt=False) - else: - authorAttribution = div.find(True, attrs={'class':'byline'}) - if authorAttribution: - author = self.tag_to_string(authorAttribution, use_alt=False) - # Kill commas - Kindle switches to '&' - author = re.sub(',','',author) + feed = key if key is not None else 'Uncategorized' + if not articles.has_key(feed): + ans.append(feed) + articles[feed] = [] + articles[feed].append( + dict(title=title, url=url, date=pubdate, + description=description, author=author, + content='')) + + + soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html') + + + # Find each article + for div in soup.findAll(True, + attrs={'class':['section-headline', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}): + + if div['class'] in ['section-headline','sectionHeader']: + key = string.capwords(feed_title(div)) + key = key.replace('Op-ed','Op-Ed') + key = key.replace('U.s.','U.S.') + elif div['class'] in ['story', 'story headline'] : + handle_article(div) + elif div['class'] == 'headlinesOnly multiline flush': + for lidiv in div.findAll('li'): + handle_article(lidiv) - feed = key if key is 
not None else 'Uncategorized' - if not articles.has_key(feed): - articles[feed] = [] - if not 'podcasts' in url: - articles[feed].append( - dict(title=title, url=url, date=pubdate, - description=description, author=author, - content='')) - ans = self.sort_index_by(ans, {'The Front Page':-1, - 'Dining In, Dining Out':1, - 'Obituaries':2}) ans = [(key, articles[key]) for key in ans if articles.has_key(key)] - self.dump_ans(ans) - return ans + return self.filter_ans(ans) + + def parse_headline_index(self): + + articles = {} + ans = [] + url_list = [] + + soup = self.index_to_soup('http://www.nytimes.com/pages/todaysheadlines/') + + # Fetch the content table + content_table = soup.find('table',{'id':'content'}) + if content_table is None: + self.log("FATAL ERROR: CANNOT FIND CONTENT TABLE") + return None + + # Within this table are entries, each containing one or more h6 tags which represent sections + + for td_col in content_table.findAll('td', {'id' : re.compile('Column')}): + for div_sec in td_col.findAll('div',recursive=False): + for h6_sec_name in div_sec.findAll('h6',{'style' : re.compile('text-transform: *uppercase')}): + section_name = self.tag_to_string(h6_sec_name,use_alt=False) + section_name = re.sub(r'^ *$','',section_name) + if section_name == '': + continue + section_name=string.capwords(section_name) + if section_name == 'U.s.': + section_name = 'U.S.' 
+ elif section_name == 'Op-ed': + section_name = 'Op-Ed' + pubdate = strftime('%a, %d %b') + + search_div = div_sec + for next_tag in h6_sec_name.findNextSiblings(True): + if next_tag.__class__.__name__ == 'Tag': + if next_tag.name == 'div': + search_div = next_tag + break + + # Get the articles + for h3_item in search_div.findAll('h3'): + byline = h3_item.h6 + if byline is not None: + author = self.tag_to_string(byline,usa_alt=False) + else: + author = '' + a = h3_item.find('a', href=True) + if not a: + continue + url = re.sub(r'\?.*', '', a['href']) + if not url.startswith("http"): + continue + if not url.endswith(".html"): + continue + if 'podcast' in url: + continue + if 'video' in url: + continue + url += '?pagewanted=all' + if url in url_list: + continue + url_list.append(url) + self.log("URL %s" % url) + title = self.tag_to_string(a, use_alt=True).strip() + desc = h3_item.find('p') + if desc is not None: + description = self.tag_to_string(desc,use_alt=False) + else: + description = '' + if not articles.has_key(section_name): + ans.append(section_name) + articles[section_name] = [] + articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content='')) + + + ans = [(key, articles[key]) for key in ans if articles.has_key(key)] + return self.filter_ans(ans) + + def parse_index(self): + if self.headlinesOnly: + return self.parse_headline_index() + else: + return self.parse_todays_index() + + def strip_anchors(self,soup): + paras = soup.findAll(True) + for para in paras: + aTags = para.findAll('a') + for a in aTags: + if a.img is None: + a.replaceWith(a.renderContents().decode('cp1252','replace')) + return soup - def skip_ad_pages(self, soup): - # Skip ad pages served before actual article - skip_tag = soup.find(True, {'name':'skip'}) - if skip_tag is not None: - self.log.warn("Found forwarding link: %s" % skip_tag.parent['href']) - url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href']) 
- url += '?pagewanted=all' - self.log.warn("Skipping ad to article at '%s'" % url) - return self.index_to_soup(url, raw=True) def preprocess_html(self, soup): + + kicker_tag = soup.find(attrs={'class':'kicker'}) + if kicker_tag: # remove Op_Ed author head shots + tagline = self.tag_to_string(kicker_tag) + if tagline=='Op-Ed Columnist': + img_div = soup.find('div','inlineImage module') + if img_div: + img_div.extract() return self.strip_anchors(soup) def postprocess_html(self,soup, True): - print "\npostprocess_html()\n" if self.one_picture_per_article: # Remove all images after first @@ -312,8 +462,9 @@ class NYTimes(BasicNewsRecipe): firstImg = inlineImgs[0] for inlineImg in inlineImgs[1:]: inlineImg.extract() - # Move firstImg after headline - cgFirst = soup.find(True, {'class':'columnGroup first'}) + # Move firstImg before article body + #article_body = soup.find(True, {'id':'articleBody'}) + cgFirst = soup.find(True, {'class':re.compile('columnGroup *first')}) if cgFirst: # Strip all sibling NavigableStrings: noise navstrings = cgFirst.findAll(text=True, recursive=False) @@ -333,30 +484,18 @@ class NYTimes(BasicNewsRecipe): if headline_found: cgFirst.insert(insertLoc,firstImg) else: - self.log(">>> No class:'columnGroup first' found <<<") - # Change class="kicker" to

- kicker = soup.find(True, {'class':'kicker'}) - if kicker and kicker.contents and kicker.contents[0]: - h3Tag = Tag(soup, "h3") - h3Tag.insert(0, self.fixChars(self.tag_to_string(kicker, - use_alt=False))) - kicker.replaceWith(h3Tag) + self.log(">>> No class:'columnGroup first' found <<<") - # Change captions to italic -1 + # Change captions to italic for caption in soup.findAll(True, {'class':'caption'}) : if caption and caption.contents[0]: - emTag = Tag(soup, "em") + cTag = Tag(soup, "p", [("class", "caption")]) c = self.fixChars(self.tag_to_string(caption,use_alt=False)).strip() mp_off = c.find("More Photos") if mp_off >= 0: c = c[:mp_off] - emTag.insert(0, c) - #hrTag = Tag(soup, 'hr') - #hrTag['class'] = 'caption_divider' - hrTag = Tag(soup, 'div') - hrTag['class'] = 'divider' - emTag.insert(1, hrTag) - caption.replaceWith(emTag) + cTag.insert(0, c) + caption.replaceWith(cTag) # Change <nyt_headline> to <h2>

h1 = soup.find('h1') @@ -396,17 +535,6 @@ class NYTimes(BasicNewsRecipe): bTag.insert(0, subhead.contents[0]) subhead.replaceWith(bTag) - # Synthesize a section header - dsk = soup.find('meta', attrs={'name':'dsk'}) - if dsk and dsk.has_key('content'): - hTag = Tag(soup,'h3') - hTag['class'] = 'section' - hTag.insert(0,NavigableString(dsk['content'])) - articleTag = soup.find(True, attrs={'id':'article'}) - if articleTag: - articleTag.insert(0,hTag) - - # Add class="articleBody" to <div> so we can format with CSS divTag = soup.find('div',attrs={'id':'articleBody'}) if divTag: divTag['class'] = divTag['id'] @@ -422,56 +550,3 @@ class NYTimes(BasicNewsRecipe): return soup - def populate_article_metadata(self,article,soup,first): - ''' - Extract author and description from article, add to article metadata - ''' - def extract_author(soup): - byline = soup.find('meta',attrs={'name':['byl','CLMST']}) - if byline : - author = byline['content'] - else : - # Try for