From e5b1f5538ffb7ff4207723268379aa8e3e91533d Mon Sep 17 00:00:00 2001 From: Jon Ziebell Date: Wed, 8 Jan 2020 20:43:05 -0500 Subject: [PATCH] Fixed #212 - Server running out of memory when doing global/all comparisons Added chunking to the thermostat group select to reduce memory usage. --- api/cora/database.php | 25 +++++++++- api/thermostat_group.php | 102 ++++++++++++++++++++++----------------- 2 files changed, 81 insertions(+), 46 deletions(-) diff --git a/api/cora/database.php b/api/cora/database.php index b3bb510..aa2fd24 100644 --- a/api/cora/database.php +++ b/api/cora/database.php @@ -413,11 +413,13 @@ final class database extends \mysqli { * @param array $columns The columns to return. If not specified, all * columns are returned. * @param mixed $order_by String or array of order_bys. + * @param mixed $limit Number or array of numbers as arguments to the MySQL + * limit clause. * * @return array An array of the database rows with the specified columns. * Even a single result will still be returned in an array of size 1. */ - public function read($resource, $attributes = [], $columns = [], $order_by = []) { + public function read($resource, $attributes = [], $columns = [], $order_by = [], $limit = []) { $table = $this->get_table($resource); // Build the column listing. @@ -454,6 +456,7 @@ final class database extends \mysqli { ); } + // Order by if (is_array($order_by) === false) { $order_by = [$order_by]; } @@ -471,9 +474,27 @@ final class database extends \mysqli { ); } + // Limit + if (is_array($limit) === false) { + $limit = [$limit]; + } + + if (count($limit) === 0) { + $limit = ''; + } else { + $limit = ' limit ' . + implode( + ',', + array_map( + [$this, 'escape'], + $limit + ) + ); + } + // Put everything together and return the result. $query = 'select ' . $columns . ' from ' . - $this->escape_identifier($table) . $where . $order_by; + $this->escape_identifier($table) . $where . $order_by . $limit; $result = $this->query($query); /** diff --git a/api/thermostat_group.php b/api/thermostat_group.php index 2b3a9f9..e879d6c 100644 --- a/api/thermostat_group.php +++ b/api/thermostat_group.php @@ -204,56 +204,70 @@ class thermostat_group extends cora\crud { unset($attributes['address_radius']); } - // Get all matching thermostat groups. - $other_thermostat_groups = $this->database->read( - 'thermostat_group', - $attributes - ); - - // Get all the scores from the other thermostat groups $scores = []; - foreach($other_thermostat_groups as $other_thermostat_group) { - if( - isset($other_thermostat_group['temperature_profile'][$type]) === true && - isset($other_thermostat_group['temperature_profile'][$type]['score']) === true && - $other_thermostat_group['temperature_profile'][$type]['score'] !== null && - isset($other_thermostat_group['temperature_profile'][$type]['metadata']) === true && - isset($other_thermostat_group['temperature_profile'][$type]['metadata']['generated_at']) === true && - strtotime($other_thermostat_group['temperature_profile'][$type]['metadata']['generated_at']) > strtotime('-1 month') - ) { - // Skip thermostat_groups that are too far away. + $limit_start = 0; + $limit_count = 1000; + + /** + * Selecting lots of rows can eventually run PHP out of memory, so chunk + * this up into several queries to avoid that. + */ + do { + // Get all matching thermostat groups. + $other_thermostat_groups = $this->database->read( + 'thermostat_group', + $attributes, + [], // columns + [], // order_by + [$limit_start, $limit_count] // limit + ); + + // Get all the scores from the other thermostat groups + foreach($other_thermostat_groups as $other_thermostat_group) { if( - isset($address_radius) === true && - $this->haversine_great_circle_distance( - $address_latitude, - $address_longitude, - $other_thermostat_group['address_latitude'], - $other_thermostat_group['address_longitude'] - ) > $address_radius + isset($other_thermostat_group['temperature_profile'][$type]) === true && + isset($other_thermostat_group['temperature_profile'][$type]['score']) === true && + $other_thermostat_group['temperature_profile'][$type]['score'] !== null && + isset($other_thermostat_group['temperature_profile'][$type]['metadata']) === true && + isset($other_thermostat_group['temperature_profile'][$type]['metadata']['generated_at']) === true && + strtotime($other_thermostat_group['temperature_profile'][$type]['metadata']['generated_at']) > strtotime('-1 month') ) { - continue; - } + // Skip thermostat_groups that are too far away. + if( + isset($address_radius) === true && + $this->haversine_great_circle_distance( + $address_latitude, + $address_longitude, + $other_thermostat_group['address_latitude'], + $other_thermostat_group['address_longitude'] + ) > $address_radius + ) { + continue; + } - // Ignore profiles with too few datapoints. Ideally this would be time- - // based...so don't use a profile if it hasn't experienced a full year - // or heating/cooling system, but that isn't stored presently. A good - // approximation is to make sure there is a solid set of data driving - // the profile. - $required_delta_count = (($type === 'resist') ? 40 : 20); - if(count($other_thermostat_group['temperature_profile'][$type]['deltas']) < $required_delta_count) { - continue; - } + // Ignore profiles with too few datapoints. Ideally this would be time- + // based...so don't use a profile if it hasn't experienced a full year + // or heating/cooling system, but that isn't stored presently. A good + // approximation is to make sure there is a solid set of data driving + // the profile. + $required_delta_count = (($type === 'resist') ? 40 : 20); + if(count($other_thermostat_group['temperature_profile'][$type]['deltas']) < $required_delta_count) { + continue; + } - // Round the scores so they can be better displayed on a histogram or - // bell curve. - // TODO: Might be able to get rid of this? I don't think new scores are calculated at this level of detail anymore... - // $scores[] = round( - // $other_thermostat_group['temperature_profile'][$type]['score'], - // 1 - // ); - $scores[] = $other_thermostat_group['temperature_profile'][$type]['score']; + // Round the scores so they can be better displayed on a histogram or + // bell curve. + // TODO: Might be able to get rid of this? I don't think new scores are calculated at this level of detail anymore... + // $scores[] = round( + // $other_thermostat_group['temperature_profile'][$type]['score'], + // 1 + // ); + $scores[] = $other_thermostat_group['temperature_profile'][$type]['score']; + } } - } + + $limit_start += $limit_count; + } while (count($other_thermostat_groups) === $limit_count); sort($scores);