We are no longer offering accounts on this server. Consider https://gitlab.freedesktop.org/ as a place to host projects.

Commit c74aea58 authored by Brion Vibber's avatar Brion Vibber

Stomp queue restructuring for mass scalability:

- Multiplexing queues into groups and for multiple sites.
- Sharing vs breakout configurable per site and per queue via $config['queue']['breakout']
- Detect how many times a message is redelivered, discard if it's killed too many daemons
 - count configurable with $config['queue']['max_retries']
 - can dump the items to files in $config['queue']['dead_letter_dir']

Queue daemon memory & resource leak fixes:
- avoid unnecessary reconnections to the memcached server (switch persistent connections back on at the second initialization, assuming it's a child process)
- monkey-patch for leaky .ini loads in DB_DataObject::databaseStructure() - was leaking 200k per active switch
- applied leak fixes to Status_network as well, using intermediate base Safe_DataObject for both it and Memcache_DataObject

Misc queue fixes:
- correct handling of child processes exiting due to signal termination instead of regular exit
- shutdown instead of infinite respawn loop if we're already past the soft memory limit at startup
- Added --all option for xmppdaemon... still opens one xmpp connection per site that has xmpp active

Cache updates:
- add Cache::increment() method with native support for memcached atomic increment
parent 3d0c3f05
......@@ -19,57 +19,8 @@
if (!defined('STATUSNET') && !defined('LACONICA')) { exit(1); }
class Memcached_DataObject extends DB_DataObject
class Memcached_DataObject extends Safe_DataObject
{
/**
* Destructor to free global memory resources associated with
* this data object when it's unset or goes out of scope.
* DB_DataObject doesn't do this yet by itself.
*/
function __destruct()
{
    // Release whatever free() cleans up for this object first.
    $this->free();

    // Nothing more to do unless the base class defines its own destructor.
    if (!method_exists('DB_DataObject', '__destruct')) {
        return;
    }
    parent::__destruct();
}
/**
* Magic function called at serialize() time.
*
* We use this to drop a couple process-specific references
* from DB_DataObject which can cause trouble in future
* processes.
*
* @return array of variable names to include in serialization.
*/
function __sleep()
{
    // Every property currently set on the object, minus the two
    // process-specific ones that must not be serialized.
    $excluded = array('_DB_resultid', '_link_loaded');
    $properties = array_keys(get_object_vars($this));

    return array_diff($properties, $excluded);
}
/**
* Magic function called at unserialize() time.
*
* Clean out some process-specific variables which might
* be floating around from a previous process's cached
* objects.
*
* Old cached objects may still have them.
*/
function __wakeup()
{
    // A result id restored from the cache belongs to whichever process
    // serialized the object; drop it so it cannot interfere with the
    // global state of the current process.
    $this->_DB_resultid = null;

    // No local DBO references survive serialization, so reset the flag.
    $this->_link_loaded = false;
}
/**
* Wrapper for DB_DataObject's static lookup using memcached
* as backing instead of an in-process cache array.
......@@ -579,3 +530,4 @@ class Memcached_DataObject extends DB_DataObject
return $c->set($cacheKey, $value);
}
}
<?php
/*
* StatusNet - the distributed open-source microblogging tool
* Copyright (C) 2010, StatusNet, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
if (!defined('STATUSNET') && !defined('LACONICA')) { exit(1); }
/**
* Extended DB_DataObject to improve a few things:
* - free global resources from destructor
* - remove bogus global references from serialized objects
* - don't leak memory when loading already-used .ini files
* (eg when using the same schema on thousands of databases)
*/
class Safe_DataObject extends DB_DataObject
{
    /**
     * Destructor to free global memory resources associated with
     * this data object when it's unset or goes out of scope.
     * DB_DataObject doesn't do this yet by itself.
     */
    function __destruct()
    {
        $this->free();
        // Only chain up if the base class actually defines a destructor.
        if (method_exists('DB_DataObject', '__destruct')) {
            parent::__destruct();
        }
    }

    /**
     * Magic function called at serialize() time.
     *
     * We use this to drop a couple process-specific references
     * from DB_DataObject which can cause trouble in future
     * processes.
     *
     * @return array of variable names to include in serialization.
     */
    function __sleep()
    {
        $vars = array_keys(get_object_vars($this));
        $skip = array('_DB_resultid', '_link_loaded');
        return array_diff($vars, $skip);
    }

    /**
     * Magic function called at unserialize() time.
     *
     * Clean out some process-specific variables which might
     * be floating around from a previous process's cached
     * objects.
     *
     * Old cached objects may still have them.
     */
    function __wakeup()
    {
        // Refers to global state info from a previous process.
        // Clear this out so we don't accidentally break global
        // state in *this* process.
        $this->_DB_resultid = null;
        // We don't have any local DBO refs, so clear these out.
        $this->_link_loaded = false;
    }

    /**
     * Work around memory-leak bugs...
     * Had to copy-paste the whole function in order to patch a couple lines of it.
     * Would be nice if this code was better factored.
     *
     * Behaviour depends on the number of arguments:
     *  - none:  make sure the schema for this object's database/table is
     *           present in $_DB_DATAOBJECT['INI'], loading .ini files
     *           (through parseIniFiles()) or the generator as needed.
     *  - one:   build and return the structure of every table in the
     *           named database using DB_DataObject_Generator.
     *  - two or three: merge the supplied structure (and, if given, the
     *           links) into the global tables.
     *
     * @param optional string  name of database to assign / read
     * @param optional array   structure of database, and keys
     * @param optional array   table links
     *
     * @access public
     * @return true or PEAR:error on wrong parameters.. or false if no file exists..
     *              or the array(tablename => array(column_name=>type)) if called with 1 argument.. (databasename)
     */
    function databaseStructure()
    {
        global $_DB_DATAOBJECT;

        // Assignment code
        if ($args = func_get_args()) {

            if (count($args) == 1) {

                // this returns all the tables and their structure..
                if (!empty($_DB_DATAOBJECT['CONFIG']['debug'])) {
                    $this->debug("Loading Generator as databaseStructure called with args",1);
                }

                $x = new DB_DataObject;
                $x->_database = $args[0];
                $this->_connect();
                $DB = &$_DB_DATAOBJECT['CONNECTIONS'][$this->_database_dsn_md5];

                $tables = $DB->getListOf('tables');
                if (!class_exists('DB_DataObject_Generator')) {
                    require_once 'DB/DataObject/Generator.php';
                }

                foreach ($tables as $table) {
                    $y = new DB_DataObject_Generator;
                    $y->fillTableSchema($x->_database, $table);
                }
                return $_DB_DATAOBJECT['INI'][$x->_database];
            } else {

                $_DB_DATAOBJECT['INI'][$args[0]] = isset($_DB_DATAOBJECT['INI'][$args[0]]) ?
                    $_DB_DATAOBJECT['INI'][$args[0]] + $args[1] : $args[1];

                // The links array is the *third* argument. The copied code
                // tested $args[1] here, so a two-argument call read an
                // undefined $args[2] and clobbered the LINKS entry.
                if (isset($args[2])) {
                    $_DB_DATAOBJECT['LINKS'][$args[0]] = isset($_DB_DATAOBJECT['LINKS'][$args[0]]) ?
                        $_DB_DATAOBJECT['LINKS'][$args[0]] + $args[2] : $args[2];
                }
                return true;
            }
        }

        if (!$this->_database) {
            $this->_connect();
        }

        // loaded already?
        if (!empty($_DB_DATAOBJECT['INI'][$this->_database])) {

            // database loaded - but this table is not available..
            if (
                    empty($_DB_DATAOBJECT['INI'][$this->_database][$this->__table])
                    && !empty($_DB_DATAOBJECT['CONFIG']['proxy'])
                ) {
                if (!empty($_DB_DATAOBJECT['CONFIG']['debug'])) {
                    $this->debug("Loading Generator to fetch Schema",1);
                }
                if (!class_exists('DB_DataObject_Generator')) {
                    require_once 'DB/DataObject/Generator.php';
                }

                $x = new DB_DataObject_Generator;
                $x->fillTableSchema($this->_database, $this->__table);
            }
            return true;
        }

        if (empty($_DB_DATAOBJECT['CONFIG'])) {
            DB_DataObject::_loadConfig();
        }

        // if you supply this with arguments, then it will take those
        // as the database and links array...

        $schemas = isset($_DB_DATAOBJECT['CONFIG']['schema_location']) ?
            array("{$_DB_DATAOBJECT['CONFIG']['schema_location']}/{$this->_database}.ini") :
            array() ;

        // A per-database "ini_<name>" setting overrides schema_location; it
        // may be an array of paths or a PATH_SEPARATOR-delimited string.
        if (isset($_DB_DATAOBJECT['CONFIG']["ini_{$this->_database}"])) {
            $schemas = is_array($_DB_DATAOBJECT['CONFIG']["ini_{$this->_database}"]) ?
                $_DB_DATAOBJECT['CONFIG']["ini_{$this->_database}"] :
                explode(PATH_SEPARATOR, $_DB_DATAOBJECT['CONFIG']["ini_{$this->_database}"]);
        }

        /* BEGIN CHANGED FROM UPSTREAM */
        $_DB_DATAOBJECT['INI'][$this->_database] = $this->parseIniFiles($schemas);
        /* END CHANGED FROM UPSTREAM */

        // now have we loaded the structure..

        if (!empty($_DB_DATAOBJECT['INI'][$this->_database][$this->__table])) {
            return true;
        }
        // - if not try building it..
        if (!empty($_DB_DATAOBJECT['CONFIG']['proxy'])) {
            if (!class_exists('DB_DataObject_Generator')) {
                require_once 'DB/DataObject/Generator.php';
            }

            $x = new DB_DataObject_Generator;
            $x->fillTableSchema($this->_database, $this->__table);
            // should this fail!!!???
            return true;
        }
        $this->debug("Cant find database schema: {$this->_database}/{$this->__table} \n".
                    "in links file data: " . print_r($_DB_DATAOBJECT['INI'],true),"databaseStructure",5);
        // we have to die here!! - it causes chaos if we dont (including looping forever!)
        $this->raiseError( "Unable to load schema for database and table (turn debugging up to 5 for full error message)", DB_DATAOBJECT_ERROR_INVALIDARGS, PEAR_ERROR_DIE);
        return false;
    }

    /** For parseIniFiles */
    protected static $iniCache = array();

    /**
     * When switching site configurations, DB_DataObject was loading its
     * .ini files over and over, leaking gobs of memory.
     * This refactored helper function uses a local cache of .ini files
     * to minimize the leaks.
     *
     * The cache is keyed on the full, ordered list of file names. Files
     * that are missing, unreadable or unparseable are skipped (with a
     * debug message when DB_DataObject debugging is on), so the result
     * is always an array.
     *
     * @param array of .ini file names $schemas
     * @return array merged sections from every file that could be parsed
     */
    protected function parseIniFiles($schemas)
    {
        // Needed for the debug checks below; without this declaration
        // $_DB_DATAOBJECT is an undefined local and nothing is ever logged.
        global $_DB_DATAOBJECT;

        $key = implode("|", $schemas);
        if (!isset(self::$iniCache[$key])) {
            $debug = !empty($_DB_DATAOBJECT['CONFIG']['debug']);
            $data = array();
            foreach ($schemas as $ini) {
                if (!file_exists($ini) || !is_file($ini)) {
                    if ($debug) {
                        $this->debug("Missing ini file: $ini","databaseStructure",1);
                    }
                    continue;
                }
                if (!is_readable($ini)) {
                    if ($debug) {
                        $this->debug("ini file is not readable: $ini","databaseStructure",1);
                    }
                    continue;
                }
                // parse_ini_file() returns false on failure; merging that
                // into $data would wipe out everything loaded so far and
                // leave a non-array in the cache.
                $parsed = parse_ini_file($ini, true);
                if (!is_array($parsed)) {
                    if ($debug) {
                        $this->debug("Failed to parse ini file: $ini","databaseStructure",1);
                    }
                    continue;
                }
                $data = array_merge($data, $parsed);
                if ($debug) {
                    $this->debug("Loaded ini file: $ini","databaseStructure",1);
                }
            }
            self::$iniCache[$key] = $data;
        }
        return self::$iniCache[$key];
    }
}
......@@ -21,7 +21,7 @@
if (!defined('STATUSNET') && !defined('LACONICA')) { exit(1); }
class Status_network extends DB_DataObject
class Status_network extends Safe_DataObject
{
###START_AUTOCODE
/* the code below is auto generated do not remove the above tag */
......@@ -57,6 +57,7 @@ class Status_network extends DB_DataObject
###END_AUTOCODE
static $cache = null;
static $cacheInitialized = false;
static $base = null;
static $wildcard = null;
......@@ -78,11 +79,15 @@ class Status_network extends DB_DataObject
if (class_exists('Memcache')) {
self::$cache = new Memcache();
// Can't close persistent connections, making forking painful.
// If we're a parent command-line process we need
// to be able to close out the connection after
// forking, so disable persistence.
//
// @fixme only do this in *parent* CLI processes.
// single-process and child-processes *should* use persistent.
$persist = php_sapi_name() != 'cli';
// We'll turn it back on again the second time
// through which will either be in a child process,
// or a single-process script which is switching
// configurations.
$persist = php_sapi_name() != 'cli' || self::$cacheInitialized;
if (is_array($servers)) {
foreach($servers as $server) {
self::$cache->addServer($server, 11211, $persist);
......@@ -90,6 +95,7 @@ class Status_network extends DB_DataObject
} else {
self::$cache->addServer($servers, 11211, $persist);
}
self::$cacheInitialized = true;
}
self::$base = $dbname;
......
......@@ -157,6 +157,32 @@ class Cache
return $success;
}
/**
* Atomically increment an existing numeric value.
* Existing expiration time should remain unchanged, if any.
*
* @param string $key The key to use for lookups
* @param int $step Amount to increment (default 1)
*
* @return mixed incremented value, or false if not set.
*/
function increment($key, $step=1)
{
    $value = false;
    if (Event::handle('StartCacheIncrement', array(&$key, &$step, &$value))) {
        // Fallback is not guaranteed to be atomic,
        // and may not preserve the original expiry value.
        $value = $this->get($key);
        if ($value !== false) {
            $value += $step;
            // Don't report an incremented value that was never stored.
            // Only an explicit false from set() counts as failure.
            if ($this->set($key, $value) === false) {
                $value = false;
            }
        }
        Event::handle('EndCacheIncrement', array($key, $step, $value));
    }
    return $value;
}
/**
* Delete the value associated with a key
*
......
......@@ -72,7 +72,7 @@ class DBQueueManager extends QueueManager
public function poll()
{
$this->_log(LOG_DEBUG, 'Checking for notices...');
$qi = Queue_item::top($this->getQueues());
$qi = Queue_item::top($this->activeQueues());
if (empty($qi)) {
$this->_log(LOG_DEBUG, 'No notices waiting; idling.');
return false;
......@@ -142,9 +142,4 @@ class DBQueueManager extends QueueManager
$this->stats('error', $queue);
}
protected function _log($level, $msg)
{
    // Tag every entry with the manager name before passing it to common_log().
    $line = 'DBQueueManager: ' . $msg;
    common_log($level, $line);
}
}
......@@ -81,7 +81,7 @@ $default =
'subsystem' => 'db', # default to database, or 'stomp'
'stomp_server' => null,
'queue_basename' => '/queue/statusnet/',
'control_channel' => '/topic/statusnet-control', // broadcasts to all queue daemons
'control_channel' => '/topic/statusnet/control', // broadcasts to all queue daemons
'stomp_username' => null,
'stomp_password' => null,
'stomp_persistent' => true, // keep items across queue server restart, if persistence is enabled
......@@ -91,6 +91,12 @@ $default =
'spawndelay' => 1, // Wait at least N seconds between (re)spawns of child processes to avoid slamming the queue server with subscription startup
'debug_memory' => false, // true to spit memory usage to log
'inboxes' => true, // true to do inbox distribution & output queueing from in background via 'distrib' queue
'breakout' => array('*' => 'shared'), // set global or per-handler queue breakout
// 'shared': use a shared queue for all sites
// 'handler': share each/this handler over multiple sites
// 'site': break out for each/this handler on this site
'max_retries' => 10, // drop messages after N failed attempts to process (Stomp)
'dead_letter_dir' => false, // set to directory to save dropped messages into (Stomp)
),
'license' =>
array('type' => 'cc', # can be 'cc', 'allrightsreserved', 'private'
......
......@@ -59,9 +59,10 @@ abstract class IoManager
* your manager about each site you'll have to handle so you
* can do any necessary per-site setup.
*
* @param string $site target site server name
* The new site will be the currently live configuration during
* this call.
*/
public function addSite($site)
public function addSite()
{
/* no-op */
}
......
......@@ -56,9 +56,9 @@ abstract class IoMaster
$this->multiSite = $multiSite;
}
if ($this->multiSite) {
$this->sites = $this->findAllSites();
$this->sites = StatusNet::findAllSites();
} else {
$this->sites = array(common_config('site', 'server'));
$this->sites = array(StatusNet::currentSite());
}
if (empty($this->sites)) {
......@@ -66,9 +66,7 @@ abstract class IoMaster
}
foreach ($this->sites as $site) {
if ($site != common_config('site', 'server')) {
StatusNet::init($site);
}
StatusNet::switchSite($site);
$this->initManagers();
}
}
......@@ -81,58 +79,32 @@ abstract class IoMaster
*/
abstract function initManagers();
/**
* Pull all local sites from status_network table.
* @return array of hostnames
*/
protected function findAllSites()
{
    $network = new Status_network();
    $network->find();

    // Collect one server name per row returned by the query.
    $serverNames = array();
    while ($network->fetch()) {
        array_push($serverNames, $network->getServerName());
    }

    return $serverNames;
}
/**
* Instantiate an i/o manager class for the current site.
* If a multi-site capable handler is already present,
* we don't need to build a new one.
*
* @param string $class
* @param mixed $manager class name (to run $class::get()) or object
*/
protected function instantiate($class)
protected function instantiate($manager)
{
if (isset($this->singletons[$class])) {
// Already instantiated a multi-site-capable handler.
// Just let it know it should listen to this site too!
$this->singletons[$class]->addSite(common_config('site', 'server'));
return;
if (is_string($manager)) {
$manager = call_user_func(array($class, 'get'));
}
$manager = $this->getManager($class);
if ($this->multiSite) {
$caps = $manager->multiSite();
if ($caps == IoManager::SINGLE_ONLY) {
$caps = $manager->multiSite();
if ($caps == IoManager::SINGLE_ONLY) {
if ($this->multiSite) {
throw new Exception("$class can't run with --all; aborting.");
}
if ($caps == IoManager::INSTANCE_PER_PROCESS) {
// Save this guy for later!
// We'll only need the one to cover multiple sites.
$this->singletons[$class] = $manager;
$manager->addSite(common_config('site', 'server'));
}
} else if ($caps == IoManager::INSTANCE_PER_PROCESS) {
$manager->addSite();
}
$this->managers[] = $manager;
}
protected function getManager($class)
{
return call_user_func(array($class, 'get'));
if (!in_array($manager, $this->managers, true)) {
// Only need to save singletons once
$this->managers[] = $manager;
}
}
/**
......@@ -146,6 +118,7 @@ abstract class IoMaster
{
$this->logState('init');
$this->start();
$this->checkMemory(false);
while (!$this->shutdown) {
$timeouts = array_values($this->pollTimeouts);
......@@ -209,17 +182,24 @@ abstract class IoMaster
/**
* Check runtime memory usage, possibly triggering a graceful shutdown
* and thread respawn if we've crossed the soft limit.
*
* @param boolean $respawn if false we'll shut down instead of respawning
*/
protected function checkMemory()
protected function checkMemory($respawn=true)
{
$memoryLimit = $this->softMemoryLimit();
if ($memoryLimit > 0) {
$usage = memory_get_usage();
if ($usage > $memoryLimit) {
common_log(LOG_INFO, "Queue thread hit soft memory limit ($usage > $memoryLimit); gracefully restarting.");
$this->requestRestart();
if ($respawn) {
$this->requestRestart();
} else {
$this->requestShutdown();
}
} else if (common_config('queue', 'debug_memory')) {
common_log(LOG_DEBUG, "Memory usage $usage");
$fmt = number_format($usage);
common_log(LOG_DEBUG, "Memory usage $fmt");
}
}
}
......
......@@ -63,7 +63,7 @@ class Queued_XMPP extends XMPPHP_XMPP
*/
public function send($msg, $timeout=NULL)
{
$qm = QueueManager::get();
$qm = QueueManager::get('xmppout');
$qm->enqueue(strval($msg), 'xmppout');
}
......
......@@ -39,9 +39,10 @@ abstract class QueueManager extends IoManager
{
static $qm = null;
public $master = null;
public $handlers = array();
public $groups = array();
protected $master = null;
protected $handlers = array();
protected $groups = array();
protected $activeGroups = array();
/**
* Factory function to pull the appropriate QueueManager object
......@@ -215,55 +216,64 @@ abstract class QueueManager extends IoManager
if (class_exists($class)) {
return new $class();
} else {
common_log(LOG_ERR, "Nonexistent handler class '$class' for queue '$queue'");
$this->_log(LOG_ERR, "Nonexistent handler class '$class' for queue '$queue'");
}
} else {
common_log(LOG_ERR, "Requested handler for unkown queue '$queue'");
$this->_log(LOG_ERR, "Requested handler for unkown queue '$queue'");
}
return null;
}
/**
* Get a list of registered queue transport names to be used
* for this daemon.
* for listening in this daemon.
*
* @return array of strings
*/
function getQueues()
function activeQueues()
{
$group = $this->activeGroup();
return array_keys($this->groups[$group]);
$queues = array();
foreach ($this->activeGroups as $group) {
if (isset($this->groups[$group])) {
$queues = array_merge($queues, $this->groups[$group]);
}
}
return array_keys($queues);
}
/**
* Initialize the list of queue handlers
* Initialize the list of queue handlers for the current site.
*
* @event StartInitializeQueueManager
* @event EndInitializeQueueManager
*/
function initialize()
{
// @fixme we'll want to be able to listen to particular queues...
$this->handlers = array();
$this->groups = array();
$this->groupsByTransport =