Commit 22d6ff25 authored by Evan Prodromou

Merge branch 'newmaster'

parents 135b398d 1100831b
<?php
/**
* StatusNet - the distributed open-source microblogging tool
* Copyright (C) 2010, StatusNet, Inc.
*
* Creates a dynamic sitemap for a StatusNet site
*
* PHP version 5
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* @category Sample
* @package StatusNet
* @author Evan Prodromou <evan@status.net>
* @copyright 2010 StatusNet, Inc.
* @license http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0
* @link http://status.net/
*/
// Security guard: when the STATUSNET constant has not been defined, stop
// immediately with exit status 1 so that this code file cannot be
// executed directly from the web.
if (defined('STATUSNET') === false) {
    exit(1);
}
/**
* Sitemap plugin
*
* @category Sample
* @package StatusNet
* @author Evan Prodromou <evan@status.net>
* @copyright 2010 StatusNet, Inc.
* @license http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0
* @link http://status.net/
*/
class SitemapPlugin extends Plugin
{
    // Neither constant is referenced in this file; presumably they cap the
    // number of entries written to a single user / notice sitemap file
    // (TODO confirm against the sitemap actions).
    const USERS_PER_MAP   = 50000;
    const NOTICES_PER_MAP = 50000;

    /**
     * Load related modules when needed
     *
     * The data classes live in files named exactly after the class, the
     * three "...Action" classes live in files named after the lowercased
     * class name without its "Action" suffix, and SitemapAction lives in
     * sitemapaction.php.
     *
     * @param string $cls Name of the class to be loaded
     *
     * @return boolean hook value; true means continue processing, false means stop.
     */
    function onAutoload($cls)
    {
        $dir = dirname(__FILE__);

        switch ($cls)
        {
        case 'Sitemap_user_count':
        case 'Sitemap_notice_count':
            require_once $dir . '/' . $cls . '.php';
            return false;
        case 'SitemapindexAction':
        case 'NoticesitemapAction':
        case 'UsersitemapAction':
            // Strip the trailing "Action" (6 characters) to get the file name.
            require_once $dir . '/' . strtolower(mb_substr($cls, 0, -6)) . '.php';
            return false;
        case 'SitemapAction':
            require_once $dir . '/' . strtolower($cls) . '.php';
            return false;
        default:
            return true;
        }
    }

    /**
     * Add sitemap-related information at the end of robots.txt
     *
     * Prints a "Sitemap:" line pointing at the sitemap index action.
     *
     * @param Action $action Action being run
     *
     * @return boolean hook value.
     */
    function onEndRobotsTxt($action)
    {
        $url = common_local_url('sitemapindex');

        print "\nSitemap: $url\n";

        return true;
    }

    /**
     * Map URLs to actions
     *
     * All three paths are registered without a leading slash so that they
     * follow one convention (the index route was already written that way).
     *
     * @param Net_URL_Mapper $m path-to-action mapper
     *
     * @return boolean hook value; true means continue processing, false means stop.
     */
    function onRouterInitialized($m)
    {
        $m->connect('sitemapindex.xml',
                    array('action' => 'sitemapindex'));

        $m->connect('notice-sitemap-:year-:month-:day-:index.xml',
                    array('action' => 'noticesitemap'),
                    array('year' => '[0-9]{4}',
                          'month' => '[01][0-9]',
                          'day' => '[0123][0-9]',
                          'index' => '[1-9][0-9]*'));

        $m->connect('user-sitemap-:year-:month-:day-:index.xml',
                    array('action' => 'usersitemap'),
                    array('year' => '[0-9]{4}',
                          'month' => '[01][0-9]',
                          'day' => '[0123][0-9]',
                          'index' => '[1-9][0-9]*'));

        return true;
    }

    /**
     * Database schema setup
     *
     * We cache some data persistently to avoid overlong queries.
     *
     * The date columns are the primary keys of their tables, so they are
     * declared non-nullable (fourth ColumnDef argument false, as already
     * done for 'created'); this matches the NOTNULL flag the data classes
     * declare for the same columns.
     *
     * @see Sitemap_user_count
     * @see Sitemap_notice_count
     *
     * @return boolean hook value; true means continue processing, false means stop.
     */
    function onCheckSchema()
    {
        $schema = Schema::get();

        $schema->ensureTable('sitemap_user_count',
                             array(new ColumnDef('registration_date', 'date', null,
                                                 false, 'PRI'),
                                   new ColumnDef('user_count', 'integer'),
                                   new ColumnDef('created', 'datetime',
                                                 null, false),
                                   new ColumnDef('modified', 'timestamp')));

        $schema->ensureTable('sitemap_notice_count',
                             array(new ColumnDef('notice_date', 'date', null,
                                                 false, 'PRI'),
                                   new ColumnDef('notice_count', 'integer'),
                                   new ColumnDef('created', 'datetime',
                                                 null, false),
                                   new ColumnDef('modified', 'timestamp')));

        return true;
    }
}
<?php
/**
* Data class for counting notice postings by date
*
* PHP version 5
*
* @category Data
* @package StatusNet
* @author Evan Prodromou <evan@status.net>
* @license http://www.fsf.org/licensing/licenses/agpl.html AGPLv3
* @link http://status.net/
*
* StatusNet - the distributed open-source microblogging tool
* Copyright (C) 2010, StatusNet, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// Stop with exit status 1 unless the STATUSNET constant has been defined.
if (defined('STATUSNET') === false) {
    exit(1);
}
require_once INSTALLDIR . '/classes/Memcached_DataObject.php';
/**
* Data class for counting notices by date
*
* We make a separate sitemap for each notice posted by date.
* To save ourselves some (not inconsiderable) processing effort,
* we cache this data in the sitemap_notice_count table. Each
* row represents a day since the site has been started, with a count
* of notices posted on that day. Since, after the end of the day,
* this number doesn't change, it's a good candidate for persistent caching.
*
* @category Data
* @package StatusNet
* @author Evan Prodromou <evan@status.net>
* @license http://www.fsf.org/licensing/licenses/agpl.html AGPLv3
* @link http://status.net/
*
* @see DB_DataObject
*/
class Sitemap_notice_count extends Memcached_DataObject
{
    public $__table = 'sitemap_notice_count'; // table name

    public $notice_date;                      // date primary_key not_null
    public $notice_count;                     // int(4)
    public $created;                          // datetime not_null
    public $modified;                         // timestamp

    /**
     * Get an instance by key
     *
     * This is a utility method to get a single instance with a given key value.
     *
     * @param string $k Key to use to lookup ('notice_date' is the only key of this class)
     * @param mixed  $v Value to lookup
     *
     * @return Sitemap_notice_count object found, or null for no hits
     *
     */
    function staticGet($k, $v=null)
    {
        return Memcached_DataObject::staticGet('Sitemap_notice_count', $k, $v);
    }

    /**
     * return table definition for DB_DataObject
     *
     * DB_DataObject needs to know something about the table to manipulate
     * instances. This method provides all the DB_DataObject needs to know.
     *
     * @return array array of column definitions
     */
    function table()
    {
        return array('notice_date' => DB_DATAOBJECT_DATE + DB_DATAOBJECT_NOTNULL,
                     'notice_count' => DB_DATAOBJECT_INT,
                     'created' => DB_DATAOBJECT_STR + DB_DATAOBJECT_DATE + DB_DATAOBJECT_TIME + DB_DATAOBJECT_NOTNULL,
                     'modified' => DB_DATAOBJECT_STR + DB_DATAOBJECT_DATE + DB_DATAOBJECT_TIME + DB_DATAOBJECT_NOTNULL);
    }

    /**
     * return key definitions for DB_DataObject
     *
     * DB_DataObject needs to know about keys that the table has; this function
     * defines them.
     *
     * @return array key definitions
     */
    function keys()
    {
        return array('notice_date' => 'K');
    }

    /**
     * return sequence-key information for DB_DataObject
     *
     * Added for parity with Sitemap_user_count, which returns the same
     * all-false triple: the date key is set explicitly on insert and is
     * not generated by the database.
     *
     * @return array array(false, false, false)
     */
    function sequenceKey()
    {
        return array(false, false, false);
    }

    /**
     * return key definitions for Memcached_DataObject
     *
     * Our caching system uses the same key definitions, but uses a different
     * method to get them.
     *
     * @return array key definitions
     */
    function keyTypes()
    {
        return $this->keys();
    }

    /**
     * Get the per-day notice counts, keyed by date string.
     *
     * The result is kept in the cache under 'sitemap:notice:counts'. When
     * the cache has nothing (cacheGet() returns false), the stored rows are
     * read newest first; the newest row decides whether the table must be
     * initialized, extended up to today, or merely refreshed for today.
     * The remaining (older) rows are copied into the result as stored.
     *
     * @return array map of 'Y-m-d' date => notice count
     */
    static function getAll()
    {
        $noticeCounts = self::cacheGet('sitemap:notice:counts');

        if ($noticeCounts === false) {

            $snc = new Sitemap_notice_count();
            $snc->orderBy('notice_date DESC');

            // Fetch the first one to check up-to-date-itude

            $n = $snc->find(true);

            $today = self::today();
            $noticeCounts = array();

            if (!$n) { // No counts saved yet
                $noticeCounts = self::initializeCounts();
            } else if ($snc->notice_date < $today) { // There are counts but not up to today
                $noticeCounts = self::fillInCounts($snc->notice_date);
            } else if ($snc->notice_date == $today) { // Refresh today's
                $noticeCounts[$today] = self::updateToday();
            }

            // starts with second-to-last date

            while ($snc->fetch()) {
                $noticeCounts[$snc->notice_date] = $snc->notice_count;
            }

            self::cacheSet('sitemap:notice:counts', $noticeCounts);
        }

        return $noticeCounts;
    }

    /**
     * Count and store every day from the first notice up to today.
     *
     * @return array map of 'Y-m-d' date => notice count for each stored day
     */
    static function initializeCounts()
    {
        $firstDate = self::getFirstDate(); // awww
        $today     = self::today();

        $counts = array();

        for ($d = $firstDate; $d <= $today; $d = self::incrementDay($d)) {
            $n = self::getCount($d);
            self::insertCount($d, $n);
            $counts[$d] = $n;
        }

        return $counts;
    }

    /**
     * Bring the stored counts up to date, starting from the newest stored day.
     *
     * The newest stored day is recounted and updated (it may have been
     * stored before that day was over); every following day up to and
     * including today is counted and inserted.
     *
     * @param string $lastDate newest day already stored, as 'Y-m-d'
     *
     * @return array map of 'Y-m-d' date => notice count for $lastDate and
     *               every day inserted after it
     */
    static function fillInCounts($lastDate)
    {
        $today = self::today();

        $counts = array();

        $n = self::getCount($lastDate);
        self::updateCount($lastDate, $n);

        $counts[$lastDate] = $n;

        for ($d = self::incrementDay($lastDate); $d <= $today; $d = self::incrementDay($d)) {
            $n = self::getCount($d);
            self::insertCount($d, $n);
            // Report the freshly inserted days too; otherwise they would be
            // missing from the value getAll() caches and returns.
            $counts[$d] = $n;
        }

        return $counts;
    }

    /**
     * Recount today's notices and store the new value.
     *
     * @return mixed today's notice count, as returned by getCount()
     */
    static function updateToday()
    {
        $today = self::today();

        $n = self::getCount($today);
        self::updateCount($today, $n);

        return $n;
    }

    /**
     * Count the notices created on one day.
     *
     * @param string $d day to count, as 'Y-m-d'
     *
     * @return mixed result of Notice::count() for that day
     */
    static function getCount($d)
    {
        $notice = new Notice();

        // Half-open range [midnight, next midnight): an inclusive BETWEEN
        // would count a notice created exactly at midnight on two days.
        // Single-quoted literals are standard SQL string syntax.
        $notice->whereAdd("created >= '" . $d . " 00:00:00'");
        $notice->whereAdd("created < '" . self::incrementDay($d) . " 00:00:00'");

        $n = $notice->count();

        return $n;
    }

    /**
     * Store the count for a day that has no row yet.
     *
     * A failed insert is logged as a warning and otherwise ignored.
     *
     * @param string $d day, as 'Y-m-d'
     * @param mixed  $n notice count for that day
     *
     * @return void
     */
    static function insertCount($d, $n)
    {
        $snc = new Sitemap_notice_count();

        $snc->notice_date = DB_DataObject_Cast::date($d);

        $snc->notice_count = $n;
        $snc->created      = common_sql_now();
        $snc->modified     = $snc->created;

        if (!$snc->insert()) {
            common_log(LOG_WARNING, "Could not save notice counts for '$d'");
        }
    }

    /**
     * Replace the stored count for a day that already has a row.
     *
     * A failed update is logged as a warning and otherwise ignored.
     *
     * @param string $d day, as 'Y-m-d'
     * @param mixed  $n new notice count for that day
     *
     * @return void
     *
     * @throws Exception if no row exists for $d
     */
    static function updateCount($d, $n)
    {
        $snc = Sitemap_notice_count::staticGet('notice_date', DB_DataObject_Cast::date($d));

        if (empty($snc)) {
            throw new Exception("No such notice date: $d");
        }

        $orig = clone($snc);

        $snc->notice_date = DB_DataObject_Cast::date($d);

        $snc->notice_count = $n;
        // Only the modification time moves; 'created' keeps the moment the
        // row was first inserted.
        $snc->modified     = common_sql_now();

        if (!$snc->update($orig)) {
            common_log(LOG_WARNING, "Could not save notice counts for '$d'");
        }
    }

    /**
     * Get the day after the given one.
     *
     * Uses calendar arithmetic instead of adding 86400 seconds: on a
     * 25-hour day (daylight-saving change) the fixed offset lands on the
     * same date again, which would make the day loops above spin forever.
     *
     * @param string $d day, as 'Y-m-d'
     *
     * @return string following day, as 'Y-m-d'
     */
    static function incrementDay($d)
    {
        $dt = self::dateStrToInt($d);
        return self::dateIntToStr(strtotime('+1 day', $dt));
    }

    /**
     * Convert a date string to the timestamp of that day's midnight.
     *
     * @param string $d day, as 'Y-m-d'
     *
     * @return int|false timestamp, or false if strtotime() cannot parse it
     */
    static function dateStrToInt($d)
    {
        return strtotime($d.' 00:00:00');
    }

    /**
     * Format a timestamp as a 'Y-m-d' date string.
     *
     * @param int $dt Unix timestamp
     *
     * @return string date, as 'Y-m-d'
     */
    static function dateIntToStr($dt)
    {
        return date('Y-m-d', $dt);
    }

    /**
     * Get the date of the oldest notice.
     *
     * The aggregate query yields a row even when there are no notices; in
     * that case first_date is empty and today's date is used instead.
     *
     * @return string date of the first notice, or today, as 'Y-m-d'
     */
    static function getFirstDate()
    {
        $n = new Notice();

        $n->selectAdd();
        $n->selectAdd('date(min(created)) as first_date');

        if ($n->find(true) && !empty($n->first_date)) {
            return $n->first_date;
        } else {
            // No notices yet: start counting from today.
            return self::today();
        }
    }

    /**
     * Get today's date.
     *
     * @return string today, as 'Y-m-d'
     */
    static function today()
    {
        return self::dateIntToStr(time());
    }
}
<?php
/**
* Data class for counting user registrations by date
*
* PHP version 5
*
* @category Data
* @package StatusNet
* @author Evan Prodromou <evan@status.net>
* @license http://www.fsf.org/licensing/licenses/agpl.html AGPLv3
* @link http://status.net/
*
* StatusNet - the distributed open-source microblogging tool
* Copyright (C) 2010, StatusNet, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// Stop with exit status 1 unless the STATUSNET constant has been defined.
if (defined('STATUSNET') === false) {
    exit(1);
}
require_once INSTALLDIR . '/classes/Memcached_DataObject.php';
/**
* Data class for counting users by date
*
* We make a separate sitemap for each user registered by date.
* To save ourselves some processing effort, we cache this data
*
* @category Action
* @package StatusNet
* @author Evan Prodromou <evan@status.net>
* @license http://www.fsf.org/licensing/licenses/agpl.html AGPLv3
* @link http://status.net/
*
* @see DB_DataObject
*/
class Sitemap_user_count extends Memcached_DataObject
{
public $__table = 'sitemap_user_count'; // table name
public $registration_date; // date primary_key not_null
public $user_count; // int(4)
public $created;
public $modified;
/**
 * Look up a single row by key
 *
 * Thin wrapper that forwards to Memcached_DataObject::staticGet() with
 * this class's name filled in.
 *
 * @param string $k Name of the key column ('registration_date' is the only key of this class)
 * @param mixed  $v Value to look up
 *
 * @return Sitemap_user_count object found, or null for no hits
 */
function staticGet($k, $v=null)
{
    $found = Memcached_DataObject::staticGet('Sitemap_user_count', $k, $v);

    return $found;
}
/**
 * Describe the table's columns for DB_DataObject
 *
 * Maps every column name to the sum of DB_DATAOBJECT_* type flags that
 * DB_DataObject uses to manipulate instances.
 *
 * @return array column name => type flags
 */
function table()
{
    // 'created' and 'modified' share the same not-null date-time definition.
    $dateTimeNotNull = DB_DATAOBJECT_STR + DB_DATAOBJECT_DATE + DB_DATAOBJECT_TIME + DB_DATAOBJECT_NOTNULL;

    $columns = array();

    $columns['registration_date'] = DB_DATAOBJECT_DATE + DB_DATAOBJECT_NOTNULL;
    $columns['user_count']        = DB_DATAOBJECT_INT;
    $columns['created']           = $dateTimeNotNull;
    $columns['modified']          = $dateTimeNotNull;

    return $columns;
}
/**
 * Describe the table's keys for DB_DataObject
 *
 * The registration date is the only key of this table.
 *
 * @return array key column name => 'K'
 */
function keys()
{
    $definitions = array();
    $definitions['registration_date'] = 'K';

    return $definitions;
}
/**
 * return sequence-key information for DB_DataObject
 *
 * Always answers with three false values; presumably this tells
 * DB_DataObject that the key is not database-generated (confirm against
 * DB_DataObject::sequenceKey()).
 *
 * @return array array(false, false, false)
 */
function sequenceKey()
{
    $noSequence = array(false, false, false);

    return $noSequence;
}
/**
 * Describe the table's keys for Memcached_DataObject
 *
 * The caching layer asks for key definitions through this method; they
 * are exactly the ones keys() reports.
 *
 * @return array key definitions
 */
function keyTypes()
{
    $definitions = $this->keys();

    return $definitions;
}
/**
 * Get the per-day user registration counts, keyed by date string.
 *
 * The result is kept in the cache under 'sitemap:user:counts'; a strict
 * false from cacheGet() is treated as a miss. On a miss the stored rows
 * are read newest first, and the newest row decides whether the table is
 * initialized, extended up to today, or only refreshed for today. The
 * remaining (older) rows are copied into the result as stored.
 *
 * @return array map of date => user count
 */
static function getAll()
{
    $cached = self::cacheGet('sitemap:user:counts');

    if ($cached !== false) {
        return $cached;
    }

    $row = new Sitemap_user_count();
    $row->orderBy('registration_date DESC');

    // The newest stored row tells how up to date the table is.
    $found = $row->find(true);

    $now    = self::today();
    $totals = array();

    if (!$found) {
        // No counts saved yet
        $totals = self::initializeCounts();
    } elseif ($row->registration_date < $now) {
        // There are counts but not up to today
        $totals = self::fillInCounts($row->registration_date);
    } elseif ($row->registration_date == $now) {
        // Refresh today's
        $totals[$now] = self::updateToday();
    }

    // Continue with the rows after the newest one.
    while ($row->fetch()) {
        $totals[$row->registration_date] = $row->user_count;
    }

    self::cacheSet('sitemap:user:counts', $totals);

    return $totals;
}
/**
 * Count and store every day from the first date up to today.
 *
 * Walks day by day from getFirstDate() through today(), inserting one
 * row per day.
 *
 * @return array map of date => user count for each stored day
 */
static function initializeCounts()
{
    $day  = self::getFirstDate();
    $last = self::today();

    $totals = array();

    while ($day <= $last) {
        $registered = self::getCount($day);
        self::insertCount($day, $registered);
        $totals[$day] = $registered;

        $day = self::incrementDay($day);
    }

    return $totals;
}
/**
 * Bring the stored counts up to date, starting from the newest stored day.
 *
 * The newest stored day is recounted and updated; every following day up
 * to and including today is counted and inserted.
 *
 * @param string $lastDate newest day already stored
 *
 * @return array map of date => user count for $lastDate and every day
 *               inserted after it
 */
static function fillInCounts($lastDate)
{
    $today = self::today();

    $counts = array();

    $n = self::getCount($lastDate);
    self::updateCount($lastDate, $n);

    $counts[$lastDate] = $n;

    for ($d = self::incrementDay($lastDate); $d <= $today; $d = self::incrementDay($d)) {
        $n = self::getCount($d);
        self::insertCount($d, $n);
        // Report the freshly inserted days too; otherwise they would be
        // missing from the value getAll() caches and returns.
        $counts[$d] = $n;
    }

    return $counts;
}
/**
 * Recount today's registrations and store the new value.
 *
 * @return mixed today's user count, as returned by getCount()
 */
static function updateToday()
{
    $day        = self::today();
    $registered = self::getCount($day);

    self::updateCount($day, $registered);

    return $registered;
}
/**
 * Count the users created on one day.
 *
 * @param string $d day to count, in the date format incrementDay() accepts
 *
 * @return mixed result of User::count() for that day
 */
static function getCount($d)
{
    $user = new User();

    // Half-open range [midnight, next midnight): an inclusive BETWEEN
    // would count a user created exactly at midnight on two days.
    // Single-quoted literals are standard SQL string syntax.
    $user->whereAdd("created >= '" . $d . " 00:00:00'");
    $user->whereAdd("created < '" . self::incrementDay($d) . " 00:00:00'");

    $n = $user->count();

    return $n;
}
/**
 * Store the count for a day that has no row yet.
 *
 * Both timestamps of the new row are set to the current time. A failed
 * insert is logged as a warning and otherwise ignored.
 *
 * @param string $d registration day
 * @param mixed  $n user count for that day
 *
 * @return void
 */
static function insertCount($d, $n)
{
    $now = common_sql_now();

    $row = new Sitemap_user_count();

    $row->registration_date = DB_DataObject_Cast::date($d);
    $row->user_count        = $n;
    $row->created           = $now;
    $row->modified          = $now;

    $saved = $row->insert();

    if ($saved) {
        return;
    }

    common_log(LOG_WARNING, "Could not save user counts for '$d'");
}
static function updateCount($d, $n)
{
$suc = Sitemap_user_count::staticGet('registration_date', DB_DataObject_Cast::date($d));
if (empty($suc)) {
throw new Exception("No such registration date: $d");
}