Commit d17dd32a authored by Evan Prodromou's avatar Evan Prodromou

Merge branch 'sitemap' into newmaster

parents 4bbb259b ef51cc9a
......@@ -232,7 +232,8 @@ class ApiDirectMessageAction extends ApiAuthAction
function showXmlDirectMessages()
{
$this->initDocument('xml');
$this->elementStart('direct-messages', array('type' => 'array'));
$this->elementStart('direct-messages', array('type' => 'array',
'xmlns:statusnet' => 'http://status.net/schema/api/1/'));
foreach ($this->messages as $m) {
$dm_array = $this->directMessageArray($m);
......
......@@ -113,7 +113,7 @@ class ApiUserShowAction extends ApiPrivateAuthAction
if ($this->format == 'xml') {
$this->initDocument('xml');
$this->showTwitterXmlUser($twitter_user);
$this->showTwitterXmlUser($twitter_user, 'user', true);
$this->endDocument('xml');
} elseif ($this->format == 'json') {
$this->initDocument('json');
......
......@@ -232,6 +232,10 @@ class ApiAction extends Action
}
}
// StatusNet-specific
$twitter_user['statusnet:profile_url'] = $profile->profileurl;
return $twitter_user;
}
......@@ -333,6 +337,10 @@ class ApiAction extends Action
$twitter_status['user'] = $twitter_user;
}
// StatusNet-specific
$twitter_status['statusnet:html'] = $notice->rendered;
return $twitter_status;
}
......@@ -500,9 +508,13 @@ class ApiAction extends Action
}
}
function showTwitterXmlStatus($twitter_status, $tag='status')
function showTwitterXmlStatus($twitter_status, $tag='status', $namespaces=false)
{
$this->elementStart($tag);
$attrs = array();
if ($namespaces) {
$attrs['xmlns:statusnet'] = 'http://status.net/schema/api/1/';
}
$this->elementStart($tag, $attrs);
foreach($twitter_status as $element => $value) {
switch ($element) {
case 'user':
......@@ -536,9 +548,13 @@ class ApiAction extends Action
$this->elementEnd('group');
}
function showTwitterXmlUser($twitter_user, $role='user')
function showTwitterXmlUser($twitter_user, $role='user', $namespaces=false)
{
$this->elementStart($role);
$attrs = array();
if ($namespaces) {
$attrs['xmlns:statusnet'] = 'http://status.net/schema/api/1/';
}
$this->elementStart($role, $attrs);
foreach($twitter_user as $element => $value) {
if ($element == 'status') {
$this->showTwitterXmlStatus($twitter_user['status']);
......@@ -620,7 +636,7 @@ class ApiAction extends Action
{
$this->initDocument('xml');
$twitter_status = $this->twitterStatusArray($notice);
$this->showTwitterXmlStatus($twitter_status);
$this->showTwitterXmlStatus($twitter_status, 'status', true);
$this->endDocument('xml');
}
......@@ -636,7 +652,8 @@ class ApiAction extends Action
{
$this->initDocument('xml');
$this->elementStart('statuses', array('type' => 'array'));
$this->elementStart('statuses', array('type' => 'array',
'xmlns:statusnet' => 'http://status.net/schema/api/1/'));
if (is_array($notice)) {
foreach ($notice as $n) {
......@@ -803,9 +820,13 @@ class ApiAction extends Action
$this->elementEnd('entry');
}
function showXmlDirectMessage($dm)
function showXmlDirectMessage($dm, $namespaces=false)
{
$this->elementStart('direct_message');
$attrs = array();
if ($namespaces) {
$attrs['xmlns:statusnet'] = 'http://status.net/schema/api/1/';
}
$this->elementStart('direct_message', $attrs);
foreach($dm as $element => $value) {
switch ($element) {
case 'sender':
......@@ -882,7 +903,7 @@ class ApiAction extends Action
{
$this->initDocument('xml');
$dmsg = $this->directMessageArray($message);
$this->showXmlDirectMessage($dmsg);
$this->showXmlDirectMessage($dmsg, true);
$this->endDocument('xml');
}
......@@ -999,7 +1020,8 @@ class ApiAction extends Action
{
$this->initDocument('xml');
$this->elementStart('users', array('type' => 'array'));
$this->elementStart('users', array('type' => 'array',
'xmlns:statusnet' => 'http://status.net/schema/api/1/'));
if (is_array($user)) {
foreach ($user as $u) {
......
<?php
/**
* StatusNet - the distributed open-source microblogging tool
* Copyright (C) 2010, StatusNet, Inc.
*
* Creates a dynamic sitemap for a StatusNet site
*
* PHP version 5
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* @category Sample
* @package StatusNet
* @author Evan Prodromou <evan@status.net>
* @copyright 2010 StatusNet, Inc.
* @license http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0
* @link http://status.net/
*/
if (!defined('STATUSNET')) {
// This check helps protect against security problems;
// your code file can't be executed directly from the web.
exit(1);
}
/**
* Sitemap plugin
*
* @category Sample
* @package StatusNet
* @author Evan Prodromou <evan@status.net>
* @copyright 2010 StatusNet, Inc.
* @license http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0
* @link http://status.net/
*/
class SitemapPlugin extends Plugin
{
    const USERS_PER_MAP   = 50000;
    const NOTICES_PER_MAP = 50000;

    /**
     * Autoload hook: require this plugin's class files on demand
     *
     * Data classes are loaded from a file named exactly after the class,
     * the three concrete sitemap actions from the lowercased class name
     * with its last six characters removed, and SitemapAction from its
     * lowercased class name.
     *
     * @param string $cls Name of the class to be loaded
     *
     * @return boolean hook value; false when the class was handled here,
     *                 true to let other handlers try.
     */
    function onAutoload($cls)
    {
        $base = dirname(__FILE__) . '/';

        // Data classes: file name == class name
        $dataClasses = array('Sitemap_user_count',
                             'Sitemap_notice_count');

        if (in_array($cls, $dataClasses)) {
            require_once $base . $cls . '.php';
            return false;
        }

        // Concrete actions: strip the trailing "Action" and lowercase
        $actionClasses = array('SitemapindexAction',
                               'NoticesitemapAction',
                               'UsersitemapAction');

        if (in_array($cls, $actionClasses)) {
            $stem = mb_substr($cls, 0, -6);
            require_once $base . strtolower($stem) . '.php';
            return false;
        }

        // The shared action class keeps its full (lowercased) name
        if ($cls == 'SitemapAction') {
            require_once $base . strtolower($cls) . '.php';
            return false;
        }

        // Not one of ours
        return true;
    }

    /**
     * Append a "Sitemap:" line pointing at the sitemap index to robots.txt
     *
     * The line is printed directly to the output.
     *
     * @param Action $action Action being run
     *
     * @return boolean hook value; always true.
     */
    function onEndRobotsTxt($action)
    {
        $url = common_local_url('sitemapindex');

        print "\nSitemap: $url\n";

        return true;
    }

    /**
     * Register the sitemap URLs with the router
     *
     * Connects the sitemap index plus the per-day, per-page notice and
     * user sitemaps. Both per-day routes share the same constraints on
     * their year/month/day/index path parameters.
     *
     * @param Net_URL_Mapper $m path-to-action mapper
     *
     * @return boolean hook value; always true.
     */
    function onRouterInitialized($m)
    {
        // Constraints common to both dated sitemap routes
        $dateRules = array('year' => '[0-9]{4}',
                           'month' => '[01][0-9]',
                           'day' => '[0123][0-9]',
                           'index' => '[1-9][0-9]*');

        $m->connect('sitemapindex.xml',
                    array('action' => 'sitemapindex'));

        $m->connect('/notice-sitemap-:year-:month-:day-:index.xml',
                    array('action' => 'noticesitemap'),
                    $dateRules);

        $m->connect('/user-sitemap-:year-:month-:day-:index.xml',
                    array('action' => 'usersitemap'),
                    $dateRules);

        return true;
    }

    /**
     * Make sure the two count-cache tables exist
     *
     * Ensures sitemap_user_count and sitemap_notice_count are present,
     * each with a date primary key, an integer count, and
     * created/modified columns.
     *
     * @see Sitemap_user_count
     * @see Sitemap_notice_count
     *
     * @return boolean hook value; always true.
     */
    function onCheckSchema()
    {
        $schema = Schema::get();

        $userColumns = array(
            new ColumnDef('registration_date', 'date', null, true, 'PRI'),
            new ColumnDef('user_count', 'integer'),
            new ColumnDef('created', 'datetime', null, false),
            new ColumnDef('modified', 'timestamp'),
        );

        $schema->ensureTable('sitemap_user_count', $userColumns);

        $noticeColumns = array(
            new ColumnDef('notice_date', 'date', null, true, 'PRI'),
            new ColumnDef('notice_count', 'integer'),
            new ColumnDef('created', 'datetime', null, false),
            new ColumnDef('modified', 'timestamp'),
        );

        $schema->ensureTable('sitemap_notice_count', $noticeColumns);

        return true;
    }
}
<?php
/**
* Data class for counting notice postings by date
*
* PHP version 5
*
* @category Data
* @package StatusNet
* @author Evan Prodromou <evan@status.net>
* @license http://www.fsf.org/licensing/licenses/agpl.html AGPLv3
* @link http://status.net/
*
* StatusNet - the distributed open-source microblogging tool
* Copyright (C) 2010, StatusNet, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
if (!defined('STATUSNET')) {
exit(1);
}
require_once INSTALLDIR . '/classes/Memcached_DataObject.php';
/**
* Data class for counting notices by date
*
* We make a separate sitemap for the notices posted on each date.
* To save ourselves some (not inconsiderable) processing effort,
* we cache this data in the sitemap_notice_count table. Each
* row represents a day since the site has been started, with a count
* of notices posted on that day. Since, after the end of the day,
* this number doesn't change, it's a good candidate for persistent caching.
*
* @category Data
* @package StatusNet
* @author Evan Prodromou <evan@status.net>
* @license http://www.fsf.org/licensing/licenses/agpl.html AGPLv3
* @link http://status.net/
*
* @see DB_DataObject
*/
class Sitemap_notice_count extends Memcached_DataObject
{
    public $__table = 'sitemap_notice_count'; // table name

    public $notice_date;                      // date primary_key not_null
    public $notice_count;                     // int(4)
    public $created;
    public $modified;

    /**
     * Get an instance by key
     *
     * This is a utility method to get a single instance with a given key value.
     *
     * @param string $k Key to use to lookup (usually 'notice_date' for this class)
     * @param mixed  $v Value to lookup
     *
     * @return Sitemap_notice_count object found, or null for no hits
     *
     */
    function staticGet($k, $v=null)
    {
        return Memcached_DataObject::staticGet('Sitemap_notice_count', $k, $v);
    }

    /**
     * return table definition for DB_DataObject
     *
     * DB_DataObject needs to know something about the table to manipulate
     * instances. This method provides all the DB_DataObject needs to know.
     *
     * @return array array of column definitions
     */
    function table()
    {
        return array('notice_date' => DB_DATAOBJECT_DATE + DB_DATAOBJECT_NOTNULL,
                     'notice_count' => DB_DATAOBJECT_INT,
                     'created' => DB_DATAOBJECT_STR + DB_DATAOBJECT_DATE + DB_DATAOBJECT_TIME + DB_DATAOBJECT_NOTNULL,
                     'modified' => DB_DATAOBJECT_STR + DB_DATAOBJECT_DATE + DB_DATAOBJECT_TIME + DB_DATAOBJECT_NOTNULL);
    }

    /**
     * return key definitions for DB_DataObject
     *
     * DB_DataObject needs to know about keys that the table has; this function
     * defines them.
     *
     * @return array key definitions
     */
    function keys()
    {
        return array('notice_date' => 'K');
    }

    /**
     * return sequence-key information for DB_DataObject
     *
     * Mirrors Sitemap_user_count::sequenceKey() so both count tables
     * declare the same thing. NOTE(review): presumably this tells
     * DB_DataObject the date key is not sequence-generated -- confirm
     * against the DB_DataObject documentation.
     *
     * @return array three false values
     */
    function sequenceKey()
    {
        return array(false, false, false);
    }

    /**
     * return key definitions for Memcached_DataObject
     *
     * Our caching system uses the same key definitions, but uses a different
     * method to get them.
     *
     * @return array key definitions
     */
    function keyTypes()
    {
        return $this->keys();
    }

    /**
     * Get the notice count for every day on record
     *
     * Serves the map from the 'sitemap:notice:counts' cache entry when
     * present. Otherwise it brings the sitemap_notice_count table up to
     * date (initial fill, gap fill up to today, or a refresh of today's
     * row), reads the remaining stored rows, and caches the result.
     *
     * @return array map of date string => notice count, newest date first
     */
    static function getAll()
    {
        $noticeCounts = self::cacheGet('sitemap:notice:counts');

        if ($noticeCounts === false) {

            $snc = new Sitemap_notice_count();
            $snc->orderBy('notice_date DESC');

            // Fetch the first (newest) one to check up-to-date-itude
            $n = $snc->find(true);

            $today = self::today();
            $noticeCounts = array();

            if (!$n) { // No counts saved yet
                $noticeCounts = self::initializeCounts();
            } else if ($snc->notice_date < $today) { // There are counts but not up to today
                $noticeCounts = self::fillInCounts($snc->notice_date);
            } else if ($snc->notice_date == $today) { // Refresh today's
                $noticeCounts[$today] = self::updateToday();
            } else {
                // Newest row is dated after today (e.g. clock or timezone
                // change): keep the stored value instead of dropping it.
                $noticeCounts[$snc->notice_date] = $snc->notice_count;
            }

            // The newest row was consumed by find(true) above, so this
            // starts with the second-to-last date.
            while ($snc->fetch()) {
                $noticeCounts[$snc->notice_date] = $snc->notice_count;
            }

            // The branches above build the map in different orders;
            // normalise to newest-first before caching.
            krsort($noticeCounts);

            self::cacheSet('sitemap:notice:counts', $noticeCounts);
        }

        return $noticeCounts;
    }

    /**
     * Count and store every day from the first notice up to today
     *
     * @return array map of date string => notice count
     */
    static function initializeCounts()
    {
        $firstDate = self::getFirstDate(); // awww
        $today     = self::today();

        $counts = array();

        for ($d = $firstDate; $d <= $today; $d = self::incrementDay($d)) {
            $n = self::getCount($d);
            self::insertCount($d, $n);
            $counts[$d] = $n;
        }

        return $counts;
    }

    /**
     * Refresh the last stored day and add rows for every day after it
     *
     * The last stored day may have been counted before that day ended,
     * so it is recounted and updated; each following day up to and
     * including today is counted and inserted.
     *
     * @param string $lastDate newest date already stored
     *
     * @return array map of date string => notice count for $lastDate
     *               and every day after it up to today
     */
    static function fillInCounts($lastDate)
    {
        $today = self::today();

        $counts = array();

        $n = self::getCount($lastDate);
        self::updateCount($lastDate, $n);
        $counts[$lastDate] = $n;

        for ($d = self::incrementDay($lastDate); $d <= $today; $d = self::incrementDay($d)) {
            $n = self::getCount($d);
            self::insertCount($d, $n);
            // Report the new day too; otherwise it is missing from the
            // cached result even though its row was just inserted.
            $counts[$d] = $n;
        }

        return $counts;
    }

    /**
     * Recount today's notices and store the result
     *
     * @return int number of notices counted for today
     */
    static function updateToday()
    {
        $today = self::today();

        $n = self::getCount($today);
        self::updateCount($today, $n);

        return $n;
    }

    /**
     * Count the notices created on a given day
     *
     * Uses a half-open interval [day 00:00:00, next day 00:00:00) so a
     * notice stamped exactly at midnight is counted for one day only.
     *
     * @param string $d date, in the format produced by dateIntToStr()
     *
     * @return int number of matching notices
     */
    static function getCount($d)
    {
        $start = $d . ' 00:00:00';
        $end   = self::incrementDay($d) . ' 00:00:00';

        $notice = new Notice();
        $notice->whereAdd("(created >= '" . $start . "' AND created < '" . $end . "')");

        return $notice->count();
    }

    /**
     * Insert a new count row for a day
     *
     * A failed insert is logged as a warning, not raised.
     *
     * @param string $d date of the row
     * @param int    $n notice count for that date
     *
     * @return void
     */
    static function insertCount($d, $n)
    {
        $snc = new Sitemap_notice_count();

        $snc->notice_date  = DB_DataObject_Cast::date($d);
        $snc->notice_count = $n;
        $snc->created      = common_sql_now();
        $snc->modified     = $snc->created;

        if (!$snc->insert()) {
            common_log(LOG_WARNING, "Could not save notice counts for '$d'");
        }
    }

    /**
     * Update the count row of a day that is already stored
     *
     * Only the count and the modification time change; the creation
     * time of the row is left as it was. A failed update is logged as
     * a warning, not raised.
     *
     * @param string $d date of the row
     * @param int    $n new notice count for that date
     *
     * @return void
     *
     * @throws Exception if there is no row for $d
     */
    static function updateCount($d, $n)
    {
        $snc = Sitemap_notice_count::staticGet('notice_date', DB_DataObject_Cast::date($d));

        if (empty($snc)) {
            throw new Exception("No such notice date: $d");
        }

        $orig = clone($snc);

        $snc->notice_date  = DB_DataObject_Cast::date($d);
        $snc->notice_count = $n;
        $snc->modified     = common_sql_now();

        if (!$snc->update($orig)) {
            common_log(LOG_WARNING, "Could not save notice counts for '$d'");
        }
    }

    /**
     * Get the calendar day after a given day
     *
     * Uses calendar arithmetic rather than adding 24 hours of seconds:
     * on a 25-hour daylight-saving day, midnight plus 86400 seconds is
     * still the same date, which would make callers' loops never end.
     *
     * @param string $d date string
     *
     * @return string the following date, as produced by dateIntToStr()
     */
    static function incrementDay($d)
    {
        $dt = self::dateStrToInt($d);
        return self::dateIntToStr(strtotime('+1 day', $dt));
    }

    /**
     * Convert a date string to the Unix timestamp of its midnight
     *
     * @param string $d date string
     *
     * @return int timestamp as returned by strtotime()
     */
    static function dateStrToInt($d)
    {
        return strtotime($d.' 00:00:00');
    }

    /**
     * Convert a Unix timestamp to a 'Y-m-d' date string
     *
     * @param int $dt Unix timestamp
     *
     * @return string date string
     */
    static function dateIntToStr($dt)
    {
        return date('Y-m-d', $dt);
    }

    /**
     * Get the date of the oldest notice
     *
     * @return string date of the first notice, or today's date when no
     *                first date can be determined (an aggregate query
     *                returns a row with a NULL value on an empty table)
     */
    static function getFirstDate()
    {
        $n = new Notice();

        $n->selectAdd();
        $n->selectAdd('date(min(created)) as first_date');

        if ($n->find(true) && !empty($n->first_date)) {
            return $n->first_date;
        }

        return self::today();
    }

    /**
     * Get today's date
     *
     * @return string current date as produced by dateIntToStr()
     */
    static function today()
    {
        return self::dateIntToStr(time());
    }
}
<?php
/**
* Data class for counting user registrations by date
*
* PHP version 5
*
* @category Data
* @package StatusNet
* @author Evan Prodromou <evan@status.net>
* @license http://www.fsf.org/licensing/licenses/agpl.html AGPLv3
* @link http://status.net/
*
* StatusNet - the distributed open-source microblogging tool
* Copyright (C) 2010, StatusNet, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
if (!defined('STATUSNET')) {
exit(1);
}
require_once INSTALLDIR . '/classes/Memcached_DataObject.php';
/**
* Data class for counting users by date
*
* We make a separate sitemap for the users registered on each date.
* To save ourselves some processing effort, we cache the per-day counts
* in the sitemap_user_count table.
*
* @category Data
* @package StatusNet
* @author Evan Prodromou <evan@status.net>
* @license http://www.fsf.org/licensing/licenses/agpl.html AGPLv3
* @link http://status.net/
*
* @see DB_DataObject
*/
class Sitemap_user_count extends Memcached_DataObject
{
public $__table = 'sitemap_user_count'; // table name
public $registration_date; // date primary_key not_null
public $user_count; // int(4)
public $created;
public $modified;
/**
* Get an instance by key
*
* This is a utility method to get a single instance with a given key value.
*
* @param string $k Key to use to lookup (usually 'registration_date' for this class)
* @param mixed $v Value to lookup
*
* @return Sitemap_user_count object found, or null for no hits
*
*/
function staticGet($k, $v=null)
{
    // Delegate to the shared lookup, pinning the class name for this table.
    $cls   = 'Sitemap_user_count';
    $found = Memcached_DataObject::staticGet($cls, $k, $v);

    return $found;
}
/**
* return table definition for DB_DataObject
*
* DB_DataObject needs to know something about the table to manipulate
* instances. This method provides all the DB_DataObject needs to know.
*
* @return array array of column definitions
*/
function table()
{
    // created and modified share the same type flags.
    $stamp = DB_DATAOBJECT_STR + DB_DATAOBJECT_DATE + DB_DATAOBJECT_TIME + DB_DATAOBJECT_NOTNULL;

    // Built column by column, in the same order as before.
    $columns = array();

    $columns['registration_date'] = DB_DATAOBJECT_DATE + DB_DATAOBJECT_NOTNULL;
    $columns['user_count']        = DB_DATAOBJECT_INT;
    $columns['created']           = $stamp;
    $columns['modified']          = $stamp;

    return $columns;
}
/**
* return key definitions for DB_DataObject
*
* DB_DataObject needs to know about keys that the table has; this function
* defines them.
*
* @return array key definitions
*/
function keys()
{
    // registration_date is the only key column of this table.
    $keyDefs = array();
    $keyDefs['registration_date'] = 'K';

    return $keyDefs;
}
function sequenceKey()
{
    // Three false entries. NOTE(review): presumably this tells
    // DB_DataObject the date key is not sequence-generated -- confirm.
    $noSequence = array_fill(0, 3, false);

    return $noSequence;
}
/**
* return key definitions for Memcached_DataObject
*
* Our caching system uses the same key definitions, but uses a different
* method to get them.
*
* @return array key definitions
*/
function keyTypes()
{
    // The cache layer gets the same key map that keys() returns.
    $keyMap = $this->keys();

    return $keyMap;
}
static function getAll()
{
$userCounts = self::cacheGet('sitemap:user:counts');
if ($userCounts === false) {
$suc = new Sitemap_user_count();
$suc->orderBy('registration_date DESC');
// Fetch the first one to check up-to-date-itude
$n = $suc->find(true);
$today = self::today();
$userCounts = array();
if (!$n) { // No counts saved yet
$userCounts = self::initializeCounts();
} else if ($suc->registration_date < $today) { // There are counts but not up to today
$userCounts = self::fillInCounts($suc->registration_date);
} else if ($suc->registration_date == $today) { // Refresh today's
$userCounts[$today] = self::updateToday();