Commit 7a9777df authored by Evan Prodromou

Merge ActivitySpam plugin

parents 74e919e8 69ec86a3
<?php
/**
* StatusNet - the distributed open-source microblogging tool
* Copyright (C) 2011,2012, StatusNet, Inc.
*
* ActivitySpam Plugin
*
* PHP version 5
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* @category Spam
* @package StatusNet
* @author Evan Prodromou <evan@status.net>
* @copyright 2011,2012 StatusNet, Inc.
* @license http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0
* @link http://status.net/
*/
// This check helps protect against security problems: when the STATUSNET
// constant is not defined, the file is being executed directly (e.g. from
// the web), so stop right away.
defined('STATUSNET') || exit(1);
/**
* Check new notices with activity spam service.
*
* @category Spam
* @package StatusNet
* @author Evan Prodromou <evan@status.net>
* @copyright 2011,2012 StatusNet, Inc.
* @license http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0
* @link http://status.net/
*/
class ActivitySpamPlugin extends Plugin
{
    public $server = null;
    public $hideSpam = false;

    /**
     * Spam filter client; created in initialize() and handed out by
     * onGetSpamFilter(). Null until initialize() has run.
     */
    public $filter = null;

    const REVIEWSPAM = 'ActivitySpamPlugin::REVIEWSPAM';
    const TRAINSPAM = 'ActivitySpamPlugin::TRAINSPAM';

    /**
     * Initializer
     *
     * Builds the spam filter from the 'activityspam' configuration section
     * (server, consumerkey, secret) and reads the 'hidespam' setting.
     *
     * @return boolean hook value; true means continue processing, false means stop.
     */
    function initialize()
    {
        $this->filter = new SpamFilter(common_config('activityspam', 'server'),
                                       common_config('activityspam', 'consumerkey'),
                                       common_config('activityspam', 'secret'));

        $this->hideSpam = common_config('activityspam', 'hidespam');

        return true;
    }

    /**
     * Database schema setup
     *
     * Ensures the spam_score table exists and runs the Spam_score
     * upgrade routines.
     *
     * @see Schema
     * @see ColumnDef
     *
     * @return boolean hook value; true means continue processing, false means stop.
     */
    function onCheckSchema()
    {
        $schema = Schema::get();
        $schema->ensureTable('spam_score', Spam_score::schemaDef());

        Spam_score::upgrade();

        return true;
    }

    /**
     * Load related modules when needed
     *
     * @param string $cls Name of the class to be loaded
     *
     * @return boolean hook value; true means continue processing, false means stop.
     */
    function onAutoload($cls)
    {
        $dir = dirname(__FILE__);

        switch ($cls)
        {
        case 'TrainAction':
        case 'SpamAction':
            // Drop the trailing "Action" (6 characters): TrainAction -> train.php
            include_once $dir . '/' . strtolower(mb_substr($cls, 0, -6)) . '.php';
            return false;
        case 'Spam_score':
            // Data class file keeps the class name's exact case
            include_once $dir . '/'.$cls.'.php';
            return false;
        case 'SpamFilter':
        case 'SpamNoticeStream':
        case 'TrainSpamForm':
        case 'TrainHamForm':
            include_once $dir . '/'.strtolower($cls).'.php';
            return false;
        default:
            return true;
        }
    }

    /**
     * When a notice is saved, check its spam score
     *
     * @param Notice $notice Notice that was just saved
     *
     * @return boolean hook value; true means continue processing, false means stop.
     */
    function onEndNoticeSave($notice)
    {
        // Failures are logged inside scoreNotice(); a filter error must not
        // interrupt the rest of the notice-save processing.
        $this->scoreNotice($notice);

        return true;
    }

    /**
     * When a notice is deleted, delete its stored spam score too
     *
     * @param Notice $notice Notice being deleted
     *
     * @return boolean hook value; true means continue processing, false means stop.
     */
    function onNoticeDeleteRelated($notice)
    {
        $score = Spam_score::staticGet('notice_id', $notice->id);

        if (!empty($score)) {
            $score->delete();
        }

        return true;
    }

    /**
     * Decide the REVIEWSPAM and TRAINSPAM rights
     *
     * Both rights are granted to profiles that have the moderator role or
     * the 'modhelper' role. Any other right is left for other handlers.
     *
     * @param Profile $profile Profile whose right is being checked
     * @param string  $right   Right being checked
     * @param boolean &$result Set to the decision for the two spam rights
     *
     * @return boolean hook value; true means continue processing, false means stop.
     */
    function onUserRightsCheck($profile, $right, &$result)
    {
        switch ($right) {
        case self::REVIEWSPAM:
        case self::TRAINSPAM:
            $result = ($profile->hasRole(Profile_role::MODERATOR) || $profile->hasRole('modhelper'));
            return false;
        default:
            return true;
        }
    }

    /**
     * Hand this plugin's spam filter to the caller
     *
     * @param SpamFilter &$filter Receives the filter built in initialize()
     *
     * @return boolean hook value; always false (stop processing).
     */
    function onGetSpamFilter(&$filter)
    {
        $filter = $this->filter;
        return false;
    }

    /**
     * Show a train-as-ham or train-as-spam form on each notice
     *
     * Only shown when the current profile has the TRAINSPAM right. Notices
     * currently flagged as spam get the "ham" form, all other scored notices
     * get the "spam" form; notices without a score get no form.
     *
     * @param NoticeListItem $nli List item being rendered
     *
     * @return boolean hook value; true means continue processing, false means stop.
     */
    function onEndShowNoticeOptionItems($nli)
    {
        $profile = Profile::current();

        if (!empty($profile) && $profile->hasRight(self::TRAINSPAM)) {

            $notice = $nli->getNotice();
            $out = $nli->getOut();

            if (!empty($notice)) {

                $score = $this->getScore($notice);

                if (empty($score)) {
                    $this->debug("No score for notice " . $notice->id);
                    // XXX: show a question-mark or something
                } else if ($score->is_spam) {
                    $form = new TrainHamForm($out, $notice);
                    $form->show();
                } else {
                    $form = new TrainSpamForm($out, $notice);
                    $form->show();
                }
            }
        }

        return true;
    }

    /**
     * Map URLs to actions
     *
     * @param Net_URL_Mapper $m path-to-action mapper
     *
     * @return boolean hook value; true means continue processing, false means stop.
     */
    function onRouterInitialized($m)
    {
        $m->connect('main/train/spam',
                    array('action' => 'train', 'category' => 'spam'));
        $m->connect('main/train/ham',
                    array('action' => 'train', 'category' => 'ham'));
        $m->connect('main/spam',
                    array('action' => 'spam'));
        return true;
    }

    /**
     * Emit an inline style element with the icons for the training forms
     *
     * @param Action $action Action being rendered
     *
     * @return boolean hook value; true means continue processing, false means stop.
     */
    function onEndShowStyles($action)
    {
        $action->element('style', null,
                         '.form-train-spam input.submit { background: url('.$this->path('icons/bullet_black.png').') no-repeat 0px 0px } ' . "\n" .
                         '.form-train-ham input.submit { background: url('.$this->path('icons/exclamation.png').') no-repeat 0px 0px } ');
        return true;
    }

    /**
     * Add a "Spam" menu item for users holding the REVIEWSPAM right
     *
     * @param Menu $nav Navigation menu being rendered
     *
     * @return boolean hook value; true means continue processing, false means stop.
     */
    function onEndPublicGroupNav($nav)
    {
        $user = common_current_user();

        if (!empty($user) && $user->hasRight(self::REVIEWSPAM)) {
            $nav->out->menuItem(common_local_url('spam'),
                                _m('MENU','Spam'),
                                // TRANS: Menu item title in search group navigation panel.
                                _('Notices marked as spam'),
                                $nav->actionName == 'spam',
                                'nav_timeline_spam');
        }

        return true;
    }

    /**
     * Report this plugin's version information
     *
     * @param array &$versions List that this plugin's entry is appended to
     *
     * @return boolean hook value; true means continue processing, false means stop.
     */
    function onPluginVersion(&$versions)
    {
        $versions[] = array('name' => 'ActivitySpam',
                            'version' => STATUSNET_VERSION,
                            'author' => 'Evan Prodromou',
                            'homepage' => 'http://status.net/wiki/Plugin:ActivitySpam',
                            'description' =>
                            _m('Test notices against the Activity Spam service.'));
        return true;
    }

    /**
     * Get the spam score for a notice, computing it if none is stored yet
     *
     * @param Notice $notice Notice to look up
     *
     * @return Spam_score|null stored or freshly computed score; null when
     *                         the filter test or the save failed
     */
    function getScore($notice)
    {
        $score = Spam_score::staticGet('notice_id', $notice->id);

        if (!empty($score)) {
            return $score;
        }

        return $this->scoreNotice($notice);
    }

    /**
     * Run a notice through the filter and store the result as a new score
     *
     * Any exception is logged and swallowed, so callers get best-effort
     * behavior.
     *
     * @param Notice $notice Notice to test
     *
     * @return Spam_score|null the saved score, or null on failure
     */
    protected function scoreNotice($notice)
    {
        try {
            $result = $this->filter->test($notice);
            $score = Spam_score::saveNew($notice, $result);

            $this->log(LOG_INFO, "Notice " . $notice->id . " has spam score " . $score->score);

            return $score;
        } catch (Exception $e) {
            // Log but continue
            $this->log(LOG_ERR, $e->getMessage());
            return null;
        }
    }

    /**
     * Register spam_score in the always-read-write table list
     *
     * @param array &$alwaysRW List that 'spam_score' is appended to
     * @param array &$rwdb     Not used by this handler
     *
     * @return boolean hook value; true means continue processing, false means stop.
     */
    function onStartReadWriteTables(&$alwaysRW, &$rwdb)
    {
        $alwaysRW[] = 'spam_score';
        return true;
    }

    /**
     * Hide notices flagged as spam when the hideSpam setting is on
     *
     * A notice that is otherwise in scope is taken out of scope when it is
     * flagged as spam, unless the viewer is its author or holds the
     * REVIEWSPAM right. Anonymous viewers never see flagged notices.
     *
     * @param Notice  $notice   Notice being checked
     * @param Profile $profile  Viewing profile; may be empty
     * @param boolean &$bResult Scope decision so far; may be set to false
     *
     * @return boolean hook value; true means continue processing, false means stop.
     */
    function onEndNoticeInScope($notice, $profile, &$bResult)
    {
        // Nothing to do unless hiding is enabled and the notice is still in scope.
        if (!$this->hideSpam || !$bResult) {
            return true;
        }

        $score = Spam_score::staticGet('notice_id', $notice->id);

        if (!empty($score) && $score->is_spam) {
            if (empty($profile) ||
                ($profile->id !== $notice->profile_id &&
                 !$profile->hasRight(self::REVIEWSPAM))) {
                $bResult = false;
            }
        }

        return true;
    }

    /**
     * Pre-cache our spam scores if needed.
     *
     * @param array &$notices   Notices about to be listed
     * @param array &$profiles  Not used by this handler
     * @param int   $avatarSize Not used by this handler
     *
     * @return boolean hook value; true means continue processing, false means stop.
     */
    function onEndNoticeListPrefill(&$notices, &$profiles, $avatarSize)
    {
        if ($this->hideSpam) {

            // Start from an empty list so an empty $notices does not leave
            // $ids undefined.
            $ids = array();

            foreach ($notices as $notice) {
                $ids[] = $notice->id;
            }

            // With no notices there is nothing to prefetch.
            if (!empty($ids)) {
                Memcached_DataObject::multiGet('Spam_score', 'notice_id', $ids);
            }
        }

        return true;
    }
}
<?php
/**
* StatusNet - the distributed open-source microblogging tool
* Copyright (C) 2011, StatusNet, Inc.
*
* Score of a notice by activity spam service
*
* PHP version 5
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* @category Spam
* @package StatusNet
* @author Evan Prodromou <evan@status.net>
* @copyright 2011 StatusNet, Inc.
* @license http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0
* @link http://status.net/
*/
// Stop immediately when the STATUSNET constant is not defined.
defined('STATUSNET') || exit(1);
/**
* Score of a notice per the activity spam service
*
* @category Spam
* @package StatusNet
* @author Evan Prodromou <evan@status.net>
* @license http://www.fsf.org/licensing/licenses/agpl.html AGPLv3
* @link http://status.net/
*
* @see DB_DataObject
*/
class Spam_score extends Managed_DataObject
{
    const MAX_SCALE = 10000;

    public $__table = 'spam_score'; // table name

    public $notice_id;      // int
    public $score;          // float
    public $scaled;         // int
    public $is_spam;        // tinyint
    public $created;        // datetime
    public $notice_created; // datetime

    /**
     * Get an instance by key
     *
     * @param string $k Key to use to lookup (usually 'notice_id' for this class)
     * @param mixed  $v Value to lookup
     *
     * @return Spam_score object found, or null for no hits
     *
     */
    function staticGet($k, $v=null)
    {
        return Managed_DataObject::staticGet('Spam_score', $k, $v);
    }

    /**
     * Insert a new score row for a notice
     *
     * Declared static because every caller invokes it as
     * Spam_score::saveNew().
     *
     * @param Notice $notice Notice that was tested
     * @param object $result Filter result; its probability and isSpam
     *                       members are read
     *
     * @return Spam_score the inserted score
     */
    public static function saveNew($notice, $result)
    {
        $score = new Spam_score();

        self::fillFromResult($score, $notice, $result);

        $score->insert();

        self::blow('spam_score:notice_ids');

        return $score;
    }

    /**
     * Insert or update the score row for a notice
     *
     * Declared static because every caller invokes it as
     * Spam_score::save().
     *
     * @param Notice $notice Notice that was tested
     * @param object $result Filter result; its probability and isSpam
     *                       members are read
     *
     * @return Spam_score the inserted or updated score
     */
    public static function save($notice, $result)
    {
        $orig = null;
        $score = Spam_score::staticGet('notice_id', $notice->id);

        if (empty($score)) {
            $score = new Spam_score();
        } else {
            // Keep the pre-change state; update() is given it below.
            $orig = clone($score);
        }

        self::fillFromResult($score, $notice, $result);

        if (empty($orig)) {
            $score->insert();
        } else {
            $score->update($orig);
        }

        self::blow('spam_score:notice_ids');

        return $score;
    }

    /**
     * Copy a filter result and the notice's data into a score object
     *
     * @param Spam_score $score  Object to fill in
     * @param Notice     $notice Notice that was tested
     * @param object     $result Filter result (probability, isSpam)
     *
     * @return void
     */
    private static function fillFromResult($score, $notice, $result)
    {
        $score->notice_id      = $notice->id;
        $score->score          = $result->probability;
        $score->is_spam        = $result->isSpam;
        $score->scaled         = Spam_score::scale($score->score);
        $score->created        = common_sql_now();
        $score->notice_created = $notice->created;
    }

    /**
     * Delete this score, clearing the cached notice-id lists first
     *
     * @return mixed whatever the parent delete() returns
     */
    function delete()
    {
        self::blow('spam_score:notice_ids');
        self::blow('spam_score:notice_ids;last');

        // Pass the parent's result through so callers can see the outcome.
        return parent::delete();
    }

    /**
     * The One True Thingy that must be defined and declared.
     *
     * @return array schema definition for the spam_score table
     */
    public static function schemaDef()
    {
        return array(
            'description' => 'score of the notice per activityspam',
            'fields' => array(
                'notice_id' => array('type' => 'int',
                                     'not null' => true,
                                     'description' => 'notice getting scored'),
                'score' => array('type' => 'double',
                                 'not null' => true,
                                 'description' => 'score for the notice (0.0, 1.0)'),
                'scaled' => array('type' => 'int',
                                  'description' => 'scaled score for the notice (0, 10000)'),
                'is_spam' => array('type' => 'tinyint',
                                   'description' => 'flag for spamosity'),
                'created' => array('type' => 'datetime',
                                   'not null' => true,
                                   'description' => 'date this record was created'),
                'notice_created' => array('type' => 'datetime',
                                          'description' => 'date the notice was created'),
            ),
            'primary key' => array('notice_id'),
            'foreign keys' => array(
                'spam_score_notice_id_fkey' => array('notice', array('notice_id' => 'id')),
            ),
            'indexes' => array(
                'spam_score_created_idx' => array('created'),
                'spam_score_scaled_idx' => array('scaled'),
            ),
        );
    }

    /**
     * Backfill columns that are still NULL on existing rows
     *
     * @return void
     */
    public static function upgrade()
    {
        Spam_score::upgradeScaled();
        Spam_score::upgradeIsSpam();
        Spam_score::upgradeNoticeCreated();
    }

    /**
     * Fill in 'scaled' for rows where it is NULL
     *
     * @return void
     */
    protected static function upgradeScaled()
    {
        $score = new Spam_score();
        $score->whereAdd('scaled IS NULL');

        if ($score->find()) {
            while ($score->fetch()) {
                $orig = clone($score);
                $score->scaled = Spam_score::scale($score->score);
                $score->update($orig);
            }
        }
    }

    /**
     * Fill in 'is_spam' for rows where it is NULL
     *
     * @return void
     */
    protected static function upgradeIsSpam()
    {
        $score = new Spam_score();
        $score->whereAdd('is_spam IS NULL');

        if ($score->find()) {
            while ($score->fetch()) {
                $orig = clone($score);
                // Rows without a flag are classified by a fixed 0.90 cutoff
                // on the stored score.
                $score->is_spam = ($score->score >= 0.90) ? 1 : 0;
                $score->update($orig);
            }
        }
    }

    /**
     * Fill in 'notice_created' for rows where it is NULL
     *
     * Rows whose notice can no longer be found are left untouched.
     *
     * @return void
     */
    protected static function upgradeNoticeCreated()
    {
        $score = new Spam_score();
        $score->whereAdd('notice_created IS NULL');

        if ($score->find()) {
            while ($score->fetch()) {
                $notice = Notice::staticGet('id', $score->notice_id);
                if (!empty($notice)) {
                    $orig = clone($score);
                    $score->notice_created = $notice->created;
                    $score->update($orig);
                }
            }
        }
    }

    /**
     * Scale a score to the 0..MAX_SCALE range
     *
     * The score is multiplied by MAX_SCALE, rounded, and clamped to
     * the range [0, MAX_SCALE].
     *
     * @param float $score Raw score
     *
     * @return mixed scaled, clamped value
     */
    public static function scale($score)
    {
        $raw = round($score * Spam_score::MAX_SCALE);
        return max(0, min(Spam_score::MAX_SCALE, $raw));
    }
}
<?php
/*
* StatusNet - the distributed open-source microblogging tool
* Copyright (C) 2012 StatusNet, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// Installation root: four directory levels above this script.
define('INSTALLDIR', realpath(dirname(__FILE__) . '/../../../..'));

// Option specs and help text are set before commandline.inc is included
// (presumably that include reads them -- confirm).
$shortoptions = 'i:n:a';
$longoptions = array('id=', 'nickname=', 'all');
$helptext = <<<END_OF_TESTUSER_HELP
testuser.php [options]
Test user activities against the spam filter
-i --id ID of user to test
-n --nickname nickname of the user to test
-a --all All users
END_OF_TESTUSER_HELP;
require_once INSTALLDIR.'/scripts/commandline.inc';
/**
 * Test the notices of every user against the spam filter
 *
 * Users are read in pages of 1000, ordered by creation date. A failure
 * for one user is reported and the run continues with the next user.
 *
 * @param SpamFilter $filter Filter to test with
 *
 * @return void
 */
function testAllUsers($filter) {
    $offset = 0;
    $limit = 1000;

    do {
        $user = new User();
        $user->orderBy('created');
        $user->limit($offset, $limit);

        $found = $user->find();

        if ($found) {
            while ($user->fetch()) {
                try {
                    testUser($filter, $user);
                } catch (Exception $e) {
                    // One line per error, newline at the end like the
                    // per-notice error message in testUser().
                    printfnq("ERROR testing user %s: %s\n", $user->nickname, $e->getMessage());
                }
            }
            $offset += $found;
        }
    } while ($found > 0);
}
/**
 * Test every notice of one user against the spam filter and store the scores
 *
 * Notices are read from the user's profile stream in pages of 100. A
 * failure on a single notice is reported and the loop moves on.
 *
 * @param SpamFilter $filter Filter to test with
 * @param User       $user   User whose notices are tested
 *
 * @return void
 */
function testUser($filter, $user) {
    printfnq("Testing user %s\n", $user->nickname);

    $profile = Profile::staticGet('id', $user->id);
    $stream = new ProfileNoticeStream($profile, $profile);

    $pageSize = 100;
    $seen = 0;

    while (true) {
        $page = $stream->getNotices($seen, $pageSize);

        while ($page->fetch()) {
            try {
                printfv("Testing notice %d...", $page->id);
                $result = $filter->test($page);
                Spam_score::save($page, $result);
                $verdict = ($result->isSpam) ? "SPAM" : "HAM";
                printfv("%s\n", $verdict);
            } catch (Exception $e) {
                printfnq("ERROR testing notice %d: %s\n", $page->id, $e->getMessage());
            }
        }

        // An empty page means the stream is exhausted.
        if (!($page->N > 0)) {
            break;
        }

        $seen += $page->N;
    }
}
// Script entry point: obtain the spam filter through the GetSpamFilter
// event, then test either all users or the single selected user.
try {
    $filter = null;
    Event::handle('GetSpamFilter', array(&$filter));

    if (empty($filter)) {
        throw new Exception(_("No spam filter."));
    }

    if (!have_option('a', 'all')) {
        $user = getUser();
        testUser($filter, $user);
    } else {
        testAllUsers($filter);
    }
} catch (Exception $e) {
    // Report the failure and exit with a non-zero status.
    echo $e->getMessage(), "\n";
    exit(1);
}
<?php
/*
* StatusNet - the distributed open-source microblogging tool
* Copyright (C) 2012 StatusNet, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// Installation root: four directory levels above this script.
define('INSTALLDIR', realpath(dirname(__FILE__) . '/../../../..'));

// Option specs and help text are set before commandline.inc is included
// (presumably that include reads them -- confirm).
$shortoptions = 'i:n:t:';
$longoptions = array('id=', 'nickname=', 'category=');
$helptext = <<<END_OF_TRAINUSER_HELP
trainuser.php [options]
Train user activities against the spam filter
-i --id ID of user to train
-n --nickname nickname of the user to train
-t --category Category; one of "spam" or "ham"
END_OF_TRAINUSER_HELP;
require_once INSTALLDIR.'/scripts/commandline.inc';
/**
 * Train the spam filter on every notice of one user
 *
 * Each notice is passed to the filter's trainOnError() with the given
 * category, then tested again and its stored score is saved. Notices are
 * read from the user's profile stream in pages of 100. A failure on a
 * single notice is reported and the loop moves on.
 *
 * @param SpamFilter $filter   Filter to train
 * @param User       $user     User whose notices are used
 * @param string     $category Category to train the notices as
 *
 * @return void
 */
function trainUser($filter, $user, $category) {
    printfnq("Training user %s\n", $user->nickname);

    $profile = Profile::staticGet('id', $user->id);
    $str = new ProfileNoticeStream($profile, $profile);

    $offset = 0;
    $limit = 100;

    do {
        $notice = $str->getNotices($offset, $limit);
        while ($notice->fetch()) {
            try {
                printfv("Training notice %d...", $notice->id);
                $filter->trainOnError($notice, $category);
                // Test again after training and store the resulting score.
                $result = $filter->test($notice);
                Spam_score::save($notice, $result);
                printfv("%s\n", ($result->isSpam) ? "SPAM" : "HAM");
            } catch (Exception $e) {
                // One line per error, newline at the end.
                printfnq("ERROR training notice %d: %s\n", $notice->id, $e->getMessage());
            }
        }
        $offset += $notice->N;
    } while ($notice->N > 0);
}
try {
$filter = null;
Event::handle('GetSpamFilter', array(&$filter));
if (empty($filter)) {
throw new Exception(_("No spam filter."));
}
$user = getUser();
$category = get_option_value('t', 'category');
if ($category !== SpamFilter::HAM &&
$category !== SpamFilter::SPAM) {
throw new Exception(_("No such category."));
}
trainUser($filter, $user, $category);
} catch (Exception $e) {
print $e->getMessage()."\n";