We are no longer offering accounts on this server. Consider https://gitlab.freedesktop.org/ as a place to host projects.

util.php 78.8 KB
Newer Older
1
<?php
Evan Prodromou's avatar
Evan Prodromou committed
2
/*
3
 * StatusNet - the distributed open-source microblogging tool
4
 * Copyright (C) 2008-2011, StatusNet, Inc.
Evan Prodromou's avatar
Evan Prodromou committed
5
 *
6 7 8 9
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
Evan Prodromou's avatar
Evan Prodromou committed
10
 *
11 12 13 14
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
Evan Prodromou's avatar
Evan Prodromou committed
15
 *
16 17 18 19
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

20
/* XXX: break up into separate modules (HTTP, user, files) */
21

22 23 24
/**
 * Show a server error page.
 *
 * Wraps the message and code in a ServerErrorAction and asks it to render.
 *
 * @param string $msg  error message handed to the action
 * @param int    $code error code handed to the action (defaults to 500)
 */
function common_server_error($msg, $code=500)
{
    $action = new ServerErrorAction($msg, $code);
    $action->showPage();
}

31 32 33
/**
 * Show a user (client) error page.
 *
 * Wraps the message and code in a ClientErrorAction and asks it to render.
 *
 * @param string $msg  error message handed to the action
 * @param int    $code error code handed to the action (defaults to 400)
 */
function common_user_error($msg, $code=400)
{
    $action = new ClientErrorAction($msg, $code);
    $action->showPage();
}

40 41 42
/**
 * Export the language to the environment and apply it as the process locale.
 *
 * This should only be used at setup; processes switching languages
 * to send text to other users should use common_switch_locale().
 *
 * @param string $language Locale language code (optional; when empty the
 *                         result of common_language() is used)
 * @return mixed whatever setlocale() returns: the locale name that was
 *               applied, or false when none of the candidates worked
 */
function common_init_locale($language=null)
{
    $lang = $language ? $language : common_language();

    foreach (array('LANGUAGE', 'LANG') as $envName) {
        putenv($envName . '=' . $lang);
    }

    // Try the common spellings of the UTF-8 codeset first, then the bare
    // language code; setlocale() applies the first candidate that works.
    $candidates = array();
    foreach (array('.utf8', '.UTF8', '.utf-8', '.UTF-8', '') as $suffix) {
        $candidates[] = $lang . $suffix;
    }

    return setlocale(LC_ALL, $candidates);
}

64 65 66
/**
 * Initialize locale and charset settings and gettext with our message catalog,
 * using the current user's language preference or the site default.
 *
 * This should generally only be run at framework initialization; code switching
 * languages at runtime should call common_switch_locale().
 *
 * @access private
 */
function common_init_language()
{
    mb_internal_encoding('UTF-8');

    // Note that this setlocale() call may "fail" but this is harmless;
    // gettext will still select the right language.
    $language = common_language();

    if (!common_init_locale($language)) {
        // The requested locale doesn't exist on the system.
        //
        // gettext seems very picky... We first need to setlocale()
        // to a locale which _does_ exist on the system, and _then_
        // we can set in another locale that may not be set up
        // (say, ga_ES for Galego/Galician) it seems to take it.
        //
        // For some reason C and POSIX which are guaranteed to work
        // don't do the job. en_US.UTF-8 should be there most of the
        // time, but not guaranteed.
        $ok = common_init_locale("en_US");

        if (!$ok && strtolower(substr(PHP_OS, 0, 3)) != 'win') {
            // Try to find a complete, working locale on Unix/Linux...
            // @fixme shelling out feels awfully inefficient
            // but I don't think there's a more standard way.
            //
            // shell_exec() yields null when the command fails or produces
            // no output; cast so explode() always receives a string.
            $all = (string) shell_exec('locale -a');
            foreach (explode("\n", $all) as $locale) {
                if (preg_match('/\.utf[-_]?8$/i', $locale)) {
                    $ok = setlocale(LC_ALL, $locale);
                    if ($ok) {
                        break;
                    }
                }
            }
        }

        if (!$ok) {
            common_log(LOG_ERR, "Unable to find a UTF-8 locale on this system; UI translations may not work.");
        }

        // Ask for the wanted language again now that a fallback locale has
        // been tried; the result is deliberately not checked (see above).
        common_init_locale($language);
    }

    common_init_gettext();
}

/**
 * Bind the "statusnet" gettext domain to the configured locale directory
 * and make it the default domain, with UTF-8 output.
 *
 * LC_CTYPE is reset to 'C' before the domain is bound.
 *
 * @access private
 */
function common_init_gettext()
{
    $domain = "statusnet";

    setlocale(LC_CTYPE, 'C');

    // So we do not have to make people install the gettext locales
    bindtextdomain($domain, common_config('site','locale_path'));
    bind_textdomain_codeset($domain, "UTF-8");
    textdomain($domain);
}

/**
 * Switch locale during runtime, and poke gettext until it cries uncle.
 * Otherwise, sometimes it doesn't actually switch away from the old language.
 *
 * The gettext re-binding is the same sequence used at startup, so it is
 * delegated to common_init_gettext() rather than repeated here.
 *
 * @param string $language code for locale ('en', 'fr', 'pt_BR' etc);
 *                         when empty, common_init_locale() picks the language
 */
function common_switch_locale($language=null)
{
    common_init_locale($language);
    common_init_gettext();
}

148 149
/**
 * Pick the timezone to use for the current request.
 *
 * @return mixed the logged-in user's timezone when it is set (truthy),
 *               otherwise the 'site'/'timezone' configuration value
 */
function common_timezone()
{
    if (common_logged_in()) {
        $userTimezone = common_current_user()->timezone;
        if ($userTimezone) {
            return $userTimezone;
        }
    }

    return common_config('site', 'timezone');
}

160 161 162 163 164 165 166 167 168 169 170 171 172 173
/**
 * Check a language code against the configured site languages.
 *
 * Validates so we don't end up with a bogus code left over from old data.
 *
 * @param string $lang language code to check
 * @return bool true when some entry of the 'site'/'languages' config has a
 *              matching 'lang' value; false otherwise or when $lang is empty
 */
function common_valid_language($lang)
{
    if (!$lang) {
        return false;
    }

    foreach (common_config('site', 'languages') as $info) {
        if ($info['lang'] == $lang) {
            return true;
        }
    }

    return false;
}

174 175
/**
 * Work out which language to use for the current request.
 *
 * Sources are tried in order; the first one yielding a usable code wins:
 *  1. a valid ?uselang=xx query parameter,
 *  2. the logged-in user's language preference, if valid,
 *  3. the browser's Accept-Language header (only when 'site'/'langdetect'
 *     is enabled),
 *  4. the 'site'/'language' configuration value.
 *
 * @return string language code
 */
function common_language()
{
    // Allow ?uselang=xx override, very useful for debugging
    // and helping translators check usage and context.
    if (isset($_GET['uselang'])) {
        $requested = strval($_GET['uselang']);
        if (common_valid_language($requested)) {
            return $requested;
        }
    }

    // A logged-in user's own preference comes next.
    if (_have_config() && common_logged_in()) {
        $preferred = common_current_user()->language;
        if (common_valid_language($preferred)) {
            return $preferred;
        }
    }

    // Otherwise, find the best match for the languages requested by the
    // user's browser...
    if (common_config('site', 'langdetect') && !empty($_SERVER['HTTP_ACCEPT_LANGUAGE'])) {
        $detected = client_prefered_language($_SERVER['HTTP_ACCEPT_LANGUAGE']);
        if ($detected) {
            return $detected;
        }
    }

    // Finally, if none of the above worked, use the site's default...
    return common_config('site', 'language');
}
209

210 211 212
/**
 * Produce the hashed form of a password (the form stored in the DB).
 *
 * The hashing itself is done by handlers of the StartHashPassword and
 * EndHashPassword events, which fill in the hash by reference. The End
 * event only fires when the Start event returns true.
 *
 * @param string  $password plain-text password
 * @param Profile $profile  profile passed along to the event handlers
 * @return string the hash produced by the handlers
 *
 * @throws PasswordHashException when no handler produced a non-empty hash
 */
function common_munge_password($password, Profile $profile=null)
{
    $hash = null;

    $continue = Event::handle('StartHashPassword', array(&$hash, $password, $profile));
    if ($continue) {
        Event::handle('EndHashPassword', array(&$hash, $password, $profile));
    }

    if (!empty($hash)) {
        return $hash;
    }

    throw new PasswordHashException();
}

227 228 229
/**
 * Check if a username exists and has matching password.
 *
 * Handlers of StartCheckPassword may authenticate the user themselves (by
 * setting the by-reference user) and return false to skip the built-in
 * check. The built-in check looks the user up by e-mail or by normalized
 * nickname and compares the hashed password with the stored one.
 * EndCheckPassword is always fired with the outcome.
 *
 * @param string $nickname nickname or e-mail address
 * @param string $password plain-text password
 * @return User|false the authenticated user, or false
 */
function common_check_user($nickname, $password)
{
    // empty nickname always unacceptable
    if (empty($nickname)) {
        return false;
    }

    $authenticatedUser = false;

    if (Event::handle('StartCheckPassword', array($nickname, $password, &$authenticatedUser))) {

        $user = common_is_email($nickname)
            ? User::getKV('email', common_canonical_email($nickname))
            : User::getKV('nickname', Nickname::normalize($nickname));

        if ($user instanceof User && !empty($password)) {
            $hashed = common_munge_password($password, $user->getProfile());
            if (strcmp($hashed, $user->password) == 0) {
                // internal checking passed
                $authenticatedUser = $user;
            }
        }
    }

    Event::handle('EndCheckPassword', array($nickname, $password, $authenticatedUser));

    return $authenticatedUser;
}

259 260 261
/**
 * Is the current user logged in?
 *
 * @return bool true when common_current_user() yields anything but null
 */
function common_logged_in()
{
    $current = common_current_user();
    return $current !== null;
}

267 268
/**
 * Tell whether a PHP session is currently active.
 *
 * @return bool true when session_id() is a non-empty string
 */
function common_have_session()
{
    // Cast first: a false return compares like '' just as strcmp() treats it.
    return (string) session_id() !== '';
}

272 273
/**
 * Make sure a PHP session is running for this request.
 *
 * Does nothing when a session already exists. Otherwise it installs the
 * Session save handler when 'sessions'/'handle' is configured, adopts a
 * session ID supplied in the query string or the cookie, and starts the
 * session. A session without a 'started' marker gets one set to now.
 */
function common_ensure_session()
{
    if (common_have_session()) {
        return;
    }

    if (common_config('sessions', 'handle')) {
        Session::setSaveHandler();
    }

    // A session ID in the query string takes precedence over the cookie.
    $name = session_name();
    $id = null;
    if (array_key_exists($name, $_GET)) {
        $id = $_GET[$name];
    } else if (array_key_exists($name, $_COOKIE)) {
        $id = $_COOKIE[$name];
    }
    if (isset($id)) {
        session_id($id);
    }

    @session_start();

    if (!isset($_SESSION['started'])) {
        $_SESSION['started'] = time();
        if (!empty($id)) {
            // Report the ID that was actually adopted. It may have come
            // from $_GET, in which case $_COOKIE has no such entry.
            common_log(LOG_WARNING, 'Session cookie "' . $id . '" ' .
                       ' is set but started value is null');
        }
    }
}

301 302 303 304
// Cached current user, shared by common_set_user() and common_current_user().
// Initialize to false; set to null if none found
$_cur = false;

/**
 * Set (or clear) the current logged-in user.
 *
 * Three kinds of arguments:
 *  1) a User object
 *  2) a nickname (looked up with User::getKV)
 *  3) null to clear (only acted upon when a session exists)
 *
 * Handlers of StartSetUser may replace the user or veto the login by
 * returning false. On success the user's id is stored in the session.
 *
 * @param User|string|null $user
 * @return User|bool the user that was set, true when the user was cleared,
 *                   false when nothing was set
 *
 * @throws AuthorizationException when the user lacks Right::WEBLOGIN
 */
function common_set_user($user)
{
    global $_cur;

    if (is_null($user) && common_have_session()) {
        $_cur = null;
        unset($_SESSION['userid']);
        return true;
    }

    if (is_string($user)) {
        $user = User::getKV('nickname', $user);
    } else if (!($user instanceof User)) {
        return false;
    }

    // Bail out on a failed lookup, a vetoing handler, or a handler that
    // emptied the by-reference user (checked in this order).
    if (!$user || !Event::handle('StartSetUser', array(&$user)) || empty($user)) {
        return false;
    }

    if (!$user->hasRight(Right::WEBLOGIN)) {
        // TRANS: Authorisation exception thrown when a user a not allowed to login.
        throw new AuthorizationException(_('Not allowed to log in.'));
    }

    common_ensure_session();
    $_SESSION['userid'] = $user->id;
    $_cur = $user;
    Event::handle('EndSetUser', array($user));

    return $_cur;
}

342 343
/**
 * Send a cookie scoped to this site's configured path and server.
 *
 * The cookie path is "/<site path>/" when a non-root 'site'/'path' is
 * configured and "/" otherwise. The secure flag is set only when
 * 'site'/'ssl' is 'always'.
 *
 * @param string $key        cookie name
 * @param string $value      cookie value
 * @param int    $expiration expiry timestamp handed to setcookie() (default 0)
 * @return bool result of setcookie()
 */
function common_set_cookie($key, $value, $expiration=0)
{
    $sitePath = common_config('site', 'path');
    $domain   = common_config('site', 'server');

    $cookiepath = ($sitePath && ($sitePath != '/')) ? '/' . $sitePath . '/' : '/';
    $secure     = (common_config('site', 'ssl') == 'always');

    return setcookie($key, $value, $expiration, $cookiepath, $domain, $secure);
}

define('REMEMBERME', 'rememberme');
define('REMEMBERME_EXPIRY', 30 * 24 * 60 * 60); // 30 days

/**
 * Issue a "remember me" cookie for a user.
 *
 * Stores a new Remember_me row holding a random code for the user and sends
 * a cookie of the form "<user id>:<code>" that expires after
 * REMEMBERME_EXPIRY seconds.
 *
 * @param User $user user to remember (optional; defaults to the current user)
 * @return bool true on success; false when there is no user or the row
 *              could not be inserted
 */
function common_rememberme($user=null)
{
    $user = $user ? $user : common_current_user();
    if (!$user) {
        return false;
    }

    $remember = new Remember_me();
    $remember->code    = common_random_hexstr(16);
    $remember->user_id = $user->id;

    // Wrap the insert in some good ol' fashioned transaction code
    $remember->query('BEGIN');

    if (!$remember->insert()) {
        common_log_db_error($remember, 'INSERT', __FILE__);
        $remember->query('ROLLBACK');
        return false;
    }

    $remember->query('COMMIT');

    $cookieval = $remember->user_id . ':' . $remember->code;

    // NOTE(review): this writes the secret cookie value to the log.
    common_log(LOG_INFO, 'adding rememberme cookie "' . $cookieval . '" for ' . $user->nickname);

    common_set_cookie(REMEMBERME, $cookieval, time() + REMEMBERME_EXPIRY);

    return true;
}

400 401
/**
 * Try to log a user in from the "remember me" cookie.
 *
 * The cookie holds "<user id>:<code>". The code must match a Remember_me
 * row belonging to that user id, and the user must exist. On success the
 * used row is deleted, the user is logged in as a non-"real" login, and a
 * fresh cookie is issued. On any failure the cookie is cleared.
 *
 * @return User|null the user that was logged in, or null
 */
function common_remembered_user()
{
    $packed = isset($_COOKIE[REMEMBERME]) ? $_COOKIE[REMEMBERME] : null;

    if (!$packed) {
        return null;
    }

    // Pad to two parts so a cookie without ':' is reported as malformed
    // below instead of triggering an undefined-offset error here.
    list($id, $code) = array_pad(explode(':', $packed), 2, null);

    if (!$id || !$code) {
        common_log(LOG_WARNING, 'Malformed rememberme cookie: ' . $packed);
        common_forgetme();
        return null;
    }

    $rm = Remember_me::getKV('code', $code);

    if (!$rm) {
        common_log(LOG_WARNING, 'No such remember code: ' . $code);
        common_forgetme();
        return null;
    }

    if ($rm->user_id != $id) {
        common_log(LOG_WARNING, 'Rememberme code for wrong user: ' . $rm->user_id . ' != ' . $id);
        common_forgetme();
        return null;
    }

    $user = User::getKV('id', $rm->user_id);

    if (!$user instanceof User) {
        common_log(LOG_WARNING, 'No such user for rememberme: ' . $rm->user_id);
        common_forgetme();
        return null;
    }

    // successful! The code is single-use, so remove it before logging in.
    $result = $rm->delete();

    if (!$result) {
        common_log_db_error($rm, 'DELETE', __FILE__);
        common_log(LOG_WARNING, 'Could not delete rememberme: ' . $code);
        common_forgetme();
        return null;
    }

    common_log(LOG_INFO, 'logging in ' . $user->nickname . ' using rememberme code ' . $rm->code);

    common_set_user($user);
    common_real_login(false);

    // We issue a new cookie, so they can log in
    // automatically again after this session
    common_rememberme($user);

    return $user;
}

463 464 465
/**
 * Clear the "remember me" cookie.
 *
 * Sends the REMEMBERME cookie with an empty value and expiration 0.
 */
function common_forgetme()
{
    $emptyValue = '';
    common_set_cookie(REMEMBERME, $emptyValue, 0);
}

471 472 473
/**
 * Who is the current user?
 *
 * The answer is cached in the global $_cur (false = not looked up yet).
 * On the first lookup the session is consulted when the request carries a
 * session cookie or parameter, or a session user id is already present.
 * Failing that, the "remember me" cookie is tried.
 *
 * @return User|null the current user; null when nobody is logged in or
 *                   configuration has not been loaded
 */
function common_current_user()
{
    global $_cur;

    if (!_have_config()) {
        return null;
    }

    // Already resolved during this request.
    if ($_cur !== false) {
        return $_cur;
    }

    $sessionName = session_name();
    $mayHaveSession = isset($_COOKIE[$sessionName])
        || isset($_GET[$sessionName])
        || !empty($_SESSION['userid']);

    if ($mayHaveSession) {
        common_ensure_session();
        if (!empty($_SESSION['userid'])) {
            $found = User::getKV('id', $_SESSION['userid']);
            if ($found instanceof User) {
                $_cur = $found;
                return $_cur;
            }
        }
    }

    // that didn't work; try to remember; will init $_cur to null on failure
    $_cur = common_remembered_user();

    if ($_cur) {
        // XXX: Is this necessary?
        $_SESSION['userid'] = $_cur->id;
    }

    return $_cur;
}

509 510 511 512 513
/**
 * Record whether the current login is a 'real' one.
 *
 * Logins that are 'remembered' aren't 'real' -- they're subject to
 * cookie-stealing. So, we don't let them do certain things. New reg,
 * OpenID, and password logins _are_ real.
 *
 * @param bool $real value stored in the session as 'real_login'
 */
function common_real_login($real=true)
{
    // The flag lives in the session, so one must exist first.
    common_ensure_session();

    $_SESSION['real_login'] = $real;
}

520 521
/**
 * Is the current user logged in through a 'real' login?
 *
 * @return bool true only when a user is logged in and the session's
 *              'real_login' flag is set and truthy
 */
function common_is_real_login()
{
    // The flag is only written by common_real_login(); treat a session that
    // never had it set as not-real instead of reading an undefined index.
    return common_logged_in() && !empty($_SESSION['real_login']);
}

525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547
/**
 * Get a hash portion for HTTP caching Etags and such including
 * info on the current user's session. If login/logout state changes,
 * or we've changed accounts, or we've renamed the current user,
 * we'll get a new hash value.
 *
 * This should not be considered secure information.
 *
 * @param User $user (optional; uses common_current_user() if left out)
 * @return int|string crc32 of "<id>:<nickname>" for a user, or the
 *                    string '0' when there is no user
 */
function common_user_cache_hash($user=false)
{
    // false means "not given"; null is a legitimate "no user".
    $subject = ($user === false) ? common_current_user() : $user;

    if (!$subject) {
        return '0';
    }

    return crc32($subject->id . ':' . $subject->nickname);
}

548 549 550 551 552
/**
 * Get the canonical version of a nickname for comparison.
 *
 * Thin wrapper around Nickname::normalize().
 *
 * @param string $nickname
 * @return string
 *
 * @throws NicknameException on invalid input
 * @deprecated call Nickname::normalize() directly.
 */
function common_canonical_nickname($nickname)
{
    $canonical = Nickname::normalize($nickname);
    return $canonical;
}

562 563 564 565 566 567 568 569 570
/**
 * Get the canonical version of an e-mail address for comparison.
 *
 * Currently a no-op: the address is returned exactly as given.
 *
 * @fixme actually normalize
 * @fixme reject invalid input
 *
 * @param string $email
 * @return string the unchanged input
 */
function common_canonical_email($email)
{
    // XXX: canonicalize UTF-8
    // XXX: lcase the domain part
    $canonical = $email;

    return $canonical;
}

578 579
/**
 * Clean up HTML with HTMLPurifier.
 *
 * The purifier is configured to drop 'style' attributes, to allow only the
 * URI schemes listed by common_url_schemes(), and to forbid every element
 * whose 'htmlfilter' config entry is exactly true. Unicode formatting
 * codes are stripped before purifying. Handlers of EndCommonPurify can
 * alter the result by reference.
 *
 * @param string $html HTML to clean
 * @return string purified HTML
 */
function common_purify($html)
{
    require_once INSTALLDIR.'/extlib/HTMLPurifier/HTMLPurifier.auto.php';

    $config = HTMLPurifier_Config::createDefault();
    // id, on* etc. are already filtered by default
    $config->set('HTML.ForbiddenAttributes', array('style'));
    $config->set('URI.AllowedSchemes', array_fill_keys(common_url_schemes(), true));

    // Remove more elements than what the default filter removes, default in GNU social are remotely
    // linked resources such as img, video, audio
    // Only tags whose setting is strictly true are forbidden.
    $config->set('HTML.ForbiddenElements', array_keys(common_config('htmlfilter'), true, true));

    $html = common_remove_unicode_formatting($html);

    $cleaner = new HTMLPurifier($config);
    $purified = $cleaner->purify($html);

    Event::handle('EndCommonPurify', array(&$purified, $html));

    return $purified;
}

/**
 * Strip Unicode text formatting/direction codes from a string.
 *
 * Removes code points U+200B..U+200F and U+202A..U+202E. These are pretty
 * dangerous for visualisation of text and can be used for mischief.
 *
 * @param string $text UTF-8 text
 * @return string text without those code points
 */
function common_remove_unicode_formatting($text)
{
    $formattingCodes = '/[\\x{200b}-\\x{200f}\\x{202a}-\\x{202e}]/u';

    return preg_replace($formattingCodes, '', $text);
}

612 613 614
/**
 * Render notice text to HTML: run the text through common_render_text()
 * and then turn the mentions found in it into links.
 *
 * @param string    $text   notice text to render
 * @param Profile   $author the Profile that is composing the current notice
 * @param Notice    $parent the Notice this is sent in reply to, if any
 * @return string rendered HTML
 */
function common_render_content($text, Profile $author, Notice $parent=null)
{
    return common_linkify_mentions(common_render_text($text), $author, $parent);
}

627 628 629 630 631 632
/**
 * Finds @-mentions within the partially-rendered text section and
 * turns them into live links.
 *
 * Should generally not be called except from common_render_content().
 *
 * @param string    $text   partially-rendered HTML
 * @param Profile   $author the Profile that is composing the current notice
 * @param Notice    $parent the Notice this is sent in reply to, if any
 * @return string partially-rendered HTML
 */
function common_linkify_mentions($text, Profile $author, Notice $parent=null)
{
    // Index by position; a later mention at the same position replaces
    // an earlier one.
    $byPosition = array();
    foreach (common_find_mentions($text, $author, $parent) as $found) {
        $byPosition[$found['position']] = $found;
    }

    // We need to go through in reverse order by position,
    // so our positions stay valid despite our fudging with the
    // string!
    krsort($byPosition);

    // NOTE(review): substr_replace() counts bytes, while common_find_mentions()
    // computes 'length' with mb_strlen(); these agree only for single-byte
    // text -- confirm.
    foreach ($byPosition as $offset => $found) {
        $text = substr_replace($text, common_linkify_mention($found), $offset, $found['length']);
    }

    return $text;
}

665
/**
 * Build the HTML link for a single mention.
 *
 * Handlers of StartLinkifyMention may supply the output themselves and
 * return false to skip the default rendering. The default is an <a>
 * element with the mention's URL, class "h-card <type>", an optional
 * title, and the mention text as content.
 *
 * @param array $mention mention data; reads 'url', 'type', 'text' and
 *                       optionally 'title'
 * @return string|null link HTML (null if a handler vetoed without output)
 */
function common_linkify_mention(array $mention)
{
    $output = null;

    if (!Event::handle('StartLinkifyMention', array($mention, &$output))) {
        return $output;
    }

    $linkAttrs = array('href' => $mention['url'],
                       'class' => 'h-card '.$mention['type']);
    if (!empty($mention['title'])) {
        $linkAttrs['title'] = $mention['title'];
    }

    $stringer = new XMLStringer(false);
    $stringer->element('a', $linkAttrs, $mention['text']);
    $output = $stringer->getString();

    Event::handle('EndLinkifyMention', array($mention, &$output));

    return $output;
}

690
/**
 * Collect who a text is addressed to.
 *
 * Every profile mentioned in the text is included, plus the author of the
 * parent notice when there is one.
 *
 * @param string  $text
 * @param Profile $sender the Profile that is sending the text
 * @param Notice  $parent the Notice this text is in reply to, if any
 * @return array map of URI => object type
 */
function common_get_attentions($text, Profile $sender, Notice $parent=null)
{
    $attentions = array();

    foreach (common_find_mentions($text, $sender, $parent) as $mention) {
        foreach ($mention['mentioned'] as $profile) {
            $attentions[$profile->getUri()] = $profile->getObjectType();
        }
    }

    if ($parent instanceof Notice) {
        // afaik groups can't be authors
        $attentions[$parent->getProfile()->getUri()] = ActivityObject::PERSON;
    }

    return $attentions;
}

707
/**
 * Find @-mentions in the given text, using the given notice object as context.
 * References will be resolved with common_relative_profile() against the user
 * who posted the notice.
 *
 * Three kinds of references are recognised: @nickname mentions (resolved
 * first against the people addressed by the parent notice, then the parent's
 * author, then relative to the sender), @#tag mentions of the sender's
 * non-private profile list, and !group mentions of groups the sender belongs to.
 *
 * Note the return data format is internal, to be used for building links and
 * such. Should not be used directly; rather, call common_linkify_mentions().
 *
 * @param string    $text
 * @param Profile   $sender the Profile that is sending the current text
 * @param Notice    $parent the Notice this text is in reply to, if any
 *
 * @return array list of mention arrays with keys 'mentioned', 'type',
 *               'text', 'position', 'length', 'url' and (except for
 *               list mentions) 'title'
 *
 * @access private
 */
function common_find_mentions($text, Profile $sender, Notice $parent=null)
{
    $mentions = array();

    if (Event::handle('StartFindMentions', array($sender, $text, &$mentions))) {
        // Get the context of the original notice, if any
        $origMentions = array();
        // Does it have a parent notice for context?
        if ($parent instanceof Notice) {
            foreach ($parent->getAttentionProfiles() as $repliedTo) {
                if (!$repliedTo->isPerson()) {
                    continue;
                }
                $origMentions[$repliedTo->id] = $repliedTo;
            }
        }

        $matches = common_find_mentions_raw($text);

        foreach ($matches as $match) {
            try {
                $nickname = Nickname::normalize($match[0]);
            } catch (NicknameException $e) {
                // Bogus match? Drop it.
                continue;
            }

            // primarily mention the profiles mentioned in the parent
            $mention_found_in_origMentions = false;
            foreach ($origMentions as $origMentionsId => $origMention) {
                if ($origMention->getNickname() == $nickname) {
                    $mention_found_in_origMentions = $origMention;
                    // don't mention same twice! the parent might have mentioned
                    // two users with same nickname on different instances
                    unset($origMentions[$origMentionsId]);
                    break;
                }
            }

            // Try to get a profile for this nickname.
            // Start with parents mentions, then go to parents sender context
            if ($mention_found_in_origMentions) {
                $mentioned = $mention_found_in_origMentions;
            } else if ($parent instanceof Notice && $parent->getProfile()->getNickname() === $nickname) {
                $mentioned = $parent->getProfile();
            } else {
                // sets to null if no match
                $mentioned = common_relative_profile($sender, $nickname);
            }

            if ($mentioned instanceof Profile) {
                try {
                    $url = $mentioned->getUri();    // prefer the URI as URL, if it is one.
                    if (!common_valid_http_url($url)) {
                        $url = $mentioned->getUrl();
                    }
                } catch (InvalidUrlException $e) {
                    $url = common_local_url('userbyid', array('id' => $mentioned->getID()));
                }

                $mention = array('mentioned' => array($mentioned),
                                 'type' => 'mention',
                                 'text' => $match[0],
                                 'position' => $match[1],
                                 'length' => mb_strlen($match[0]),
                                 'title' => $mentioned->getFullname(),
                                 'url' => $url);

                $mentions[] = $mention;
            }
        }

        // @#tag => mention of all subscriptions tagged 'tag'

        preg_match_all('/(?:^|[\s\.\,\:\;]+)@#([\pL\pN_\-\.]{1,64})/',
                       $text, $hmatches, PREG_OFFSET_CAPTURE);
        foreach ($hmatches[1] as $hmatch) {
            $tag = common_canonical_tag($hmatch[0]);
            $plist = Profile_list::getByTaggerAndTag($sender->getID(), $tag);
            if (!$plist instanceof Profile_list || $plist->private) {
                continue;
            }
            $tagged = $sender->getTaggedSubscribers($tag);

            $url = common_local_url('showprofiletag',
                                    array('nickname' => $sender->getNickname(),
                                          'tag' => $tag));

            $mentions[] = array('mentioned' => $tagged,
                                'type'      => 'list',
                                'text' => $hmatch[0],
                                'position' => $hmatch[1],
                                'length' => mb_strlen($hmatch[0]),
                                'url' => $url);
        }

        // !group => mention of a group the sender is a member of

        preg_match_all('/(?:^|[\s\.\,\:\;]+)!(' . Nickname::DISPLAY_FMT . ')/',
                       $text, $hmatches, PREG_OFFSET_CAPTURE);
        foreach ($hmatches[1] as $hmatch) {
            try {
                $nickname = Nickname::normalize($hmatch[0]);
            } catch (NicknameException $e) {
                // Same policy as for @-mentions above: a match that does
                // not normalize is dropped instead of aborting the lookup.
                continue;
            }
            $group = User_group::getForNickname($nickname, $sender);

            if (!$group instanceof User_group || !$sender->isMember($group)) {
                continue;
            }

            $profile = $group->getProfile();

            $mentions[] = array('mentioned' => array($profile),
                                'type'      => 'group',
                                'text'      => $hmatch[0],
                                'position'  => $hmatch[1],
                                'length'    => mb_strlen($hmatch[0]),
                                'url'       => $group->permalink(),
                                'title'     => $group->getFancyName());
        }

        Event::handle('EndFindMentions', array($sender, $text, &$mentions));
    }

    return $mentions;
}

846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861
/**
 * Does the actual regex pulls to find @-mentions in text.
 * Should generally not be called directly; for use in common_find_mentions.
 *
 * Two forms are matched: a leading "T nickname " and "@nickname" at the
 * start of the text or after whitespace. Results of the first pattern come
 * before those of the second.
 *
 * @param string $text
 * @return array of PCRE match arrays ([matched nickname, offset] pairs)
 */
function common_find_mentions_raw($text)
{
    $patterns = array(
        '/^T (' . Nickname::DISPLAY_FMT . ') /',
        // the regexp's "(?!\@)" makes sure it doesn't matches the single "@remote" in "@remote@server.com"
        '/(?:^|\s+)@(' . Nickname::DISPLAY_FMT . ')\b(?!\@)/',
    );

    $found = array();
    foreach ($patterns as $pattern) {
        $captures = array();
        preg_match_all($pattern, $text, $captures, PREG_OFFSET_CAPTURE);
        // Group 1 holds the nickname captures.
        $found = array_merge($found, $captures[1]);
    }

    return $found;
}

872 873
/**
 * Turn plain notice text into HTML.
 *
 * Steps, in order: strip Unicode formatting codes, escape HTML and convert
 * newlines to <br>, drop control characters, linkify URLs via
 * common_linkify, and link #hashtags via common_tag_link().
 *
 * @param string $text plain text
 * @return string HTML
 */
function common_render_text($text)
{
    $html = nl2br(htmlspecialchars(common_remove_unicode_formatting($text)));

    $html = preg_replace('/[\x{0}-\x{8}\x{b}-\x{c}\x{e}-\x{19}]/', '', $html);
    $html = common_replace_urls_callback($html, 'common_linkify');

    // Keep whatever preceded the hash sign and link only the tag itself.
    $linkTag = function ($hit) {
        return $hit[1] . '#' . common_tag_link($hit[2]);
    };

    // XXX: machine tags
    return preg_replace_callback('/(^|\&quot\;|\'|\(|\[|\{|\s+)#([\pL\pN_\-\.]{1,64})/u',
                $linkTag, $html);
}

mattl's avatar
mattl committed
885 886 887
define('_URL_SCHEME_COLON_DOUBLE_SLASH', 1);
define('_URL_SCHEME_SINGLE_COLON', 2);
define('_URL_SCHEME_NO_DOMAIN', 4);
mattl's avatar
mattl committed
888
define('_URL_SCHEME_COLON_COORDINATES', 8);
mattl's avatar
mattl committed
889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917

/**
 * List the URL schemes known to the linkifier.
 *
 * Every scheme is tagged with exactly one _URL_SCHEME_* flag describing the
 * syntax that follows the scheme name.
 *
 * @param int|null $filter Bitmask of _URL_SCHEME_* flags; only schemes whose
 *                         flag intersects the mask are returned. null
 *                         (the default) returns every scheme.
 * @return array list of scheme names, in table order
 */
function common_url_schemes($filter=null)
{
    // TODO: move these to $config
    $schemes = [
                'http'      => _URL_SCHEME_COLON_DOUBLE_SLASH,
                'https'     => _URL_SCHEME_COLON_DOUBLE_SLASH,
                'ftp'       => _URL_SCHEME_COLON_DOUBLE_SLASH,
                'ftps'      => _URL_SCHEME_COLON_DOUBLE_SLASH,
                'mms'       => _URL_SCHEME_COLON_DOUBLE_SLASH,
                'rtsp'      => _URL_SCHEME_COLON_DOUBLE_SLASH,
                'gopher'    => _URL_SCHEME_COLON_DOUBLE_SLASH,
                'news'      => _URL_SCHEME_COLON_DOUBLE_SLASH,
                'nntp'      => _URL_SCHEME_COLON_DOUBLE_SLASH,
                'telnet'    => _URL_SCHEME_COLON_DOUBLE_SLASH,
                'wais'      => _URL_SCHEME_COLON_DOUBLE_SLASH,
                'file'      => _URL_SCHEME_COLON_DOUBLE_SLASH,
                'prospero'  => _URL_SCHEME_COLON_DOUBLE_SLASH,
                'webcal'    => _URL_SCHEME_COLON_DOUBLE_SLASH,
                'irc'       => _URL_SCHEME_COLON_DOUBLE_SLASH,
                'ircs'      => _URL_SCHEME_COLON_DOUBLE_SLASH,
                'aim'       => _URL_SCHEME_SINGLE_COLON,
                'bitcoin'   => _URL_SCHEME_SINGLE_COLON,
                'fax'       => _URL_SCHEME_SINGLE_COLON,
                'jabber'    => _URL_SCHEME_SINGLE_COLON,
                'mailto'    => _URL_SCHEME_SINGLE_COLON,
                'tel'       => _URL_SCHEME_SINGLE_COLON,
                'xmpp'      => _URL_SCHEME_SINGLE_COLON,
                'magnet'    => _URL_SCHEME_NO_DOMAIN,
                'geo'       => _URL_SCHEME_COLON_COORDINATES,
                ];

    // array_filter() hands the closure the array *values*, i.e. the flag of
    // each scheme; the surviving keys are the scheme names.
    return array_keys(
            array_filter($schemes,
                function ($flag) use ($filter) {
                    return is_null($filter) || ($flag & $filter);
                })
            );
}

929 930 931 932 933 934 935 936
/**
 * Find links in the given text and pass them to the given callback function.
 *
 * A link is only recognised at the start of the text or after whitespace,
 * a bracket, a quote or a semicolon, and never when it starts with "@",
 * "!" or "#". The following forms are matched:
 *  - a known "scheme://" or "scheme:" prefix, optional user:pass@, then a
 *    host name (optionally in square brackets);
 *  - coordinate URIs ("geo:lat,lon[,alt]" with optional ";name=value"
 *    parameters);
 *  - schemes without a domain, which must be followed by "?"
 *    (e.g. "magnet:?xt=...");
 *  - bare IPv4 addresses, bare IPv6 addresses and bare domain names ending
 *    in a listed TLD, each only if the matching 'linkify' config setting
 *    ('bare_ipv4', 'bare_ipv6', 'bare_domains') is enabled.
 * Any of these may be followed by an optional :port, /path, ?query and
 * #fragment. A trailing "?", ".", "," or "#" is left out of the match.
 *
 * Matches are routed through callback_helper(), which receives $callback
 * and $arg ahead of the PCRE match array.
 *
 * @param string $text
 * @param callable $callback function($text, $arg): return replacement text
 * @param mixed $arg optional argument will be passed on to the callback
 * @return string|null result of preg_replace_callback() (null on PCRE error)
 */
function common_replace_urls_callback($text, $callback, $arg = null)
{
    $geouri_labeltext_regex = '\pN\pL\-';
    $geouri_mark_regex = '\-\_\.\!\~\*\\\'\(\)';    // the \\\' is really pretty
    $geouri_unreserved_regex = '\pN\pL' . $geouri_mark_regex;
    $geouri_punreserved_regex = '\[\]\:\&\+\$';
    $geouri_pctencoded_regex = '(?:\%[0-9a-fA-F][0-9a-fA-F])';
    $geouri_paramchar_regex = $geouri_unreserved_regex . $geouri_punreserved_regex; //FIXME: add $geouri_pctencoded_regex here so it works

    // Start off with a regex
    // Delimiter is "#"; modifiers: i = case-insensitive, x = extended, u = UTF-8.
    $regex = '#'.
    '(?:^|[\s\<\>\(\)\[\]\{\}\\\'\\\";]+)(?![\@\!\#])'.
    '('.
        '(?:'.
            '(?:'. //Known protocols
                '(?:'.
                    '(?:(?:' . implode('|', common_url_schemes(_URL_SCHEME_COLON_DOUBLE_SLASH)) . ')://)'.
                    '|'.
                    '(?:(?:' . implode('|', common_url_schemes(_URL_SCHEME_SINGLE_COLON)) . '):)'.
                ')'.
                '(?:[\pN\pL\-\_\+\%\~]+(?::[\pN\pL\-\_\+\%\~]+)?\@)?'. //user:pass@
                '(?:'.
                    '(?:'.
                        '\[[\pN\pL\-\_\:\.]+(?<![\.\:])\]'. //[dns]
                    ')|(?:'.
                        '[\pN\pL\-\_\:\.]+(?<![\.\:])'. //dns
                    ')'.
                ')'.
            ')'.
            '|(?:'.
                '(?:' . implode('|', common_url_schemes(_URL_SCHEME_COLON_COORDINATES)) . '):'.
                // There's an order that must be followed here too, if ;crs= is used, it must precede ;u=
                // Also 'crsp' (;crs=$crsp) must match $geouri_labeltext_regex
                // Also 'uval' (;u=$uval) must be a pnum: \-?[0-9]+
                '(?:'.
                    '(?:[0-9]+(?:\.[0-9]+)?(?:\,[0-9]+(?:\.[0-9]+)?){1,2})'.    // 1(.23)?(,4(.56)){1,2}
                    '(?:\;(?:['.$geouri_labeltext_regex.']+)(?:\=['.$geouri_paramchar_regex.']+)*)*'.
                ')'.
            ')'.
            // URLs without domain name, like magnet:?xt=...
            '|(?:(?:' . implode('|', common_url_schemes(_URL_SCHEME_NO_DOMAIN)) . '):(?=\?))'.  // zero-length lookahead requires ? after :
            (common_config('linkify', 'bare_ipv4')   // Convert IPv4 addresses to hyperlinks
                ? '|(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'
                : '').
            (common_config('linkify', 'bare_ipv6')   // Convert IPv6 addresses to hyperlinks
                ? '|(?:'. //IPv6
                    '\[?(?:(?:(?:[0-9A-Fa-f]{1,4}:){7}(?:(?:[0-9A-Fa-f]{1,4})|:))|(?:(?:[0-9A-Fa-f]{1,4}:){6}(?::|(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})|(?::[0-9A-Fa-f]{1,4})))|(?:(?:[0-9A-Fa-f]{1,4}:){5}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:[0-9A-Fa-f]{1,4}:){4}(?::[0-9A-Fa-f]{1,4}){0,1}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:[0-9A-Fa-f]{1,4}:){3}(?::[0-9A-Fa-f]{1,4}){0,2}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:[0-9A-Fa-f]{1,4}:){2}(?::[0-9A-Fa-f]{1,4}){0,3}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:[0-9A-Fa-f]{1,4}:)(?::[0-9A-Fa-f]{1,4}){0,4}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?::(?::[0-9A-Fa-f]{1,4}){0,5}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})))\]?(?<!:)'.
                    ')'
                : '').
            (common_config('linkify', 'bare_domains')
                ? '|(?:'. //DNS
                    '(?:[\pN\pL\-\_\+\%\~]+(?:\:[\pN\pL\-\_\+\%\~]+)?\@)?'. //user:pass@
                    '[\pN\pL\-\_]+(?:\.[\pN\pL\-\_]+)*\.'.
                    //tld list from http://data.iana.org/TLD/tlds-alpha-by-domain.txt, also added local, loc, and onion
                    '(?:AC|AD|AE|AERO|AF|AG|AI|AL|AM|AN|AO|AQ|AR|ARPA|AS|ASIA|AT|AU|AW|AX|AZ|BA|BB|BD|BE|BF|BG|BH|BI|BIZ|BJ|BM|BN|BO|BR|BS|BT|BV|BW|BY|BZ|CA|CAT|CC|CD|CF|CG|CH|CI|CK|CL|CM|CN|CO|COM|COOP|CR|CU|CV|CX|CY|CZ|DE|DJ|DK|DM|DO|DZ|EC|EDU|EE|EG|ER|ES|ET|EU|FI|FJ|FK|FM|FO|FR|GA|GB|GD|GE|GF|GG|GH|GI|GL|GM|GN|GOV|GP|GQ|GR|GS|GT|GU|GW|GY|HK|HM|HN|HR|HT|HU|ID|IE|IL|IM|IN|INFO|INT|IO|IQ|IR|IS|IT|JE|JM|JO|JOBS|JP|KE|KG|KH|KI|KM|KN|KP|KR|KW|KY|KZ|LA|LB|LC|LI|LK|LR|LS|LT|LU|LV|LY|MA|MC|MD|ME|MG|MH|MIL|MK|ML|MM|MN|MO|MOBI|MP|MQ|MR|MS|MT|MU|MUSEUM|MV|MW|MX|MY|MZ|NA|NAME|NC|NE|NET|NF|NG|NI|NL|NO|NP|NR|NU|NZ|OM|ORG|PA|PE|PF|PG|PH|PK|PL|PM|PN|PR|PRO|PS|PT|PW|PY|QA|RE|RO|RS|RU|RW|SA|SB|SC|SD|SE|SG|SH|SI|SJ|SK|SL|SM|SN|SO|SR|ST|SU|SV|SY|SZ|TC|TD|TEL|TF|TG|TH|TJ|TK|TL|TM|TN|TO|TP|TR|TRAVEL|TT|TV|TW|TZ|UA|UG|UK|US|UY|UZ|VA|VC|VE|VG|VI|VN|VU|WF|WS|XN--0ZWM56D|测试|XN--11B5BS3A9AJ6G|परीक्षा|XN--80AKHBYKNJ4F|испытание|XN--9T4B11YI5A|테스트|XN--DEBA0AD|טעסט|XN--G6W251D|測試|XN--HGBK6AJ7F53BBA|آزمایشی|XN--HLCJ6AYA9ESC7A|பரிட்சை|XN--JXALPDLP|δοκιμή|XN--KGBECHTV|إختبار|XN--ZCKZAH|テスト|YE|YT|YU|ZA|ZM|ZONE|ZW|local|loc|onion)'.
            ')(?![\pN\pL\-\_])'
                : '') . // if common_config('linkify', 'bare_domains') is false, don't add anything here
        ')'.
        '(?:'.
            '(?:\:\d+)?'. //:port
            '(?:/[\pN\pL$\,\!\(\)\.\:\-\_\+\/\=\&\;\%\~\*\$\+\'@]*)?'. // /path
            '(?:\?[\pN\pL\$\,\!\(\)\.\:\-\_\+\/\=\&\;\%\~\*\$\+\'@\/]*)?'. // ?query string
            '(?:\#[\pN\pL$\,\!\(\)\.\:\-\_\+\/\=\&\;\%\~\*\$\+\'\@/\?\#]*)?'. // #fragment
        ')(?<![\?\.\,\#\,])'.
    ')'.
    '#ixu';

    return preg_replace_callback($regex, curry('callback_helper',$callback,$arg) ,$text);
}
1005

1006 1007 1008 1009 1010 1011 1012 1013
/**
 * Intermediate callback for common_replace_links(), helps resolve some
 * ambiguous link forms before passing on to the final callback.
 *
 * @param array $matches
 * @param callable $callback
 * @param mixed $arg optional argument to pass on as second param to callback
 * @return string
1014
 *