<?php
/*
 * StatusNet - the distributed open-source microblogging tool
 * Copyright (C) 2008-2011, StatusNet, Inc.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/* XXX: break up into separate modules (HTTP, user, files) */

/**
 * Show a server error.
 *
 * Wraps the message in a ServerErrorAction and renders its page.
 *
 * @param string $msg  error message to show
 * @param int    $code status code handed to the action (default 500)
 */
function common_server_error($msg, $code=500)
{
    $action = new ServerErrorAction($msg, $code);
    $action->showPage();
}

/**
 * Show a user error.
 *
 * Wraps the message in a ClientErrorAction and renders its page.
 *
 * @param string $msg  error message to show
 * @param int    $code status code handed to the action (default 400)
 */
function common_user_error($msg, $code=400)
{
    $action = new ClientErrorAction($msg, $code);
    $action->showPage();
}

/**
 * This should only be used at setup; processes switching languages
 * to send text to other users should use common_switch_locale().
 *
 * Exports LANGUAGE and LANG to the environment, then asks setlocale()
 * to pick the first available UTF-8 spelling of the locale, falling back
 * to the bare locale name.
 *
 * @param string $language Locale language code (optional; empty uses
 *                         current user's preference or site default)
 * @return mixed success: the locale string chosen by setlocale(), or
 *               false if none of the candidates could be set
 */
function common_init_locale($language=null)
{
    if (!$language) {
        $language = common_language();
    }

    putenv('LANGUAGE='.$language);
    putenv('LANG='.$language);

    // Charset suffix spelling varies between systems; try them all.
    return setlocale(LC_ALL,
                     $language . ".utf8",
                     $language . ".UTF8",
                     $language . ".utf-8",
                     $language . ".UTF-8",
                     $language);
}

/**
 * Initialize locale and charset settings and gettext with our message catalog,
 * using the current user's language preference or the site default.
 *
 * This should generally only be run at framework initialization; code switching
 * languages at runtime should call common_switch_locale().
 *
 * @access private
 */
function common_init_language()
{
    mb_internal_encoding('UTF-8');

    // Note that this setlocale() call may "fail" but this is harmless;
    // gettext will still select the right language.
    $language = common_language();

    if (!common_init_locale($language)) {
        // The requested locale doesn't exist on the system.
        //
        // gettext seems very picky... We first need to setlocale()
        // to a locale which _does_ exist on the system, and _then_
        // we can set in another locale that may not be set up
        // (say, ga_ES for Galego/Galician) it seems to take it.
        //
        // For some reason C and POSIX which are guaranteed to work
        // don't do the job. en_US.UTF-8 should be there most of the
        // time, but not guaranteed.
        $ok = common_init_locale("en_US");

        if (!$ok && strtolower(substr(PHP_OS, 0, 3)) != 'win') {
            // Try to find a complete, working locale on Unix/Linux...
            // @fixme shelling out feels awfully inefficient
            // but I don't think there's a more standard way.
            $all = `locale -a`;
            foreach (explode("\n", $all) as $locale) {
                if (preg_match('/\.utf[-_]?8$/i', $locale)) {
                    $ok = setlocale(LC_ALL, $locale);
                    if ($ok) {
                        break;
                    }
                }
            }
        }

        if (!$ok) {
            common_log(LOG_ERR, "Unable to find a UTF-8 locale on this system; UI translations may not work.");
        }

        // Retry the requested locale now that a working one has been set;
        // the result is intentionally ignored (see note above).
        common_init_locale($language);
    }

    common_init_gettext();
}

/**
 * Point gettext at the "statusnet" message catalog.
 *
 * Resets LC_CTYPE to 'C', binds the "statusnet" text domain to the
 * configured site/locale_path with a UTF-8 codeset, and makes it the
 * default domain.
 *
 * @access private
 */
function common_init_gettext()
{
    $domain = "statusnet";

    setlocale(LC_CTYPE, 'C');

    // So we do not have to make people install the gettext locales
    bindtextdomain($domain, common_config('site','locale_path'));
    bind_textdomain_codeset($domain, "UTF-8");
    textdomain($domain);
}

/**
 * Switch locale during runtime, and poke gettext until it cries uncle.
 * Otherwise, sometimes it doesn't actually switch away from the old language.
 *
 * Sets the locale via common_init_locale() and then re-binds the gettext
 * domain via common_init_gettext().
 *
 * @param string $language code for locale ('en', 'fr', 'pt_BR' etc);
 *                         empty lets common_init_locale() pick the default
 */
function common_switch_locale($language=null)
{
    common_init_locale($language);

    // Same LC_CTYPE reset + text domain binding used at startup.
    common_init_gettext();
}

/**
 * Pick the timezone to use for the current request.
 *
 * @return mixed the logged-in user's timezone setting when it is non-empty,
 *               otherwise the configured site/timezone value
 */
function common_timezone()
{
    if (common_logged_in()) {
        $user = common_current_user();
        if ($user->timezone) {
            return $user->timezone;
        }
    }

    return common_config('site', 'timezone');
}

/**
 * Check a language code against the configured site/languages list.
 *
 * @param string $lang language code to check
 * @return boolean true if some configured entry's 'lang' equals $lang;
 *                 false otherwise, including for empty input
 */
function common_valid_language($lang)
{
    if (!$lang) {
        return false;
    }

    // Validate -- we don't want to end up with a bogus code
    // left over from some old junk.
    foreach (common_config('site', 'languages') as $code => $info) {
        if ($info['lang'] == $lang) {
            return true;
        }
    }

    return false;
}

/**
 * Work out which language code to use for the current request.
 *
 * Sources are tried in order: a valid ?uselang= override, the logged-in
 * user's valid language preference, the browser's Accept-Language header
 * (only when site/langdetect is enabled), and finally site/language.
 *
 * @return string language code
 */
function common_language()
{
    // Allow ?uselang=xx override, very useful for debugging
    // and helping translators check usage and context.
    if (isset($_GET['uselang'])) {
        $uselang = strval($_GET['uselang']);
        if (common_valid_language($uselang)) {
            return $uselang;
        }
    }

    // If there is a user logged in and they've set a language preference
    // then return that one...
    if (_have_config() && common_logged_in()) {
        $user = common_current_user();

        if (common_valid_language($user->language)) {
            return $user->language;
        }
    }

    // Otherwise, find the best match for the languages requested by the
    // user's browser...
    if (common_config('site', 'langdetect')) {
        $httplang = isset($_SERVER['HTTP_ACCEPT_LANGUAGE']) ? $_SERVER['HTTP_ACCEPT_LANGUAGE'] : null;
        if (!empty($httplang)) {
            $language = client_prefered_language($httplang);
            if ($language) {
                return $language;
            }
        }
    }

    // Finally, if none of the above worked, use the site's default...
    return common_config('site', 'language');
}

/**
 * Salted, hashed passwords are stored in the DB.
 *
 * The hash is md5() of the password concatenated with the given id.
 *
 * NOTE(review): MD5 is not a safe password hash. It is left unchanged here
 * because the result is compared against stored values (see
 * common_check_user()); replacing it needs a migration path.
 *
 * @param string $password plain-text password
 * @param mixed  $id       value appended to the password as salt
 * @return string 32-character hex digest
 */
function common_munge_password($password, $id)
{
    // Objects here indicate a caller bug; log a flattened stack trace
    // so the call site can be found, then carry on as before.
    if (is_object($id) || is_object($password)) {
        $e = new Exception();
        common_log(LOG_ERR, __METHOD__ . ' object in param to common_munge_password ' .
                   str_replace("\n", " ", $e->getTraceAsString()));
    }

    return md5($password . $id);
}

/**
 * Check if a username exists and has matching password.
 *
 * Fires StartCheckPassword first; if a handler returns false the built-in
 * check is skipped and whatever the handler stored in $authenticatedUser
 * is returned. Otherwise the user is looked up by email or nickname and the
 * munged password is compared with the stored one, then EndCheckPassword
 * is fired.
 *
 * @param string $nickname nickname or email address
 * @param string $password plain-text password
 * @return mixed the authenticated user, or false
 */
function common_check_user($nickname, $password)
{
    // empty nickname always unacceptable
    if (empty($nickname)) {
        return false;
    }

    $authenticatedUser = false;

    if (Event::handle('StartCheckPassword', array($nickname, $password, &$authenticatedUser))) {

        if (common_is_email($nickname)) {
            $user = User::staticGet('email', common_canonical_email($nickname));
        } else {
            $user = User::staticGet('nickname', common_canonical_nickname($nickname));
        }

        // never allow login with blank password
        if (!empty($user) && !empty($password)) {
            $munged = common_munge_password($password, $user->id);
            if (0 == strcmp($munged, $user->password)) {
                // internal checking passed
                $authenticatedUser = $user;
            }
        }

        Event::handle('EndCheckPassword', array($nickname, $password, $authenticatedUser));
    }

    return $authenticatedUser;
}

/**
 * Is the current user logged in?
 *
 * @return boolean true when common_current_user() returns non-null
 */
function common_logged_in()
{
    return !is_null(common_current_user());
}

/**
 * Is there a current session?
 *
 * @return boolean true when session_id() is a non-empty string
 */
function common_have_session()
{
    return (0 != strcmp(session_id(), ''));
}

/**
 * Start a session if none is active yet.
 *
 * Installs the custom save handler when sessions/handle is configured,
 * adopts a session id passed in the query string or cookie (query string
 * wins), starts the session, and stamps $_SESSION['started'] on first use.
 *
 * @return void
 */
function common_ensure_session()
{
    if (common_have_session()) {
        return;
    }

    if (common_config('sessions', 'handle')) {
        Session::setSaveHandler();
    }

    $name = session_name();
    $id = null;

    if (array_key_exists($name, $_GET)) {
        $id = $_GET[$name];
    } else if (array_key_exists($name, $_COOKIE)) {
        $id = $_COOKIE[$name];
    }

    if (isset($id)) {
        session_id($id);
    }

    @session_start();

    if (!isset($_SESSION['started'])) {
        $_SESSION['started'] = time();
        if (!empty($id)) {
            // Log the id we actually adopted; it may have come from the
            // query string, in which case the cookie is not set at all.
            common_log(LOG_WARNING, 'Session cookie "' . $id . '" ' .
                       ' is set but started value is null');
        }
    }
}

// common_set_user() accepts three kinds of arguments:
// 1) a user object
// 2) a nickname
// 3) null to clear

// Cache of the current user, shared by common_set_user() and
// common_current_user().
// Initialize to false; set to null if none found
$_cur = false;

/**
 * Set (or clear) the current user for this session.
 *
 * @param mixed $user a User object, a nickname string to look up,
 *                    or null to clear the current user
 * @return mixed true when the user was cleared, the User that was set,
 *               or false when nothing was set
 *
 * @throws AuthorizationException if the user lacks the WEBLOGIN right
 */
function common_set_user($user)
{
    global $_cur;

    if (is_null($user) && common_have_session()) {
        $_cur = null;
        unset($_SESSION['userid']);
        return true;
    } else if (is_string($user)) {
        $nickname = $user;
        $user = User::staticGet('nickname', $nickname);
    } else if (!($user instanceof User)) {
        return false;
    }

    if (!$user) {
        return false;
    }

    // Handlers receive $user by reference and may replace or empty it.
    if (Event::handle('StartSetUser', array(&$user)) && !empty($user)) {
        if (!$user->hasRight(Right::WEBLOGIN)) {
            // TRANS: Authorisation exception thrown when a user is not allowed to login.
            throw new AuthorizationException(_('Not allowed to log in.'));
        }
        common_ensure_session();
        $_SESSION['userid'] = $user->id;
        $_cur = $user;
        Event::handle('EndSetUser', array($user));
        return $_cur;
    }

    return false;
}

/**
 * Set a cookie scoped to this site's configured path and server.
 *
 * The cookie is marked secure only when site/ssl is 'always'.
 *
 * @param string $key        cookie name
 * @param string $value      cookie value
 * @param int    $expiration expiry timestamp passed to setcookie() (default 0)
 * @return boolean result of setcookie()
 */
function common_set_cookie($key, $value, $expiration=0)
{
    $path = common_config('site', 'path');
    $server = common_config('site', 'server');

    $cookiepath = '/';
    if ($path && ($path != '/')) {
        $cookiepath = '/' . $path . '/';
    }

    $secure = (common_config('site', 'ssl') == 'always');

    return setcookie($key, $value, $expiration, $cookiepath, $server, $secure);
}

// Name of the "remember me" cookie and its lifetime in seconds.
define('REMEMBERME', 'rememberme');
define('REMEMBERME_EXPIRY', 30 * 24 * 60 * 60); // 30 days

/**
 * Issue a "remember me" cookie for a user.
 *
 * Stores a fresh random code in a Remember_me row and sends it to the
 * browser as "<user id>:<code>" in the REMEMBERME cookie.
 *
 * @param User $user user to remember (optional; defaults to the current user)
 * @return boolean true on success, false if there is no user or the
 *                 insert failed
 */
function common_rememberme($user=null)
{
    if (!$user) {
        $user = common_current_user();
        if (!$user) {
            return false;
        }
    }

    $rm = new Remember_me();

    $rm->code = common_good_rand(16);
    $rm->user_id = $user->id;

    // Wrap the insert in some good ol' fashioned transaction code

    $rm->query('BEGIN');

    $result = $rm->insert();

    if (!$result) {
        common_log_db_error($rm, 'INSERT', __FILE__);
        // Don't leave the transaction opened above dangling.
        $rm->query('ROLLBACK');
        return false;
    }

    $rm->query('COMMIT');

    $cookieval = $rm->user_id . ':' . $rm->code;

    common_log(LOG_INFO, 'adding rememberme cookie "' . $cookieval . '" for ' . $user->nickname);

    common_set_cookie(REMEMBERME, $cookieval, time() + REMEMBERME_EXPIRY);

    return true;
}

/**
 * Try to log a user in from the REMEMBERME cookie.
 *
 * The cookie is expected to hold "<user id>:<code>". On any failure the
 * cookie is cleared via common_forgetme() and null is returned. On success
 * the used code is deleted, the user is set as current with a non-"real"
 * login, and a new cookie is issued.
 *
 * @return mixed the remembered User, or null
 */
function common_remembered_user()
{
    $packed = isset($_COOKIE[REMEMBERME]) ? $_COOKIE[REMEMBERME] : null;

    if (!$packed) {
        return null;
    }

    // Pad so a cookie without ':' is reported as malformed below
    // instead of raising an undefined-offset error here.
    list($id, $code) = array_pad(explode(':', $packed), 2, null);

    if (!$id || !$code) {
        common_log(LOG_WARNING, 'Malformed rememberme cookie: ' . $packed);
        common_forgetme();
        return null;
    }

    $rm = Remember_me::staticGet('code', $code);

    if (!$rm) {
        common_log(LOG_WARNING, 'No such remember code: ' . $code);
        common_forgetme();
        return null;
    }

    if ($rm->user_id != $id) {
        common_log(LOG_WARNING, 'Rememberme code for wrong user: ' . $rm->user_id . ' != ' . $id);
        common_forgetme();
        return null;
    }

    $user = User::staticGet('id', $rm->user_id);

    if (!$user) {
        common_log(LOG_WARNING, 'No such user for rememberme: ' . $rm->user_id);
        common_forgetme();
        return null;
    }

    // successful!
    $result = $rm->delete();

    if (!$result) {
        common_log_db_error($rm, 'DELETE', __FILE__);
        common_log(LOG_WARNING, 'Could not delete rememberme: ' . $code);
        common_forgetme();
        return null;
    }

    common_log(LOG_INFO, 'logging in ' . $user->nickname . ' using rememberme code ' . $rm->code);

    common_set_user($user);
    common_real_login(false);

    // We issue a new cookie, so they can log in
    // automatically again after this session

    common_rememberme($user);

    return $user;
}

/**
 * Clear the REMEMBERME cookie by setting it to an empty value.
 *
 * @return void
 */
function common_forgetme()
{
    common_set_cookie(REMEMBERME, '', 0);
}

/**
 * Who is the current user?
 *
 * The answer is cached in the global $_cur: false means "not looked up
 * yet", null means "nobody". The session is consulted first, then the
 * REMEMBERME cookie.
 *
 * @return mixed the current User, or null if there is none (or no config)
 */
function common_current_user()
{
    global $_cur;

    if (!_have_config()) {
        return null;
    }

    if ($_cur === false) {

        $sessionName = session_name();

        // Only touch the session when something suggests one exists.
        if (isset($_COOKIE[$sessionName]) || isset($_GET[$sessionName])
            || (isset($_SESSION['userid']) && $_SESSION['userid'])) {
            common_ensure_session();
            $id = isset($_SESSION['userid']) ? $_SESSION['userid'] : false;
            if ($id) {
                $user = User::staticGet($id);
                if ($user) {
                    $_cur = $user;
                    return $_cur;
                }
            }
        }

        // that didn't work; try to remember; will init $_cur to null on failure
        $_cur = common_remembered_user();

        if ($_cur) {
            // XXX: Is this necessary?
            $_SESSION['userid'] = $_cur->id;
        }
    }

    return $_cur;
}

/**
 * Logins that are 'remembered' aren't 'real' -- they're subject to
 * cookie-stealing. So, we don't let them do certain things. New reg,
 * OpenID, and password logins _are_ real.
 *
 * Records the flag in $_SESSION['real_login'], starting a session if needed.
 *
 * @param boolean $real whether the current login is a real one (default true)
 */
function common_real_login($real=true)
{
    common_ensure_session();
    $_SESSION['real_login'] = $real;
}

/**
 * Is someone logged in with a 'real' login (see common_real_login())?
 *
 * @return boolean true only when a user is logged in and
 *                 $_SESSION['real_login'] is set and truthy
 */
function common_is_real_login()
{
    // !empty() avoids an undefined-index notice when the flag was never set.
    return common_logged_in() && !empty($_SESSION['real_login']);
}

/**
 * Get a hash portion for HTTP caching Etags and such including
 * info on the current user's session. If login/logout state changes,
 * or we've changed accounts, or we've renamed the current user,
 * we'll get a new hash value.
 *
 * This should not be considered secure information.
 *
 * @param User $user (optional; uses common_current_user() if left out)
 * @return string crc32 of "<id>:<nickname>" as a decimal string,
 *                or '0' when there is no user
 */
function common_user_cache_hash($user=false)
{
    if ($user === false) {
        $user = common_current_user();
    }

    if (!$user) {
        return '0';
    }

    // Cast so both branches return a string, as documented.
    return (string) crc32($user->id . ':' . $user->nickname);
}

/**
 * get canonical version of nickname for comparison
 *
 * Thin wrapper around Nickname::normalize().
 *
 * @param string $nickname
 * @return string
 *
 * @throws NicknameException on invalid input
 * @deprecated call Nickname::normalize() directly.
 */
function common_canonical_nickname($nickname)
{
    return Nickname::normalize($nickname);
}

/**
 * get canonical version of email for comparison
 *
 * Currently a no-op: the address is returned unchanged.
 *
 * @fixme actually normalize
 * @fixme reject invalid input
 *
 * @param string $email
 * @return string the same address, unmodified
 */
function common_canonical_email($email)
{
    // XXX: canonicalize UTF-8
    // XXX: lcase the domain part
    return $email;
}

/**
 * Render notice text to HTML: generic text rendering, then @-mention
 * links, then links for !group references.
 *
 * @param string $text notice text
 * @param Notice $notice in whose context we're working
 * @return string partially rendered HTML
 */
function common_render_content($text, $notice)
{
    $r = common_render_text($text);
    $id = $notice->profile_id;
    $r = common_linkify_mentions($r, $notice);

    // Uses a callback rather than the /e (eval) modifier, which newer PHP
    // versions no longer support. curry() appends $id after the match array.
    $r = preg_replace_callback('/(^|[\s\.\,\:\;]+)!(' . Nickname::DISPLAY_FMT . ')/',
                               curry('common_render_content_group_link', $id),
                               $r);
    return $r;
}

/**
 * preg_replace_callback() helper for common_render_content(): rebuilds
 * one "!group" match with the group name replaced by its link.
 *
 * @param array $matches    [1] is the leading separator, [2] the group name
 * @param int   $profile_id profile id of the notice author
 * @return string replacement text
 *
 * @access private
 */
function common_render_content_group_link($matches, $profile_id)
{
    return $matches[1] . '!' . common_group_link($profile_id, $matches[2]);
}

/**
 * Finds @-mentions within the partially-rendered text section and
 * turns them into live links.
 *
 * Should generally not be called except from common_render_content().
 *
 * @param string $text partially-rendered HTML
 * @param Notice $notice in-progress or complete Notice object for context
 * @return string partially-rendered HTML
 */
function common_linkify_mentions($text, $notice)
{
    $mentions = common_find_mentions($text, $notice);

    // We need to go through in reverse order by position,
    // so our positions stay valid despite our fudging with the
    // string!

    $points = array();

    foreach ($mentions as $mention) {
        $points[$mention['position']] = $mention;
    }

    krsort($points);

    foreach ($points as $position => $mention) {
        $linkText = common_linkify_mention($mention);

        // $position is a byte offset (PREG_OFFSET_CAPTURE) and
        // substr_replace() works in bytes, so the length must be in bytes
        // too -- not characters.
        $text = substr_replace($text, $linkText, $position, strlen($mention['text']));
    }

    return $text;
}

/**
 * Build the HTML link for a single mention.
 *
 * Fires StartLinkifyMention; unless a handler returns false, the default
 * markup is generated and EndLinkifyMention is fired. Handlers receive
 * $output by reference.
 *
 * @param array $mention mention data; uses 'url', 'text' and optional 'title'
 * @return string HTML for the mention (null if a handler took over
 *                without setting the output)
 */
function common_linkify_mention($mention)
{
    $output = null;

    if (Event::handle('StartLinkifyMention', array($mention, &$output))) {

        $attrs = array('href' => $mention['url'],
                       'class' => 'url');

        if (!empty($mention['title'])) {
            $attrs['title'] = $mention['title'];
        }

        $xs = new XMLStringer(false);

        $xs->elementStart('span', 'vcard');
        $xs->elementStart('a', $attrs);
        $xs->element('span', 'fn nickname mention', $mention['text']);
        $xs->elementEnd('a');
        $xs->elementEnd('span');

        $output = $xs->getString();

        Event::handle('EndLinkifyMention', array($mention, &$output));
    }

    return $output;
}

/**
 * Find @-mentions in the given text, using the given notice object as context.
 * References will be resolved with common_relative_profile() against the user
 * who posted the notice.
 *
 * Note the return data format is internal, to be used for building links and
 * such. Should not be used directly; rather, call common_linkify_mentions().
 *
 * Each entry has the keys 'mentioned' (the mentioned profiles), 'text',
 * 'position' (offset of the text in $text), 'url' and optionally 'title'.
 *
 * @param string $text
 * @param Notice $notice notice in whose context we're building links
 *
 * @return array
 *
 * @access private
 */
function common_find_mentions($text, $notice)
{
    $mentions = array();

    $sender = Profile::staticGet('id', $notice->profile_id);

    if (empty($sender)) {
        return $mentions;
    }

    if (Event::handle('StartFindMentions', array($sender, $text, &$mentions))) {
        // Get the context of the original notice, if any
        $originalAuthor   = null;
        $originalNotice   = null;
        $originalMentions = array();

        // Is it a reply?

        if (!empty($notice) && !empty($notice->reply_to)) {
            $originalNotice = Notice::staticGet('id', $notice->reply_to);
            if (!empty($originalNotice)) {
                $originalAuthor = Profile::staticGet('id', $originalNotice->profile_id);

                $ids = $originalNotice->getReplies();

                foreach ($ids as $id) {
                    $repliedTo = Profile::staticGet('id', $id);
                    if (!empty($repliedTo)) {
                        $originalMentions[$repliedTo->nickname] = $repliedTo;
                    }
                }
            }
        }

        $matches = common_find_mentions_raw($text);

        foreach ($matches as $match) {
            try {
                $nickname = Nickname::normalize($match[0]);
            } catch (NicknameException $e) {
                // Bogus match? Drop it.
                continue;
            }

            // Try to get a profile for this nickname.
            // Start with conversation context, then go to
            // sender context.

            if (!empty($originalAuthor) && $originalAuthor->nickname == $nickname) {
                $mentioned = $originalAuthor;
            } else if (!empty($originalMentions) &&
                       array_key_exists($nickname, $originalMentions)) {
                $mentioned = $originalMentions[$nickname];
            } else {
                $mentioned = common_relative_profile($sender, $nickname);
            }

            if (!empty($mentioned)) {
                $user = User::staticGet('id', $mentioned->id);

                // Local users get a local by-id URL; others their profile URL.
                if ($user) {
                    $url = common_local_url('userbyid', array('id' => $user->id));
                } else {
                    $url = $mentioned->profileurl;
                }

                $mention = array('mentioned' => array($mentioned),
                                 'text' => $match[0],
                                 'position' => $match[1],
                                 'url' => $url);

                if (!empty($mentioned->fullname)) {
                    $mention['title'] = $mentioned->fullname;
                }

                $mentions[] = $mention;
            }
        }

        // @#tag => mention of all subscriptions tagged 'tag'

        preg_match_all('/(?:^|[\s\.\,\:\;]+)@#([\pL\pN_\-\.]{1,64})/',
                       $text,
                       $hmatches,
                       PREG_OFFSET_CAPTURE);

        foreach ($hmatches[1] as $hmatch) {

            $tag = common_canonical_tag($hmatch[0]);
            $plist = Profile_list::getByTaggerAndTag($sender->id, $tag);

            // Only existing, non-private lists are linked.
            if (!empty($plist) && !$plist->private) {
                $tagged = $sender->getTaggedSubscribers($tag);

                $url = common_local_url('showprofiletag',
                                        array('tagger' => $sender->nickname,
                                              'tag' => $tag));

                $mentions[] = array('mentioned' => $tagged,
                                    'text' => $hmatch[0],
                                    'position' => $hmatch[1],
                                    'url' => $url);
            }
        }

        Event::handle('EndFindMentions', array($sender, $text, &$mentions));
    }

    return $mentions;
}

/**
 * Does the actual regex pulls to find @-mentions in text.
 * Should generally not be called directly; for use in common_find_mentions.
 *
 * Two forms are recognised: a leading "T nickname " prefix and
 * "@nickname" at the start of the text or after whitespace.
 *
 * @param string $text
 * @return array of PCRE match arrays; each is array(nickname, offset),
 *               "T" matches first, then "@" matches
 */
function common_find_mentions_raw($text)
{
    $tmatches = array();
    preg_match_all('/^T (' . Nickname::DISPLAY_FMT . ') /',
                   $text,
                   $tmatches,
                   PREG_OFFSET_CAPTURE);

    $atmatches = array();
    preg_match_all('/(?:^|\s+)@(' . Nickname::DISPLAY_FMT . ')\b/',
                   $text,
                   $atmatches,
                   PREG_OFFSET_CAPTURE);

    // Index 1 holds the captured nickname group for each pattern.
    return array_merge($tmatches[1], $atmatches[1]);
}

/**
 * Render plain text to HTML: escape it, strip control characters,
 * linkify URLs via common_linkify() and link #hashtags.
 *
 * @param string $text plain text
 * @return string partially rendered HTML
 */
function common_render_text($text)
{
    $r = htmlspecialchars($text);

    $r = preg_replace('/[\x{0}-\x{8}\x{b}-\x{c}\x{e}-\x{19}]/', '', $r);
    $r = common_replace_urls_callback($r, 'common_linkify');

    // Uses a callback rather than the /e (eval) modifier, which newer PHP
    // versions no longer support.
    $r = preg_replace_callback('/(^|\&quot\;|\'|\(|\[|\{|\s+)#([\pL\pN_\-\.]{1,64})/u',
                               'common_render_text_tag_link',
                               $r);
    // XXX: machine tags
    return $r;
}

/**
 * preg_replace_callback() helper for common_render_text(): rebuilds one
 * "#tag" match with the tag replaced by its link.
 *
 * @param array $matches [1] is the leading delimiter, [2] the tag
 * @return string replacement text
 *
 * @access private
 */
function common_render_text_tag_link($matches)
{
    return $matches[1] . '#' . common_tag_link($matches[2]);
}

/**
 * Find links in the given text and pass them to the given callback function.
 *
 * Each match is routed through callback_helper(), which trims trailing
 * punctuation and unbalanced closing brackets before invoking $callback.
 *
 * @param string $text
 * @param function($text, $arg) $callback: return replacement text
 * @param mixed $arg: optional argument will be passed on to the callback
 * @return string text with each link replaced by the callback's result
 */
function common_replace_urls_callback($text, $callback, $arg = null) {
    // Start off with a regex
    $regex = '#'.
    '(?:^|[\s\<\>\(\)\[\]\{\}\\\'\\\";]+)(?![\@\!\#])'.
    '('.
        '(?:'.
            '(?:'. //Known protocols
                '(?:'.
                    '(?:(?:https?|ftps?|mms|rtsp|gopher|news|nntp|telnet|wais|file|prospero|webcal|irc)://)'.
                    '|'.
                    '(?:(?:mailto|aim|tel|xmpp):)'.
                ')'.
                '(?:[\pN\pL\-\_\+\%\~]+(?::[\pN\pL\-\_\+\%\~]+)?\@)?'. //user:pass@
                '(?:'.
                    '(?:'.
                        '\[[\pN\pL\-\_\:\.]+(?<![\.\:])\]'. //[dns]
                    ')|(?:'.
                        '[\pN\pL\-\_\:\.]+(?<![\.\:])'. //dns
                    ')'.
                ')'.
            ')'.
            '|(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'. //IPv4
            '|(?:'. //IPv6
                '\[?(?:(?:(?:[0-9A-Fa-f]{1,4}:){7}(?:(?:[0-9A-Fa-f]{1,4})|:))|(?:(?:[0-9A-Fa-f]{1,4}:){6}(?::|(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})|(?::[0-9A-Fa-f]{1,4})))|(?:(?:[0-9A-Fa-f]{1,4}:){5}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:[0-9A-Fa-f]{1,4}:){4}(?::[0-9A-Fa-f]{1,4}){0,1}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:[0-9A-Fa-f]{1,4}:){3}(?::[0-9A-Fa-f]{1,4}){0,2}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:[0-9A-Fa-f]{1,4}:){2}(?::[0-9A-Fa-f]{1,4}){0,3}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:[0-9A-Fa-f]{1,4}:)(?::[0-9A-Fa-f]{1,4}){0,4}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?::(?::[0-9A-Fa-f]{1,4}){0,5}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})))\]?(?<!:)'.
            ')|(?:'. //DNS
                '(?:[\pN\pL\-\_\+\%\~]+(?:\:[\pN\pL\-\_\+\%\~]+)?\@)?'. //user:pass@
                '[\pN\pL\-\_]+(?:\.[\pN\pL\-\_]+)*\.'.
                //tld list from http://data.iana.org/TLD/tlds-alpha-by-domain.txt, also added local, loc, and onion
                '(?:AC|AD|AE|AERO|AF|AG|AI|AL|AM|AN|AO|AQ|AR|ARPA|AS|ASIA|AT|AU|AW|AX|AZ|BA|BB|BD|BE|BF|BG|BH|BI|BIZ|BJ|BM|BN|BO|BR|BS|BT|BV|BW|BY|BZ|CA|CAT|CC|CD|CF|CG|CH|CI|CK|CL|CM|CN|CO|COM|COOP|CR|CU|CV|CX|CY|CZ|DE|DJ|DK|DM|DO|DZ|EC|EDU|EE|EG|ER|ES|ET|EU|FI|FJ|FK|FM|FO|FR|GA|GB|GD|GE|GF|GG|GH|GI|GL|GM|GN|GOV|GP|GQ|GR|GS|GT|GU|GW|GY|HK|HM|HN|HR|HT|HU|ID|IE|IL|IM|IN|INFO|INT|IO|IQ|IR|IS|IT|JE|JM|JO|JOBS|JP|KE|KG|KH|KI|KM|KN|KP|KR|KW|KY|KZ|LA|LB|LC|LI|LK|LR|LS|LT|LU|LV|LY|MA|MC|MD|ME|MG|MH|MIL|MK|ML|MM|MN|MO|MOBI|MP|MQ|MR|MS|MT|MU|MUSEUM|MV|MW|MX|MY|MZ|NA|NAME|NC|NE|NET|NF|NG|NI|NL|NO|NP|NR|NU|NZ|OM|ORG|PA|PE|PF|PG|PH|PK|PL|PM|PN|PR|PRO|PS|PT|PW|PY|QA|RE|RO|RS|RU|RW|SA|SB|SC|SD|SE|SG|SH|SI|SJ|SK|SL|SM|SN|SO|SR|ST|SU|SV|SY|SZ|TC|TD|TEL|TF|TG|TH|TJ|TK|TL|TM|TN|TO|TP|TR|TRAVEL|TT|TV|TW|TZ|UA|UG|UK|US|UY|UZ|VA|VC|VE|VG|VI|VN|VU|WF|WS|XN--0ZWM56D|测试|XN--11B5BS3A9AJ6G|परीक्षा|XN--80AKHBYKNJ4F|испытание|XN--9T4B11YI5A|테스트|XN--DEBA0AD|טעסט|XN--G6W251D|測試|XN--HGBK6AJ7F53BBA|آزمایشی|XN--HLCJ6AYA9ESC7A|பரிட்சை|XN--JXALPDLP|δοκιμή|XN--KGBECHTV|إختبار|XN--ZCKZAH|テスト|YE|YT|YU|ZA|ZM|ZW|local|loc|onion)'.
            ')(?![\pN\pL\-\_])'.
        ')'.
        '(?:'.
            '(?:\:\d+)?'. //:port
            '(?:/[\pN\pL$\,\!\(\)\.\:\-\_\+\/\=\&\;\%\~\*\$\+\'@]*)?'. // /path
            '(?:\?[\pN\pL\$\,\!\(\)\.\:\-\_\+\/\=\&\;\%\~\*\$\+\'@\/]*)?'. // ?query string
            '(?:\#[\pN\pL$\,\!\(\)\.\:\-\_\+\/\=\&\;\%\~\*\$\+\'\@/\?\#]*)?'. // #fragment
        ')(?<![\?\.\,\#\,])'.
    ')'.
    '#ixu';

    // curry() binds $callback and $arg as trailing arguments, so each match
    // ends up as callback_helper($matches, $callback, $arg).
    return preg_replace_callback($regex, curry('callback_helper',$callback,$arg) ,$text);
}

/**
 * Intermediate callback for common_replace_urls_callback(), helps resolve
 * some ambiguous link forms before passing on to the final callback.
 *
 * Trailing characters that are unlikely to belong to the URL are peeled
 * off: '.', '?', ',' and '#', plus a closing bracket that has no matching
 * opening bracket inside the URL. Everything in the full match that is not
 * part of the trimmed URL is kept around the callback's result.
 *
 * @param array $matches [0] is the full match, [1] the candidate URL
 * @param callable $callback called as $callback($url, $arg)
 * @param mixed $arg optional argument to pass on as second param to callback
 * @return string
 *
 * @access private
 */
function callback_helper($matches, $callback, $arg=null) {
    $url = $matches[1];
    $left = strpos($matches[0], $url);
    $right = $left + strlen($url);

    $groupSymbolSets = array(
        array('left' => '(', 'right' => ')'),
        array('left' => '[', 'right' => ']'),
        array('left' => '{', 'right' => '}'),
        array('left' => '<', 'right' => '>'),
    );
    $cannotEndWith = array('.', '?', ',', '#');

    // Keep trimming until a full pass leaves the URL untouched.
    do {
        $original_url = $url;

        foreach ($groupSymbolSets as $groupSymbolSet) {
            if (substr($url, -1) === $groupSymbolSet['right']) {
                $group_left_count = substr_count($url, $groupSymbolSet['left']);
                $group_right_count = substr_count($url, $groupSymbolSet['right']);
                // More closers than openers: the last closer is not ours.
                if ($group_left_count < $group_right_count) {
                    $right -= 1;
                    $url = substr($url, 0, -1);
                }
            }
        }

        if (in_array(substr($url, -1), $cannotEndWith, true)) {
            $right -= 1;
            $url = substr($url, 0, -1);
        }
    } while ($original_url !== $url);

    $result = call_user_func_array($callback, array($url, $arg));

    return substr($matches[0], 0, $left) . $result . substr($matches[0], $right);
}

929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945
// Pick the curry() implementation that the running PHP version can parse.
if (version_compare(PHP_VERSION, '5.3.0', 'ge')) {
    // lambda implementation in a separate file; PHP 5.2 won't parse it.
    // NOTE(review): lib/curry.php is presumably expected to define curry()
    // with the same contract as the fallback below -- confirm.
    require_once INSTALLDIR . "/lib/curry.php";
} else {
    /**
     * Partial application for PHP versions without closures.
     *
     * Any arguments after $fn are remembered; the returned function calls
     * $fn with its own call-time arguments first, followed by the
     * remembered ones.
     *
     * The callback and remembered arguments are stored in $GLOBALS under a
     * uniqid()-generated key so that the create_function() body can reach
     * them; nothing in this function removes that entry again.
     *
     * @param callable $fn function to call
     * @param mixed ... additional arguments appended to every call
     * @return string name of the generated function, usable as a callback
     */
    function curry($fn) {
        $args = func_get_args();
        // Drop $fn itself; what remains are the arguments to remember.
        array_shift($args);
        $id = uniqid('_partial');
        $GLOBALS[$id] = array($fn, $args);
        return create_function('',
                               '$args = func_get_args(); '.
                               'return call_user_func_array('.
                               '$GLOBALS["'.$id.'"][0],'.
                               'array_merge('.
                               '$args,'.
                               '$GLOBALS["'.$id.'"][1]));');
    }
}

/**
 * Build the anchor element for a URL (or bare e-mail address) found in text.
 *
 * The href is the canonicalised URL and the title is the resolved "long" URL
 * as reported by File_redirection::where(). If a File record is known for the
 * long URL (or can be created when the 'attachments'/'process_links' setting
 * is on) and it has an enclosure, the link is additionally marked up as an
 * attachment, with a thumbnail class when a File_thumbnail exists for it.
 *
 * Side effects: may write to the database through File::processNew().
 *
 * @param string $url link text with HTML special characters encoded
 * @return mixed result of XMLStringer::estring() for the 'a' element
 *               (presumably the serialized markup -- confirm against XMLStringer)
 */
function common_linkify($url) {
    // It comes in special'd, so we unspecial it before passing to the stringifying
    // functions
    $url = htmlspecialchars_decode($url);

    if (strpos($url, '@') !== false && strpos($url, ':') === false && Validate::email($url)) {
        //url is an email address without the mailto: protocol
        $canon = "mailto:$url";
        $longurl = "mailto:$url";
    } else {

        $canon = File_redirection::_canonUrl($url);

        $longurl_data = File_redirection::where($canon, common_config('attachments', 'process_links'));
        if (is_array($longurl_data)) {
            $longurl = $longurl_data['url'];
        } elseif (is_string($longurl_data)) {
            $longurl = $longurl_data;
        } else {
            // Unable to reach the server to verify contents, etc
            // Just pass the link on through for now.
            common_log(LOG_ERR, "Can't linkify url '$url'");
            $longurl = $url;
        }
    }

    $attrs = array('href' => $canon, 'title' => $longurl);

    $is_attachment = false;
    $attachment_id = null;
    $has_thumb = false;

    // Check to see whether this is a known "attachment" URL.

    $f = File::staticGet('url', $longurl);

    if (empty($f)) {
        if (common_config('attachments', 'process_links')) {
            // XXX: this writes to the database. :<
            $f = File::processNew($longurl);
        }
    }

    if (!empty($f)) {
        if ($f->getEnclosure()) {
            $is_attachment = true;
            $attachment_id = $f->id;

            $thumb = File_thumbnail::staticGet('file_id', $f->id);
            if (!empty($thumb)) {
                $has_thumb = true;
            }
        }
    }

    // Add clippy
    if ($is_attachment) {
        $attrs['class'] = 'attachment';
        if ($has_thumb) {
            $attrs['class'] = 'attachment thumbnail';
        }
        $attrs['id'] = "attachment-{$attachment_id}";
    }

    // Whether to nofollow

    $nf = common_config('nofollow', 'external');

    // Strict check: with ==, a non-string setting such as boolean true would
    // also compare equal to 'never' and silently drop the nofollow.
    if ($nf === 'never') {
        $attrs['rel'] = 'external';
    } else {
        $attrs['rel'] = 'nofollow external';
    }

    return XMLStringer::estring('a', $attrs, $url);
}

1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041
/**
 * Find and shorten links in a given chunk of text if it's longer than the
 * configured notice content limit (or unconditionally).
 *
 * Side effects: may save file and file_redirection records for referenced URLs.
 *
 * Pass the $user option or call $user->shortenLinks($text) to ensure the proper
 * user's options are used; otherwise the current web session user's settings
 * will be used or ur1.ca if there is no active web login.
 *
 * @param string $text
 * @param boolean $always (optional) shorten every link regardless of text length
 * @param User $user (optional) user whose shortening preferences apply;
 *                   defaults to the current web session user
 *
 * @return string
 */
function common_shorten_links($text, $always = false, User $user=null)
{
    // Only fall back to the session user when the caller did not name one;
    // overwriting $user unconditionally would ignore the caller's choice.
    if ($user === null) {
        $user = common_current_user();
    }

    $maxLength = User_urlshortener_prefs::maxNoticeLength($user);

    if ($always || mb_strlen($text) > $maxLength) {
        return common_replace_urls_callback($text, array('File_redirection', 'forceShort'), $user);
    } else {
        return common_replace_urls_callback($text, array('File_redirection', 'makeShort'), $user);
    }
}

1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066
/**
 * Very basic stripping of invalid UTF-8 input text.
 *
 * NUL bytes are removed from valid input; input that is not valid UTF-8
 * yields null instead of a string.
 *
 * @param string $str
 * @return mixed string or null if invalid input
 *
 * @todo ideally we should drop bad chars, and maybe do some of the checks
 *       from common_xml_safe_str. But we can't strip newlines, etc.
 * @todo Unicode normalization might also be useful, but not needed now.
 */
function common_validate_utf8($str)
{
    // preg_replace will return NULL on invalid UTF-8 input.
    //
    // Note: empty regex //u also caused NULL return on some
    // production machines, but none of our test machines.
    //
    // This should be replaced with a more reliable check.
    return preg_replace('/\x00/u', '', $str);
}

/**
 * Make sure an arbitrary string is safe for output in XML as a single line.
 *
 * Tabs, newlines and vertical tabs become a single space, carriage returns
 * and NUL bytes are dropped, and every remaining control character is
 * replaced with '*'.
 *
 * @param string $str
 * @return string (preg_replace() gives NULL instead when $str is not valid UTF-8)
 */
function common_xml_safe_str($str)
{
    // Replace common eol and extra whitespace input chars
    $unWelcome = array(
        "\t",  // tab
        "\n",  // newline
        "\r",  // cr
        "\0",  // null byte eos
        "\x0B" // vertical tab
    );

    // Same order as $unWelcome: str_replace() pairs the entries by position.
    $replacement = array(
        ' ', // single space
        ' ',
        '',  // nothing
        '',
        ' '
    );

    $str = str_replace($unWelcome, $replacement, $str);

    // Neutralize any additional control codes and UTF-16 surrogates
    // (Twitter uses '*')
    return preg_replace('/[\p{Cc}\p{Cs}]/u', '*', $str);
}

1107 1108
function common_tag_link($tag)
{
1109
    $canonical = common_canonical_tag($tag);
1110 1111
    if (common_config('singleuser', 'enabled')) {
        // regular TagAction isn't set up in 1user mode
1112
        $nickname = User::singleUserNickname();
1113
        $url = common_local_url