We are no longer offering accounts on this server. Consider https://gitlab.freedesktop.org/ as a place to host projects.

util.php 63.1 KB
Newer Older
1
<?php
Evan Prodromou's avatar
Evan Prodromou committed
2
/*
3
 * StatusNet - the distributed open-source microblogging tool
4
 * Copyright (C) 2008, 2009, StatusNet, Inc.
Evan Prodromou's avatar
Evan Prodromou committed
5
 *
6 7 8 9
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
Evan Prodromou's avatar
Evan Prodromou committed
10
 *
11 12 13 14
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
Evan Prodromou's avatar
Evan Prodromou committed
15
 *
16 17 18 19
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

20
/* XXX: break up into separate modules (HTTP, user, files) */
21

22 23 24
/**
 * Render a server error page.
 *
 * Builds a ServerErrorAction from the arguments and shows its page.
 *
 * @param string $msg  message passed to ServerErrorAction
 * @param int    $code code passed to ServerErrorAction (default 500)
 *
 * @return void
 */
function common_server_error($msg, $code=500)
{
    $action = new ServerErrorAction($msg, $code);
    $action->showPage();
}

31 32 33
/**
 * Render a client (user) error page.
 *
 * Builds a ClientErrorAction from the arguments and shows its page.
 *
 * @param string $msg  message passed to ClientErrorAction
 * @param int    $code code passed to ClientErrorAction (default 400)
 *
 * @return void
 */
function common_user_error($msg, $code=400)
{
    $action = new ClientErrorAction($msg, $code);
    $action->showPage();
}

40 41 42
/**
 * Set the process locale (and the LANGUAGE/LANG environment variables)
 * for the given language.
 *
 * Meant for setup time only; code that switches languages to send text
 * to other users should call common_switch_locale() instead.
 *
 * @param string $language Locale language code (optional; when empty the
 *                         result of common_language() is used)
 *
 * @return mixed whatever setlocale() returns: the locale that was set,
 *               or false when none of the candidates could be set
 */
function common_init_locale($language=null)
{
    if (!$language) {
        $language = common_language();
    }

    putenv('LANGUAGE='.$language);
    putenv('LANG='.$language);

    // setlocale() walks the list in order and stops at the first
    // candidate the system accepts, so the UTF-8 spellings come first
    // and the bare code is the last resort.
    $candidates = array($language . ".utf8",
                        $language . ".UTF8",
                        $language . ".utf-8",
                        $language . ".UTF-8",
                        $language);

    return setlocale(LC_ALL, $candidates);
}

64 65 66
/**
 * Set up the internal encoding, the locale and gettext for the language
 * returned by common_language().
 *
 * Intended for framework initialization; code switching languages at
 * runtime should call common_switch_locale().
 *
 * @access private
 *
 * @return void
 */
function common_init_language()
{
    mb_internal_encoding('UTF-8');

    // The first setlocale() attempt may "fail", which is harmless:
    // gettext will still select the right language.
    $language = common_language();

    if (!common_init_locale($language)) {
        // The requested locale doesn't exist on the system.
        //
        // gettext seems very picky: we first have to setlocale() to a
        // locale that _does_ exist on the system, and only _then_ can
        // we set a locale that may not be installed (say, ga_ES for
        // Galego/Galician).
        //
        // For some reason C and POSIX, which are guaranteed to work,
        // don't do the job. en_US.UTF-8 should be there most of the
        // time, but is not guaranteed.
        $fallback = common_init_locale("en_US");
        $onWindows = (strtolower(substr(PHP_OS, 0, 3)) == 'win');

        if (!$fallback && !$onWindows) {
            // Look for any complete, working UTF-8 locale on Unix/Linux.
            // @fixme shelling out feels awfully inefficient
            // but there doesn't seem to be a more standard way.
            $installed = explode("\n", shell_exec('locale -a'));
            foreach ($installed as $candidate) {
                if (!preg_match('/\.utf[-_]?8$/i', $candidate)) {
                    continue;
                }
                $fallback = setlocale(LC_ALL, $candidate);
                if ($fallback) {
                    break;
                }
            }
        }

        if (!$fallback) {
            common_log(LOG_ERR, "Unable to find a UTF-8 locale on this system; UI translations may not work.");
        }

        // Second attempt at the requested language, now that some
        // locale has (hopefully) been set.
        common_init_locale($language);
    }

    common_init_gettext();
}

/**
 * Bind the "statusnet" gettext domain to the configured locale path,
 * force its codeset to UTF-8 and make it the current text domain.
 *
 * LC_CTYPE is reset to 'C' first so that people do not have to install
 * the gettext locales.
 *
 * @access private
 *
 * @return void
 */
function common_init_gettext()
{
    $domain = "statusnet";

    setlocale(LC_CTYPE, 'C');
    bindtextdomain($domain, common_config('site','locale_path'));
    bind_textdomain_codeset($domain, "UTF-8");
    textdomain($domain);
}

/**
 * Switch locale during runtime, then re-bind the gettext domain so it
 * actually switches away from the previously selected language.
 *
 * @param string $language code for locale ('en', 'fr', 'pt_BR' etc);
 *                         when empty, common_init_locale() falls back to
 *                         common_language()
 *
 * @return void
 */
function common_switch_locale($language=null)
{
    common_init_locale($language);

    // Same LC_CTYPE reset and "statusnet" domain binding that are
    // performed at initialization time.
    common_init_gettext();
}

148 149
/**
 * Timezone to use for the current request.
 *
 * @return mixed the logged-in user's timezone when one is set, otherwise
 *               the 'site'/'timezone' configuration value
 */
function common_timezone()
{
    if (common_logged_in()) {
        $preferred = common_current_user()->timezone;
        if ($preferred) {
            return $preferred;
        }
    }

    return common_config('site', 'timezone');
}

160 161
/**
 * Pick the language to use for the current request.
 *
 * Order of preference:
 *  1. the logged-in user's language, if it matches the 'lang' entry of
 *     one of the configured site languages;
 *  2. the result of client_prefered_language() on the Accept-Language
 *     header, when 'site'/'langdetect' is enabled;
 *  3. the 'site'/'language' configuration value.
 *
 * @return mixed language code
 */
function common_language()
{
    if (_have_config() && common_logged_in()) {
        $preferred = common_current_user()->language;

        if ($preferred) {
            // Validate -- a bogus code left over from old data must
            // not be returned.
            foreach (common_config('site', 'languages') as $info) {
                if ($info['lang'] == $preferred) {
                    return $preferred;
                }
            }
        }
    }

    // No usable user preference: try the languages requested by the
    // browser.
    if (common_config('site', 'langdetect') && !empty($_SERVER['HTTP_ACCEPT_LANGUAGE'])) {
        $detected = client_prefered_language($_SERVER['HTTP_ACCEPT_LANGUAGE']);
        if ($detected) {
            return $detected;
        }
    }

    // Nothing matched; use the site default.
    return common_config('site', 'language');
}
193

194 195 196
/**
 * Compute the salted hash of a password as stored in the DB.
 *
 * The hash is md5 over the password concatenated with the id.
 *
 * NOTE(review): md5 is not a suitable password hash; changing it here
 * would presumably invalidate stored hashes, so it is left untouched.
 *
 * @param string $password plain-text password
 * @param mixed  $id       value appended to the password as salt
 *
 * @return string 32-character hexadecimal md5 digest
 */
function common_munge_password($password, $id)
{
    $gotObject = is_object($password) || is_object($id);

    if ($gotObject) {
        // Log where the unexpected object argument came from.
        $trace = new Exception();
        common_log(LOG_ERR, __METHOD__ . ' object in param to common_munge_password ' .
                   str_replace("\n", " ", $trace->getTraceAsString()));
    }

    return md5($password . $id);
}

207 208 209
/**
 * Check whether a nickname exists and the given password matches.
 *
 * The StartCheckPassword event may set the authenticated user itself
 * and, by returning false, skip the built-in check entirely.
 *
 * @param string $nickname nickname to look up
 * @param string $password plain-text password to verify
 *
 * @return mixed the authenticated user on success, false otherwise
 */
function common_check_user($nickname, $password)
{
    // empty nickname always unacceptable
    if (empty($nickname)) {
        return false;
    }

    $authenticatedUser = false;

    if (!Event::handle('StartCheckPassword', array($nickname, $password, &$authenticatedUser))) {
        return $authenticatedUser;
    }

    $user = User::staticGet('nickname', common_canonical_nickname($nickname));

    // never allow login with blank password
    if (!empty($user) && !empty($password)) {
        $hashed = common_munge_password($password, $user->id);
        if (strcmp($hashed, $user->password) === 0) {
            // internal checking passed
            $authenticatedUser = $user;
        }
    }

    Event::handle('EndCheckPassword', array($nickname, $password, $authenticatedUser));

    return $authenticatedUser;
}

236 237 238
/**
 * Is the current user logged in?
 *
 * @return boolean true when common_current_user() yields something
 *                 other than null
 */
function common_logged_in()
{
    return common_current_user() !== null;
}

244 245
/**
 * Is there a session with a non-empty id?
 *
 * @return boolean true when session_id() is not the empty string
 */
function common_have_session()
{
    return ((string) session_id()) !== '';
}

249 250
/**
 * Start a session if there is none yet.
 *
 * When no session is active: installs the custom save handler if
 * 'sessions'/'handle' is configured, adopts a session id supplied in the
 * query string (preferred) or in the session cookie, starts the session
 * and stamps $_SESSION['started'] on sessions that lack it.
 *
 * @return void
 */
function common_ensure_session()
{
    if (common_have_session()) {
        return;
    }

    if (common_config('sessions', 'handle')) {
        Session::setSaveHandler();
    }

    $name = session_name();

    if (array_key_exists($name, $_GET)) {
        $id = $_GET[$name];
    } else if (array_key_exists($name, $_COOKIE)) {
        $id = $_COOKIE[$name];
    }

    if (isset($id)) {
        session_id($id);
    }

    @session_start();

    if (!isset($_SESSION['started'])) {
        $_SESSION['started'] = time();
        if (!empty($id)) {
            // Log the id we actually adopted: it may have come from
            // $_GET, in which case the cookie entry does not exist and
            // reading it would raise an undefined-index notice.
            common_log(LOG_WARNING, 'Session cookie "' . $id . '" ' .
                       ' is set but started value is null');
        }
    }
}

278 279 280 281
// common_set_user() accepts three kinds of arguments:
// 1) a user object
// 2) a nickname
// 3) null to clear

// Cached current user. Starts out as false; set to null if none found.
$_cur = false;

/**
 * Set (or clear) the current user.
 *
 * @param mixed $user User object, nickname string, or null to clear
 *
 * @return mixed true after clearing the user of an existing session;
 *               the user that was set on success; false otherwise
 *               (unknown nickname, unsupported argument, null without a
 *               session, or a StartSetUser handler that vetoed)
 */
function common_set_user($user)
{
    global $_cur;

    if (is_null($user) && common_have_session()) {
        $_cur = null;
        unset($_SESSION['userid']);
        return true;
    }

    if (is_string($user)) {
        $user = User::staticGet('nickname', $user);
    } else if (!($user instanceof User)) {
        return false;
    }

    if (!$user) {
        return false;
    }

    if (!Event::handle('StartSetUser', array(&$user))) {
        return false;
    }

    // $user is passed by reference, so a handler may have emptied it.
    if (!$user) {
        return false;
    }

    common_ensure_session();
    $_SESSION['userid'] = $user->id;
    $_cur = $user;
    Event::handle('EndSetUser', array($user));

    return $_cur;
}

315 316
/**
 * Set a cookie scoped to the configured site path and server.
 *
 * The cookie is flagged secure only when 'site'/'ssl' is 'always'.
 *
 * @param string $key        cookie name
 * @param string $value      cookie value
 * @param int    $expiration expiry timestamp passed to setcookie()
 *                           (default 0)
 *
 * @return boolean result of setcookie()
 */
function common_set_cookie($key, $value, $expiration=0)
{
    $path = common_config('site', 'path');
    $server = common_config('site', 'server');

    $hasSubPath = $path && ($path != '/');
    $cookiepath = $hasSubPath ? '/' . $path . '/' : '/';

    $secureOnly = (common_config('site', 'ssl') == 'always');

    return setcookie($key, $value, $expiration, $cookiepath, $server, $secureOnly);
}

define('REMEMBERME', 'rememberme');
define('REMEMBERME_EXPIRY', 30 * 24 * 60 * 60); // 30 days

/**
 * Issue a "remember me" cookie for a user.
 *
 * Stores a fresh Remember_me record (random code + user id) and sets the
 * REMEMBERME cookie to "<user_id>:<code>", expiring REMEMBERME_EXPIRY
 * seconds from now.
 *
 * NOTE(review): the cookie value, which acts as a login credential, is
 * written to the log at LOG_INFO; consider dropping it from the message.
 *
 * @param mixed $user user to remember (optional; defaults to
 *                    common_current_user())
 *
 * @return boolean true on success; false when there is no user or the
 *                 record could not be inserted
 */
function common_rememberme($user=null)
{
    if (!$user) {
        $user = common_current_user();
        if (!$user) {
            return false;
        }
    }

    $rm = new Remember_me();

    $rm->code = common_good_rand(16);
    $rm->user_id = $user->id;

    // Wrap the insert in a transaction.

    $rm->query('BEGIN');

    $result = $rm->insert();

    if (!$result) {
        common_log_db_error($rm, 'INSERT', __FILE__);
        // Close the transaction opened above; without this the failed
        // transaction would stay open on the connection.
        $rm->query('ROLLBACK');
        return false;
    }

    $rm->query('COMMIT');

    $cookieval = $rm->user_id . ':' . $rm->code;

    common_log(LOG_INFO, 'adding rememberme cookie "' . $cookieval . '" for ' . $user->nickname);

    common_set_cookie(REMEMBERME, $cookieval, time() + REMEMBERME_EXPIRY);

    return true;
}

372 373
/**
 * Log in the user identified by the REMEMBERME cookie, if any.
 *
 * The cookie is expected to hold "<user_id>:<code>". On any mismatch
 * (malformed value, unknown code, code belonging to another user,
 * missing user, or failure to delete the used code) the cookie is
 * cleared via common_forgetme() and null is returned. On success the
 * used code is deleted, the user is set as current user with a
 * non-"real" login, and a fresh cookie is issued.
 *
 * @return mixed the remembered user, or null
 */
function common_remembered_user()
{
    $packed = isset($_COOKIE[REMEMBERME]) ? $_COOKIE[REMEMBERME] : null;

    if (!$packed) {
        return null;
    }

    // Pad to two elements so a cookie without ':' is reported as
    // malformed below instead of raising an undefined-offset notice.
    list($id, $code) = array_pad(explode(':', $packed), 2, null);

    if (!$id || !$code) {
        common_log(LOG_WARNING, 'Malformed rememberme cookie: ' . $packed);
        common_forgetme();
        return null;
    }

    $rm = Remember_me::staticGet($code);

    if (!$rm) {
        common_log(LOG_WARNING, 'No such remember code: ' . $code);
        common_forgetme();
        return null;
    }

    if ($rm->user_id != $id) {
        common_log(LOG_WARNING, 'Rememberme code for wrong user: ' . $rm->user_id . ' != ' . $id);
        common_forgetme();
        return null;
    }

    $user = User::staticGet($rm->user_id);

    if (!$user) {
        common_log(LOG_WARNING, 'No such user for rememberme: ' . $rm->user_id);
        common_forgetme();
        return null;
    }

    // successful! The code has been used, so remove it.
    $result = $rm->delete();

    if (!$result) {
        common_log_db_error($rm, 'DELETE', __FILE__);
        common_log(LOG_WARNING, 'Could not delete rememberme: ' . $code);
        common_forgetme();
        return null;
    }

    common_log(LOG_INFO, 'logging in ' . $user->nickname . ' using rememberme code ' . $rm->code);

    common_set_user($user);
    common_real_login(false);

    // We issue a new cookie, so they can log in
    // automatically again after this session

    common_rememberme($user);

    return $user;
}

435 436 437
/**
 * Clear the REMEMBERME cookie by setting it to an empty value.
 *
 * @return void
 */
function common_forgetme()
{
    $emptyValue = '';
    common_set_cookie(REMEMBERME, $emptyValue, 0);
}

443 444 445
/**
 * Who is the current user?
 *
 * The answer is cached in the global $_cur: false means "not looked up
 * yet". On the first lookup the session's 'userid' is tried (only when a
 * session cookie, a session query parameter or an existing session value
 * hints at one), then the REMEMBERME cookie.
 *
 * @return mixed the current user, or null when there is none or the
 *               configuration has not been loaded
 */
function common_current_user()
{
    global $_cur;

    if (!_have_config()) {
        return null;
    }

    if ($_cur !== false) {
        return $_cur;
    }

    $sessionHinted = isset($_COOKIE[session_name()])
        || isset($_GET[session_name()])
        || (isset($_SESSION['userid']) && $_SESSION['userid']);

    if ($sessionHinted) {
        common_ensure_session();
        $id = isset($_SESSION['userid']) ? $_SESSION['userid'] : false;
        if ($id) {
            $user = User::staticGet($id);
            if ($user) {
                $_cur = $user;
                return $_cur;
            }
        }
    }

    // that didn't work; try to remember; will init $_cur to null on failure
    $_cur = common_remembered_user();

    if ($_cur) {
        // XXX: Is this necessary?
        $_SESSION['userid'] = $_cur->id;
    }

    return $_cur;
}

481 482 483 484 485
/**
 * Record in the session whether the current login is a "real" one.
 *
 * Logins that are 'remembered' aren't 'real' -- they're subject to
 * cookie-stealing, so certain things are not allowed for them. New
 * registrations, OpenID and password logins _are_ real.
 *
 * @param boolean $real value stored in $_SESSION['real_login']
 *                      (default true)
 *
 * @return void
 */
function common_real_login($real=true)
{
    common_ensure_session();

    $_SESSION['real_login'] = $real;
}

492 493
/**
 * Is the current user logged in through a "real" login?
 *
 * @return boolean true only when a user is logged in and
 *                 $_SESSION['real_login'] is set to a truthy value
 */
function common_is_real_login()
{
    // !empty() instead of a bare read: the flag is only written by
    // common_real_login(), so it may be missing from the session.
    return common_logged_in() && !empty($_SESSION['real_login']);
}

497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519
/**
 * Get a hash portion for HTTP caching ETags and such, including info on
 * the current user's session. The value changes when login/logout state
 * changes, when the account changes, or when the user is renamed.
 *
 * This should not be considered secure information.
 *
 * @param User $user (optional; uses common_current_user() if left out)
 *
 * @return mixed crc32() of "<id>:<nickname>" for a user, or the string
 *               '0' when there is none
 */
function common_user_cache_hash($user=false)
{
    if ($user === false) {
        $user = common_current_user();
    }

    if (!$user) {
        return '0';
    }

    return crc32($user->id . ':' . $user->nickname);
}

520 521 522 523 524
/**
 * Get the canonical version of a nickname for comparison.
 *
 * Thin wrapper around Nickname::normalize().
 *
 * @param string $nickname
 *
 * @return string
 *
 * @throws NicknameException on invalid input
 * @deprecated call Nickname::normalize() directly.
 */
function common_canonical_nickname($nickname)
{
    $normalized = Nickname::normalize($nickname);

    return $normalized;
}

534 535 536 537 538 539 540 541 542
/**
 * Get the canonical version of an email address for comparison.
 *
 * Currently a no-op: the address is returned unchanged.
 *
 * @fixme actually normalize (canonicalize UTF-8, lowercase the domain)
 * @fixme reject invalid input
 *
 * @param string $email
 *
 * @return string the address exactly as given
 */
function common_canonical_email($email)
{
    // XXX: canonicalize UTF-8
    // XXX: lcase the domain part
    $canonical = $email;

    return $canonical;
}

550 551 552 553 554 555 556
/**
 * Render notice text to HTML: runs common_render_text(), links
 * @-mentions, then builds links for !group references.
 *
 * @param string $text   notice text
 * @param Notice $notice notice in whose context we're working
 *
 * @return string rendered HTML
 */
function common_render_content($text, $notice)
{
    $r = common_render_text($text);
    $id = $notice->profile_id;
    $r = common_linkify_mentions($r, $notice);

    // preg_replace_callback() instead of the /e modifier, which was
    // deprecated in PHP 5.5 and removed in PHP 7. curry() appends the
    // profile id after the match array, as done for callback_helper().
    $r = preg_replace_callback('/(^|[\s\.\,\:\;]+)!(' . Nickname::DISPLAY_FMT . ')/',
                               curry('common_render_content_group_cb', $id),
                               $r);
    return $r;
}

/**
 * preg_replace_callback() helper for common_render_content(): rebuilds
 * one "!group" match as its prefix, the '!' and the group link.
 *
 * @param array $matches regex match: [1] is the leading text, [2] the
 *                       group nickname
 * @param mixed $id      profile id of the notice's author
 *
 * @return string replacement text
 *
 * @access private
 */
function common_render_content_group_cb($matches, $id)
{
    return $matches[1] . '!' . common_group_link($id, $matches[2]);
}

567
/**
 * Replace every mention found by common_find_mentions() with its link
 * markup.
 *
 * @param string $text   partially rendered HTML
 * @param Notice $notice notice in whose context we're working
 *
 * @return string text with mentions linkified
 */
function common_linkify_mentions($text, $notice)
{
    $mentions = common_find_mentions($text, $notice);

    // We need to go through in reverse order by position,
    // so our positions stay valid despite our fudging with the
    // string!

    $points = array();

    foreach ($mentions as $mention) {
        $points[$mention['position']] = $mention;
    }

    krsort($points);

    foreach ($points as $position => $mention) {
        $linkText = common_linkify_mention($mention);

        // Positions come from PREG_OFFSET_CAPTURE and substr_replace()
        // also works on bytes, so the length must be a byte count too;
        // a character count would cut multibyte mention text short.
        $text = substr_replace($text, $linkText, $position, strlen($mention['text']));
    }

    return $text;
}

/**
 * Build the link markup for a single mention.
 *
 * Produces a span.vcard wrapping an anchor to the mention's URL that
 * contains the mention text, unless a StartLinkifyMention handler takes
 * over (in which case whatever it put in $output is returned).
 *
 * @param array $mention mention data with 'url', 'text' and optionally
 *                       'title'
 *
 * @return mixed markup string, or null when a handler vetoed without
 *               providing output
 */
function common_linkify_mention($mention)
{
    $output = null;

    if (!Event::handle('StartLinkifyMention', array($mention, &$output))) {
        return $output;
    }

    $attrs = array('href' => $mention['url'],
                   'class' => 'url');

    if (!empty($mention['title'])) {
        $attrs['title'] = $mention['title'];
    }

    $xs = new XMLStringer(false);

    $xs->elementStart('span', 'vcard');
    $xs->elementStart('a', $attrs);
    $xs->element('span', 'fn nickname', $mention['text']);
    $xs->elementEnd('a');
    $xs->elementEnd('span');

    $output = $xs->getString();

    Event::handle('EndLinkifyMention', array($mention, &$output));

    return $output;
}

623
/**
 * Find @-mentions in the given text, using the given notice object as
 * context. Nicknames are resolved first against the notice being replied
 * to (its author and the profiles it replied to), then with
 * common_relative_profile() against the sender. "@#tag" mentions resolve
 * to the sender's profiles tagged 'tag'.
 *
 * Note the return data format is internal, to be used for building links
 * and such. Should not be used directly; rather, call
 * common_linkify_mentions().
 *
 * @param string $text
 * @param Notice $notice notice in whose context we're building links
 *
 * @return array list of mentions, each an array with 'mentioned',
 *               'text', 'position', 'url' and optionally 'title'
 *
 * @access private
 */
function common_find_mentions($text, $notice)
{
    $mentions = array();

    $sender = Profile::staticGet('id', $notice->profile_id);

    if (empty($sender)) {
        return $mentions;
    }

    if (!Event::handle('StartFindMentions', array($sender, $text, &$mentions))) {
        return $mentions;
    }

    // Get the context of the original notice, if any
    $originalAuthor   = null;
    $originalMentions = array();

    // Is it a reply?

    if (!empty($notice) && !empty($notice->reply_to)) {
        $originalNotice = Notice::staticGet('id', $notice->reply_to);
        if (!empty($originalNotice)) {
            $originalAuthor = Profile::staticGet('id', $originalNotice->profile_id);

            foreach ($originalNotice->getReplies() as $replyId) {
                $repliedTo = Profile::staticGet('id', $replyId);
                if (!empty($repliedTo)) {
                    $originalMentions[$repliedTo->nickname] = $repliedTo;
                }
            }
        }
    }

    // "T nickname " at the very start of the text...
    preg_match_all('/^T (' . Nickname::DISPLAY_FMT . ') /',
                   $text,
                   $tmatches,
                   PREG_OFFSET_CAPTURE);

    // ...and "@nickname" anywhere in it.
    preg_match_all('/(?:^|\s+)@(' . Nickname::DISPLAY_FMT . ')\b/',
                   $text,
                   $atmatches,
                   PREG_OFFSET_CAPTURE);

    foreach (array_merge($tmatches[1], $atmatches[1]) as $match) {
        list($matchedText, $matchedOffset) = $match;

        try {
            $nickname = Nickname::normalize($matchedText);
        } catch (NicknameException $e) {
            // Bogus match? Drop it.
            continue;
        }

        // Try to get a profile for this nickname.
        // Start with conversation context, then go to
        // sender context.

        if (!empty($originalAuthor) && $originalAuthor->nickname == $nickname) {
            $mentioned = $originalAuthor;
        } else if (array_key_exists($nickname, $originalMentions)) {
            $mentioned = $originalMentions[$nickname];
        } else {
            $mentioned = common_relative_profile($sender, $nickname);
        }

        if (empty($mentioned)) {
            continue;
        }

        // Local users are linked by id, everyone else by profile URL.
        $user = User::staticGet('id', $mentioned->id);

        $url = $user
            ? common_local_url('userbyid', array('id' => $user->id))
            : $mentioned->profileurl;

        $mention = array('mentioned' => array($mentioned),
                         'text' => $matchedText,
                         'position' => $matchedOffset,
                         'url' => $url);

        if (!empty($mentioned->fullname)) {
            $mention['title'] = $mentioned->fullname;
        }

        $mentions[] = $mention;
    }

    // @#tag => mention of all subscriptions tagged 'tag'

    preg_match_all('/(?:^|[\s\.\,\:\;]+)@#([\pL\pN_\-\.]{1,64})/',
                   $text,
                   $hmatches,
                   PREG_OFFSET_CAPTURE);

    foreach ($hmatches[1] as $hmatch) {
        list($tagText, $tagOffset) = $hmatch;

        $tag = common_canonical_tag($tagText);

        $tagged = Profile_tag::getTagged($sender->id, $tag);

        $url = common_local_url('subscriptions',
                                array('nickname' => $sender->nickname,
                                      'tag' => $tag));

        $mentions[] = array('mentioned' => $tagged,
                            'text' => $tagText,
                            'position' => $tagOffset,
                            'url' => $url);
    }

    Event::handle('EndFindMentions', array($sender, $text, &$mentions));

    return $mentions;
}

756 757
/**
 * Render plain notice text to HTML: escapes it, strips control
 * characters, linkifies URLs and turns #tags into tag links.
 *
 * @param string $text plain text
 *
 * @return string partially rendered HTML
 */
function common_render_text($text)
{
    $r = htmlspecialchars($text);

    // Strip the control characters that XML 1.0 does not allow (all of
    // 0x00-0x1F except tab, LF and CR). The upper bound used to be 0x19,
    // which let 0x1A-0x1F through.
    $r = preg_replace('/[\x{0}-\x{8}\x{b}-\x{c}\x{e}-\x{1f}]/', '', $r);
    $r = common_replace_urls_callback($r, 'common_linkify');

    // preg_replace_callback() instead of the /e modifier, which was
    // deprecated in PHP 5.5 and removed in PHP 7.
    $r = preg_replace_callback('/(^|\&quot\;|\'|\(|\[|\{|\s+)#([\pL\pN_\-\.]{1,64})/',
                               'common_render_text_tag_cb',
                               $r);
    // XXX: machine tags
    return $r;
}

/**
 * preg_replace_callback() helper for common_render_text(): rebuilds one
 * "#tag" match as its prefix, the '#' and the tag link.
 *
 * @param array $matches regex match: [1] is the leading text, [2] the tag
 *
 * @return string replacement text
 *
 * @access private
 */
function common_render_text_tag_cb($matches)
{
    return $matches[1] . '#' . common_tag_link($matches[2]);
}

767
/**
 * Find URL-like substrings in the text and replace each one with the
 * result of a callback.
 *
 * Every regex match is routed through callback_helper(), which trims
 * trailing punctuation from the URL and then invokes $callback.
 *
 * @param string   $text      text to scan
 * @param callback $callback  invoked by callback_helper() for each URL
 * @param mixed    $notice_id optional id forwarded to callback_helper()
 *
 * @return string text with every matched URL replaced
 */
function common_replace_urls_callback($text, $callback, $notice_id = null)
{
    // Start off with a regex
    $regex = '#'.
    '(?:^|[\s\<\>\(\)\[\]\{\}\\\'\\\";]+)(?![\@\!\#])'.
    '('.
        '(?:'.
            '(?:'. //Known protocols
                '(?:'.
                    '(?:(?:https?|ftps?|mms|rtsp|gopher|news|nntp|telnet|wais|file|prospero|webcal|irc)://)'.
                    '|'.
                    '(?:(?:mailto|aim|tel|xmpp):)'.
                ')'.
                '(?:[\pN\pL\-\_\+\%\~]+(?::[\pN\pL\-\_\+\%\~]+)?\@)?'. //user:pass@
                '(?:'.
                    '(?:'.
                        '\[[\pN\pL\-\_\:\.]+(?<![\.\:])\]'. //[dns]
                    ')|(?:'.
                        '[\pN\pL\-\_\:\.]+(?<![\.\:])'. //dns
                    ')'.
                ')'.
            ')'.
            '|(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'. //IPv4
            '|(?:'. //IPv6
                '\[?(?:(?:(?:[0-9A-Fa-f]{1,4}:){7}(?:(?:[0-9A-Fa-f]{1,4})|:))|(?:(?:[0-9A-Fa-f]{1,4}:){6}(?::|(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})|(?::[0-9A-Fa-f]{1,4})))|(?:(?:[0-9A-Fa-f]{1,4}:){5}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:[0-9A-Fa-f]{1,4}:){4}(?::[0-9A-Fa-f]{1,4}){0,1}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:[0-9A-Fa-f]{1,4}:){3}(?::[0-9A-Fa-f]{1,4}){0,2}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:[0-9A-Fa-f]{1,4}:){2}(?::[0-9A-Fa-f]{1,4}){0,3}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:[0-9A-Fa-f]{1,4}:)(?::[0-9A-Fa-f]{1,4}){0,4}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?::(?::[0-9A-Fa-f]{1,4}){0,5}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})))\]?(?<!:)'.
            ')|(?:'. //DNS
                '(?:[\pN\pL\-\_\+\%\~]+(?:\:[\pN\pL\-\_\+\%\~]+)?\@)?'. //user:pass@
                '[\pN\pL\-\_]+(?:\.[\pN\pL\-\_]+)*\.'.
                //tld list from http://data.iana.org/TLD/tlds-alpha-by-domain.txt, also added local, loc, and onion
                '(?:AC|AD|AE|AERO|AF|AG|AI|AL|AM|AN|AO|AQ|AR|ARPA|AS|ASIA|AT|AU|AW|AX|AZ|BA|BB|BD|BE|BF|BG|BH|BI|BIZ|BJ|BM|BN|BO|BR|BS|BT|BV|BW|BY|BZ|CA|CAT|CC|CD|CF|CG|CH|CI|CK|CL|CM|CN|CO|COM|COOP|CR|CU|CV|CX|CY|CZ|DE|DJ|DK|DM|DO|DZ|EC|EDU|EE|EG|ER|ES|ET|EU|FI|FJ|FK|FM|FO|FR|GA|GB|GD|GE|GF|GG|GH|GI|GL|GM|GN|GOV|GP|GQ|GR|GS|GT|GU|GW|GY|HK|HM|HN|HR|HT|HU|ID|IE|IL|IM|IN|INFO|INT|IO|IQ|IR|IS|IT|JE|JM|JO|JOBS|JP|KE|KG|KH|KI|KM|KN|KP|KR|KW|KY|KZ|LA|LB|LC|LI|LK|LR|LS|LT|LU|LV|LY|MA|MC|MD|ME|MG|MH|MIL|MK|ML|MM|MN|MO|MOBI|MP|MQ|MR|MS|MT|MU|MUSEUM|MV|MW|MX|MY|MZ|NA|NAME|NC|NE|NET|NF|NG|NI|NL|NO|NP|NR|NU|NZ|OM|ORG|PA|PE|PF|PG|PH|PK|PL|PM|PN|PR|PRO|PS|PT|PW|PY|QA|RE|RO|RS|RU|RW|SA|SB|SC|SD|SE|SG|SH|SI|SJ|SK|SL|SM|SN|SO|SR|ST|SU|SV|SY|SZ|TC|TD|TEL|TF|TG|TH|TJ|TK|TL|TM|TN|TO|TP|TR|TRAVEL|TT|TV|TW|TZ|UA|UG|UK|US|UY|UZ|VA|VC|VE|VG|VI|VN|VU|WF|WS|XN--0ZWM56D|测试|XN--11B5BS3A9AJ6G|परीक्षा|XN--80AKHBYKNJ4F|испытание|XN--9T4B11YI5A|테스트|XN--DEBA0AD|טעסט|XN--G6W251D|測試|XN--HGBK6AJ7F53BBA|آزمایشی|XN--HLCJ6AYA9ESC7A|பரிட்சை|XN--JXALPDLP|δοκιμή|XN--KGBECHTV|إختبار|XN--ZCKZAH|テスト|YE|YT|YU|ZA|ZM|ZW|local|loc|onion)'.
            ')(?![\pN\pL\-\_])'.
        ')'.
        '(?:'.
            '(?:\:\d+)?'. //:port
            '(?:/[\pN\pL$\,\!\(\)\.\:\-\_\+\/\=\&\;\%\~\*\$\+\'@]*)?'. // /path
            '(?:\?[\pN\pL\$\,\!\(\)\.\:\-\_\+\/\=\&\;\%\~\*\$\+\'@\/]*)?'. // ?query string
            '(?:\#[\pN\pL$\,\!\(\)\.\:\-\_\+\/\=\&\;\%\~\*\$\+\'\@/\?\#]*)?'. // #fragment
        ')(?<![\?\.\,\#\,])'.
    ')'.
    '#ixu';

    // callback_helper() receives the match array first, followed by the
    // two values bound here.
    $replacer = curry('callback_helper', $callback, $notice_id);

    return preg_replace_callback($regex, $replacer, $text);
}
810

811
/**
 * preg_replace_callback() helper for common_replace_urls_callback().
 *
 * Trims trailing characters that should not be part of the URL (an
 * unbalanced closing bracket, or one of . ? , #), hands the trimmed URL
 * to the callback and splices the callback's result back into the full
 * match, keeping whatever surrounded the URL.
 *
 * @param array    $matches   regex match: [0] full match, [1] the URL
 * @param callback $callback  called with the URL, or with
 *                            array(URL, $notice_id) when $notice_id is
 *                            not empty
 * @param mixed    $notice_id optional id passed along to the callback
 *
 * @return string replacement for the full match
 */
function callback_helper($matches, $callback, $notice_id)
{
    $url = $matches[1];
    $left = strpos($matches[0], $url);
    $right = $left + strlen($url);

    // closing symbol => its opening counterpart
    $closers = array(')' => '(',
                     ']' => '[',
                     '}' => '{',
                     '>' => '<');
    $cannotEndWith = array('.', '?', ',', '#');

    // Keep trimming until a full pass leaves the URL untouched.
    do {
        $before = $url;

        foreach ($closers as $close => $open) {
            // A trailing closer with no matching opener inside the URL
            // belongs to the surrounding text.
            if (substr($url, -1) == $close
                && substr_count($url, $open) < substr_count($url, $close)) {
                $right -= 1;
                $url = substr($url, 0, -1);
            }
        }

        if (in_array(substr($url, -1), $cannotEndWith)) {
            $right -= 1;
            $url = substr($url, 0, -1);
        }
    } while ($before != $url);

    $callbackArgs = empty($notice_id) ? array($url) : array(array($url, $notice_id));
    $result = call_user_func_array($callback, $callbackArgs);

    return substr($matches[0], 0, $left) . $result . substr($matches[0], $right);
}
861

862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878
// curry($fn, ...): returns a callable that invokes $fn with the arguments
// it receives followed by the extra arguments given here.
if (version_compare(PHP_VERSION, '5.3.0', '<')) {
    /**
     * Pre-5.3 fallback: the target and its bound arguments are parked in
     * $GLOBALS under a unique key and looked up again by a function made
     * with create_function().
     *
     * @param callback $fn function to wrap; any further arguments are
     *                     appended to the ones passed at call time
     *
     * @return callback
     */
    function curry($fn) {
        $bound = func_get_args();
        array_shift($bound);

        $id = uniqid('_partial');
        $GLOBALS[$id] = array($fn, $bound);

        $slot = '$GLOBALS["'.$id.'"]';

        return create_function('',
                               '$args = func_get_args(); '.
                               'return call_user_func_array('.
                               $slot.'[0],'.
                               'array_merge($args,'.
                               $slot.'[1]));');
    }
} else {
    // lambda implementation in a separate file; PHP 5.2 won't parse it.
    require_once INSTALLDIR . "/lib/curry.php";
}

/**
 * Turn a single URL (or bare e-mail address) into an anchor element string.
 *
 * @param string $url HTML-escaped URL or e-mail address
 * @return mixed result of XMLStringer::estring() for the 'a' element
 */
function common_linkify($url) {
    // The input arrives HTML-escaped; undo that before inspecting it or
    // handing it to the string-building helpers.
    $url = htmlspecialchars_decode($url);

    $isBareEmail = (strpos($url, '@') !== false) && (strpos($url, ':') === false);

    if ($isBareEmail) {
        // An address without an explicit scheme: link it as mailto:
        $canon = "mailto:$url";
        $longurl = $canon;
    } else {
        $canon = File_redirection::_canonUrl($url);

        $resolved = File_redirection::where($canon, common_config('attachments', 'process_links'));

        if (is_array($resolved)) {
            $longurl = $resolved['url'];
        } else if (is_string($resolved)) {
            $longurl = $resolved;
        } else {
            // Unable to reach the server to verify contents, etc.
            // Just pass the link on through for now.
            common_log(LOG_ERR, "Can't linkify url '$url'");
            $longurl = $url;
        }
    }

    $attrs = array('href' => $canon, 'title' => $longurl);

    // Check to see whether this is a known "attachment" URL.
    $file = File::staticGet('url', $longurl);

    if (empty($file) && common_config('attachments', 'process_links')) {
        // XXX: this writes to the database. :<
        $file = File::processNew($longurl);
    }

    // Add clippy: attachments get a class (noting a thumbnail when one
    // exists) and an id derived from the file record.
    if (!empty($file) && $file->getEnclosure()) {
        $thumb = File_thumbnail::staticGet('file_id', $file->id);

        $attrs['class'] = empty($thumb) ? 'attachment' : 'attachment thumbnail';
        $attrs['id'] = "attachment-{$file->id}";
    }

    // Whether to nofollow
    $attrs['rel'] = (common_config('nofollow', 'external') == 'never')
        ? 'external'
        : 'nofollow external';

    return XMLStringer::estring('a', $attrs, $url);
}

958
/**
 * Run the URLs in a text through File_redirection::makeShort when needed.
 *
 * @param string  $text   text to process
 * @param boolean $always when true, skip the length check and always
 *                        run the replacement
 * @return mixed the untouched $text when no shortening is needed, otherwise
 *               the result of common_replace_urls_callback()
 */
function common_shorten_links($text, $always = false)
{
    $limit = Notice::maxContent();

    // A limit of 0 disables the length check; text within the limit is
    // left alone unless the caller forces shortening.
    $mustShorten = $always || ($limit != 0 && mb_strlen($text) > $limit);

    if (!$mustShorten) {
        return $text;
    }

    return common_replace_urls_callback($text, array('File_redirection', 'makeShort'));
}

965 966 967 968 969 970 971 972 973 974 975 976 977
/**
 * Minimal UTF-8 sanity check that also removes NUL bytes from the input.
 *
 * @param string $str
 * @return mixed string or null if invalid input
 *
 * @todo ideally we should drop bad chars, and maybe do some of the checks
 *       from common_xml_safe_str. But we can't strip newlines, etc.
 * @todo Unicode normalization might also be useful, but not needed now.
 */
function common_validate_utf8($str)
{
    // With the /u modifier preg_replace yields NULL when the subject is
    // not valid UTF-8, which is what doubles as the validity check here.
    //
    // Note: an empty regex //u also produced NULL on some production
    // machines (but none of the test machines), hence the concrete
    // NUL-byte pattern.
    //
    // This should be replaced with a more reliable check.
    $checked = preg_replace('/\x00/u', '', $str);

    return $checked;
}

/**
 * Flatten an arbitrary string so it can be emitted in XML as a single line.
 *
 * @param string $str
 * @return string
 */
function common_xml_safe_str($str)
{
    // Common end-of-line and stray whitespace characters, each mapped to
    // what takes its place.
    $substitutions = array(
        "\t"   => ' ', // tab -> single space
        "\n"   => ' ', // newline -> single space
        "\r"   => '',  // cr -> dropped
        "\0"   => '',  // null byte eos -> dropped
        "\x0B" => ' ', // vertical tab -> single space
    );

    $flattened = str_replace(
        array_keys($substitutions),
        array_values($substitutions),
        $str
    );

    // Neutralize any additional control codes and UTF-16 surrogates
    // (Twitter uses '*')
    return preg_replace('/[\p{Cc}\p{Cs}]/u', '*', $flattened);
}

1018 1019
/**
 * Build the markup for a hashtag: a span of class "tag" wrapping a rel="tag" link.
 *
 * @param string $tag tag text as written; used verbatim as the link text
 * @return mixed result of XMLStringer::getString()
 */
function common_tag_link($tag)
{
    $canonical = common_canonical_tag($tag);

    if (!common_config('singleuser', 'enabled')) {
        $href = common_local_url('tag', array('tag' => $canonical));
    } else {
        // regular TagAction isn't set up in 1user mode, so point at the
        // single user's own stream filtered by tag instead
        $owner = User::singleUser();
        $routeArgs = array('nickname' => $owner->nickname,
                           'tag' => $canonical);
        $href = common_local_url('showstream', $routeArgs);
    }

    $linkAttrs = array('href' => $href,
                       'rel' => 'tag');

    $out = new XMLStringer();
    $out->elementStart('span', 'tag');
    $out->element('a', $linkAttrs, $tag);
    $out->elementEnd('span');

    return $out->getString();
}

1039 1040
/**
 * Reduce a tag to its canonical form: lowercased (as UTF-8) with every
 * '-', '_' and '.' removed.
 *
 * @param string $tag
 * @return string
 */
function common_canonical_tag($tag)
{
    $lowered = mb_strtolower($tag, "UTF-8");

    $ignored = array('-', '_', '.');

    return str_replace($ignored, '', $lowered);
}

1045 1046
/**
 * Check whether a string is acceptable as a profile tag: 1 to 64
 * characters drawn from ASCII letters, digits, '_', '-' and '.'.
 *
 * @param string $str candidate tag
 * @return mixed preg_match() result: 1 when valid, 0 when not
 */
function common_valid_profile_tag($str)
{
    // The D modifier makes $ match only at the very end of the subject.
    // Without it, $ also matches just before a trailing newline, so a
    // value such as "tag\n" would be accepted.
    return preg_match('/^[A-Za-z0-9_\-\.]{1,64}$/D', $str);
}

1050 1051 1052 1053 1054 1055 1056
/**
 *
 * @param <type> $sender_id
 * @param <type> $nickname
 * @return <type>
 * @access private
 */
1057 1058 1059
function common_group_link($sender_id, $nickname)
{
    $sender = Profile::staticGet($sender_id);
1060
    $group = User_group::getForNickname($nickname, $sender);
1061
    if ($sender && $group && $sender->isMember($group)) {
1062 1063 1064
        $attrs = array('href' => $group->permalink(),
                       'class' => 'url');
        if (!empty($group->fullname)) {
1065
            $attrs['title'] = $group->getFancyName();
1066
        }
1067 1068
        $xs = new XMLStringer();
        $xs->elementStart('span', 'vcard');
1069
        $xs->elementStart('a', $attrs);
1070 1071 1072 1073
        $xs->element('span', 'fn nickname', $nickname);
        $xs->elementEnd('a');
        $xs->elementEnd('span');
        return $xs->getString();