discovery.php 12.2 KB
Newer Older
1 2 3 4 5
<?php
/**
 * StatusNet - the distributed open-source microblogging tool
 * Copyright (C) 2010, StatusNet, Inc.
 *
 * Use Hammer discovery stack to find out interesting things about a URI
 *
 * PHP version 5
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 * @category  Discovery
 * @package   StatusNet
 * @author    James Walker <james@status.net>
 * @copyright 2010 StatusNet, Inc.
 * @license   http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0
 * @link      http://status.net/
 */

31 32 33 34
// Refuse to run unless the STATUSNET constant has been defined before this
// file is loaded; otherwise terminate with exit status 1.
defined('STATUSNET') or exit(1);

35 36 37 38
/**
 * This class implements LRDD-based service discovery based on the "Hammer Draft"
 * (including webfinger)
 *
 * A Discovery object keeps a list of discovery method class names. lookup()
 * asks each of them in turn for the links of a URI and returns the first XRD
 * that can be fetched through an LRDD link.
 *
 * @category  Discovery
 * @package   StatusNet
 * @author    James Walker <james@status.net>
 * @copyright 2010 StatusNet, Inc.
 * @license   http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0
 * @link      http://status.net/
 *
 * @see       http://groups.google.com/group/webfinger/browse_thread/thread/9f3d93a479e91bbf
 */
class Discovery
{
    const LRDD_REL    = 'lrdd';
    const PROFILEPAGE = 'http://webfinger.net/rel/profile-page';
    const UPDATESFROM = 'http://schemas.google.com/g/2010#updates-from';
    const HCARD       = 'http://microformats.org/profile/hcard';

    /**
     * Class names of the registered discovery methods, in the order
     * they are tried by lookup()
     */
    public $methods = array();

    /**
     * Constructor for a discovery object
     *
     * Registers the default discovery methods: host-meta first, then the
     * HTTP Link header, then HTML <link> elements.
     *
     * @return Discovery this
     */
    public function __construct()
    {
        $this->registerMethod('Discovery_LRDD_Host_Meta');
        $this->registerMethod('Discovery_LRDD_Link_Header');
        $this->registerMethod('Discovery_LRDD_Link_HTML');
    }

    /**
     * Register a discovery class
     *
     * The class is appended to the list, so it is tried after every
     * previously registered method.
     *
     * @param string $class Class name; lookup() instantiates it without
     *                      arguments and calls its discover() method
     *
     * @return void
     */
    public function registerMethod($class)
    {
        $this->methods[] = $class;
    }

    /**
     * Given a "user id" make sure it's normalized to either a webfinger
     * acct: uri or a profile HTTP URL.
     *
     * IDs that already start with http:, https: or acct: (compared
     * case-insensitively) are returned unchanged. Anything else containing
     * an "@" gets an acct: prefix; the rest gets an http:// prefix.
     *
     * @param string $user_id User ID to normalize
     *
     * @return string normalized acct: or http(s)?: URI
     */
    public static function normalize($user_id)
    {
        // URI schemes are case-insensitive, so "HTTP://host" must not be
        // mistaken for a bare host name and prefixed a second time.
        foreach (array('http:', 'https:', 'acct:') as $scheme) {
            if (strncasecmp($user_id, $scheme, strlen($scheme)) === 0) {
                return $user_id;
            }
        }

        if (strpos($user_id, '@') !== false) {
            return 'acct:' . $user_id;
        }

        return 'http://' . $user_id;
    }

    /**
     * Determine if a string is a Webfinger ID
     *
     * Webfinger IDs look like foo@example.com or acct:foo@example.com
     *
     * @param string $user_id ID to check
     *
     * @return boolean true if $user_id is a Webfinger, else false
     */
    public static function isWebfinger($user_id)
    {
        $uri = Discovery::normalize($user_id);

        // Case-insensitive to stay in step with normalize()
        return (strncasecmp($uri, 'acct:', 5) === 0);
    }

    /**
     * Given a user ID, return the first available XRD
     *
     * Every registered method is tried in registration order. A method
     * counts as successful when it yields an LRDD link and the XRD that
     * link points to can be fetched.
     *
     * @param string $id User ID URI
     *
     * @return XRD XRD object for the user
     *
     * @throws Exception if no registered method produced an XRD
     */
    public function lookup($id)
    {
        // Normalize the incoming $id to make sure we have a uri
        $uri = $this->normalize($id);

        foreach ($this->methods as $class) {
            // discover() is declared as an instance method, so it must be
            // called on an object; invoking it statically through
            // call_user_func() fails on current PHP versions.
            $method = new $class();
            $links  = $method->discover($uri);

            $link = self::getService($links, self::LRDD_REL);
            if (empty($link)) {
                continue;
            }

            // Load the LRDD XRD, preferring a template over a fixed href
            if (!empty($link['template'])) {
                $xrd_uri = self::applyTemplate($link['template'], $uri);
            } else if (!empty($link['href'])) {
                $xrd_uri = $link['href'];
            } else {
                // An LRDD link without template or href leads nowhere
                continue;
            }

            $xrd = $this->fetchXrd($xrd_uri);
            if ($xrd) {
                return $xrd;
            }
        }

        // TRANS: Exception. %s is an ID.
        throw new Exception(sprintf(_('Unable to find services for %s.'), $id));
    }

    /**
     * Given an array of links, returns the matching service
     *
     * @param array  $links   Links to check
     * @param string $service Service (rel value) to find
     *
     * @return array|false|null assoc array representing the first link whose
     *                          'rel' equals $service; false if $links is not
     *                          an array; null if no link matches
     */
    public static function getService($links, $service)
    {
        if (!is_array($links)) {
            return false;
        }

        foreach ($links as $link) {
            // Skip entries without a rel instead of raising a notice
            if (isset($link['rel']) && $link['rel'] === $service) {
                return $link;
            }
        }

        // Kept as null (not false) so existing callers see the same value
        return null;
    }

    /**
     * Apply a template using an ID
     *
     * Replaces {uri} in template string with the URL-encoded ID given.
     *
     * @param string $template Template to match
     * @param string $id       User ID to replace with
     *
     * @return string replaced values
     */
    public static function applyTemplate($template, $id)
    {
        return str_replace('{uri}', urlencode($id), $template);
    }

    /**
     * Fetch an XRD file and parse
     *
     * @param string $url URL of the XRD
     *
     * @return XRD|false result of XRD::parse() on the response body, or
     *                   false if the request raised an HTTP_Request2_Exception
     *                   or the status was not 200
     */
    public static function fetchXrd($url)
    {
        try {
            $client   = new HTTPClient();
            $response = $client->get($url);
        } catch (HTTP_Request2_Exception $e) {
            return false;
        }

        if ($response->getStatus() != 200) {
            return false;
        }

        return XRD::parse($response->getBody());
    }
}

Evan Prodromou's avatar
Evan Prodromou committed
218 219 220 221 222 223 224 225 226 227 228 229 230
/**
 * Contract for a single LRDD discovery strategy
 *
 * An implementing class knows one way of obtaining the XRD-style links
 * that belong to a URI and exposes it through discover().
 *
 * @category  Discovery
 * @package   StatusNet
 * @author    James Walker <james@status.net>
 * @copyright 2010 StatusNet, Inc.
 * @license   http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0
 * @link      http://status.net/
 */
interface Discovery_LRDD
{
    /**
     * Look up the links published for a URI
     *
     * @param string $uri URI to inquire about
     *
     * @return array Links in the XRD file; the implementations in this
     *               file return false or null instead when nothing
     *               could be retrieved
     */
    public function discover($uri);
}

Evan Prodromou's avatar
Evan Prodromou committed
243 244 245 246 247 248 249 250 251 252 253 254 255
/**
 * Implementation of discovery using host-meta file
 *
 * Discovers XRD file for a user by going to the organization's
 * host-meta file and trying to find a template for LRDD.
 *
 * @category  Discovery
 * @package   StatusNet
 * @author    James Walker <james@status.net>
 * @copyright 2010 StatusNet, Inc.
 * @license   http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0
 * @link      http://status.net/
 */
class Discovery_LRDD_Host_Meta implements Discovery_LRDD
{
    /**
     * Discovery core method
     *
     * For Webfinger and HTTP URIs, fetch the host-meta file
     * (http://<host>/.well-known/host-meta) and return its links.
     *
     * @param string $uri URI to inquire about
     *
     * @return array|null Links in the XRD file, or null if no host could be
     *                    determined or the host-meta file was not retrieved
     */
    public function discover($uri)
    {
        if (Discovery::isWebfinger($uri)) {
            // We have a webfinger acct: - the host is whatever follows the
            // last "@"; a URI without one has no host at all
            $at     = strrpos($uri, '@');
            $domain = ($at === false) ? '' : substr($uri, $at + 1);
        } else {
            $domain = parse_url($uri, PHP_URL_HOST);
        }

        // Without a host there is nothing to ask; do not request "http:///..."
        if (empty($domain)) {
            return null;
        }

        $url = 'http://'. $domain .'/.well-known/host-meta';

        $xrd = Discovery::fetchXrd($url);

        if ($xrd) {
            return $xrd->links;
        }

        return null;
    }
}

Evan Prodromou's avatar
Evan Prodromou committed
287 288 289 290 291 292 293 294 295 296 297 298 299
/**
 * Implementation of discovery using HTTP Link header
 *
 * Discovers XRD file for a user by fetching the URL and reading any
 * Link: headers in the HTTP response.
 *
 * @category  Discovery
 * @package   StatusNet
 * @author    James Walker <james@status.net>
 * @copyright 2010 StatusNet, Inc.
 * @license   http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0
 * @link      http://status.net/
 */
class Discovery_LRDD_Link_Header implements Discovery_LRDD
{
    /**
     * Discovery core method
     *
     * For HTTP IDs fetch the URL and look for Link headers.
     *
     * @param string $uri URI to inquire about
     *
     * @return array|false One-element array holding the parsed Link header,
     *                     or false if $uri is a Webfinger ID, the request
     *                     failed, the status was not 200, or the response
     *                     carried no Link header
     */
    public function discover($uri)
    {
        // acct: URIs cannot be dereferenced over HTTP, so skip the request
        if (Discovery::isWebfinger($uri)) {
            return false;
        }

        try {
            $client   = new HTTPClient();
            $response = $client->get($uri);
        } catch (HTTP_Request2_Exception $e) {
            return false;
        }

        if ($response->getStatus() != 200) {
            return false;
        }

        $link_header = $response->getHeader('Link');
        if (!$link_header) {
            // Nothing to parse; do not hand an empty header to LinkHeader
            return false;
        }

        return array(self::parseHeader($link_header));
    }

    /**
     * Given a string or array of headers, returns XRD-like assoc array
     *
     * @param string|array $header string or array of strings for headers
     *
     * @return array Link header in XRD-like format, with the keys
     *               'href', 'rel' and 'type' taken from a LinkHeader object
     */
    protected static function parseHeader($header)
    {
        $lh = new LinkHeader($header);

        return array('href' => $lh->href,
                     'rel'  => $lh->rel,
                     'type' => $lh->type);
    }
}

Evan Prodromou's avatar
Evan Prodromou committed
351 352 353 354 355 356 357 358 359 360 361 362 363
/**
 * Implementation of discovery using HTML <link> element
 *
 * Discovers XRD file for a user by fetching the URL and reading any
 * <link> elements in the HTML response.
 *
 * @category  Discovery
 * @package   StatusNet
 * @author    James Walker <james@status.net>
 * @copyright 2010 StatusNet, Inc.
 * @license   http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0
 * @link      http://status.net/
 */
class Discovery_LRDD_Link_HTML implements Discovery_LRDD
{
    /**
     * Discovery core method
     *
     * For HTTP IDs, fetch the URL and look for <link> elements
     * in the HTML response.
     *
     * @param string $uri URI to inquire about
     *
     * @return array|false Links in XRD-ish assoc array, or false if $uri is
     *                     a Webfinger ID, the request failed, or the status
     *                     was not 200
     */
    public function discover($uri)
    {
        // acct: URIs cannot be dereferenced over HTTP, so skip the request
        if (Discovery::isWebfinger($uri)) {
            return false;
        }

        try {
            $client   = new HTTPClient();
            $response = $client->get($uri);
        } catch (HTTP_Request2_Exception $e) {
            return false;
        }

        if ($response->getStatus() != 200) {
            return false;
        }

        return self::parse($response->getBody());
    }

    /**
     * Parse HTML and return <link> elements
     *
     * Given an HTML string, scans its <head> section for <link> elements.
     * Declared static because discover() calls it without an instance;
     * calling it on an object keeps working.
     *
     * @param string $html HTML to scan
     *
     * @return array array of associative arrays in XRD-ish format, each with
     *               the keys 'href', 'rel' and 'type' (null when the
     *               attribute is absent); empty if there is no <head>
     */
    public static function parse($html)
    {
        $links = array();

        // Only <link> elements inside <head> are considered
        if (!preg_match('/<head(\s[^>]*)?>(.*?)<\/head>/is', (string)$html, $head_matches)) {
            return $links;
        }

        preg_match_all('/<link\s[^>]*>/i', $head_matches[2], $link_matches);

        foreach ($link_matches[0] as $link_html) {
            $links[] = array(
                'href' => self::extractAttribute($link_html, 'href'),
                'rel'  => self::extractAttribute($link_html, 'rel'),
                'type' => self::extractAttribute($link_html, 'type'),
            );
        }

        return $links;
    }

    /**
     * Pull the value of one attribute out of a single tag's markup
     *
     * Accepts double-quoted, single-quoted and unquoted values.
     *
     * @param string $tag_html Markup of one element, e.g. '<link rel="x">'
     * @param string $name     Attribute name to look for (case-insensitive)
     *
     * @return string|null Attribute value, or null if the attribute is absent
     */
    protected static function extractAttribute($tag_html, $name)
    {
        // Group 1 is the opening quote and group 2 the quoted value up to
        // the same quote; group 3 is an unquoted value, which ends at
        // whitespace or the closing ">".
        $pattern = '/\s' . preg_quote($name, '/')
                 . '\s*=\s*(?:("|\')(.*?)\\1|([^"\'\s>]+))/is';

        if (!preg_match($pattern, $tag_html, $matches)) {
            return null;
        }

        // Group 3 only exists when the unquoted alternative matched
        if (isset($matches[3])) {
            return $matches[3];
        }

        return $matches[2];
    }
}