<?php
/**
 * StatusNet, the distributed open-source microblogging tool
 *
 * Utility for doing HTTP-related things
 *
 * PHP version 5
 *
 * LICENCE: This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 * @category  Action
 * @package   StatusNet
 * @author    Evan Prodromou <evan@status.net>
 * @copyright 2009 StatusNet, Inc.
 * @license   http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0
 * @link      http://status.net/
 */

// Stop immediately (exit status 1) when the GNUSOCIAL constant is not defined.
// NOTE(review): presumably the constant is defined by the application's entry
// point, so this keeps the file from being executed on its own -- confirm.
if (!defined('GNUSOCIAL')) {
    exit(1);
}

/**
 * Useful structure for HTTP responses
 *
 * We make HTTP calls in several places, and we have several different
 * ways of doing them. This class hides the specifics of what underlying
 * library (curl or PHP-HTTP or whatever) that's used.
 *
 * This extends the HTTP_Request2_Response class with methods to get info
 * about any followed redirects.
 *
 * Originally used the name 'HTTPResponse' to match earlier code, but
 * this conflicts with a class in the PECL HTTP extension.
 *
 * @category HTTP
 * @package StatusNet
 * @author Evan Prodromou <evan@status.net>
 * @author Brion Vibber <brion@status.net>
 * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0
 * @link http://status.net/
 */
class GNUsocial_HTTPResponse extends HTTP_Request2_Response
{
    // The three properties below used to be created dynamically, which is
    // deprecated since PHP 8.2. They are declared public and without a
    // default so that their visibility and isset() behaviour are unchanged.

    /**
     * URL handed to the constructor, stored as a string.
     * @var string
     */
    public $url;

    /**
     * Redirect count handed to the constructor, stored as an integer.
     * @var int
     */
    public $redirectCount;

    /**
     * Only populated when the wrapped response carries a 'redirUrls'
     * property (HTTPClient::send() attaches one before wrapping).
     * @var array|null
     */
    public $redirUrls;

    /**
     * Wrap an existing response object.
     *
     * The parent constructor is deliberately not called: instead, every
     * property of $response that is visible from this class is copied
     * onto the new object, so it carries the same state as the original.
     *
     * @param HTTP_Request2_Response $response  response to copy from
     * @param mixed                  $url       request URL; cast with strval()
     * @param int                    $redirects number of redirects; cast with intval()
     */
    public function __construct(HTTP_Request2_Response $response, $url, $redirects=0)
    {
        foreach (get_object_vars($response) as $key => $val) {
            $this->$key = $val;
        }
        // Assigned after the copy so they win over any same-named copied property.
        $this->url = strval($url);
        $this->redirectCount = intval($redirects);
    }

    /**
     * Get the count of redirects that have been followed, if any.
     * @return int
     */
    public function getRedirectCount()
    {
        return $this->redirectCount;
    }

    /**
     * Gets the URL this response was constructed with.
     * Use getEffectiveUrl() for the final target.
     * @return string URL
     */
    public function getUrl()
    {
        return $this->url;
    }

    /**
     * Check if the response is OK, i.e. its status code is in the 2xx range
     * (200 <= status < 300).
     * @return bool
     */
    public function isOk()
    {
        $status = $this->getStatus();
        return ($status >= 200 && $status < 300);
    }
}

/**
 * Utility class for doing HTTP client stuff
 *
 * We make HTTP calls in several places, and we have several different
 * ways of doing them. This class hides the specifics of what underlying
 * library (curl or PHP-HTTP or whatever) that's used.
 *
 * This extends the PEAR HTTP_Request2 package:
 * - sends a GNU social-specific User-Agent header
 * - 'follow_redirects' config option, defaulting on
 * - 'max_redirs' config option, defaulting to 10
 * - extended response class adds getRedirectCount() and getUrl() methods
 * - get(), head() and post() convenience methods return a
 *   GNUsocial_HTTPResponse; quickGet() returns the body content directly
 *
 * @category HTTP
 * @package  StatusNet
 * @author   Evan Prodromou <evan@status.net>
 * @author   Brion Vibber <brion@status.net>
 * @license  http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0
 * @link     http://status.net/
 */
class HTTPClient extends HTTP_Request2
{
    /**
     * Set up the site-wide defaults, then hand over to the parent constructor.
     *
     * The defaults are written into $this->config before parent::__construct()
     * is called with the caller's $config.
     *
     * @param mixed  $url    request URL, passed through to the parent
     * @param string $method request method, passed through to the parent
     * @param array  $config configuration, passed through to the parent
     */
    public function __construct($url=null, $method=self::METHOD_GET, $config=array())
    {
        // Keep the library default when the site setting is empty/falsy.
        $this->config['connect_timeout'] = common_config('http', 'connect_timeout') ?: $this->config['connect_timeout'];
        $this->config['max_redirs'] = 10;
        $this->config['follow_redirects'] = true;

        // We've had some issues with keepalive breaking with
        // HEAD requests, such as to youtube which seems to be
        // emitting chunked encoding info for an empty body
        // instead of not emitting anything. This may be a
        // bug on YouTube's end, but the upstream library
        // ought to be investigated to see if we can handle
        // it gracefully in that case as well.
        $this->config['protocol_version'] = '1.0';

        // Default state of OpenSSL seems to have no trusted
        // SSL certificate authorities, which breaks hostname
        // verification and means we have a hard time communicating
        // with other sites' HTTPS interfaces.
        //
        // Turn off verification unless we've configured a CA bundle.
        if (common_config('http', 'ssl_cafile')) {
            $this->config['ssl_cafile'] = common_config('http', 'ssl_cafile');
        } else {
            $this->config['ssl_verify_peer'] = false;
        }

        // This means "verify the cert hostname against what we connect to", it does not
        // imply CA trust or anything like that. Just the hostname.
        $this->config['ssl_verify_host'] = common_config('http', 'ssl_verify_host');

        // Use the curl adapter only when it is both enabled in config and available.
        if (common_config('http', 'curl') && extension_loaded('curl')) {
            $this->config['adapter'] = 'HTTP_Request2_Adapter_Curl';
        }

        // Copy over whichever proxy_* settings are non-empty in the site config.
        foreach (array('host', 'port', 'user', 'password', 'auth_scheme') as $cf) {
            $k = 'proxy_'.$cf;
            $v = common_config('http', $k);
            if (!empty($v)) {
                $this->config[$k] = $v;
            }
        }

        parent::__construct($url, $method, $config);
        $this->setHeader('User-Agent', self::userAgent());
    }

    /**
     * Convenience/back-compat instantiator
     * @return HTTPClient
     */
    public static function start()
    {
        return new HTTPClient();
    }

    /**
     * Quick static function to GET a URL
     *
     * @param string      $url    URL to fetch
     * @param string|null $accept value for the Accept header; not sent when null
     * @param array       $params query parameters appended to $url (with '?'
     *                            or '&' depending on whether $url already
     *                            contains a '?')
     * @return string response body
     * @throws Exception when the response status is not 2xx; the exception
     *                   code is the HTTP status
     */
    public static function quickGet($url, $accept=null, $params=array())
    {
        if (!empty($params)) {
            // '' instead of null for the numeric prefix: same output, but
            // passing null here is deprecated since PHP 8.1.
            $params = http_build_query($params, '', '&');
            if (strpos($url, '?') === false) {
                $url .= '?' . $params;
            } else {
                $url .= '&' . $params;
            }
        }

        $client = new HTTPClient();
        if (!is_null($accept)) {
            $client->setHeader('Accept', $accept);
        }
        $response = $client->get($url);
        if (!$response->isOk()) {
            // TRANS: Exception. %s is the URL we tried to GET.
            throw new Exception(sprintf(_m('Could not GET URL %s.'), $url), $response->getStatus());
        }
        return $response->getBody();
    }

    /**
     * GET a URL via quickGet() and decode the body as JSON.
     *
     * @param string $url    URL to fetch
     * @param array  $params query parameters, see quickGet()
     * @return mixed decoded data as returned by json_decode()
     * @throws ServerException when json_decode() yields null
     */
    public static function quickGetJson($url, $params=array())
    {
        $data = json_decode(self::quickGet($url, null, $params));
        if (is_null($data)) {
            common_debug('Could not decode JSON data from URL: '.$url);
            throw new ServerException('Could not decode JSON data from URL');
        }
        return $data;
    }

    /**
     * Convenience function to run a GET request.
     *
     * @param string $url
     * @param array  $headers optional HTTP headers, see doRequest()
     * @return GNUsocial_HTTPResponse
     * @throws HTTP_Request2_Exception
     */
    public function get($url, $headers=array())
    {
        return $this->doRequest($url, self::METHOD_GET, $headers);
    }

    /**
     * Convenience function to run a HEAD request.
     *
     * NOTE: Will probably turn into a GET request if you let it follow redirects!
     *       That option is only there to be flexible and may be removed in the future!
     *
     * NOTE(review): send() in this class only consults 'max_redirs' for its own
     *       redirect loop, so 3xx responses may still be followed there even
     *       when $follow_redirects is false -- confirm this is intended.
     *
     * @param string $url
     * @param array  $headers          optional HTTP headers, see doRequest()
     * @param bool   $follow_redirects value used for the 'follow_redirects'
     *                                 config option during this request only
     * @return GNUsocial_HTTPResponse
     * @throws HTTP_Request2_Exception
     */
    public function head($url, $headers=array(), $follow_redirects=false)
    {
        // Save the configured value for follow_redirects
        $old_follow = $this->config['follow_redirects'];
        try {
            // Temporarily (possibly) override the follow_redirects setting
            $this->config['follow_redirects'] = $follow_redirects;
            return $this->doRequest($url, self::METHOD_HEAD, $headers);
        } finally {
            // Runs on both return and exception: reset to the old value
            $this->config['follow_redirects'] = $old_follow;
        }
    }

    /**
     * Convenience function to POST form data.
     *
     * @param string $url
     * @param array $headers optional associative array of HTTP headers
     * @param array $data optional associative array or blob of form data to submit
     * @return GNUsocial_HTTPResponse
     * @throws HTTP_Request2_Exception
     */
    public function post($url, $headers=array(), $data=array())
    {
        if ($data) {
            if (is_array($data)) {
                $this->addPostParameter($data);
            } else {
                // A non-array blob is sent as the raw request body; handing
                // it to addPostParameter() would treat it as a parameter
                // name with no value.
                $this->setBody($data);
            }
        }
        return $this->doRequest($url, self::METHOD_POST, $headers);
    }

    /**
     * Point the client at $url with the given method and headers, then send.
     *
     * @param string $url
     * @param string $method  one of the METHOD_* constants
     * @param array  $headers headers to set; either a list of
     *                        'Name: value' strings or an associative
     *                        name => value array
     * @return GNUsocial_HTTPResponse
     * @throws HTTP_Request2_Exception
     * @throws NoHttpResponseException when send() returns null
     */
    protected function doRequest($url, $method, $headers)
    {
        $this->setUrl($url);

        // Workaround for HTTP_Request2 not setting up SNI in socket contexts;
        // This fixes cert validation for SSL virtual hosts using SNI.
        // Requires PHP 5.3.2 or later and OpenSSL with SNI support.
        if ($this->url->getScheme() == 'https' && defined('OPENSSL_TLSEXT_SERVER_NAME')) {
            $this->config['ssl_SNI_enabled'] = true;
            $this->config['ssl_SNI_server_name'] = $this->url->getHost();
        }

        $this->setMethod($method);
        if ($headers) {
            // setHeader() accepts the whole array and keeps string keys as
            // header names; iterating over the values alone would drop the
            // names of an associative array.
            $this->setHeader($headers);
        }
        $response = $this->send();
        if (is_null($response)) {
            // TRANS: Failed to retrieve a remote web resource, %s is the target URL.
            throw new NoHttpResponseException($url);
        }
        return $response;
    }

    /**
     * Write a log line prefixed with this class name and the current
     * request method and URL.
     *
     * @param int    $level  log level passed to common_log()
     * @param string $detail text appended after the method and URL
     */
    protected function log($level, $detail)
    {
        $method = $this->getMethod();
        $url = $this->getUrl();
        common_log($level, __CLASS__ . ": HTTP $method $url - $detail");
    }

    /**
     * Pulls up GNU Social's customized user-agent string, so services
     * we hit can track down the responsible software.
     *
     * @return string
     */
    public static function userAgent()
    {
        return GNUSOCIAL_ENGINE . '/' . GNUSOCIAL_VERSION
                . ' (' . GNUSOCIAL_CODENAME . ')';
    }

    /**
     * Actually performs the HTTP request and returns a
     * GNUsocial_HTTPResponse object with response body and header info.
     *
     * Wraps around parent send() to add logging and redirection processing.
     *
     * NOTE(review): because the limit check is "++count >= max_redirs", this
     * loop follows at most max_redirs - 1 redirects itself, and the count
     * handed to the response includes the redirect that was refused.
     *
     * @return GNUsocial_HTTPResponse
     * @throws HTTP_Request2_Exception
     */
    public function send()
    {
        $maxRedirs = intval($this->config['max_redirs']);
        if (empty($this->config['max_redirs'])) {
            $maxRedirs = 0;
        }
        $redirs = 0;
        $redirUrls = array();
        do {
            try {
                $response = parent::send();
            } catch (Exception $e) {
                // Log the failure, then let the caller deal with it.
                $this->log(LOG_ERR, $e->getMessage());
                throw $e;
            }
            $code = $response->getStatus();
            // Remember the effective URL of every hop and attach the list so far.
            $effectiveUrl = $response->getEffectiveUrl();
            $redirUrls[] = $effectiveUrl;
            $response->redirUrls = $redirUrls;
            if ($code >= 200 && $code < 300) {
                $reason = $response->getReasonPhrase();
                $this->log(LOG_INFO, "$code $reason");
            } elseif ($code >= 300 && $code < 400) {
                $url = $this->getUrl();
                $target = $response->getHeader('Location');

                if (++$redirs >= $maxRedirs) {
                    common_log(LOG_ERR, __CLASS__ . ": Too many redirects: skipping $code redirect from $url to $target");
                    break;
                }
                try {
                    $this->setUrl($target);
                    $this->setHeader('Referer', $url);
                    common_log(LOG_INFO, __CLASS__ . ": Following $code redirect from $url to $target");
                    // Jump to the loop condition and request the new target.
                    continue;
                } catch (HTTP_Request2_Exception $e) {
                    // Fall through to the break below and return the 3xx response.
                    common_log(LOG_ERR, __CLASS__ . ": Invalid $code redirect from $url to $target");
                }
            } else {
                $reason = $response->getReasonPhrase();
                $this->log(LOG_ERR, "$code $reason");
            }
            break;
        } while ($maxRedirs);
        return new GNUsocial_HTTPResponse($response, $this->getUrl(), $redirs);
    }
}