We are no longer offering accounts on this server. Consider https://gitlab.freedesktop.org/ as a place to host projects.

Commit fb50a2d8 authored by Zach Copley

Merge branch 'testing' of gitorious.org:statusnet/mainline into testing

* 'testing' of gitorious.org:statusnet/mainline:
  Parse RSS items as activities
  Remove hkit and do our own hcard parsing
  Work around weird bug with HTML normalization via PHP DOM module; if source had xmlns and xml:lang I ended up with double output, breaking the subsequent parsing. Will have to track this down later and report upstream if not already resolved.
parents 1f160bb7 05e3768e
......@@ -643,38 +643,11 @@ class ActivityObject
);
if ($element->tagName == 'author') {
$this->type = self::PERSON; // XXX: is this fair?
$this->title = $this->_childContent($element, self::NAME);
$this->id = $this->_childContent($element, self::URI);
if (empty($this->id)) {
$email = $this->_childContent($element, self::EMAIL);
if (!empty($email)) {
// XXX: acct: ?
$this->id = 'mailto:'.$email;
}
}
$this->_fromAuthor($element);
} else if ($element->tagName == 'item') {
$this->_fromRssItem($element);
} else {
$this->type = $this->_childContent($element, Activity::OBJECTTYPE,
Activity::SPEC);
if (empty($this->type)) {
$this->type = ActivityObject::NOTE;
}
$this->id = $this->_childContent($element, self::ID);
$this->title = $this->_childContent($element, self::TITLE);
$this->summary = $this->_childContent($element, self::SUMMARY);
$this->source = $this->_getSource($element);
$this->content = ActivityUtils::getContent($element);
$this->link = ActivityUtils::getPermalink($element);
$this->_fromAtomEntry($element);
}
// Some per-type attributes...
......@@ -697,6 +670,72 @@ class ActivityObject
}
}
/**
 * Populate this object from an author element.
 *
 * The object is typed as a person, titled with the element's name child,
 * and identified by its URI child. When no URI is present, a "mailto:" id
 * is built from the email child instead, if there is one.
 *
 * @param DOMElement $element author element to read from
 *
 * @return void
 */
private function _fromAuthor($element)
{
    $this->type  = self::PERSON; // XXX: is this fair?
    $this->title = $this->_childContent($element, self::NAME);

    $uri      = $this->_childContent($element, self::URI);
    $this->id = $uri;

    if (!empty($uri)) {
        return;
    }

    // No URI given: fall back to the email address as an identifier.
    $address = $this->_childContent($element, self::EMAIL);

    if (!empty($address)) {
        // XXX: acct: ?
        $this->id = 'mailto:'.$address;
    }
}
/**
 * Populate this object from an Atom entry (or activity object) element.
 *
 * The object type is read from the activity-spec namespace and defaults
 * to a note when absent; id, title and summary come from the element's
 * Atom children, while source, content and permalink are delegated to
 * the corresponding helpers.
 *
 * @param DOMElement $element element to read from
 *
 * @return void
 */
private function _fromAtomEntry($element)
{
    $declaredType = $this->_childContent(
        $element,
        Activity::OBJECTTYPE,
        Activity::SPEC
    );

    // Anything without an explicit object type is treated as a note.
    $this->type = empty($declaredType) ? ActivityObject::NOTE : $declaredType;

    $this->id      = $this->_childContent($element, self::ID);
    $this->title   = $this->_childContent($element, self::TITLE);
    $this->summary = $this->_childContent($element, self::SUMMARY);

    $this->source  = $this->_getSource($element);
    $this->content = ActivityUtils::getContent($element);
    $this->link    = ActivityUtils::getPermalink($element);
}
// @fixme rationalize with Activity::_fromRssItem()
/**
 * Populate this object from an RSS <item> element.
 *
 * Title and link are read from the item's un-namespaced children. The
 * content is taken from the content-module element when present, and
 * from <description> otherwise; in both cases entities are decoded.
 * A <guid>, when present, becomes the id and may also replace the link.
 *
 * @param DOMElement $item RSS item element to read from
 *
 * @return void
 */
private function _fromRssItem($item)
{
    $this->title = ActivityUtils::childContent($item, ActivityObject::TITLE, Activity::RSS);

    // Prefer the content-module body; fall back to <description>.
    $bodyEl = ActivityUtils::child($item, ActivityUtils::CONTENT, Activity::CONTENTNS);

    if (empty($bodyEl)) {
        $bodyEl = ActivityUtils::child($item, Activity::DESCRIPTION, Activity::RSS);
    }

    if (!empty($bodyEl)) {
        $this->content = htmlspecialchars_decode($bodyEl->textContent, ENT_QUOTES);
    }

    $this->link = ActivityUtils::childContent($item, ActivityUtils::LINK, Activity::RSS);

    $guidEl = ActivityUtils::child($item, Activity::GUID, Activity::RSS);

    if (!empty($guidEl)) {
        $this->id = $guidEl->textContent;

        // Only treat the guid as a permalink when the feed does not say
        // otherwise: isPermaLink="false" marks an opaque identifier that
        // must not replace <link>. This mirrors Activity::_fromRssItem().
        if ($guidEl->hasAttribute('isPermaLink')
            && $guidEl->getAttribute('isPermaLink') != 'false') {
            // overwrites <link>
            $this->link = $this->id;
        }
    }
}
private function _childContent($element, $tag, $namespace=ActivityUtils::ATOM)
{
return ActivityUtils::childContent($element, $tag, $namespace);
......@@ -1051,6 +1090,21 @@ class Activity
// Atom element names
const PUBLISHED = 'published';
const UPDATED = 'updated';

// RSS elements live in no namespace, so null is passed wherever a
// namespace URI would otherwise be given.
const RSS = null; // no namespace!

// RSS element names looked up when parsing items and channels
const PUBDATE = 'pubDate';
const DESCRIPTION = 'description';
const GUID = 'guid';
// rel value passed to ActivityUtils::getLink() to find a feed's own URL
const SELF = 'self';
const IMAGE = 'image';
const URL = 'url';

// Namespace URI and element name used to find a dc:creator on an RSS item
const DC = 'http://purl.org/dc/elements/1.1/';
const CREATOR = 'creator';

// Namespace URI of the RSS content module, used to find an item's full content
const CONTENTNS = 'http://purl.org/rss/1.0/modules/content/';
public $actor; // an ActivityObject
public $verb; // a string (the URL)
public $object; // an ActivityObject
......@@ -1081,8 +1135,6 @@ class Activity
return;
}
$this->entry = $entry;
// Insist on a feed's root DOMElement; don't allow a DOMDocument
if ($feed instanceof DOMDocument) {
throw new ClientException(
......@@ -1090,8 +1142,22 @@ class Activity
);
}
$this->entry = $entry;
$this->feed = $feed;
if ($entry->namespaceURI == Activity::ATOM &&
$entry->localName == 'entry') {
$this->_fromAtomEntry($entry, $feed);
} else if ($entry->namespaceURI == Activity::RSS &&
$entry->localName == 'item') {
$this->_fromRssItem($entry, $feed);
} else {
throw new Exception("Unknown DOM element: {$entry->namespaceURI} {$entry->localName}");
}
}
function _fromAtomEntry($entry, $feed)
{
$pubEl = $this->_child($entry, self::PUBLISHED, self::ATOM);
if (!empty($pubEl)) {
......@@ -1177,6 +1243,69 @@ class Activity
}
}
/**
 * Populate this activity from an RSS <item> element.
 *
 * Reads the verb (defaulting to "post"), publication time, actor, title,
 * content, link and guid from the item, then builds the activity's
 * object and context from the same element.
 *
 * The actor is taken from the first available of: the item's <author>,
 * its dc:creator, or the enclosing feed element.
 *
 * @param DOMElement $item RSS item element
 * @param DOMElement $rss  enclosing feed element; may be empty
 *
 * @return void
 */
function _fromRssItem($item, $rss)
{
    // Verb: explicit if the item carries one, otherwise an implied post.
    $verbNode = $this->_child($item, self::VERB);

    if (empty($verbNode)) {
        $this->verb = ActivityVerb::POST;
        // XXX: do other implied stuff here
    } else {
        $this->verb = trim($verbNode->textContent);
    }

    // Publication time
    $dateNode = $this->_child($item, self::PUBDATE, self::RSS);

    if (!empty($dateNode)) {
        $this->time = strtotime($dateNode->textContent);
    }

    // Actor: <author>, then dc:creator, then the feed itself.
    $authorNode  = $this->_child($item, self::AUTHOR, self::RSS);
    $creatorNode = empty($authorNode)
        ? $this->_child($item, self::CREATOR, self::DC)
        : null;

    if (!empty($authorNode)) {
        $this->actor = $this->_fromRssAuthor($authorNode);
    } else if (!empty($creatorNode)) {
        $this->actor = $this->_fromDcCreator($creatorNode);
    } else if (!empty($rss)) {
        $this->actor = $this->_fromRss($rss);
    }

    $this->title = ActivityUtils::childContent($item, ActivityObject::TITLE, self::RSS);

    // Content: prefer the content-module body, fall back to <description>.
    $bodyNode = ActivityUtils::child($item, ActivityUtils::CONTENT, self::CONTENTNS);

    if (empty($bodyNode)) {
        $bodyNode = ActivityUtils::child($item, self::DESCRIPTION, self::RSS);
    }

    if (!empty($bodyNode)) {
        $this->content = htmlspecialchars_decode($bodyNode->textContent, ENT_QUOTES);
    }

    $this->link = ActivityUtils::childContent($item, ActivityUtils::LINK, self::RSS);

    // @fixme enclosures
    // @fixme thumbnails... maybe

    $guidNode = ActivityUtils::child($item, self::GUID, self::RSS);

    if (!empty($guidNode)) {
        $this->id = $guidNode->textContent;

        $isPermalink = $guidNode->hasAttribute('isPermaLink')
            && $guidNode->getAttribute('isPermaLink') != 'false';

        if ($isPermalink) {
            // overwrites <link>
            $this->link = $this->id;
        }
    }

    $this->object  = new ActivityObject($item);
    $this->context = new ActivityContext($item);
}
/**
* Returns an Atom <entry> based on this activity
*
......@@ -1249,6 +1378,83 @@ class Activity
return $xs->getString();
}
/**
 * Build a person object from an RSS <author> element.
 *
 * The element text is matched, in order, against:
 *   - "email (name)"
 *   - "name <email>"
 *   - anything containing "@", taken as a bare email address
 *   - anything else, taken as a bare name
 *
 * Surrounding whitespace is stripped first. Without that, text from a
 * pretty-printed feed (leading/trailing newlines and indentation) fails
 * the anchored patterns, falls through to the loose "@" check, and the
 * whole padded string ends up inside the mailto: id.
 *
 * @param DOMElement $el RSS author element
 *
 * @return ActivityObject person object; id is only set when an email
 *                        address was found
 */
function _fromRssAuthor($el)
{
    $text = trim($el->textContent);

    if (preg_match('/^(.*?) \((.*)\)$/', $text, $match)) {
        // email (name)
        $email = $match[1];
        $name  = $match[2];
    } else if (preg_match('/^(.*?) <(.*)>$/', $text, $match)) {
        // name <email>
        $name  = $match[1];
        $email = $match[2];
    } else if (preg_match('/.*@.*/', $text)) {
        $email = $text;
        $name  = null;
    } else {
        $name  = $text;
        $email = null;
    }

    // Not really enough info

    $actor = new ActivityObject();

    $actor->element = $el;
    $actor->type    = ActivityObject::PERSON;
    $actor->title   = $name;

    if (!empty($email)) {
        $actor->id = 'mailto:'.$email;
    }

    return $actor;
}
/**
 * Build a person object from a dc:creator element.
 *
 * The element's text becomes the person's title; no id is assigned.
 *
 * @param DOMElement $el dc:creator element
 *
 * @return ActivityObject person object titled with the creator text
 */
function _fromDcCreator($el)
{
    // Not really enough info

    $creator = new ActivityObject();

    $creator->element = $el;
    $creator->title   = $el->textContent;
    $creator->type    = ActivityObject::PERSON;

    return $creator;
}
/**
 * Build an actor object from the feed element itself.
 *
 * Used when an item names no author of its own. The feed's title, link
 * and "self" link become the actor's title, link and id; a description
 * becomes its content and an <image> url is added to its avatar links.
 *
 * @param DOMElement $el feed element to read from
 *
 * @return ActivityObject actor describing the feed
 */
function _fromRss($el)
{
    $feedActor = new ActivityObject();

    $feedActor->element = $el;
    $feedActor->type    = ActivityObject::PERSON; // @fixme guess better

    $feedActor->title = ActivityUtils::childContent($el, ActivityObject::TITLE, self::RSS);
    $feedActor->link  = ActivityUtils::childContent($el, ActivityUtils::LINK, self::RSS);
    $feedActor->id    = ActivityUtils::getLink($el, self::SELF);

    $description = ActivityUtils::childContent($el, self::DESCRIPTION, self::RSS);

    if (!empty($description)) {
        $feedActor->content = htmlspecialchars_decode($description, ENT_QUOTES);
    }

    $imageNode = ActivityUtils::child($el, self::IMAGE, self::RSS);

    if (empty($imageNode)) {
        return $feedActor;
    }

    $feedActor->avatarLinks[] = ActivityUtils::childContent($imageNode, self::URL, self::RSS);

    return $feedActor;
}
private function _child($element, $tag, $namespace=self::SPEC)
{
return ActivityUtils::child($element, $tag, $namespace);
......
<?php
// hcard profile for hkit
//
// This file configures an hKit instance for parsing hCard: it assigns the
// instance's profile properties through $this, so it is presumably pulled
// in from inside an hKit method (TODO confirm against loadProfile()).

// Class name that marks the root element of one card
$this->root_class = 'vcard';

// Class names to extract. The list is flat: a class name immediately
// followed by an array means "this class has these sub-properties".
$this->classes = array(
'fn', array('honorific-prefix', 'given-name', 'additional-name', 'family-name', 'honorific-suffix'),
'n', array('honorific-prefix', 'given-name', 'additional-name', 'family-name', 'honorific-suffix'),
'adr', array('post-office-box', 'extended-address', 'street-address', 'postal-code', 'country-name', 'type', 'region', 'locality'),
'label', 'bday', 'agent', 'nickname', 'photo', 'class',
'email', array('type', 'value'),
'category', 'key', 'logo', 'mailer', 'note',
'org', array('organization-name', 'organization-unit'),
'tel', array('type', 'value'),
'geo', array('latitude', 'longitude'),
'tz', 'uid', 'url', 'rev', 'role', 'sort-string', 'sound', 'title'
);
// classes that must only appear once per card
$this->singles = array(
'fn'
);
// classes that are required (not strictly enforced - give at least one!)
$this->required = array(
'fn'
);
// Per-class list of 'TAG|attribute' pairs; presumably the attribute to read
// the value from when the class appears on that tag, instead of the
// element text (TODO confirm against getNodeValue()).
$this->att_map = array(
'fn' => array('IMG|alt'),
'url' => array('A|href', 'IMG|src', 'AREA|href'),
'photo' => array('IMG|src'),
'bday' => array('ABBR|title'),
'logo' => array('IMG|src'),
'email' => array('A|href'),
'geo' => array('ABBR|title')
);
// Per-class callbacks on the hKit instance; presumably applied to the
// extracted value of that class (TODO confirm where callbacks are invoked).
$this->callbacks = array(
'url' => array($this, 'resolvePath'),
'photo' => array($this, 'resolvePath'),
'logo' => array($this, 'resolvePath'),
'email' => array($this, 'resolveEmail')
);
/**
 * Post-process a list of parsed hCards.
 *
 * Each card is run through the two implied-"n" passes in turn: first the
 * string-based optimization, then the structured-fn pass.
 *
 * @param array $a list of parsed vcard arrays
 *
 * @return array the same list with every card updated
 */
function hKit_hcard_post($a)
{
    // Walk by key and hand each slot to the helpers, which modify the
    // card in place through their by-reference parameter.
    foreach ($a as $index => $ignored) {
        hKit_implied_n_optimization($a[$index]);
        hKit_implied_n_from_fn($a[$index]);
    }

    return $a;
}
/**
 * Derive a structured name ("n") from a two-word formatted name ("fn").
 *
 * Applies only when fn is a plain string, the card has no n yet, fn is
 * not the same as org, and fn splits into exactly two space-separated
 * parts. The first matching layout below decides which part is the
 * given name and which the family name.
 *
 * @param array &$vcard parsed card, modified in place
 *
 * @return void
 */
function hKit_implied_n_optimization(&$vcard)
{
    // Needs a plain-string fn to work from.
    if (!array_key_exists('fn', $vcard) || is_array($vcard['fn'])) {
        return;
    }

    // An explicit n always wins.
    if (array_key_exists('n', $vcard)) {
        return;
    }

    // fn equal to org means the card describes an organization.
    if (array_key_exists('org', $vcard) && $vcard['fn'] == $vcard['org']) {
        return;
    }

    if (count(explode(' ', $vcard['fn'])) != 2) {
        return;
    }

    // Each rule: regex, capture index of given name, capture index of family name.
    $rules = array(
        array('/^(\S+),\s*(\S{1})$/', 2, 1),    // Lastname, Initial
        array('/^(\S+)\s*(\S{1})\.*$/', 2, 1),  // Lastname Initial(.)
        array('/^(\S+),\s*(\S+)$/', 2, 1),      // Lastname, Firstname
        array('/^(\S+)\s*(\S+)$/', 1, 2),       // Firstname Lastname
    );

    foreach ($rules as $rule) {
        list($regex, $givenIdx, $familyIdx) = $rule;

        if (preg_match($regex, $vcard['fn'], $found) !== 1) {
            continue;
        }

        $vcard['n'] = array(
            'given-name'  => $found[$givenIdx],
            'family-name' => $found[$familyIdx],
        );
        break;
    }
}
/**
 * Promote a structured "fn" to "n" and flatten fn back to its text.
 *
 * When fn was parsed as an array (it carried name sub-properties), that
 * array is copied to n unless the card already has an n or fn equals
 * org. In every case an array fn is then replaced by its 'text' entry.
 * Cards whose fn is missing or not an array are left untouched.
 *
 * @param array &$vcard parsed card, modified in place
 *
 * @return void
 */
function hKit_implied_n_from_fn(&$vcard)
{
    if (!array_key_exists('fn', $vcard) || !is_array($vcard['fn'])) {
        return;
    }

    $structuredFn = $vcard['fn'];

    $describesOrg = array_key_exists('n', $vcard)
        || (array_key_exists('org', $vcard) && $structuredFn == $vcard['org']);

    // $describesOrg is also true when n already exists, so n is never overwritten.
    if (!$describesOrg) {
        $vcard['n'] = $structuredFn;
    }

    $vcard['fn'] = $structuredFn['text'];
}
?>
\ No newline at end of file
<?php
/*
hKit Library for PHP5 - a generic library for parsing Microformats
Copyright (C) 2006 Drew McLellan
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Author
Drew McLellan - http://allinthehead.com/
Contributors:
Scott Reynen - http://www.randomchaos.com/
Version 0.5, 22-Jul-2006
fixed by-ref issue cropping up in PHP 5.0.5
fixed a bug with a@title
added support for new fn=n optimisation
added support for new a.include include-pattern
Version 0.4, 23-Jun-2006
prevented nested includes from causing infinite loops
returns false if URL can't be fetched
added pre-flight check for base support level
added deduping of once-only classnames
prevented accumulation of multiple 'value' values
tuned whitespace handling and treatment of DEL elements
Version 0.3, 21-Jun-2006
added post-processor callback method into profiles
fixed minor problems raised by hcard testsuite
added support for include-pattern
added support for td@headers pattern
added implied-n optimization into default hcard profile
Version 0.2, 20-Jun-2006
added class callback mechanism
added resolvePath & resolveEmail
added basic BASE support
Version 0.1.1, 19-Jun-2006 (different timezone, no time machine)
added external Tidy option
Version 0.1, 20-Jun-2006
initial release
*/
class hKit
{
// --- Public configuration ---
public $tidy_mode = 'proxy'; // 'proxy', 'exec', 'php' or 'none'
public $tidy_proxy = 'http://cgi.w3.org/cgi-bin/tidy?forceXML=on&docAddr='; // required only for tidy_mode=proxy
public $tmp_dir = '/path/to/writable/dir/'; // required only for tidy_mode=exec

// --- Profile settings ---
// The hcard profile script assigns these through $this. They start out
// as empty strings here, but the profile replaces all except $root_class
// with arrays.
private $root_class = '';
private $classes = '';
private $singles = '';
private $required = '';
private $att_map = '';
private $callbacks = '';
// NOTE(review): presumably the profile's post-processing hook - confirm
private $processor = '';

// --- Per-document state ---
// NOTE(review): presumably the URL being parsed and its BASE - confirm
private $url = '';
private $base = '';
// Queried with xpath() when resolving the td@headers pattern
private $doc = '';
/**
 * Constructor: verify that the functions hKit depends on are available.
 *
 * Terminates the script with an error message naming every missing
 * function. Declared as __construct() because a method named after the
 * class is no longer treated as a constructor on current PHP versions,
 * which would silently skip this check.
 */
public function __construct()
{
    // pre-flight checks
    $required = array('dom_import_simplexml', 'file_get_contents', 'simplexml_load_string');
    $missing  = array();

    foreach ($required as $f) {
        if (!function_exists($f)) {
            $missing[] = $f . '()';
        }
    }

    if (!empty($missing)) {
        die('hKit error: these required functions are not available: <strong>' . implode(', ', $missing) . '</strong>');
    }
}

/**
 * Legacy PHP 4-style constructor name.
 *
 * Kept so code that calls hKit() explicitly keeps working; it simply
 * re-runs the pre-flight checks.
 */
public function hKit()
{
    $this->__construct();
}
/**
 * Fetch a URL and parse it with the given profile.
 *
 * The fetched source is tidied, loaded as a document (restricted to the
 * URL's fragment, if it has one), processed against the profile's class
 * list and finally post-processed.
 *
 * @param string $profile profile name
 * @param string $url     address of the page to parse
 *
 * @return mixed post-processed result, or false when an argument is
 *               empty or the URL yields no source
 */
public function getByURL($profile='', $url='')
{
    if ($profile=='' || $url == '') return false;

    $this->loadProfile($profile);

    $source = $this->loadURL($url);

    if (!$source) {
        return false;
    }

    $tidy_xhtml = $this->tidyThis($source);

    $fragment = false;

    if (strrchr($url, '#')) {
        // array_pop() takes its argument by reference, so the exploded
        // parts must be held in a variable rather than passed directly.
        $parts    = explode('#', $url);
        $fragment = array_pop($parts);
    }

    $doc = $this->loadDoc($tidy_xhtml, $fragment);
    $s   = $this->processNodes($doc, $this->classes);
    $s   = $this->postProcess($profile, $s);

    return $s;
}
/**
 * Parse markup supplied as a string with the given profile.
 *
 * Unlike getByURL(), the input is loaded as-is: nothing is fetched or
 * tidied, and no fragment is applied.
 *
 * @param string $profile   profile name
 * @param string $input_xml markup to parse
 *
 * @return mixed post-processed result, or false when either argument
 *               is empty
 */
public function getByString($profile='', $input_xml='')
{
    if ($profile == '' || $input_xml == '') {
        return false;
    }

    $this->loadProfile($profile);

    $parsed = $this->processNodes($this->loadDoc($input_xml), $this->classes);

    return $this->postProcess($profile, $parsed);
}
private function processNodes($items, $classes, $allow_includes=true){
$out = array();
foreach($items as $item){
$data = array();
for ($i=0; $i<sizeof($classes); $i++){
if (!is_array($classes[$i])){
$xpath = ".//*[contains(concat(' ',normalize-space(@class),' '),' " . $classes[$i] . " ')]";
$results = $item->xpath($xpath);
if ($results){
foreach ($results as $result){
if (isset($classes[$i+1]) && is_array($classes[$i+1])){
$nodes = $this->processNodes($results, $classes[$i+1]);
if (sizeof($nodes) > 0){
$nodes = array_merge(array('text'=>$this->getNodeValue($result, $classes[$i])), $nodes);
$data[$classes[$i]] = $nodes;
}else{
$data[$classes[$i]] = $this->getNodeValue($result, $classes[$i]);
}
}else{
if (isset($data[$classes[$i]])){
if (is_array($data[$classes[$i]])){
// is already an array - append
$data[$classes[$i]][] = $this->getNodeValue($result, $classes[$i]);
}else{
// make it an array
if ($classes[$i] == 'value'){ // unless it's the 'value' of a type/value pattern
$data[$classes[$i]] .= $this->getNodeValue($result, $classes[$i]);
}else{
$old_val = $data[$classes[$i]];
$data[$classes[$i]] = array($old_val, $this->getNodeValue($result, $classes[$i]));
$old_val = false;
}
}
}else{
// set as normal value
$data[$classes[$i]] = $this->getNodeValue($result, $classes[$i]);
}
}
// td@headers pattern
if (strtoupper(dom_import_simplexml($result)->tagName)== "TD" && $result['headers']){
$include_ids = explode(' ', $result['headers']);
$doc = $this->doc;
foreach ($include_ids as $id){
$xpath = "//*[@id='$id']/..";
$includes = $doc->xpath($xpath);
foreach ($includes as $include){
$tmp = $this->processNodes($include, $this->classes);
if (is_array($tmp)) $data = array_merge($data, $tmp);
}
}
}
}
}
}
$result = false;
}