ARC2_RDFParser.php 3.27 KB
Newer Older
tobyink's avatar
tobyink committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
<?php
/*
homepage: http://arc.semsol.org/
license:  http://arc.semsol.org/license

class:    ARC2 RDF Parser (generic)
author:   Benjamin Nowack
version:  2008-09-30 (Addition: Support for Atom)
*/

ARC2::inc('Class');

/**
 * Generic RDF parser front-end.
 *
 * Activates a reader for the given path/data, asks the reader which format it
 * detected, then instantiates the matching format-specific parser class
 * ("ARC2_<Name>Parser") and delegates the actual parsing to it. Accessors such
 * as getTriples() and countTriples() forward to that delegate parser.
 */
class ARC2_RDFParser extends ARC2_Class {

  /**
   * @param mixed $a      Configuration passed on to the parent constructor.
   * @param mixed $caller Calling component, passed by reference to the parent.
   *
   * $a has no default value: it precedes the required $caller parameter, so a
   * default could never be used and only triggers a deprecation on PHP 8.
   */
  function __construct($a, &$caller) {
    parent::__construct($a, $caller);
  }
  
  /**
   * Old-style (class-named) constructor; simply forwards to __construct().
   */
  function ARC2_RDFParser($a, &$caller) {
    $this->__construct($a, $caller);
  }

  /**
   * Initialises parser state: format setting, triple buffers and counters,
   * duplicate skipping flag and the blank node prefix/counter.
   */
  function __init() {/* proxy_host, proxy_port, proxy_skip, http_accept_header, http_user_agent_header, max_redirects, reader, skip_dupes */
    parent::__init();
    $this->a['format'] = $this->v('format', false, $this->a);
    $this->triples = array();
    $this->t_count = 0;
    $this->added_triples = array();
    $this->skip_dupes = $this->v('skip_dupes', false, $this->a);
    /* default prefix: "arc" + 4 hex chars + "b", unless 'bnode_prefix' is configured */
    $this->bnode_prefix = $this->v('bnode_prefix', 'arc'.substr(md5(uniqid(rand())), 0, 4).'b', $this->a);
    $this->bnode_id = 0;
  }

  /*  */
  
  /**
   * Injects the reader to use; parse() only creates its own reader when none is set.
   */
  function setReader(&$reader) {
    $this->reader =& $reader;
  }
  
  /**
   * Parses the document at $path (or the passed $data) with the parser that
   * matches the format reported by the reader.
   *
   * @param string $path Location handed to the reader and the delegate parser.
   * @param string $data Optional inline data handed to the reader and the delegate parser.
   * @return mixed Result of addError() when no parser is mapped to the detected
   *               format, otherwise the result of the delegate parser's parse().
   */
  function parse($path, $data = '') {
    /* reader */
    if (!isset($this->reader)) {
      ARC2::inc('Reader');
      /* plain assignment: "= & new" is a parse error since PHP 7 */
      $this->reader = new ARC2_Reader($this->a, $this);
    }
    $this->reader->activate($path, $data);
    /* format detection: reader format => parser class name stem */
    $mappings = array(
      'rdfxml' => 'RDFXML', 
      'turtle' => 'Turtle', 
      'sparqlxml' => 'SPOG', 
      'ntriples' => 'Turtle', 
      'html' => 'SemHTML',
      'rss' => 'RSS',
      'atom' => 'Atom',
      'sgajson' => 'SGAJSON',
      'cbjson' => 'CBJSON'
    );
    $format = $this->reader->getFormat();
    if (!$format || !isset($mappings[$format])) {
      return $this->addError('No parser available for "' . $format . '".');
    }
    /* format parser */
    $suffix = $mappings[$format] . 'Parser';
    ARC2::inc($suffix);
    $cls = 'ARC2_' . $suffix;
    $this->parser = new $cls($this->a, $this);
    /* the delegate shares this instance's reader */
    $this->parser->setReader($this->reader);
    return $this->parser->parse($path, $data);
  }
  
  /**
   * Parses inline data, passing ARC2::getScriptURI() as the path.
   */
  function parseData($data) {
    return $this->parse(ARC2::getScriptURI(), $data);
  }
  
  /*  */

  /**
   * Intentionally empty in this generic class.
   */
  function done() {
  }

  /*  */
  
  /**
   * Returns the next blank node identifier: "_:" + prefix + incremented counter.
   */
  function createBnodeID(){
    $this->bnode_id++;
    return '_:' . $this->bnode_prefix . $this->bnode_id;
  }

  /**
   * Returns the delegate parser's triples, or an empty array when no parser is set.
   */
  function getTriples() {
    return $this->v('parser') ? $this->m('getTriples', false, array(), $this->v('parser')) : array();
  }
  
  /**
   * Returns the delegate parser's triple count, or 0 when no parser is set.
   */
  function countTriples() {
    return $this->v('parser') ? $this->m('countTriples', false, 0, $this->v('parser')) : 0;
  }
  
  /**
   * Passes the current triples to ARC2::getSimpleIndex() and returns its result.
   */
  function getSimpleIndex($flatten_objects = 1, $vals = '') {
    return ARC2::getSimpleIndex($this->getTriples(), $flatten_objects, $vals);
  }
  
  /**
   * Re-initialises this instance and drops the reader and the delegate parser
   * (the delegate is re-initialised before it is released).
   */
  function reset() {
    $this->__init();
    /* unset() is a no-op on a missing property, so no isset() guard is needed */
    unset($this->reader);
    if (isset($this->parser)) {
      $this->parser->__init();
      unset($this->parser);
    }
  }
  
  /*  */
  
  /**
   * Forwards to the delegate parser's extractRDF() if it has one.
   *
   * @return mixed The delegate's result, or null when there is no delegate
   *               parser or it does not implement extractRDF().
   */
  function extractRDF($formats = '') {
    /* guard: no parser exists before parse() or after reset() */
    if (isset($this->parser) && method_exists($this->parser, 'extractRDF')) {
      return $this->parser->extractRDF($formats);
    }
    return null;
  }
  
  /*  */
  
  /**
   * Forwards to the delegate parser's getEncoding() if it has one.
   *
   * @return mixed The delegate's result, or null when there is no delegate
   *               parser or it does not implement getEncoding().
   */
  function getEncoding($src = 'config') {
    /* guard: no parser exists before parse() or after reset() */
    if (isset($this->parser) && method_exists($this->parser, 'getEncoding')) {
      return $this->parser->getEncoding($src);
    }
    return null;
  }

  /*  */

}