Skip to content

Commit

Permalink
add presto phpclient
Browse files Browse the repository at this point in the history
  • Loading branch information
helight committed Nov 12, 2018
1 parent 4297569 commit 29865ef
Show file tree
Hide file tree
Showing 3 changed files with 351 additions and 0 deletions.
220 changes: 220 additions & 0 deletions presto_phpclient/PrestoClient.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,220 @@
<?php
/*
PrestoClient provides a way to communicate with Presto server REST interface. Presto is a fast query
engine developed by Facebook that runs distributed queries against Hadoop HDFS servers.
Copyright 2013 Xtendsys | xtendsys.net
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at:
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.*/

class PrestoClient {
/**
* The following parameters may be modified depending on your configuration
*/
private $source = 'PrestoPhpClient';
private $version = '0.3';
private $maximumRetries = 5;
private $prestoUser = "presto";
private $prestoSchema = "default";
private $prestoCatalog = "hive";
private $userAgent = "";

//Do not modify below this line
private $nextUri =" ";
private $infoUri = "";
private $partialCancelUri = "";
private $state = "NONE";

private $url;
private $headers;
private $result;
private $request;


public $HTTP_error;
public $data = array();


/**
* Constructs the presto connection instance
*
* @param $connectUrl
* @param $catalog
*/
public function __construct($connectUrl,$catalog){
$this->url = $connectUrl;
$this->prestoCatalog = $catalog;
}
/**
* Return Data as an array. Check that the current status is FINISHED
*
* @return array|false
*/
public function GetData(){
if ($this->state!="FINISHED"){
return false;
}
return $this->data;
}

/**
* prepares the query
*
* @param $query
* @return bool
* @throws Exception
*/
public function Query($query) {

$this->data=array();
$this->userAgent = $this->source."/".$this->version;

$this->request = $query;
//check that no other queries are already running for this object
if ($this->state === "RUNNING") {
return false;
}

/**
* check that query is completed, and that we don't start
* a new query before the previous is finished
*/
if ($query="") {
return false;
}

$this->headers = array(
"X-Presto-User: ".$this->prestoUser,
"X-Presto-Catalog: ".$this->prestoCatalog,
"X-Presto-Schema: ".$this->prestoSchema,
"User-Agent: ".$this->userAgent);

$connect = \curl_init();
\curl_setopt($connect,CURLOPT_URL, $this->url);
\curl_setopt($connect,CURLOPT_HTTPHEADER, $this->headers);
\curl_setopt($connect,CURLOPT_RETURNTRANSFER, 1);
\curl_setopt($connect,CURLOPT_POST, 1);
\curl_setopt($connect,CURLOPT_POSTFIELDS, $this->request);

$this->result = \curl_exec($connect);

$httpCode = \curl_getinfo($connect, CURLINFO_HTTP_CODE);

if($httpCode!="200"){
$this->HTTP_error = $httpCode;
echo "HTTP ERRROR: ".$this->HTTP_error;
return false;
// throw new PrestoException("HTTP ERRROR: $this->HTTP_error");
}

//set status to RUNNING
curl_close($connect);
$this->state = "RUNNING";
return true;
}


/**
* waits until query was executed
*
* @return bool
* @throws PrestoException
*/
function WaitQueryExec() {

$this->GetVarFromResult();

while ($this->nextUri){

usleep(500000);
$this->result = file_get_contents($this->nextUri);
$this->GetVarFromResult();
}

if ($this->state!="FINISHED"){
// throw new PrestoException("Incoherent State at end of query");
echo "Incoherent State at end of query";
return false;
}

return true;

}

/**
* Provide Information on the query execution
* The server keeps the information for 15minutes
* Return the raw JSON message for now
*
* @return string
*/
function GetInfo() {

$connect = \curl_init();
\curl_setopt($connect,CURLOPT_URL, $this->infoUri);
\curl_setopt($connect,CURLOPT_HTTPHEADER, $this->headers);
$infoRequest = \curl_exec($connect);
\curl_close($connect);

return $infoRequest;
}

public function GetResult() {
return json_decode($this->result);
}

private function GetVarFromResult() {
/* Retrieve the variables from the JSON answer */

$decodedJson = json_decode($this->result);

if (isset($decodedJson->{'nextUri'})){
$this->nextUri = $decodedJson->{'nextUri'};} else {$this->nextUri = false;}

if (isset($decodedJson->{'data'})){
$this->data = array_merge($this->data,$decodedJson->{'data'});}

if (isset($decodedJson->{'infoUri'})){
$this->infoUri = $decodedJson->{'infoUri'};}

if (isset($decodedJson->{'partialCancelUri'})){
$this->partialCancelUri = $decodedJson->{'partialCancelUri'};}

if (isset($decodedJson->{'stats'})){
$status = $decodedJson->{'stats'};
$this->state = $status->{'state'};}
}

/**
* Provide a function to cancel current request if not yet finished
*/
private function Cancel(){
if (!isset($this->partialCancelUri)){
return false;

$connect = \curl_init();
\curl_setopt($connect,CURLOPT_URL, $this->partialCancelUri);
\curl_setopt($connect,CURLOPT_HTTPHEADER, $this->headers);
$infoRequest = \curl_exec($connect);
\curl_close($connect);

$httpCode = \curl_getinfo($connect, CURLINFO_HTTP_CODE);

if($httpCode!="204"){
return false;}else{
return true;}
}
}
}

?>
63 changes: 63 additions & 0 deletions presto_phpclient/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
PrestoPhpClient
===============

Simple Php Class to connect to a PrestoDB Server that runs distributed queries against
a Hadoop HDFS cluster.
Presto uses a subset of SQL as its query language. Presto is an alternative for
Hadoop-Hive or Impala.

Change from: https://github.com/360d-io-labs/PhpPrestoClient


Usage
-----------------
See index.php for a short demo on how to use it

Requirement
-----------------
Php-Curl

Compatibility
-----------------
Tested successfully with all version of Presto up to 0.58

Presto client protocol
----------------------
The following description was made by Ivo Herweijer for its Python interface

The communication protocol used between Presto clients and servers is not documented yet. It seems to
be as follows:

Client sends http POST request to the Presto server, page: "/v1/statement". Header information should
include: X-Presto-Catalog, X-Presto-Source, X-Presto-Schema, User-Agent, X-Presto-User. The body of the
request should contain the sql statement. The server responds by returning JSON data (http status-code 200).
This reply may contain up to 3 uri's. One giving the link to get more information about the query execution
('infoUri'), another giving the link to fetch the next packet of data ('nextUri') and one with the uri to
cancel the query ('partialCancelUri').

The client should send GET requests to the server (Header: X-Presto-Source, User-Agent, X-Presto-User.
Body: empty) following the 'nextUri' link from the previous response until the servers response does not
contain an 'nextUri' link anymore. When there is no 'nextUri' the query is finished. If the last response
from the server included an error section ('error') the query failed, otherwise the query succeeded. If
the http status of the server response is anything other than 200 with Content-Type application/json, the
query should also be considered failed. A 503 http response means that the server is (too) busy. Retry the
request after waiting at least 50ms.
The server response may contain a 'state' variable. This is for informational purposes only (may be subject
to change in future implementations).
Each response by the server to a 'nextUri' may contain information about the columns returned by the query
and all- or part of the querydata. If the response contains a data section the columns section will always
be available.

The server reponse may contain a variable with the uri to cancel the query ('partialCancelUri'). The client
may issue a DELETE request to the server using this link. Response http status-code is 204.

The Presto server will retain information about finished queries for 15 minutes. When a client does not
respond to the server (by following the 'nextUri' links) the server will cancel these 'dead' queries after
5 minutes. These timeouts are hardcoded in the Presto server source code.


Thanks
------

Thanks to Ivo Herweijer from easywarehousing.com that is doing a Python interface and from which I copied the
protocol description and took some inspiration for the Php interface.
68 changes: 68 additions & 0 deletions presto_phpclient/index.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
<?php
/*
Copyright 2013 Xtendsys | xtendsys.net
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at:
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.*/


require_once(__DIR__ . '/PrestoClient.php');

//Create a new connection object. Provide URL and catalog as parameters
$presto = new PrestoClient("http://localhost:8080/v1/statement","mysql");

$str_sql = "select * from mysql.helight.MyClass a, mysql2.helight.myclass b where a.id =b.id";
//Prepare your sql request
try {
// $presto->Query("select count(*) from hive.default.my_table");
$presto->Query($str_sql);
} catch (PrestoException $e) {
var_dump($e);
}

//Execute the request and build the result
if ($presto->WaitQueryExec()) {
$res = $presto->GetResult()->columns; //[0]->name;
foreach($res as $key=>$value) {
$a[$key]["name"]=$value->name;
$a[$key]["type"]=$value->type;
}

foreach($a as $key=>$value) {
//print_r($key."\n".$value["type"]."\n".$value["name"]."\n");
$colum[$key]=array("DataType"=>"System.".$value["type"],"Index"=>$key,"Name"=>$value["name"],"DisplayName"=>null);
$colmap[$key]=array("ColumnName"=>$value["name"],"Description"=>"","DisplayName"=>"");
}

//print_r($res);
//Get the result
$data=array();
$rows=array();
$answer = $presto->GetData();
foreach($answer as $row=>$value1) { //get rows
foreach($value1 as $col=>$value2) {
$data[$col]=array("ColumnName"=>$a[$col]["name"],"Value"=>$value2);
$item=array("ItemArray"=>$data);
}
$rows[$row]=$item;
//$rows=array("Rows"=&gt;$tmp);
}
$table[0]=array("Name"=>"sql","Sql"=>$str_sql,"Status"=>"0","ColumnMapping"=>$colmap,"Columns"=>$colum,"Rows"=>$rows);
$output=array("HasError"=>"false","Message"=>"","Tables"=>$table);
print_r(json_encode($output));
} else {
#$output=array("HasError"=&gt;"true","Message"=&gt;"table is not found!","Tables"=&gt;array());
$output=array("HasError"=>"true","Message"=>$err,"Tables"=>array());
print_r(json_encode($output));
}

?>

0 comments on commit 29865ef

Please sign in to comment.