Merge branch 'v0.14.x'

viant · Feb 26, 2024 · 0435cef · 0435cef
2 parents a6a3adc + 48c8606
commit 0435cef
Show file tree

Hide file tree

Showing 120 changed files with 4,864 additions and 968 deletions.
diff --git a/README.md b/README.md
@@ -12,24 +12,23 @@ This library is compatible with Go 1.17+
 - [Contribution](#contributing-to-mly)
 - [License](#license)
 
-# Motivation
+# Introduction
 
 The goal of this library to provide a deep-learning model prediction HTTP service which can speed up end to end execution by leveraging a caching system. 
 Currently the only deep-learning library supported is TensorFlow.
-Caching has to be seamless, meaning the client, behind the scenes, should take care of any dictionary-based key generation and model changes automatically. 
+
+The client cares of any dictionary-based key generation and model changes automatically. 
 
 In practice this library can provide substantial (100x) E2E execution improvement from the client side perspective, with the input space of billions of distinct keys. 
 
 Each model provides both TensorFlow and cache-level performance metrics via HTTP REST API.
 
-# Introduction
-
 This project provides libraries for both the client and the web service.
 
 The web service supports multiple TensorFlow model integrations on the URI level, with `GET`, `POST`  method support with HTTP 2.0 in full duplex mode as provided by Golang libraries.
 
 The service automatically detects and reloads any model changes; it will poll the source files for modifications once a minute.
-Technically, any HTTP client can work with the service, but to get the seamless caching benefit, it's recommended to use provided client.
+Technically, any HTTP client can work with the service, but the provided client provides extra caching support.
 
 # Quickstart
 
@@ -42,7 +41,7 @@ Endpoint:
   Port: 8086
 Models:
   - ID: ml0
-    URL: gs://modelBucket/Ml0ModelFolder
+    URL: /path/to/model/ml0
 ```
 
 The `URL` is loaded using [afs](https://github.com/viant/afs).
@@ -93,7 +92,7 @@ Endpoint:
 
 Models:
   - ID: mlx
-    URL: s3://myBucket/myModelX
+    URL: /path/to/myModelX
     Datastore: mlxCache
 
 Datastores:
@@ -117,37 +116,32 @@ Once a client detects a change in dictionary hash code, it automatically initiat
 
 ## Server
 
+See [`service/endpoint/config.go`](service/endpoint/config.go).
 The server accepts configuration with the following options:
-See `service/config/model.go`.
 
-* `Models` : list of models
-  - `ID`: `string` - required - model ID.
+* `Models` : list of models - see [`service/config/model.go`](service/config/model.go) for all options.
+  - `ID`: `string` - required - model ID, used to generate the URLs.
   - `Debug`: `bool` - optional - enables further output and debugging.
-  - `Location`: `string` - optional - path the model will be copied to.
-  - `Dir`: `string` - optional - if `Location` is not provided, will use this directory after `os.TempDir()` and `ID` as `Location`, ignored if `Location` is provided.
   - `URL`: `string` - required - model location source.
     * to use S3, set environment variable `AWS_SDK_LOAD_CONFIG=true`
     * to use GCS, set environment variable `GOOGLE_APPLICATION_CREDENTIALS=true`
-  - `Tags`: `[]string` - optional - model tags, default "serve".
-  - `UseDict`: `bool` - optional - enables cache key space reduction, default `true`.
-  - `DictURL`: `string` - deprecated, optional - URL that has dictionary; this is no longer supported and will be removed.
+  - `DataStore`: `string` - optional - name of Datastore to cache, should match `Datastores[].ID`.
+  - `Transformer`: `string` - optional - name of model output transformer. See [#Transformer](#Transformer).
+  - `Batch`: optional - enables or overrides server-side batching configuration. See [`service/tfmodel/batcher/config/config.go`](service/tfmodel/batcher/config/config.go).
+  - `Test`: optional - enables a client request to send to self on start up.
+    * `Test`: `bool` - if `true`, a client will generate a non-batch request with random values based on the model input signature.
+    * `Single`: `map[string]interface{}` - if present, will use the provided values for certain input keys, otherwise randomly generated based on model input signature.
+    * `SingleBatch`: `bool` - if `true`, a client will generate a batch request with random values based on the model input signature; if `Single` is set, values will be used for provided keys.
+    * `Batch`: `map[string][]interface{}` - if present, will be used to generate a batch of requests for the self-test.
   - `Inputs`: optional - used to further provide or define inputs, a list of `shared.Field`.
     * `Name`: `string` - required - input name, only required if an entry is provided.
     * `Index`: `int` - optional - used to maintain cache key ordering.
-    * `DataType`: `string` - optional - will be extracted from the model.
     * `Auxiliary`: `bool` - optional - the input is permitted to be provided in an evaluation request.
     * `Wildcard`: `bool` - conditionally required - if enabled this input will not have a vocabulary for lookup; if `UseDict` is true, the service will refuse to start if it cannot guess the vocabulary extraction Operation. 
     * `Precision`: `int` - conditionally required - if the input is a float type and dictionary is enabled, this can be used to round the value to a lower precision which can improve cache hit rates; if `UseDict` is true, the service will refuse to start if it encounters a float input without a `Precision`.
-  - `KeyFields`: `[]string` - deprecated, optional - list of fields used to generate caching key (by default, all model inputs, sorted alphabetically). Deprecated, no longer used; all inputs are used to generate a cache key.
+  - `KeyFields`: `[]string` - optional - list of fields used to generate caching key (by default, all model inputs, sorted alphabetically). Can be used to order and add valid inputs that can be used as a cache key but not used as prediction input.
   - `Auxiliary`: `[]string` - deprecated, optional - list of additional fields that are acceptable for eval server call. Deprecated, use `Field.Auxiliary`.
   - `Outputs`: `[]shared.Field` - deprecated, optional - model outputs are automatically pulled from the model.
-  - `DataStore`: `string` - optional - name of Datastore to cache, should match `Datastores[].ID`.
-  - `Transformer`: `string` - optional - name of model output transformer.
-  - `Test`: optional - enables a client request to send to self on start up.
-    * `Test`: `bool` - if `true`, a client will generate a non-batch request with random values based on the model input signature
-    * `Single`: `map[string]interface{}` - if present, will use the provided values for certain input keys, otherwise randomly generated based on model input signature
-    * `SingleBatch`: `bool` - if `true`, a client will generate a batch request with random values based on the model input signature; if `Single` is set, values will be used for provided keys
-    * `Batch`: `map[string][]interface{}` - if present, will be used to generate a batch of requests
 
 * `Connection`: optional - list of external Aerospike connections.
   - `ID`: `string` - required - connection ID
@@ -177,7 +171,7 @@ See `service/config/model.go`.
 
 ## Client
 
-`mly` client does not come with external config file.
+`mly` client does not come with an external config file.
 
 To create a client, use the following snippet:
 
@@ -190,7 +184,7 @@ Where optional `options` can be of, but not limited to, the following:
   * `NewCacheScope(CacheScopeLocal|CacheScopeL1|CacheScopeL2)`
   * `NewGmetric()` - custom instance of `gmetric` service
 
-See `shared/client/option.go` for more options. 
+See [`shared/client/option.go`](shared/client/option.go) for more options. 
 
 # Usage
 
@@ -250,7 +244,7 @@ func main() {
 }
 ```
 
-# Output post-processing
+# Transformer 
 
 By default, the model signature output name alongside the model prediction gets used to produce cachable output.
 This process can be customized for specific needs.
@@ -324,6 +318,8 @@ The `/v1/api/health` endpoint will provide a response like:
 
 ## `/v1/api/metric/operations` 
 
+TODO - Add more metrics added from server-side batching.
+
 All metrics registered in the web service.
 These are provided via [`gmetric`](https://github.com/viant/gmetric).
 

diff --git a/example/client/mlyc/mlyc.go b/example/client/mlyc/mlyc.go
@@ -26,9 +26,8 @@ func main() {
 	})
 
 	// this is an example usage of a non-Storable type being bound to Response.Data
-	client.CustomMakerRegistry.Register("custom", func() interface{} {
-		// TODO provide actual example
-		return struct{}{}
+	client.CustomMakerRegistry.Register("slft_batch", func() interface{} {
+		return new(slfmodel.Segmented)
 	})
 
 	client.Run(os.Args[1:])

diff --git a/example/client/option.go b/example/client/option.go
@@ -2,15 +2,19 @@ package client
 
 import (
 	"fmt"
+	"strconv"
+	"strings"
 
 	"github.com/viant/mly/shared/client"
 )
 
 type Options struct {
-	Host      string `short:"h" long:"host" description:"endpoint host"`
-	Port      int    `short:"p" long:"port" description:"endpoint port"`
-	Debug     bool   `long:"debug"`
-	TimeoutUs int    `short:"t" long:"timeout"`
+	Host    string `long:"host" description:"endpoint host"`
+	Port    int    `short:"p" long:"port" description:"endpoint port"`
+	Address string `long:"address" description:"address overrides host and port"`
+
+	Debug     bool `long:"debug"`
+	TimeoutUs int  `short:"t" long:"timeout"`
 
 	Model       string `short:"m" long:"model" description:"model"`
 	Storable    string `short:"s" long:"storable"`
@@ -19,7 +23,13 @@ type Options struct {
 	CacheMB     int  `long:"cache"`
 	NoHashCheck bool `long:"nohash"`
 
+	// The total number of requests sent will be payloads * concurrent * repeats.
+
+	Workers    int `long:"workers"`
 	Concurrent int `long:"concurrent"`
+	// In the case Workers > 1, Repeats is the total number of times each payload
+	// is run, not the number of times each workers sends the paylod.
+	Repeats int `long:"repeats" description:"times to repeat all payloads"`
 
 	PayloadStr   []string `short:"a" long:"payload"`
 	PayloadPause int      `long:"pause" description:"pause seconds between payloads"`
@@ -30,6 +40,11 @@ type Options struct {
 	NoOutput     bool `long:"noout"`
 	Metrics      bool `long:"metrics"`
 	ErrorHistory bool `long:"errhist"`
+
+	// Report forces NoOutput and SkipError true, Metrics and ErrorHistory false.
+	// Will generate a final JSON object as its only output to stdout.
+	// stderr may have other output if Debug is true or there are other errors.
+	Report bool `long:"report"`
 }
 
 type C uint8
@@ -49,10 +64,25 @@ func (o *Options) Init() {
 		o.Port = 8086
 	}
 
+	if o.Repeats <= 0 {
+		o.Repeats = 1
+	}
+
 	if o.Concurrent <= 0 {
 		o.Concurrent = 1
 	}
 
+	if o.Workers <= 0 {
+		o.Workers = 1
+	}
+
+	if o.Report {
+		o.SkipError = true
+		o.NoOutput = true
+
+		o.Metrics = false
+		o.ErrorHistory = false
+	}
 }
 
 func (o *Options) Payloads() ([]*CliPayload, error) {
@@ -80,5 +110,33 @@ func (o *Options) Validate() error {
 }
 
 func (o *Options) Hosts() []*client.Host {
-	return []*client.Host{{Name: o.Host, Port: o.Port}}
+	if o.Address != "" {
+		elems := strings.Split(o.Address, ",")
+		hosts := make([]*client.Host, len(elems))
+		for i, addr := range elems {
+			components := strings.Split(addr, ":")
+
+			var domain string
+			var port int
+			if len(components) == 1 {
+				// no port separator, assume domain only
+				domain = addr
+			} else if len(components) == 2 {
+				domain = components[0]
+				var err error
+				port, err = strconv.Atoi(components[1])
+				if err != nil {
+					panic(err)
+				}
+			} else {
+				panic(fmt.Sprintf("unknown address: %s", addr))
+			}
+
+			hosts[i] = client.NewHost(domain, port)
+		}
+
+		return hosts
+	}
+
+	return []*client.Host{client.NewHost(o.Host, o.Port)}
 }
diff --git a/example/client/report.go b/example/client/report.go
@@ -0,0 +1,35 @@
+package client
+
+import (
+	"time"
+
+	"github.com/viant/gmetric"
+	"github.com/viant/mly/shared/client"
+)
+
+type (
+	WPRun struct {
+		Response *client.Response
+		Err      error
+	}
+
+	WorkerPayload struct {
+		Payload *CliPayload
+		Worker  int
+
+		Runs []WPRun
+	}
+
+	RepeatSet struct {
+		Num       int
+		WPayloads []WorkerPayload
+	}
+
+	Report struct {
+		Start time.Time
+		End   time.Time
+
+		Runs    []RepeatSet
+		Metrics []gmetric.Operation
+	}
+)