Skip to content

Commit

Permalink
NEOS-1704: Adds Neosync Transformers to Anonymization API (#3125)
Browse files Browse the repository at this point in the history
  • Loading branch information
nickzelei authored Jan 14, 2025
1 parent 1e71364 commit f9c6401
Show file tree
Hide file tree
Showing 27 changed files with 1,967 additions and 794 deletions.
3 changes: 3 additions & 0 deletions .mockery.yml
Original file line number Diff line number Diff line change
Expand Up @@ -120,3 +120,6 @@ packages:
interfaces:
Interface:
EntityEnforcer:
github.com/nucleuscloud/neosync/internal/ee/transformers/functions:
interfaces:
NeosyncOperatorApi:
1,234 changes: 654 additions & 580 deletions backend/gen/go/protos/mgmt/v1alpha1/transformer.pb.go

Large diffs are not rendered by default.

10 changes: 10 additions & 0 deletions backend/gen/go/protos/mgmt/v1alpha1/transformer.pb.json.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions backend/protos/mgmt/v1alpha1/transformer.proto
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,8 @@ message PiiAnonymizer {
Mask mask = 3;
// Configures the anonymizer to hash the identified PII
Hash hash = 4;
// Configures the anonymizer to use a Neosync configured transformer
Transform transform = 5;
}

message Replace {
Expand Down Expand Up @@ -313,6 +315,10 @@ message PiiAnonymizer {
HASH_TYPE_SHA512 = 3;
}
}
message Transform {
// The transformer to use. If not provided, a transformer will automatically be selected (if supported), otherwise it falls back to generating a random hash.
TransformerConfig config = 1;
}
}

enum GenerateEmailType {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,15 @@ package v1alpha_anonymizationservice
import (
"context"
"fmt"
"iter"
"strings"
"time"

"connectrpc.com/connect"
"github.com/google/uuid"
mgmtv1alpha1 "github.com/nucleuscloud/neosync/backend/gen/go/protos/mgmt/v1alpha1"
"github.com/nucleuscloud/neosync/backend/gen/go/protos/mgmt/v1alpha1/mgmtv1alpha1connect"
logger_interceptor "github.com/nucleuscloud/neosync/backend/internal/connect/interceptors/logger"
nucleuserrors "github.com/nucleuscloud/neosync/backend/internal/errors"
"github.com/nucleuscloud/neosync/backend/internal/neosyncdb"
"github.com/nucleuscloud/neosync/backend/pkg/metrics"
Expand All @@ -29,6 +31,7 @@ func (s *Service) AnonymizeMany(
ctx context.Context,
req *connect.Request[mgmtv1alpha1.AnonymizeManyRequest],
) (*connect.Response[mgmtv1alpha1.AnonymizeManyResponse], error) {
logger := logger_interceptor.GetLoggerFromContextOrDefault(ctx)
if !s.cfg.IsNeosyncCloud {
return nil, nucleuserrors.NewNotImplemented(
fmt.Sprintf("%s is not implemented in the OSS version of Neosync.", strings.TrimPrefix(mgmtv1alpha1connect.AnonymizationServiceAnonymizeManyProcedure, "/")),
Expand Down Expand Up @@ -59,6 +62,12 @@ func (s *Service) AnonymizeMany(
)
}

for cfg := range getTransformerConfigsToValidate(req.Msg) {
if err := validateTransformerConfig(cfg); err != nil {
return nil, err
}
}

requestedCount := uint64(len(req.Msg.InputData))
resp, err := s.useraccountService.IsAccountStatusValid(ctx, connect.NewRequest(&mgmtv1alpha1.IsAccountStatusValidRequest{
AccountId: req.Msg.GetAccountId(),
Expand All @@ -77,6 +86,7 @@ func (s *Service) AnonymizeMany(
jsonanonymizer.WithDefaultTransformers(req.Msg.DefaultTransformers),
jsonanonymizer.WithHaltOnFailure(req.Msg.HaltOnFailure),
jsonanonymizer.WithConditionalAnonymizeConfig(s.cfg.IsPresidioEnabled, s.analyze, s.anonymize, s.cfg.PresidioDefaultLanguage),
jsonanonymizer.WithLogger(logger),
)
if err != nil {
return nil, err
Expand Down Expand Up @@ -135,6 +145,7 @@ func (s *Service) AnonymizeSingle(
ctx context.Context,
req *connect.Request[mgmtv1alpha1.AnonymizeSingleRequest],
) (*connect.Response[mgmtv1alpha1.AnonymizeSingleResponse], error) {
logger := logger_interceptor.GetLoggerFromContextOrDefault(ctx)
user, err := s.userdataclient.GetUser(ctx)
if err != nil {
return nil, err
Expand Down Expand Up @@ -167,6 +178,12 @@ func (s *Service) AnonymizeSingle(
}
}

for cfg := range getTransformerConfigsToValidate(req.Msg) {
if err := validateTransformerConfig(cfg); err != nil {
return nil, err
}
}

requestedCount := uint64(len(req.Msg.InputData))
resp, err := s.useraccountService.IsAccountStatusValid(ctx, connect.NewRequest(&mgmtv1alpha1.IsAccountStatusValidRequest{
AccountId: req.Msg.GetAccountId(),
Expand All @@ -184,6 +201,7 @@ func (s *Service) AnonymizeSingle(
jsonanonymizer.WithTransformerMappings(req.Msg.TransformerMappings),
jsonanonymizer.WithDefaultTransformers(req.Msg.DefaultTransformers),
jsonanonymizer.WithConditionalAnonymizeConfig(s.cfg.IsPresidioEnabled, s.analyze, s.anonymize, s.cfg.PresidioDefaultLanguage),
jsonanonymizer.WithLogger(logger),
)
if err != nil {
return nil, err
Expand Down Expand Up @@ -246,3 +264,63 @@ func getTraceID(ctx context.Context) string {
}
return ""
}

// validateTransformerConfig reports whether cfg is an acceptable transformer
// configuration.
//
// A nil cfg is rejected with a plain error. Configs that are not
// TransformPiiText are accepted as-is. For a TransformPiiText config, the
// default anonymizer and every entity anonymizer are inspected: if any of them
// is set up with a transformer that is itself TransformPiiText, a bad-request
// error is returned, since TransformPiiText may not be nested within itself.
func validateTransformerConfig(cfg *mgmtv1alpha1.TransformerConfig) error {
	if cfg == nil {
		return fmt.Errorf("transformer config is nil")
	}

	piiText := cfg.GetTransformPiiTextConfig()
	if piiText == nil {
		// Only TransformPiiText configs are checked here.
		return nil
	}

	if anonymizer := piiText.GetDefaultAnonymizer(); anonymizer != nil {
		if nested := anonymizer.GetTransform().GetConfig().GetTransformPiiTextConfig(); nested != nil {
			return nucleuserrors.NewBadRequest("found nested TransformPiiText config in default anonymizer. TransformPiiText may not be used deeply nested within itself.")
		}
	}

	for name, anonymizer := range piiText.GetEntityAnonymizers() {
		nested := anonymizer.GetTransform().GetConfig().GetTransformPiiTextConfig()
		if nested == nil {
			continue
		}
		return nucleuserrors.NewBadRequest(fmt.Sprintf("found nested TransformPiiText config in entity (%s) anonymizer. TransformPiiText may not be used deeply nested within itself.", name))
	}

	return nil
}

// transformerMsgToValidate is the subset of a request message that
// getTransformerConfigsToValidate reads: the default transformers and the
// list of transformer mappings. The AnonymizeMany and AnonymizeSingle
// handlers pass their request messages through this interface.
type transformerMsgToValidate interface {
GetDefaultTransformers() *mgmtv1alpha1.DefaultTransformersConfig
GetTransformerMappings() []*mgmtv1alpha1.TransformerMapping
}

// getTransformerConfigsToValidate returns an iterator over every non-nil
// transformer config carried by msg.
//
// Configs are produced in a fixed order: the Boolean, N and S default
// transformers first, followed by the transformer of each mapping in the
// order the mappings appear. Nil configs are skipped. Iteration stops as soon
// as the consumer's yield function returns false.
func getTransformerConfigsToValidate(msg transformerMsgToValidate) iter.Seq[*mgmtv1alpha1.TransformerConfig] {
	return func(yield func(*mgmtv1alpha1.TransformerConfig) bool) {
		defaults := msg.GetDefaultTransformers()
		defaultConfigs := []*mgmtv1alpha1.TransformerConfig{
			defaults.GetBoolean(),
			defaults.GetN(),
			defaults.GetS(),
		}
		for _, candidate := range defaultConfigs {
			if candidate == nil {
				continue
			}
			if !yield(candidate) {
				return
			}
		}

		for _, mapping := range msg.GetTransformerMappings() {
			transformer := mapping.GetTransformer()
			if transformer == nil {
				continue
			}
			if !yield(transformer) {
				return
			}
		}
	}
}
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,96 @@ func (s *IntegrationTestSuite) Test_AnonymizeService_AnonymizeSingle() {
}
}

// Test_AnonymizeService_AnonymizeSingle_InvalidTransformerConfig verifies that
// AnonymizeSingle rejects requests whose transformer configuration nests a
// TransformPiiText config inside another TransformPiiText config.
//
// Each subtest makes exactly one part of the request invalid so that it
// exercises a single validation path:
//   - default-boolean: the nested config sits in the default anonymizer of the
//     Boolean default transformer.
//   - transformer-mappings: the nested config sits in an entity anonymizer of a
//     transformer mapping.
//
// Both are expected to fail with connect.CodeInvalidArgument.
func (s *IntegrationTestSuite) Test_AnonymizeService_AnonymizeSingle_InvalidTransformerConfig() {
	t := s.T()

	t.Run("no-nested-transformpiitext", func(t *testing.T) {
		userclient := s.NeosyncCloudAuthenticatedLicensedClients.Users(integrationtests_test.WithUserId(testAuthUserId))
		anonclient := s.NeosyncCloudAuthenticatedLicensedClients.Anonymize(integrationtests_test.WithUserId(testAuthUserId))

		s.setUser(s.ctx, userclient)
		accountId := s.createBilledTeamAccount(s.ctx, userclient, "team34", "foo34")

		t.Run("default-boolean", func(t *testing.T) {
			resp, err := anonclient.AnonymizeSingle(
				s.ctx,
				connect.NewRequest(&mgmtv1alpha1.AnonymizeSingleRequest{
					AccountId: accountId,
					InputData: "foo",
					DefaultTransformers: &mgmtv1alpha1.DefaultTransformersConfig{
						Boolean: &mgmtv1alpha1.TransformerConfig{
							Config: &mgmtv1alpha1.TransformerConfig_TransformPiiTextConfig{
								TransformPiiTextConfig: &mgmtv1alpha1.TransformPiiText{
									DefaultAnonymizer: &mgmtv1alpha1.PiiAnonymizer{
										Config: &mgmtv1alpha1.PiiAnonymizer_Transform_{Transform: &mgmtv1alpha1.PiiAnonymizer_Transform{
											Config: &mgmtv1alpha1.TransformerConfig{
												Config: &mgmtv1alpha1.TransformerConfig_TransformPiiTextConfig{},
											},
										}},
									},
								},
							},
						},
					},
				}),
			)
			requireErrResp(t, resp, err)
			requireConnectError(t, err, connect.CodeInvalidArgument)
		})

		t.Run("transformer-mappings", func(t *testing.T) {
			// DefaultTransformers is deliberately left unset. Default
			// transformers are validated before mappings, so an invalid default
			// would cause the request to be rejected before the mapping below
			// is ever inspected, and this subtest would not cover the
			// entity-anonymizer check at all.
			resp, err := anonclient.AnonymizeSingle(
				s.ctx,
				connect.NewRequest(&mgmtv1alpha1.AnonymizeSingleRequest{
					AccountId: accountId,
					InputData: "foo",
					TransformerMappings: []*mgmtv1alpha1.TransformerMapping{
						{
							Expression: ".details.name",
							Transformer: &mgmtv1alpha1.TransformerConfig{
								Config: &mgmtv1alpha1.TransformerConfig_TransformPiiTextConfig{
									TransformPiiTextConfig: &mgmtv1alpha1.TransformPiiText{
										EntityAnonymizers: map[string]*mgmtv1alpha1.PiiAnonymizer{
											"PERSON": {
												Config: &mgmtv1alpha1.PiiAnonymizer_Transform_{
													Transform: &mgmtv1alpha1.PiiAnonymizer_Transform{
														Config: &mgmtv1alpha1.TransformerConfig{
															Config: &mgmtv1alpha1.TransformerConfig_TransformPiiTextConfig{},
														},
													},
												},
											},
										},
									},
								},
							},
						},
					},
				}),
			)
			requireErrResp(t, resp, err)
			requireConnectError(t, err, connect.CodeInvalidArgument)
		})
	})
}

func (s *IntegrationTestSuite) Test_AnonymizeService_AnonymizeSingle_ForbiddenTransformers() {
t := s.T()

Expand Down
23 changes: 23 additions & 0 deletions docs/openapi/mgmt/v1alpha1/anonymization.openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -732,6 +732,8 @@ components:
- redact
- required:
- replace
- required:
- transform
- not:
anyOf:
- required:
Expand All @@ -742,6 +744,8 @@ components:
- redact
- required:
- replace
- required:
- transform
anyOf:
- required:
- hash
Expand All @@ -751,6 +755,8 @@ components:
- redact
- required:
- replace
- required:
- transform
- not:
anyOf:
- required:
Expand All @@ -761,6 +767,8 @@ components:
- redact
- required:
- replace
- required:
- transform
properties:
replace:
allOf:
Expand All @@ -782,6 +790,11 @@ components:
- title: hash
description: Configures the anonymizer to hash the identified PII
- $ref: '#/components/schemas/mgmt.v1alpha1.PiiAnonymizer.Hash'
transform:
allOf:
- title: transform
description: Configures the anonymizer to use a Neosync configured transformer
- $ref: '#/components/schemas/mgmt.v1alpha1.PiiAnonymizer.Transform'
title: PiiAnonymizer
additionalProperties: false
mgmt.v1alpha1.PiiAnonymizer.Hash:
Expand Down Expand Up @@ -876,6 +889,16 @@ components:
description: 'The value to replace. If not provided, a template token of the anonymizer is used (e.g. A PERSON entity is replaced with: <PERSON>)'
title: Replace
additionalProperties: false
mgmt.v1alpha1.PiiAnonymizer.Transform:
type: object
properties:
config:
allOf:
- title: config
description: The transformer to use. If not provided, a transformer will automatically be selected (if supported), otherwise it falls back to generating a random hash.
- $ref: '#/components/schemas/mgmt.v1alpha1.TransformerConfig'
title: Transform
additionalProperties: false
mgmt.v1alpha1.PiiDenyRecognizer:
type: object
properties:
Expand Down
Loading

0 comments on commit f9c6401

Please sign in to comment.