Skip to content

Commit

Permalink
[ML] string_utils cleanup. (elastic#71092) (elastic#71253)
Browse files Browse the repository at this point in the history
- Converts string_utils to TypeScript.
- Removes sortByKey() from string_utils, we no longer make use of it.
- Fixes elastic#69499, stringMatch() was defined twice, now moved to string_utils.
- Fixes elastic#69498, OMIT_FIELDS was defined twice, now moved to common/constants/field_types.ts.
  • Loading branch information
walterra authored Jul 9, 2020
1 parent 4dc0986 commit ebfd803
Show file tree
Hide file tree
Showing 10 changed files with 53 additions and 132 deletions.
3 changes: 3 additions & 0 deletions x-pack/plugins/ml/common/constants/field_types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,6 @@ export enum ML_JOB_FIELD_TYPES {

export const MLCATEGORY = 'mlcategory';
export const DOC_COUNT = 'doc_count';

// List of system fields we don't want to display.
export const OMIT_FIELDS: string[] = ['_source', '_type', '_index', '_id', '_version', '_score'];
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ export interface Detector {
function: string;
over_field_name?: string;
partition_field_name?: string;
use_null?: string;
use_null?: boolean;
custom_rules?: CustomRule[];
}
export interface AnalysisLimits {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,6 @@ import { BASIC_NUMERICAL_TYPES, EXTENDED_NUMERICAL_TYPES } from '../../../../com

export const CATEGORICAL_TYPES = new Set(['ip', 'keyword']);

// List of system fields we want to ignore for the numeric field check.
export const OMIT_FIELDS: string[] = ['_source', '_type', '_index', '_id', '_version', '_score'];

// Regression supports numeric fields. Classification supports categorical, numeric, and boolean.
export const shouldAddAsDepVarOption = (field: Field, jobType: AnalyticsJobType) => {
if (field.id === EVENT_RATE_FIELD_ID) return false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,9 @@ import { FormattedMessage } from '@kbn/i18n/react';
import { AnalyticsJobType } from '../../../analytics_management/hooks/use_create_analytics_form/state';
import { ANALYSIS_CONFIG_TYPE } from '../../../../common/analytics';
import { Field, EVENT_RATE_FIELD_ID } from '../../../../../../../common/types/fields';
import { OMIT_FIELDS } from '../../../../../../../common/constants/field_types';
import { BASIC_NUMERICAL_TYPES, EXTENDED_NUMERICAL_TYPES } from '../../../../common/fields';
import { OMIT_FIELDS, CATEGORICAL_TYPES } from './form_options_validation';
import { CATEGORICAL_TYPES } from './form_options_validation';
import { ES_FIELD_TYPES } from '../../../../../../../../../../src/plugins/data/public';
import { newJobCapsService } from '../../../../../services/new_job_capabilities_service';

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ import {
import { getAnalyticsFactory } from '../../services/analytics_service';
import { getTaskStateBadge, getJobTypeBadge, useColumns } from './use_columns';
import { ExpandedRow } from './expanded_row';
import { stringMatch } from '../../../../../util/string_utils';
import {
ProgressBar,
mlInMemoryTableFactory,
Expand All @@ -65,14 +66,6 @@ function getItemIdToExpandedRowMap(
}, {} as ItemIdToExpandedRowMap);
}

/**
 * Case-insensitive substring check used for free-text filtering.
 *
 * Returns `true` only when both arguments are strings and `substr` occurs
 * somewhere in `str`, ignoring case. `substr` is compared literally rather
 * than being handed to `String.prototype.match`, which would compile it into
 * a regular expression: input such as `(` would then throw a SyntaxError and
 * `.` would match any character.
 *
 * @param str - The text to search in; non-string values never match.
 * @param substr - The text to look for; non-string values never match.
 * @returns Whether `substr` is contained in `str`, ignoring case.
 */
function stringMatch(str: string | undefined, substr: unknown): boolean {
  return (
    typeof str === 'string' &&
    typeof substr === 'string' &&
    str.toLowerCase().includes(substr.toLowerCase())
  );
}

const MlInMemoryTable = mlInMemoryTableFactory<DataFrameAnalyticsListRow>();

interface Props {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,12 @@ import { getToastNotifications } from '../../../util/dependency_cache';
import { IndexPattern } from '../../../../../../../../src/plugins/data/public';

import { SavedSearchQuery } from '../../../contexts/ml';
import { OMIT_FIELDS } from '../../../../../common/constants/field_types';
import { IndexPatternTitle } from '../../../../../common/types/kibana';

import { ml } from '../../../services/ml_api_service';
import { FieldRequestConfig } from '../common';

// List of system fields we don't want to display.
const OMIT_FIELDS: string[] = ['_source', '_type', '_index', '_id', '_version', '_score'];
// Maximum number of examples to obtain for text type fields.
const MAX_EXAMPLES_DEFAULT: number = 10;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import rison from 'rison-node';
import { mlJobService } from '../../../services/job_service';
import { ml } from '../../../services/ml_api_service';
import { getToastNotifications } from '../../../util/dependency_cache';
import { stringMatch } from '../../../util/string_utils';
import { JOB_STATE, DATAFEED_STATE } from '../../../../../common/constants/states';
import { parseInterval } from '../../../../../common/util/parse_interval';
import { i18n } from '@kbn/i18n';
Expand Down Expand Up @@ -350,14 +351,6 @@ export function checkForAutoStartDatafeed() {
}
}

// Case-insensitive substring check used for free-text filtering.
// Returns true only when both arguments are strings and `substr` occurs
// somewhere in `str`, ignoring case. `substr` is compared literally rather
// than being passed to String.prototype.match, which would compile it into a
// regular expression: a term such as `(` would throw a SyntaxError and `.`
// would match any character.
function stringMatch(str, substr) {
  return (
    typeof str === 'string' &&
    typeof substr === 'string' &&
    str.toLowerCase().includes(substr.toLowerCase())
  );
}

function jobProperty(job, prop) {
const propMap = {
job_state: 'jobState',
Expand Down
21 changes: 0 additions & 21 deletions x-pack/plugins/ml/public/application/util/string_utils.d.ts

This file was deleted.

60 changes: 11 additions & 49 deletions x-pack/plugins/ml/public/application/util/string_utils.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,20 @@
* you may not use this file except in compliance with the Elastic License.
*/

import { CustomUrlAnomalyRecordDoc } from '../../../common/types/custom_urls';
import { Detector } from '../../../common/types/anomaly_detection_jobs';

import {
replaceStringTokens,
detectorToString,
sortByKey,
toLocaleString,
mlEscape,
escapeForElasticsearchQuery,
} from './string_utils';

describe('ML - string utils', () => {
describe('replaceStringTokens', () => {
const testRecord = {
const testRecord: CustomUrlAnomalyRecordDoc = {
job_id: 'test_job',
result_type: 'record',
probability: 0.0191711,
Expand All @@ -30,6 +32,10 @@ describe('ML - string utils', () => {
testfield1: 'test$tring=[+-?]',
testfield2: '{<()>}',
testfield3: 'host=\\\\[email protected]',
earliest: '0',
latest: '0',
is_interim: false,
initial_record_score: 0,
};

test('returns correct values without URI encoding', () => {
Expand Down Expand Up @@ -68,17 +74,17 @@ describe('ML - string utils', () => {

describe('detectorToString', () => {
test('returns the correct descriptions for detectors', () => {
const detector1 = {
const detector1: Detector = {
function: 'count',
};

const detector2 = {
const detector2: Detector = {
function: 'count',
by_field_name: 'airline',
use_null: false,
};

const detector3 = {
const detector3: Detector = {
function: 'mean',
field_name: 'CPUUtilization',
partition_field_name: 'region',
Expand All @@ -95,50 +101,6 @@ describe('ML - string utils', () => {
});
});

describe('sortByKey', () => {
  // Fixture declared in reverse-alphabetical key order, so any re-ordering is observable.
  const animalTraits = {
    zebra: 'stripes',
    giraffe: 'neck',
    elephant: 'trunk',
  };

  // Comparator that ranks each entry by its value instead of its key.
  const rankByValue = (value: string) => value;

  test('returns correct ordering with default comparator', () => {
    const leadingKeys = Object.keys(sortByKey(animalTraits, false)).slice(0, 3);
    expect(leadingKeys).toEqual(['elephant', 'giraffe', 'zebra']);
  });

  test('returns correct ordering with default comparator and order reversed', () => {
    const leadingKeys = Object.keys(sortByKey(animalTraits, true)).slice(0, 3);
    expect(leadingKeys).toEqual(['zebra', 'giraffe', 'elephant']);
  });

  test('returns correct ordering with comparator', () => {
    const leadingKeys = Object.keys(sortByKey(animalTraits, false, rankByValue)).slice(0, 3);
    expect(leadingKeys).toEqual(['giraffe', 'zebra', 'elephant']);
  });

  test('returns correct ordering with comparator and order reversed', () => {
    const leadingKeys = Object.keys(sortByKey(animalTraits, true, rankByValue)).slice(0, 3);
    expect(leadingKeys).toEqual(['elephant', 'zebra', 'giraffe']);
  });
});

describe('toLocaleString', () => {
test('returns correct comma placement for large numbers', () => {
expect(toLocaleString(1)).toBe('1');
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,21 @@
import _ from 'lodash';
import d3 from 'd3';

import { CustomUrlAnomalyRecordDoc } from '../../../common/types/custom_urls';
import { Detector } from '../../../common/types/anomaly_detection_jobs';

// Replaces all instances of dollar delimited tokens in the specified String
// with corresponding values from the supplied object, optionally
// encoding the replacement for a URI component.
// For example if passed a String 'http://www.google.co.uk/#q=airline+code+$airline$'
// and valuesByTokenName of {"airline":"AAL"}, will return
// 'http://www.google.co.uk/#q=airline+code+AAL'.
// If a corresponding key is not found in valuesByTokenName, then the String is not replaced.
export function replaceStringTokens(str, valuesByTokenName, encodeForURI) {
export function replaceStringTokens(
str: string,
valuesByTokenName: CustomUrlAnomalyRecordDoc,
encodeForURI: boolean
) {
return String(str).replace(/\$([^?&$\'"]+)\$/g, (match, name) => {
// Use lodash get to allow nested JSON fields to be retrieved.
let tokenValue = _.get(valuesByTokenName, name, null);
Expand All @@ -31,7 +38,7 @@ export function replaceStringTokens(str, valuesByTokenName, encodeForURI) {
}

// creates the default description for a given detector
export function detectorToString(dtr) {
export function detectorToString(dtr: Detector): string {
const BY_TOKEN = ' by ';
const OVER_TOKEN = ' over ';
const USE_NULL_OPTION = ' use_null=';
Expand Down Expand Up @@ -73,36 +80,18 @@ export function detectorToString(dtr) {
}

// Wraps the input string in double quotes if it contains non-word characters.
function quoteField(field) {
function quoteField(field: string): string {
if (field.match(/\W/g)) {
return '"' + field + '"';
} else {
return field;
}
}

// Builds a new object holding the same entries as `list`, with the keys
// re-ordered. Keys are sorted by the value `comparator(value, key)` returns
// for them, or by the key itself when no comparator is supplied; passing a
// truthy `reverse` flips the resulting order.
export function sortByKey(list, reverse, comparator) {
  const rankOf = (key) => (comparator ? comparator(list[key], key) : key);
  const ascendingKeys = _.sortBy(_.keys(list), rankOf);
  const orderedKeys = reverse ? ascendingKeys.reverse() : ascendingKeys;
  const orderedValues = _.map(orderedKeys, (key) => list[key]);

  return _.zipObject(orderedKeys, orderedValues);
}

// add commas to large numbers
// Number.toLocaleString is not supported on safari
export function toLocaleString(x) {
let result = x;
export function toLocaleString(x: number): string {
let result = x.toString();
if (x && typeof x === 'number') {
const parts = x.toString().split('.');
parts[0] = parts[0].replace(/\B(?=(\d{3})+(?!\d))/g, ',');
Expand All @@ -112,8 +101,8 @@ export function toLocaleString(x) {
}

// escape html characters
export function mlEscape(str) {
const entityMap = {
export function mlEscape(str: string): string {
const entityMap: { [escapeChar: string]: string } = {
'&': '&amp;',
'<': '&lt;',
'>': '&gt;',
Expand All @@ -125,39 +114,44 @@ export function mlEscape(str) {
}

// Escapes reserved characters for use in Elasticsearch query terms.
export function escapeForElasticsearchQuery(str) {
export function escapeForElasticsearchQuery(str: string): string {
// Escape with a leading backslash any of the characters that the
// Elasticsearch documentation lists as potentially causing a syntax error when used in queries:
// + - = && || > < ! ( ) { } [ ] ^ " ~ * ? : \ /
// https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#_reserved_characters
return String(str).replace(/[-[\]{}()+!<>=?:\/\\^"~*&|\s]/g, '\\$&');
}

export function calculateTextWidth(txt, isNumber, elementSelection) {
txt = isNumber ? d3.format(',')(txt) : txt;
let svg = elementSelection;
let $el;
if (elementSelection === undefined) {
// Create a temporary selection to append the label to.
// Note styling of font will be inherited from CSS of page.
const $body = d3.select('body');
$el = $body.append('div');
svg = $el.append('svg');
}
export function calculateTextWidth(txt: string | number, isNumber: boolean) {
txt = isNumber && typeof txt === 'number' ? d3.format(',')(txt) : txt;

// Create a temporary selection to append the label to.
// Note styling of font will be inherited from CSS of page.
const $body = d3.select('body');
const $el = $body.append('div');
const svg = $el.append('svg');

const tempLabelText = svg
.append('g')
.attr('class', 'temp-axis-label tick')
.selectAll('text.temp.axis')
.data('a')
.data(['a'])
.enter()
.append('text')
.text(txt);
const width = tempLabelText[0][0].getBBox().width;
const width = (tempLabelText[0][0] as SVGSVGElement).getBBox().width;

d3.select('.temp-axis-label').remove();
if ($el !== undefined) {
$el.remove();
}
return Math.ceil(width);
}

/**
 * Case-insensitive substring check used for free-text filtering.
 *
 * Returns `true` only when both arguments are strings and `substr` occurs
 * somewhere in `str`, ignoring case. `substr` is compared literally rather
 * than being handed to `String.prototype.match`, which would compile it into
 * a regular expression: input such as `(` would then throw a SyntaxError and
 * `.` would match any character.
 *
 * @param str - The text to search in; non-string values never match.
 * @param substr - The text to look for; non-string values never match.
 * @returns Whether `substr` is contained in `str`, ignoring case.
 */
export function stringMatch(str: string | undefined, substr: unknown): boolean {
  return (
    typeof str === 'string' &&
    typeof substr === 'string' &&
    str.toLowerCase().includes(substr.toLowerCase())
  );
}

0 comments on commit ebfd803

Please sign in to comment.