Skip to content

Commit e58b3f6

Browse files
authored
feat: add a base model to the repository
1 parent 4c6eee7 commit e58b3f6

File tree

7 files changed

+357
-80
lines changed

7 files changed

+357
-80
lines changed

model/ver20220624/model.json

+199
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,199 @@
1+
{
2+
"modelTopology": {
3+
"class_name": "Sequential",
4+
"config": {
5+
"name": "sequential_1",
6+
"layers": [
7+
{
8+
"class_name": "Dense",
9+
"config": {
10+
"units": 32,
11+
"activation": "relu",
12+
"use_bias": true,
13+
"kernel_initializer": {
14+
"class_name": "VarianceScaling",
15+
"config": {
16+
"scale": 1,
17+
"mode": "fan_avg",
18+
"distribution": "normal",
19+
"seed": null
20+
}
21+
},
22+
"bias_initializer": { "class_name": "Zeros", "config": {} },
23+
"kernel_regularizer": null,
24+
"bias_regularizer": null,
25+
"activity_regularizer": null,
26+
"kernel_constraint": null,
27+
"bias_constraint": null,
28+
"name": "dense_Dense1",
29+
"trainable": true,
30+
"batch_input_shape": [null, 512],
31+
"dtype": "float32"
32+
}
33+
},
34+
{
35+
"class_name": "BatchNormalization",
36+
"config": {
37+
"axis": -1,
38+
"momentum": 0.99,
39+
"epsilon": 0.001,
40+
"center": true,
41+
"scale": true,
42+
"beta_initializer": { "class_name": "Zeros", "config": {} },
43+
"gamma_initializer": { "class_name": "Ones", "config": {} },
44+
"moving_mean_initializer": { "class_name": "Zeros", "config": {} },
45+
"moving_variance_initializer": {
46+
"class_name": "Ones",
47+
"config": {}
48+
},
49+
"beta_regularizer": null,
50+
"gamma_regularizer": null,
51+
"beta_constraint": null,
52+
"gamma_constraint": null,
53+
"name": "batch_normalization_BatchNormalization1",
54+
"trainable": true
55+
}
56+
},
57+
{
58+
"class_name": "Dense",
59+
"config": {
60+
"units": 32,
61+
"activation": "relu",
62+
"use_bias": true,
63+
"kernel_initializer": {
64+
"class_name": "VarianceScaling",
65+
"config": {
66+
"scale": 1,
67+
"mode": "fan_avg",
68+
"distribution": "normal",
69+
"seed": null
70+
}
71+
},
72+
"bias_initializer": { "class_name": "Zeros", "config": {} },
73+
"kernel_regularizer": null,
74+
"bias_regularizer": null,
75+
"activity_regularizer": null,
76+
"kernel_constraint": null,
77+
"bias_constraint": null,
78+
"name": "dense_Dense2",
79+
"trainable": true
80+
}
81+
},
82+
{
83+
"class_name": "BatchNormalization",
84+
"config": {
85+
"axis": -1,
86+
"momentum": 0.99,
87+
"epsilon": 0.001,
88+
"center": true,
89+
"scale": true,
90+
"beta_initializer": { "class_name": "Zeros", "config": {} },
91+
"gamma_initializer": { "class_name": "Ones", "config": {} },
92+
"moving_mean_initializer": { "class_name": "Zeros", "config": {} },
93+
"moving_variance_initializer": {
94+
"class_name": "Ones",
95+
"config": {}
96+
},
97+
"beta_regularizer": null,
98+
"gamma_regularizer": null,
99+
"beta_constraint": null,
100+
"gamma_constraint": null,
101+
"name": "batch_normalization_BatchNormalization2",
102+
"trainable": true
103+
}
104+
},
105+
{
106+
"class_name": "Dense",
107+
"config": {
108+
"units": 1,
109+
"activation": "sigmoid",
110+
"use_bias": true,
111+
"kernel_initializer": {
112+
"class_name": "VarianceScaling",
113+
"config": {
114+
"scale": 1,
115+
"mode": "fan_avg",
116+
"distribution": "normal",
117+
"seed": null
118+
}
119+
},
120+
"bias_initializer": { "class_name": "Zeros", "config": {} },
121+
"kernel_regularizer": null,
122+
"bias_regularizer": null,
123+
"activity_regularizer": null,
124+
"kernel_constraint": null,
125+
"bias_constraint": null,
126+
"name": "dense_Dense3",
127+
"trainable": true
128+
}
129+
}
130+
]
131+
},
132+
"keras_version": "tfjs-layers 3.18.0",
133+
"backend": "tensor_flow.js"
134+
},
135+
"weightsManifest": [
136+
{
137+
"paths": ["weights.bin"],
138+
"weights": [
139+
{
140+
"name": "dense_Dense1/kernel",
141+
"shape": [512, 32],
142+
"dtype": "float32"
143+
},
144+
{ "name": "dense_Dense1/bias", "shape": [32], "dtype": "float32" },
145+
{
146+
"name": "batch_normalization_BatchNormalization1/gamma",
147+
"shape": [32],
148+
"dtype": "float32"
149+
},
150+
{
151+
"name": "batch_normalization_BatchNormalization1/beta",
152+
"shape": [32],
153+
"dtype": "float32"
154+
},
155+
{
156+
"name": "dense_Dense2/kernel",
157+
"shape": [32, 32],
158+
"dtype": "float32"
159+
},
160+
{ "name": "dense_Dense2/bias", "shape": [32], "dtype": "float32" },
161+
{
162+
"name": "batch_normalization_BatchNormalization2/gamma",
163+
"shape": [32],
164+
"dtype": "float32"
165+
},
166+
{
167+
"name": "batch_normalization_BatchNormalization2/beta",
168+
"shape": [32],
169+
"dtype": "float32"
170+
},
171+
{ "name": "dense_Dense3/kernel", "shape": [32, 1], "dtype": "float32" },
172+
{ "name": "dense_Dense3/bias", "shape": [1], "dtype": "float32" },
173+
{
174+
"name": "batch_normalization_BatchNormalization1/moving_mean",
175+
"shape": [32],
176+
"dtype": "float32"
177+
},
178+
{
179+
"name": "batch_normalization_BatchNormalization1/moving_variance",
180+
"shape": [32],
181+
"dtype": "float32"
182+
},
183+
{
184+
"name": "batch_normalization_BatchNormalization2/moving_mean",
185+
"shape": [32],
186+
"dtype": "float32"
187+
},
188+
{
189+
"name": "batch_normalization_BatchNormalization2/moving_variance",
190+
"shape": [32],
191+
"dtype": "float32"
192+
}
193+
]
194+
}
195+
],
196+
"format": "layers-model",
197+
"generatedBy": "TensorFlow.js tfjs-layers v3.18.0",
198+
"convertedBy": null
199+
}

model/ver20220624/weights.bin

69.4 KB
Binary file not shown.

trainer/datasets.ts

+76
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
import * as use from '@tensorflow-models/universal-sentence-encoder'
2+
import * as tf from '@tensorflow/tfjs-node'
3+
4+
/**
 * Returns the Smilegate "unsmile" dataset as a tf.data.Dataset whose sentences
 * have been encoded with the universal-sentence-encoder.
 *
 * Only two columns of the tab-separated file are parsed: `clean` (int32, used
 * as the label) and `문장` (the sentence string that gets embedded).
 *
 * @param filepath URL of the dataset TSV file. @see getUnsmileDataUrl
 * @param encoder use.UniversalSentenceEncoder used to encode each sentence string
 * @returns a dataset of `{ xs, ys }` elements, batched by 32 and then shuffled
 *   with a buffer of 32
 * @link https://github.com/smilegate-ai/korean_unsmile_dataset
 */
async function loadUnsmileData({
  filepath,
  encoder,
}: {
  filepath: string
  encoder: use.UniversalSentenceEncoder
}): Promise<tf.data.Dataset<tf.TensorContainer>> {
  return (
    tf.data
      .csv(filepath, {
        delimiter: '\t',
        hasHeader: true,
        configuredColumnsOnly: true,
        columnConfigs: {
          clean: {
            dtype: 'int32',
            isLabel: true,
          },
          문장: {
            dtype: 'string',
          },
        },
      })
      .mapAsync(async (data: any) => {
        const embedding = await encoder.embed(data.xs['문장'])
        const xs = embedding.flatten()
        // Only the flattened tensor is handed to the dataset; release the
        // intermediate embedding so it does not accumulate for every row.
        embedding.dispose()
        return {
          xs,
          ys: Object.values(data.ys),
        }
      })
      // NOTE(review): shuffling after batching reorders whole batches rather
      // than individual rows — confirm this ordering is intended.
      .batch(32)
      .shuffle(32)
  )
}
42+
43+
/**
 * Loads the Smilegate "unsmile" dataset, encoded with the
 * universal-sentence-encoder, as a pair of tf.data.Dataset objects: one for
 * training and one for validation.
 *
 * @param encoder use.UniversalSentenceEncoder forwarded to the dataset loader
 * @returns `trainData` built from the "train" file and `valData` built from
 *   the "valid" file, both at dataset version "v1.0"
 */
export async function loadUnsmileTrainValidData(
  encoder: use.UniversalSentenceEncoder,
): Promise<{
  trainData: tf.data.Dataset<tf.TensorContainer>
  valData: tf.data.Dataset<tf.TensorContainer>
}> {
  const version = 'v1.0'
  const trainData = await loadUnsmileData({
    filepath: getUnsmileDataUrl('train', version),
    encoder,
  })
  const valData = await loadUnsmileData({
    filepath: getUnsmileDataUrl('valid', version),
    encoder,
  })
  return { trainData, valData }
}
66+
67+
/**
 * Helper that builds the URL of a Smilegate "unsmile" dataset TSV file.
 *
 * @param type "train" or "valid"
 * @param version "v1.0"
 * @returns full url path
 */
function getUnsmileDataUrl(type: string, version: string): string {
  return `https://raw.githubusercontent.com/smilegate-ai/korean_unsmile_dataset/main/unsmile_${type}_${version}.tsv`
}

trainer/model.ts

+44
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
import * as tf from '@tensorflow/tfjs-node'
2+
import path from 'path'
3+
4+
const FILE_SCHEME = 'file://'

/**
 * Builds the fallback network used when no saved model can be loaded:
 * dense(32, relu) -> batch norm -> dense(32, relu) -> batch norm ->
 * dense(1, sigmoid), taking 512-dimensional inputs.
 */
function createDefaultModel(): tf.Sequential {
  return tf.sequential({
    layers: [
      tf.layers.dense({
        inputDim: 512,
        units: 32,
        activation: 'relu',
      }),
      tf.layers.batchNormalization(),
      tf.layers.dense({
        units: 32,
        activation: 'relu',
      }),
      tf.layers.batchNormalization(),
      tf.layers.dense({
        units: 1,
        activation: 'sigmoid',
      }),
    ],
  })
}

/**
 * Loads a saved model, or creates a new model when loading fails.
 *
 * @param modelDirectoryPath directory that holds the saved `model.json`, with
 *   or without a leading `file://`. For the accepted input format see
 *   https://www.tensorflow.org/js/guide/save_load
 * @returns the model to train
 */
export async function getModel(
  modelDirectoryPath: string,
): Promise<tf.LayersModel | tf.Sequential> {
  try {
    // Strip the scheme only when it is a prefix, so an occurrence of
    // "file://" elsewhere in the path is left untouched.
    const directory = modelDirectoryPath.startsWith(FILE_SCHEME)
      ? modelDirectoryPath.slice(FILE_SCHEME.length)
      : modelDirectoryPath
    const modelPath = FILE_SCHEME + path.join(directory, 'model.json')
    console.info(`Trying to load a model from ${modelPath}`)
    return await tf.loadLayersModel(modelPath)
  } catch (e) {
    // Deliberate best-effort: any load failure falls back to a fresh model.
    console.warn(e)
    console.warn(`Unable to load a model. Creating a new model`)
    return createDefaultModel()
  }
}

trainer/package.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
"description": "",
55
"main": "index.js",
66
"scripts": {
7-
"build": "npx ts-node trainer.ts"
7+
"start": "ts-node trainer.ts"
88
},
99
"keywords": [],
1010
"author": "",

0 commit comments

Comments
 (0)