1. Environments

```python
!pip install tensorflow_datasets
```
```python
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re
import urllib.request
import time
import tensorflow_datasets as tfds
import tensorflow as tf
```
```python
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "3"
```
```python
# With CUDA_VISIBLE_DEVICES="3", the selected card is exposed to TensorFlow
# as GPU:0, so that is the device we pin this quick sanity check to.
with tf.device('/device:GPU:0'):
    a = tf.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
    b = tf.constant([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
    c = tf.matmul(a, b)
print(c)
```
```
tf.Tensor(
[[22. 28.]
 [49. 64.]], shape=(2, 2), dtype=float32)
```
2. Data Preprocessing

```python
data = pd.read_csv('./ChatbotData.csv')
data = data[0:5290]  # keep the first 5,290 question-answer pairs
```
```python
# conversation_office.txt alternates question and answer lines;
# strip the first two characters (speaker marker) and the trailing newline.
with open(r'./conversation_office.txt', "r") as f:
    lines = f.readlines()

Q = []
A = []
for i in range(len(lines)):
    if i % 2 == 0:
        Q.append(lines[i][2:-1])
        A.append(lines[i + 1][2:-1])
```
```python
df = pd.DataFrame()  # pandas was already imported above
df['Q'] = Q
df['A'] = A
df['label'] = 1
```
```python
# merge the two corpora and shuffle the rows
train_data = pd.concat([data, df], ignore_index=True)
train_data = train_data.sample(frac=1).reset_index(drop=True)
```
3. Building the Vocabulary

```python
# insert spaces around punctuation so it is tokenized separately
questions = []
for sentence in train_data['Q']:
    sentence = re.sub(r"([?.!,])", r" \1 ", sentence)
    sentence = sentence.strip()
    questions.append(sentence)

answers = []
for sentence in train_data['A']:
    sentence = re.sub(r"([?.!,])", r" \1 ", sentence)
    sentence = sentence.strip()
    answers.append(sentence)
```
```python
# build a subword vocabulary from the question/answer corpus
tokenizer = tfds.deprecated.text.SubwordTextEncoder.build_from_corpus(
    questions + answers, target_vocab_size=2**13)

# reserve two extra ids for the start and end tokens
START_TOKEN, END_TOKEN = [tokenizer.vocab_size], [tokenizer.vocab_size + 1]
VOCAB_SIZE = tokenizer.vocab_size + 2
```
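As a quick sanity check, the subword tokenizer can be round-tripped like this (a minimal sketch; the sample sentence is arbitrary, not taken from the corpus):

```python
# Hypothetical round-trip: encode a sentence to subword ids and decode it back.
sample = "오늘 날씨 어때?"
ids = tokenizer.encode(sample)   # list of integer subword ids
print(ids)
print(tokenizer.decode(ids))     # should reproduce the original sentence
```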
```python
print('임의의 질문 샘플을 정수 인코딩 : {}'.format(tokenizer.encode(questions[20])))

MAX_LENGTH = 40

def tokenize_and_filter(inputs, outputs):
    tokenized_inputs, tokenized_outputs = [], []

    for (sentence1, sentence2) in zip(inputs, outputs):
        # wrap every encoded sentence with the start and end tokens
        sentence1 = START_TOKEN + tokenizer.encode(sentence1) + END_TOKEN
        sentence2 = START_TOKEN + tokenizer.encode(sentence2) + END_TOKEN
        tokenized_inputs.append(sentence1)
        tokenized_outputs.append(sentence2)

    # pad every sequence to MAX_LENGTH
    tokenized_inputs = tf.keras.preprocessing.sequence.pad_sequences(
        tokenized_inputs, maxlen=MAX_LENGTH, padding='post')
    tokenized_outputs = tf.keras.preprocessing.sequence.pad_sequences(
        tokenized_outputs, maxlen=MAX_LENGTH, padding='post')

    return tokenized_inputs, tokenized_outputs
```
```
임의의 질문 샘플을 정수 인코딩 : [2704, 1081, 13, 542]
```
```python
questions, answers = tokenize_and_filter(questions, answers)
```
```python
print(questions[0])
print(answers[0])
```
```
[10023 31 121 4282 10024 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0]
[10023 3607 213 13 21 1 10024 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0]
```
```python
from tensorflow.python.client import device_lib
device_lib.list_local_devices()
```
```python
# as before, the visible card is GPU:0 once CUDA_VISIBLE_DEVICES has been set
with tf.device('/device:GPU:0'):
    BATCH_SIZE = 64
    BUFFER_SIZE = 20000

    # teacher forcing: the decoder input is the answer without its last token,
    # and the training target is the answer shifted one step to the left
    dataset = tf.data.Dataset.from_tensor_slices((
        {
            'inputs': questions,
            'dec_inputs': answers[:, :-1]
        },
        {
            'outputs': answers[:, 1:]
        },
    ))

    dataset = dataset.cache()
    dataset = dataset.shuffle(BUFFER_SIZE)
    dataset = dataset.batch(BATCH_SIZE)
    dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
```
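To make the shift explicit, here is a minimal sketch with made-up token ids showing what `dec_inputs` and `outputs` look like for one padded answer:

```python
# Hypothetical padded answer: <start>=10023, two word ids, <end>=10024, padding
ans = np.array([[10023, 7, 8, 10024, 0, 0]])
print(ans[:, :-1])  # decoder input   : [[10023     7     8 10024     0]]
print(ans[:, 1:])   # training target : [[    7     8 10024     0     0]]
```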
4. Building the Transformer Model

```python
def transformer(vocab_size, num_layers, dff, d_model, num_heads, dropout,
                name="transformer"):
    inputs = tf.keras.Input(shape=(None,), name="inputs")
    dec_inputs = tf.keras.Input(shape=(None,), name="dec_inputs")

    # padding mask for the encoder's self-attention
    enc_padding_mask = tf.keras.layers.Lambda(
        create_padding_mask, output_shape=(1, 1, None),
        name='enc_padding_mask')(inputs)

    # look-ahead mask for the decoder's first attention block
    look_ahead_mask = tf.keras.layers.Lambda(
        create_look_ahead_mask, output_shape=(1, None, None),
        name='look_ahead_mask')(dec_inputs)

    # padding mask for the decoder's encoder-decoder attention block
    dec_padding_mask = tf.keras.layers.Lambda(
        create_padding_mask, output_shape=(1, 1, None),
        name='dec_padding_mask')(inputs)

    enc_outputs = encoder(vocab_size=vocab_size, num_layers=num_layers, dff=dff,
                          d_model=d_model, num_heads=num_heads, dropout=dropout,
                          )(inputs=[inputs, enc_padding_mask])

    dec_outputs = decoder(vocab_size=vocab_size, num_layers=num_layers, dff=dff,
                          d_model=d_model, num_heads=num_heads, dropout=dropout,
                          )(inputs=[dec_inputs, enc_outputs, look_ahead_mask, dec_padding_mask])

    outputs = tf.keras.layers.Dense(units=vocab_size, name="outputs")(dec_outputs)

    return tf.keras.Model(inputs=[inputs, dec_inputs], outputs=outputs, name=name)
```
```python
class PositionalEncoding(tf.keras.layers.Layer):
    def __init__(self, position, d_model):
        super(PositionalEncoding, self).__init__()
        self.pos_encoding = self.positional_encoding(position, d_model)

    def get_angles(self, position, i, d_model):
        angles = 1 / tf.pow(10000, (2 * (i // 2)) / tf.cast(d_model, tf.float32))
        return position * angles

    def positional_encoding(self, position, d_model):
        angle_rads = self.get_angles(
            position=tf.range(position, dtype=tf.float32)[:, tf.newaxis],
            i=tf.range(d_model, dtype=tf.float32)[tf.newaxis, :],
            d_model=d_model)

        # sine on even indices, cosine on odd indices
        sines = tf.math.sin(angle_rads[:, 0::2])
        cosines = tf.math.cos(angle_rads[:, 1::2])

        angle_rads = np.zeros(angle_rads.shape)
        angle_rads[:, 0::2] = sines
        angle_rads[:, 1::2] = cosines
        pos_encoding = tf.constant(angle_rads)
        pos_encoding = pos_encoding[tf.newaxis, ...]

        print(pos_encoding.shape)
        return tf.cast(pos_encoding, tf.float32)

    def call(self, inputs):
        return inputs + self.pos_encoding[:, :tf.shape(inputs)[1], :]
```
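The class above implements the sinusoidal positional encoding from "Attention Is All You Need":

$$
PE_{(pos,\,2i)} = \sin\!\left(\frac{pos}{10000^{2i/d_{model}}}\right), \qquad
PE_{(pos,\,2i+1)} = \cos\!\left(\frac{pos}{10000^{2i/d_{model}}}\right)
$$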
```python
def create_padding_mask(x):
    # 1.0 where the token id is 0 (padding); shape (batch_size, 1, 1, key_len)
    mask = tf.cast(tf.math.equal(x, 0), tf.float32)
    return mask[:, tf.newaxis, tf.newaxis, :]

def create_look_ahead_mask(x):
    # upper-triangular mask that hides future positions, combined with padding
    seq_len = tf.shape(x)[1]
    look_ahead_mask = 1 - tf.linalg.band_part(tf.ones((seq_len, seq_len)), -1, 0)
    padding_mask = create_padding_mask(x)
    return tf.maximum(look_ahead_mask, padding_mask)

def encoder(vocab_size, num_layers, dff, d_model, num_heads, dropout,
            name="encoder"):
    inputs = tf.keras.Input(shape=(None,), name="inputs")
    padding_mask = tf.keras.Input(shape=(1, 1, None), name="padding_mask")

    # embedding scaled by sqrt(d_model), then positional encoding and dropout
    embeddings = tf.keras.layers.Embedding(vocab_size, d_model)(inputs)
    embeddings *= tf.math.sqrt(tf.cast(d_model, tf.float32))
    embeddings = PositionalEncoding(vocab_size, d_model)(embeddings)
    outputs = tf.keras.layers.Dropout(rate=dropout)(embeddings)

    for i in range(num_layers):
        outputs = encoder_layer(dff=dff, d_model=d_model, num_heads=num_heads,
                                dropout=dropout, name="encoder_layer_{}".format(i),
                                )([outputs, padding_mask])

    return tf.keras.Model(
        inputs=[inputs, padding_mask], outputs=outputs, name=name)
```
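For intuition, the two mask functions behave like this on a made-up two-sentence batch (illustration only, not part of the pipeline):

```python
# Toy batch of two padded sequences; 0 is the padding id
x = tf.constant([[5, 7, 0, 0],
                 [3, 0, 0, 0]])
print(create_padding_mask(x))     # (2, 1, 1, 4): 1.0 marks padded key positions
print(create_look_ahead_mask(x))  # (2, 1, 4, 4): hides both future and padded tokens
```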
```python
def encoder_layer(dff, d_model, num_heads, dropout, name="encoder_layer"):
    inputs = tf.keras.Input(shape=(None, d_model), name="inputs")
    padding_mask = tf.keras.Input(shape=(1, 1, None), name="padding_mask")

    # self-attention sublayer with residual connection and layer normalization
    attention = MultiHeadAttention(
        d_model, num_heads, name="attention")({
            'query': inputs, 'key': inputs, 'value': inputs,
            'mask': padding_mask
        })
    attention = tf.keras.layers.Dropout(rate=dropout)(attention)
    attention = tf.keras.layers.LayerNormalization(
        epsilon=1e-6)(inputs + attention)

    # position-wise feed-forward sublayer
    outputs = tf.keras.layers.Dense(units=dff, activation='relu')(attention)
    outputs = tf.keras.layers.Dense(units=d_model)(outputs)
    outputs = tf.keras.layers.Dropout(rate=dropout)(outputs)
    outputs = tf.keras.layers.LayerNormalization(
        epsilon=1e-6)(attention + outputs)

    return tf.keras.Model(
        inputs=[inputs, padding_mask], outputs=outputs, name=name)
```
```python
class MultiHeadAttention(tf.keras.layers.Layer):
    def __init__(self, d_model, num_heads, name="multi_head_attention"):
        super(MultiHeadAttention, self).__init__(name=name)
        self.num_heads = num_heads
        self.d_model = d_model

        assert d_model % self.num_heads == 0
        self.depth = d_model // self.num_heads

        self.query_dense = tf.keras.layers.Dense(units=d_model)
        self.key_dense = tf.keras.layers.Dense(units=d_model)
        self.value_dense = tf.keras.layers.Dense(units=d_model)
        self.dense = tf.keras.layers.Dense(units=d_model)

    def split_heads(self, inputs, batch_size):
        # (batch, seq_len, d_model) -> (batch, num_heads, seq_len, depth)
        inputs = tf.reshape(
            inputs, shape=(batch_size, -1, self.num_heads, self.depth))
        return tf.transpose(inputs, perm=[0, 2, 1, 3])

    def call(self, inputs):
        query, key, value, mask = inputs['query'], inputs['key'], inputs['value'], inputs['mask']
        batch_size = tf.shape(query)[0]

        # linear projections
        query = self.query_dense(query)
        key = self.key_dense(key)
        value = self.value_dense(value)

        # split into heads, attend, then merge the heads back together
        query = self.split_heads(query, batch_size)
        key = self.split_heads(key, batch_size)
        value = self.split_heads(value, batch_size)

        scaled_attention, _ = scaled_dot_product_attention(query, key, value, mask)
        scaled_attention = tf.transpose(scaled_attention, perm=[0, 2, 1, 3])
        concat_attention = tf.reshape(scaled_attention,
                                      (batch_size, -1, self.d_model))

        outputs = self.dense(concat_attention)
        return outputs
```
```python
def scaled_dot_product_attention(query, key, value, mask):
    # attention scores: QK^T scaled by sqrt(d_k)
    matmul_qk = tf.matmul(query, key, transpose_b=True)
    depth = tf.cast(tf.shape(key)[-1], tf.float32)
    logits = matmul_qk / tf.math.sqrt(depth)

    # push masked positions to a large negative value so softmax ignores them
    if mask is not None:
        logits += (mask * -1e9)

    attention_weights = tf.nn.softmax(logits, axis=-1)
    output = tf.matmul(attention_weights, value)
    return output, attention_weights
```
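This is the standard scaled dot-product attention from the Transformer paper:

$$
\mathrm{Attention}(Q, K, V) = \mathrm{softmax}\!\left(\frac{QK^{\top}}{\sqrt{d_k}}\right) V
$$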
```python
def decoder(vocab_size, num_layers, dff, d_model, num_heads, dropout,
            name='decoder'):
    inputs = tf.keras.Input(shape=(None,), name='inputs')
    enc_outputs = tf.keras.Input(shape=(None, d_model), name='encoder_outputs')
    look_ahead_mask = tf.keras.Input(
        shape=(1, None, None), name='look_ahead_mask')
    padding_mask = tf.keras.Input(shape=(1, 1, None), name='padding_mask')

    embeddings = tf.keras.layers.Embedding(vocab_size, d_model)(inputs)
    embeddings *= tf.math.sqrt(tf.cast(d_model, tf.float32))
    embeddings = PositionalEncoding(vocab_size, d_model)(embeddings)
    outputs = tf.keras.layers.Dropout(rate=dropout)(embeddings)

    for i in range(num_layers):
        outputs = decoder_layer(dff=dff, d_model=d_model, num_heads=num_heads,
                                dropout=dropout, name='decoder_layer_{}'.format(i),
                                )(inputs=[outputs, enc_outputs, look_ahead_mask, padding_mask])

    return tf.keras.Model(
        inputs=[inputs, enc_outputs, look_ahead_mask, padding_mask],
        outputs=outputs, name=name)
```
```python
def decoder_layer(dff, d_model, num_heads, dropout, name="decoder_layer"):
    inputs = tf.keras.Input(shape=(None, d_model), name="inputs")
    enc_outputs = tf.keras.Input(shape=(None, d_model), name="encoder_outputs")
    look_ahead_mask = tf.keras.Input(
        shape=(1, None, None), name="look_ahead_mask")
    padding_mask = tf.keras.Input(shape=(1, 1, None), name='padding_mask')

    # masked self-attention over the decoder inputs
    attention1 = MultiHeadAttention(
        d_model, num_heads, name="attention_1")(inputs={
            'query': inputs, 'key': inputs, 'value': inputs,
            'mask': look_ahead_mask
        })
    attention1 = tf.keras.layers.LayerNormalization(
        epsilon=1e-6)(attention1 + inputs)

    # encoder-decoder attention: queries from the decoder,
    # keys and values from the encoder outputs
    attention2 = MultiHeadAttention(
        d_model, num_heads, name="attention_2")(inputs={
            'query': attention1, 'key': enc_outputs, 'value': enc_outputs,
            'mask': padding_mask
        })
    attention2 = tf.keras.layers.Dropout(rate=dropout)(attention2)
    attention2 = tf.keras.layers.LayerNormalization(
        epsilon=1e-6)(attention2 + attention1)

    # position-wise feed-forward sublayer
    outputs = tf.keras.layers.Dense(units=dff, activation='relu')(attention2)
    outputs = tf.keras.layers.Dense(units=d_model)(outputs)
    outputs = tf.keras.layers.Dropout(rate=dropout)(outputs)
    outputs = tf.keras.layers.LayerNormalization(
        epsilon=1e-6)(outputs + attention2)

    return tf.keras.Model(
        inputs=[inputs, enc_outputs, look_ahead_mask, padding_mask],
        outputs=outputs, name=name)
```
```python
tf.keras.backend.clear_session()

D_MODEL = 256
NUM_LAYERS = 2
NUM_HEADS = 8
DFF = 512
DROPOUT = 0.1

model = transformer(
    vocab_size=VOCAB_SIZE,
    num_layers=NUM_LAYERS,
    dff=DFF,
    d_model=D_MODEL,
    num_heads=NUM_HEADS,
    dropout=DROPOUT)
```
```
(1, 10025, 256)
(1, 10025, 256)
```
```python
class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
    def __init__(self, d_model, warmup_steps=4000):
        super(CustomSchedule, self).__init__()
        self.d_model = d_model
        self.d_model = tf.cast(self.d_model, tf.float32)
        self.warmup_steps = warmup_steps

    def __call__(self, step):
        arg1 = tf.math.rsqrt(step)
        arg2 = step * (self.warmup_steps**-1.5)
        return tf.math.rsqrt(self.d_model) * tf.math.minimum(arg1, arg2)


def loss_function(y_true, y_pred):
    y_true = tf.reshape(y_true, shape=(-1, MAX_LENGTH - 1))

    loss = tf.keras.losses.SparseCategoricalCrossentropy(
        from_logits=True, reduction='none')(y_true, y_pred)

    # zero out the loss contribution of padding positions
    mask = tf.cast(tf.not_equal(y_true, 0), tf.float32)
    loss = tf.multiply(loss, mask)

    return tf.reduce_mean(loss)
```
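`CustomSchedule` implements the warm-up learning-rate schedule from the original Transformer paper:

$$
lrate = d_{model}^{-0.5} \cdot \min\!\left(step^{-0.5},\; step \cdot warmup\_steps^{-1.5}\right)
$$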
```python
learning_rate = CustomSchedule(D_MODEL)

optimizer = tf.keras.optimizers.Adam(
    learning_rate, beta_1=0.9, beta_2=0.98, epsilon=1e-9)

def accuracy(y_true, y_pred):
    # labels have shape (batch_size, MAX_LENGTH - 1)
    y_true = tf.reshape(y_true, shape=(-1, MAX_LENGTH - 1))
    return tf.keras.metrics.sparse_categorical_accuracy(y_true, y_pred)

model.compile(optimizer=optimizer, loss=loss_function, metrics=[accuracy])
```
```python
EPOCHS = 50
model.fit(dataset, epochs=EPOCHS)
```
```
Epoch 1/50
104/104 [==============================] - 12s 54ms/step - loss: 1.2164 - accuracy: 0.0149
Epoch 2/50
104/104 [==============================] - 6s 54ms/step - loss: 1.0725 - accuracy: 0.0285
Epoch 3/50
104/104 [==============================] - 6s 54ms/step - loss: 0.9082 - accuracy: 0.0472
Epoch 4/50
104/104 [==============================] - 6s 54ms/step - loss: 0.7714 - accuracy: 0.0482
...
104/104 [==============================] - 6s 54ms/step - loss: 0.0160 - accuracy: 0.1321
Epoch 46/50
104/104 [==============================] - 6s 56ms/step - loss: 0.0158 - accuracy: 0.1320
Epoch 47/50
104/104 [==============================] - 6s 55ms/step - loss: 0.0152 - accuracy: 0.1320
Epoch 48/50
104/104 [==============================] - 6s 54ms/step - loss: 0.0151 - accuracy: 0.1322
Epoch 49/50
104/104 [==============================] - 6s 55ms/step - loss: 0.0149 - accuracy: 0.1321
Epoch 50/50
104/104 [==============================] - 6s 53ms/step - loss: 0.0148 - accuracy: 0.1321
<keras.callbacks.History at 0x7f794c0f0880>
```
5. Evaluating the Chatbot
```python
def preprocess_sentence(sentence):
    sentence = re.sub(r"([?.!,])", r" \1 ", sentence)
    sentence = sentence.strip()
    return sentence

def evaluate(sentence):
    sentence = preprocess_sentence(sentence)
    sentence = tf.expand_dims(
        START_TOKEN + tokenizer.encode(sentence) + END_TOKEN, axis=0)

    output = tf.expand_dims(START_TOKEN, 0)

    # greedy decoding: feed the tokens generated so far back into the decoder
    # and pick the highest-probability next token at every step
    for i in range(MAX_LENGTH):
        predictions = model(inputs=[sentence, output], training=False)

        # keep only the prediction for the last time step
        predictions = predictions[:, -1:, :]
        predicted_id = tf.cast(tf.argmax(predictions, axis=-1), tf.int32)

        # stop as soon as the end token is produced
        if tf.equal(predicted_id, END_TOKEN[0]):
            break

        output = tf.concat([output, predicted_id], axis=-1)

    return tf.squeeze(output, axis=0)
```
```python
def predict(sentence):
    prediction = evaluate(sentence)

    # drop the start/end token ids before decoding back to text
    predicted_sentence = tokenizer.decode(
        [i for i in prediction if i < tokenizer.vocab_size])

    print('Master: {}'.format(sentence))
    print('Chatbot: {}'.format(predicted_sentence))

    return predicted_sentence
```
```python
output = predict("굿모닝")
```

```
Input: 굿모닝
Output: 좋은 아침이에요 .
```
```python
output = predict("오늘 날씨")
```

```
Input: 오늘 날씨
Output: 충분히 아름다워요 .
```
```python
output = predict("오늘 날씨 어때?")
```

```
Input: 오늘 날씨 어때?
Output: 오전엔 화창하지만 오후에는 비가 올 것입니다 .
Input: 집중력
Output: 병원 가보세요 .
Input: 퇴근
Output: 인생은 채워나가는거죠 .
```
```python
output = predict("야근 싫어")
```

```
Input: 야근 싫어
Output: 얼른 집에 가서 쉬시길 바랄게요 .
```
```python
output = str(input("오피스 챗봇입니다. 무엇을 도와드릴까요?:"))
output = predict(output)
```

```
Master: 일하기 싫어
Chatbot: 저도요 ! !
```
**Code:** https://github.com/jmj3047/mj_chatbot_prac/blob/c5bec233b833b24345deeffe7391621415dc1dcb/chatbot_backend.py