

Review

Neural Network Concepts Review

Matrix Representation of a Single-Layer Neural Network

The AI Problem-Solving Workflow

RNN

Introduction to RNN Concepts

How an RNN Cell Works

RNN Applications

RNN Application: Chatbots

  • Chatbots
    • The input and output are not a fixed length each turn!
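This is what makes RNNs a natural fit: the encoder can absorb any number of input tokens, and the decoder can keep emitting until it decides to stop. A toy NumPy sketch with random weights and a hypothetical end-of-sequence token (my own illustration, not the course's code):

import numpy as np

rng = np.random.default_rng(0)
n_hidden, n_vocab, EOS = 4, 6, 0   # EOS: hypothetical end-of-sequence token id

# toy weights; a real chatbot would learn these
W_enc = rng.normal(size=(n_hidden, n_vocab + n_hidden)) * 0.1
W_dec = rng.normal(size=(n_hidden, n_vocab + n_hidden)) * 0.1
W_out = rng.normal(size=(n_vocab, n_hidden)) * 0.1

def onehot(i):
    v = np.zeros(n_vocab)
    v[i] = 1.0
    return v

def step(W, x, h):
    return np.tanh(W @ np.concatenate([x, h]))  # h_t = tanh(W.[x_t; h_{t-1}])

# encoder: the input can be any length
h = np.zeros(n_hidden)
for tok in [3, 1, 4, 1, 5]:
    h = step(W_enc, onehot(tok), h)

# decoder: emit tokens until EOS, so the output length is not fixed either
out, tok = [], EOS
for _ in range(20):   # safety cap
    h = step(W_dec, onehot(tok), h)
    tok = int(np.argmax(W_out @ h))
    if tok == EOS:
        break
    out.append(tok)
print(out)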

Variations on the Chatbot Idea

  • Applications

    • Translation
    • Video captioning: generating a description of a video
    • Generating a passage of text
    • Completing a half-drawn picture
  • Andrej Karpathy

Sentiment Analysis and Slot Filling

Slot filling: tag each word of an utterance with the slot it fills. For example, in "book a flight to Taipei on Friday", "Taipei" fills the destination slot and "Friday" fills the date slot.


RNN Example: Predicting Home Runs

How the RNN Cell Works

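In code form, one step of the cell is h_t = tanh(W·[x_t; h_{t-1}] + b), and the same W and b are reused at every timestep. A minimal NumPy sketch (my own illustration, not the course's code):

import numpy as np

def rnn_cell_step(x_t, h_prev, W, b):
    # one timestep: mix the current input with the previous state
    return np.tanh(W @ np.concatenate([x_t, h_prev]) + b)

def rnn_forward(xs, h0, W, b):
    # unroll over the sequence, reusing the SAME W and b at every step
    h, states = h0, []
    for x_t in xs:
        h = rnn_cell_step(x_t, h, W, b)
        states.append(h)
    return states  # one hidden state per timestep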

Types of RNN Applications

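For reference, the usual taxonomy here (popularized by Andrej Karpathy's "The Unreasonable Effectiveness of Recurrent Neural Networks") is: one-to-one (an ordinary feedforward net), one-to-many (e.g. image captioning), many-to-one (e.g. sentiment analysis), and many-to-many (e.g. translation, or frame-by-frame video labeling).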

Bidirectional RNNPermalink

  • Check out my artistic talent!

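A bidirectional RNN simply runs one RNN forward over the sequence and a second one backward, then concatenates the two hidden states at each position. In Keras this is a single wrapper layer; a minimal sketch (assuming the MNIST-style 28x28 input used later in this post):

import tensorflow as tf

model = tf.keras.Sequential([
    # forward RNN + backward RNN; their 32-dim outputs are concatenated,
    # so this layer produces 64 features
    tf.keras.layers.Bidirectional(tf.keras.layers.SimpleRNN(32),
                                  input_shape=(28, 28)),
    tf.keras.layers.Dense(10, activation='softmax'),
])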

Cute Homework? LoL

Simple RNN Exercise


NumPy RNN Computation Exercise


Full Output of the Computation Exercise

Building the First RNN

How RNNs Learn, and Their Problems
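RNNs are trained with backpropagation through time: the network is unrolled over the sequence and gradients flow back through every timestep. Because the same weights are multiplied in at each step, gradients on long sequences tend to vanish (or explode), which is exactly the problem the LSTM gates below are designed to address.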

LSTM Gates


How LSTM Works
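In code form, one LSTM step looks like this (a minimal NumPy sketch of the standard formulation; each W_*, b_* pair is a learned gate parameter):

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def lstm_step(x_t, h_prev, c_prev, W_f, W_i, W_o, W_c, b_f, b_i, b_o, b_c):
    z = np.concatenate([x_t, h_prev])   # [x_t; h_{t-1}]
    f = sigmoid(W_f @ z + b_f)          # forget gate: what to erase from the cell
    i = sigmoid(W_i @ z + b_i)          # input gate: what to write
    o = sigmoid(W_o @ z + b_o)          # output gate: what to expose
    c_tilde = np.tanh(W_c @ z + b_c)    # candidate cell content
    c = f * c_prev + i * c_tilde        # new cell state (the long-term memory)
    h = o * np.tanh(c)                  # new hidden state
    return h, c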

GRU
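GRU folds the LSTM's three gates into two (an update gate z and a reset gate r) and drops the separate cell state. One step, in the same sketch style:

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def gru_step(x_t, h_prev, W_z, W_r, W_h, b_z, b_r, b_h):
    xh = np.concatenate([x_t, h_prev])
    z = sigmoid(W_z @ xh + b_z)   # update gate: how much of the state to replace
    r = sigmoid(W_r @ xh + b_r)   # reset gate: how much old state feeds the candidate
    h_tilde = np.tanh(W_h @ np.concatenate([x_t, r * h_prev]) + b_h)
    return (1 - z) * h_prev + z * h_tilde   # blend old state with the candidate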

Implementation

RNN Recap

  • Set the output size of the RNN

    • First initialize the weights, weight shape = (n, m + n) = (3, 9)
      • n is the target (hidden/output) dimension, m is the original feature size
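A quick NumPy shape check of that setup (my own sketch; the numbers follow the bullet, so n = 3 hidden units and m = 6 input features):

import numpy as np

n, m = 3, 6
W = np.zeros((n, m + n))   # (3, 9), as in the note
b = np.zeros(n)
x_t = np.ones(m)           # one timestep of input, shape (6,)
h_prev = np.zeros(n)       # previous hidden state, shape (3,)
h_t = np.tanh(W @ np.concatenate([x_t, h_prev]) + b)
print(h_t.shape)           # (3,) -- the RNN's output size n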

RNN Implementation on MNIST

  1. Setup: import packages
import numpy as np
from pprint import pprint
import matplotlib.pyplot as plt
from sklearn.utils import shuffle 
import tensorflow as tf
  2. Set hyperparameters
learning_rate = 0.001
batch_size = 128
epochs = 10
  3. Load data and preprocess
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

print('Data shape: ', X_train[0].shape)
print('Label: ', y_train[2])
plt.figure(figsize=(6, 6))
plt.imshow(X_train[2], cmap='binary')
plt.show()

X_train = X_train / 255.        # scale pixel values to [0, 1]
X_test = X_test / 255.
y_train = np.eye(10)[y_train]   # one-hot encode the labels
y_test = np.eye(10)[y_test]

def batch_gen(X, y, batch_size):
   # shuffle once per epoch, then walk through it in consecutive mini-batches
   X, y = shuffle(X, y)
   batch_index = 0

   while batch_index < len(X):
      batch_X = X[batch_index : batch_index + batch_size]
      batch_y = y[batch_index : batch_index + batch_size]
      batch_index += batch_size
      yield batch_X, batch_y
  4. Build the graph
def RNN_layer(inputs, units):
   # note: despite the variable name, this builds an LSTM cell (BasicLSTMCell)
   BasicRNN_cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=units)
   # init_state = tf.zeros([tf.shape(inputs)[0], units])
   init_state = BasicRNN_cell.zero_state(tf.shape(inputs)[0], dtype=tf.float32) # shape = (batch, units)
   outputs, states = tf.nn.dynamic_rnn(BasicRNN_cell, inputs, initial_state=init_state)
   return outputs
tf.reset_default_graph()

with tf.name_scope("inputs"):
    input_data = tf.placeholder(dtype=tf.float32, shape=[None, 28, 28], name="input_data")
    y_label = tf.placeholder(dtype=tf.float32, shape=[None, 10], name='label')

with tf.variable_scope("RNN_layer"):
    outputs = RNN_layer(input_data, 32)

with tf.variable_scope("output_layer"):
    RNN_last_outputs = outputs[:,-1,:]  # outputs shape = (batch, timestep, feature)
    prediction = tf.layers.dense(inputs=RNN_last_outputs, units=10)

with tf.name_scope("loss"):
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=prediction,labels=y_label))

with tf.name_scope("optimizer"):
    opt = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)

with tf.name_scope("accuracy"):
    correct_prediction = tf.equal(tf.argmax(prediction, 1), tf.argmax(y_label, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))    

init = tf.global_variables_initializer()
# Alternative: the same graph built with a tf.keras SimpleRNN layer

tf.reset_default_graph()

with tf.name_scope("inputs"):
    input_data = tf.placeholder(dtype=tf.float32, shape=[None, 28, 28], name='input_data')
    y_label = tf.placeholder(dtype=tf.float32, shape=[None, 10], name='label')

with tf.variable_scope("RNN_layer"):
    rnn_out = tf.keras.layers.SimpleRNN(units=32)(input_data)

with tf.variable_scope("output_layer"):
    prediction = tf.layers.dense(inputs=rnn_out, units=10)

with tf.name_scope("loss"):
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=prediction,labels=y_label))

with tf.name_scope("optimizer"):
    opt = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)

with tf.name_scope("accuracy"):
    correct_prediction = tf.equal(tf.argmax(prediction, 1), tf.argmax(y_label, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))    

init = tf.global_variables_initializer()
  5. Train the model
sess = tf.Session()
sess.run(init)
for epoch_index in range(epochs):
    loss_ls, acc_ls = [], []
    get_batch = batch_gen(X_train, y_train, batch_size)
    
    for batch_X, batch_y in get_batch:
        _,  batch_acc, batch_loss = sess.run([opt, accuracy, loss], feed_dict={input_data: batch_X, y_label:batch_y})
        loss_ls.append(batch_loss)
        acc_ls.append(batch_acc)

    print("Epoch ", epoch_index)
    print("Accuracy ", np.mean(acc_ls), "     Loss ", np.mean(loss_ls))
    print("__________________") 
sess.close()