本文共 11538 字,大约阅读时间需要 38 分钟。
来自GitHub上《》【】
We stack multiple LSTM layers to improve our Shakespeare language generation, using a character-level vocabulary.
堆叠多个LSTM层以改进我们的莎士比亚语言生成。 (字符级词汇)
此内容为《深度学习之LSTM案例分析(二)》的补充。
# Stacking LSTM Layers
# ---------------------
# Here we implement an LSTM model on a data set of Shakespeare works.
# We stack multiple LSTM layers for a more accurate representation of
# Shakespearean language, and we use characters instead of words as the
# vocabulary.

import collections
import os
import pickle
import random
import re
import string

import matplotlib.pyplot as plt
import numpy as np
import requests
import tensorflow as tf
from tensorflow.python.framework import ops

ops.reset_default_graph()

# Start a computational graph session.
sess = tf.Session()

# Set RNN Parameters
num_layers = 3  # Number of RNN layers stacked
min_word_freq = 5  # Trim the less frequent words off (not referenced below)
rnn_size = 128  # RNN Model size, has to equal embedding size
epochs = 10  # Number of epochs to cycle through data
batch_size = 100  # Train on this many examples at once
learning_rate = 0.0005  # Learning rate
training_seq_len = 50  # How long of a character group to consider
save_every = 500  # How often to save model checkpoints
eval_every = 50  # How often to evaluate the test sentences
prime_texts = ['thou art more', 'to be or not to', 'wherefore art thou']

# Download/store Shakespeare data
data_dir = 'temp'
data_file = 'shakespeare.txt'
model_path = 'shakespeare_model'
full_model_dir = os.path.join(data_dir, model_path)

# Declare punctuation to remove, everything except hyphens and apostrophes
punctuation = string.punctuation
punctuation = ''.join([x for x in punctuation if x not in ['-', "'"]])

# Make model directory and data directory (exist_ok avoids the
# check-then-create race of a separate os.path.exists test).
os.makedirs(full_model_dir, exist_ok=True)
os.makedirs(data_dir, exist_ok=True)

# Load the Shakespeare data.
print('Loading Shakespeare Data')
data_path = os.path.join(data_dir, data_file)
# Check if file is downloaded.
if not os.path.isfile(data_path):
    print('Not found, downloading Shakespeare texts from www.gutenberg.org')
    shakespeare_url = 'http://www.gutenberg.org/cache/epub/100/pg100.txt'
    # Get Shakespeare text
    response = requests.get(shakespeare_url)
    # Fail loudly on an HTTP error instead of caching an error page as the
    # training corpus.
    response.raise_for_status()
    shakespeare_file = response.content
    # Decode binary into string
    s_text = shakespeare_file.decode('utf-8')
    # Drop first few descriptive paragraphs.
    s_text = s_text[7675:]
    # Remove newlines
    s_text = s_text.replace('\r\n', '')
    s_text = s_text.replace('\n', '')

    # Write to file. The text was decoded as UTF-8, so write it back with the
    # same encoding rather than the platform default.
    with open(data_path, 'w', encoding='utf-8') as out_conn:
        out_conn.write(s_text)
else:
    # If file has been saved, load from that file
    with open(data_path, 'r', encoding='utf-8') as file_conn:
        s_text = file_conn.read().replace('\n', '')

# Output recorded in the original article for the step above (the final line
# is not printed by this script):
#   Loading Shakespeare Data
#   Not found, downloading Shakespeare texts from www.gutenberg.org
#   Done Loading Data.

# Clean and split the text data.
print('Cleaning Text')
# re.escape keeps characters such as ']' , '\' and '^' literal inside the
# character class.
s_text = re.sub(r'[{}]'.format(re.escape(punctuation)), ' ', s_text)
s_text = re.sub(r'\s+', ' ', s_text).strip().lower()

# Split up by characters
char_list = list(s_text)

# Recorded output:
#   Cleaning Text


# Build vocabulary function and transform the text.
def build_vocab(characters):
    """Build index <-> character lookup tables.

    Args:
        characters: iterable of hashable items (here: single characters).

    Returns:
        A tuple ``(ix_to_vocab_dict, vocab_to_ix_dict)``. Distinct items are
        numbered from 1 in the order ``collections.Counter`` yields them;
        index 0 is reserved for the key ``'unknown'``.
    """
    character_counts = collections.Counter(characters)
    # Create vocab --> index mapping
    vocab_to_ix_dict = {key: (inx + 1)
                        for inx, key in enumerate(character_counts)}
    # Add unknown key --> 0 index
    vocab_to_ix_dict['unknown'] = 0
    # Create index --> vocab mapping
    ix_to_vocab_dict = {val: key for key, val in vocab_to_ix_dict.items()}
    return ix_to_vocab_dict, vocab_to_ix_dict


# Build Shakespeare vocabulary
print('Building Shakespeare Vocab by Characters')
ix2vocab, vocab2ix = build_vocab(char_list)
vocab_size = len(ix2vocab)
print('Vocabulary Length = {}'.format(vocab_size))
# Sanity Check
assert(len(ix2vocab) == len(vocab2ix))

# Recorded output:
#   Building Shakespeare Vocab by Characters
#   Vocabulary Length = 40

# Convert text to an array of vocabulary indices; anything missing from the
# vocabulary maps to the 'unknown' index 0.
s_text_ix = np.array([vocab2ix.get(x, 0) for x in char_list])


# Define LSTM RNN Model Class
class LSTM_Model():
    """Stacked character-level LSTM built on the TensorFlow 1.x graph API.

    Attributes set by the constructor and used by callers:
        x_data, y_output: int32 placeholders of shape
            [batch_size, training_seq_len].
        initial_state, final_state: state of the stacked cell before and
            after the unrolled sequence.
        logit_output, model_output: logits and their softmax.
        cost: summed sequence loss divided by batch_size * training_seq_len.
        train_op: Adam update with gradients clipped to global norm 4.5.
    """

    def __init__(self, rnn_size, num_layers, batch_size, learning_rate,
                 training_seq_len, vocab_size, infer_sample=False):
        """Build the graph.

        Args:
            rnn_size: LSTM hidden size; also used as the embedding width.
            num_layers: number of LSTM layers to stack.
            batch_size: examples per training batch.
            learning_rate: learning rate passed to the Adam optimizer.
            training_seq_len: number of unrolled time steps.
            vocab_size: number of distinct symbols.
            infer_sample: when True, batch size and sequence length are both
                forced to 1 so the model can be stepped one symbol at a time.
        """
        self.rnn_size = rnn_size
        self.num_layers = num_layers
        self.vocab_size = vocab_size
        self.infer_sample = infer_sample
        self.learning_rate = learning_rate

        if infer_sample:
            self.batch_size = 1
            self.training_seq_len = 1
        else:
            self.batch_size = batch_size
            self.training_seq_len = training_seq_len

        # One BasicLSTMCell instance per layer. Passing the same instance
        # num_layers times makes every layer share a single set of weights
        # (or raises a variable-reuse error, depending on the TF version).
        # NOTE: this changes variable names, so checkpoints written by the
        # shared-cell version are not loadable into this graph.
        layer_cells = [tf.contrib.rnn.BasicLSTMCell(rnn_size)
                       for _ in range(self.num_layers)]
        self.lstm_cell = tf.contrib.rnn.MultiRNNCell(layer_cells)
        self.initial_state = self.lstm_cell.zero_state(self.batch_size,
                                                       tf.float32)

        self.x_data = tf.placeholder(
            tf.int32, [self.batch_size, self.training_seq_len])
        self.y_output = tf.placeholder(
            tf.int32, [self.batch_size, self.training_seq_len])

        with tf.variable_scope('lstm_vars'):
            # Softmax Output Weights
            W = tf.get_variable('W', [self.rnn_size, self.vocab_size],
                                tf.float32, tf.random_normal_initializer())
            b = tf.get_variable('b', [self.vocab_size], tf.float32,
                                tf.constant_initializer(0.0))

            # Define Embedding
            embedding_mat = tf.get_variable(
                'embedding_mat', [self.vocab_size, self.rnn_size],
                tf.float32, tf.random_normal_initializer())

            embedding_output = tf.nn.embedding_lookup(embedding_mat,
                                                      self.x_data)
            # Split along the time axis into one tensor per step and drop
            # the singleton time dimension.
            rnn_inputs = tf.split(axis=1,
                                  num_or_size_splits=self.training_seq_len,
                                  value=embedding_output)
            rnn_inputs_trimmed = [tf.squeeze(x, [1]) for x in rnn_inputs]

        decoder = tf.contrib.legacy_seq2seq.rnn_decoder
        outputs, last_state = decoder(rnn_inputs_trimmed,
                                      self.initial_state,
                                      self.lstm_cell)

        # RNN outputs
        output = tf.reshape(tf.concat(axis=1, values=outputs), [-1, rnn_size])
        # Logits and output
        self.logit_output = tf.matmul(output, W) + b
        self.model_output = tf.nn.softmax(self.logit_output)

        # The original passed vocab_size as a fourth positional argument,
        # which lands in `average_across_timesteps`; it is dropped here
        # because the default (True) is the same truthy value.
        loss_fun = tf.contrib.legacy_seq2seq.sequence_loss_by_example
        loss = loss_fun(
            [self.logit_output],
            [tf.reshape(self.y_output, [-1])],
            [tf.ones([self.batch_size * self.training_seq_len])])
        self.cost = tf.reduce_sum(loss) / (self.batch_size *
                                           self.training_seq_len)
        self.final_state = last_state

        train_vars = tf.trainable_variables()
        gradients, _ = tf.clip_by_global_norm(
            tf.gradients(self.cost, train_vars), 4.5)
        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        self.train_op = optimizer.apply_gradients(zip(gradients, train_vars))

    def sample(self, sess, words=ix2vocab, vocab=vocab2ix, num=20,
               prime_text='thou art'):
        """Greedily generate up to ``num`` characters after ``prime_text``.

        Feeds x_data with shape (1, 1), so it must be called on a model
        built with ``infer_sample=True``.

        Args:
            sess: TensorFlow session holding the model variables.
            words: index -> character mapping.
            vocab: character -> index mapping.
            num: maximum number of characters to append.
            prime_text: non-empty seed string.

        Returns:
            ``prime_text`` followed by the generated characters. Generation
            stops early when the arg-max prediction is index 0 ('unknown').
        """
        # Reuse the zero state built in __init__ (batch size 1 in inference
        # mode) instead of adding new zero_state ops to the graph per call.
        state = sess.run(self.initial_state)
        char_list = list(prime_text)

        # Warm up the recurrent state on every seed character but the last.
        for char in char_list[:-1]:
            x = np.zeros((1, 1))
            # Characters outside the vocabulary fall back to index 0, the
            # same rule used when the corpus is encoded.
            x[0, 0] = vocab.get(char, 0)
            feed_dict = {self.x_data: x, self.initial_state: state}
            [state] = sess.run([self.final_state], feed_dict=feed_dict)

        out_sentence = prime_text
        char = char_list[-1]
        for _ in range(num):
            x = np.zeros((1, 1))
            x[0, 0] = vocab.get(char, 0)
            feed_dict = {self.x_data: x, self.initial_state: state}
            [model_output, state] = sess.run(
                [self.model_output, self.final_state], feed_dict=feed_dict)
            sample_ix = np.argmax(model_output[0])
            if sample_ix == 0:
                break
            char = words[sample_ix]
            out_sentence = out_sentence + char
        return out_sentence


# Initialize the LSTM Model
lstm_model = LSTM_Model(rnn_size, num_layers, batch_size, learning_rate,
                        training_seq_len, vocab_size)

# Tell TensorFlow we are reusing the scope for the testing
with tf.variable_scope(tf.get_variable_scope(), reuse=True):
    test_lstm_model = LSTM_Model(rnn_size, num_layers, batch_size,
                                 learning_rate, training_seq_len, vocab_size,
                                 infer_sample=True)

# Create model saver
saver = tf.train.Saver(tf.global_variables())

# Create batches for each epoch
num_batches = int(len(s_text_ix)/(batch_size * training_seq_len)) + 1
# Split up text indices into subarrays, of equal size
batches = np.array_split(s_text_ix, num_batches)
# Reshape each split into [batch_size, training_seq_len]
batches = [np.resize(x, [batch_size, training_seq_len]) for x in batches]

# Initialize all variables
init = tf.global_variables_initializer()
sess.run(init)

# Train model
train_loss = []
iteration_count = 1
for epoch in range(epochs):
    # Shuffle the order of the batches
    random.shuffle(batches)
    # Create targets from shuffled batches: each row shifted left by one
    targets = [np.roll(x, -1, axis=1) for x in batches]
    # Run through one epoch
    print('Starting Epoch #{} of {}.'.format(epoch+1, epochs))
    # Reset initial LSTM state every epoch
    # NOTE(review): the state is then carried from batch to batch even though
    # batch order was shuffled above -- confirm this is intended.
    state = sess.run(lstm_model.initial_state)
    for ix, (batch, target) in enumerate(zip(batches, targets)):
        training_dict = {lstm_model.x_data: batch,
                         lstm_model.y_output: target}
        # We need to update initial state for each RNN cell:
        for i, (c, h) in enumerate(lstm_model.initial_state):
            training_dict[c] = state[i].c
            training_dict[h] = state[i].h

        temp_loss, state, _ = sess.run(
            [lstm_model.cost, lstm_model.final_state, lstm_model.train_op],
            feed_dict=training_dict)
        train_loss.append(temp_loss)

        # Print status every 10 gens
        if iteration_count % 10 == 0:
            # There are exactly num_batches batches per epoch (the original
            # reported num_batches+1 as the total).
            summary_nums = (iteration_count, epoch+1, ix+1, num_batches,
                            temp_loss)
            print('Iteration: {}, Epoch: {}, Batch: {} out of {}, Loss: {:.2f}'.format(*summary_nums))

        # Save the model and the vocab
        if iteration_count % save_every == 0:
            # Save model
            model_file_name = os.path.join(full_model_dir, 'model')
            saver.save(sess, model_file_name, global_step=iteration_count)
            print('Model Saved To: {}'.format(model_file_name))
            # Save vocabulary
            dictionary_file = os.path.join(full_model_dir, 'vocab.pkl')
            with open(dictionary_file, 'wb') as dict_file_conn:
                pickle.dump([vocab2ix, ix2vocab], dict_file_conn)

        # Print generated text for each seed sentence
        if iteration_count % eval_every == 0:
            for prime in prime_texts:
                print(test_lstm_model.sample(sess, ix2vocab, vocab2ix,
                                             num=10, prime_text=prime))

        iteration_count += 1

# Plot loss over time
plt.plot(train_loss, 'k-')
plt.title('Sequence to Sequence Loss')
plt.xlabel('Generation')
plt.ylabel('Loss')
plt.show()
(Placeholder)
转载地址:http://bvqn.baihongyu.com/