目录
Sentiment Analysis · Two approaches · Single layer · Multi-layers
Sentiment Analysis
Two approaches
SimpleRNNCell
single layer
multi-layers
RNNCell
Single layer
import os
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers

# Seed both TensorFlow and NumPy random generators.
tf.random.set_seed(22)
np.random.seed(22)
# Set TensorFlow's native-log verbosity environment variable.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# The script is written against the TensorFlow 2.x API.
assert tf.__version__.startswith('2.')

batchsz = 128
# Vocabulary size: only the `total_words` most frequent words are kept.
total_words = 10000
# Every review is padded/truncated to this many tokens.
max_review_len = 80
# Length of each word-embedding vector.
embedding_len = 100

(x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(
    num_words=total_words)
# x_train: [b, 80]
# x_test: [b, 80]
x_train = keras.preprocessing.sequence.pad_sequences(
    x_train, maxlen=max_review_len)
x_test = keras.preprocessing.sequence.pad_sequences(
    x_test, maxlen=max_review_len)

# drop_remainder=True keeps every batch at exactly `batchsz` rows, which the
# fixed-size initial state allocated in MyRNN.__init__ relies on.
db_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
db_train = db_train.shuffle(1000).batch(batchsz, drop_remainder=True)
db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
db_test = db_test.batch(batchsz, drop_remainder=True)

print('x_train shape:', x_train.shape, tf.reduce_max(y_train),
      tf.reduce_min(y_train))
print('x_test shape:', x_test.shape)
class MyRNN(keras.Model):
    """Sentiment classifier built from two manually stepped SimpleRNNCells.

    Token ids are embedded, fed one time step at a time through two stacked
    cells, and the last output of the second cell is mapped to a single
    sigmoid probability.
    """

    def __init__(self, units):
        """Create the embedding, the two RNN cells and the output layer.

        :param units: size of each cell's hidden state
        """
        super(MyRNN, self).__init__()
        # Initial hidden state of each cell, all zeros: [b, units].
        # NOTE: sized with the module-level `batchsz`, so call() only works
        # on batches of exactly that many rows.
        self.state0 = [tf.zeros([batchsz, units])]
        self.state1 = [tf.zeros([batchsz, units])]
        # transform text to embedding representation
        # [b, 80] => [b, 80, 100]
        self.embedding = layers.Embedding(total_words,
                                          embedding_len,
                                          input_length=max_review_len)
        # [b, 80, 100], h_dim: units
        # Each cell turns its per-step input (the 100-dim embedding for
        # cell 0) into a `units`-dim hidden state.
        self.rnn_cell0 = layers.SimpleRNNCell(units, dropout=0.5)
        self.rnn_cell1 = layers.SimpleRNNCell(units, dropout=0.5)
        # fc, [b, 80, 100] => [b, units] => [b, 1]
        self.outlayer = layers.Dense(1)

    def call(self, inputs, training=None):
        """Run the forward pass.

        net(x) / net(x, training=True): train mode
        net(x, training=False): test mode

        :param inputs: [b, 80] integer token ids
        :param training: forwarded to the RNN cells
        :return: [b, 1] sigmoid output, p(y is pos | x)
        """
        # embedding: [b, 80] => [b, 80, 100]
        x = self.embedding(inputs)

        # Every forward pass starts from the stored zero states.
        state0 = self.state0
        state1 = self.state1
        # Step through the sequence; word: [b, 100]
        for word in tf.unstack(x, axis=1):
            # h1 = x*wxh + h0*whh
            # out0: [b, units]
            out0, state0 = self.rnn_cell0(word, state0, training=training)
            # out1: [b, units]
            out1, state1 = self.rnn_cell1(out0, state1, training=training)

        # Only the final step's output is classified: [b, units] => [b, 1]
        x = self.outlayer(out1)
        # p(y is pos|x)
        prob = tf.sigmoid(x)
        return prob
def main():
    """Build MyRNN, then compile, train and evaluate it on the IMDB data."""
    units = 64
    epochs = 4

    model = MyRNN(units)
    # The model returns sigmoid outputs, so binary cross-entropy is used
    # with its default settings.
    model.compile(optimizer=keras.optimizers.Adam(0.001),
                  loss=tf.losses.BinaryCrossentropy(),
                  metrics=['accuracy'])
    model.fit(db_train, epochs=epochs, validation_data=db_test)
    model.evaluate(db_test)
if __name__ == '__main__':
    main()

# Multi-layers
import os
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers

# Seed both TensorFlow and NumPy random generators.
tf.random.set_seed(22)
np.random.seed(22)
# Set TensorFlow's native-log verbosity environment variable.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# The script is written against the TensorFlow 2.x API.
assert tf.__version__.startswith('2.')

batchsz = 128
# the most frequent words
total_words = 10000  # encode a vocabulary of 10000 words
max_review_len = 80  # sentence length: 80 tokens
embedding_len = 100

(x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(
    num_words=total_words)
# x_train: [b, 80]
# x_test: [b, 80]
x_train = keras.preprocessing.sequence.pad_sequences(
    x_train, maxlen=max_review_len)
x_test = keras.preprocessing.sequence.pad_sequences(
    x_test, maxlen=max_review_len)

db_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
# drop_remainder: discard the final batch when it is not full-sized
db_train = db_train.shuffle(1000).batch(batchsz, drop_remainder=True)
db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
db_test = db_test.batch(batchsz, drop_remainder=True)

print('x_train shape:', x_train.shape, tf.reduce_max(y_train),
      tf.reduce_min(y_train))
print('x_test shape:', x_test.shape)
class MyRNN(keras.Model):
    """Sentiment classifier built from two stacked SimpleRNN layers.

    Token ids are embedded, passed through a Sequential stack of two
    SimpleRNN layers, and the result is mapped to a single sigmoid
    probability.
    """

    def __init__(self, units):
        """Create the embedding, the RNN stack and the output layer.

        :param units: size of each SimpleRNN layer's hidden state
        """
        super(MyRNN, self).__init__()
        # transform text to embedding representation
        # [b, 80] => [b, 80, 100]; embedding_len=100 means each word is
        # represented by a vector of length 100
        self.embedding = layers.Embedding(total_words,
                                          embedding_len,
                                          input_length=max_review_len)
        # [b, 80, 100], h_dim: units
        # The first layer returns its output at every step
        # (return_sequences=True) so the second layer receives a sequence.
        self.rnn = keras.Sequential([
            layers.SimpleRNN(units,
                             dropout=0.5,
                             return_sequences=True,
                             unroll=True),
            layers.SimpleRNN(units, dropout=0.5, unroll=True),
        ])
        # fc, [b, 80, 100] => [b, units] => [b, 1]; yields the
        # classification result
        self.outlayer = layers.Dense(1)

    def call(self, inputs, training=None):
        """Run the forward pass.

        net(x) / net(x, training=True): train mode
        net(x, training=False): test mode

        :param inputs: [b, 80] integer token ids
        :param training: whether the computation is train or test;
            forwarded to the RNN stack
        :return: [b, 1] sigmoid output, p(y is pos | x)
        """
        # embedding: [b, 80] => [b, 80, 100]
        x = self.embedding(inputs)
        # rnn compute, x: [b, 80, 100] => [b, units]
        x = self.rnn(x, training=training)
        # out: [b, units] => [b, 1]
        x = self.outlayer(x)
        # p(y is pos|x)
        prob = tf.sigmoid(x)
        return prob
def main():
    """Build MyRNN, then compile, train and evaluate it on the IMDB data."""
    units = 64
    epochs = 4

    model = MyRNN(units)
    # The model returns sigmoid outputs, so binary cross-entropy is used
    # with its default settings.
    model.compile(optimizer=keras.optimizers.Adam(0.001),
                  loss=tf.losses.BinaryCrossentropy(),
                  metrics=['accuracy'])
    model.fit(db_train, epochs=epochs, validation_data=db_test)
    model.evaluate(db_test)
if __name__ == '__main__':
    main()
查看更多关于RNN与情感分类问题实战-加载IMDB数据集的详细内容...
声明:本文来自网络,不代表【好得很程序员自学网】立场,转载请注明出处:http://haodehen.cn/did127414