I want to re-implement this word-embedding network in PyTorch.
Here is the original TensorFlow code (version 0.12.1):
import tensorflow as tf

class Network(object):
    def __init__(
            self, user_length, item_length, num_classes, user_vocab_size, item_vocab_size, fm_k, n_latent, user_num, item_num,
            embedding_size, filter_sizes, num_filters, l2_reg_lambda=0.0, l2_reg_V=0.0):
        # Skip the embedding
        pooled_outputs_u = []
        for i, filter_size in enumerate(filter_sizes):
            with tf.name_scope("user_conv-maxpool-%s" % filter_size):
                # Convolution Layer
                filter_shape = [filter_size, embedding_size, 1, num_filters]
                W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
                b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
                conv = tf.nn.conv2d(
                    self.embedded_users,
                    W,
                    strides=[1, 1, 1, 1],
                    padding="VALID",
                    name="conv")
                # Apply nonlinearity
                h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
                # Maxpooling over the outputs
                pooled = tf.nn.max_pool(
                    h,
                    ksize=[1, user_length - filter_size + 1, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding='VALID',
                    name="pool")
                pooled_outputs_u.append(pooled)
        num_filters_total = num_filters * len(filter_sizes)
        self.h_pool_u = tf.concat(3, pooled_outputs_u)
        self.h_pool_flat_u = tf.reshape(self.h_pool_u, [-1, num_filters_total])

        with tf.name_scope("dropout"):
            self.h_drop_u = tf.nn.dropout(self.h_pool_flat_u, 1.0)
            self.h_drop_i = tf.nn.dropout(self.h_pool_flat_i, 1.0)

        with tf.name_scope("get_fea"):
            Wu = tf.get_variable(
                "Wu",
                shape=[num_filters_total, n_latent],
                initializer=tf.contrib.layers.xavier_initializer())
            bu = tf.Variable(tf.constant(0.1, shape=[n_latent]), name="bu")
            self.u_fea = tf.matmul(self.h_drop_u, Wu) + bu
            # self.u_fea = tf.nn.dropout(self.u_fea, self.dropout_keep_prob)
            Wi = tf.get_variable(
                "Wi",
                shape=[num_filters_total, n_latent],
                initializer=tf.contrib.layers.xavier_initializer())
            bi = tf.Variable(tf.constant(0.1, shape=[n_latent]), name="bi")
            self.i_fea = tf.matmul(self.h_drop_i, Wi) + bi
            # self.i_fea = tf.nn.dropout(self.i_fea, self.dropout_keep_prob)

        with tf.name_scope('fm'):
            self.z = tf.nn.relu(tf.concat(1, [self.u_fea, self.i_fea]))
            # self.z = tf.nn.dropout(self.z, self.dropout_keep_prob)
            WF1 = tf.Variable(
                tf.random_uniform([n_latent * 2, 1], -0.1, 0.1), name='fm1')
            Wf2 = tf.Variable(
                tf.random_uniform([n_latent * 2, fm_k], -0.1, 0.1), name='fm2')
            one = tf.matmul(self.z, WF1)
            inte1 = tf.matmul(self.z, Wf2)
            inte2 = tf.matmul(tf.square(self.z), tf.square(Wf2))
            inter = (tf.square(inte1) - inte2) * 0.5
            inter = tf.nn.dropout(inter, self.dropout_keep_prob)
            inter = tf.reduce_sum(inter, 1, keep_dims=True)
            print inter
            b = tf.Variable(tf.constant(0.1), name='bias')
And here is the PyTorch (version 1.0) code that I have tried:
class Network(torch.nn.Module):
    def __init__(
            self, user_length, item_length, num_classes, user_vocab_size, item_vocab_size, fm_k, n_latent, user_num, item_num,
            embedding_size, filter_sizes, num_filters, l2_reg_lambda=0.0, l2_reg_V=0.0):
        pooled_outputs_u = []

    def forward(self):
I mainly have trouble converting the convolutional layers. PyTorch is much easier to use since we can skip a lot of the explicit W and b definitions (see the small snippet at the end of the question). Can someone help me with the rest? Thanks.
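For reference, this is the kind of single call I mean; if I understand the PyTorch API correctly, it creates both the weight and the bias internally (the sizes below are just made-up example values):

import torch

filter_size, embedding_size, num_filters = 3, 300, 100  # example values only
# one Conv2d replaces the explicit W and b tensors from the TF code
conv = torch.nn.Conv2d(1, num_filters, kernel_size=(filter_size, embedding_size))
print(conv.weight.shape)  # torch.Size([100, 1, 3, 300])
print(conv.bias.shape)    # torch.Size([100])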
Someone who is an expert in TensorFlow might wonder about the differences when switching from one library to the other. The transition from TensorFlow to PyTorch isn't too complex, because PyTorch offers a Pythonic approach to most problems.
That being said, you can achieve everything in TensorFlow that you can do in PyTorch, just with more effort (and you get more control as a bonus). It is also worth noting that both frameworks support distributed execution and provide high-level interfaces for defining clusters.
If you're just starting to explore deep learning, you should learn PyTorch first due to its popularity in the research community. However, if you're familiar with machine learning and deep learning and focused on getting an industry job as soon as possible, learn TensorFlow first.
The PyTorch equivalent of the TensorFlow part of the code is below, explained with comments in the code itself. You have to import truncnorm from scipy to mimic tf.truncated_normal.
from scipy.stats import truncnorm  # extra import, used to mimic tf.truncated_normal initialisation

pooled_outputs_u = []
for i, filter_size in enumerate(filter_sizes):
    # PyTorch conv weights are laid out as [out_channels, in_channels, height, width],
    # so TF's filter_shape [filter_size, embedding_size, 1, num_filters] becomes:
    filter_shape = [num_filters, 1, filter_size, embedding_size]
    # W is just a tensor now that will act as the weight of the conv layer;
    # truncating at 2 standard deviations matches tf.truncated_normal(stddev=0.1)
    W = torch.tensor(truncnorm.rvs(-2, 2, scale=0.1, size=filter_shape), dtype=torch.float32)
    # bias initialized with 0.1, as in the TF code
    b = torch.zeros([num_filters]) + 0.1
    # conv layer with the same parameters as the TensorFlow layer (more on this in link 1);
    # note that self.embedded_users must be NCHW here: [batch, 1, user_length, embedding_size]
    conv = torch.nn.functional.conv2d(self.embedded_users, W, bias=b, stride=1, padding=0)
    # you could use torch.nn.Sequential to bundle these into a single module,
    # but it is spelled out like this for better understanding
    h = torch.relu(conv)
    # look at link 2 for what a max-pool layer does; here it pools over the whole
    # remaining length, leaving a [batch, num_filters, 1, 1] tensor
    pooled = torch.nn.functional.max_pool2d(
        h, kernel_size=(user_length - filter_size + 1, 1), stride=1, padding=0)
    pooled_outputs_u.append(pooled)

num_filters_total = num_filters * len(filter_sizes)
# channels live in dimension 1 in PyTorch (dimension 3 in TF's NHWC layout)
self.h_pool_u = torch.cat(pooled_outputs_u, dim=1)
self.h_pool_flat_u = torch.reshape(self.h_pool_u, (-1, num_filters_total))
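If you prefer the idiomatic nn.Module style, which is what actually lets you skip the explicit W and b definitions, the same block could be written roughly as below. This is only a sketch: the class name UserConvPool is made up, and it assumes embedded_users has already been permuted to PyTorch's NCHW layout ([batch, 1, user_length, embedding_size]).

import torch
import torch.nn as nn
import torch.nn.functional as F

class UserConvPool(nn.Module):
    """Conv + max-pool block for the user text, mirroring the TF loop above."""
    def __init__(self, user_length, embedding_size, filter_sizes, num_filters):
        super(UserConvPool, self).__init__()
        self.user_length = user_length
        self.filter_sizes = filter_sizes
        # one Conv2d per filter size; each layer creates its own W and b internally
        self.convs = nn.ModuleList([
            nn.Conv2d(1, num_filters, kernel_size=(fs, embedding_size))
            for fs in filter_sizes
        ])

    def forward(self, embedded_users):
        # embedded_users: [batch, 1, user_length, embedding_size] (NCHW, unlike TF's NHWC)
        pooled_outputs = []
        for fs, conv in zip(self.filter_sizes, self.convs):
            h = F.relu(conv(embedded_users))  # [batch, num_filters, user_length - fs + 1, 1]
            pooled = F.max_pool2d(h, kernel_size=(self.user_length - fs + 1, 1))  # [batch, num_filters, 1, 1]
            pooled_outputs.append(pooled)
        h_pool = torch.cat(pooled_outputs, dim=1)  # channels are dim 1 in PyTorch
        return h_pool.view(h_pool.size(0), -1)     # [batch, num_filters_total]

The same module can be reused for the item side by passing item_length instead of user_length, and because the weights are registered as parameters they will actually be trained, unlike the plain tensors above.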
Reference:
Link 1
Link 2
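Since the question also asks about the rest of the network (the get_fea and fm blocks), here is a minimal sketch of how that part of the TF code could look in PyTorch. The name FMHead is made up for illustration, it assumes h_drop_u / h_drop_i are the flattened features produced above, and note that torch's Dropout takes the drop probability whereas TF's takes the keep probability.

import torch
import torch.nn as nn
import torch.nn.functional as F

class FMHead(nn.Module):
    """Sketch of the get_fea + fm part of the TF code (name is hypothetical)."""
    def __init__(self, num_filters_total, n_latent, fm_k, dropout_keep_prob=1.0):
        super(FMHead, self).__init__()
        # Wu/bu and Wi/bi from the TF code become two Linear layers
        self.user_fc = nn.Linear(num_filters_total, n_latent)
        self.item_fc = nn.Linear(num_filters_total, n_latent)
        # first- and second-order FM weights (WF1 and Wf2 in the TF code)
        self.WF1 = nn.Parameter(torch.empty(n_latent * 2, 1).uniform_(-0.1, 0.1))
        self.Wf2 = nn.Parameter(torch.empty(n_latent * 2, fm_k).uniform_(-0.1, 0.1))
        self.b = nn.Parameter(torch.tensor(0.1))
        # torch dropout takes the drop probability, TF takes the keep probability
        self.dropout = nn.Dropout(p=1.0 - dropout_keep_prob)

    def forward(self, h_drop_u, h_drop_i):
        u_fea = self.user_fc(h_drop_u)
        i_fea = self.item_fc(h_drop_i)
        z = F.relu(torch.cat([u_fea, i_fea], dim=1))   # tf.concat(1, ...) -> dim=1
        one = torch.matmul(z, self.WF1)                # first-order term
        inte1 = torch.matmul(z, self.Wf2)
        inte2 = torch.matmul(z * z, self.Wf2 * self.Wf2)
        inter = 0.5 * (inte1 * inte1 - inte2)          # second-order FM interactions
        inter = self.dropout(inter)
        inter = inter.sum(dim=1, keepdim=True)         # keep_dims=True -> keepdim=True
        # the TF snippet in the question stops after defining the bias;
        # a typical FM score would combine the terms like this:
        return one + inter + self.b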