
Implementing seq2seq with beam search

I'm implementing a seq2seq model based on the example code that TensorFlow provides, and I want to get the top-5 decoder outputs to do reinforcement learning.

However, the translation model is implemented with an attention decoder, so I need to implement beam search to get the top-k results.

Here is the part of the code I have implemented so far (this code is added to translate.py).

Reference: https://github.com/tensorflow/tensorflow/issues/654

with tf.Graph().as_default():
  beam_size = FLAGS.beam_size # Number of hypotheses in beam
  num_symbols = FLAGS.tar_vocab_size # Output vocabulary size
  embedding_size = 10
  num_steps = 5
  embedding = tf.zeros([num_symbols, embedding_size])
  output_projection = None

  log_beam_probs, beam_symbols, beam_path = [], [], []

  def beam_search(prev, i):
    if output_projection is not None:
      prev = tf.nn.xw_plus_b(prev, output_projection[0], output_projection[1])

    probs = tf.log(tf.nn.softmax(prev))

    if i > 1:
      probs = tf.reshape(probs + log_beam_probs[-1], [-1, beam_size * num_symbols])

    best_probs, indices = tf.nn.top_k(probs, beam_size)
    indices = tf.stop_gradient(tf.squeeze(tf.reshape(indices, [-1, 1])))
    best_probs = tf.stop_gradient(tf.reshape(best_probs, [-1, 1]))

    symbols = indices % num_symbols      # which word in vocabulary
    beam_parent = indices // num_symbols # which hypothesis it came from

    beam_symbols.append(symbols)
    beam_path.append(beam_parent)
    log_beam_probs.append(best_probs)

    return tf.nn.embedding_lookup(embedding, symbols)

  # Setting up graph.
  inputs = [tf.placeholder(tf.float32, shape=[None, num_symbols]) for i in range(num_steps)]

  for i in range(num_steps):
    beam_search(inputs[i], i+1)

  input_vals = tf.zeros([1, beam_size], dtype=tf.float32)

  input_feed = {inputs[i]: input_vals[i][:beam_size, :] for i in xrange(num_steps)}
  output_feed = beam_symbols + beam_path + log_beam_probs
  session = tf.InteractiveSession()
  outputs = session.run(output_feed, feed_dict=input_feed)

  print("Top_5 Sentences ")
  for predicted in enumerate(outputs[:5]):
    print(list(predicted))
    print("\n")

In the input_feed part, there is an error:

ValueError: Shape (1, 12) must have rank 1

Is there any problem with my code for doing beam search?

asked May 04 '16 by IH_K

People also ask

Is Beam search used in training?

This search algorithm is often used in translation. Beam search is most often used at test time, not during training.

How does beam search algorithm work?

Beam search is a heuristic search technique that expands only the W best nodes at each level. It progresses level by level, moving downwards only from the best W nodes at each level, and uses breadth-first search to build its search tree.
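
To make that concrete, here is a minimal, framework-free Python sketch of the idea. The per-step log-probabilities are made-up stand-ins for a decoder's softmax output and, unlike a real decoder, do not depend on the tokens chosen so far:

import math

def beam_search_demo(step_log_probs, beam_size):
    # Each hypothesis is (sequence_of_token_ids, total_log_prob).
    beams = [([], 0.0)]
    for log_probs in step_log_probs:
        # Expand every surviving hypothesis by every vocabulary entry...
        candidates = [(seq + [tok], score + lp)
                      for seq, score in beams
                      for tok, lp in enumerate(log_probs)]
        # ...then keep only the W = beam_size best partial hypotheses.
        candidates.sort(key=lambda c: c[1], reverse=True)
        beams = candidates[:beam_size]
    return beams

# Three made-up steps over a four-word vocabulary.
steps = [[math.log(p) for p in dist] for dist in
         [[0.5, 0.2, 0.2, 0.1], [0.1, 0.6, 0.2, 0.1], [0.3, 0.3, 0.3, 0.1]]]
for seq, score in beam_search_demo(steps, beam_size=2):
    print(seq, score)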

Is Seq2Seq a LSTM?

The seq2seq model is an architecture based on multiple LSTM networks (or sometimes GRUs). Under the hood, the model comprises two main components: an encoder and a decoder.

How does a Seq2Seq model work?

A Seq2Seq model is a model that takes a sequence of items (words, letters, time series, etc) and outputs another sequence of items. In the case of Neural Machine Translation, the input is a series of words, and the output is the translated series of words.
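
As a rough, purely illustrative sketch (the stand-in functions below replace what would be learned LSTM/GRU cells and a softmax in a real model), the encoder/decoder split and the autoregressive decoding loop look like this:

def encode(source_tokens):
    # Stand-in encoder: a real model runs an RNN over word embeddings and
    # returns its final hidden state; here the "state" is just the source.
    return {"source": list(source_tokens)}

def decode_step(state, prev_token, t):
    # Stand-in decoder step: a real model applies an RNN cell plus a softmax
    # over the vocabulary, conditioned on prev_token and the encoder state.
    return state["source"][t].upper() if t < len(state["source"]) else "<eos>"

def translate(source_tokens, max_len=10):
    state = encode(source_tokens)
    outputs, prev = [], "<go>"
    for t in range(max_len):
        prev = decode_step(state, prev, t)  # feed the previous output back in
        if prev == "<eos>":
            break
        outputs.append(prev)
    return outputs

print(translate(["je", "suis", "etudiant"]))  # ['JE', 'SUIS', 'ETUDIANT']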


1 Answer

A tried and true demo:

# -*- coding: utf-8 -*-

from __future__ import unicode_literals, print_function
from __future__ import absolute_import
from __future__ import division

import tensorflow as tf

tf.app.flags.DEFINE_integer('beam_size', 4, 'beam size for beam search decoding.')
tf.app.flags.DEFINE_integer('vocab_size', 40, 'vocabulary size.')
tf.app.flags.DEFINE_integer('batch_size', 5, 'the batch size.')
tf.app.flags.DEFINE_integer('num_steps', 10, 'number of decoding steps.')
tf.app.flags.DEFINE_integer('embedding_size', 50, 'the embedding size.')

FLAGS = tf.app.flags.FLAGS


with tf.Graph().as_default():
    batch_size = FLAGS.batch_size
    beam_size = FLAGS.beam_size  # Number of hypotheses in beam
    vocab_size = FLAGS.vocab_size  # Output vocabulary size
    num_steps = FLAGS.num_steps
    embedding_size = FLAGS.embedding_size
    embedding = tf.random_normal([vocab_size, embedding_size], -2, 4, dtype=tf.float32, seed=0)
    output_projection = [
        tf.random_normal([embedding_size, vocab_size], mean=2, stddev=1, dtype=tf.float32, seed=0),
        tf.random_normal([vocab_size], mean=0, stddev=1, dtype=tf.float32, seed=0),
    ]
    index_base = tf.reshape(
        tf.tile(tf.expand_dims(tf.range(batch_size) * beam_size, axis=1), [1, beam_size]), [-1])
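    # index_base = [0, ..., 0, beam_size, ..., beam_size, 2*beam_size, ...]: the row
    # offset of each batch element in the flattened (batch_size*beam_size) dimension,
    # so that per-batch beam parents can be gathered from flattened tensors later on.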

    log_beam_probs, beam_symbols = [], []

    def beam_search(prev, i):
        if output_projection is not None:
            prev = tf.nn.xw_plus_b(prev, output_projection[0], output_projection[1])
            # (batch_size*beam_size, embedding_size) -> (batch_size*beam_size, vocab_size)

        log_probs = tf.nn.log_softmax(prev)

        if i > 1:
            # total probability
            log_probs = tf.reshape(tf.reduce_sum(tf.stack(log_beam_probs, axis=1), axis=1) + log_probs,
                                   [-1, beam_size * vocab_size])
            # (batch_size*beam_size, vocab_size) -> (batch_size, beam_size*vocab_size)

        best_probs, indices = tf.nn.top_k(log_probs, beam_size)
        # (batch_size, beam_size)
        indices = tf.squeeze(tf.reshape(indices, [-1, 1]))
        best_probs = tf.reshape(best_probs, [-1, 1])
        # (batch_size*beam_size,) and (batch_size*beam_size, 1)

        symbols = indices % vocab_size       # which word in vocabulary
        beam_parent = indices // vocab_size  # which hypothesis it came from

        beam_symbols.append(symbols)

        # beam_symbols will stack to (batch_size*beam_size, num_steps)
        # map each beam parent into the flattened (batch_size*beam_size) dimension
        real_path = beam_parent + index_base
        # get rid of the previous probability
        if i > 1:
            pre_sum = tf.reduce_sum(tf.stack(log_beam_probs, axis=1), axis=1)
            pre_sum = tf.gather(pre_sum, real_path)
        else:
            pre_sum = 0
        log_beam_probs.append(best_probs-pre_sum)
        # adapt the previous symbols according to the current symbol
        if i > 1:
            for j in range(i)[:0:-1]:
                beam_symbols[j-1] = tf.gather(beam_symbols[j-1], real_path)
                log_beam_probs[j-1] = tf.gather(log_beam_probs[j-1], real_path)

        return tf.nn.embedding_lookup(embedding, symbols)
        # (batch_size*beam_size, embedding_size)

    # Setting up graph.
    init_input = tf.placeholder(tf.float32, shape=[batch_size, embedding_size])
    next_input = init_input

    for i in range(num_steps):
        next_input = beam_search(next_input, i+1)

    seq_rank = tf.stack(values=beam_symbols, axis=1)
    # (batch_size*beam_size, num_steps)
    seq_rank = tf.reshape(seq_rank, [batch_size, beam_size, num_steps])
    # (batch_size, beam_size, num_steps)

    init_in = tf.random_uniform([batch_size], minval=0, maxval=vocab_size, dtype=tf.int32, seed=0)
    init_emb = tf.squeeze(tf.nn.embedding_lookup(embedding, init_in))
    session = tf.InteractiveSession()
    init_emb = init_emb.eval()

    seq_rank = session.run(seq_rank, feed_dict={init_input: init_emb})
    best_seq = seq_rank[:, 0, :]  # beam 0 is the highest-probability hypothesis
    for i in range(batch_size):
        print("batch %s" % i, end=": ")
        print(best_seq[i])

It is simplified from the beam search in my own seq2seq model; tested with Python 2.7 and TensorFlow 1.4.
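
If it helps to inspect the result, here is a hedged sketch of how the returned seq_rank array (shape (batch_size, beam_size, num_steps), containing vocabulary indices) could be mapped back to tokens. The vocabulary list is invented purely for illustration; beam index 0 is the highest-probability hypothesis since tf.nn.top_k returns values in descending order:

# Hypothetical post-processing of the demo's output; the vocab list is made up.
vocab = ["tok%d" % i for i in range(FLAGS.vocab_size)]

for b in range(FLAGS.batch_size):
    for k in range(FLAGS.beam_size):
        tokens = [vocab[idx] for idx in seq_rank[b, k, :]]
        print("batch %d, beam %d:" % (b, k), " ".join(tokens))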

answered Oct 24 '22 by Lerner Zhang