Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

How can I diagnose strange OpenGL glitches?

Tags:

I'm trying to use a geometry shader to inflate points into quads for some simple 2D rendering. Most frames render exactly as I expect, but every so often some of the vertices render with some of their attributes incorrect. I've spent some time simplifying this down from a bigger program with many moving parts, so that I'm doing the bare minimum during rendering, but unfortunately there's still an awful lot of setup. The complete code is here:

http://pastebin.com/mQyRcTjJ

#!/usr/bin/env python

# Copyright 2011-2013, Andrew Wilson
# Licensed under the MIT license:
# http://www.opensource.org/licenses/MIT

# memglitch.py

from OpenGL import GL
import sys
import pygame
import pygame.image
import pygame.key
import pygame as PG
import numpy
import hashlib
import collections
import ctypes


######## SHADERS ########

# Vertex stage: forwards the per-point attributes unchanged to the geometry
# shader through the VertexData interface block. It does not write
# gl_Position; the geometry shader computes clip-space positions itself.
#
# The members of the VertexData block are declared in the same order as in
# the geometry shader's matching input block (position, size, rotation,
# layer). GLSL requires interface blocks that are matched between stages to
# declare the same sequence of member names and types; declaring them in a
# different order is not portable and leaves the inter-stage layout at the
# mercy of the driver.
vertex_shader = '''\
#version 330

uniform vec2 screen_dimensions;
uniform vec2 cam_position;
uniform float zoom;

layout(location=0) in vec2 position;
layout(location=1) in vec2 size;
layout(location=2) in vec2 other;

out VertexData
{
   vec2 position;
   vec2 size;
   float rotation;
   float layer;

} outData;

void main()
{
    outData.position = position;
    outData.size = size;
    outData.rotation = other.x;
    outData.layer = other.y;
}
'''

# Geometry stage: takes one point per invocation and emits a four-vertex
# triangle strip (a quad) for it. Each corner is the point's position plus
# +/-0.5 * size, multiplied by a 2x2 matrix built from the point's rotation,
# then mapped by calcPosition as (pos - cam_position) * zoom /
# screen_dimensions. Every emitted vertex carries a texture coordinate (one
# corner of the unit square) and the point's layer value, and copies
# gl_PrimitiveIDIn to gl_PrimitiveID.
geometry_shader = '''\
#version 330
#extension GL_EXT_gpu_shader4 : enable

layout (points) in;
layout (triangle_strip, max_vertices = 4) out;

uniform vec2 screen_dimensions;
uniform vec2 cam_position;
uniform float zoom;

in VertexData
{
   vec2 position;
   vec2 size;
   float rotation;
   float layer;
} vert[];

out FragData
{
    smooth vec2 texcoord;
    smooth float layer;
} vertOut;

vec4 calcPosition(in vec2 pos)
{
    // Transform a position in world-space into screen-space
    vec4 result;
    result.xy =
        (
            pos
            - cam_position
        )
        * zoom
        / screen_dimensions;
    result.zw = vec2(0.0, 1.0);
    return result;
}

void main()
{
    // Inflate each input point into a quad.
    float r = vert[0].rotation;
    mat2 rotation_matrix = mat2(cos(r), -sin(r), sin(r), cos(r));
    vec2 currentPos;
    vec4 texcoords = vec4(0,0,1,1);

    currentPos = vert[0].position + vert[0].size * vec2(-0.5, -0.5) * rotation_matrix;
    gl_Position = calcPosition(currentPos);
    vertOut.texcoord = texcoords.xy;
    vertOut.layer = vert[0].layer;
    gl_PrimitiveID = gl_PrimitiveIDIn;
    EmitVertex();

    currentPos = vert[0].position + vert[0].size * vec2(-0.5, 0.5) * rotation_matrix;
    gl_Position = calcPosition(currentPos);
    vertOut.texcoord = texcoords.xw;
    vertOut.layer = vert[0].layer;
    gl_PrimitiveID = gl_PrimitiveIDIn;
    EmitVertex();

    currentPos = vert[0].position + vert[0].size * vec2(0.5, -0.5) * rotation_matrix;
    gl_Position = calcPosition(currentPos);
    vertOut.texcoord = texcoords.zy;
    vertOut.layer = vert[0].layer;
    gl_PrimitiveID = gl_PrimitiveIDIn;
    EmitVertex();

    currentPos = vert[0].position + vert[0].size * vec2(0.5, 0.5) * rotation_matrix;
    gl_Position = calcPosition(currentPos);
    vertOut.texcoord = texcoords.zw;
    vertOut.layer = vert[0].layer;
    gl_PrimitiveID = gl_PrimitiveIDIn;
    EmitVertex();
}

'''

# Fragment stage: samples the texture_atlas array texture at
# (texcoord.x, texcoord.y, layer) and writes the result to fragcolor.
# The zoom uniform is declared here but not used by this shader.
fragment_shader = '''\
#version 330
#extension GL_EXT_gpu_shader4 : enable

uniform sampler2DArray texture_atlas;

uniform float zoom;

in FragData
{
    smooth vec2 texcoord;
    smooth float layer;
};

layout(location=0) out vec4 fragcolor;

void main()
{
    fragcolor = texture(
        texture_atlas,
        vec3(texcoord, float(layer)));
}
'''


######## TEXTURE_SETUP ########

def make_texture_array(
        image,
        across=8,
        down=8):
    '''
    Build a GL_TEXTURE_2D_ARRAY out of a grid of tiles cut from `image`.

    The image is divided into `across` columns and `down` rows. Tiles are
    visited row by row, left to right within a row, and each tile becomes
    one layer of the array (across*down layers in total).

    Returns the OpenGL texture name. The texture is left bound to
    GL_TEXTURE_2D_ARRAY.
    '''
    source_width, source_height = image.get_size()
    tile_w = source_width // across
    tile_h = source_height // down

    # One RGBA byte string per tile, in layer order. The third argument to
    # tostring is passed as True, exactly as for every tile.
    tiles = [
        pygame.image.tostring(
            image.subsurface((col*tile_w, row*tile_h, tile_w, tile_h)),
            "RGBA",
            True)
        for row in xrange(down)
        for col in xrange(across)]
    pixels = "".join(tiles)

    texture = GL.glGenTextures(1)
    GL.glBindTexture(GL.GL_TEXTURE_2D_ARRAY, texture)

    # Nearest-neighbour sampling, clamped edges, and a single mip level.
    parameters = (
        (GL.GL_TEXTURE_MIN_FILTER, GL.GL_NEAREST),
        (GL.GL_TEXTURE_MAG_FILTER, GL.GL_NEAREST),
        (GL.GL_TEXTURE_WRAP_S, GL.GL_CLAMP_TO_EDGE),
        (GL.GL_TEXTURE_WRAP_T, GL.GL_CLAMP_TO_EDGE),
        (GL.GL_TEXTURE_BASE_LEVEL, 0),
        (GL.GL_TEXTURE_MAX_LEVEL, 0))
    for name, value in parameters:
        GL.glTexParameteri(GL.GL_TEXTURE_2D_ARRAY, name, value)

    # Upload every layer in one call: depth is the number of tiles.
    GL.glTexImage3D(
        GL.GL_TEXTURE_2D_ARRAY, 0, GL.GL_RGBA8,
        tile_w, tile_h, across*down, 0,
        GL.GL_RGBA, GL.GL_UNSIGNED_BYTE, pixels)

    return texture


######## SHADER SETUP ########

def create_shader_program(resources):
    '''
    Compile and link the shader program.

    Compiles the module-level vertex, fragment and geometry shader sources
    (in that order) with make_shader, links them with make_program, and
    stores the results on `resources`:

      resources.shader_program_object -- the OpenGL program object
      resources.uniform_locations     -- dictionary mapping each active
                                         uniform's name to its location

    Progress and any compiler/linker messages are written to stderr.
    Exceptions raised by make_shader or make_program are not caught here.
    '''
    writelog = sys.stderr.write

    def report(infolog):
        # Shared "finished" message for both compile and link steps.
        if len(infolog) == 0:
            writelog("...completed\n")
        else:
            writelog("...completed with messages:\n")
            writelog(infolog)
            writelog("\n")

    shaders = []

    def compile_shader(source, gltype, name):
        writelog("Compiling {0} shader...\n".format(name))
        shader = make_shader(gltype, source)
        report(GL.glGetShaderInfoLog(shader))
        shaders.append(shader)

    compile_shader(vertex_shader, GL.GL_VERTEX_SHADER, 'vertex')
    compile_shader(fragment_shader, GL.GL_FRAGMENT_SHADER, 'fragment')
    compile_shader(geometry_shader, GL.GL_GEOMETRY_SHADER, 'geometry')

    writelog("Compiling shader program...\n")
    program = make_program(*shaders)
    report(GL.glGetProgramInfoLog(program))

    active_uniforms = GL.glGetProgramiv(program, GL.GL_ACTIVE_UNIFORMS)
    resources.uniform_locations = {}
    for index in range(active_uniforms):
        name, size, data_type = GL.glGetActiveUniform(program, index)
        # `index` only enumerates the active uniforms; it is not the value
        # glUniform* expects. The location has to be queried by name.
        resources.uniform_locations[name] = GL.glGetUniformLocation(
            program, name)
    resources.shader_program_object = program

def make_shader(shadertype, source):
    '''
    Compile and return an OpenGL shader object.

    shadertype -- the GL shader type enum passed to glCreateShader
    source     -- the shader source text

    On a failed compile, writes a message to stderr, prints the shader info
    log to stdout, deletes the shader object and raises Exception.
    '''
    shader = GL.glCreateShader(shadertype)
    GL.glShaderSource(shader, source)
    GL.glCompileShader(shader)
    # Receive the status in a zero-initialised signed int, the same way
    # make_program does. Starting from zero means an unwritten status reads
    # as failure rather than as success.
    status = ctypes.c_int()
    GL.glGetShaderiv(shader, GL.GL_COMPILE_STATUS, status)
    if not status.value:
        sys.stderr.write("Failed to compile shader.\n")
        print(GL.glGetShaderInfoLog(shader))
        GL.glDeleteShader(shader)
        raise Exception("Failed to compile shader.")
    return shader

def make_program(*shaders):
    '''
    Compile and return an OpenGL program object.
    '''
    program = GL.glCreateProgram()
    for shader in shaders:
        GL.glAttachShader(program, shader)
    GL.glLinkProgram(program)
    retval = ctypes.c_int()
    GL.glGetProgramiv(program, GL.GL_LINK_STATUS, retval)
    if not retval:
        print >> sys.stderr, "Failed to link shader program."
        print GL.glGetProgramInfoLog(program)
        GL.glDeleteProgram(program)
        raise Exception("Failed to link shader program.")
    return program


######## RESOURCE ALLOCATION ########

class Resources(object):
    '''
    Plain attribute container; fields are assigned by the setup functions.
    '''

def make_resources(screen_dimensions):
    '''
    Load the texture, build the shader program and allocate the buffers.

    screen_dimensions -- (width, height) pair; sizes the screenshot buffer.

    Returns a Resources object carrying the vertex layout constants, the
    512-entry vertex array, the texture array, the shader program (set by
    create_shader_program), the GL array buffer name and the screenshot
    buffer.
    '''
    sprite_sheet = pygame.image.load('diagnostic_numbers.png')

    res = Resources()

    # Three pairs of 32-bit floats per vertex: 24 bytes, with the fields at
    # byte offsets 0, 8 and 16.
    layout = numpy.dtype([
        ("position", ("f4", 2)),
        ("size", ("f4", 2)),
        ("other", ("f4", 2))])
    res.vertex_stride = 24
    res.position_stream_offset = 0
    res.size_stream_offset = 8
    res.other_stream_offset = 16
    res.vertex_array = numpy.zeros(512, dtype=layout)

    # The sheet is cut into a 16 x 16 grid of layers.
    res.spacemen_texture = make_texture_array(sprite_sheet, 16, 16)

    create_shader_program(res)

    res.array_buffer = GL.glGenBuffers(1)

    # One RGBA byte quadruple per pixel, indexed [row][column].
    width, height = screen_dimensions
    res.save_buffer = numpy.zeros((height, width, 4), dtype="u1")

    return res


######## SCREENSHOT #########

# pygame.surfarray.make_surface is broken in 1.9.1. It reads uninitialized
# stack contents on 64-bit systems. :( Here we use numpy to do the copying
# instead.
def make_surface(array):
    '''
    Copy a 3-dimensional (w, h, 3) or (w, h, 4) array into a new 32-bit
    pygame Surface and return it.

    With four channels the surface is created with SRCALPHA and the fourth
    channel is copied into its alpha plane. Any other channel count raises
    ValueError.
    '''
    width, height, channels = array.shape
    if channels not in (3, 4):
        raise ValueError("Array must have minor dimension of 3 or 4.")

    if channels == 3:
        surf = pygame.Surface((width, height), depth=32)
        rgb_view = pygame.surfarray.pixels3d(surf)
        rgb_view[:,:,:channels] = array
        return surf

    # Four channels: colour and alpha are written through separate views.
    surf = pygame.Surface((width, height), depth=32, flags=pygame.SRCALPHA)
    rgb_view = pygame.surfarray.pixels3d(surf)
    rgb_view[:,:,:] = array[:,:,:3]
    alpha_view = pygame.surfarray.pixels_alpha(surf)
    alpha_view[:,:] = array[:,:,3]
    return surf

class Screenshotter(object):
    '''
    Reads back rendered frames from OpenGL, identifies each by the SHA1 of
    its pixels, saves the first occurrence of every distinct frame as a PNG
    and counts how often each one was seen.
    '''

    def __init__(self, save_buffer, screen_dimensions):
        self.screen_dimensions = screen_dimensions
        self.save_buffer = save_buffer
        # Maps frame hash -> number of times that frame was captured.
        self.hashes_seen = collections.Counter()

    def get_filename(self, screen_hash):
        '''Return the PNG filename used for a frame with this hash.'''
        return screen_hash + ".out.png"

    def take_screenshot(self):
        '''
        Read the current frame into the save buffer, hash it, and save it
        to disk if this hash has not been seen before.
        '''
        width, height = self.screen_dimensions
        frame = self.save_buffer
        GL.glReadPixels(
            0, 0, width, height, GL.GL_RGBA, GL.GL_UNSIGNED_BYTE,
            self.save_buffer)
        digest = hashlib.sha1(frame.view("u1")).hexdigest()
        if self.hashes_seen[digest] == 0:
            # Reindex as [x][y] and reverse the y axis before handing the
            # pixels to make_surface.
            upright = numpy.swapaxes(frame, 0, 1)[:,::-1,:]
            image = make_surface(upright)
            filename = self.get_filename(digest)
            pygame.image.save(image, filename)
            print(filename)
        self.hashes_seen[digest] += 1

    def print_summary(self):
        '''Print "<filename> <count>" per distinct frame, most frequent first.'''
        ranked = sorted(
            self.hashes_seen.items(),
            key=lambda entry: -entry[1])
        for screen_hash, count in ranked:
            print("{0} {1}".format(self.get_filename(screen_hash), count))


######## RENDERING ########

def prepare_context(resources, zoom, screen_dimensions):
    '''
    Set up all OpenGL state needed before the render loop starts.

    Sets the viewport and alpha blending, activates the shader program and
    its uniforms, binds the texture array to texture unit 0, uploads
    resources.vertex_array into the array buffer and describes its three
    two-float attributes at locations 0, 1 and 2.
    '''
    locations = resources.uniform_locations
    width, height = screen_dimensions

    GL.glViewport(0, 0, width, height)

    GL.glEnable(GL.GL_BLEND)
    GL.glBlendFunc(GL.GL_SRC_ALPHA, GL.GL_ONE_MINUS_SRC_ALPHA)

    GL.glUseProgram(resources.shader_program_object)

    GL.glUniform2f(locations['cam_position'], 0, 0)
    GL.glUniform1f(locations['zoom'], zoom)
    GL.glUniform2f(locations['screen_dimensions'], width, height)
    GL.glActiveTexture(GL.GL_TEXTURE0)
    GL.glBindTexture(GL.GL_TEXTURE_2D_ARRAY, resources.spacemen_texture)
    GL.glUniform1i(locations['texture_atlas'], 0)

    GL.glBindBuffer(GL.GL_ARRAY_BUFFER, resources.array_buffer)

    vertices = resources.vertex_array
    GL.glBufferData(
        GL.GL_ARRAY_BUFFER, vertices.nbytes, vertices, GL.GL_STATIC_DRAW)

    # Byte offset of each attribute within a vertex, indexed by location.
    attribute_offsets = (
        resources.position_stream_offset,
        resources.size_stream_offset,
        resources.other_stream_offset)

    for location in range(len(attribute_offsets)):
        GL.glEnableVertexAttribArray(location)
    for location, offset in enumerate(attribute_offsets):
        GL.glVertexAttribPointer(
            location, 2, GL.GL_FLOAT, GL.GL_FALSE, resources.vertex_stride,
            ctypes.cast(offset, ctypes.c_void_p))

def render(resources, zoom, vertex_count):
    '''
    Draw a single frame and present it.

    Clears the colour buffer to mid grey, draws `vertex_count` points from
    the currently bound vertex state, then flips the display. `resources`
    and `zoom` are accepted but not used here.
    '''
    GL.glClearColor(0.4, 0.4, 0.4, 1.0)
    GL.glClear(GL.GL_COLOR_BUFFER_BIT)
    GL.glDrawArrays(GL.GL_POINTS, 0, vertex_count)
    pygame.display.flip()


######## MAIN LOOP ########

def main():
    '''
    Open a 512x256 OpenGL window, fill the vertex array with a 32-wide grid
    of unit quads, render up to 3000 frames while capturing each one, and
    print a summary of the distinct frames seen.
    '''
    screen_dimensions = 512, 256
    frames = 3000
    zoom = 32.0
    vertex_count = 512

    pygame.init()
    pygame.display.set_mode(screen_dimensions, PG.OPENGL|PG.DOUBLEBUF)
    resources = make_resources(screen_dimensions)
    screenshotter = Screenshotter(resources.save_buffer, screen_dimensions)

    # Lay the points out in rows of 32, centred on each grid cell.
    scale = 32.0
    half_cell = scale/2.0/32.0
    for index in xrange(vertex_count):
        row, column = divmod(index, 32)
        y = (15 - row) / 32.0 * scale - scale/4.0 + half_cell
        x = column / 32.0 * scale - scale/2.0 + half_cell
        # Each pair of neighbours shows the high then the low hex digit of
        # the pair's number.
        pair = index // 2
        if index % 2 == 0:
            flavour = (pair // 16) % 16
        else:
            flavour = pair % 16
        resources.vertex_array[index] = ((x, y), (1, 1), (0, flavour))

    prepare_context(resources, zoom, screen_dimensions)

    quit_requested = False
    for frame in xrange(frames):
        if quit_requested:
            break
        if frame % 100 == 0:
            print("{0}/{1}".format(frame, frames))
        # Drain the event queue; a QUIT still lets this frame render.
        event = pygame.event.poll()
        while event.type != PG.NOEVENT:
            if event.type == PG.QUIT:
                quit_requested = True
            event = pygame.event.poll()
        render(resources, zoom, vertex_count)
        screenshotter.take_screenshot()

    print("---")
    screenshotter.print_summary()

# Run the demo only when this file is executed as a script.
if __name__ == '__main__':
    main()

It also needs an input PNG called diagnostic_numbers.png in the working directory:

diagnostic_numbers.png

Here's an example of what it typically displays:

Intended rendering

Each square is a separate point, inflated to a quad by the geometry shader. Each input vertex has a 2D position (the first vertex is in the top left, then they are arranged in rows down the screen), a size (they are all width and height 1), a rotation (they all have rotation 0) and a layer (0-15). The layer determines which layer of a texture array to render from.

However, some frames are rendered with the incorrect layer for some vertices. E.g.:

A glitch

Since the vertices are all arranged in order, it's possible to see that each glitch is a block of 8 adjacent vertices. This appears always to be the case. It's also the case that the glitched vertices have been rendered with the layer values of a block of vertices starting exactly 80 vertices previously in the array. In every case I've checked this has been true. I also don't think I've ever observed glitching within the first 256 vertices - the top half of the screen - it only starts somewhere after that.

Each frame, this is all the GL code that runs*:

def render(resources, zoom, vertex_count):
    '''
    Render one frame.
    '''
    # Clear to mid grey.
    GL.glClearColor(0.4, 0.4, 0.4, 1.0)
    GL.glClear(GL.GL_COLOR_BUFFER_BIT)
    # Draw every vertex as a point.
    GL.glDrawArrays(
        GL.GL_POINTS,
        0,
        vertex_count)
    # Present the finished frame.
    pygame.display.flip()

The program will run for 3000 frames, calculate the SHA1 hash of each rendered frame and save a PNG for each distinct output frame. Here's an example of typical console output on my machine:

Compiling vertex shader...
...completed
Compiling fragment shader...
...completed
Compiling geometry shader...
...completed
Compiling shader program...
...completed
0/3000
6fdbf7d09076ef084e57b90d7d445d2e56c54ab8.out.png
100/3000
200/3000
300/3000
3c7558f5a257c6b53fae9815df0ee8d457db9b19.out.png
400/3000
500/3000
66edb4d0fb88951af944c717c37b92f5cfa37cd0.out.png
4c3844a6879af3992081807e1e429e8ac83753f5.out.png
c538c6cddea6c6f53c3c968ebf8ab46bceb017f9.out.png
600/3000
700/3000
800/3000
900/3000
1000/3000
443afa3ee4c28611f0cc6b6a60712de503e34f8e.out.png
1100/3000
1200/3000
1300/3000
1400/3000
1500/3000
231d09f859aac29aef23d0c590187071e4fad321.out.png
1600/3000
1700/3000
1800/3000
df3051821a6d8327c77cfcf3d8053fdfcaf13e32.out.png
1900/3000
50ac6e618e5b58a8709baf557a757d62b041ef36.out.png
c53a53ba0f4d2401094cf25b271ab2e50b3909bf.out.png
2000/3000
5cb6c6989c24dc16e123d74f46985ebb243b2935.out.png
183cb8faaf1241526bd74e1f2ca65e6d89ab6c74.out.png
2100/3000
3666fcaac2d9d9b555c23367e5f01bb4f435cb65.out.png
2200/3000
69e14d278f2e340f50acc6274922d0cecc932ecd.out.png
1620c62c2eb151244ce929aa37a7dbc97c5def54.out.png
2300/3000
2400/3000
5932cf4ce48ad73bee1b35dd88b59d745ac3c493.out.png
2500/3000
2600/3000
dce25b06206bf0e671fb46e3365b9a42f1146813.out.png
2700/3000
2800/3000
2900/3000
---
6fdbf7d09076ef084e57b90d7d445d2e56c54ab8.out.png 2821
c538c6cddea6c6f53c3c968ebf8ab46bceb017f9.out.png 93
3c7558f5a257c6b53fae9815df0ee8d457db9b19.out.png 46
5cb6c6989c24dc16e123d74f46985ebb243b2935.out.png 12
df3051821a6d8327c77cfcf3d8053fdfcaf13e32.out.png 7
183cb8faaf1241526bd74e1f2ca65e6d89ab6c74.out.png 4
c53a53ba0f4d2401094cf25b271ab2e50b3909bf.out.png 3
69e14d278f2e340f50acc6274922d0cecc932ecd.out.png 3
4c3844a6879af3992081807e1e429e8ac83753f5.out.png 3
dce25b06206bf0e671fb46e3365b9a42f1146813.out.png 1
1620c62c2eb151244ce929aa37a7dbc97c5def54.out.png 1
5932cf4ce48ad73bee1b35dd88b59d745ac3c493.out.png 1
443afa3ee4c28611f0cc6b6a60712de503e34f8e.out.png 1
3666fcaac2d9d9b555c23367e5f01bb4f435cb65.out.png 1
66edb4d0fb88951af944c717c37b92f5cfa37cd0.out.png 1
231d09f859aac29aef23d0c590187071e4fad321.out.png 1
50ac6e618e5b58a8709baf557a757d62b041ef36.out.png 1

Most frames render as expected, but a sizable minority demonstrate the glitching, and some patterns of glitching are much more common.

If you want to run the code, you need Python 2.7, pygame, PyOpenGL and numpy. On Ubuntu the packages I have installed are python-numpy, python-opengl and python-pygame.

I've tried it on NVIDIA driver versions 310 and 313 on 64-bit Ubuntu, and gotten the same results. My hardware (as shown in lspci) is an "NVIDIA Corporation G98M [GeForce 9300M GS]".

I'm not sure what to check now. I think I've uploaded the vertex data correctly, since it renders correctly at least some of the time, and I only upload it once at the start. But then I do so little each frame that I don't think I'm doing something wrong there. There are no warnings coming out of the shader compiler. What should I try next? Is it possible that it's a driver bug? How would I know if it was?

* - Except for the code to capture screenshots, but that can be disabled and the glitching still happens.


Things I've tried:

  • Adding EndPrimitive() to the end of the geometry shader. No difference.
  • Rearranging the fields in the vertex array. No difference.
  • Assigning the attributes different locations in the shader. If I put position in location 2, the glitch will affect the y element of position instead.
like image 274
Weeble Avatar asked Jul 07 '13 23:07

Weeble


1 Answers

I did not run your code, and frankly, it's too complex for psychic debugging. But this issue comes up all the time, and is actually not trivial to deal with. Here are some approaches that helped me in the past:

  • Be very formal, sciency like, in your debugging. Keep a written log of what you try, and what happened. Only change one variable at a time.
  • Sprinkle glGetError asserts all over the place. Never call glGetError without at least an assert.
  • Track your gl ids. glIs.. should always match what you think it is.
  • Always run single threaded.
  • With that, run on a few different drivers and hw. That fixes 99% of cases.
  • Don't blame the driver first. There are driver bugs, but often it is just the most convenient excuse, not the most likely issue.
  • Carefully look for things you are doing different than other, working, code.
  • One problem is that you are using an extra level of abstraction. Is the bug in GL or in the python bindings? If you can, at least look at the code of the bindings. There might just be a comment talking about the library doing something odd. In the end, the only way to rule out the library is to write straight C or your own bindings.
  • Make your code portable. Run it on windows and mac and linux (bonus android and ios). Code that works cross-platform not only makes sure you only hit the well-tested code paths, it also gets the benefit of three OS layers' checks.
  • Try on different hardware, OS, and driver versions.
  • Use vendor debugging tools. People mentioned gDebugger, PerfHud, but try all of them. Apple's Instruments is good. So are Qualcomm's Adreno tools. They are all very finicky though and often all the previous points are less work than fully setting up one of them in a way that you understand it enough so you can trust it.
  • Build a minimal repro case and file a bug with a vendor. The timeframe for this is usually way too long to be useful. If the vendor accepts the bug, make sure to ask for a workaround. It can easily take two years from a bug being fixed to the fix reaching the general public.

Hope this helps. It's hard and weird. Different than CPU debugging.

like image 130
starmole Avatar answered Nov 02 '22 22:11

starmole