I'm trying to use a geometry shader to inflate points into quads for some simple 2D rendering. Most frames render exactly as I expect, but every so often some of the vertices render with some of their attributes incorrect. I've spent some time simplifying this down from a bigger program with many moving parts, so that I'm doing the bare minimum during rendering, but unfortunately there's still an awful lot of setup. The complete code is here:
http://pastebin.com/mQyRcTjJ
#!/usr/bin/env python
# Copyright 2011-2013, Andrew Wilson
# Licensed under the MIT license:
# http://www.opensource.org/licenses/MIT
# memglitch.py
from OpenGL import GL
import sys
import pygame
import pygame.image
import pygame.key
import pygame as PG
import numpy
import hashlib
import collections
import ctypes
######## SHADERS ########
# GLSL source for the vertex stage. It performs no transformation: each
# per-point attribute is forwarded unchanged to the geometry stage through
# the VertexData interface block (other.x is the rotation, other.y the
# texture layer). gl_Position is not written here; the geometry shader
# computes it.
vertex_shader = '''\
#version 330

uniform vec2 screen_dimensions;
uniform vec2 cam_position;
uniform float zoom;

layout(location=0) in vec2 position;
layout(location=1) in vec2 size;
layout(location=2) in vec2 other;

// Member names, types and ORDER must match the geometry shader's
// "in VertexData" block (position, size, rotation, layer).
out VertexData
{
    vec2 position;
    vec2 size;
    float rotation;
    float layer;
} outData;

void main()
{
    outData.position = position;
    outData.size = size;
    outData.rotation = other.x;
    outData.layer = other.y;
}
'''
# GLSL source for the geometry stage. Each invocation receives one point and
# emits a four-vertex triangle strip (a quad) centred on the point:
#   * the corners are vert[0].position +/- half of vert[0].size, multiplied
#     by a 2x2 matrix built from vert[0].rotation;
#   * calcPosition() maps a world-space position to gl_Position as
#     (pos - cam_position) * zoom / screen_dimensions, with z = 0 and w = 1;
#   * the corners carry texcoords (0,0), (0,1), (1,0), (1,1) and the point's
#     layer value is copied to every emitted vertex.
# NOTE(review): GLSL requires the "in VertexData" block below to match the
# vertex shader's "out VertexData" block in member names, types and order --
# keep the two declarations in sync.
geometry_shader = '''\
#version 330
#extension GL_EXT_gpu_shader4 : enable
layout (points) in;
layout (triangle_strip, max_vertices = 4) out;
uniform vec2 screen_dimensions;
uniform vec2 cam_position;
uniform float zoom;
in VertexData
{
vec2 position;
vec2 size;
float rotation;
float layer;
} vert[];
out FragData
{
smooth vec2 texcoord;
smooth float layer;
} vertOut;
vec4 calcPosition(in vec2 pos)
{
// Transform a position in world-space into screen-space
vec4 result;
result.xy =
(
pos
- cam_position
)
* zoom
/ screen_dimensions;
result.zw = vec2(0.0, 1.0);
return result;
}
void main()
{
// Inflate each input point into a quad.
float r = vert[0].rotation;
mat2 rotation_matrix = mat2(cos(r), -sin(r), sin(r), cos(r));
vec2 currentPos;
vec4 texcoords = vec4(0,0,1,1);
currentPos = vert[0].position + vert[0].size * vec2(-0.5, -0.5) * rotation_matrix;
gl_Position = calcPosition(currentPos);
vertOut.texcoord = texcoords.xy;
vertOut.layer = vert[0].layer;
gl_PrimitiveID = gl_PrimitiveIDIn;
EmitVertex();
currentPos = vert[0].position + vert[0].size * vec2(-0.5, 0.5) * rotation_matrix;
gl_Position = calcPosition(currentPos);
vertOut.texcoord = texcoords.xw;
vertOut.layer = vert[0].layer;
gl_PrimitiveID = gl_PrimitiveIDIn;
EmitVertex();
currentPos = vert[0].position + vert[0].size * vec2(0.5, -0.5) * rotation_matrix;
gl_Position = calcPosition(currentPos);
vertOut.texcoord = texcoords.zy;
vertOut.layer = vert[0].layer;
gl_PrimitiveID = gl_PrimitiveIDIn;
EmitVertex();
currentPos = vert[0].position + vert[0].size * vec2(0.5, 0.5) * rotation_matrix;
gl_Position = calcPosition(currentPos);
vertOut.texcoord = texcoords.zw;
vertOut.layer = vert[0].layer;
gl_PrimitiveID = gl_PrimitiveIDIn;
EmitVertex();
}
'''
# GLSL source for the fragment stage. It samples the sampler2DArray
# `texture_atlas` at (texcoord.x, texcoord.y, layer) and writes the result to
# colour output 0. The `zoom` uniform is declared but not referenced in this
# stage. Both FragData members arrive with `smooth` interpolation, so `layer`
# reaches texture() as an interpolated float.
fragment_shader = '''\
#version 330
#extension GL_EXT_gpu_shader4 : enable
uniform sampler2DArray texture_atlas;
uniform float zoom;
in FragData
{
smooth vec2 texcoord;
smooth float layer;
};
layout(location=0) out vec4 fragcolor;
void main()
{
fragcolor = texture(
texture_atlas,
vec3(texcoord, float(layer)));
}
'''
######## TEXTURE_SETUP ########
def make_texture_array(
        image,
        across=8,
        down=8):
    '''
    Split up an input image with a grid and assemble a
    texture array from all of the sub-images.

    image  -- pygame surface containing the whole grid of sub-images.
    across -- number of grid cells in each row.
    down   -- number of grid rows.

    Cells are uploaded row by row, so the cell at column x, row y becomes
    layer y*across + x. Any pixels left over when the image size is not an
    exact multiple of the grid are ignored (integer division).

    Returns the OpenGL texture name. The texture is left bound to
    GL_TEXTURE_2D_ARRAY.
    '''
    source_width, source_height = image.get_size()
    width = source_width // across
    height = source_height // down

    # pygame.image.tostring() returns raw bytes, so join with a bytes
    # separator; b"" is the same as "" on Python 2 and correct on Python 3.
    # The final True asks pygame for the "flipped" byte order.
    pixels = b"".join(
        pygame.image.tostring(
            image.subsurface((x * width, y * height, width, height)),
            "RGBA",
            True)
        for y in range(down)
        for x in range(across))

    texture = GL.glGenTextures(1)
    GL.glBindTexture(GL.GL_TEXTURE_2D_ARRAY, texture)

    # Nearest-neighbour sampling, clamped edges, and a single mip level.
    texture_parameters = (
        (GL.GL_TEXTURE_MIN_FILTER, GL.GL_NEAREST),
        (GL.GL_TEXTURE_MAG_FILTER, GL.GL_NEAREST),
        (GL.GL_TEXTURE_WRAP_S, GL.GL_CLAMP_TO_EDGE),
        (GL.GL_TEXTURE_WRAP_T, GL.GL_CLAMP_TO_EDGE),
        (GL.GL_TEXTURE_BASE_LEVEL, 0),
        (GL.GL_TEXTURE_MAX_LEVEL, 0),
    )
    for name, value in texture_parameters:
        GL.glTexParameteri(GL.GL_TEXTURE_2D_ARRAY, name, value)

    targetformat = GL.GL_RGBA8
    sourceformat = GL.GL_RGBA
    GL.glTexImage3D(
        GL.GL_TEXTURE_2D_ARRAY,
        0,
        targetformat,
        width,
        height,
        across * down,
        0,
        sourceformat,
        GL.GL_UNSIGNED_BYTE,
        pixels)
    return texture
######## SHADER SETUP ########
def create_shader_program(resources):
    '''
    Compile the shader program. Populates resources.shader_program_object
    with the OpenGL program object and resources.uniform_locations with a
    dictionary mapping uniform names to locations.

    Progress and any compiler/linker messages are written to stderr.
    Failures propagate as the exceptions raised by make_shader and
    make_program.
    '''
    writelog = sys.stderr.write

    def report(infolog):
        # Summarise one compile/link step; an empty info log means the
        # driver had nothing to say.
        if not infolog:
            writelog("...completed\n")
            return
        if not isinstance(infolog, str):
            # Info logs may come back as bytes; stderr.write needs text.
            infolog = infolog.decode("utf-8", "replace")
        writelog("...completed with messages:\n")
        writelog(infolog)
        writelog("\n")

    stages = (
        (vertex_shader, GL.GL_VERTEX_SHADER, 'vertex'),
        (fragment_shader, GL.GL_FRAGMENT_SHADER, 'fragment'),
        (geometry_shader, GL.GL_GEOMETRY_SHADER, 'geometry'),
    )
    shaders = []
    for source, gltype, name in stages:
        writelog("Compiling {0} shader...\n".format(name))
        shader = make_shader(gltype, source)
        report(GL.glGetShaderInfoLog(shader))
        shaders.append(shader)

    writelog("Compiling shader program...\n")
    program = make_program(*shaders)
    report(GL.glGetProgramInfoLog(program))

    active_uniforms = GL.glGetProgramiv(program, GL.GL_ACTIVE_UNIFORMS)
    resources.uniform_locations = {}
    for index in range(active_uniforms):
        name, size, data_type = GL.glGetActiveUniform(program, index)
        # The active-uniform index is NOT the uniform's location; the
        # location has to be looked up by name.
        location = GL.glGetUniformLocation(program, name)
        # Callers look uniforms up with ordinary string keys.
        key = name if isinstance(name, str) else name.decode("ascii")
        resources.uniform_locations[key] = location
    resources.shader_program_object = program
def make_shader(shadertype, source):
    '''
    Compile and return an OpenGL shader object.

    shadertype -- GL shader type enum (e.g. GL.GL_VERTEX_SHADER).
    source     -- GLSL source text.

    On failure the shader's info log is printed, the shader object is
    deleted and an Exception is raised.
    '''
    shader = GL.glCreateShader(shadertype)
    GL.glShaderSource(shader, source)
    GL.glCompileShader(shader)
    # Use the status value returned by the query rather than a caller-made
    # holder: a holder pre-filled with a non-zero value would read as
    # "success" if the query ever failed to write through it.
    compiled = GL.glGetShaderiv(shader, GL.GL_COMPILE_STATUS)
    if not compiled:
        sys.stderr.write("Failed to compile shader.\n")
        print(GL.glGetShaderInfoLog(shader))
        GL.glDeleteShader(shader)
        raise Exception("Failed to compile shader.")
    return shader
def make_program(*shaders):
    '''
    Compile and return an OpenGL program object.

    shaders -- compiled shader objects to attach before linking.

    On failure the program's info log is printed, the program object is
    deleted and an Exception is raised.
    '''
    program = GL.glCreateProgram()
    for shader in shaders:
        GL.glAttachShader(program, shader)
    GL.glLinkProgram(program)
    # Same two-argument query form that create_shader_program uses for
    # GL_ACTIVE_UNIFORMS: the status is the call's return value.
    linked = GL.glGetProgramiv(program, GL.GL_LINK_STATUS)
    if not linked:
        sys.stderr.write("Failed to link shader program.\n")
        print(GL.glGetProgramInfoLog(program))
        GL.glDeleteProgram(program)
        raise Exception("Failed to link shader program.")
    return program
######## RESOURCE ALLOCATION ########
class Resources(object):
    '''
    Plain attribute container with no behaviour of its own.

    make_resources and create_shader_program attach the buffers, texture,
    shader program and layout constants to an instance as attributes.
    '''
def make_resources(screen_dimensions):
    '''
    Load the sprite sheet and allocate everything rendering needs.

    screen_dimensions -- (width, height) of the window in pixels.

    Returns a Resources instance carrying: the interleaved vertex array and
    its stride/offsets, the texture array, the shader program and uniform
    locations (via create_shader_program), a GL buffer name, and an
    (height, width, 4) byte array for reading back frames.

    Reads 'diagnostic_numbers.png' from the working directory.
    '''
    sprite_sheet = pygame.image.load('diagnostic_numbers.png')
    resources = Resources()
    vertex_dtype = numpy.dtype([
        ("position", ("f4", 2)),
        ("size", ("f4", 2)),
        ("other", ("f4", 2))])
    # Take the stride and per-attribute byte offsets from the dtype itself
    # so they cannot drift from the record layout (24 / 0 / 8 / 16 here).
    layout = vertex_dtype.fields
    resources.vertex_stride = vertex_dtype.itemsize
    resources.position_stream_offset = layout["position"][1]
    resources.size_stream_offset = layout["size"][1]
    resources.other_stream_offset = layout["other"][1]
    resources.vertex_array = numpy.zeros(512, dtype=vertex_dtype)
    # The sheet is cut into a 16x16 grid, one texture layer per cell.
    resources.spacemen_texture = make_texture_array(sprite_sheet, 16, 16)
    create_shader_program(resources)
    resources.array_buffer = GL.glGenBuffers(1)
    w, h = screen_dimensions
    resources.save_buffer = numpy.zeros((h, w, 4), dtype="u1")
    return resources
######## SCREENSHOT #########
# pygame.surfarray.make_surface is broken in 1.9.1. It reads uninitialized
# stack contents on 64-bit systems. :( Here we use numpy to do the copying
# instead.
def make_surface(array):
    '''
    Build a 32-bit pygame surface from a 3-D pixel array.

    array -- indexed as [x, y, channel] with 3 (RGB) or 4 (RGBA) channels.

    A 4-channel array yields a per-pixel-alpha surface. Raises ValueError
    when the last dimension is neither 3 nor 4.
    '''
    w, h, depth = array.shape
    if depth not in (3, 4):
        raise ValueError("Array must have minor dimension of 3 or 4.")

    has_alpha = depth == 4
    if has_alpha:
        surf = pygame.Surface((w, h), depth=32, flags=pygame.SRCALPHA)
    else:
        surf = pygame.Surface((w, h), depth=32)

    # Copy the colour channels through a view onto the surface's pixels.
    colour = pygame.surfarray.pixels3d(surf)
    colour[:, :, :] = array[:, :, :3]
    if has_alpha:
        opacity = pygame.surfarray.pixels_alpha(surf)
        opacity[:, :] = array[:, :, 3]
    return surf
class Screenshotter(object):
    '''
    Captures screenshots from OpenGL and records them by SHA1 hash.

    Each distinct frame is written once as "<sha1>.out.png"; hashes_seen
    counts how many times every distinct frame has been captured.
    '''

    def __init__(self, save_buffer, screen_dimensions):
        '''
        save_buffer       -- array that glReadPixels fills on every capture.
        screen_dimensions -- (width, height) of the area to read back.
        '''
        self.hashes_seen = collections.Counter()
        self.save_buffer = save_buffer
        self.screen_dimensions = screen_dimensions

    def get_filename(self, screen_hash):
        '''Return the PNG file name used for a frame with this hash.'''
        return screen_hash + ".out.png"

    def take_screenshot(self):
        '''
        Read the current frame into save_buffer, hash it, and save a PNG
        (printing its name) the first time that hash is seen.
        '''
        w, h = self.screen_dimensions
        save_buffer = self.save_buffer
        GL.glReadPixels(
            0, 0, w, h, GL.GL_RGBA, GL.GL_UNSIGNED_BYTE, save_buffer)
        screen_hash = hashlib.sha1(save_buffer.view("u1")).hexdigest()
        if self.hashes_seen[screen_hash] == 0:
            # Swap the first two axes and reverse the second so the data is
            # laid out the way make_surface indexes it.
            oriented = numpy.swapaxes(save_buffer, 0, 1)[:, ::-1, :]
            surf = make_surface(oriented)
            filename = self.get_filename(screen_hash)
            pygame.image.save(surf, filename)
            print(filename)
        self.hashes_seen[screen_hash] += 1

    def print_summary(self):
        '''Print "<file name> <count>" per distinct frame, most frequent first.'''
        # Index the (hash, count) pair instead of using a tuple-unpacking
        # lambda parameter, which is Python-2-only syntax.
        by_count = sorted(self.hashes_seen.items(), key=lambda item: -item[1])
        for screen_hash, count in by_count:
            print("{0} {1}".format(self.get_filename(screen_hash), count))
######## RENDERING ########
def prepare_context(resources, zoom, screen_dimensions):
    '''
    Prepare the OpenGL context for rendering.

    Sets the viewport and alpha blending, activates the shader program and
    its uniforms (camera at the origin), binds the texture array to unit 0,
    uploads resources.vertex_array into the array buffer and describes the
    three two-float vertex attributes at locations 0, 1 and 2.
    '''
    locations = resources.uniform_locations
    width, height = screen_dimensions

    GL.glViewport(0, 0, width, height)
    GL.glEnable(GL.GL_BLEND)
    GL.glBlendFunc(GL.GL_SRC_ALPHA, GL.GL_ONE_MINUS_SRC_ALPHA)

    GL.glUseProgram(resources.shader_program_object)
    GL.glUniform2f(locations['cam_position'], 0, 0)
    GL.glUniform1f(locations['zoom'], zoom)
    GL.glUniform2f(locations['screen_dimensions'], width, height)

    GL.glActiveTexture(GL.GL_TEXTURE0)
    GL.glBindTexture(GL.GL_TEXTURE_2D_ARRAY, resources.spacemen_texture)
    GL.glUniform1i(locations['texture_atlas'], 0)

    vertices = resources.vertex_array
    GL.glBindBuffer(GL.GL_ARRAY_BUFFER, resources.array_buffer)
    GL.glBufferData(
        GL.GL_ARRAY_BUFFER, vertices.nbytes, vertices, GL.GL_STATIC_DRAW)

    # Attribute location -> byte offset inside each interleaved vertex.
    attribute_offsets = (
        resources.position_stream_offset,
        resources.size_stream_offset,
        resources.other_stream_offset,
    )
    for location in range(len(attribute_offsets)):
        GL.glEnableVertexAttribArray(location)
    for location, offset in enumerate(attribute_offsets):
        GL.glVertexAttribPointer(
            location, 2, GL.GL_FLOAT, GL.GL_FALSE, resources.vertex_stride,
            ctypes.cast(offset, ctypes.c_void_p))
def render(resources, zoom, vertex_count):
    '''
    Draw and present a single frame.

    Clears the colour buffer to grey, draws the first vertex_count vertices
    as GL_POINTS and flips the display. resources and zoom are accepted but
    not used here.
    '''
    GL.glClearColor(0.4, 0.4, 0.4, 1.0)
    GL.glClear(GL.GL_COLOR_BUFFER_BIT)
    first_vertex = 0
    GL.glDrawArrays(GL.GL_POINTS, first_vertex, vertex_count)
    pygame.display.flip()
######## MAIN LOOP ########
def main():
    '''
    Open a 512x256 OpenGL window, fill the vertex array with a 32-column
    grid of unit quads, render up to 3000 frames while capturing each one,
    then print how often every distinct frame was seen.

    NOTE(review): take_screenshot() runs after render() has already flipped
    the display; whether the buffer read back still holds the frame just
    drawn depends on the driver -- confirm if captures look stale.
    '''
    video_flags = PG.OPENGL | PG.DOUBLEBUF
    pygame.init()
    screen_dimensions = 512, 256
    pygame.display.set_mode(screen_dimensions, video_flags)
    resources = make_resources(screen_dimensions)
    frames = 3000
    zoom = 32.0
    # Draw exactly as many points as the vertex array holds, so the fill
    # loop and the draw call can never run past the allocation.
    vertex_count = len(resources.vertex_array)
    screenshotter = Screenshotter(resources.save_buffer, screen_dimensions)

    scale = 32.0
    for i in range(vertex_count):
        # 32 points per row, rows running down the screen; the trailing
        # term shifts each point by half a cell.
        y = (15 - i // 32) / 32.0 * scale - scale/4.0 + (scale/2.0/32.0)
        x = (i % 32) / 32.0 * scale - scale/2.0 + (scale/2.0/32.0)
        # Adjacent pairs show the high and low hex digit of i // 2.
        xx = i // 2
        lo = xx % 16
        hi = (xx // 16) % 16
        flavour = hi if i % 2 == 0 else lo
        resources.vertex_array[i] = ((x, y), (1, 1), (0, flavour))
    prepare_context(resources, zoom, screen_dimensions)

    done = False
    for i in range(frames):
        if done:
            break
        if i % 100 == 0:
            print("{0}/{1}".format(i, frames))
        # Drain the event queue; a QUIT request ends the run after the
        # current frame has been rendered and captured.
        while True:
            event = pygame.event.poll()
            if event.type == PG.NOEVENT:
                break
            if event.type == PG.QUIT:
                done = True
        render(resources, zoom, vertex_count)
        screenshotter.take_screenshot()
    print("---")
    screenshotter.print_summary()


if __name__ == '__main__':
    main()
It also needs an input PNG called diagnostic_numbers.png
in the working directory:
Here's an example of what it typically displays:
Each square is a separate point, inflated to a quad by the geometry shader. Each input vertex has a 2D position (the first vertex is in the top left, then they are arranged in rows down the screen), a size (they all have width and height 1), a rotation (they all have rotation 0), and a layer (0-15). The layer determines which layer of a texture array to render from.
However, some frames are rendered with the incorrect layer for some vertices. E.g.:
Since the vertices are all arranged in order, it's possible to see that each glitch is a block of 8 adjacent vertices. This appears always to be the case. It's also the case that the glitched vertices have been rendered with the layer values of a block of vertices starting exactly 80 vertices earlier in the array. In every case I've checked, this has been true. I also don't think I've ever observed glitching within the first 256 vertices (the top half of the screen); it only starts somewhere after that.
Each frame, this is all the GL code that runs*:
def render(resources, zoom, vertex_count):
    '''
    Draw and present a single frame.

    Clears the colour buffer to grey, draws the first vertex_count vertices
    as GL_POINTS and flips the display. resources and zoom are accepted but
    not used here.
    '''
    GL.glClearColor(0.4, 0.4, 0.4, 1.0)
    GL.glClear(GL.GL_COLOR_BUFFER_BIT)
    first_vertex = 0
    GL.glDrawArrays(GL.GL_POINTS, first_vertex, vertex_count)
    pygame.display.flip()
The program will run for 3000 frames, calculate the SHA1 hash of each rendered frame and save a PNG for each distinct output frame. Here's an example of typical console output on my machine:
Compiling vertex shader...
...completed
Compiling fragment shader...
...completed
Compiling geometry shader...
...completed
Compiling shader program...
...completed
0/3000
6fdbf7d09076ef084e57b90d7d445d2e56c54ab8.out.png
100/3000
200/3000
300/3000
3c7558f5a257c6b53fae9815df0ee8d457db9b19.out.png
400/3000
500/3000
66edb4d0fb88951af944c717c37b92f5cfa37cd0.out.png
4c3844a6879af3992081807e1e429e8ac83753f5.out.png
c538c6cddea6c6f53c3c968ebf8ab46bceb017f9.out.png
600/3000
700/3000
800/3000
900/3000
1000/3000
443afa3ee4c28611f0cc6b6a60712de503e34f8e.out.png
1100/3000
1200/3000
1300/3000
1400/3000
1500/3000
231d09f859aac29aef23d0c590187071e4fad321.out.png
1600/3000
1700/3000
1800/3000
df3051821a6d8327c77cfcf3d8053fdfcaf13e32.out.png
1900/3000
50ac6e618e5b58a8709baf557a757d62b041ef36.out.png
c53a53ba0f4d2401094cf25b271ab2e50b3909bf.out.png
2000/3000
5cb6c6989c24dc16e123d74f46985ebb243b2935.out.png
183cb8faaf1241526bd74e1f2ca65e6d89ab6c74.out.png
2100/3000
3666fcaac2d9d9b555c23367e5f01bb4f435cb65.out.png
2200/3000
69e14d278f2e340f50acc6274922d0cecc932ecd.out.png
1620c62c2eb151244ce929aa37a7dbc97c5def54.out.png
2300/3000
2400/3000
5932cf4ce48ad73bee1b35dd88b59d745ac3c493.out.png
2500/3000
2600/3000
dce25b06206bf0e671fb46e3365b9a42f1146813.out.png
2700/3000
2800/3000
2900/3000
---
6fdbf7d09076ef084e57b90d7d445d2e56c54ab8.out.png 2821
c538c6cddea6c6f53c3c968ebf8ab46bceb017f9.out.png 93
3c7558f5a257c6b53fae9815df0ee8d457db9b19.out.png 46
5cb6c6989c24dc16e123d74f46985ebb243b2935.out.png 12
df3051821a6d8327c77cfcf3d8053fdfcaf13e32.out.png 7
183cb8faaf1241526bd74e1f2ca65e6d89ab6c74.out.png 4
c53a53ba0f4d2401094cf25b271ab2e50b3909bf.out.png 3
69e14d278f2e340f50acc6274922d0cecc932ecd.out.png 3
4c3844a6879af3992081807e1e429e8ac83753f5.out.png 3
dce25b06206bf0e671fb46e3365b9a42f1146813.out.png 1
1620c62c2eb151244ce929aa37a7dbc97c5def54.out.png 1
5932cf4ce48ad73bee1b35dd88b59d745ac3c493.out.png 1
443afa3ee4c28611f0cc6b6a60712de503e34f8e.out.png 1
3666fcaac2d9d9b555c23367e5f01bb4f435cb65.out.png 1
66edb4d0fb88951af944c717c37b92f5cfa37cd0.out.png 1
231d09f859aac29aef23d0c590187071e4fad321.out.png 1
50ac6e618e5b58a8709baf557a757d62b041ef36.out.png 1
Most frames render as expected, but a sizable minority demonstrate the glitching, and some patterns of glitching are much more common.
If you want to run the code, you need Python 2.7, pygame, PyOpenGL and numpy. On Ubuntu the packages I have installed are python-numpy, python-opengl and python-pygame.
I've tried it on NVIDIA driver versions 310 and 313 on 64-bit Ubuntu, and gotten the same results. My hardware (as shown in lspci) is an "NVIDIA Corporation G98M [GeForce 9300M GS]".
I'm not sure what to check now. I think I've uploaded the vertex data correctly, since it renders correctly at least some of the time, and I only upload it once at the start. But then I do so little each frame that I don't think I'm doing something wrong there. There are no warnings coming out of the shader compiler. What should I try next? Is it possible that it's a driver bug? How would I know if it was?
* - Except for the code to capture screenshots, but that can be disabled and the glitching still happens.
Things I've tried:
I did not run your code, and frankly, it's too complex for psychic debugging. But this issue comes up all the time, and is actually not trivial to deal with. Here are some approaches that helped me in the past:
Hope this helps. It's hard and weird. Different than CPU debugging.
If you love us, you can donate to us via PayPal or buy us a coffee so we can maintain and grow. Thank you!
Donate Us With