Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Making CIContext.render(CIImage, CVPixelBuffer) work with AVAssetWriter

I want to use Core Image for processing a bunch of CGImage objects and turning them into a QuickTime movie on macOS. The following code demonstrates what's needed, but the output contains a lot of blank (black) frames:

import AppKit
import AVFoundation
import CoreGraphics
import Foundation
import CoreVideo
import Metal

// Video output url.
let url: URL = try! FileManager.default.url(for: .downloadsDirectory, in: .userDomainMask, appropriateFor: nil, create: false).appendingPathComponent("av.mov")
try? FileManager.default.removeItem(at: url)

// Video frame size, total frame count, frame rate and frame image.
let frameSize: CGSize = CGSize(width: 2000, height: 1000)
let frameCount: Int = 100
let frameRate: Double = 1 / 30
let frameImage: CGImage

frameImage = NSImage(size: frameSize, flipped: false, drawingHandler: {
    NSColor.red.setFill()
    $0.fill()
    return true
}).cgImage(forProposedRect: nil, context: nil, hints: nil)!

let pixelBufferAttributes: [CFString: Any]
let outputSettings: [String: Any]

pixelBufferAttributes = [
    kCVPixelBufferPixelFormatTypeKey: Int(kCVPixelFormatType_32ARGB),
    kCVPixelBufferWidthKey: Float(frameSize.width),
    kCVPixelBufferHeightKey: Float(frameSize.height),
    kCVPixelBufferMetalCompatibilityKey: true,
    kCVPixelBufferCGImageCompatibilityKey: true,
    kCVPixelBufferCGBitmapContextCompatibilityKey: true,
]

outputSettings = [
    AVVideoCodecKey: AVVideoCodecType.h264,
    AVVideoWidthKey: Int(frameSize.width),
    AVVideoHeightKey: Int(frameSize.height),
]

let writer: AVAssetWriter = try! AVAssetWriter(outputURL: url, fileType: .mov)
let input: AVAssetWriterInput = AVAssetWriterInput(mediaType: .video, outputSettings: outputSettings)
let pixelBufferAdaptor: AVAssetWriterInputPixelBufferAdaptor = AVAssetWriterInputPixelBufferAdaptor(assetWriterInput: input, sourcePixelBufferAttributes: pixelBufferAttributes as [String: Any])

input.expectsMediaDataInRealTime = true

precondition(writer.canAdd(input))
writer.add(input)

precondition(writer.startWriting())
writer.startSession(atSourceTime: CMTime.zero)

let colorSpace: CGColorSpace = CGColorSpace(name: CGColorSpace.sRGB) ?? CGColorSpaceCreateDeviceRGB()
let context = CIContext(mtlDevice: MTLCreateSystemDefaultDevice()!)

Swift.print("Starting the render…")

// Preferred scenario: using CoreImage to fill the buffer from the pixel buffer adapter. Shows that
// CIImage + AVAssetWriterInputPixelBufferAdaptor are not working together.

for frameNumber in 0 ..< frameCount {
    var pixelBuffer: CVPixelBuffer?
    guard let pixelBufferPool: CVPixelBufferPool = pixelBufferAdaptor.pixelBufferPool else { preconditionFailure() }
    precondition(CVPixelBufferPoolCreatePixelBuffer(nil, pixelBufferPool, &pixelBuffer) == kCVReturnSuccess)

    precondition(CVPixelBufferLockBaseAddress(pixelBuffer!, []) == kCVReturnSuccess)
    defer { precondition(CVPixelBufferUnlockBaseAddress(pixelBuffer!, []) == kCVReturnSuccess) }

    let ciImage = CIImage(cgImage: frameImage)
    context.render(ciImage, to: pixelBuffer!)

    // πŸ’₯ This fails – the pixel buffer doesn't get filled. AT ALL! Why? How to make it work?
    let bytes = UnsafeBufferPointer(start: CVPixelBufferGetBaseAddress(pixelBuffer!)!.assumingMemoryBound(to: UInt8.self), count: CVPixelBufferGetDataSize(pixelBuffer!))
    precondition(bytes.contains(where: { $0 != 0 }))

    while !input.isReadyForMoreMediaData { Thread.sleep(forTimeInterval: 10 / 1000) }
    precondition(pixelBufferAdaptor.append(pixelBuffer!, withPresentationTime: CMTime(seconds: Double(frameNumber) * frameRate, preferredTimescale: 600)))
}


// Unpreferred scenario: using CoreImage to fill the manually created buffer. Proves that CIImage 
// can fill buffer and working.

// for frameNumber in 0 ..< frameCount {
//     var pixelBuffer: CVPixelBuffer?
//     precondition(CVPixelBufferCreate(nil, frameImage.width, frameImage.height, kCVPixelFormatType_32ARGB, pixelBufferAttributes as CFDictionary, &pixelBuffer) == kCVReturnSuccess)
//
//     precondition(CVPixelBufferLockBaseAddress(pixelBuffer!, []) == kCVReturnSuccess)
//     defer { precondition(CVPixelBufferUnlockBaseAddress(pixelBuffer!, []) == kCVReturnSuccess) }
//
//     let ciImage = CIImage(cgImage: frameImage)
//     context.render(ciImage, to: pixelBuffer!)
//
//     // βœ… This passes.
//     let bytes = UnsafeBufferPointer(start: CVPixelBufferGetBaseAddress(pixelBuffer!)!.assumingMemoryBound(to: UInt8.self), count: CVPixelBufferGetDataSize(pixelBuffer!))
//     precondition(bytes.contains(where: { $0 != 0 }))
//
//     while !input.isReadyForMoreMediaData { Thread.sleep(forTimeInterval: 10 / 1000) }
//     precondition(pixelBufferAdaptor.append(pixelBuffer!, withPresentationTime: CMTime(seconds: Double(frameNumber) * frameRate, preferredTimescale: 600)))
// }


// Unpreferred scenario: using CoreGraphics to fill the buffer from the pixel buffer adapter. Shows that
// buffer from pixel buffer adapter can be filled and working.

// for frameNumber in 0 ..< frameCount {
//     var pixelBuffer: CVPixelBuffer?
//     guard let pixelBufferPool: CVPixelBufferPool = pixelBufferAdaptor.pixelBufferPool else { preconditionFailure() }
//     precondition(CVPixelBufferPoolCreatePixelBuffer(nil, pixelBufferPool, &pixelBuffer) == kCVReturnSuccess)
//
//     precondition(CVPixelBufferLockBaseAddress(pixelBuffer!, []) == kCVReturnSuccess)
//     defer { precondition(CVPixelBufferUnlockBaseAddress(pixelBuffer!, []) == kCVReturnSuccess) }
//
//     guard let context: CGContext = CGContext(data: CVPixelBufferGetBaseAddress(pixelBuffer!), width: frameImage.width, height: frameImage.height, bitsPerComponent: 8, bytesPerRow: CVPixelBufferGetBytesPerRow(pixelBuffer!), space: colorSpace, bitmapInfo: CGImageAlphaInfo.premultipliedFirst.rawValue) else { preconditionFailure() }
//     context.clear(CGRect(origin: .zero, size: frameSize))
//     context.draw(frameImage, in: CGRect(origin: .zero, size: frameSize))
//
//     // βœ… This passes.
//     let bytes = UnsafeBufferPointer(start: CVPixelBufferGetBaseAddress(pixelBuffer!)!.assumingMemoryBound(to: UInt8.self), count: CVPixelBufferGetDataSize(pixelBuffer!))
//     precondition(bytes.contains(where: { $0 != 0 }))
//
//     while !input.isReadyForMoreMediaData { Thread.sleep(forTimeInterval: 10 / 1000) }
//     precondition(pixelBufferAdaptor.append(pixelBuffer!, withPresentationTime: CMTime(seconds: Double(frameNumber) * frameRate, preferredTimescale: 600)))
// }

let semaphore = DispatchSemaphore(value: 0)

input.markAsFinished()
writer.endSession(atSourceTime: CMTime(seconds: Double(frameCount) * frameRate, preferredTimescale: 600))
writer.finishWriting(completionHandler: { semaphore.signal() })

semaphore.wait()

Swift.print("Successfully finished rendering to \(url.path)")

The following, however, works with CGContext, but I need CIContext in order to make use of GPU. The problem seems to be with pixel buffers provided by the AVAssetWriterInputPixelBufferAdaptor's buffer pool. Rendering CIContext into individually created buffers and appending them to the adapter works, but is highly inefficient. Rendering CIContext into buffers provided by the adapter's pool results in no data being written into buffer at all, it literally contains all zeroes as if two are incompatible! However, rendering using CGImage works, so as copying the data manually.

The main observation is that CIContext.render appears to work asynchronously or something goes wrong between the buffer getting filled and data being written into the video stream. In other words there's no data in the buffer when it gets flushed. The following is kind of pointing in that direction:

  1. Removing buffer locking results in almost all frames being written, except for the first few, the above code actually produces a correct output, but with the actual data the behaviour is as described.
  2. Using a different codec, like ProRes422, results in almost all frames being written correctly, with just a few blanks – also the above code produces correct output, but larger and complex images result in skipped frames.

What's wrong with this code and what's the right way to do it?

P.S. Most iOS examples use pretty much the same implementation and seem to work perfectly fine. I found a hint that it might differ for macOS, but can't see any official documentation on this.

like image 249
Ian Bytchek Avatar asked May 07 '19 08:05

Ian Bytchek


2 Answers

For your use case it would be better to use the pull-style APIs of AVAssetWriterInput because you don't need to process any media in real-time (like you would when capturing from a camera).

So rather then pausing the thread when the input isn't ready, just wait for it to pull the next frame. Remember to also set expectsMediaDataInRealTime to false in this case.

I think the main problem with you current approach is that you pause the very thread that the video processing is happening in when the writer is not yet ready.

(By the way: you can create CIImages with a solid color directly (CIImage(color:)); no need to create a CGImage first.)

like image 191
Frank Schlegel Avatar answered Oct 23 '22 12:10

Frank Schlegel


After speaking with Apple Developer Technical Support it appears that:

Core Image defers the rendering until the client requests the access to the frame buffer, i.e. CVPixelBufferLockBaseAddress.

So, the solution is simply to do CVPixelBufferLockBaseAddress after calling CIContext.render as shown below:

for frameNumber in 0 ..< frameCount {
    var pixelBuffer: CVPixelBuffer?
    guard let pixelBufferPool: CVPixelBufferPool = pixelBufferAdaptor.pixelBufferPool else { preconditionFailure() }
    precondition(CVPixelBufferPoolCreatePixelBuffer(nil, pixelBufferPool, &pixelBuffer) == kCVReturnSuccess)

    let ciImage = CIImage(cgImage: frameImage)
    context.render(ciImage, to: pixelBuffer!)

    precondition(CVPixelBufferLockBaseAddress(pixelBuffer!, []) == kCVReturnSuccess)
    defer { precondition(CVPixelBufferUnlockBaseAddress(pixelBuffer!, []) == kCVReturnSuccess) }

    let bytes = UnsafeBufferPointer(start: CVPixelBufferGetBaseAddress(pixelBuffer!)!.assumingMemoryBound(to: UInt8.self), count: CVPixelBufferGetDataSize(pixelBuffer!))
    precondition(bytes.contains(where: { $0 != 0 }))

    while !input.isReadyForMoreMediaData { Thread.sleep(forTimeInterval: 10 / 1000) }
    precondition(pixelBufferAdaptor.append(pixelBuffer!, withPresentationTime: CMTime(seconds: Double(frameNumber) * frameRate, preferredTimescale: 600)))
}
like image 42
Ian Bytchek Avatar answered Oct 23 '22 11:10

Ian Bytchek