Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Measure FLOPS in Go

I would like to write a Go program to benchmark my CPU and estimate my laptop's GFLOPS.

// benchmarkFlopTime times one float64 addition per iteration, recording a
// separate time.Duration sample for each of the 100000000 iterations, and
// then prints the total of the samples divided by the iteration count.
func benchmarkFlopTime() {
    const opCount = 100000000

    var (
        samples []time.Duration
        acc     float64
    )
    for n := 0; n < opCount; n++ {
        // Each sample brackets a single addition with time.Now/time.Since.
        begin := time.Now()
        acc = 1.0 + 312.232
        samples = append(samples, time.Since(begin))
        acc += 1.0
    }

    // sumTimeArray is defined elsewhere; presumably it totals the samples.
    total := sumTimeArray(samples)
    mean := float64(total) / float64(time.Duration(opCount))
    fmt.Println("Result (ns):", mean)
}

Output 1: Result (ns): 9.99604753ns

Interpretation:

This code gives me around 0.1 GFLOPS: (1 s / 10 ns) / 10^9 = 0.1. I know my CPU can do something like 8 operations per cycle and I only do one, so I can apply a factor of 8; let's assume that gives roughly 1 GFLOPS.

Problems:

  • However, the theoretical peak for my laptop, with a 2.5 GHz i7 and 8 cores, should be 8 × 2.5 × 8 = 160 GFLOPS. My result is far below this.

To be precise, I disable compiler optimizations when I compile: go install -gcflags '-N -l' github.com/golang/cpu-benchmark.

Can this value, so far below the theoretical one, be explained? Is Go suitable for measuring this?

like image 999
nlassaux Avatar asked Feb 02 '16 19:02

nlassaux


1 Answer

Here's a (simplistic) Go benchmark which times a loop containing twenty floating-point operations.

package main

import (
    "fmt"
    "math"
    "runtime"
    "testing"
    "time"
)

var (
    f  float64           // destination: every statement in benchmarkFloatOps stores its result here
    e  float64 = math.E  // right-hand operand of every operation in benchmarkFloatOps
    pi float64 = math.Pi // left-hand operand of every operation in benchmarkFloatOps
)

const nFlop = 20 // floating-point operations per iteration of the loop in benchmarkFloatOps

func benchmarkFloatOps(n int) { // runs the 20 float64 operations below n times
    for i := 0; i < n; i++ {
        f = pi + e // group 1 of 5: add, subtract, multiply, divide pi and e,
        f = pi - e // each result being assigned to the package-level f
        f = pi * e
        f = pi / e
        f = pi + e // group 2 of 5
        f = pi - e
        f = pi * e
        f = pi / e
        f = pi + e // group 3 of 5
        f = pi - e
        f = pi * e
        f = pi / e
        f = pi + e // group 4 of 5
        f = pi - e
        f = pi * e
        f = pi / e
        f = pi + e // group 5 of 5
        f = pi - e
        f = pi * e
        f = pi / e
    }
}

// BenchmarkFloatOps adapts benchmarkFloatOps to the testing package's
// benchmark signature: it runs the loop for b.N iterations.
func BenchmarkFloatOps(b *testing.B) {
    iterations := b.N
    benchmarkFloatOps(iterations)
}

// Results prints, on one line, the elapsed time t, the total operation count
// (nFlop operations in each of n iterations), and the mean time per
// operation in nanoseconds.
//
// The count is computed in int64 so that products such as 20 * 200000000 do
// not overflow on platforms where int is 32 bits wide. If the count is zero,
// the ns/op figure is the floating-point result of dividing by zero (NaN or
// an infinity) rather than a panic.
func Results(t time.Duration, n int, nFlop int) {
    ops := int64(nFlop) * int64(n)
    // A time.Duration counts nanoseconds, so this quotient is already ns/op.
    nsPerOp := float64(t) / float64(ops)
    fmt.Println(t, ops, "ops", nsPerOp, "ns/op")
}

// main sets GOMAXPROCS to 1 and then measures benchmarkFloatOps twice: once
// through testing.Benchmark, and once timed by hand using the iteration
// count that testing.Benchmark settled on. Both measurements are printed
// with Results.
func main() {
    runtime.GOMAXPROCS(1)

    // Measurement driven by the testing package.
    viaTesting := testing.Benchmark(BenchmarkFloatOps)
    Results(viaTesting.T, viaTesting.N, nFlop)

    // Measurement timed directly, reusing the same iteration count.
    iterations := viaTesting.N
    began := time.Now()
    benchmarkFloatOps(iterations)
    Results(time.Since(began), iterations, nFlop)
}

Output: Intel i7-6700 CPU 3.40GHz:

1.296967371s 4000000000 ops 0.32424184275 ns/op
1.299078813s 4000000000 ops 0.32476970325 ns/op

Go pseudo-assembly for benchmarkFloatOps:

$ go tool compile -S flops.go

"".benchmarkFloatOps t=1 size=592 value=0 args=0x8 locals=0x0
    0x0000 00000 (flops.go:19)  TEXT    "".benchmarkFloatOps(SB), $0-8
    0x0000 00000 (flops.go:19)  NOP
    0x0000 00000 (flops.go:19)  NOP
    0x0000 00000 (flops.go:19)  MOVQ    "".n+8(FP), CX
    0x0005 00005 (flops.go:19)  FUNCDATA    $0, gclocals·5184031d3a32a42d85027f073f873668(SB)
    0x0005 00005 (flops.go:19)  FUNCDATA    $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
    0x0005 00005 (flops.go:20)  MOVQ    $0, AX
    0x0007 00007 (flops.go:20)  CMPQ    AX, CX
    0x000a 00010 (flops.go:20)  JGE $0, 588
    0x0010 00016 (flops.go:21)  MOVSD   "".pi(SB), X0
    0x0018 00024 (flops.go:21)  MOVSD   "".e(SB), X1
    0x0020 00032 (flops.go:21)  ADDSD   X1, X0
    0x0024 00036 (flops.go:21)  MOVSD   X0, "".f(SB)
    0x002c 00044 (flops.go:22)  MOVSD   "".pi(SB), X0
    0x0034 00052 (flops.go:22)  MOVSD   "".e(SB), X1
    0x003c 00060 (flops.go:22)  SUBSD   X1, X0
    0x0040 00064 (flops.go:22)  MOVSD   X0, "".f(SB)
    0x0048 00072 (flops.go:23)  MOVSD   "".pi(SB), X0
    0x0050 00080 (flops.go:23)  MOVSD   "".e(SB), X1
    0x0058 00088 (flops.go:23)  MULSD   X1, X0
    0x005c 00092 (flops.go:23)  MOVSD   X0, "".f(SB)
    0x0064 00100 (flops.go:24)  MOVSD   "".pi(SB), X0
    0x006c 00108 (flops.go:24)  MOVSD   "".e(SB), X1
    0x0074 00116 (flops.go:24)  DIVSD   X1, X0
    0x0078 00120 (flops.go:24)  MOVSD   X0, "".f(SB)
    0x0080 00128 (flops.go:25)  MOVSD   "".pi(SB), X0
    0x0088 00136 (flops.go:25)  MOVSD   "".e(SB), X1
    0x0090 00144 (flops.go:25)  ADDSD   X1, X0
    0x0094 00148 (flops.go:25)  MOVSD   X0, "".f(SB)
    0x009c 00156 (flops.go:26)  MOVSD   "".pi(SB), X0
    0x00a4 00164 (flops.go:26)  MOVSD   "".e(SB), X1
    0x00ac 00172 (flops.go:26)  SUBSD   X1, X0
    0x00b0 00176 (flops.go:26)  MOVSD   X0, "".f(SB)
    0x00b8 00184 (flops.go:27)  MOVSD   "".pi(SB), X0
    0x00c0 00192 (flops.go:27)  MOVSD   "".e(SB), X1
    0x00c8 00200 (flops.go:27)  MULSD   X1, X0
    0x00cc 00204 (flops.go:27)  MOVSD   X0, "".f(SB)
    0x00d4 00212 (flops.go:28)  MOVSD   "".pi(SB), X0
    0x00dc 00220 (flops.go:28)  MOVSD   "".e(SB), X1
    0x00e4 00228 (flops.go:28)  DIVSD   X1, X0
    0x00e8 00232 (flops.go:28)  MOVSD   X0, "".f(SB)
    0x00f0 00240 (flops.go:29)  MOVSD   "".pi(SB), X0
    0x00f8 00248 (flops.go:29)  MOVSD   "".e(SB), X1
    0x0100 00256 (flops.go:29)  ADDSD   X1, X0
    0x0104 00260 (flops.go:29)  MOVSD   X0, "".f(SB)
    0x010c 00268 (flops.go:30)  MOVSD   "".pi(SB), X0
    0x0114 00276 (flops.go:30)  MOVSD   "".e(SB), X1
    0x011c 00284 (flops.go:30)  SUBSD   X1, X0
    0x0120 00288 (flops.go:30)  MOVSD   X0, "".f(SB)
    0x0128 00296 (flops.go:31)  MOVSD   "".pi(SB), X0
    0x0130 00304 (flops.go:31)  MOVSD   "".e(SB), X1
    0x0138 00312 (flops.go:31)  MULSD   X1, X0
    0x013c 00316 (flops.go:31)  MOVSD   X0, "".f(SB)
    0x0144 00324 (flops.go:32)  MOVSD   "".pi(SB), X0
    0x014c 00332 (flops.go:32)  MOVSD   "".e(SB), X1
    0x0154 00340 (flops.go:32)  DIVSD   X1, X0
    0x0158 00344 (flops.go:32)  MOVSD   X0, "".f(SB)
    0x0160 00352 (flops.go:33)  MOVSD   "".pi(SB), X0
    0x0168 00360 (flops.go:33)  MOVSD   "".e(SB), X1
    0x0170 00368 (flops.go:33)  ADDSD   X1, X0
    0x0174 00372 (flops.go:33)  MOVSD   X0, "".f(SB)
    0x017c 00380 (flops.go:34)  MOVSD   "".pi(SB), X0
    0x0184 00388 (flops.go:34)  MOVSD   "".e(SB), X1
    0x018c 00396 (flops.go:34)  SUBSD   X1, X0
    0x0190 00400 (flops.go:34)  MOVSD   X0, "".f(SB)
    0x0198 00408 (flops.go:35)  MOVSD   "".pi(SB), X0
    0x01a0 00416 (flops.go:35)  MOVSD   "".e(SB), X1
    0x01a8 00424 (flops.go:35)  MULSD   X1, X0
    0x01ac 00428 (flops.go:35)  MOVSD   X0, "".f(SB)
    0x01b4 00436 (flops.go:36)  MOVSD   "".pi(SB), X0
    0x01bc 00444 (flops.go:36)  MOVSD   "".e(SB), X1
    0x01c4 00452 (flops.go:36)  DIVSD   X1, X0
    0x01c8 00456 (flops.go:36)  MOVSD   X0, "".f(SB)
    0x01d0 00464 (flops.go:37)  MOVSD   "".pi(SB), X0
    0x01d8 00472 (flops.go:37)  MOVSD   "".e(SB), X1
    0x01e0 00480 (flops.go:37)  ADDSD   X1, X0
    0x01e4 00484 (flops.go:37)  MOVSD   X0, "".f(SB)
    0x01ec 00492 (flops.go:38)  MOVSD   "".pi(SB), X0
    0x01f4 00500 (flops.go:38)  MOVSD   "".e(SB), X1
    0x01fc 00508 (flops.go:38)  SUBSD   X1, X0
    0x0200 00512 (flops.go:38)  MOVSD   X0, "".f(SB)
    0x0208 00520 (flops.go:39)  MOVSD   "".pi(SB), X0
    0x0210 00528 (flops.go:39)  MOVSD   "".e(SB), X1
    0x0218 00536 (flops.go:39)  MULSD   X1, X0
    0x021c 00540 (flops.go:39)  MOVSD   X0, "".f(SB)
    0x0224 00548 (flops.go:40)  MOVSD   "".pi(SB), X0
    0x022c 00556 (flops.go:40)  MOVSD   "".e(SB), X1
    0x0234 00564 (flops.go:40)  DIVSD   X1, X0
    0x0238 00568 (flops.go:40)  MOVSD   X0, "".f(SB)
    0x0240 00576 (flops.go:20)  INCQ    AX
    0x0243 00579 (flops.go:20)  NOP
    0x0243 00579 (flops.go:20)  CMPQ    AX, CX
    0x0246 00582 (flops.go:20)  JLT $0, 16
    0x024c 00588 (flops.go:42)  RET

Go object code for benchmarkFloatOps:

$ go build flops.go && go tool objdump -s benchmarkFloatOps ./flops

TEXT main.benchmarkFloatOps(SB) /home/peter/Dropbox/gopath/src/so/benchmark/flops.go
    flops.go:19 0x401000    488b4c2408      MOVQ 0x8(SP), CX
    flops.go:20 0x401005    31c0            XORL AX, AX
    flops.go:20 0x401007    4839c8          CMPQ CX, AX
    flops.go:20 0x40100a    0f8d3c020000        JGE 0x40124c
    flops.go:21 0x401010    f20f1005e8801b00    REPNE MOVSD_XMM 0x1b80e8(IP), X0
    flops.go:21 0x401018    f20f100dd8801b00    REPNE MOVSD_XMM 0x1b80d8(IP), X1
    flops.go:21 0x401020    f20f58c1        REPNE ADDSD X1, X0
    flops.go:21 0x401024    f20f110544451e00    REPNE MOVSD_XMM X0, 0x1e4544(IP)
    flops.go:22 0x40102c    f20f1005cc801b00    REPNE MOVSD_XMM 0x1b80cc(IP), X0
    flops.go:22 0x401034    f20f100dbc801b00    REPNE MOVSD_XMM 0x1b80bc(IP), X1
    flops.go:22 0x40103c    f20f5cc1        REPNE SUBSD X1, X0
    flops.go:22 0x401040    f20f110528451e00    REPNE MOVSD_XMM X0, 0x1e4528(IP)
    flops.go:23 0x401048    f20f1005b0801b00    REPNE MOVSD_XMM 0x1b80b0(IP), X0
    flops.go:23 0x401050    f20f100da0801b00    REPNE MOVSD_XMM 0x1b80a0(IP), X1
    flops.go:23 0x401058    f20f59c1        REPNE MULSD X1, X0
    flops.go:23 0x40105c    f20f11050c451e00    REPNE MOVSD_XMM X0, 0x1e450c(IP)
    flops.go:24 0x401064    f20f100594801b00    REPNE MOVSD_XMM 0x1b8094(IP), X0
    flops.go:24 0x40106c    f20f100d84801b00    REPNE MOVSD_XMM 0x1b8084(IP), X1
    flops.go:24 0x401074    f20f5ec1        REPNE DIVSD X1, X0
    flops.go:24 0x401078    f20f1105f0441e00    REPNE MOVSD_XMM X0, 0x1e44f0(IP)
    flops.go:25 0x401080    f20f100578801b00    REPNE MOVSD_XMM 0x1b8078(IP), X0
    flops.go:25 0x401088    f20f100d68801b00    REPNE MOVSD_XMM 0x1b8068(IP), X1
    flops.go:25 0x401090    f20f58c1        REPNE ADDSD X1, X0
    flops.go:25 0x401094    f20f1105d4441e00    REPNE MOVSD_XMM X0, 0x1e44d4(IP)
    flops.go:26 0x40109c    f20f10055c801b00    REPNE MOVSD_XMM 0x1b805c(IP), X0
    flops.go:26 0x4010a4    f20f100d4c801b00    REPNE MOVSD_XMM 0x1b804c(IP), X1
    flops.go:26 0x4010ac    f20f5cc1        REPNE SUBSD X1, X0
    flops.go:26 0x4010b0    f20f1105b8441e00    REPNE MOVSD_XMM X0, 0x1e44b8(IP)
    flops.go:27 0x4010b8    f20f100540801b00    REPNE MOVSD_XMM 0x1b8040(IP), X0
    flops.go:27 0x4010c0    f20f100d30801b00    REPNE MOVSD_XMM 0x1b8030(IP), X1
    flops.go:27 0x4010c8    f20f59c1        REPNE MULSD X1, X0
    flops.go:27 0x4010cc    f20f11059c441e00    REPNE MOVSD_XMM X0, 0x1e449c(IP)
    flops.go:28 0x4010d4    f20f100524801b00    REPNE MOVSD_XMM 0x1b8024(IP), X0
    flops.go:28 0x4010dc    f20f100d14801b00    REPNE MOVSD_XMM 0x1b8014(IP), X1
    flops.go:28 0x4010e4    f20f5ec1        REPNE DIVSD X1, X0
    flops.go:28 0x4010e8    f20f110580441e00    REPNE MOVSD_XMM X0, 0x1e4480(IP)
    flops.go:29 0x4010f0    f20f100508801b00    REPNE MOVSD_XMM 0x1b8008(IP), X0
    flops.go:29 0x4010f8    f20f100df87f1b00    REPNE MOVSD_XMM 0x1b7ff8(IP), X1
    flops.go:29 0x401100    f20f58c1        REPNE ADDSD X1, X0
    flops.go:29 0x401104    f20f110564441e00    REPNE MOVSD_XMM X0, 0x1e4464(IP)
    flops.go:30 0x40110c    f20f1005ec7f1b00    REPNE MOVSD_XMM 0x1b7fec(IP), X0
    flops.go:30 0x401114    f20f100ddc7f1b00    REPNE MOVSD_XMM 0x1b7fdc(IP), X1
    flops.go:30 0x40111c    f20f5cc1        REPNE SUBSD X1, X0
    flops.go:30 0x401120    f20f110548441e00    REPNE MOVSD_XMM X0, 0x1e4448(IP)
    flops.go:31 0x401128    f20f1005d07f1b00    REPNE MOVSD_XMM 0x1b7fd0(IP), X0
    flops.go:31 0x401130    f20f100dc07f1b00    REPNE MOVSD_XMM 0x1b7fc0(IP), X1
    flops.go:31 0x401138    f20f59c1        REPNE MULSD X1, X0
    flops.go:31 0x40113c    f20f11052c441e00    REPNE MOVSD_XMM X0, 0x1e442c(IP)
    flops.go:32 0x401144    f20f1005b47f1b00    REPNE MOVSD_XMM 0x1b7fb4(IP), X0
    flops.go:32 0x40114c    f20f100da47f1b00    REPNE MOVSD_XMM 0x1b7fa4(IP), X1
    flops.go:32 0x401154    f20f5ec1        REPNE DIVSD X1, X0
    flops.go:32 0x401158    f20f110510441e00    REPNE MOVSD_XMM X0, 0x1e4410(IP)
    flops.go:33 0x401160    f20f1005987f1b00    REPNE MOVSD_XMM 0x1b7f98(IP), X0
    flops.go:33 0x401168    f20f100d887f1b00    REPNE MOVSD_XMM 0x1b7f88(IP), X1
    flops.go:33 0x401170    f20f58c1        REPNE ADDSD X1, X0
    flops.go:33 0x401174    f20f1105f4431e00    REPNE MOVSD_XMM X0, 0x1e43f4(IP)
    flops.go:34 0x40117c    f20f10057c7f1b00    REPNE MOVSD_XMM 0x1b7f7c(IP), X0
    flops.go:34 0x401184    f20f100d6c7f1b00    REPNE MOVSD_XMM 0x1b7f6c(IP), X1
    flops.go:34 0x40118c    f20f5cc1        REPNE SUBSD X1, X0
    flops.go:34 0x401190    f20f1105d8431e00    REPNE MOVSD_XMM X0, 0x1e43d8(IP)
    flops.go:35 0x401198    f20f1005607f1b00    REPNE MOVSD_XMM 0x1b7f60(IP), X0
    flops.go:35 0x4011a0    f20f100d507f1b00    REPNE MOVSD_XMM 0x1b7f50(IP), X1
    flops.go:35 0x4011a8    f20f59c1        REPNE MULSD X1, X0
    flops.go:35 0x4011ac    f20f1105bc431e00    REPNE MOVSD_XMM X0, 0x1e43bc(IP)
    flops.go:36 0x4011b4    f20f1005447f1b00    REPNE MOVSD_XMM 0x1b7f44(IP), X0
    flops.go:36 0x4011bc    f20f100d347f1b00    REPNE MOVSD_XMM 0x1b7f34(IP), X1
    flops.go:36 0x4011c4    f20f5ec1        REPNE DIVSD X1, X0
    flops.go:36 0x4011c8    f20f1105a0431e00    REPNE MOVSD_XMM X0, 0x1e43a0(IP)
    flops.go:37 0x4011d0    f20f1005287f1b00    REPNE MOVSD_XMM 0x1b7f28(IP), X0
    flops.go:37 0x4011d8    f20f100d187f1b00    REPNE MOVSD_XMM 0x1b7f18(IP), X1
    flops.go:37 0x4011e0    f20f58c1        REPNE ADDSD X1, X0
    flops.go:37 0x4011e4    f20f110584431e00    REPNE MOVSD_XMM X0, 0x1e4384(IP)
    flops.go:38 0x4011ec    f20f10050c7f1b00    REPNE MOVSD_XMM 0x1b7f0c(IP), X0
    flops.go:38 0x4011f4    f20f100dfc7e1b00    REPNE MOVSD_XMM 0x1b7efc(IP), X1
    flops.go:38 0x4011fc    f20f5cc1        REPNE SUBSD X1, X0
    flops.go:38 0x401200    f20f110568431e00    REPNE MOVSD_XMM X0, 0x1e4368(IP)
    flops.go:39 0x401208    f20f1005f07e1b00    REPNE MOVSD_XMM 0x1b7ef0(IP), X0
    flops.go:39 0x401210    f20f100de07e1b00    REPNE MOVSD_XMM 0x1b7ee0(IP), X1
    flops.go:39 0x401218    f20f59c1        REPNE MULSD X1, X0
    flops.go:39 0x40121c    f20f11054c431e00    REPNE MOVSD_XMM X0, 0x1e434c(IP)
    flops.go:40 0x401224    f20f1005d47e1b00    REPNE MOVSD_XMM 0x1b7ed4(IP), X0
    flops.go:40 0x40122c    f20f100dc47e1b00    REPNE MOVSD_XMM 0x1b7ec4(IP), X1
    flops.go:40 0x401234    f20f5ec1        REPNE DIVSD X1, X0
    flops.go:40 0x401238    f20f110530431e00    REPNE MOVSD_XMM X0, 0x1e4330(IP)
    flops.go:20 0x401240    48ffc0          INCQ AX
    flops.go:20 0x401243    4839c8          CMPQ CX, AX
    flops.go:20 0x401246    0f8cc4fdffff        JL 0x401010
    flops.go:42 0x40124c    c3          RET
like image 82
peterSO Avatar answered Oct 22 '22 13:10

peterSO