Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Why are Go sockets slower than C++ sockets? [closed]

I benchmarked a simple socket ping pong test in Go and C++. The client begins by sending 0 to the server. The server increments whatever number it gets and sends it back to the client. The client echoes the number back to the server, and stops once the number is 1,000,000.

Both the client and the server are on the same computer, so I use a Unix socket in both cases. (I also tried same-host TCP sockets, which showed a similar result).

The Go test takes 14 seconds, whereas the C++ test takes 8 seconds. This is surprising to me because I have run a fair number of Go vs. C++ benchmarks, and generally Go is as performant as C++ as long as I don't trigger the garbage collector.

I am on a Mac, though commenters have also reported that the Go version is slower on Linux.

I am wondering whether I am missing a way to optimize the Go program, or whether there are simply inefficiencies under the hood.

Below are the commands I run to carry out the test, along with the test results. All code files are pasted at the bottom of this question.

Run Go server:

$ rm /tmp/go.sock
$ go run socketUnixServer.go

Run Go client:

$ go build socketUnixClient.go; time ./socketUnixClient

real    0m14.101s
user    0m5.242s
sys     0m7.883s

Run C++ server:

$ rm /tmp/cpp.sock
$ clang++ -std=c++11 tcpServerIncUnix.cpp -O3; ./a.out

Run C++ client:

$ clang++ -std=c++11 tcpClientIncUnix.cpp -O3; time ./a.out

real    0m8.690s
user    0m0.835s
sys     0m3.800s

Code files

Go server:

// socketUnixServer.go

package main

import (
    "log"
    "net"
    "encoding/binary"
)

// main listens on the Unix socket /tmp/go.sock, accepts a single client and
// serves the ping-pong protocol: each 4-byte big-endian uint32 it receives is
// incremented by one and written back. It returns as soon as a read or write
// on the connection fails, which includes the client disconnecting.
func main() {
	ln, err := net.Listen("unix", "/tmp/go.sock")
	if err != nil {
		log.Fatal("Listen error: ", err)
	}
	// Closing a Unix listener that net.Listen created also removes the
	// socket file, so a clean exit leaves nothing stale behind.
	defer ln.Close()

	c, err := ln.Accept()
	if err != nil {
		// panic (rather than log.Fatal) so the deferred Close above still runs.
		panic(err)
	}
	defer c.Close()
	log.Println("Connected with client!")

	readbuf := make([]byte, 4)
	writebuf := make([]byte, 4)
	for {
		// A stream socket may hand back fewer bytes than requested, so keep
		// reading until the whole 4-byte message has arrived.
		for got := 0; got < len(readbuf); {
			n, err := c.Read(readbuf[got:])
			if err != nil {
				// Without this check the loop would spin forever on EOF
				// once the client has hung up.
				log.Print("Connection closed: ", err)
				return
			}
			got += n
		}
		clientNum := binary.BigEndian.Uint32(readbuf)
		binary.BigEndian.PutUint32(writebuf, clientNum+1)

		// Write returns a non-nil error whenever it writes fewer bytes than
		// requested, so checking the error is sufficient.
		if _, err := c.Write(writebuf); err != nil {
			log.Print("Write error: ", err)
			return
		}
	}
}

Go client:

// socketUnixClient.go

package main

import (
    "log"
    "net"
    "encoding/binary"
)

// N is the upper bound of the ping-pong exchange: the client keeps sending
// while the current number is below N.
const N = 1_000_000

// main connects to the Unix socket /tmp/go.sock and plays ping-pong with the
// server: it sends the current number as a 4-byte big-endian uint32, reads
// the server's 4-byte reply, and repeats with the received value until that
// value reaches N. Any I/O failure terminates the program with a non-zero
// exit status instead of being silently ignored.
func main() {
	c, err := net.Dial("unix", "/tmp/go.sock")
	if err != nil {
		log.Fatal("Dial error: ", err)
	}
	// NOTE: log.Fatal below exits without running this deferred Close; that
	// is acceptable here because the OS closes the socket on process exit.
	defer c.Close()

	readbuf := make([]byte, 4)
	writebuf := make([]byte, 4)

	var currNumber uint32
	for currNumber < N {
		binary.BigEndian.PutUint32(writebuf, currNumber)
		if _, err := c.Write(writebuf); err != nil {
			log.Fatal("Write error: ", err)
		}

		// Read the incremented number from server. A stream socket may
		// return fewer bytes than requested, so loop until all 4 arrived.
		for got := 0; got < len(readbuf); {
			n, err := c.Read(readbuf[got:])
			if err != nil {
				// Without this check a dead server would leave currNumber
				// unchanged and the outer loop would never terminate.
				log.Fatal("Read error: ", err)
			}
			got += n
		}
		currNumber = binary.BigEndian.Uint32(readbuf)
	}
}

C++ server:

// tcpServerIncUnix.cpp

// Server side C/C++ program to demonstrate Socket programming
// #include <iostream>
#include <unistd.h>
#include <stdio.h>
#include <sys/un.h>
#include <sys/socket.h>
#include <stdlib.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <string.h>
#include <unistd.h>

// Decodes a 4-byte big-endian (network order) buffer into an unsigned int.
// b[0] is the most significant byte, b[3] the least significant.
unsigned int fromBytes(unsigned char b[4]) {
    // Widen each byte to unsigned int BEFORE shifting: a bare `b[0] << 24`
    // promotes to signed int and overflows when the top bit of b[0] is set.
    return static_cast<unsigned int>(b[0]) << 24
         | static_cast<unsigned int>(b[1]) << 16
         | static_cast<unsigned int>(b[2]) << 8
         | static_cast<unsigned int>(b[3]);
}

// Encodes x into b as 4 bytes in big-endian (network) order:
// b[0] receives the most significant byte, b[3] the least significant.
void toBytes(unsigned int x, unsigned char (&b)[4]) {
    // Fill from the last byte backwards, peeling off the low 8 bits each time.
    for (int idx = 3; idx >= 0; --idx) {
        b[idx] = static_cast<unsigned char>(x & 0xFFu);
        x >>= 8;
    }
}

// Reads exactly len bytes from fd into buf, retrying after short reads
// (a stream socket may return fewer bytes than requested).
// Returns false if the peer closed the connection or read() failed.
static bool readFully(int fd, unsigned char *buf, size_t len) {
    size_t got = 0;
    while (got < len) {
        ssize_t n = read(fd, buf + got, len - got);
        if (n <= 0) {
            if (n < 0) {
                perror("read");
            }
            return false;
        }
        got += static_cast<size_t>(n);
    }
    return true;
}

// Ping-pong server: binds the Unix socket /tmp/cpp.sock, accepts a single
// client, then for every 4-byte big-endian number received sends back that
// number plus one. Exits when the client disconnects or on any I/O error.
int main(int argc, char const *argv[])
{
    (void)argc;
    (void)argv;

    unsigned char recv_buffer[4] = {0};
    unsigned char send_buffer[4] = {0};

    int server_fd = socket(AF_UNIX, SOCK_STREAM, 0);
    if (server_fd < 0) {
        perror("socket");
        return EXIT_FAILURE;
    }

    struct sockaddr_un saddr;
    memset(&saddr, 0, sizeof(saddr));
    saddr.sun_family = AF_UNIX;
    // saddr was zeroed above, so copying at most size-1 bytes keeps the path
    // NUL-terminated.
    strncpy(saddr.sun_path, "/tmp/cpp.sock", sizeof(saddr.sun_path) - 1);

    // bind() fails if /tmp/cpp.sock already exists. Report that instead of
    // carrying on and printing a bogus "Connected" message; delete the stale
    // file and run again.
    if (bind(server_fd, (struct sockaddr *)&saddr, sizeof(saddr)) != 0) {
        perror("bind");
        close(server_fd);
        return EXIT_FAILURE;
    }

    if (listen(server_fd, 3) != 0) {
        perror("listen");
        close(server_fd);
        unlink(saddr.sun_path);
        return EXIT_FAILURE;
    }

    // Accept one client connection. The peer address is not needed.
    int new_socket = accept(server_fd, NULL, NULL);
    if (new_socket < 0) {
        perror("accept");
        close(server_fd);
        unlink(saddr.sun_path);
        return EXIT_FAILURE;
    }
    printf("Connected with client!\n");

    // Stop as soon as the client hangs up or a read fails.
    while (readFully(new_socket, recv_buffer, sizeof(recv_buffer))) {
        unsigned int x = fromBytes(recv_buffer);
        toBytes(x + 1, send_buffer);

        ssize_t written = write(new_socket, send_buffer, sizeof(send_buffer));
        if (written != static_cast<ssize_t>(sizeof(send_buffer))) {
            perror("write");
            break;
        }
    }

    close(new_socket);
    close(server_fd);
    // Remove the socket file so the next run can bind without a manual rm.
    unlink(saddr.sun_path);
    return EXIT_SUCCESS;
}

C++ client:

// tcpClientIncUnix.cpp

// Client side C/C++ program to demonstrate Socket programming
// #include <iostream>
#include <unistd.h>
#include <stdio.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <stdlib.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <string.h>
#include <unistd.h>

// Decodes a 4-byte big-endian (network order) buffer into an unsigned int.
// b[0] is the most significant byte, b[3] the least significant.
unsigned int fromBytes(unsigned char b[4]) {
    // Accumulate in an unsigned int so every shift is done on an unsigned
    // value; shifting the promoted (signed) byte by 24 would overflow when
    // the top bit of b[0] is set.
    unsigned int value = 0;
    for (int i = 0; i < 4; ++i) {
        value = (value << 8) | b[i];
    }
    return value;
}

// Writes x into b in big-endian (network) byte order, most significant
// byte first.
void toBytes(unsigned int x, unsigned char (&b)[4]) {
    int shift = 24;
    for (unsigned char &out : b) {
        // The conversion to unsigned char keeps only the low 8 bits.
        out = static_cast<unsigned char>(x >> shift);
        shift -= 8;
    }
}

// Reads exactly len bytes from fd into buf, retrying after short reads
// (a stream socket may return fewer bytes than requested).
// Returns false if the peer closed the connection or read() failed.
static bool readFully(int fd, unsigned char *buf, size_t len) {
    size_t got = 0;
    while (got < len) {
        ssize_t n = read(fd, buf + got, len - got);
        if (n <= 0) {
            if (n < 0) {
                perror("read");
            }
            return false;
        }
        got += static_cast<size_t>(n);
    }
    return true;
}

// Ping-pong client: connects to the Unix socket /tmp/cpp.sock, sends the
// current number as 4 big-endian bytes, reads the server's 4-byte reply and
// repeats with the received value until it reaches 1000000.
// Returns non-zero if the connection cannot be set up or breaks midway.
int main(int argc, char const *argv[])
{
    (void)argc;
    (void)argv;

    // We'll be passing uint32's back and forth
    unsigned char recv_buffer[4] = {0};
    unsigned char send_buffer[4] = {0};

    int sock = socket(AF_UNIX, SOCK_STREAM, 0);
    if (sock < 0) {
        perror("socket");
        return EXIT_FAILURE;
    }

    struct sockaddr_un saddr;
    memset(&saddr, 0, sizeof(saddr));
    saddr.sun_family = AF_UNIX;
    // saddr was zeroed above, so copying at most size-1 bytes keeps the path
    // NUL-terminated.
    strncpy(saddr.sun_path, "/tmp/cpp.sock", sizeof(saddr.sun_path) - 1);

    // Connect to the server. Report the failure and exit rather than
    // throwing an exception that nothing catches.
    if (connect(sock, (struct sockaddr *)&saddr, sizeof(saddr)) != 0) {
        perror("connect failed");
        close(sock);
        return EXIT_FAILURE;
    }

    // Unsigned so the loop condition compares like with like.
    const unsigned int n = 1000000;

    int status = EXIT_SUCCESS;
    unsigned int currNumber = 0;
    while (currNumber < n) {
        toBytes(currNumber, send_buffer);
        ssize_t written = write(sock, send_buffer, sizeof(send_buffer));
        if (written != static_cast<ssize_t>(sizeof(send_buffer))) {
            perror("write");
            status = EXIT_FAILURE;
            break;
        }

        // Read the incremented number from server. Bail out if the server
        // went away; otherwise currNumber would never advance.
        if (!readFully(sock, recv_buffer, sizeof(recv_buffer))) {
            fprintf(stderr, "server closed the connection\n");
            status = EXIT_FAILURE;
            break;
        }
        currNumber = fromBytes(recv_buffer);
    }

    close(sock);
    return status;
}
like image 730
rampatowl Avatar asked Mar 05 '23 20:03

rampatowl


1 Answer

First of all, I confirm that the Go programs from this question do run noticeably slower than the C++ ones. I think that it's indeed interesting to know why.

I profiled the Go client and server with pprof and found that syscall.Syscall takes about 70% of the total execution time. According to this ticket, syscalls in Go are approximately 1.4 times slower than in C.

(pprof) top -cum
Showing nodes accounting for 18.78s, 67.97% of 27.63s total
Dropped 44 nodes (cum <= 0.14s)
Showing top 10 nodes out of 44
  flat  flat%   sum%        cum   cum%
 0.11s   0.4%   0.4%     22.65s 81.98%  main.main
     0     0%   0.4%     22.65s 81.98%  runtime.main
18.14s 65.65% 66.05%     19.91s 72.06%  syscall.Syscall
 0.03s  0.11% 66.16%     12.91s 46.72%  net.(*conn).Read
 0.10s  0.36% 66.52%     12.88s 46.62%  net.(*netFD).Read
 0.16s  0.58% 67.10%     12.78s 46.25%  internal/poll.(*FD).Read
 0.06s  0.22% 67.32%     11.87s 42.96%  syscall.Read
 0.11s   0.4% 67.72%     11.81s 42.74%  syscall.read
 0.02s 0.072% 67.79%      9.30s 33.66%  net.(*conn).Write
 0.05s  0.18% 67.97%      9.28s 33.59%  net.(*netFD).Write

I gradually decreased the number of Conn.Write and Conn.Read calls and increased the size of the buffer accordingly, so that the number of transferred bytes stayed the same. The result: the fewer of these calls the program makes, the closer its performance gets to that of the C++ version.

like image 114
Andrey Dyatlov Avatar answered Mar 09 '23 07:03

Andrey Dyatlov