I'm building an application which contains two components - server written in Haskell, and client written in Qt (C++). I'm using thrift to communicate them, and I wonder why is it working so slow.
I made a performance test and here is the result on my machine
C++ server and C++ client: Sending 100 pings - 13.37 ms Transfering 1000000 size vector - 433.58 ms Recieved: 3906.25 kB Transfering 100000 items from server - 1090.19 ms Transfering 100000 items to server - 631.98 ms Haskell server and C++ client: Sending 100 pings 3959.97 ms Transfering 1000000 size vector - 12481.40 ms Recieved: 3906.25 kB Transfering 100000 items from server - 26066.80 ms Transfering 100000 items to server - 1805.44 ms
Why is Haskell so slow in this test? How can I improve it performance?
Here are the files:
namespace hs test namespace cpp test struct Item { 1: optional string name 2: optional list<i32> coordinates } struct ItemPack { 1: optional list<Item> items 2: optional map<i32, Item> mappers } service ItemStore { void ping() ItemPack getItems(1:string name, 2: i32 count) bool setItems(1: ItemPack items) list<i32> getVector(1: i32 count) }
{-# LANGUAGE ScopedTypeVariables #-} module Main where import Data.Int import Data.Maybe (fromJust) import qualified Data.Vector as Vector import qualified Data.HashMap.Strict as HashMap import Network -- Thrift libraries import Thrift.Server -- Generated Thrift modules import Performance_Types import ItemStore_Iface import ItemStore i32toi :: Int32 -> Int i32toi = fromIntegral itoi32 :: Int -> Int32 itoi32 = fromIntegral port :: PortNumber port = 9090 data ItemHandler = ItemHandler instance ItemStore_Iface ItemHandler where ping _ = return () --putStrLn "ping" getItems _ mtname mtsize = do let size = i32toi $ fromJust mtsize item i = Item mtname (Just $ Vector.fromList $ map itoi32 [i..100]) items = map item [0..(size-1)] itemsv = Vector.fromList items mappers = zip (map itoi32 [0..(size-1)]) items mappersh = HashMap.fromList mappers itemPack = ItemPack (Just itemsv) (Just mappersh) putStrLn "getItems" return itemPack setItems _ _ = do putStrLn "setItems" return True getVector _ mtsize = do putStrLn "getVector" let size = i32toi $ fromJust mtsize return $ Vector.generate size itoi32 main :: IO () main = do _ <- runBasicServer ItemHandler process port putStrLn "Server stopped"
#include <iostream> #include <chrono> #include "gen-cpp/ItemStore.h" #include <transport/TSocket.h> #include <transport/TBufferTransports.h> #include <protocol/TBinaryProtocol.h> using namespace apache::thrift; using namespace apache::thrift::protocol; using namespace apache::thrift::transport; using namespace test; using namespace std; #define TIME_INIT std::chrono::_V2::steady_clock::time_point start, stop; \ std::chrono::duration<long long int, std::ratio<1ll, 1000000000ll> > duration; #define TIME_START start = std::chrono::steady_clock::now(); #define TIME_END duration = std::chrono::steady_clock::now() - start; \ std::cout << chrono::duration <double, std::milli> (duration).count() << " ms" << std::endl; int main(int argc, char **argv) { boost::shared_ptr<TSocket> socket(new TSocket("localhost", 9090)); boost::shared_ptr<TTransport> transport(new TBufferedTransport(socket)); boost::shared_ptr<TProtocol> protocol(new TBinaryProtocol(transport)); ItemStoreClient server(protocol); transport->open(); TIME_INIT long pings = 100; cout << "Sending " << pings << " pings" << endl; TIME_START for(auto i = 0 ; i< pings ; ++i) server.ping(); TIME_END long vectorSize = 1000000; cout << "Transfering " << vectorSize << " size vector" << endl; std::vector<int> v; TIME_START server.getVector(v, vectorSize); TIME_END cout << "Recieved: " << v.size()*sizeof(int) / 1024.0 << " kB" << endl; long itemsSize = 100000; cout << "Transfering " << itemsSize << " items from server" << endl; ItemPack items; TIME_START server.getItems(items, "test", itemsSize); TIME_END cout << "Transfering " << itemsSize << " items to server" << endl; TIME_START server.setItems(items); TIME_END transport->close(); return 0; }
#include "gen-cpp/ItemStore.h" #include <thrift/protocol/TBinaryProtocol.h> #include <thrift/server/TSimpleServer.h> #include <thrift/transport/TServerSocket.h> #include <thrift/transport/TBufferTransports.h> #include <map> #include <vector> using namespace ::apache::thrift; using namespace ::apache::thrift::protocol; using namespace ::apache::thrift::transport; using namespace ::apache::thrift::server; using namespace test; using boost::shared_ptr; class ItemStoreHandler : virtual public ItemStoreIf { public: ItemStoreHandler() { } void ping() { // printf("ping\n"); } void getItems(ItemPack& _return, const std::string& name, const int32_t count) { std::vector <Item> items; std::map<int, Item> mappers; for(auto i = 0 ; i < count ; ++i){ std::vector<int> coordinates; for(auto c = i ; c< 100 ; ++c) coordinates.push_back(c); Item item; item.__set_name(name); item.__set_coordinates(coordinates); items.push_back(item); mappers[i] = item; } _return.__set_items(items); _return.__set_mappers(mappers); printf("getItems\n"); } bool setItems(const ItemPack& items) { printf("setItems\n"); return true; } void getVector(std::vector<int32_t> & _return, const int32_t count) { for(auto i = 0 ; i < count ; ++i) _return.push_back(i); printf("getVector\n"); } }; int main(int argc, char **argv) { int port = 9090; shared_ptr<ItemStoreHandler> handler(new ItemStoreHandler()); shared_ptr<TProcessor> processor(new ItemStoreProcessor(handler)); shared_ptr<TServerTransport> serverTransport(new TServerSocket(port)); shared_ptr<TTransportFactory> transportFactory(new TBufferedTransportFactory()); shared_ptr<TProtocolFactory> protocolFactory(new TBinaryProtocolFactory()); TSimpleServer server(processor, serverTransport, transportFactory, protocolFactory); server.serve(); return 0; }
GEN_SRC := gen-cpp/ItemStore.cpp gen-cpp/performance_constants.cpp gen-cpp/performance_types.cpp GEN_OBJ := $(patsubst %.cpp,%.o, $(GEN_SRC)) THRIFT_DIR := /usr/local/include/thrift BOOST_DIR := /usr/local/include INC := -I$(THRIFT_DIR) -I$(BOOST_DIR) .PHONY: all clean all: ItemStore_server ItemStore_client %.o: %.cpp $(CXX) --std=c++11 -Wall -DHAVE_INTTYPES_H -DHAVE_NETINET_IN_H $(INC) -c $< -o $@ ItemStore_server: ItemStore_server.o $(GEN_OBJ) $(CXX) $^ -o $@ -L/usr/local/lib -lthrift -DHAVE_INTTYPES_H -DHAVE_NETINET_IN_H ItemStore_client: ItemStore_client.o $(GEN_OBJ) $(CXX) $^ -o $@ -L/usr/local/lib -lthrift -DHAVE_INTTYPES_H -DHAVE_NETINET_IN_H clean: $(RM) *.o ItemStore_server ItemStore_client
I generate files (using thrift 0.9 available here) with:
$ thrift --gen cpp performance.thrift $ thrift --gen hs performance.thrift
Compile with
$ make $ ghc Main.hs gen-hs/ItemStore_Client.hs gen-hs/ItemStore.hs gen-hs/ItemStore_Iface.hs gen-hs/Performance_Consts.hs gen-hs/Performance_Types.hs -Wall -O2
Run Haskell test:
$ ./Main& $ ./ItemStore_client
Run C++ test:
$ ./ItemStore_server& $ ./ItemStore_client
Remember to kill server after each test
Edited getVector
method to use Vector.generate
instead of Vector.fromList
, but still no effect
Due to suggestion of @MdxBhmt I tested the getItems
function as follows:
getItems _ mtname mtsize = do let size = i32toi $! fromJust mtsize item i = Item mtname (Just $! Vector.enumFromN (i::Int32) (100- (fromIntegral i))) itemsv = Vector.map item $ Vector.enumFromN 0 (size-1) itemPack = ItemPack (Just itemsv) Nothing putStrLn "getItems" return itemPack
which is strict and has improved Vector generation vs its alternative based on my original implementation:
getItems _ mtname mtsize = do let size = i32toi $ fromJust mtsize item i = Item mtname (Just $ Vector.fromList $ map itoi32 [i..100]) items = map item [0..(size-1)] itemsv = Vector.fromList items itemPack = ItemPack (Just itemsv) Nothing putStrLn "getItems" return itemPack
Notice that there is no HashMap sent. The first version gives time 12338.2 ms and the second is 11698.7 ms, no speedup :(
I reported an issue to Thrift Jira
This is completely unscientific but using GHC 7.8.3 with Thrift 0.9.2 and @MdxBhmt's version of getItems
, the discrepancy is significantly reduced.
C++ server and C++ client: Sending 100 pings: 8.56 ms Transferring 1000000 size vector: 137.97 ms Recieved: 3906.25 kB Transferring 100000 items from server: 467.78 ms Transferring 100000 items to server: 207.59 ms Haskell server and C++ client: Sending 100 pings: 24.95 ms Recieved: 3906.25 kB Transferring 1000000 size vector: 378.60 ms Transferring 100000 items from server: 233.74 ms Transferring 100000 items to server: 913.07 ms
Multiple executions were performed, restarting the server each time. The results are reproducible.
Note that the source code from the original question (with @MdxBhmt's getItems
implementation) will not compile as-is. The following changes will have to be made:
getItems _ mtname mtsize = do let size = i32toi $! fromJust mtsize item i = Item mtname (Just $! Vector.enumFromN (i::Int32) (100- (fromIntegral i))) itemsv = Vector.map item $ Vector.enumFromN 0 (size-1) itemPack = ItemPack (Just itemsv) Nothing putStrLn "getItems" return itemPack getVector _ mtsize = do putStrLn "getVector" let size = i32toi $ fromJust mtsize return $ Vector.generate size itoi32
Everyone is pointing out that is the culprit is the thrift library, but I'll focus on your code (and where I can help getting some speed)
Using a simplified version of your code, where you calculate itemsv
:
testfunc mtsize = itemsv where size = i32toi $ fromJust mtsize item i = Item (Just $ Vector.fromList $ map itoi32 [i..100]) items = map item [0..(size-1)] itemsv = Vector.fromList items
First, you have many intermediate data being created in item i
. Due to lazyness, those small and fast to calculate vectors becomes delayed thunks of data, when we could had them right away.
Having 2 carefully placed $!
, that represent strict evaluation :
item i = Item (Just $! Vector.fromList $! map itoi32 [i..100])
Will give you a 25% decrease in runtime (for size 1e5 and 1e6).
But there is a more problematic pattern here: you generate a list to convert it as a vector, in place of building the vector directly.
Look those 2 last lines, you create a list -> map a function -> transform into a vector.
Well, vectors are very similar to list, you can do something similar! So you'll have to generate a vector -> vector.map over it and done. No more need to convert a list into a vector, and maping on vector is usually faster than a list!
So you can get rid of items
and re-write the following itemsv
:
itemsv = Vector.map item $ Vector.enumFromN 0 (size-1)
Reapplying the same logic to item i
, we eliminate all lists.
testfunc3 mtsize = itemsv where size = i32toi $! fromJust mtsize item i = Item (Just $! Vector.enumFromN (i::Int32) (100- (fromIntegral i))) itemsv = Vector.map item $ Vector.enumFromN 0 (size-1)
This has a 50% decrease over the initial runtime.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With