Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Why is my Trie lookup slower than that of the standard F# Map's?

So, I just ported the Trie from OCaml. Unfortunately, it runs slower than the standard Map in terms of tryFind. I don't understand this - the trie seems like it should be faster. Is F#'s code libraries built in some special way as to make them faster than the code that the user typically deploys?

Here's the code -

[<RequireQualifiedAccess>]
module Trie

type Node<'k, 'v when 'k : comparison> =
    { TrieMap : Map<'k, Node<'k, 'v>>
      TrieKvp : ('k list * 'v) option }
    member inline x.IsEmpty = x.TrieKvp.IsNone && x.TrieMap.IsEmpty

let inline make map kvp =
    { TrieMap = map
      TrieKvp = kvp }

let inline makeEmpty () : Node<'k, 'v> = make Map.empty None

let inline isEmpty (node : Node<'k, 'v>) = node.IsEmpty

let rec tryFind (key : 'k list) node =
    if key.IsEmpty then
        match node.TrieKvp with
        | Some (_, value) -> Some value
        | None -> None
    else
        let keyHead = key.Head
        let keyTail = key.Tail
        let optSubNode = Map.tryFind keyHead node.TrieMap
        match optSubNode with
        | Some subNode -> tryFind keyTail subNode
        | None -> None

let inline containsKey key node =
    (tryFind key node).IsSome

let rec addInternal (key : 'k list) value node =
    if key.IsEmpty then make node.TrieMap (Some (key, value))
    else
        let keyHead = key.Head
        let keyTail = key.Tail
        let newTrie =
            match Map.tryFind keyHead node.TrieMap with
            | Some subTrie -> subTrie
            | None -> makeEmpty ()
        let newTrie2 = addInternal keyTail value newTrie
        make (Map.add keyHead newTrie2 node.TrieMap) node.TrieKvp

let inline add key value node =
    addInternal key value node

let rec addMany kvps node =
    if Seq.isEmpty kvps then node
    else
        let kvpHead = Seq.head kvps
        let kvpTail = Seq.skip 1 kvps
        let newTrie = add (fst kvpHead) (snd kvpHead) node
        addMany kvpTail newTrie

let inline ofList kvps =
    addMany kvps (makeEmpty ())

let inline ofListBy by kvps =
    let pairs = List.map by kvps
    ofList pairs

let rec foldInternal folder rev node state =
    match node.TrieKvp with
    | Some (_, value) -> folder (Map.fold (fun state key value -> foldInternal folder (key :: rev) value state) state node.TrieMap) (List.rev rev) value
    | None -> Map.fold (fun state key value -> foldInternal folder (key :: rev) value state) state node.TrieMap

let inline fold folder state node =
    foldInternal folder [] node state

let rec map (mapper : 'k list -> 'v -> 'a) (node : Node<'k, 'v>) : Node<'k, 'a> =
    match node.TrieKvp with
    | Some (key, value) -> make (Map.map (fun _ value -> map mapper value) node.TrieMap) (Some (key, mapper key value))
    | None -> make (Map.map (fun _ value -> map mapper value) node.TrieMap) None

let inline toValueList node =
    fold (fun state _ value -> value :: state) [] node

let inline singleton (key, value) =
    add key value (makeEmpty ())

Here's a performance test that Jon Harrop provided that I find adequate for measuring improvements -

let xs = Array.init 1000000 (fun i -> [i])
let timer = System.Diagnostics.Stopwatch.StartNew()
let mutable t = Trie.makeEmpty()
for i=0 to xs.Length-1 do
    t <- Trie.add xs.[i] xs.[i] t
printfn "Trie took %fs to build" timer.Elapsed.TotalSeconds
timer.Restart()
for _ in 1..100 do
    for i=0 to xs.Length-1 do
        ignore(Trie.tryFind xs.[i])
printfn "Trie took %fs to search" timer.Elapsed.TotalSeconds

let timer = System.Diagnostics.Stopwatch.StartNew()
let mutable t = Map.empty
for i=0 to xs.Length-1 do
    t <- Map.add xs.[i] xs.[i] t
printfn "Map took %fs to build" timer.Elapsed.TotalSeconds
timer.Restart()
for _ in 1..100 do
    for i=0 to xs.Length-1 do
        ignore(Map.tryFind xs.[i])
printfn "Map took %fs to search" timer.Elapsed.TotalSeconds

NOTE: if you have a faster lookup data structure in mind, please note that I need a persistent data structure.

like image 690
Bryan Edds Avatar asked Dec 27 '22 21:12

Bryan Edds


2 Answers

Unfortunately, it runs slower than the standard Map in terms of tryFind. I don't understand this - the trie seems like it should be faster.

A quick benchmark here suggests that your trie is already faster than Map for at least simple case:

do
    let n = 0
    let xs = Array.init 1000000 (fun i -> [i])
    let timer = System.Diagnostics.Stopwatch.StartNew()
    let mutable t = Trie.makeEmpty()
    for i=0 to xs.Length-1 do
        t <- Trie.add xs.[i] xs.[i] t
    printfn "Trie took %fs to build" timer.Elapsed.TotalSeconds
    timer.Restart()
    for _ in 1..100 do
        for i=0 to xs.Length-1 do
            ignore(Trie.tryFind xs.[i])
    printfn "Trie took %fs to search" timer.Elapsed.TotalSeconds

    let timer = System.Diagnostics.Stopwatch.StartNew()
    let mutable t = Map.empty
    for i=0 to xs.Length-1 do
        t <- Map.add xs.[i] xs.[i] t
    printfn "Map took %fs to build" timer.Elapsed.TotalSeconds
    timer.Restart()
    for _ in 1..100 do
        for i=0 to xs.Length-1 do
            ignore(Map.tryFind xs.[i])
    printfn "Map took %fs to search" timer.Elapsed.TotalSeconds

I get 4s to build your Trie, 8.7s to build a Map and about 0.7 to search in both cases.

However, there is a lot of room for improvement in your implementation. I recently wrote an article about an optimized generic persistent hash trie implementation in F# that was published here.

Your later comments imply that you only want to use this to map over strings. If so, it would be vastly more efficient to specialize your trie for string keys.

EDIT

KVB suggested that I elaborate on the "room for improvement" so here's some feedback:

  • Use inline sparingly as an optimization and only on the basis of compelling performance measurements.
  • Make empty a value rather than a function.
  • Avoid List.head and List.tail whenever possible. Use pattern matching instead.
  • Avoid genericity when possible (e.g. hard-code for string keys in this case).
like image 83
J D Avatar answered Jan 14 '23 15:01

J D


Alright, so after a little more thinking, I hypothesized that the real difference in performance is in the use of lists for keys as opposed to strings. Strings (and array) have much better cache coherency. So, I changed the key from a 'k list to a string and voila! Performance is now actually better than the Map in my application!

Here's the code -

[<RequireQualifiedAccess>]
module StringTrie

type Node<'v> =
    { TrieMap : Map<char, Node<'v>>
      TrieKvp : (string * 'v) option }
    member inline x.IsEmpty = x.TrieKvp.IsNone && x.TrieMap.IsEmpty

let inline make map kvp =
    { TrieMap = map
      TrieKvp = kvp }

let inline makeEmpty () : Node<'v> = make Map.empty None

let inline isEmpty (node : Node<'v>) = node.IsEmpty

let rec tryFindInternal (key : string) index node =
    if key.Length = index then
        match node.TrieKvp with
        | Some (_, value) -> Some value
        | None -> None
    else
        let optSubNode = Map.tryFind key.[index] node.TrieMap
        match optSubNode with
        | Some subNode -> tryFindInternal key (index + 1) subNode
        | None -> None

let inline tryFind (key : string) node =
    tryFindInternal key 0 node

let inline containsKey key node =
    (tryFind key node).IsSome

let rec addInternal (key : string) index value node =
    if key.Length = index then make node.TrieMap (Some (key, value))
    else
        let char = key.[index]
        let newTrie =
            match Map.tryFind char node.TrieMap with
            | Some subTrie -> subTrie
            | None -> makeEmpty ()
        let newTrie2 = addInternal key (index + 1) value newTrie
        make (Map.add char newTrie2 node.TrieMap) node.TrieKvp

let inline add key value node =
    addInternal key 0 value node

let rec addMany kvps node =
    if Seq.isEmpty kvps then node
    else
        let kvpHead = Seq.head kvps
        let kvpTail = Seq.skip 1 kvps
        let newTrie = add (fst kvpHead) (snd kvpHead) node
        addMany kvpTail newTrie

let inline ofList kvps =
    addMany kvps (makeEmpty ())

let inline ofListBy by kvps =
    let pairs = List.map by kvps
    ofList pairs

let rec foldInternal folder rev node state =
    match node.TrieKvp with
    | Some (_, value) -> folder (Map.fold (fun state key value -> foldInternal folder (key :: rev) value state) state node.TrieMap) (List.rev rev) value
    | None -> Map.fold (fun state key value -> foldInternal folder (key :: rev) value state) state node.TrieMap

let inline fold folder state node =
    foldInternal folder [] node state

let rec map (mapper : string -> 'v -> 'a) (node : Node<'v>) : Node<'a> =
    match node.TrieKvp with
    | Some (key, value) -> make (Map.map (fun _ value -> map mapper value) node.TrieMap) (Some (key, mapper key value))
    | None -> make (Map.map (fun _ value -> map mapper value) node.TrieMap) None

let inline toValueList node =
    fold (fun state _ value -> value :: state) [] node

let inline singleton (key, value) =
    add key value (makeEmpty ())

I also built a version that works for arrays in general and is also fast -

[<RequireQualifiedAccess>]
module ArrayTrie

type Node<'k, 'v when 'k : comparison> =
    { TrieMap : Map<'k, Node<'k, 'v>>
      TrieKvp : ('k array * 'v) option }
    member inline x.IsEmpty = x.TrieKvp.IsNone && x.TrieMap.IsEmpty

let inline make map kvp =
    { TrieMap = map
      TrieKvp = kvp }

let inline makeEmpty () : Node<'k, 'v> = make Map.empty None

let inline isEmpty (node : Node<'k, 'v>) = node.IsEmpty

let rec tryFindInternal (key : 'k array) index node =
    if key.Length = index then
        match node.TrieKvp with
        | Some (_, value) -> Some value
        | None -> None
    else
        let optSubNode = Map.tryFind key.[index] node.TrieMap
        match optSubNode with
        | Some subNode -> tryFindInternal key (index + 1) subNode
        | None -> None

let inline tryFind (key : 'k array) node =
    tryFindInternal key 0 node

let inline containsKey key node =
    (tryFind key node).IsSome

let rec addInternal (key : 'k array) index value node =
    if key.Length = index then make node.TrieMap (Some (key, value))
    else
        let char = key.[index]
        let newTrie =
            match Map.tryFind char node.TrieMap with
            | Some subTrie -> subTrie
            | None -> makeEmpty ()
        let newTrie2 = addInternal key (index + 1) value newTrie
        make (Map.add char newTrie2 node.TrieMap) node.TrieKvp

let inline add key value node =
    addInternal key 0 value node

let rec addMany kvps node =
    if Seq.isEmpty kvps then node
    else
        let kvpHead = Seq.head kvps
        let kvpTail = Seq.skip 1 kvps
        let newTrie = add (fst kvpHead) (snd kvpHead) node
        addMany kvpTail newTrie

let inline ofList kvps =
    addMany kvps (makeEmpty ())

let inline ofListBy by kvps =
    let pairs = List.map by kvps
    ofList pairs

let rec foldInternal folder rev node state =
    match node.TrieKvp with
    | Some (_, value) -> folder (Map.fold (fun state key value -> foldInternal folder (key :: rev) value state) state node.TrieMap) (List.rev rev) value
    | None -> Map.fold (fun state key value -> foldInternal folder (key :: rev) value state) state node.TrieMap

let inline fold folder state node =
    foldInternal folder [] node state

let rec map (mapper : 'k array -> 'v -> 'a) (node : Node<'k, 'v>) : Node<'k, 'a> =
    match node.TrieKvp with
    | Some (key, value) -> make (Map.map (fun _ value -> map mapper value) node.TrieMap) (Some (key, mapper key value))
    | None -> make (Map.map (fun _ value -> map mapper value) node.TrieMap) None

let inline toValueList node =
    fold (fun state _ value -> value :: state) [] node

let inline singleton (key, value) =
    add key value (makeEmpty ())

The only thing left that seem like it would improve performance is to get an internal pointer to the string and inc that rather than doing indexes over and over. This doesn't seem easy in F#, but seems at least possible for arrays in C#.

like image 42
Bryan Edds Avatar answered Jan 14 '23 14:01

Bryan Edds