Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Erlang alternative to f# sequence

Tags:

erlang

f#

seq

Is there an alternative to the F# "seq" construct in Erlang? For example, in F# I can write an integrate function that uses O(1) memory:

/// Approximates the integral of `f` between `x1` and `x2` with a Riemann sum
/// of step `dx`, streaming the terms through a `seq` so no list is built.
///
/// The sum takes N + 1 samples (i = 0..N, where N is the number of whole
/// steps that fit in the interval). The result is negated unless x2 > x1.
let integrate x1 x2 dx f =
    // Number of whole steps in the interval, independent of its direction.
    let N = int (abs (x2 - x1) / dx)
    // Always walk upward from the lower bound; starting at x1 for a reversed
    // range (x2 < x1) would sample points beyond x1, outside the interval.
    let lo = min x1 x2
    let sum =
        seq { for i in 0..N do yield dx * f (lo + dx * double i) }
        |> Seq.sum
    if x2 > x1 then sum else -sum

In Erlang, I have an implementation which uses lists and therefore has an O(n) memory requirement, which is unacceptable for such a simple function:

%% Builds the list of sample offsets [0, Dx, 2*Dx, ..., N*Dx].
%% The whole list is materialised, so memory use grows linearly with N.
%% N must be a non-negative integer; anything else fails with function_clause
%% instead of recursing forever.
create(Dx, N) when is_integer(N), N >= 0 -> [0 | create(Dx, N, [])].

%% Accumulates Dx*N, Dx*(N-1), ..., Dx*1 in front of Acc, counting N down to 0.
create(_Dx, 0, Acc) -> Acc;
create(Dx, N, Acc) -> create(Dx, N - 1, [Dx * N | Acc]).

%% Approximates the integral of F between X1 and X2 with step Dx.
%% F is sampled at Lo, Lo+Dx, ..., Lo+N*Dx, where Lo is the lower bound and
%% N the number of whole steps in the interval. The result is negated when
%% X2 < X1.
integral(X1, X2, Dx, F) ->
    N = trunc(abs(X2 - X1) / Dx),
    %% Offsets from create/2 start at 0, so they must be shifted by the lower
    %% bound; otherwise F would always be sampled on [0, N*Dx].
    Lo = min(X1, X2),
    Points = create(Dx, N),
    Vals = lists:map(fun(Offset) -> F(Lo + Offset) * Dx end, Points),
    Sum = lists:sum(Vals),
    if
        X2 >= X1 -> Sum;
        true -> -Sum
    end.
like image 281
14 revs, 3 users 99% Avatar asked May 15 '15 17:05

14 revs, 3 users 99%


3 Answers

Disclaimer: the following is written under the assumption that Erlang disallows mutation completely, of which I'm not sure, because I don't know Erlang well enough.

Seq is internally mutation-based. It maintains "current state" and mutates it on every iteration. So that when you do one iteration, you get the "next value", but you also get a side effect, which is that the enumerator's internal state has changed, and when you do next iteration, you will get a different "next value", and so on. This is usually nicely covered with functional-looking comprehensions, but if you were to ever work with IEnumerator directly, you will see the non-purity with naked eye.

Another way to think about it is, given a "sequence", you are getting two results: "next value" and "rest of sequence", and then "rest of sequence" becomes your new "sequence", and you can repeat the process. (and the original "sequence" is forever gone)

This line of thought can be directly expressed in F#:

/// A lazy sequence: a function that, when applied, returns the head element
/// together with the tail, which is itself another lazy sequence.
type MySeq<'a> = MySeq of (unit -> ('a * MySeq<'a>))

Meaning: "a lazy sequence is a function that, when applied, returns its head and tail, where tail is another lazy sequence". MySeq of is included to keep the type from becoming infinite.
(sorry, I'll use F#, I don't know Erlang well enough; I'm sure you can translate)

But then, seeing how sequences are usually finite, the whole thing should be optional:

/// A lazy sequence that may end: applying the function yields
/// `Some (head, tail)`, or `None` once there are no more elements.
type MySeq<'a> = MySeq of (unit -> ('a * MySeq<'a>) option)

Given this definition, you can trivially make some constructors:

  /// Constructors for `MySeq` values.
  module MySeq =
    /// The sequence with no elements.
    let empty = MySeq (fun () -> None)

    /// Puts `a` in front of the sequence `rest`.
    let cons a rest = MySeq (fun () -> Some (a, rest))

    /// The sequence holding exactly one element.
    let singleton a = cons a empty

    /// `a` repeated `n` times; empty when `n` is zero or negative.
    /// The tail is only built when the sequence is stepped.
    let rec repeat n a =
        if n > 0 then MySeq (fun () -> Some (a, repeat (n - 1) a))
        else empty

    /// `a` repeated without end; each step produces the next cell on demand.
    let rec infinite a = MySeq (fun () -> Some (a, infinite a))

    /// A lazy view over the elements of an ordinary list, in order.
    let rec ofList list =
        match list with
        | head :: tail -> MySeq (fun () -> Some (head, ofList tail))
        | [] -> empty

Map and fold are also trivial:

/// Lazily applies `f` to every element: each step of the result steps the
/// source once and transforms the head it gets back.
let rec map f (MySeq next) =
    MySeq (fun () ->
        next () |> Option.map (fun (head, tail) -> f head, map f tail))

/// Consumes the whole sequence, threading an accumulator through `f` from
/// left to right, and returns the final accumulator (`acc0` if it is empty).
let rec fold f acc0 (MySeq next) =
    match next () with
    | Some (head, tail) -> fold f (f acc0 head) tail
    | None -> acc0

From fold you can build any operation whose result is not itself a lazy sequence. But to build lazy sequences, you need a "rolling fold" (sometimes called "scan"):

/// A "rolling fold": lazily yields every intermediate state obtained by
/// folding `f` over the source, starting from `state0`. The seed itself is
/// not emitted; the first element is `f state0 head`.
let rec scan f state0 (MySeq next) =
    MySeq (fun () ->
        next ()
        |> Option.map (fun (head, tail) ->
            let state1 = f state0 head
            state1, scan f state1 tail))

// map expressed through scan: the running state is ignored by the callback,
// so the seed value is never observed and any default will do.
let map f source =
    scan (fun _ item -> f item) Unchecked.defaultof<_> source

Here's how to use it:

// The sequence with no elements.
let emptySeq = MySeq.empty
// A lazy sequence over the elements of an ordinary list.
let numbers = MySeq.ofList [1; 2; 3; 4]
let doubles = MySeq.map ((*) 2) numbers  // [2; 4; 6; 8]
// An endless stream of unit values turned into 1, 2, 3, ...: the scan ignores
// each element and only increments the running state, which is seeded with 0.
let infiniteNumbers = 
    MySeq.infinite () 
    |> MySeq.scan (fun prev _ -> prev+1) 0
// Mapping an endless sequence is fine: nothing runs until it is stepped.
let infiniteDoubles = MySeq.map ((*) 2) infiniteNumbers

And in conclusion, I'd like to add that a mutation-based solution will nearly always be more performant (all other things being equal), at least a little. Even if you immediately throw away the old state as you calculate the new one, the memory still needs to be reclaimed, which is itself a performance hit. The benefits of immutability do not include performance fine-tuning.

Update:
Here's my crack at an Erlang version. Keep in mind that this is the very first code that I ever wrote in Erlang. As such, I'm sure there are better ways to encode this, and that there must be a library for this already available.

%% @doc Minimal lazy sequences.
%%
%% A sequence is either the atom `empty' or a zero-arity fun which, when
%% called, returns `{Head, Tail}' where `Tail' is again a sequence. Elements
%% are produced only when the fun is called, so a sequence can be endless.
-module(seq).
-export([empty/0, singleton/1, infinite/1, repeat/2, fold/3, scan/3, map/2, count/1]).
-export_type([seq/1]).

-type seq(T) :: empty | fun(() -> {T, seq(T)}).

%% @doc The sequence with no elements.
-spec empty() -> seq(term()).
empty() -> empty.

%% @doc The sequence holding exactly one element.
-spec singleton(T) -> seq(T).
singleton(A) -> fun() -> {A, empty} end.

%% @doc `A' repeated without end.
-spec infinite(T) -> seq(T).
infinite(A) -> fun() -> {A, infinite(A)} end.

%% @doc `A' repeated `N' times. A zero or negative `N' gives the empty
%% sequence; matching only on 0 would let a negative count run forever.
-spec repeat(integer(), T) -> seq(T).
repeat(N, _) when N =< 0 -> empty;
repeat(N, A) -> fun() -> {A, repeat(N - 1, A)} end.

%% @doc Consumes the whole sequence, threading an accumulator through `F'
%% from left to right. Returns `S0' for the empty sequence. Does not return
%% for an endless sequence.
-spec fold(fun((Acc, T) -> Acc), Acc, seq(T)) -> Acc.
fold(_, S0, empty) ->
    S0;
fold(F, S0, Seq) ->
    {Current, Rest} = Seq(),
    S1 = F(S0, Current),
    fold(F, S1, Rest).

%% @doc A lazy "rolling fold": yields every intermediate state produced by
%% folding `F' over the sequence. The seed `S0' itself is not emitted.
-spec scan(fun((S, T) -> S), S, seq(T)) -> seq(S).
scan(_, _, empty) ->
    empty;
scan(F, S0, Seq) ->
    fun() ->
        {Current, Rest} = Seq(),
        S1 = F(S0, Current),
        {S1, scan(F, S1, Rest)}
    end.

%% @doc Lazily applies `F' to every element. Built on scan/3 with a callback
%% that ignores the running state, so the seed (0) is never observed.
-spec map(fun((A) -> B), seq(A)) -> seq(B).
map(F, Seq) -> scan(fun(_, A) -> F(A) end, 0, Seq).

%% @doc Number of elements in a finite sequence.
-spec count(seq(_)) -> non_neg_integer().
count(Seq) -> fold(fun(C, _) -> C + 1 end, 0, Seq).

Usage:

1> c(seq).
{ok,seq}
2> FiveTwos = seq:repeat(5,2).
#Fun<seq.2.133838528>
3> Doubles = seq:map( fun(A) -> A*2 end, FiveTwos ).
#Fun<seq.3.133838528>
5> seq:fold( fun(S,A) -> S+A end, 0, Doubles ).
20
6> seq:fold( fun(S,A) -> S+A end, 0, FiveTwos ).
10
11> seq:count( FiveTwos ).
5
like image 126
Fyodor Soikin Avatar answered Oct 12 '22 06:10

Fyodor Soikin


This is not tested, but is one way of doing it.

The idea is to turn the list into a process which yields the next value when asked. You can easily generalize the idea if you need to do so.

Alternatively, you can write an unfold which can then unfold the list a bit at a time and use this as input to the generic processor.

Another way is to implement lazy streams, based on the idea that any Expr can be delayed by fun () -> Expr end perhaps best written as -define(DELAY(X), fun() -> X end). as a macro and then used together with -define(FORCE(X), X()).

%% @doc Riemann-sum integration that pulls its sample points one at a time
%% from a generator process instead of building a list of points.
-module(z).

-export([integral/4]).

%% Milliseconds either side waits for the other before giving up.
-define(TIMEOUT, 5000).

%% Starts a linked generator process. Each `{grab, Pid}' request is answered
%% with `{next, Offset}' for the offsets Dx*N, Dx*(N-1), ..., Dx*0, and the
%% request after the last offset is answered with `done'.
create(Dx, N) ->
    spawn_link(fun() -> create_loop(Dx, N) end).

%% Generator loop. N counts down; once it drops below 0 every offset has been
%% handed out, so the next request gets `done' and the process finishes.
create_loop(_Dx, N) when N < 0 ->
    receive
        {grab, Target} ->
            Target ! done,
            ok
    after ?TIMEOUT ->
        exit(timeout_error)
    end;
create_loop(Dx, N) ->
    receive
        {grab, Target} ->
            Target ! {next, Dx * N},
            create_loop(Dx, N - 1)
    after ?TIMEOUT ->
        exit(timeout_error)
    end.

%% Asks the generator for its next value.
%% Returns `{next, V}' or `done'; exits with `timeout_error' if no answer
%% arrives in time.
next(Pid) ->
    Pid ! {grab, self()},
    receive
        {next, V} -> {next, V};
        done -> done
    after ?TIMEOUT ->
        exit(timeout_error)
    end.

%% Adds F(V) to Acc for every value V the generator produces.
sum(F, Points, Acc) ->
    case next(Points) of
        {next, V} -> sum(F, Points, Acc + F(V));
        done -> Acc
    end.

%% @doc Approximates the integral of F between X1 and X2 with step Dx.
%% F is sampled at Lo + I*Dx for I = 0..N, where Lo is the lower bound and N
%% the number of whole steps in the interval. Negated when X2 < X1.
integral(X1, X2, Dx, F) ->
    N = trunc(abs(X2 - X1) / Dx),
    %% The generator produces offsets from 0, so shift them by the lower bound.
    Lo = min(X1, X2),
    Points = create(Dx, N),
    Sum = sum(fun(Offset) -> F(Lo + Offset) * Dx end, Points, 0),
    if
        X2 >= X1 -> Sum;
        true -> -Sum
    end.

The solution based on DELAY/FORCE is something like:

%% @doc Riemann-sum integration over a lazy stream of sample points.
%%
%% A stream is either `[]' or `[Value | Thunk]', where `Thunk' is a
%% zero-arity fun that returns the rest of the stream when called.
-module(z).

-export([integral/4]).

%% DELAY must be a macro: a function would evaluate its argument before the
%% call, which is exactly what the thunk is meant to postpone.
-define(DELAY(X), fun() -> X end).
-define(FORCE(X), X()).

%% Lazy stream of the offsets 0, Dx*N, Dx*(N-1), ..., Dx*1.
create(Dx, N) ->
    [0 | ?DELAY(create_loop(Dx, N))].

%% The terminating clause has to come first; after a catch-all clause it
%% would never be selected and the stream would never end.
create_loop(_Dx, N) when N =< 0 ->
    [];
create_loop(Dx, N) ->
    [Dx * N | ?DELAY(create_loop(Dx, N - 1))]. % This is an abuse of improper lists

%% Lazily applies F to every element of a stream.
map(_F, []) ->
    [];
map(F, [V | Thunk]) ->
    [F(V) | ?DELAY(map(F, ?FORCE(Thunk)))].

%% Forces the stream one cell at a time, adding each value to Acc.
sum([], Acc) ->
    Acc;
sum([V | Thunk], Acc) ->
    sum(?FORCE(Thunk), V + Acc).

%% @doc Approximates the integral of F between X1 and X2 with step Dx.
%% F is sampled at Lo + I*Dx for I = 0..N, where Lo is the lower bound and N
%% the number of whole steps in the interval. Negated when X2 < X1.
integral(X1, X2, Dx, F) ->
    N = trunc(abs(X2 - X1) / Dx),
    %% Offsets start at 0, so shift them by the lower bound of the interval.
    Lo = min(X1, X2),
    Points = create(Dx, N),
    Vals = map(fun(Offset) -> F(Lo + Offset) * Dx end, Points),
    Sum = sum(Vals, 0),
    if
        X2 >= X1 -> Sum;
        true -> -Sum
    end.

But not tested.

like image 22
I GIVE CRAP ANSWERS Avatar answered Oct 12 '22 07:10

I GIVE CRAP ANSWERS


The most popular way to get constant-memory processing is to define a tail-recursive function. For example:

%% Approximates the integral of F between X1 and X2 with step DX using a
%% tail-recursive loop. A reversed range is integrated from the lower bound
%% upward and the result is negated.
integrate_rec(X1, X2, DX, F) when X2 < X1 ->
    integrate_rec(X2, X1, DX, F, X2, 0, -1);
integrate_rec(X1, X2, DX, F) when X2 >= X1 ->
    integrate_rec(X1, X2, DX, F, X1, 0, 1).

%% Loop: X is the current sample point, Sum the running total and Sign the
%% factor applied to the final result. Stops as soon as X reaches X2.
integrate_rec(X1, X2, DX, F, X, Sum, Sign) when X < X2 ->
    integrate_rec(X1, X2, DX, F, X + DX, Sum + DX * F(X), Sign);
integrate_rec(_X1, _X2, _DX, _F, _X, Sum, Sign) ->
    Sign * Sum.

But it doesn't look clear... I had the same problem once and wrote a short helper function, for, that lets you iterate without lists:

%% Same integral as the recursive version, written with the for/2 helper.
%% NOTE(review): for/2 is not shown here; presumably for(Acc0, {From, To, Step})
%% returns a fun that folds the given callback over From, From+Step, ...
%% starting from Acc0 - confirm against its definition.
integrate_for(X1, X2, DX, F) ->
    %% Walk downward and negate the total when the range is reversed.
    Sign =
        case X2 < X1 of
            true -> -1;
            false -> 1
        end,
    Loop = for(0, {X1, X2, Sign * DX}),
    Total = Loop(fun(X, Acc) -> Acc + DX * F(X) end),
    Sign * Total.

Unfortunately, it's a little bit slower than direct recursion:

%% Times both integrators on sin over [0, pi] with a step of 1.0e-7.
%% Returns one {Microseconds, Result} pair per integrator, in the order
%% integrate_rec/4, integrate_for/4.
benchmark() ->
    Args = [0, math:pi(), 0.0000001, fun math:sin/1],
    Candidates = [fun integrate_rec/4, fun integrate_for/4],
    [timer:tc(Integrate, Args) || Integrate <- Candidates].

> [{3032398,2.000000000571214},{4069549,2.000000000571214}]

So it's ~3.03s to ~4.07s - not that bad.

like image 2
Łukasz Ptaszyński Avatar answered Oct 12 '22 07:10

Łukasz Ptaszyński