We have an ETL process written in F# that takes data stored in a relational database and transforms it into a star schema ready for a 3rd party platform. Because we are denormalizing the data, we have (almost) duplicate objects, types and properties scattered around our system. Up until now I’ve been happy with this because the objects are different enough to warrant different functions, or we have been able to group common/shared properties into a sub-record.
However, we are now adding objects which need to pick and choose different parts of the system and don’t fall into the existing common groupings. After experimenting with a few different styles I’ve fallen into using interfaces, but something doesn’t feel right about using them. Has anyone come across this problem and come up with a different approach?
module rec MyModels =

    /// Account record whose own `Id` and `Name` fields back the shared
    /// `AccountId` and `AccountName` properties.
    type AccountType1 =
        { Id : int
          Error : string option
          Name : string option }

        // PROBLEM: this gets very bulky as more properties are shared
        interface Props.Error<AccountType1> with
            member this.Optic =
                (fun () -> this.Error),
                (fun value -> { this with Error = value })

        interface Props.AccountId<AccountType1> with
            member this.Optic =
                (fun () -> this.Id),
                (fun value -> { this with Id = value })

        interface Props.AccountName<AccountType1> with
            member this.Optic =
                (fun () -> this.Name),
                (fun value -> { this with Name = value })

    /// Account record that carries dedicated `AccountId` / `AccountName`
    /// fields next to its own `Id`, plus an `OtherValue` field.
    type AccountType2 =
        { Id : int
          Error : string option
          AccountId : int
          AccountName : string option
          OtherValue : string }

        interface Props.Error<AccountType2> with
            member this.Optic =
                (fun () -> this.Error),
                (fun value -> { this with Error = value })

        interface Props.AccountId<AccountType2> with
            member this.Optic =
                (fun () -> this.AccountId),
                (fun value -> { this with AccountId = value })

        interface Props.AccountName<AccountType2> with
            member this.Optic =
                (fun () -> this.AccountName),
                (fun value -> { this with AccountName = value })

        interface Props.OtherValue<AccountType2> with
            member this.Optic =
                (fun () -> this.OtherValue),
                (fun value -> { this with OtherValue = value })

    module Props =

        /// A getter/setter pair bound to one instance: the first function
        /// reads the property, the second returns a value of type 'b built
        /// from the new property value.
        type OpticProp<'a,'b> = (unit -> 'a) * ('a -> 'b)

        // Common properties my models can share
        // (I know they should start with an I)

        /// Implemented by models that expose an optional error message.
        type Error<'a> =
            abstract member Optic : OpticProp<string option, 'a>

        /// Returns the error optic of `owner`.
        let Error (owner : Error<_>) = owner.Optic

        /// Implemented by models that expose an integer account id.
        type AccountId<'a> =
            abstract member Optic : OpticProp<int, 'a>

        /// Returns the account-id optic of `owner`.
        let AccountId (owner : AccountId<_>) = owner.Optic

        /// Implemented by models that expose an optional account name.
        type AccountName<'a> =
            abstract member Optic : OpticProp<string option, 'a>

        /// Returns the account-name optic of `owner`.
        let AccountName (owner : AccountName<_>) = owner.Optic

        /// Implemented by models that expose a string `OtherValue`.
        type OtherValue<'a> =
            abstract member Optic : OpticProp<string, 'a>

        /// Returns the other-value optic of `owner`.
        let OtherValue (owner : OtherValue<_>) = owner.Optic
[<RequireQualifiedAccess>]
module Optics =

    // Based on Aether
    module Operators =

        /// Read: applies `optic` to `o` and invokes the getter half of the
        /// resulting pair.
        let inline (^.) o optic =
            let read, _ = optic o
            read ()

        /// Write: builds a function that applies `optic` to its argument and
        /// feeds `value` to the setter half of the resulting pair.
        let inline (^=) value optic =
            fun o ->
                let _, write = optic o
                write value

    /// Function form of the read operator, with the optic as first argument.
    let inline get optic o = (optic o |> fst) ()

    /// Function form of the write operator; the annotations require the
    /// setter to return the same type as the object it was taken from.
    let inline set optic v (o : 'a) : 'a = (optic o |> snd) v
open MyModels
open Optics.Operators
// Common functions that change the models
/// Stores `Some msg` on the item through the `Props.Error` optic and returns
/// the updated item wrapped in `Error`.
let error msg item =
    let failed = item |> ((Some msg) ^= Props.Error)
    Error failed
/// Fills in the account name from the account id.
/// Ids 1 and 2 yield `Ok` with the name set to "Account 1" / "Account 2";
/// any other id yields the failure produced by `error`.
let accountName item =
    // Look the name up first so the setter is written only once.
    let resolved =
        match item ^. Props.AccountId with
        | 1 -> Some "Account 1"
        | 2 -> Some "Account 2"
        | _ -> None

    match resolved with
    | Some _ -> Ok (item |> (resolved ^= Props.AccountName))
    | None -> item |> error "Can't find account"
/// Lets the item through as `Ok` only when its account name is exactly
/// `Some "Account 1"`; every other name (including `None`) becomes the
/// failure produced by `error`.
let correctAccount item =
    if item ^. Props.AccountName = Some "Account 1" then
        Ok item
    else
        error "This is not Account 1" item
/// Calls `lookup ()` once, writes the result through the `Props.OtherValue`
/// optic and returns the updated item as `Ok`.
let otherValue lookup item =
    let updated = item |> ((lookup ()) ^= Props.OtherValue)
    Ok updated
// Build the transform pipeline
/// Composes two Result-returning steps: `b` runs on the `Ok` payload of `a`,
/// while an `Error` from `a` is passed on unchanged and `b` is not called.
let inline (>=>) a b =
    fun value -> a value |> Result.bind b
/// Pipeline that runs `accountName` and then `correctAccount`.
/// `lookups` is not used here; lookups can be passed around if needed.
let account1TransformPipeline lookups =
    accountName >=> correctAccount
/// Pipeline that runs `accountName`, then `correctAccount`, then
/// `otherValue` with `lookups` as its lookup function.
let account2TransformPipeline lookups =
    let fillOtherValue = otherValue lookups
    accountName >=> correctAccount >=> fillOtherValue
// Try out the pipelines
/// Result of running the first pipeline over a sample `AccountType1`.
let account1 =
    let input : AccountType1 = { Id = 1; Error = None; Name = None }
    input |> account1TransformPipeline ()
/// Result of running the second pipeline over a sample `AccountType2`,
/// with a lookup that always returns "bar".
let account2 =
    let input : AccountType2 =
        { Id = 1; Error = None; AccountId = 1; AccountName = None; OtherValue = "foo" }
    let lookup () = "bar"
    input |> account2TransformPipeline lookup
Other things I've tried:
I'm not really sure how to make your solution simpler - I think that the very fancy use of types in your approach makes the code quite complex. There may be other ways of simplifying this while keeping some kind of typing. Equally, I think there are cases where the logic you need to implement is just fairly dynamic and then it might be worth using some more dynamic techniques, even in F#.
To give an example, here is how you could do this using the Deedle data frame library. This lets you represent data as data frames (with column names as strings).
Writing the two cleaning operations that you need over a data frame is relatively easy - the library is optimized for column-based operations, so the code structure is a bit different than yours (we calculate a new column and then replace it for all rows in the data frame):
/// Maps every value of column `idCol` to an account name and replaces column
/// `nameCol` of `df` with the result.
/// Ids other than 1 and 2 raise via `failwith "Cannot find account"`.
let correctAccount idCol nameCol df =
    // The first argument is the series key, which is not needed here.
    let nameForId _ id =
        match id with
        | 1 -> "Account 1"
        | 2 -> "Account 2"
        | _ -> failwith "Cannot find account"

    let newNames = Series.map nameForId (Frame.getCol idCol df)
    Frame.replaceCol nameCol newNames df
/// Replaces the "OtherValue" column of `df` with a series produced by
/// mapping every entry of the existing column to `Some newValue`.
let otherValue newValue df =
    let newOther =
        Frame.getCol "OtherValue" df
        |> Series.mapAll (fun _ _ -> Some newValue)

    Frame.replaceCol "OtherValue" newOther df
Your pipeline can then take records, convert them to data frames and do all the processing:
// Records with a "Name" column: names are derived from "Id" and written to "Name".
Frame.ofRecords [ { Id = 1; Error = None; Name = None } ]
|> correctAccount "Id" "Name"

// Records with an "AccountName" column: names are derived from "Id" and
// written to "AccountName", then "OtherValue" is overwritten with "bar".
Frame.ofRecords [ { Id = 1; Error = None; AccountId = 1; AccountName = None; OtherValue = "foo" } ]
|> correctAccount "Id" "AccountName"
|> otherValue "bar"
This is less type-safe than your approach, but I believe that people can actually read the code and get a good idea of what it does, which might be worth the tradeoff.
If you love us, you can donate to us via PayPal or buy us a coffee so we can maintain and grow! Thank you!
Donate Us With