
Selecting Flat Parts of a Curve

Tags: r

I'm trying to extract the weights of the flat parts of this curve.

plot(Unsubtracted.Weight ~ Sample.Temperature, data = tga.data, pch=19,
     ylim = c(-1,3))

That is, the flat parts at ca. 2.4 mg and ca. 1 mg: I want to get the mean weight of each of these two plateaus.

I've tried the following code,

threshold <- 0.1  # Adjust threshold as needed

# Absolute difference between consecutive weights (lag 1)
rolling_diff <- abs(diff(tga.data$Unsubtracted.Weight, 1))

# Initialize empty list for flat section indices
flat_sections <- list()

# Loop to identify flat section indices
start_idx <- 1
for (i in 2:length(rolling_diff)) {
  if (rolling_diff[i] < threshold) {
    # Flat section continues
  } else {
    # End of flat section
    flat_sections[[length(flat_sections) + 1]] <- c(start_idx, i - 1)
    start_idx <- i
  }
}

# Check for last flat section at the end
if (rolling_diff[length(rolling_diff)] < threshold) {
  flat_sections[[length(flat_sections) + 1]] <- c(start_idx, length(tga.data$Unsubtracted.Weight))
}

# Calculate mean weight of each flat section
flat_means <- lapply(flat_sections, function(i) mean(tga.data$Unsubtracted.Weight[i[1]:i[2]]))

This doesn't give me two weights; instead I get a varying number of values depending on the threshold setting and on the lag used in diff().

Is there a better way of doing it?

Data

dput of every 50th row, and only 2 columns of the data frame:

tga.data = data.frame(Unsubtracted.Weight = c(2.519903, 2.480581, 2.453806, 2.440516, 2.439226, 2.434226, 2.428516, 2.424839, 2.422839, 2.421839, 2.420419, 2.419258, 2.418258, 2.41729, 2.416645, 2.415677, 2.415097, 2.414194, 2.413032, 2.412516, 2.412806, 2.411839, 2.410677, 2.409935, 2.408355, 2.407452, 2.406323, 2.405419, 2.404355, 2.403355, 2.40271, 2.401839, 2.401, 2.400419, 2.400032, 2.399452, 2.39871, 2.397581, 2.39671, 2.395806, 2.395097, 2.394387, 2.393613, 2.386, 2.367258, 2.347581, 2.324, 2.287097, 2.230484, 2.144806, 2.016871, 1.846968, 1.639097, 1.408452, 1.172484, 0.960161, 0.873258, 0.873065, 0.873226, 0.874194, 0.87529, 0.875452, 0.876613, 0.876258, 0.877032, 0.877355, 0.878129, 0.878645, 0.879774, 0.880194, 0.880452, 0.881419, 0.882226, 0.882935, 0.883806, 0.884419, 0.885032, 0.885581, 0.886387, 0.887, 0.887645), Sample.Temperature = c(29.82, 29.95, 30, 30, 36.48, 53.15, 69.83, 86.5, 103.17, 119.82, 136.49, 153.16, 169.82, 186.48, 203.15, 219.83, 236.49, 253.15, 269.82, 286.48, 303.16, 319.82, 336.49, 353.15, 369.82, 386.49, 403.15, 419.82, 436.48, 453.15, 469.82, 486.48, 503.15, 519.81, 536.48, 553.15, 569.81, 586.48, 600, 600, 600, 600, 600, 600, 600, 600.21, 616.59, 633.26, 649.93, 666.6, 683.26, 699.92, 716.6, 733.26, 749.93, 766.6, 783.26, 799.92, 816.59, 833.25, 849.92, 866.59, 883.25, 899.92, 916.59, 933.25, 949.91, 966.57, 983.23, 999.68, 999.99, 1000.01, 1000.01, 999.99, 1000, 999.99, 1000, 1000, 999.99, 1000, 1000))
asked Jan 01 '26 by DarrenRhodes

2 Answers

I think a better approach would be to identify the segments where the change is minimal. So, given this data

Unsubtracted.Weight <- c(2.519903, 2.480581, 2.453806, 2.440516, 2.439226, 2.434226, 2.428516, 2.424839, 2.422839, 2.421839, 
                         2.420419, 2.419258, 2.418258, 2.41729, 2.416645, 2.415677, 2.415097, 2.414194, 2.413032, 2.412516, 
                         2.412806, 2.411839, 2.410677, 2.409935, 2.408355, 2.407452, 2.406323, 2.405419, 2.404355, 2.403355, 
                         2.40271, 2.401839, 2.401, 2.400419, 2.400032, 2.399452, 2.39871, 2.397581, 2.39671, 2.395806, 
                         2.395097, 2.394387, 2.393613, 2.386, 2.367258, 2.347581, 2.324, 2.287097, 2.230484, 2.144806, 
                         2.016871, 1.846968, 1.639097, 1.408452, 1.172484, 0.960161, 0.873258, 0.873065, 0.873226, 0.874194, 
                         0.87529, 0.875452, 0.876613, 0.876258, 0.877032, 0.877355, 0.878129, 0.878645, 0.879774, 0.880194, 
                         0.880452, 0.881419, 0.882226, 0.882935, 0.883806, 0.884419, 0.885032, 0.885581, 0.886387, 0.887, 
                         0.887645)
Sample.Temperature <- c(29.82, 29.95, 30, 30, 36.48, 53.15, 69.83, 86.5, 103.17, 119.82, 136.49, 153.16, 169.82, 186.48, 203.15, 
                        219.83, 236.49, 253.15, 269.82, 286.48, 303.16, 319.82, 336.49, 353.15, 369.82, 386.49, 403.15, 419.82, 
                        436.48, 453.15, 469.82, 486.48, 503.15, 519.81, 536.48, 553.15, 569.81, 586.48, 600, 600, 600, 600, 600, 
                        600, 600.21, 616.59, 633.26, 649.93, 666.6, 683.26, 699.92, 716.6, 733.26, 749.93, 766.6, 783.26, 
                        799.92, 816.59, 833.25, 849.92, 866.59, 883.25, 899.92, 916.59, 933.25, 949.91, 966.57, 983.23, 999.68, 
                        999.99, 1000.01, 1000.01, 999.99, 1000, 999.99, 1000, 1000, 999.99, 1000, 1000)

length_to_use <- min(length(Unsubtracted.Weight), length(Sample.Temperature))
Unsubtracted.Weight <- Unsubtracted.Weight[1:length_to_use]
Sample.Temperature <- Sample.Temperature[1:length_to_use]

tga.data <- data.frame(Sample.Temperature, Unsubtracted.Weight)

plot(Unsubtracted.Weight ~ Sample.Temperature, data = tga.data, pch=19, ylim = c(-1,3))


you can then identify the flat segments and do the necessary extraction and mean computations.

diffs <- abs(diff(tga.data$Unsubtracted.Weight))

threshold <- 0.01
flat_sections <- which(diffs < threshold)

flat_start <- c()
flat_end <- c()

# Walk the sorted flat indices and group consecutive runs into sections.
# Note the <= in the outer loop condition: with < (as originally written),
# an isolated flat index at the very end would be silently dropped.
i <- 1
while (i <= length(flat_sections)) {
  start <- flat_sections[i]
  while (i < length(flat_sections) && flat_sections[i + 1] == flat_sections[i] + 1) {
    i <- i + 1
  }
  end <- flat_sections[i]
  flat_start <- c(flat_start, start)
  flat_end <- c(flat_end, end)
  i <- i + 1
}

flat_means <- sapply(1:length(flat_start), function(i) {
  mean(tga.data$Unsubtracted.Weight[flat_start[i]:flat_end[i]])
})

print(flat_means)

which gives

> print(flat_means)
[1] 2.4108065 0.8791627
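The run-grouping loop above can also be expressed more compactly with base R's rle(). Here is a sketch using the same 0.01 threshold on a small synthetic stand-in vector w (the real data would go in its place); because the diff-based indices are shifted by one relative to the loop's indexing, the means can differ very slightly.

```r
# Sketch: group consecutive flat points with rle() instead of a loop.
# 'w' is a small synthetic stand-in for tga.data$Unsubtracted.Weight.
w <- c(2.52, 2.41, 2.405, 2.404, 2.403, 1.8, 1.2, 0.88, 0.879, 0.878, 0.877)
is_flat <- c(FALSE, abs(diff(w)) < 0.01)   # TRUE where the curve is flat
runs    <- rle(is_flat)                    # runs of flat / non-flat points
grp     <- rep(seq_along(runs$lengths), runs$lengths)  # run id per point
flat_means <- tapply(w[is_flat], grp[is_flat], mean)   # one mean per flat run
flat_means
#>     2     4
#> 2.404 0.878
```

rle() collapses the logical vector into runs, so the explicit index bookkeeping (flat_start, flat_end) disappears entirely.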
answered Jan 02 '26 by Serge de Gosson de Varennes


Based on the simple idea of comparing the absolute values of the differences to a tolerance, we can do:

Edit

2024-05-26

Wrapped in a function. Included scale(), which makes setting tol more robust across series of different magnitudes.

const1D = \(x, tol = .01) {
  stopifnot(is.numeric(x), is.numeric(tol))
  # TRUE where the scaled series changes by at most tol
  b = abs(diff(scale(x))) <= tol
  # one logical per run of b: is that run flat?
  o = b[c(TRUE, !b[-length(b)] == b[-1L])]
  # indices where the flat/non-flat state flips, i.e. run boundaries
  i = 1L + which(diff(b) != 0L)
  # split x at the boundaries and keep only the flat runs
  split(x, cumsum(seq_along(x) %in% i))[o]
}
vapply(const1D(tga.data$Unsubtracted.Weight), mean, numeric(1L))

Code

tol = .01
b = abs(diff(tga.data$Unsubtracted.Weight)) <= tol
o = b[c(TRUE, !b[-length(b)] == b[-1L])]
i = 1L + which(diff(b) != 0L)
l = with(tga.data, split(Unsubtracted.Weight, 
                         cumsum(seq_along(Unsubtracted.Weight) %in% i)))
vapply(l[o], mean, numeric(1L))
#>         1         3 
#> 2.4108065 0.8798155

Plot

with(tga.data, plot(x = Sample.Temperature, y = Unsubtracted.Weight,
                    col = c('red','blue')[b+1L], pch = ".", cex = 3L))


Note

More sophisticated approaches would involve change-point detection algorithms. Bayesian versions might be a good idea. For reference, have a look at beast() or beast.irreg() from {Rbeast}. Probably, at first sight, you will find the list of arguments (and the help file) overwhelming. If all your data series are single S-shaped curves (sigmoid(x) or, more precisely, -sigmoid(x) functions), this might be shooting at sparrows with cannons. Nonetheless, you would then need a way to exclude the inflection point, as such functions have exactly one.
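As a rough sketch of that change-point route, assuming {Rbeast} is installed and that the fitted object's trend$cp component holds the estimated change-point locations (check ?beast for the current interface):

```r
# Sketch only: Bayesian change-point detection with {Rbeast}.
# Assumes the package is installed: install.packages("Rbeast")
library(Rbeast)

# Fit a trend-only model; season = "none" since this is not a
# seasonal series. The estimated trend change-points should then
# bound the flat segments of the TGA curve.
fit <- beast(tga.data$Unsubtracted.Weight, season = "none")
print(fit$trend$cp)  # most probable change-point locations
plot(fit)            # visual check of the segmentation
```

The change-points returned here would include the inflection point of the S-shaped drop, which is why the caveat above about excluding it applies.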

answered Jan 02 '26 by Friede


