Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Automatically - "Convert numbers stored as text to numbers"

Tags:

r

excel

openxlsx

Lets consider this small example:

df1<- data.frame(A=c(1,NA,"pvalue",0.0003),B=c(0.5,7,"I destroy","numbers all day"),stringsAsFactors = T)

Write file:

openxlsx::write.xlsx(df1,"Test.xlsx")

In my resulting excel file, 1 and 7 are text cells. Excel has the "intuition" that they are numbers stored as text. I can convert them by hand.

How can I convert those "flagged" values automatically to numbers from inside R?

In the "What I want" I have by hand converted the TEXT into Numbers. It's an option behind the "green triangle" in the "What I get" Part (red arrows).

enter image description here

@Roland's comment: Rearranging as list does not work.

df1<- as.data.frame(cbind(A=list(1,NA_real_,"pvalue",0.0003),B=list(0.5,7,"I destroy","numbers all day")))
openxlsx::write.xlsx(df1,"Test2.xlsx")
like image 471
Andre Elrico Avatar asked Aug 27 '18 13:08

Andre Elrico


2 Answers

I wrote a small piece of code following the suggestions of @Roland and @phiver. It starts with a tidy data.frame (to preserve the data type of each cell) and save values one by one:

library(openxlsx)
df1<- as.data.frame(cbind(A=list(1,NA_real_,"pvalue",0.0003),B=list(0.5,7,"I destroy","numbers all day")))

wb <- createWorkbook()
sheet.name <- 'test'
addWorksheet(wb, sheet.name)

for(i in seq_along(df1)){
    writeData(wb, sheet = sheet.name, names(df1)[i], startCol = i, startRow = 1)
    icol <- df1[[i]]
    for(j in seq_along(icol)){
        x <- icol[[j]]
        writeData(wb, sheet = sheet.name, x, startCol = i, startRow = j + 1)
    }
}
saveWorkbook(wb, file = "Test.xlsx")

enter image description here

Hope this works for your data.

like image 176
mt1022 Avatar answered Nov 18 '22 22:11

mt1022


thanks @mt1022 added the validator to let 000123 stay 000123 in the helpers function part

A solution that can do what openxlsx::write.xlsx() can do + "finding meaningful types".

function: (its 98% openxlsx::write.xlsx)

writeXlsxWithTypes <- function(x, file, asTable = FALSE, ...) {
    library(magrittr);library(openxlsx);

    if(T) {
        setTypes <- function(x) {
            x %<>%
                lapply(function(xX){
                    lapply(xX ,function(u) {
                        if(canConvert(u)) { type.convert(as.character(u), as.is = TRUE) } else { u }
                    })
                }) %>% do.call(cbind, .) %>% as.data.frame
        } #types fun

        validateBorderStyle <- function(borderStyle){


            valid <- c("none", "thin", "medium", "dashed", "dotted", "thick", "double", "hair", "mediumDashed", 
                       "dashDot", "mediumDashDot", "dashDotDot", "mediumDashDotDot", "slantDashDot")

            ind <- match(tolower(borderStyle), tolower(valid))
            if(any(is.na(ind)))
                stop("Invalid borderStyle", call. = FALSE)

            return(valid[ind])

        }

        validateColour <- function(colour, errorMsg = "Invalid colour!"){

            ## check if
            if(is.null(colour))
                colour = "black"

            validColours <- colours()

            if(any(colour %in% validColours))
                colour[colour %in% validColours] <- col2hex(colour[colour %in% validColours])

            if(any(!grepl("^#[A-Fa-f0-9]{6}$", colour)))
                stop(errorMsg, call.=FALSE)

            colour <- gsub("^#", "FF", toupper(colour))

            return(colour)

        }
        #x="0001"
        canConvert <- function(x) {
            return( !grepl("^0+\\.?\\d",x) )
            }
    } # define helper functions

    if(T) {
        params <- list(...)
        if (!is.logical(asTable)) 
            stop("asTable must be a logical.")
        creator <- ifelse("creator" %in% names(params), params$creator, 
                          "")
        title <- params$title
        subject <- params$subject
        category <- params$category
        sheetName <- "Sheet 1"
        if ("sheetName" %in% names(params)) {
            if (any(nchar(params$sheetName) > 31)) 
                stop("sheetName too long! Max length is 31 characters.")
            sheetName <- as.character(params$sheetName)
            if ("list" %in% class(x) & length(sheetName) == length(x)) 
                names(x) <- sheetName
        }
        tabColour <- NULL
        if ("tabColour" %in% names(params)) 
            tabColour <- validateColour(params$tabColour, "Invalid tabColour!")
        zoom <- 100
        if ("zoom" %in% names(params)) {
            if (is.numeric(params$zoom)) {
                zoom <- params$zoom
            }
            else {
                stop("zoom must be numeric")
            }
        }
        gridLines <- TRUE
        if ("gridLines" %in% names(params)) {
            if (all(is.logical(params$gridLines))) {
                gridLines <- params$gridLines
            }
            else {
                stop("Argument gridLines must be TRUE or FALSE")
            }
        }
        overwrite <- TRUE
        if ("overwrite" %in% names(params)) {
            if (is.logical(params$overwrite)) {
                overwrite <- params$overwrite
            }
            else {
                stop("Argument overwrite must be TRUE or FALSE")
            }
        }
        withFilter <- TRUE
        if ("withFilter" %in% names(params)) {
            if (is.logical(params$withFilter)) {
                withFilter <- params$withFilter
            }
            else {
                stop("Argument withFilter must be TRUE or FALSE")
            }
        }
        startRow <- 1
        if ("startRow" %in% names(params)) {
            if (all(startRow > 0)) {
                startRow <- params$startRow
            }
            else {
                stop("startRow must be a positive integer")
            }
        }
        startCol <- 1
        if ("startCol" %in% names(params)) {
            if (all(startCol > 0)) {
                startCol <- params$startCol
            }
            else {
                stop("startCol must be a positive integer")
            }
        }
        colNames <- TRUE
        if ("colNames" %in% names(params)) {
            if (is.logical(params$colNames)) {
                colNames <- params$colNames
            }
            else {
                stop("Argument colNames must be TRUE or FALSE")
            }
        }
        if ("col.names" %in% names(params)) {
            if (is.logical(params$col.names)) {
                colNames <- params$col.names
            }
            else {
                stop("Argument col.names must be TRUE or FALSE")
            }
        }
        rowNames <- FALSE
        if ("rowNames" %in% names(params)) {
            if (is.logical(params$rowNames)) {
                rowNames <- params$rowNames
            }
            else {
                stop("Argument colNames must be TRUE or FALSE")
            }
        }
        if ("row.names" %in% names(params)) {
            if (is.logical(params$row.names)) {
                rowNames <- params$row.names
            }
            else {
                stop("Argument row.names must be TRUE or FALSE")
            }
        }
        xy <- NULL
        if ("xy" %in% names(params)) {
            if (length(params$xy) != 2) 
                stop("xy parameter must have length 2")
            xy <- params$xy
        }
        headerStyle <- NULL
        if ("headerStyle" %in% names(params)) {
            if (length(params$headerStyle) == 1) {
                if ("Style" %in% class(params$headerStyle)) {
                    headerStyle <- params$headerStyle
                }
                else {
                    stop("headerStyle must be a style object.")
                }
            }
            else {
                if (all(sapply(params$headerStyle, function(x) "Style" %in% 
                               class(x)))) {
                    headerStyle <- params$headerStyle
                }
                else {
                    stop("headerStyle must be a style object.")
                }
            }
        }
        borders <- NULL
        if ("borders" %in% names(params)) {
            borders <- tolower(params$borders)
            if (!all(borders %in% c("surrounding", "rows", "columns", 
                                    "all"))) 
                stop("Invalid borders argument")
        }
        borderColour <- getOption("openxlsx.borderColour", "black")
        if ("borderColour" %in% names(params)) 
            borderColour <- params$borderColour
        borderStyle <- getOption("openxlsx.borderStyle", "thin")
        if ("borderStyle" %in% names(params)) {
            borderStyle <- validateBorderStyle(params$borderStyle)
        }
        keepNA <- FALSE
        if ("keepNA" %in% names(params)) {
            if (!"logical" %in% class(keepNA)) {
                stop("keepNA must be a logical.")
            }
            else {
                keepNA <- params$keepNA
            }
        }
        tableStyle <- "TableStyleLight9"
        if ("tableStyle" %in% names(params)) 
            tableStyle <- params$tableStyle
        colWidths <- ""
        if ("colWidths" %in% names(params)) 
            colWidths <- params$colWidths
    } # params check

    if(class(x) == "data.frame") {
        x %<>% setTypes %>% list
    } else {
        lNames <- names(x)
        x %<>% lapply(setTypes)
    }

    if(T) {   
        nms <- names(x)
        nSheets <- length(x)
        if (is.null(nms)) {
            nms <- paste("Sheet", 1:nSheets)
        }
        else if (any("" %in% nms)) {
            nms[nms %in% ""] <- paste("Sheet", (1:nSheets)[nms %in% 
                                                               ""])
        }
        else {
            nms <- make.unique(nms)
        }
        if (any(nchar(nms) > 31)) {
            warning("Truncating list names to 31 characters.")
            nms <- substr(nms, 1, 31)
        }
        if (!is.null(tabColour)) {
            if (length(tabColour) != nSheets) 
                tabColour <- rep_len(tabColour, length.out = nSheets)
        }
        if (length(zoom) != nSheets) 
            zoom <- rep_len(zoom, length.out = nSheets)
        if (length(gridLines) != nSheets) 
            gridLines <- rep_len(gridLines, length.out = nSheets)
        if (length(withFilter) != nSheets) 
            withFilter <- rep_len(withFilter, length.out = nSheets)
        if (length(colNames) != nSheets) 
            colNames <- rep_len(colNames, length.out = nSheets)
        if (length(rowNames) != nSheets) 
            rowNames <- rep_len(rowNames, length.out = nSheets)
        if (length(startRow) != nSheets) 
            startRow <- rep_len(startRow, length.out = nSheets)
        if (length(startCol) != nSheets) 
            startCol <- rep_len(startCol, length.out = nSheets)
        if (!is.null(headerStyle)) 
            headerStyle <- lapply(1:nSheets, function(x) return(headerStyle))
        if (length(borders) != nSheets & !is.null(borders)) 
            borders <- rep_len(borders, length.out = nSheets)
        if (length(borderColour) != nSheets) 
            borderColour <- rep_len(borderColour, length.out = nSheets)
        if (length(borderStyle) != nSheets) 
            borderStyle <- rep_len(borderStyle, length.out = nSheets)
        if (length(keepNA) != nSheets) 
            keepNA <- rep_len(keepNA, length.out = nSheets)
        if (length(asTable) != nSheets) 
            asTable <- rep_len(asTable, length.out = nSheets)
        if (length(tableStyle) != nSheets) 
            tableStyle <- rep_len(tableStyle, length.out = nSheets)
        if (length(colWidths) != nSheets) 
            colWidths <- rep_len(colWidths, length.out = nSheets)
    }  # setup and validation

    wb <- openxlsx::createWorkbook(creator = creator, title = title, subject = subject, 
                         category = category)

    for (i in 1:nSheets) {

        if(T) {

            wb$addWorksheet(nms[[i]], showGridLines = gridLines[i], 
                            tabColour = tabColour[i], zoom = zoom[i])
            if (asTable[i]) {

                for(ii in seq_along(x[[i]])){
                    openxlsx::writeDataTable(wb = wb, sheet = i, x = names(x[[i]])[[ii]],
                                             startCol = ii, startRow = 1, 
                                             xy = xy, colNames = colNames[[i]], rowNames = rowNames[[i]], 
                                             tableStyle = tableStyle[[i]], tableName = NULL, 
                                             headerStyle = headerStyle[[i]], withFilter = withFilter[[i]], 
                                             keepNA = keepNA[[i]]
                                             )
                    icol <- x[[i]][[ii]]

                    for(j in seq_along(icol)){
                        dati <- icol[[j]]

                        openxlsx::writeData(wb = wb, sheet = i,x = dati,
                                            startCol = ii, startRow = j+1, 
                                            xy = xy, colNames = colNames[[i]], rowNames = rowNames[[i]], 
                                            tableStyle = tableStyle[[i]], tableName = NULL, 
                                            headerStyle = headerStyle[[i]], withFilter = withFilter[[i]], 
                                            keepNA = keepNA[[i]]
                                            )
                    }
                }
            }
            else {

                for(ii in seq_along(x[[i]])){

                    openxlsx::writeData(wb = wb, sheet = i, x = names(x[[i]])[[ii]],
                                        startCol = ii, startRow = 1,
                                        xy = xy, colNames = colNames[[i]], rowNames = rowNames[[i]],
                                        headerStyle = headerStyle[[i]],
                                        borders = borders[[i]], borderColour = borderColour[[i]], borderStyle = borderStyle[[i]],
                                        keepNA = keepNA[[i]]
                    )
                    icol <- x[[i]][[ii]]

                    for(j in seq_along(icol)){
                        dati <- icol[[j]]

                        openxlsx::writeData(wb = wb, sheet = i,x = dati,
                                            startCol = ii, startRow = j+1, 
                                            xy = xy, colNames = colNames[[i]], rowNames = rowNames[[i]],
                                            headerStyle = headerStyle[[i]],
                                            borders = borders[[i]], borderColour = borderColour[[i]], borderStyle = borderStyle[[i]],
                                            keepNA = keepNA[[i]]
                        )
                    }
                }
            }
            if (colWidths[i] %in% "auto") 
                setColWidths(wb, sheet = i, cols = 1:ncol(x[[i]]) + 
                                 startCol[[i]] - 1L, widths = "auto")

            } #from list



    }

    if(T) {
        freezePanes <- FALSE
        firstActiveRow <- rep_len(1L, length.out = nSheets)
        if ("firstActiveRow" %in% names(params)) {
            firstActiveRow <- params$firstActiveRow
            freezePanes <- TRUE
            if (length(firstActiveRow) != nSheets) 
                firstActiveRow <- rep_len(firstActiveRow, length.out = nSheets)
        }
        firstActiveCol <- rep_len(1L, length.out = nSheets)
        if ("firstActiveCol" %in% names(params)) {
            firstActiveCol <- params$firstActiveCol
            freezePanes <- TRUE
            if (length(firstActiveCol) != nSheets) 
                firstActiveCol <- rep_len(firstActiveCol, length.out = nSheets)
        }
        firstRow <- rep_len(FALSE, length.out = nSheets)
        if ("firstRow" %in% names(params)) {
            firstRow <- params$firstRow
            freezePanes <- TRUE
            if ("list" %in% class(x) & length(firstRow) != nSheets) 
                firstRow <- rep_len(firstRow, length.out = nSheets)
        }
        firstCol <- rep_len(FALSE, length.out = nSheets)
        if ("firstCol" %in% names(params)) {
            firstCol <- params$firstCol
            freezePanes <- TRUE
            if ("list" %in% class(x) & length(firstCol) != nSheets) 
                firstCol <- rep_len(firstCol, length.out = nSheets)
        }
        if (freezePanes) {
            for (i in 1:nSheets) openxlsx::freezePane(wb = wb, sheet = i, 
                                            firstActiveRow = firstActiveRow[i], firstActiveCol = firstActiveCol[i], 
                                            firstRow = firstRow[i], firstCol = firstCol[i])
        }
    } # additional settings/Options

    openxlsx::saveWorkbook(wb = wb, file = file, overwrite = overwrite)

    return(invisible(NULL))
}

example data:

df1 <- mtcars

df1[1,3]<-"ID =====>"
df1[1,4]<-"00000123"
df1[3,7]<-NA
df1[2,6]<-"stringi"

ldf <- list(NOW=df1, WITH=df1, LISTS=df1)

call:

writeXlsxWithTypes(df1, "test_normal3.xlsx" , rowNames = TRUE, borders = "surrounding")
writeXlsxWithTypes(ldf, "test_list3.xlsx", rowNames = TRUE, borders = "surrounding")
like image 20
Andre Elrico Avatar answered Nov 19 '22 00:11

Andre Elrico