How can I wrap the list of commands below in a single function?
For example: VariableRanking <- function(formula, variables, ...) { insert commands ... }
# Variable Ranking Model automation
#
# Ranks the predictors of a logistic regression by refitting the model once
# per predictor with that predictor left out, and comparing each AIC with the
# AIC of the full model.
#
# Objects that must already exist in the workspace:
#   formula  - formula of the full model (response ~ predictors)
#   dataL_TT - data the models are fitted to
#   modelTT  - fitted full model; only modelTT$aic is read

# Parse the formula with terms() instead of splitting its text, so the result
# does not depend on spacing or on how the target name sorts alphabetically.
# The user's `formula` object is left untouched so the script can be re-run.
full_formula <- as.formula(formula)
stopifnot(length(full_formula) == 3L) # a target on the left-hand side is required
model_terms <- terms(full_formula, data = dataL_TT)

# Target (response) variable, taken from the left-hand side of the formula
target <- full_formula[[2L]]
target

# Model variables (predictor terms) in alphabetical order, target excluded
allvariables <- sort(attr(model_terms, "term.labels"))
stopifnot(length(allvariables) > 0L)
allvariables

# Target followed by the model variables
variables <- c(paste(deparse(target), collapse = ""), allvariables)
variables

# Fit one model per variable, each time leaving that variable out
listOfModels <- lapply(seq_along(allvariables), function(i) {
  # exclude variable i
  currentvariable <- allvariables[-i]
  # with a single predictor nothing is left: fall back to intercept-only
  if (length(currentvariable) == 0L) {
    currentvariable <- "1"
  }
  # programmatically assemble regression formula
  regressionFormula <- reformulate(currentvariable, response = target)
  # fit model
  glm(
    formula = regressionFormula,
    family = binomial(link = "logit"),
    data = dataL_TT
  )
})
listOfModels

# AIC of each leave-one-out model
vapply(listOfModels, function(fit) fit$aic, numeric(1))

# Assign X as the AIC of the full model
X <- modelTT$aic
X

# Difference of the AIC of each model to the AIC of the full model
AICdifference <- vapply(listOfModels, function(fit) fit$aic - X, numeric(1))
AICdifference

# Naming the AIC differences
AICdifference2 <- data.frame(variables = allvariables, AICdiff = AICdifference)
AICdifference2

# Bar chart of the AIC difference of each variable, saved to pdf.
# One ordering is used for both bar heights and labels so they cannot drift
# apart.
ranking <- order(AICdifference2$AICdiff, decreasing = FALSE)
pdf("Barchart.pdf", width = 12, height = 10)
par(mar = c(2, 18, 2, 5)) # wide left margin for long variable names
barplot(
  AICdifference2$AICdiff[ranking],
  main = "Variable Ranking based on AIC decrease",
  horiz = TRUE,
  xlab = "AIC Increase",
  names.arg = as.character(AICdifference2$variables[ranking]),
  las = 1,
  col = "dodgerblue4"
)
dev.off()
Is this possible, given that it needs a lot of parameters? Basically I just need the AICdifference2 data frame as output, and the bar plot saved as a PDF and also displayed on screen.
Try this:
#' Rank model variables by the AIC change caused by leaving each one out
#'
#' For every predictor term of `myformula`, a binomial (logit) GLM is refitted
#' without that term and its AIC is compared with the AIC of the full model.
#' The differences are drawn as a horizontal bar chart saved to
#' `<plotname>.pdf` and returned as a data frame.
#'
#' @param myformula Full model formula (`response ~ predictors`); a formula
#'   object or anything `as.formula()` accepts.
#' @param data Data the models are fitted to; also used to expand a `.` on
#'   the right-hand side of the formula.
#' @param fullmodel_AIC AIC of the full model.
#' @param plotname File name of the plot without the ".pdf" extension.
#' @return A data frame with one row per predictor term, sorted by term name:
#'   `variables` holds the term label and `AICdiff` the AIC of the model
#'   fitted without that term minus `fullmodel_AIC`.
FOO <- function(myformula, data, fullmodel_AIC, plotname) {
  myformula <- as.formula(myformula)
  if (length(myformula) != 3L) {
    stop("`myformula` must have a response on its left-hand side.",
         call. = FALSE)
  }

  # Parse the formula with terms() instead of splitting its text, so the
  # result does not depend on spacing or on how the response name sorts
  # alphabetically relative to the predictors.
  model_terms <- terms(myformula, data = data)
  response <- myformula[[2L]]
  allvariables <- sort(attr(model_terms, "term.labels"))
  if (length(allvariables) == 0L) {
    stop("`myformula` contains no predictor terms to rank.", call. = FALSE)
  }
  has_intercept <- attr(model_terms, "intercept") == 1

  # Fit the model that leaves out the i-th predictor term.
  fit_without <- function(i) {
    remaining <- allvariables[-i]
    # with a single predictor nothing is left: fall back to intercept-only
    if (length(remaining) == 0L) {
      remaining <- "1"
    }
    reduced <- reformulate(remaining, response = response,
                           intercept = has_intercept)
    # look up non-data variables where the caller's formula would
    environment(reduced) <- environment(myformula)
    glm(formula = reduced, family = binomial(link = "logit"), data = data)
  }
  listOfModels <- lapply(seq_along(allvariables), fit_without)

  reduced_aic <- vapply(listOfModels, function(fit) fit$aic, numeric(1))
  AICdifference2 <- data.frame(
    variables = allvariables,
    AICdiff = reduced_aic - fullmodel_AIC
  )

  pdf(paste0(plotname, ".pdf"), width = 12, height = 10)
  # close exactly this device, even if plotting fails
  plot_device <- dev.cur()
  on.exit(dev.off(plot_device), add = TRUE)
  par(mar = c(2, 18, 2, 5)) # wide left margin for long variable names

  # One ordering for both bar heights and labels so they cannot drift apart.
  ranking <- order(AICdifference2$AICdiff, decreasing = FALSE)
  barplot(
    AICdifference2$AICdiff[ranking],
    main = "Variable Ranking based on AIC decrease",
    horiz = TRUE,
    xlab = "AIC Increase",
    names.arg = as.character(AICdifference2$variables[ranking]),
    las = 1,
    col = "dodgerblue4"
  )

  AICdifference2
}
You need four parameters: The myformula, the data (dataL_TT in your code), the fullmodel_AIC (modelTT$aic in your code), and a string to name your plot.
Try calling it with FOO(myformula, dataL_TT, modelTT$aic, "test") and insert your formula object for myformula.
I've changed formula to myformula because formula is a base function of the stats package, and it is generally unwise to use object names which are base functions.
If you like our work, you can donate via PayPal or buy us a coffee so we can maintain and grow. Thank you!
Donate Us With