Eu tentei reescrever este código (para aprender esta abordagem) usando o operador %>%:
library(arules)
data(AdultUCI) #https://archive.ics.uci.edu/ml/datasets/Census+Income
# Discretize capital-gain into an ordered factor with three levels.
# cut() uses right-closed intervals by default, so the bins are:
#   (-Inf, 0]       -> "None"
#   (0, median]     -> "Low"   (median of the strictly positive gains)
#   (median, Inf]   -> "High"
# The labels are applied by ordered(), which renames the levels cut() produced.
AdultUCI[["capital-gain"]] <- ordered(
  cut(
    AdultUCI[["capital-gain"]],
    c(
      -Inf,
      0,
      median(AdultUCI[["capital-gain"]][AdultUCI[["capital-gain"]] > 0]),
      Inf
    )
  ),
  labels = c("None", "Low", "High")
)
É possível fazer? Aqui está minha tentativa:
# NOTE(review): this is the asker's attempt, kept verbatim; it is not valid R.
# `%>%` forwards the value on its left into the call on its right, so chaining
# the bare function names `ordered %>% cut` does not nest the calls, and the
# arguments written after the trailing comma are not inside any function call.
AdultUCI[["capital-gain"]] <- ordered %>% cut %>% AdultUCI[["capital-gain"]],
+ c(-Inf, 0, median(AdultUCI[["capital-gain"]][AdultUCI[["capital-gain"]] > 0]),
+ Inf),labels = c("None", "Low", "High")
Respostas:
1 para resposta № 1 — Isso deve funcionar:
library(dplyr)

# Reproducible data: the raw UCI "adult" file has no header row, so the
# columns are read in as V1..V15.
AdultUCI <- read.csv(
  "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data",
  header = FALSE
)
# capital-gain is the 11th attribute in the file; column 13 is hours-per-week,
# so naming column 13 "capital-gain" would silently bin the wrong variable.
colnames(AdultUCI)[11] <- "capital-gain"

# Original (nested) code: bin capital-gain into None / Low / High, splitting
# the positive values at their median, and return an ordered factor.
originalOrdered <- ordered(
  cut(
    AdultUCI[["capital-gain"]],
    c(
      -Inf,
      0,
      median(AdultUCI[["capital-gain"]][AdultUCI[["capital-gain"]] > 0]),
      Inf
    ),
    labels = c("None", "Low", "High")
  ),
  levels = c("None", "Low", "High")
)

# Same computation using dplyr and %>%: the column is renamed to `x` so that
# the cut() expression can refer to it directly inside mutate().
newOrdered <- AdultUCI %>%
  select(x = `capital-gain`) %>%
  mutate(
    capitalGainOrdered = ordered(
      cut(
        x,
        c(-Inf, 0, median(x[x > 0]), Inf),
        labels = c("None", "Low", "High")
      ),
      levels = c("None", "Low", "High")
    )
  ) %>%
  pull(capitalGainOrdered)

# Test that both approaches give the same result
identical(originalOrdered, newOrdered)
#[1] TRUE
str(newOrdered)
# Ord.factor w/ 3 levels "None"<"Low"<"High": 2 1 1 1 1 1 1 1 3 2 ...