## -----------------------------------------------------------------
library(tidyverse)
## -----------------------------------------------------------------
library(readxl)
# Read the Task 1 workbook; the path is resolved against the working directory.
task1 <- read_excel(path = "Downloads/TASK 1 .xlsx")
# Rows and columns that were read.
dim(task1)
## -----------------------------------------------------------------
# Compact listing of every column and its storage type.
str(task1)
## -----------------------------------------------------------------
# First emotion item: summary, then the count of each distinct response.
summary(task1[["emotions_1"]])
table(task1[["emotions_1"]])
# Summary of the duration column.
summary(task1[["duration"]])
## -----------------------------------------------------------------
# Frequency table of the responses for every emotion item (raw and "r"
# variants). lapply() tabulates each column on its own values and always
# returns a named list; apply() would first coerce the selection to a single
# matrix, which can turn numeric columns into padded text and makes the shape
# of the result depend on the data.
lapply(
  task1[c("emotions_1", "emotions_2", "emotions_3",
          "emotions_4", "emotions_5", "emotions_6",
          "emotions_7", "emotions_8", "emotions_9",
          "emotions_7r", "emotions_8r", "emotions_9r")],
  table
)
## -----------------------------------------------------------------
# Element-wise test for the literal string "NA".
#   col: a vector (e.g. one column of the data).
# Returns a logical vector the same length as `col`. Entries equal to the
# text "NA" give TRUE; everything else, including real missing values,
# gives FALSE.
temp_function <- function(col) {
  is.element(col, "NA")
}
# Turn the literal text "NA" into real missing values, one column at a time.
# Only character columns can hold that text, so every other column is passed
# through unchanged and keeps its type (no whole-table matrix coercion).
task1[] <- lapply(task1, function(column) {
  if (is.character(column)) {
    column[temp_function(column)] <- NA
  }
  column
})
# Keep only the rows that have no missing value in any column.
task1 <- task1[complete.cases(task1), ]
## -----------------------------------------------------------------
# Mean of duration
mean(task1[["duration"]])
# Standard deviation of duration
sd(task1[["duration"]])
# Range of duration: largest value minus smallest value
diff(range(task1[["duration"]]))
## -----------------------------------------------------------------
# Number of rows per gender code.
table(task1[["gender"]])
## -----------------------------------------------------------------
# Duration summary for the rows with gender code 1.
with(task1, summary(duration[gender == 1]))
## -----------------------------------------------------------------
# Duration summary for the rows with gender code 2.
with(task1, summary(duration[gender == 2]))
## -----------------------------------------------------------------
# Boxplot of duration with one box per gender code (gender is treated as a
# discrete factor on the x axis).
ggplot(task1, aes(x = factor(gender), y = duration)) +
  geom_boxplot()
## -----------------------------------------------------------------
# Same boxplot, restricted to rows whose duration is below 25000
# (presumably to keep extreme durations from dominating the scale — confirm
# the cut-off with the analysis plan).
task1[task1$duration < 25000, ] %>%
  ggplot(aes(x = factor(gender), y = duration)) +
  geom_boxplot()
## -----------------------------------------------------------------
# Convert the columns at positions 5 to 90 to numeric type.
# lapply() converts each column from its own values. apply() would first
# collapse the slice into one matrix; when any column is character, the
# numeric columns are passed through format() and can lose digits.
# NOTE(review): 5:90 is positional — confirm it still matches the column
# layout of the workbook.
task1[5:90] <- lapply(task1[5:90], as.numeric)

# Row-wise mean over the emotion items, stored as `emotions_mean`.
# NOTE(review): items 7-9 enter the mean twice, once as emotions_7..9 and once
# as emotions_7r..9r. If the "r" columns are recoded versions of the same
# items, only one set should probably be averaged — confirm before relying on
# this score.
task1$emotions_mean <- rowMeans(
  task1[c("emotions_1", "emotions_2", "emotions_3",
          "emotions_4", "emotions_5", "emotions_6",
          "emotions_7", "emotions_8", "emotions_9",
          "emotions_7r", "emotions_8r", "emotions_9r")]
)

# Row-wise mean over AN_1 ... AN_11, stored as `AN_mean`.
task1$AN_mean <- rowMeans(
  task1[c("AN_1", "AN_2", "AN_3", "AN_4",
          "AN_5", "AN_6", "AN_7", "AN_8",
          "AN_9", "AN_10", "AN_11")]
)
## -----------------------------------------------------------------
# Correlation between the two scale means (cor() with its default method).
with(task1, cor(emotions_mean, AN_mean))
## -----------------------------------------------------------------
# Save the cleaned data, including the two new mean columns, as a .csv file
# in the working directory.
write.csv(x = task1, file = "studentid_task1_.csv")