# 24th Nov 2022
# 06:03 am
# Load the raw review dumps, one data frame per product category.
# Every file is read with read.table() using a tab separator, no header row
# and double-quote quoting; the parsing sections below only look at column 1
# of each result.
# comment.char = "" switches off read.table()'s default "#" comment handling.
# Without it, a "#" that falls outside a quoted stretch of a record would
# silently cut off the rest of that record.
review_data_toys_and_games <- read.table(
  "Toys_and_Games_5.json",
  header = FALSE, sep = "\t", quote = "\"", comment.char = "",
  stringsAsFactors = FALSE
)
review_data_automotive <- read.table(
  "Automotive_5.json",
  header = FALSE, sep = "\t", quote = "\"", comment.char = "",
  stringsAsFactors = FALSE
)
review_data_beauty <- read.table(
  "Beauty_5.json",
  header = FALSE, sep = "\t", quote = "\"", comment.char = "",
  stringsAsFactors = FALSE
)
review_data_pet_supplies <- read.table(
  "Pet_Supplies_5.json",
  header = FALSE, sep = "\t", quote = "\"", comment.char = "",
  stringsAsFactors = FALSE
)
#################################################################################################
# Data prep for toys and games
#
# Each row of review_data_toys_and_games holds one raw review record in its
# first column. The record is split on single spaces, and the review text and
# rating are located through their field-marker tokens:
#   * the text starts right after the first "reviewText:" token,
#   * the text ends right before the last "overall:" token,
#   * the rating is the token that follows that "overall:" token.
# Using the first / last marker keeps the whole text when the review itself
# contains one of the marker words.
#
# Result: toys_games, one row per parsed review, with the columns
# ratings_data (numeric) and review_data (character).
n_records <- nrow(review_data_toys_and_games)
# Preallocate for the largest possible result; growing a data frame one row
# at a time copies it on every iteration.
ratings <- rep(NA_real_, n_records)
reviews <- rep(NA_character_, n_records)
idx <- 1 # next free slot in ratings / reviews
for (i in seq_len(n_records)) {
  record <- as.character(review_data_toys_and_games[i, 1])
  textbag <- unlist(strsplit(record, " "))
  # Only records with exactly one "reviewerID" token are processed.
  if (length(grep("reviewerID", textbag)) != 1) {
    next
  }
  idx_1 <- grep("reviewText:", textbag) + 1
  idx_2 <- grep("overall:", textbag) - 1
  # Skip records that lack either marker.
  if (length(idx_1) == 0 || length(idx_2) == 0) {
    next
  }
  idx_1 <- idx_1[1]
  idx_2 <- idx_2[length(idx_2)]
  reviews[idx] <- paste(textbag[idx_1:idx_2], collapse = " ")
  # The rating token presumably carries trailing punctuation (such as a
  # comma); keep only digits and the decimal point before converting, so the
  # value is parsed directly instead of being stripped of all punctuation
  # and divided by 10.
  ratings[idx] <- as.numeric(gsub("[^0-9.]", "", textbag[idx_2 + 2]))
  idx <- idx + 1
}
kept <- seq_len(idx - 1)
toys_games <- data.frame(
  ratings_data = ratings[kept],
  review_data = reviews[kept],
  stringsAsFactors = FALSE
)
######ATTENTION##########################################
# Create a folder named Data_Folder inside your "Final Project" folder. Replace the path below with your own directory.
#dir.create("/Users/busrakeles/Dropbox/R/Assignments - 623/Final Project/Data_Folder")
# Write the toys-and-games table to disk as tab-separated text.
# The destination is an absolute path: change it to a directory that exists
# on your machine. Remember the file name, MGMT_623_toys_games.txt; it is
# needed when importing the table for sentiment analysis.
write.table(
  toys_games,
  file = "C:/Manipal/Pal/SATISTICS/Jsanon/Final Project/MGMT_623_toys_games.txt",
  sep = "\t"
)
#################################################################################################
# Clear the workspace, keeping only the raw review tables that have not been
# processed yet. keep_objects is itself listed by ls() at the time of the
# call, so it is removed together with everything else.
keep_objects <- c(
  "review_data_automotive",
  "review_data_beauty",
  "review_data_pet_supplies"
)
rm(list = setdiff(ls(), keep_objects))
#################################################################################################
# Data prep for automotive
#
# Each row of review_data_automotive holds one raw review record in its first
# column. The record is split on single spaces, and the review text and
# rating are located through their field-marker tokens:
#   * the text starts right after the first "reviewText:" token,
#   * the text ends right before the last "overall:" token,
#   * the rating is the token that follows that "overall:" token.
# Using the first / last marker keeps the whole text when the review itself
# contains one of the marker words.
#
# Result: automotive, one row per parsed review, with the columns
# ratings_data (numeric) and review_data (character).
n_records <- nrow(review_data_automotive)
# Preallocate for the largest possible result; growing a data frame one row
# at a time copies it on every iteration.
ratings <- rep(NA_real_, n_records)
reviews <- rep(NA_character_, n_records)
idx <- 1 # next free slot in ratings / reviews
for (i in seq_len(n_records)) {
  record <- as.character(review_data_automotive[i, 1])
  textbag <- unlist(strsplit(record, " "))
  # Only records with exactly one "reviewerID" token are processed.
  if (length(grep("reviewerID", textbag)) != 1) {
    next
  }
  idx_1 <- grep("reviewText:", textbag) + 1
  idx_2 <- grep("overall:", textbag) - 1
  # Skip records that lack either marker.
  if (length(idx_1) == 0 || length(idx_2) == 0) {
    next
  }
  idx_1 <- idx_1[1]
  idx_2 <- idx_2[length(idx_2)]
  reviews[idx] <- paste(textbag[idx_1:idx_2], collapse = " ")
  # The rating token presumably carries trailing punctuation (such as a
  # comma); keep only digits and the decimal point before converting, so the
  # value is parsed directly instead of being stripped of all punctuation
  # and divided by 10.
  ratings[idx] <- as.numeric(gsub("[^0-9.]", "", textbag[idx_2 + 2]))
  idx <- idx + 1
}
kept <- seq_len(idx - 1)
automotive <- data.frame(
  ratings_data = ratings[kept],
  review_data = reviews[kept],
  stringsAsFactors = FALSE
)
# Write the automotive table to disk as tab-separated text.
# The destination is an absolute path: change it to a directory that exists
# on your machine. Remember the file name, MGMT_623_automotive.txt; it is
# needed when importing the table for sentiment analysis.
write.table(
  automotive,
  file = "C:/Manipal/Pal/SATISTICS/Jsanon/Final Project/MGMT_623_automotive.txt",
  sep = "\t"
)