# Interactive guessing game: repeatedly asks the question whose 0/1 answer
# split carries the most information (in bits) about which person in the
# table is meant, filters the candidates by the answer, and stops once the
# remaining probability mass sits on a single person.
#
# Expected layout of the spreadsheet (as used by the code below):
#   column 1            : Name
#   columns 2 .. (k + 1): question columns coded 0 / 1
#   last column         : Prob

# install.packages('readxl')
# install.packages('stringr')
library(readxl)
library(stringr)

# Tolerance used when comparing floating-point entropies.
entropy_tolerance <- 1e-12

#' Shannon entropy (in bits) of a vector of probabilities.
#'
#' @param p Numeric vector of probabilities. Zero entries contribute nothing.
#' @return A single number: the sum of `-p * log2(p)` over non-zero entries.
entropy <- function(p) {
  nonzero <- p[p != 0]
  -sum(nonzero * log2(nonzero))
}

#' Entropy of the 0/1 answer split for every question column.
#'
#' @param candidates Data frame of the people still in play; must contain a
#'   `Prob` column and the columns named in `question_cols`.
#' @param question_cols Character vector of question column names.
#' @return Named numeric vector (names are the question column names).
question_entropies <- function(candidates, question_cols) {
  vapply(
    question_cols,
    function(column) {
      answers <- candidates[[column]]
      # which() drops NA answers instead of propagating NA into the sums.
      entropy(c(
        sum(candidates$Prob[which(answers == 0)]),
        sum(candidates$Prob[which(answers == 1)])
      ))
    },
    numeric(1)
  )
}

#' Prompt until the user types something that parses as a number.
#'
#' @param question Prompt text shown to the user.
#' @return The numeric answer (`-1` is used by the caller as "quit").
read_numeric_answer <- function(question) {
  repeat {
    value <- suppressWarnings(as.numeric(readline(prompt = question)))
    if (!is.na(value)) {
      return(value)
    }
    # readline() returns "" immediately when not interactive, so retrying
    # would never terminate.
    if (!interactive()) {
      stop("An interactive session is required to answer.", call. = FALSE)
    }
    message("Please enter a number (for example 0 or 1, or -1 to quit).")
  }
}

data <- read_excel("AkinatorTabela.xlsx")
# name_encoding = model.matrix(~Name-1, data = data)
# colnames(name_encoding) = sapply(colnames(name_encoding),
#   function(x) paste('Is the person ', str_sub(x,5), '?', sep = ''))
# data = cbind(data[,1:(ncol(data) - 1),], name_encoding, data[,ncol(data)])

n_questions <- ncol(data) - 2
stopifnot(
  all(c("Name", "Prob") %in% names(data)),
  n_questions >= 1,
  is.numeric(data$Prob),
  !anyNA(data$Prob)
)
question_cols <- names(data)[seq_len(n_questions) + 1L]

# Wide left margin so the horizontal bar labels fit.
par(mar = c(5, 13, 2, 2))

sub_data <- data
answer <- 0

while (answer != -1) {
  current_entropy <- entropy(sub_data$Prob)
  print(paste("Current entropy = ", current_entropy))

  if (current_entropy < entropy_tolerance) {
    print(paste("The person is", sub_data$Name[which.max(sub_data$Prob)]))
    answer <- -1
  } else {
    barplot(
      sub_data$Prob,
      horiz = TRUE, names.arg = sub_data$Name, col = "salmon",
      main = "Most likely person", xlab = "Probability", las = 2
    )

    # Use the probabilities of the *remaining* candidates so the mask built
    # from sub_data's columns lines up with the vector it indexes.
    question_information <- question_entropies(sub_data, question_cols)
    # print(sort(question_information, decreasing = TRUE))

    readline(prompt = "Press [enter] to continue")
    barplot(
      sort(question_information),
      horiz = TRUE, col = "skyblue",
      main = "Questions:", xlab = "Average information gain", las = 2
    )

    max_information <- max(question_information)

    if (max_information < entropy_tolerance) {
      # No question separates the remaining people; asking more cannot help.
      print(paste(
        "The person is most likely",
        sub_data$Name[which.max(sub_data$Prob)]
      ))
      answer <- -1
    } else {
      # Treat questions within tolerance of the maximum as ties.
      is_best <- question_information >= max_information - entropy_tolerance
      best_questions <- names(question_information)[is_best]
      question <- sample(best_questions, 1)

      answer <- read_numeric_answer(question)

      if (answer != -1) {
        matching_rows <- which(sub_data[[question]] == answer)
        sub_data <- sub_data[matching_rows, ]
        remaining_mass <- sum(sub_data$Prob)

        if (length(matching_rows) == 0 || !isTRUE(remaining_mass > 0)) {
          print("No remaining person matches the given answers")
          answer <- -1
        } else {
          sub_data$Prob <- sub_data$Prob / remaining_mass
        }
      }
    }
  }
}