####################
### Task 1 ###
####################

data("mtcars")

# 1. (a) Mean, standard deviation, minimum and maximum for columns 1, 4, 6
# and 7 of mtcars; the result has one column per variable.
apply(mtcars[c(1, 4, 6, 7)], 2, function(x) {
  c("Mean" = mean(x), "SD" = sd(x), "MIN" = min(x), "MAX" = max(x))
})

# 1. (b) Derived index per car, stored rounded to two decimals.
Index <- mtcars$mpg^2 / mtcars$wt + 0.5 * mtcars$qsec - log(mtcars$hp)
# The new column has to be named explicitly: an unnamed cbind() argument is
# labelled with its deparsed expression ("round(Index, 2)"), which would leave
# data$Index as NULL and break every comparison below.
data <- cbind(mtcars, Index = round(Index, 2))
data

# 1. (v) Assign each car to one of four clusters by comparing mpg, qsec and
# Index with their column means; anything not matching clusters 1-3 is 4.
mpg_mean <- mean(data$mpg)
qsec_mean <- mean(data$qsec)
index_mean <- mean(data$Index)

classter1_condition <- (data$mpg > mpg_mean) &
  (data$qsec > qsec_mean) &
  (data$Index < index_mean)
classter2_condition <- (data$mpg > mpg_mean) &
  (data$qsec < qsec_mean) &
  (data$Index < index_mean)
classter3_condition <- (data$mpg < mpg_mean) &
  (data$qsec < qsec_mean) &
  (data$Index < index_mean)

Classter <- ifelse(
  classter1_condition, 1,
  ifelse(classter2_condition, 2, ifelse(classter3_condition, 3, 4))
)
# Fix the levels so that empty clusters still appear as factor levels.
fClasster <- factor(Classter, levels = 1:4)
data <- cbind(data, fClasster)
data

# 1. (g) One list element per cluster: the car names followed by the cluster's
# mean Index. c() coerces the mean to character so it can sit beside the names.
Lista <- lapply(1:4, function(k) {
  members <- data[data$fClasster == k, ]
  c(rownames(members), mean(members$Index))
})
Lista

####################
### Task 2 ###
####################

data("airquality")
airquality

# 2. (a) Mean and variance of every column except columns 5 and 6, before and
# after standardising each column to z-scores (NAs ignored).
round(apply(airquality[c(-5, -6)], 2, function(x) {
  c(mean(x, na.rm = TRUE), var(x, na.rm = TRUE))
}), 5)
scaled_air <- apply(airquality[c(-5, -6)], 2, function(x) {
  round((x - mean(x, na.rm = TRUE)) / sd(x, na.rm = TRUE), 5)
})
round(apply(scaled_air, 2, function(x) {
  c(mean(x, na.rm = TRUE), var(x, na.rm = TRUE))
}), 5)

# 2. (b) Interquartile range of x (third quartile minus first quartile),
# ignoring NAs. Returns a plain, unnamed numeric scalar.
Iqr_Func <- function(x) {
  quartiles <- quantile(x, probs = c(0.25, 0.75), na.rm = TRUE)
  as.numeric(diff(quartiles))
}

# 2. (v) Select the rows whose Wind is at most the mean Wind and whose Temp is
# at least 6.5 times the interquartile range of Temp.
#
# air: data frame with numeric Wind and Temp columns (defaults to airquality).
# Returns an unnamed list: [[1]] the matching rows, [[2]] the number of rows
# that did not match.
HotDays <- function(air = airquality) {
  wind_mean <- mean(air$Wind, na.rm = TRUE)
  temp_cutoff <- 6.5 * Iqr_Func(air$Temp)
  # which() drops NA comparisons, so a missing Wind/Temp value cannot turn
  # into an all-NA row in the selection.
  hot <- which(air$Wind <= wind_mean & air$Temp >= temp_cutoff)
  list(air[hot, ], nrow(air) - length(hot))
}
HotDays()

####################
### 3.
### Task 3 ###
####################

xk <- c(58.3, 62.1, 54.7, 59.5, 61.0, 57.8, 63.2, 60.5, 56.9, 59.1, 64.0, 60.2)
xt <- c(65.1, 68.4, 70.2, 66.8, 67.5, 69.3, 71.0, 66.0, 68.1, 67.9, 69.7, 68.3)

## 3. (a) F test for equality of variances, then the two-sample t test with
## pooled variance.
var.test(xk, xt, ratio = 1, alternative = "two.sided")
t.test(xk, xt, var.equal = TRUE)

## 3. (b) Two-sided t confidence interval for the mean of x.
##
## x:    numeric sample.
## alfa: confidence level (e.g. 0.95), not the significance level.
## Returns c(lower, upper).
confidence_interval_m <- function(x, alfa = 0.95) {
  n <- length(x)
  t_crit <- qt((1 + alfa) / 2, df = n - 1)
  # sd() already uses the n - 1 denominator, so the standard error of the
  # mean is sd(x) / sqrt(n); this reproduces t.test(x)$conf.int.
  half_width <- t_crit * sd(x) / sqrt(n)
  mean(x) + c(-1, 1) * half_width
}
confidence_interval_m(xk)

## 3. (v) Two-sided chi-square confidence interval for the variance of x.
##
## x:    numeric sample.
## alfa: confidence level (e.g. 0.90), not the significance level.
## Returns c(lower, upper); take sqrt() for a standard-deviation interval.
confidence_interval_s2 <- function(x, alfa = 0.90) {
  dof <- length(x) - 1
  upper_q <- qchisq((1 + alfa) / 2, df = dof)
  lower_q <- qchisq((1 - alfa) / 2, df = dof)
  # var() is the unbiased estimator, so the pivot is (n - 1) * var(x).
  scaled_var <- dof * var(x)
  c(scaled_var / upper_q, scaled_var / lower_q)
}
confidence_interval_s2(xk)
sqrt(confidence_interval_s2(xt))

####################
### Task 4 ###
####################

set.seed(123)
n <- 100
dani <- 31
temp <- rnorm(dani, mean = -2, sd = 4)
sedmica <- rep(1:7, length.out = dani)
# Days 6 and 7 of each week are treated as the weekend.
vikendi <- sedmica == 6 | sedmica == 7

## 4. (a) Simulate consumption: one row per household, one column per day.
## Weekend days draw from N(34, 6), other days from N(28, 5); days with a
## negative temperature are scaled by 1.2. The loop order is kept so the
## random-number stream is consumed day by day.
potrosnja <- matrix(0, nrow = n, ncol = dani)
for (d in seq_len(dani)) {
  if (vikendi[d]) {
    osnovna_potrosnja <- rnorm(n, mean = 34, sd = 6)
  } else {
    osnovna_potrosnja <- rnorm(n, mean = 28, sd = 5)
  }
  if (temp[d] < 0) {
    osnovna_potrosnja <- osnovna_potrosnja * 1.2
  }
  potrosnja[, d] <- osnovna_potrosnja
}

## 4. (b) Average consumption across households for each day, as a line plot.
prosecna_po_danu <- colMeans(potrosnja)
plot(
  1:dani, prosecna_po_danu,
  type = "l", col = "blue", lwd = 2,
  xlab = "Дан",
  ylab = "Просечна потрошња (kWh)",
  main = "Просечна дневна потрошња за 100 домаћинстава"
)

####################
### Task 5 ###
####################

set.seed(123)
n <- 25
N <- 100000
X <- sample(1:n, N, replace = TRUE)
Y <- sample(1:n, N, replace = TRUE)
Z <- X + Y

# Possible values of Z
z_values <- 2:(2 * n)

# Monte Carlo estimate of E[X | Z = z] for every possible z; a value of z that
# never occurs in the sample yields NaN.
means <- vapply(z_values, function(z) mean(X[Z == z]), numeric(1))

result <- data.frame(Z = z_values, E_X_given_Z = means)
print(result)

####################
### 6.
### Task 6 ###
####################

library(dplyr)
library(tidyr)
data(satfruit, package = "PASWR")

## 6. (a) Mean and median of the AF, AL and OL columns.
satfruit %>%
  summarise(
    AF_mean = mean(AF, na.rm = TRUE),
    AF_md = median(AF, na.rm = TRUE),
    AL_mean = mean(AL, na.rm = TRUE),
    AL_md = median(AL, na.rm = TRUE),
    OL_mean = mean(OL, na.rm = TRUE),
    OL_md = median(OL, na.rm = TRUE)
  )

## 6. (b) Total.Arable is the sum of the listed land-use columns and
## Fruit.Share is FR as a fraction of that total; the average share is then
## reported per SArea, smallest first.
satfruit <- satfruit %>%
  mutate(
    Total.Arable = WH + BA + COR + SF + VI + PS + ES + AF + CO + AR + AL +
      OL + FR,
    Fruit.Share = FR / Total.Arable
  )

satfruit %>%
  group_by(SArea) %>%
  summarise(AvgFruitShare = mean(Fruit.Share, na.rm = TRUE)) %>%
  arrange(AvgFruitShare)

## 6. (v) Per-SArea mean of COR, median of SF and maximum of FR.
satfruit %>%
  group_by(SArea) %>%
  summarise(
    AvgCOR = mean(COR, na.rm = TRUE),
    MedianSF = median(SF, na.rm = TRUE),
    MaxFR = max(FR, na.rm = TRUE)
  )

####################
### Task 7 ###
####################

library(ggplot2)
library(nycflights13)

# 7. (a) January flights with complete delay and air-time data, plus the
# difference between air time and departure delay.
flights_jan <- flights %>%
  filter(
    month == 1,
    !is.na(dep_delay),
    !is.na(arr_delay),
    !is.na(air_time)
  ) %>%
  mutate(air_time_diff = air_time - dep_delay)

# 7. (b) Attach destination-airport details; the left join keeps every flight
# even when its dest code has no row in airports.
joined_data <- flights_jan %>%
  left_join(airports, by = c("dest" = "faa"))

# 7. (v) Average arrival delay and average air_time_diff per destination.
# A destination without a match in airports has name == NA after the left
# join; fall back to its dest code so such destinations are summarised
# individually instead of being pooled into a single NA group.
summary_airports <- joined_data %>%
  mutate(name = coalesce(name, dest)) %>%
  group_by(name) %>%
  summarise(
    avg_arr_delay = mean(arr_delay, na.rm = TRUE),
    avg_air_time_diff = mean(air_time_diff, na.rm = TRUE)
  )

# 7. (g) Punctuality level: "high" for a negative average arrival delay,
# "medium" up to and including 10 minutes, "low" above that. No catch-all
# branch is used, so a missing average stays NA.
summary_airports <- summary_airports %>%
  mutate(
    punctuality_level = case_when(
      avg_arr_delay < 0 ~ "high",
      avg_arr_delay <= 10 ~ "medium",
      avg_arr_delay > 10 ~ "low"
    )
  )

# 7. (d) Number of destinations in each punctuality level.
summary_airports %>%
  count(punctuality_level)

# 7. (dj) Scatter plot of average arrival delay against average
# air_time_diff, coloured by punctuality level.
ggplot(summary_airports, aes(x = avg_air_time_diff, y = avg_arr_delay)) +
  geom_point(aes(color = punctuality_level), size = 3, alpha = 0.75) +
  labs(
    title = "Одступање дужине лета vs. просечно кашњење по аеродромима",
    x = "Одступање просечне дужине лета (мин) [avg_air_time_diff]",
    y = "Просечно кашњење при доласку (мин) [avg_arr_delay]",
    color = "Ниво тачности"
  ) +
  theme_minimal()