-
Notifications
You must be signed in to change notification settings - Fork 39
/
.Rhistory
71 lines (71 loc) · 2.48 KB
/
.Rhistory
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# Show the current working directory before changing it
getwd()
# Set the working directory to the course folder.
# The path must be absolute (leading "/"): a relative "Users/..." path is
# resolved against the current directory, so it only works when R was
# started in "/" and otherwise fails with "cannot change working directory".
setwd("/Users/ves/Desktop/asds/stats/StatsI_Fall2023/")
# Confirm that the change took effect
getwd()
# Load data (path is relative to the working directory)
df <- read.csv("datasets/fictional_data.csv")
# First step, look at data.
# View() opens a spreadsheet-style viewer and needs an interactive session,
# so it is skipped when the script is run in batch mode.
if (interactive()) {
  View(df)
}
str(df) # Structure of the R object: column names, types, first values

# Descriptive statistics for income
mean(df$income) # Central tendency: mean
var(df$income) # Variability: sample variance
sd(df$income) # Variability: sample standard deviation
# Variability: standard **error** of the mean, sd / sqrt(n)
sd(df$income) / sqrt(length(df$income))
# The **precise** solution, using the t distribution.
# Critical value for a 99% two-sided interval: the 0.995 quantile leaves
# 0.5% in each tail; degrees of freedom are n - 1.
t_score <- qt(0.995, df = length(df$income) - 1)
# Quick visualization, to look at the distribution of income
hist(df$income,
     # breaks = 20,
     main = "Monthly net income",
     xlab = "Euro")
# Help page for the t distribution functions
?qt
# Close all open graphics devices. Calling dev.off() repeatedly raises
# "cannot shut down device 1 (the null device)" once nothing is left to
# close; graphics.off() closes whatever is open and is safe with no device.
graphics.off()
# Help page for the device-control functions, looked up by function name
?dev.off
# The **precise** solution, using the t distribution:
# 99% confidence interval for the mean of income, step by step.
n_income <- length(df$income)
# Standard error of the mean, computed once and reused for both bounds
se_income <- sd(df$income) / sqrt(n_income)
# Critical value: 0.995 quantile of t with n - 1 degrees of freedom
t_score <- qt(0.995, df = n_income - 1)
lower_99_t <- mean(df$income) - t_score * se_income
upper_99_t <- mean(df$income) + t_score * se_income
# Display the interval so it can be compared with the t.test() output below
c(lower = lower_99_t, upper = upper_99_t)
# Step by step: help pages consulted along the way
?qt
?abline
# We also found a much easier way to calculate the confidence intervals (!):
# the one-sample t.test() output includes the same 99% interval.
t.test(df$income, conf.level = 0.99, alternative = "two.sided")
# Scatter plot: income on the x axis, education on the y axis
plot(df$income, df$edu)
# Color the points by a third variable, cap. The +1 shifts the codes because
# color index 0 means the background color, so points coded 0 would not be
# visible (assumes cap is coded 0/1 -- confirm against the data).
plot(df$income, df$edu,
     col = df$cap + 1)
# Final version: same colored scatter plot with axis labels and a title
plot(df$income,
     df$edu,
     col = df$cap + 1,
     xlab = "Monthly net income (in Euro)",
     ylab = "University level education (in years)",
     main = "The Relationship between education and income")
# Two-sample t-test: does mean income differ between the groups of cap?
# (t.test() does not assume equal variances by default, i.e. Welch's test.)
t.test(df$income ~ df$cap, alternative = "two.sided")
# One-sample t-test on income; its output includes the 99% confidence
# interval for the mean, a much easier way to get the interval (!)
t.test(df$income, conf.level = 0.99, alternative = "two.sided")
# One-sided one-sample test: is mean income less than 3034?
t.test(df$income, mu = 3034, alternative = "less")
# One-sided two-sample test: is mean income in the first group of cap
# less than in the second group?
t.test(df$income ~ df$cap, alternative = "less")