Linear regression of the Boston Housing dataset using R
Posted on Mon 06 November 2017 in Notebook
Load Dataset¶
In [1]:
# Fetch the Boston housing CSV from the Rdatasets mirror and store a local copy.
# The copy goes into the session's temporary directory so the path is valid on
# every platform (a literal "/tmp" does not exist on Windows).
url <- "https://raw.githubusercontent.com/vincentarelbundock/Rdatasets/master/csv/MASS/Boston.csv"
path <- file.path(tempdir(), "Boston.csv")
download.file(url = url, destfile = path)
In [2]:
# Read the downloaded CSV into a data frame; the first line holds column names.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas TRUE is a reserved word.
df <- read.csv(path, header = TRUE)
In [3]:
# Preview the first six rows (head()'s default) to sanity-check the load.
head(df, n = 6L)
Statistics¶
In [4]:
# Per-column descriptive summary of the data frame.
summary(object = df)
Correlation coefficient¶
In [5]:
# Pairwise Pearson correlation matrix across every column of df.
cor(x = df, method = "pearson")
In [6]:
# Correlation between the rm and medv columns.
# Both arguments are passed as one-column data frames.
cor(
  x = df[, "rm", drop = FALSE],
  y = df[, "medv", drop = FALSE]
)
Plotting¶
In [7]:
# Scatterplot matrix covering every pair of columns in df.
pairs(data.matrix(df))
In [8]:
# Histogram of the crim column (per-capita crime rate), asking for about 50 bins.
# The column is extracted as a plain numeric vector with `[[`, so there is no
# need to coerce a one-column data frame to a matrix first.
hist(
  df[["crim"]],
  breaks = 50,
  main = "Histogram of Crime Rate",
  xlab = "Per-capita-crime-rate-by-town",
  ylab = "Frequency"
)
In [9]:
# Scatter plot of indus (vertical axis) against zn (horizontal axis).
plot(df[["zn"]], df[["indus"]], xlab = "zn", ylab = "indus")
In [10]:
# Scatter plot of medv (vertical axis) against rm (horizontal axis).
plot(df[["rm"]], df[["medv"]], xlab = "rm", ylab = "medv")
Linear Regression¶
In [11]:
# Inputs for the single-predictor model: y is the medv column and x is the rm
# column, each kept as a one-column numeric matrix.
y <- data.matrix(df[, "medv", drop = FALSE])
x <- data.matrix(df[, "rm", drop = FALSE])
In [12]:
# Simple linear regression of medv on rm.
# Passing the data frame via `data =` ties the model to df and labels the
# slope with the real column name ("rm") instead of a placeholder.
fit <- lm(medv ~ rm, data = df)
In [13]:
# Coefficient table and fit statistics for the fitted model.
summary(object = fit)
In [14]:
# Scatter plot of medv against rm, then overlay the fitted regression line.
plot(df[["rm"]], df[["medv"]], xlab = "rm", ylab = "medv")
abline(reg = fit)
Multiple Regression Analysis¶
In [15]:
# Copy each column of df into its own one-column numeric matrix so it can be
# referenced by bare name in a model formula.
# NOTE: the variable `rm` shares its name with base::rm(); calls such as rm(x)
# still resolve to the function, but the overlap is easy to trip over.
crim    <- data.matrix(df[, "crim", drop = FALSE])
zn      <- data.matrix(df[, "zn", drop = FALSE])
indus   <- data.matrix(df[, "indus", drop = FALSE])
chas    <- data.matrix(df[, "chas", drop = FALSE])
nox     <- data.matrix(df[, "nox", drop = FALSE])
rm      <- data.matrix(df[, "rm", drop = FALSE])
age     <- data.matrix(df[, "age", drop = FALSE])
dis     <- data.matrix(df[, "dis", drop = FALSE])
rad     <- data.matrix(df[, "rad", drop = FALSE])
tax     <- data.matrix(df[, "tax", drop = FALSE])
ptratio <- data.matrix(df[, "ptratio", drop = FALSE])
black   <- data.matrix(df[, "black", drop = FALSE])
lstat   <- data.matrix(df[, "lstat", drop = FALSE])
medv    <- data.matrix(df[, "medv", drop = FALSE])

# The response for the regression is medv.
y <- medv
In [16]:
# Multiple linear regression of medv on the thirteen predictors.
# The columns are resolved inside df via `data =`, so the fit does not depend
# on same-named objects in the workspace and the response is reported under
# its real name. Predictors are listed explicitly rather than with `.` so that
# only these columns enter the model.
fit <- lm(
  medv ~ crim + zn + indus + chas + nox + rm + age + dis + rad + tax +
    ptratio + black + lstat,
  data = df
)
Step-wise Selection¶
In [17]:
# Stepwise model selection starting from the full model.
# No scope is supplied, so the search can only drop terms (backward
# elimination); the direction is written out to make that explicit.
result <- step(fit, direction = "backward")
In [18]:
# Coefficient table and fit statistics for the model chosen by step().
summary(object = result)