#http://www.statmethods.net/graphs/density.html
#Load My Data
NHIS<-read.csv("http://sites.google.com/site/curtiskephart/data/nhisdata.txt")
summary(NHIS)
#Clean up data, subset away missing-data/outliers
NHIS<-subset(NHIS, NHIS$weight<300)
#Basic Histogram
attach(NHIS)
hist(weight)
#Note that this is same at hist(NHIS$weight).
# The attach() command allowed us to drop the "NHIS$"
#Adjusting Number of Breaks
#Default breaks varies.
#Set breaks are treated as a "suggestion"
hist(weight, breaks=19)
#Selecting Bins
#suppose you want one bin for each pound:
min(weight)
max(weight)
bins=seq(min(weight),max(weight),1)
hist(weight, breaks=bins)
#One Bin for every 5 pounds
bins=seq(min(weight),max(weight)+5,5)
hist(weight, breaks=bins)
#Labels -- don't forget those commas!
hist(weight, breaks=50,
main="Distribution of Weights \n NHIS Survey Data",
ylab="Frequency of Weights",
xlab="Weight \n (in pounds)",
col="grey"
)
#Your Data Compared to the Normal Distribution
#Draw Histogram, with a normal distribution over it
#1) create our histogram, assign it to h, "h<-..."
h<-hist(weight, breaks=50,
main="Distribution of Weights \n NHIS Survey Data",
ylab="Frequency of Weights",
xlab="Weight \n (in pounds)",
col="grey" )
#2) create 40 bins from our data:
xfit <- seq(min(weight),max(weight),length=40)
#3) Given our data's mean and sd, find the normal distribution
yfit <- dnorm(xfit, mean=mean(weight),,sd=sd(weight))
#4) Fit the normal dist to our data
yfit<- yfit*diff(h$mids[1:2])*length(weight)
#5) Plot these lines.
lines(xfit,yfit)
#Kernal Density Plots
density<-density(weight)
plot(density)
#fancy it up
plot(density,
main="Kernal Density for Weight",
xlab="Weight",
ylab="Density",
col="red"
)
polygon(density,col="grey")
#To End
#Comparing Distributions of Female Heights to Male Weights
#Install the sm package
library(sm)
male_weight<-subset(NHIS$weight,NHIS$SEX==1)
female_weight<-subset(NHIS$weight,NHIS$SEX==2)
weights.f <- factor(NHIS, levels= c(1,2),
labels = c("Males", "Females"))
#The comparison plot
sm.density.compare(NHIS$weight, SEX,
xlab="Male vs. Female Weights",
main="Weight Distribution by Gender")
# Legend
colfill<-c(2:(2+length(levels(weights.f))))
legend(235,0.014,legend=c("Males", "Females"),fill=colfill)