#http://www.statmethods.net/graphs/density.html #Load My Data NHIS<-read.csv("http://sites.google.com/site/curtiskephart/data/nhisdata.txt") summary(NHIS) #Clean up data, subset away missing-data/outliers NHIS<-subset(NHIS, NHIS\$weight<300) #Basic Histogram attach(NHIS) hist(weight) #Note that this is same at hist(NHIS\$weight). # The attach() command allowed us to drop the "NHIS\$" #Adjusting Number of Breaks #Default breaks varies. #Set breaks are treated as a "suggestion" hist(weight, breaks=19) #Selecting Bins #suppose you want one bin for each pound: min(weight) max(weight) bins=seq(min(weight),max(weight),1) hist(weight, breaks=bins) #One Bin for every 5 pounds bins=seq(min(weight),max(weight)+5,5) hist(weight, breaks=bins) #Labels -- don't forget those commas! hist(weight, breaks=50, main="Distribution of Weights \n NHIS Survey Data", ylab="Frequency of Weights", xlab="Weight \n (in pounds)", col="grey" ) #Your Data Compared to the Normal Distribution #Draw Histogram, with a normal distribution over it #1) create our histogram, assign it to h, "h<-..." h<-hist(weight, breaks=50, main="Distribution of Weights \n NHIS Survey Data", ylab="Frequency of Weights", xlab="Weight \n (in pounds)", col="grey" ) #2) create 40 bins from our data: xfit <- seq(min(weight),max(weight),length=40) #3) Given our data's mean and sd, find the normal distribution yfit <- dnorm(xfit, mean=mean(weight),,sd=sd(weight)) #4) Fit the normal dist to our data yfit<- yfit*diff(h\$mids[1:2])*length(weight) #5) Plot these lines. lines(xfit,yfit) #Kernal Density Plots density<-density(weight) plot(density) #fancy it up plot(density, main="Kernal Density for Weight", xlab="Weight", ylab="Density", col="red" ) polygon(density,col="grey") #To End #Comparing Distributions of Female Heights to Male Weights #Install the sm package library(sm) male_weight<-subset(NHIS\$weight,NHIS\$SEX==1) female_weight<-subset(NHIS\$weight,NHIS\$SEX==2) weights.f <- factor(NHIS, levels= c(1,2), labels = c("Males", "Females")) #The comparison plot sm.density.compare(NHIS\$weight, SEX, xlab="Male vs. Female Weights", main="Weight Distribution by Gender") # Legend colfill<-c(2:(2+length(levels(weights.f)))) legend(235,0.014,legend=c("Males", "Females"),fill=colfill)