# Load the iris data set from "iris.csv" in the working directory
# (a full path to the file works as well).
irisData <- read.csv(file = "iris.csv")

# The second column can be selected either by position or by name.
irisData[, 2]
irisData$sepal_width

# Print summary statistics for each column of the data set.
summary(irisData)


# Logical masks (TRUE/FALSE per row) marking which rows belong to each class.
irisVirginicaBool <- irisData$class == "Iris-virginica"
irisSetosaBool <- irisData$class == "Iris-setosa"

# Use the masks to pull the rows of each class out of the complete data set.
irisVirginica <- irisData[irisVirginicaBool, ]
irisSetosa <- irisData[irisSetosaBool, ]

# Unpaired two-sample t-test on sepal_width, Virginica vs. Setosa, without
# assuming that the two classes have equal variance.
t.test(
  x = irisVirginica$sepal_width,
  y = irisSetosa$sepal_width,
  paired = FALSE,
  var.equal = FALSE
)


# Open a PDF graphics device; everything plotted until dev.off() goes to this file.
pdf("sepal_length_vs_sepal_width.pdf")

# Scatter plot of sepal length against sepal width, coloured by class.
# The class column is wrapped in factor() so that its integer level codes pick
# the colour. Since R 4.0.0 read.csv() keeps strings as character vectors, and
# indexing the unnamed colour vector by character would give NA for every
# point, so nothing would be drawn. Colours follow the sorted order of the
# class labels (first level = "red", second = "blue", third = "green").
plot(irisData$sepal_length, irisData$sepal_width,
     col = c("red", "blue", "green")[factor(irisData$class)],
     xlab = "Sepal Length", ylab = "Sepal Width",
     main = "Plot of Sepal Length vs Sepal Width")

# Close the device: the plot is finished and is written to the file opened above.
dev.off()


#Now we want to test the difference of the means using the bootstrap
#First we need to load the boot library
library("boot")

# Statistic function for boot(): difference in mean sepal width between the
# "Iris-virginica" and "Iris-setosa" classes of one bootstrap sample.
#
# boot() requires the statistic to take two parameters:
#   data - the original data frame (needs the columns class and sepal_width)
#   b    - the row indices that make up the current bootstrap sample
# boot() calls the function once for each bootstrap sample.
#
# Returns mean(sepal_width of Virginica rows) - mean(sepal_width of Setosa rows).
# If a sample contains no rows of one class, mean() of the empty column gives
# NaN and so does the result.
func <- function(data, b) {
  d <- data[b, ] # Rows of the current bootstrap sample

  # Split the sample by class using the labels of the resampled rows themselves.
  # After resampling, row i of d is generally not row i of the original data,
  # so a mask built from the original data set would pick the wrong rows.
  dVirginica <- d[d$class == "Iris-virginica", ]
  dSetosa <- d[d$class == "Iris-setosa", ]

  # Difference of the mean sepal widths of the two classes
  mean(dVirginica$sepal_width) - mean(dSetosa$sepal_width)
}

# Run the bootstrap on irisData with 1000 resamples, computing the statistic
# defined in func for each of them.
bootMean <- boot(
  data = irisData,
  statistic = func,
  R = 1000
)