# Read a CSV file located in the working directory. A full path also works.
irisData = read.csv("iris.csv")

# Two ways to access the second column: by index or by column name.
irisData[, 2]
irisData$sepal_width

# Summarize every column of the data set.
summary(irisData)

# Logical vectors marking which rows belong to which class.
irisVirginicaBool = irisData$class == "Iris-virginica"
irisSetosaBool = irisData$class == "Iris-setosa"

# Extract the rows of each class from the complete data set.
irisVirginica = irisData[irisVirginicaBool, ]
irisSetosa = irisData[irisSetosaBool, ]

# Two-sample t-test on sepal_width for Virginica vs. Setosa, unpaired and
# without assuming equal variances.
t.test(irisVirginica$sepal_width, irisSetosa$sepal_width,
       var.equal = FALSE, paired = FALSE)

# Open a PDF graphics device; everything plotted until dev.off() goes to it.
pdf("sepal_length_vs_sepal_width.pdf")

# Scatter plot coloured by class. The class column is converted to a factor
# so that its integer codes pick a colour; indexing the colour vector with
# plain character labels (what read.csv returns since R 4.0) would give NA.
plot(irisData$sepal_length, irisData$sepal_width,
     col = c("red", "blue", "green")[factor(irisData$class)],
     xlab = "Sepal Length", ylab = "Sepal Width",
     main = "Plot of Sepal Length vs Sepal Width")

# Close the device so the plot is written to the file specified above.
dev.off()

# Next: test the difference of the class means using the bootstrap.
# First load the boot library.
library("boot")

# Statistic function for boot().
#
# Args:
#   data: the data frame handed to boot() (here the full iris data).
#   b:    vector of row indices that make up the current bootstrap resample.
#
# Returns:
#   mean sepal_width of Iris-virginica minus mean sepal_width of Iris-setosa,
#   computed on the resampled rows.
#
# boot() calls this once on the original data and once per bootstrap
# replicate.
func = function(data, b) {
  # Rows of the current bootstrap resample.
  d = data[b, ]

  # The class masks must come from the resampled frame itself: after
  # resampling, the rows of d no longer line up with the original data, so a
  # mask built from the original class column would select arbitrary rows.
  virginicaWidth = d$sepal_width[d$class == "Iris-virginica"]
  setosaWidth = d$sepal_width[d$class == "Iris-setosa"]

  # Difference of the mean sepal widths of the two classes.
  mean(virginicaWidth) - mean(setosaWidth)
}

# Run the bootstrap with 1000 replicates on irisData, using the statistic
# computed by func above.
bootMean = boot(data = irisData, statistic = func, R = 1000)