## This library provides functions to simulate and traditionally analyze Inspection experiments ## Rasch functions go somewhere else ## Include the following line into your R script to use these functions ## source("/Path/to/InspectionRealm.R") ################### C O N V E N T I O N S ################################ ## Person Ability Vector: data.frame(ID,Ability) ## Item Difficulty Vector: data.frame(ID,Difficulty) ## Matrices: Persons in rows, items in cols, named after the IDs in the original vectors ## Functions that output a matrix must always return a data.frame with item names (cols) and person names (rows) ################################################################################################################## ########################## S I M U L A T I O N ################################################################### ################################################################################################################## LambdaMatrix<-function(inspectors,defects,jitter=0.2){ ## Computes a matrix of lambda_ij for each inspector i and defect j (probability of defect identification) ## Takes a dataframe(ID,Ability) and dataframe(ID,Difficulty) as input ## Optional argument: amount of jitter added to the ability parameters (based on a normal distribution with var=jitter) ninspectors<-length(inspectors$Ability) ndefects<-length(defects$Difficulty) #matrix with abilities in rows print(ninspectors) minspector<-matrix(rep(inspectors$Ability,ndefects),ncol=ndefects,nrow=ninspectors) #adding the jitter minspector<-matrix(minspector+rnorm(ndefects*ninspectors,0,jitter),ncol=ndefects,nrow=ninspectors) #Matrix mit zweiter Faehigkeit #msecabil<-matrix(rep(Person$SecAbility,noitem),ncol=noitem,nrow=noabil) #matrix with difficulty in cols mdefect<-t(matrix(rep(defects$Difficulty,ninspectors),ncol=ninspectors,nrow=ndefects)) mlambda<-exp(minspector-mdefect)/(1+exp(minspector-mdefect)) #mlambda dflambda<-as.data.frame(mlambda) rownames(dflambda)<-inspectors$ID colnames(dflambda)<-defects$ID dflambda } ResponseMatrix<-function(dflambda){ ## Runs a series of Bernoulli experiments with the probabilities given by mlambda ## In other words: the response matrix is generated mlambda<-as.matrix(dflambda) mresponse<-matrix(rbinom(length(mlambda),1,mlambda),dim(mlambda)) dfresponse<-as.data.frame(mresponse) colnames(dfresponse)<-colnames(dflambda) rownames(dfresponse)<-rownames(dflambda) dfresponse } CombineMatrices<-function(m1,m2){ ## Combines two matrices where inspectors overlap but defects are distinct ## Yields a combined matrix with structurally incomplete design. ## Missed defects have value NA inspectors<-unique(c(rownames(m1),rownames(m2))) defects<-c(colnames(m1),colnames(m2)) cmatrix<-matrix(nrow=length(inspectors),ncol=length(defects)) rownames(cmatrix)<-inspectors colnames(cmatrix)<-defects for (i in inspectors){ for (d in defects){ if (i %in% rownames(m1) && d %in% colnames(m1)){ cmatrix[i,d]<-m1[i,d] } if (i %in% rownames(m2) && d %in% colnames(m2)) { cmatrix[i,d]<-m2[i,d] } } } cmatrix } SimulateInspectionExperiment<-function(inspectors,defects,methodProfile=NULL,plot=TRUE){ ## Simulates an inspection experiment ## Arguments: ## ability dataframe(ID,Ability) for inspectors ## detectability dataframe(ID,Difficulty) for defects ## method profile, a dataframe(ID,Capability) which expresses the capability of a method to help detecting a certain defect (must have the same length as the ddetectability vector) ## Conducts a Bernoulli experiment for each inspector and defect, where the probability is computed via the Rasch logit formula ## Returns a response matrix ## Plots the response matrix as a scatter plot by default # Length of vectors noabil=length(inspectors$ID) noitem=length(defects$ID) # IN case tzhe method profile was not given as an argument if (is.null(methodProfile)){ methodProfile<-data.frame(ID=defects$ID,Capability=rep.int(0,noitem)) } # Here we compute a matrix with probabilities for each inspector detecting a defect with the method mabil<-matrix(rep(inspectors$Ability,noitem),ncol=noitem,nrow=noabil) #print(mabil) mitem<-t(matrix(rep(defects$Difficulty,noabil),ncol=noabil,nrow=noitem)) #print(mitem) mmethod<-t(matrix(rep(methodProfile$Capability,noabil),ncol=noabil,nrow=noitem)) #print(mmethod) probab<-exp(mabil-mitem+mmethod)/(1+exp(mabil-mitem+mmethod)) sample<-array(rbinom(noabil*noitem,1,probab),c(noabil,noitem)) responseMatrix<-data.frame(sample) colnames(responseMatrix)<-defects$ID rownames(responseMatrix)<-inspectors$ID responseMatrix } SimDemo<-function(){ inspectors<-data.frame(ID=c(1:10),Ability=rnorm(10,0,1)) defects<-data.frame(ID=c(1:10),Difficulty=rnorm(10,0,1)) lm<-LambdaMatrix(inspectors,defects,jitter=0.3) rm<-ResponseMatrix(lm) PlotResponseMatrix(GuttmannSort(rm)) } ################################################################################################################## ########################## H A N D L I N G T H E R M ######################################################### ################################################################################################################## NormalizeResponseMatrix<-function(rm){ ## If multiple events were code in the response matrix (e.g. multiple reportings of the same defects) ## this function normalizes it to contain only [0;1] for (r in rownames(rm)){ for (c in colnames(rm)){ if (rm[r,c]>=1){ rm[r,c]<-1 } else { rm[r,c]<-0 } } } rm } FlattenResponseMatrix<-function(rm,stripped=FALSE){ ## Flattening the initial RespMat into a dataframe with: Person, Defect, Response ## Misses (zeroes) can be stripped off (as a preparation for plotResponseMatrix) flat<-c() # Walk throug each cell in the matrix and append in the vector for (p in rownames(rm)){ for (d in colnames(rm)){ flat<-append(flat,c(p,d,rm[p,d])) } } #make it a matrix with three cols, then a dataframe flat<-data.frame(t(matrix(nrow=3,flat))) colnames(flat)<-c("Inspector","Defect","Response") if (stripped==FALSE){ flat } else{ #strip the misses (zeroes) subset(flat,Response==1) } } GuttmannSort<-function(responseMatrix){ ## Sorts a response matrix by margin sums in rows and columns ## Argument: response matrix ## Return value: response matrix responseMatrix<-responseMatrix[order(apply(responseMatrix,1,sum)),] responseMatrix<-responseMatrix[,order(apply(responseMatrix,2,sum))] #recode the names of cols and rows according to their rank #rownames(responseMatrix)<-c(1:length(rownames(rm))) #colnames(responseMatrix)<-c(1:length(colnames(rm))) responseMatrix } PlotResponseMatrix<-function(responseMatrix,title="Response Matrix"){ ## Plots the response matrix as a scatter plot ## No return value noitems=length(colnames(responseMatrix)) noabil=length(rownames(responseMatrix)) x<-c() y<-c() for (i in 1:noitems){ for (a in 1:noabil){ if (responseMatrix[a,i]==1){ x<-c(x,i) y<-c(y,a) } } } plot(x,y, main=title, xlab="Defects", ylab="Inspectors") } PlotIncompleteResponseMatrix<-function(responseMatrix,title="Response Matrix"){ ## Plots the response matrix as a scatter plot ## This uses a different procedure and can also plot incomplete response matrices (where not every defect was exposed to each inspector) ## No return value frm<-FlattenResponseMatrix(responseMatrix) hits<-frm[which(frm$Response==1),] misses<-frm[which(frm$Response==0),] hits<-type.convert(as.matrix(hits)) misses<-type.convert(as.matrix(misses)) plot(hits[,1],hits[,2], main=title, xlab="Inspectors", ylab="Defects",pch=20,col='Green') points(misses[,1],misses[,2],pch=4, col='Red') } ################################################################################################################## ########################## C L A S S I C A L M E A S U R E S ################################################## ################################################################################################################## Thoroughness<-function(hitmatrix){ ## Computes the thoroughness for each inspector from a hit response matrix vthoroughness<-apply(hitmatrix,2,mean) vthoroughness } Validity<-function(hitmatrix,famatrix){ ## Compute the validity for each inspector from the hit matrix and the false alarms matrix sumreal<-apply(hitmatrix,2,sum) sumpred<-apply(famatrix,2,sum)+sumreal vvalidity<-sumreal/sumpred vvalidity } Effectiveness<-function(hitmatrix,famatrix){ veffect<-Thoroughness(hitmatrix)*Validity(hitmatrix,famatrix) veffect } GroupThoroughness<-function(hitmatrix,filterfunc=(function(vec){ifelse(sum(vec)>0,1,0)})){ ## Computes the thoroughness for the whole group from a hit response matrix ## A filter function can be given, for example to give a threshold when a defect is accepted, this defaults to 1 ghit<-t(apply(hitmatrix,1,filterfunc)) vthoroughness<-mean(ghit) vthoroughness } GroupValidity<-function(hitmatrix,famatrix,filterfunc=(function(vec){ifelse(sum(vec)>0,1,0)})){ ## Compute the validity for each inspector from the hit matrix and the false alarms matrix ## A filter function can be given, for example to give a threshold when a defect is accepted, this defaults to 1 ghit<-t(apply(hitmatrix,1,filterfunc)) gfa<-t(apply(famatrix,1,filterfunc)) sumreal<-apply(ghit,1,sum) sumpred<-apply(gfa,1,sum)+sumreal vvalidity<-sumreal/sumpred vvalidity } GroupEffectiveness<-function(hitmatrix,famatrix,filterfunc=(function(vec){ifelse(sum(vec)>0,1,0)})){ veffect<-GroupThoroughness(hitmatrix,filterfunc)*GroupValidity(hitmatrix,famatrix,filterfunc) veffect } SummarizePerformance<-function(hitmatrix,famatrix){ tho=Thoroughness(hitmatrix) val=Validity(hitmatrix,famatrix) eff=Effectiveness(hitmatrix,famatrix) perf<-data.frame(Mean=c(mean(tho),mean(val),mean(eff)),SD=c(sd(tho),sd(val),sd(eff))) row.names(perf)=c("Thoroughness","Validity","Effectiveness") #perf<-data.frame(Thoroughness=data.frame(Mean=mean(tho),SD=sd(tho)),Validity=data.frame(Mean=mean(val),SD=sd(val)),Effectiveness=data.frame(Mean=mean(eff),SD=sd(eff))) perf } SummarizeGroupPerformance<-function(hitmatrix,famatrix,filterfunc=(function(vec){ifelse(sum(vec)>0,1,0)})){ tho=GroupThoroughness(hitmatrix,filterfunc) val=GroupValidity(hitmatrix,famatrix,filterfunc) eff=GroupEffectiveness(hitmatrix,famatrix,filterfunc) perf<-data.frame(Mean=c(mean(tho),mean(val),mean(eff))) row.names(perf)=c("Thoroughness","Validity","Effectiveness") #perf<-data.frame(Thoroughness=data.frame(Mean=mean(tho),SD=sd(tho)),Validity=data.frame(Mean=mean(val),SD=sd(val)),Effectiveness=data.frame(Mean=mean(eff),SD=sd(eff))) perf } TwoGroupCombn<-function(names1,names2,size1,size2){ ## Given two groups (e.g. inspection conditions) this function assembles all possible group mixtures ## with a given subset size for each group. ## Use this to simulate all possible group mixtures for inspection exp with two conditions permg1<-combn(names1,size1) permg2<-combn(names2,size2) result=c() for (c1 in c(1:ncol(permg1))){ for (c2 in c(1:ncol(permg2))){ result=rbind(c(permg1[,c1],permg2[,c2]),result) } } t(result) } SimGroupPerformance<-function(hitmatrix,famatrix,filterfunc,SampleMatrix){ result=c() for (group in c(1:ncol(SampleMatrix))){ result=cbind(result,as.matrix(SummarizeGroupPerformance(hitmatrix[,SampleMatrix[,group]],famatrix[,SampleMatrix[,group]],filterfunc))) } result } MeasureDemo<-function(HitUPI,FalseUPI,HitHE,FalseHE){ ## This is from the analysis of Sabines experiment for HCI 2007 print("UPI unfiltered") SummarizeGroupPerformance(HitUPI,FalseUPI,(function(vec){ifelse(sum(vec)>0,1,0)})) print("HE unfiltered") SummarizeGroupPerformance(HitHE,FalseHE,(function(vec){ifelse(sum(vec)>0,1,0)})) print("UPI filtered") SummarizeGroupPerformance(HitUPI,FalseUPI,(function(vec){ifelse(sum(vec)>1,1,0)})) print("HE filtered") SummarizeGroupPerformance(HitHE,FalseHE,(function(vec){ifelse(sum(vec)>1,1,0)})) print("Both unfiltered") SummarizeGroupPerformance(cbind(HitUPI,HitHE),cbind(FalseUPI,FalseHE),(function(vec){ifelse(sum(vec)>0,1,0)})) print("Both filtered") SummarizeGroupPerformance(cbind(HitUPI,HitHE),cbind(FalseUPI,FalseHE),(function(vec){ifelse(sum(vec)>1,1,0)})) HEGroups<-as.matrix(combn(colnames(FalseHE),4)) UPIGroups<-as.matrix(combn(colnames(FalseUPI),4)) MixedGroups1<-as.matrix(TwoGroupCombn(colnames(FalseHE),colnames(FalseUPI),3,1)) MixedGroups2<-as.matrix(TwoGroupCombn(colnames(FalseHE),colnames(FalseUPI),1,3)) MixedGroups3<-as.matrix(TwoGroupCombn(colnames(FalseHE),colnames(FalseUPI),2,2)) MixedGroups<-cbind(MixedGroups1,MixedGroups2,MixedGroups3) GroupingSimulation=list( HE_Mean=apply(SimGroupPerformance(cbind(HitUPI,HitHE),cbind(FalseUPI,FalseHE),(function(vec){ifelse(sum(vec)>0,1,0)}),HEGroups),1,mean), HE_SD=apply(SimGroupPerformance(cbind(HitUPI,HitHE),cbind(FalseUPI,FalseHE),(function(vec){ifelse(sum(vec)>0,1,0)}),HEGroups),1,sd), UPI_Mean=apply(SimGroupPerformance(cbind(HitUPI,HitHE),cbind(FalseUPI,FalseHE),(function(vec){ifelse(sum(vec)>0,1,0)}),UPIGroups),1,mean), UPI_SD=apply(SimGroupPerformance(cbind(HitUPI,HitHE),cbind(FalseUPI,FalseHE),(function(vec){ifelse(sum(vec)>0,1,0)}),UPIGroups),1,sd), Mix_Mean=apply(SimGroupPerformance(cbind(HitUPI,HitHE),cbind(FalseUPI,FalseHE),(function(vec){ifelse(sum(vec)>0,1,0)}),MixedGroups),1,mean), Mix_SD=apply(SimGroupPerformance(cbind(HitUPI,HitHE),cbind(FalseUPI,FalseHE),(function(vec){ifelse(sum(vec)>0,1,0)}),MixedGroups),1,sd) ) GroupingSimulation_filtered=list( HE_Mean=apply(SimGroupPerformance(cbind(HitUPI,HitHE),cbind(FalseUPI,FalseHE),(function(vec){ifelse(sum(vec)>1,1,0)}),HEGroups),1,mean), HE_SD=apply(SimGroupPerformance(cbind(HitUPI,HitHE),cbind(FalseUPI,FalseHE),(function(vec){ifelse(sum(vec)>1,1,0)}),HEGroups),1,sd), UPI_Mean=apply(SimGroupPerformance(cbind(HitUPI,HitHE),cbind(FalseUPI,FalseHE),(function(vec){ifelse(sum(vec)>1,1,0)}),UPIGroups),1,mean), UPI_SD=apply(SimGroupPerformance(cbind(HitUPI,HitHE),cbind(FalseUPI,FalseHE),(function(vec){ifelse(sum(vec)>1,1,0)}),UPIGroups),1,sd), Mix_Mean=apply(SimGroupPerformance(cbind(HitUPI,HitHE),cbind(FalseUPI,FalseHE),(function(vec){ifelse(sum(vec)>1,1,0)}),MixedGroups),1,mean), Mix_SD=apply(SimGroupPerformance(cbind(HitUPI,HitHE),cbind(FalseUPI,FalseHE),(function(vec){ifelse(sum(vec)>1,1,0)}),MixedGroups),1,sd) ) } ################################################################################################################## ########################## M I S C ############################################################################### ################################################################################################################## randomSelect<-function(pop,size) pop[ round(runif(size,1,length(pop))) ] pdiminish<-function(x,p){ 1-(1-p)^x } ddiminish<-function(x,p){ #pdiminish(x,p)-pdiminish(x-1,p) p*(1-p)^(x-1) } dhetdiminish<-function(p){ ## Die Wahrscheinlichkeit das das letzte Ergebnis positiv ist, nachdem alle vorherigen negativ waren prod(1-p[1:(length(p)-1)])*p[length(p)] }