
# A library of common functionality for various plotting and statistics scripts 

# for rollapply, needs to be installed first by install.packages("zoo")
require (zoo)
# for frameApply, needs to be installed first by install.packages("gplots")
require (gplots)
# for mclapply when processing multiple generated architectures,
# needs to be installed first by install.packages("multicore")
require (multicore)

library("boot")

############################################################################
# Constants
############################################################################

# Number of nanoseconds in one second; divisor for converting raw timings to seconds
nanoseconds_in_second <- 1e9


############################################################################
# Utility functions
############################################################################

# Mean throughput derived from a vector of 'request processing finished' timestamps
# @param vEndTimes - numeric vector of finish timestamps (any order)
# @return Finished requests per unit of time of the timestamps
meanThrput <- function(vEndTimes) {
  # earliest and latest timestamp delimit a window in which N-1 requests finished
  vWindow <- range(vEndTimes)
  iFinishedInWindow <- length(vEndTimes) - 1
  iFinishedInWindow / (vWindow[2] - vWindow[1])
}

# Bootstrap 0.95 confidence interval ("basic" type, 1000 replicates) of the mean
# of the given values
# @param vData - vector of values
# @return Vector of (lo, hi) values; NULL when boot.ci provides no 'basic'
#         component (callers in this file check for that case)
meanBCI <- function(vData) {
  # statistic for boot(): mean of the values selected by the resampling index
  fnResampledMean <- function(vValues, vIdx) {
    mean(vValues[vIdx])
  }
  oBoot <- boot(vData, fnResampledMean, R = 1000)
  vBasic <- boot.ci(oBoot, type = "basic", conf = 0.95)$basic
  # elements 4 and 5 of the 'basic' component are the interval endpoints
  c(vBasic[4], vBasic[5])
}

# Prefer the bzip2-compressed variant of a file when one exists.
# Note: if both variants exist, the .bz2 one wins regardless of which is newer!
# @param sFile - file name without the .bz2 suffix
# @return sFile with ".bz2" appended if such a file exists, otherwise sFile unchanged
findCompressedFileVariant <- function(sFile) {
  sCompressed <- paste(sFile, ".bz2", sep = "")
  if (file.exists(sCompressed)) sCompressed else sFile
}

# Element-wise test whether a value lies in the closed interval [fLo, fHi]
# @return logical vector, TRUE where fLo <= fWhat <= fHi
liesInInterval <- function(fWhat, fLo, fHi) {
  bNotBelow <- fWhat >= fLo
  bNotAbove <- fWhat <= fHi
  bNotBelow & bNotAbove
}

# Element-wise test whether the closed intervals [fLo1, fHi1] and [fLo2, fHi2] overlap
# (touching endpoints count as an intersection)
# @return logical vector
intervalsIntersect <- function(fLo1, fHi1, fLo2, fHi2) {
  # two intervals overlap unless one ends before the other begins
  bFirstReachesSecond <- fHi1 >= fLo2
  bSecondReachesFirst <- fHi2 >= fLo1
  bFirstReachesSecond & bSecondReachesFirst
}

############################################################################
# SimQPN output processing functions
############################################################################

# Load a SimQPN output file that was pre-parsed by simqpn-output-parser.awk
# (the .bz2 variant of the file is used when present)
# @param sFile - path of the pre-parsed output
# @return data frame with columns placeType, placeName, color, statsType, statsValue
readSimQPNOutput <- function(sFile) {
  vColumns <- c("placeType", "placeName", "color", "statsType", "statsValue")
  read.table(findCompressedFileVariant(sFile), col.names = vColumns)
}

# Look up statsValue in SimQPN output for a given place, color and statistic
# @param tSimOutput - table as returned by readSimQPNOutput
# @return statsValue of all rows matching the four keys (possibly empty)
selectSimQPNValue <- function(tSimOutput, sPlaceType, sPlaceName, sColor, sStatsType) {
  bPlace <- tSimOutput$placeType == sPlaceType & tSimOutput$placeName == sPlaceName
  bStat <- tSimOutput$color == sColor & tSimOutput$statsType == sStatsType
  tSimOutput$statsValue[bPlace & bStat]
}

# Mean throughput of finished request processing according to SimQPN
# @param tSimOutput - table as returned by readSimQPNOutput
# @return arrival throughput ("arrivThrPut") of Color0 at the "clients" queue,
#         which is taken as the throughput of finished requests
getSimMeanThroughput <- function(tSimOutput) {
  selectSimQPNValue(tSimOutput, "Queue", "clients", "Color0", "arrivThrPut")
}

# Mean population of clients in the system, optionally restricted to clients
# that are being serviced by a thread.
# The total client population is not part of SimQPN output, so it has to be
# supplied (from app output).
# @param tSimOutput - table as returned by readSimQPNOutput
# @param iClientTotalPop - total number of clients
# @param bIncludeThreadpool - if FALSE, clients waiting for a thread from the
#        pool (tokens in the "clients" depository) are not counted
# @return total population minus the clients in the clients queue (and minus
#         the depository population when bIncludeThreadpool is FALSE)
getSimMeanClientPop <- function(tSimOutput, iClientTotalPop, bIncludeThreadpool = TRUE) {
  # clients sitting in the clients queue
  fQueued <- selectSimQPNValue(tSimOutput, "Queue", "clients", "Color0", "meanTkPop")

  # clients waiting for a thread; nothing is subtracted for them when they are to be included
  fWaitingForThread <- if (bIncludeThreadpool) {
    0
  } else {
    selectSimQPNValue(tSimOutput, "Depository", "clients", "Color0", "meanTkPop")
  }

  # whoever is neither queued nor (optionally) waiting has a request in processing
  iClientTotalPop - (fQueued + fWaitingForThread)
}

# Mean response time of request processing in SimQPN, derived from mean
# throughput and client population via Little's Law.
# The total client population is not part of SimQPN output, so it has to be supplied.
# @param tSimOutput - table as returned by readSimQPNOutput
# @param iClientTotalPop - total number of clients
# @param bThreadResponse - get thread response time, not client (don't include
#        waiting for a thread in the thread pool)
# @return population of requests in processing divided by their arrival throughput
getSimMeanResponse <- function(tSimOutput, iClientTotalPop, bThreadResponse = FALSE) {
  # throughput of incoming requests
  fArrivalTput <- selectSimQPNValue(tSimOutput, "Place", "requests", "Color0", "arrivThrPut")

  # thread response excludes the clients that still wait for a thread
  fInProcessing <- getSimMeanClientPop(tSimOutput, iClientTotalPop, !bThreadResponse)

  fInProcessing / fArrivalTput
}

# Total processor utilization reported by SimQPN
# @param tSimOutput - table as returned by readSimQPNOutput
# @return sum of the "meanTkPop" statistic over all colors of the "CPU" queue
getSimTotalProcessorUtilization <- function(tSimOutput) {
  bCpuQueue <- tSimOutput$placeType == "Queue" & tSimOutput$placeName == "CPU"
  bCpuPopulation <- bCpuQueue & tSimOutput$statsType == "meanTkPop"
  sum(tSimOutput$statsValue[bCpuPopulation])
}

# Processor utilization per module class, summed over all CPU place colors
# that the module CPU map assigns to modules of that class.
# @param tSimOutput - table as returned by readSimQPNOutput
# @param tModuleCPUMap - table with (at least) columns color and classname
# @return named numeric vector, one element per class name
getSimModuleClassProcessorUtilization <- function (tSimOutput, tModuleCPUMap) {
  # levels() yields NULL for a non-factor column (e.g. character columns as read
  # by read.table since R 4.0), so derive the sorted unique values in that case
  vClassColumn <- tModuleCPUMap$classname
  vClasses <- if (is.factor (vClassColumn)) levels (vClassColumn) else levels (factor (vClassColumn))

  # per-color token population of the CPU queue
  tCPUUtils <- tSimOutput [tSimOutput$placeType == "Queue" & tSimOutput$placeName == "CPU" & tSimOutput$statsType == "meanTkPop", ]

  vModuleCPUUtils <- vector (mode="numeric")
  for (sClass in vClasses) {
    # all colors that belong to modules of this class
    vColors <- as.vector (tModuleCPUMap [tModuleCPUMap$classname == sClass, "color"])
    vModuleCPUUtils[[sClass]] <- sum (tCPUUtils [tCPUUtils$color %in% vColors,]$statsValue)
  }
  return (vModuleCPUUtils)
}

############################################################################
# PCM output processing functions
############################################################################

# Load the PCM output: a file with two numeric columns separated by semi-colon
# (read with read.csv2 and "." as the decimal mark), holding the client finish
# timestamp and the client service time. The .bz2 variant is used when present.
# @param sFile - path of the PCM output
# @return data frame with columns client_end, client_time and the derived
#         client_start (= client_end - client_time)
readPCMOutput <- function(sFile) {
  sInput <- findCompressedFileVariant(sFile)
  tTimes <- read.csv2(sInput, dec = ".", col.names = c("client_end", "client_time"), colClasses = c("numeric"))
  # start timestamp is not in the file, derive it from the other two columns
  transform(tTimes, client_start = client_end - client_time)
}

############################################################################
# Misc files (module-classes and module-cpumap) output processing functions
############################################################################

# Load module names and classes as a table from a semi-colon separated file
# with a header line, in id;name;classname format (the .bz2 variant is used
# when present)
# @param sFile - path of the module class file
# @return data frame with the columns named by the header line
readModuleClassFile <- function(sFile) {
  read.table(findCompressedFileVariant(sFile), header = TRUE, sep = ";")
}

# Load the file that maps SimQPN colors of the CPU place to module id and
# classname (semi-colon separated, the .bz2 variant is used when present).
# Note that modules can repeat if called from multiple places.
# @param sFile - path of the module CPU map file
# @return data frame with columns color, id, classname
readModuleCPUMapFile <- function(sFile) {
  vColumns <- c("color", "id", "classname")
  read.table(findCompressedFileVariant(sFile), sep = ";", col.names = vColumns)
}

# Get id's of CPU modules from the table of CPU place color mapping.
#
# (Relying on measured isolated times to identify them is not reliable anymore,
# as architectural modules can be measured in isolation as well.)
# @param tModuleCPUMap - table with (at least) the column id
# @return character vector of distinct module id's: the factor levels when id
#         is a factor, otherwise the sorted unique values
getCPUModules <- function (tModuleCPUMap) {
  vIds <- tModuleCPUMap$id
  if (is.factor (vIds)) {
    return (levels (vIds))
  }
  # levels() would be NULL for a character or numeric column
  # (e.g. as read by read.table since R 4.0)
  return (levels (factor (vIds)))
}

# Number of distinct CPU modules in the table of CPU place color mapping.
#
# (Relying on measured isolated times to identify them is not reliable anymore,
# as architectural modules can be measured in isolation as well.)
# @param tModuleCPUMap - table of CPU place color mapping
# @return count of module id's reported by getCPUModules
countCPUModules <- function(tModuleCPUMap) {
  length(getCPUModules(tModuleCPUMap))
}

# Extract set (vector) of unique module classes from a tModules table
# @param tModules - table with (at least) the column classname
# @return character vector of class names: the factor levels when classname is
#         a factor, otherwise the sorted unique values
getUniqueModuleClasses <- function (tModules) {
  vClassColumn <- tModules$classname
  if (is.factor (vClassColumn)) {
    return (levels (vClassColumn))
  }
  # levels() would be NULL for a character column (e.g. as read by read.table since R 4.0)
  return (levels (factor (vClassColumn)))
}

############################################################################
# Misc output processing functions
############################################################################

# Read per-core pfmon outputs and sum the event counts over all cores.
# One file per core is expected, named "<sFilePrefix>.cpu<core>" (the .bz2
# variant is used when present), with columns core, events, counter.
# @param sFilePrefix - common prefix of the per-core files
# @param vCores - core numbers to read
# @return table with one row per counter and its summed events, ordered by counter
readPfmonOutput <- function(sFilePrefix = "pfmon.out", vCores = 0:7) {

    # table of one core: one row per counter
    readCore <- function(iCore) {
      sCoreFile <- findCompressedFileVariant(paste(sFilePrefix, iCore, sep = ".cpu"))
      tCore <- read.table(sCoreFile, col.names = c("core", "events", "counter"))
      # some counters are present in multiple event sets, calculate mean value of these
      tCore <- frameApply(tCore, by = "counter", on = "events", fun = mean)
      # the core column got lost by that, put it back
      tCore[["core"]] <- as.factor(iCore)
      tCore
    }

    # stack the per-core tables one after another
    tAllCores <- Reduce(rbind, lapply(vCores, readCore))

    # sum events up over all cores
    tSummed <- frameApply(tAllCores, by = "counter", on = "events", fun = sum)
    # rename column 'res' to 'events'
    tSummed <- transform(tSummed, events = res, res = NULL)

    # order by counter
    tSummed[order(tSummed$counter), ]
}

# Read a semi-colon separated file of name;value pairs (no header line)
# @param sFile - path of the info file
# @return data frame with columns name and value
readInfo <- function(sFile) {
  vColumns <- c("name", "value")
  read.table(sFile, sep = ";", col.names = vColumns)
}

############################################################################
# App (rpg-generated) output processing functions
############################################################################

# Load the output of an RPG-generated main (e.g. main.out): a semi-colon
# separated file without header line (the .bz2 variant is used when present)
# @param sFile - path of the app output
# @return data frame with columns moduleName, measurementContext,
#         measurementType, measuredValue
readAppOutput <- function(sFile) {
  vColumns <- c("moduleName", "measurementContext", "measurementType", "measuredValue")
  read.table(findCompressedFileVariant(sFile), sep = ";", col.names = vColumns)
}

# Names of modules that have isolated monotonic measurements (i.e. typically
# only the worker modules); the pseudo-module "app" is excluded
# @param tAppOutput - table as returned by readAppOutput
# @return character vector of distinct module names
getModulesWithIsolatedMeasurements <- function(tAppOutput) {
  bIsolatedMonotonic <- tAppOutput$measurementContext == "isolated" & tAppOutput$measurementType == "monotonic"
  bRealModule <- tAppOutput$moduleName != "app"
  vNames <- tAppOutput$moduleName[bIsolatedMonotonic & bRealModule]
  levels(factor(vNames))
}

# Keep the most useful module measurements (isolated monotonic, shared
# threadtime, optionally shared monotonic).
# Only worker modules (those that have isolated monotonic measurements) are considered.
# @param tAppOutput - table as returned by readAppOutput
# @param bMonotonic - TRUE to keep shared monotonic measurements as well
# @return the matching rows without the measurementContext column;
#         measurementType becomes a factor with levels iso, thr (and mon)
filterModuleMeasurements <- function(tAppOutput, bMonotonic) {
  vWorkers <- getModulesWithIsolatedMeasurements(tAppOutput)

  bIsolatedMonotonic <- tAppOutput$measurementContext == "isolated" & tAppOutput$measurementType == "monotonic"
  bSharedThreadtime <- tAppOutput$measurementContext == "shared" & tAppOutput$measurementType == "threadtime"

  # measurement kinds to keep, together with the short names they get afterwards
  bWanted <- bIsolatedMonotonic | bSharedThreadtime
  lShortNames <- list(iso = "isolated_monotonic", thr = "shared_threadtime")
  if (bMonotonic == TRUE) {
    bSharedMonotonic <- tAppOutput$measurementContext == "shared" & tAppOutput$measurementType == "monotonic"
    bWanted <- bWanted | bSharedMonotonic
    lShortNames$mon <- "shared_monotonic"
  }

  # drop non-module data
  tKept <- tAppOutput[bWanted & tAppOutput$moduleName %in% vWorkers, ]

  # fold context and type into a single factor column to make things simpler
  tKept$measurementType <- factor(paste(tKept$measurementContext, tKept$measurementType, sep = "_"))
  tKept$measurementContext <- NULL

  # some renaming to make things shorter
  levels(tKept$measurementType) <- lShortNames

  tKept
}

# Keep just the isolated monotonic measurements of modules (rows of the
# pseudo-module "app" are dropped)
# @param tAppOutput - table as returned by readAppOutput
# @return the matching rows of tAppOutput
filterIsolatedModuleMeasurements <- function(tAppOutput) {
  bIsolated <- tAppOutput$measurementContext == "isolated"
  bMonotonic <- tAppOutput$measurementType == "monotonic"
  bRealModule <- tAppOutput$moduleName != "app"
  tAppOutput[bIsolated & bMonotonic & bRealModule, ]
}

# Keep just the shared threadtime measurements of modules (rows of the
# pseudo-module "app" are dropped)
# @param tAppOutput - table as returned by readAppOutput
# @return the matching rows of tAppOutput
filterSharedThreadtimeModuleMeasurements <- function(tAppOutput) {
  bShared <- tAppOutput$measurementContext == "shared"
  bThreadtime <- tAppOutput$measurementType == "threadtime"
  bRealModule <- tAppOutput$moduleName != "app"
  tAppOutput[bShared & bThreadtime & bRealModule, ]
}

# Keep the isolated monotonic measurements of leaf CPU consuming modules, i.e.
# of the modules whose id appears in the CPU place color mapping
# @param tAppOutput - table as returned by readAppOutput
# @param tModuleCPUMap - table of CPU place color mapping
# @return the matching rows of tAppOutput
filterIsolatedCPUModuleMeasurements <- function(tAppOutput, tModuleCPUMap) {
  vCpuModuleIds <- getCPUModules(tModuleCPUMap)
  tIsolated <- filterIsolatedModuleMeasurements(tAppOutput)
  tIsolated[tIsolated$moduleName %in% vCpuModuleIds, ]
}

# Keep the measurements of the application as a whole: rows of the
# pseudo-module "app" taken in the "shared" context
# @param tAppOutput - table as returned by readAppOutput
# @return the matching rows of tAppOutput
filterAppMeasurements <- function(tAppOutput) {
  bApp <- tAppOutput$moduleName == "app"
  bShared <- tAppOutput$measurementContext == "shared"
  tAppOutput[bApp & bShared, ]
}

# Look up measuredValue in app output for a given module, context and type
# @param tAppOutput - table as returned by readAppOutput
# @return measuredValue of all rows matching the three keys (possibly empty)
selectAppValue <- function(tAppOutput, sModule, sContext, sType) {
  bSource <- tAppOutput$moduleName == sModule & tAppOutput$measurementContext == sContext
  bWanted <- bSource & tAppOutput$measurementType == sType
  tAppOutput$measuredValue[bWanted]
}

# Turn the app;shared;monotonic* measurements into a data frame with one column
# each for thread time and client time/end/start, all divided by
# nanoseconds_in_second. Rows are sorted by the client start time.
# @param tAppOutput - table as returned by readAppOutput
# @return data frame with columns client_time, client_end, thread_time, client_start
getAppTimes <- function(tAppOutput) {
  tApp <- filterAppMeasurements(tAppOutput)

  # values of one measurement type of the app, scaled by nanoseconds_in_second
  scaledValuesOf <- function(sType) {
    as.numeric(tApp[tApp$measurementType == sType, ]$measuredValue) / nanoseconds_in_second
  }

  tTimes <- data.frame(client_time = scaledValuesOf("monotonic_client"),
                       client_end = scaledValuesOf("monotonic_end"),
                       thread_time = scaledValuesOf("monotonic"))
  # start timestamp is not measured, derive it from end and duration
  tTimes <- transform(tTimes, client_start = client_end - client_time)

  tTimes[order(tTimes$client_start), ]
}

# Bring the module times to a single scale around 1: all measured values of a
# module are divided by the mean of its "iso" measurements
# @param tAppOutput - table with columns moduleName, measurementType (with the
#        short type names, e.g. "iso") and measuredValue
# @return the table with rescaled measuredValue
normalizeModuleTimes <- function(tAppOutput) {
  bIsolated <- tAppOutput$measurementType == "iso"
  for (sModule in levels(as.factor(tAppOutput$moduleName))) {
    bThisModule <- tAppOutput$moduleName == sModule
    fIsolatedMean <- mean(tAppOutput[bThisModule & bIsolated, "measuredValue"])
    tAppOutput[bThisModule, "measuredValue"] <- tAppOutput[bThisModule, "measuredValue"] / fIsolatedMean
  }

  tAppOutput
}

# Aggregate the measured values with the given function, separately for each
# combination of module name and measurement type
# @param tAppOutput - table with columns moduleName, measurementType, measuredValue
# @param fun - aggregate function handed over to frameApply
# @return result of frameApply (simplified)
applyModuleTimes <- function(tAppOutput, fun) {
  vGroupBy <- c("moduleName", "measurementType")
  frameApply(tAppOutput, by = vGroupBy, on = "measuredValue", fun = fun, simplify = TRUE)
}

# Get total population of clients from app output
# @param tAppOut - table as returned by readAppOutput
# @return value of the app;config;client-count entry
getAppClients <- function (tAppOut) {
  # look the value up in the table that was passed in
  fClientTotalPop <- selectAppValue (tAppOut, "app", "config", "client-count")
  return (fClientTotalPop)
}

# Mean isolated time per module name (not per module id)
# @param tAppOutput - table as returned by readAppOutput
# @param tModules - table as returned by readModuleClassFile; its id column is
#        matched against moduleName of the app output
# @return result of frameApply: mean of measuredValue for each name
getModuleNameIsolatedTimes <- function(tAppOutput, tModules) {
  tIsolated <- filterIsolatedModuleMeasurements(tAppOutput)
  # attach the module name (and class) to every isolated measurement
  tNamed <- merge(tModules, tIsolated, by.x = "id", by.y = "moduleName")
  frameApply(tNamed, by = "name", on = "measuredValue", fun = mean)
}

# Print expected module durations (in microseconds) in the format of a rpg config file
# One "<name>::expected-duration = <value>" line is appended to the file for each
# module name; the value is measuredValue divided by 1000 and truncated to an
# integer (presumably nanoseconds to microseconds - confirm with the producer
# of tResults).
# @param tResults - table with columns name and measuredValue
# @param file - file (name or connection) the lines are appended to
printModuleExpectedDurations <- function (tResults, file) {
  # levels() yields NULL for a non-factor column, in which case nothing would
  # be written; fall back to the sorted unique names then
  vNameColumn <- tResults$name
  vNames <- if (is.factor (vNameColumn)) levels (vNameColumn) else levels (factor (vNameColumn))

  for (sName in vNames) {
    vDuration <- as.integer (tResults[tResults$name == sName,]$measuredValue / 1000)
    sLine <- sprintf ("%s::expected-duration = %d", sName, vDuration)
    write (sLine, file = file, append=TRUE)
  }
}

# Summarize the frame of isolated module times per module: mean and standard
# deviation of measuredValue, both divided by nanoseconds_in_second (seconds)
# @param tModuleIsolatedOutput - table with columns moduleName and measuredValue
# @return result of frameApply with the values "mean" and "sd" for each module
processModuleIsolatedTimes <- function(tModuleIsolatedOutput) {
  # summary of the rows of one module
  summarizeModule <- function(tGroup) {
    vNanos <- tGroup$measuredValue
    c(mean = mean(vNanos) / nanoseconds_in_second, sd = sd(vNanos) / nanoseconds_in_second)
  }
  frameApply(tModuleIsolatedOutput, by = "moduleName", on = "measuredValue", fun = summarizeModule)
}

# Process the frame of shared thread module times to calculate mean and sd per module (in seconds)
# Fallback to isolated times where shared times are not available
# @param tModuleSharedOutput - table with columns moduleName and measuredValue (shared measurements)
# @param tModuleIsolatedOutput - table with columns moduleName and measuredValue (isolated measurements)
# @return table with one row per module that has isolated measurements, with
#         columns moduleName, mean and sd
processModuleSharedTimes <- function (tModuleSharedOutput, tModuleIsolatedOutput) {
  # summary of the rows of one module
  process <- function (x) {
    vTimes <- x$measuredValue
    c (mean = mean (vTimes) / nanoseconds_in_second, sd = sd (vTimes) / nanoseconds_in_second)
  }

  # first, create a merged table with both isolated and shared times
  tIso <- frameApply (tModuleIsolatedOutput, by="moduleName", on="measuredValue", fun=process)
  tShared <- frameApply (tModuleSharedOutput, by="moduleName", on="measuredValue", fun=process)

  # missing shared values will have NA's
  tMerged <- merge (tIso, tShared, by="moduleName", all.x=TRUE, suffixes=c("", ".shr"))

  # takes the shared value where it is available and the isolated one otherwise;
  # done by replacement rather than arithmetic so that an NA or infinite
  # isolated value cannot leak into rows that do have a shared value
  mergeColumns <- function (vIso, vShared) {
    bMissing <- is.na (vShared)
    vShared [bMissing] <- vIso [bMissing]
    return (vShared)
  }

  tMerged <- transform (tMerged, mean = mergeColumns (mean, mean.shr), sd = mergeColumns (sd, sd.shr), mean.shr = NULL, sd.shr=NULL)

  return (tMerged)
}


# Summarize the app output per worker module (in seconds), with some extra info:
# mean, sd and bootstrap confidence interval of the isolated times, plus the
# CPU utilization and class name of the module
# @param tAppOutput - table as returned by readAppOutput
# @param tModuleCPUMap - table of CPU place color mapping
# @param tSimOutput - table as returned by readSimQPNOutput
# @return one row per module: the time summary merged with cpuUtil and classname
createModuleSummary <- function(tAppOutput, tModuleCPUMap, tSimOutput) {
  tIsolated <- filterIsolatedCPUModuleMeasurements(tAppOutput, tModuleCPUMap)

  # summary of the isolated measurements of one module
  summarizeModule <- function(tGroup) {
    vNanos <- tGroup$measuredValue
    vMeanCI <- meanBCI(vNanos) / nanoseconds_in_second
    c(mean = mean(vNanos) / nanoseconds_in_second, sd = sd(vNanos) / nanoseconds_in_second, ciLo = vMeanCI[1], ciHi = vMeanCI[2])
  }
  tTimes <- frameApply(tIsolated, by = "moduleName", on = "measuredValue", fun = summarizeModule)

  # per-color CPU utilizations, reduced to the columns color and cpuUtil
  bCpuPopulation <- tSimOutput$placeType == "Queue" & tSimOutput$placeName == "CPU" & tSimOutput$statsType == "meanTkPop"
  tUtil <- subset(tSimOutput[bCpuPopulation, ], select = c("color", "statsValue"))
  tUtil <- transform(tUtil, cpuUtil = statsValue, statsValue = NULL)

  # attach module id and classname to every color
  tUtil <- merge(tModuleCPUMap, tUtil, by = "color")

  # sum up utilizations belonging to the same module but of different color
  # (= module can get called on multiple paths in the hierarchy)
  tUtil <- frameApply(tUtil, by = "id", on = "cpuUtil", fun = sum)
  tUtil <- transform(tUtil, cpuUtil = res, res = NULL)

  # map of id to classname, appended to the utilizations
  tClassOfId <- frameApply(tModuleCPUMap, by = "id", on = "classname", fun = unique)
  tUtil <- merge(tUtil, tClassOfId, by = "id")

  # finally combine utilizations with the measured times
  merge(tTimes, tUtil, by.x = "moduleName", by.y = "id")
}

############################################################################
# Summaries across many architectures
############################################################################

# Names of the entries of the current working directory that start with "rpg."
# @return character vector of entry names
getRpgDirs <- function() {
  list.files(pattern = "^rpg\\..*")
}

# get vector of rpg.* directories, restricted to those that contain given
# file (such as simulation results when not all directories contain it)
# @param filename - name of the file (or glob pattern) looked for in each rpg.* directory
# @return character vector of directory names, character(0) if nothing matches
getRpgDirsContaining <- function (filename) {
  files <- Sys.glob (paste ("rpg.*", filename, sep="/"))
  # dirname is vectorized and keeps the result a character vector even when
  # nothing matched (sapply would return an empty list then)
  dirs <- dirname (files)
  return (dirs)
}

# Summary of one architecture, separate function needed for parallel processing
# @see getStatsAcrossArchitectures for parameter descriptions
# @param sDir - directory of the architecture
# @return list with the elements
#   main        - one-row data frame with all simple values
#   modClasses  - number of modules per class name
#   modCPUUtils - simulated CPU utilization per module class
#   modSummary  - per worker module summary (see createModuleSummary)
getArchitectureStats <- function (sDir, sOutputSuffix="", sMainSubDir=".", bSimShr=FALSE, bSimExp=FALSE, bPCM=TRUE, sPCMSubDir=".", bPfmon=FALSE) {

  # progress output, one line per processed directory
  print (sDir)

  # builds "<sDir>/<sMainSubDir>/<sStem><sOutputSuffix><sTail>"
  mainFile <- function (sStem, sTail) {
    paste (sDir, "/", sMainSubDir, "/", sStem, sOutputSuffix, sTail, sep="")
  }

  tAppOutput <- readAppOutput (mainFile ("main", ".out"))

  iClientCount <- selectAppValue (tAppOutput, "app", "config", "client-count")
  iThreadCount <- selectAppValue (tAppOutput, "app", "config", "thread-count")

  tSimOutput <- readSimQPNOutput (mainFile ("sim", ".out"))

  if (bSimExp) {
    tSimExpOutput <- readSimQPNOutput (mainFile ("sim", "-exp.out"))
  }

  if (bSimShr) {
    tSimShrOutput <- readSimQPNOutput (mainFile ("sim", "-shr.out"))
  }

  fSimRespMean <- getSimMeanResponse (tSimOutput, iClientCount)
  fSimTputMean <- getSimMeanThroughput (tSimOutput)

  fSimClientPop <- getSimMeanClientPop (tSimOutput, iClientCount)
  fSimCPUUtil <- getSimTotalProcessorUtilization (tSimOutput)

  if (bSimExp) {
    fSimExpRespMean <- getSimMeanResponse (tSimExpOutput, iClientCount)
    fSimExpTputMean <- getSimMeanThroughput (tSimExpOutput)
  }

  if (bSimShr) {
    fSimShrRespMean <- getSimMeanResponse (tSimShrOutput, iClientCount)
    fSimShrTputMean <- getSimMeanThroughput (tSimShrOutput)
  }

  tAppTimes <- getAppTimes (tAppOutput)

  if (bPCM) {
    sPCMFile <- paste ("main", sOutputSuffix, ".csv", sep="")
    # TODO: better warmup criterion?
    # this assumes there are 10k results
    tPCM <- readPCMOutput (paste (sDir, sMainSubDir, sPCMSubDir, sPCMFile, sep="/"))[-(1:1000),]
    fPCMRespMean <- mean (tPCM$client_time)
    fPCMRespSd <- sd (tPCM$client_time)
    fPCMTputMean <- meanThrput (tPCM$client_end)
    # TODO: use batch means?
    vPCMRespCI <- meanBCI (tPCM$client_time)
    # sometimes the results are so uniform that it cannot calculate CI
    # in this case we set the bounds to the mean
    if (is.null (vPCMRespCI)) {
      fPCMRespCILo <- fPCMRespMean
      fPCMRespCIHi <- fPCMRespMean
    } else {
      fPCMRespCILo <- vPCMRespCI [1]
      fPCMRespCIHi <- vPCMRespCI [2]
    }
  }

  # the first max(clients, threads) client times are dropped (presumably warm-up)
  iSkipClients <- max (iClientCount, iThreadCount)
  vAppClientTime <- tAppTimes$client_time [-(1:iSkipClients)]

  vAppThreadTime <- tAppTimes$thread_time [-(1:iThreadCount)]
  fAppThrRespMean <- mean (vAppThreadTime)
  fAppThrRespSd <- sd (vAppThreadTime)

  fAppRespMean <- mean (vAppClientTime)
  fAppRespSd <- sd (vAppClientTime)
  iAppNumSamples <- length (vAppClientTime)

  # confidence interval only with enough samples, NA otherwise
  fAppRespCILo <- NA
  fAppRespCIHi <- NA
  if (iAppNumSamples > 50) {
    vAppRespCI <- meanBCI (vAppClientTime)
    # as with the PCM results, the CI cannot be calculated for very uniform
    # samples; the bounds stay NA then, because a NULL value would silently
    # drop the column from the data frame built below
    if (!is.null (vAppRespCI)) {
      fAppRespCILo <- vAppRespCI [1]
      fAppRespCIHi <- vAppRespCI [2]
    }
  }

  vAppClientEnd <- tAppTimes$client_end [-(1:iThreadCount)]
  fAppTputMean <- meanThrput (vAppClientEnd)

  # number of modules per class
  tModules <- readModuleClassFile (paste (sDir, "module-classes.out", sep="/"))
  tModuleClasses <- frameApply (tModules, on="id", by="classname", fun=function(xi) c(count=nrow(xi)))

  tModuleCPUMap <- readModuleCPUMapFile (paste (sDir, "module-cpumap.out", sep="/"))
  vModuleClassCPUUtils <- getSimModuleClassProcessorUtilization (tSimOutput, tModuleCPUMap)
  iAllModulesCount <- length (tModules$id)

  tWorkerModuleSummary <- createModuleSummary (tAppOutput, tModuleCPUMap, tSimOutput)

  iModulesCount <- countCPUModules (tModuleCPUMap)

  # read various preprocessed info about the architecture
  tInfo <- readInfo (paste (sDir, "/info.out", sep=""))

  iMaxDepth <- tInfo[tInfo$name == "maxDepth", ]$value

  fMeanDepth <- tInfo[tInfo$name == "meanDepth", ]$value

  # merge all simple values to a data frame
  tResults <- data.frame (dir=sDir, workerModCount=iModulesCount, archClientCount = iClientCount, archThreadCount = iThreadCount,
    allModCount=iAllModulesCount, archMaxDepth = iMaxDepth, archMeanDepth = fMeanDepth,
    simClientPop=fSimClientPop, simCpuUtil=fSimCPUUtil,
    simRespMean=fSimRespMean, simTputMean=fSimTputMean,
    appRespMean=fAppRespMean, appRespSd=fAppRespSd, appTputMean=fAppTputMean, appSamplesCount=iAppNumSamples,
    appRespCILo=fAppRespCILo, appRespCIHi=fAppRespCIHi,
    appThrRespMean=fAppThrRespMean, appThrRespSd=fAppThrRespSd)

  if (bPCM) {
    tResults <- cbind (tResults, pcmRespMean=fPCMRespMean, pcmRespSd=fPCMRespSd, pcmTputMean=fPCMTputMean,
      pcmRespCILo=fPCMRespCILo, pcmRespCIHi=fPCMRespCIHi)
  }

  if (bSimExp) {
    tResults <- cbind (tResults, simExpRespMean=fSimExpRespMean, simExpTputMean=fSimExpTputMean)
  }

  if (bSimShr) {
    tResults <- cbind (tResults, simShrRespMean=fSimShrRespMean, simShrTputMean=fSimShrTputMean)
  }

  if (bPfmon) {
    # read pfmon results
    tPf <- readPfmonOutput (sFilePrefix = mainFile ("pfmon", ".out"))
    # convert it to a named vector
    vPf <- tPf$events
    names (vPf) <- tPf$counter
    # append to results
    tResults <- cbind (tResults, t(vPf))
  }

  # results that are not a simple value per directory have to be returned in separate lists or vectors,
  # all joined together with the simple values vector into one list that can be returned
  lRet <- list (main=tResults, modClasses=tModuleClasses, modCPUUtils=vModuleClassCPUUtils, modSummary=tWorkerModuleSummary)

  return (lRet)
}

# Wrapper for getArchitectureStats that converts the number of clients to the
# output suffix "-clients-<n>", so it can be used in lapply
# @param lDirsClients - list with the elements dir (architecture directory) and
#        clients (number of clients)
# @param ... - passed on to getArchitectureStats
# @return result of getArchitectureStats
getArchitectureStatsForClients <- function(lDirsClients, ...) {
  sClientsSuffix <- paste("-clients-", lDirsClients$clients, sep = "")
  getArchitectureStats(lDirsClients$dir, sOutputSuffix = sClientsSuffix, ...)
}

# Statistics over many architectures
# @param sRootDir - path to root dir with architectures (default: current directory)
# @param vDirs - list of directories with architectures (default: all rpg.* )
# @param sOutputSuffix - optional suffix of input files (default: none)
# @param sMainSubDir - subdirectory of each architecture directory that holds the main/sim/pfmon outputs (default: the directory itself)
# @param bSimShr - process also output of simulation using shared measurements for operations (default: false)
# @param bSimExp - process also output of simulation using exponential service times of operations (default: false)
# @param bPCM - process PCM output (default: TRUE)
# @param sPCMSubDir - subdirectory containing PCM output (default: root)
# @param bPfmon - process also pfmon output (default: false)
# @param bClients - process results with multiple clients
# @return NA if there are no directories to process; otherwise a list with the
#         data frame 'main' (one row per processed architecture) and, for every
#         other element returned by getArchitectureStats, a list of that
#         element indexed by the number of the architecture
getStatsAcrossArchitectures <- function (sRootDir=NULL, vDirs=NULL, sOutputSuffix="", sMainSubDir=".", bSimShr=FALSE, bSimExp=FALSE, bPCM=TRUE, sPCMSubDir=".", bPfmon=FALSE, bClients=FALSE) {

  if (!is.null (sRootDir)) {
    # setwd returns the previous directory; it is restored on every exit path,
    # including the early return below and errors
    sOldRoot <- setwd (sRootDir)
    on.exit (setwd (sOldRoot), add=TRUE)
  }

  if (is.null (vDirs)) {
    vDirs <- getRpgDirs ()
  }

  if (length (vDirs) < 1) {
    print ("no dirs found")
    return (NA)
  }

  if (!bClients) {
    # get all the results in parallel in a raw form
    lRawResults <- mclapply (vDirs, getArchitectureStats, sOutputSuffix=sOutputSuffix, sMainSubDir=sMainSubDir, bSimShr=bSimShr, bSimExp=bSimExp, bPCM=bPCM, sPCMSubDir=sPCMSubDir, bPfmon=bPfmon)
  } else {
    # one work item per (directory, client count) pair
    # NOTE(review): readClients is not defined in this part of the file
    lDirsClients <- list ()
    for (sDir in vDirs) {
      vClients <- readClients (paste (sDir, sMainSubDir, sep="/"))
      for (iClient in vClients) {
        lDirsClients <- c (lDirsClients, list (list (dir=sDir, clients=iClient)))
      }
    }
    lRawResults <- mclapply (lDirsClients, getArchitectureStatsForClients, sMainSubDir=sMainSubDir, bSimShr=bSimShr, bSimExp=bSimExp, bPCM=bPCM, sPCMSubDir=sPCMSubDir, bPfmon=bPfmon)
  }

  # stack the one-row data frames of simple values in a single rbind call,
  # which keeps the column data types
  tResults <- do.call (rbind, unname (lapply (lRawResults, function (lDir) { lDir$main })))

  lRet <- list (main=tResults)

  # results that are not a simple value per directory have to be returned in separate lists or vectors,
  # indexed by the number of the architecture
  # all these are joined together with the data frame in one list that can be returned
  for (sName in names (lRawResults[[1]])) {
    if (sName != "main") {
      lRet [[sName]] <- lapply (lRawResults, function (lDir) { lDir[[sName]] })
    }
  }

  return (lRet)
}

# Keep only the architectures whose number of samples from the app
# measurements exceeds iMinSamples
# @param lStats - list as returned by getStatsAcrossArchitectures
# @param iMinSamples - exclusive lower bound for main$appSamplesCount
# @return list with the same elements, each reduced to the selected
#         architectures (rows for data frames, elements otherwise)
filterStatsWithEnoughAppSamples <- function(lStats, iMinSamples) {
  bEnough <- lStats$main$appSamplesCount > iMinSamples

  lapply(lStats, function(x) {
    if (is.data.frame(x)) x[bEnough, ] else x[bEnough]
  })
}

# Coefficient of variation of the module timings, weighted by CPU utilization,
# for every architecture
# @param lStats - list as returned by getStatsAcrossArchitectures; its
#        modSummary tables need the columns sd, mean and cpuUtil
# @return numeric vector with sum (sd / mean * cpuUtil) over the modules of
#         each architecture, numeric(0) when there is no architecture
getWeightedModuleTimingCOV <- function (lStats) {
  iDirs <- length (lStats$main$dir)

  vWeightedCOV <- vector (mode="numeric", length=iDirs)

  # seq_len makes the loop a no-op for zero architectures (1:0 would run twice)
  for (iDir in seq_len (iDirs)) {
    modSum <- lStats$modSummary[[iDir]]
    vWeightedCOV [iDir] <- sum (modSum$sd / modSum$mean * modSum$cpuUtil)
  }

  return (vWeightedCOV)
}

# quick and dirty check if isolated measurements changed significantly between two runs
# Prints every directory of the old run and, below it, each module whose
# old/new ratio of mean isolated time is outside [0.95, 1.05], as
# "<module> <classname> <ratio>".
# The architectures of both runs are paired by position. Modules that cannot be
# compared (no single row in either table, or an NA ratio) are skipped.
# @param lStatsOld - list as returned by getStatsAcrossArchitectures (old run)
# @param lStatsNew - list as returned by getStatsAcrossArchitectures (new run)
compareIsolatedModuleMeasurements <- function (lStatsOld, lStatsNew) {
  vDirs <- lStatsOld$main$dir

  for (iDir in seq_along (vDirs)) {

    print (as.character (vDirs [iDir]))

    tModOld <- lStatsOld$modSummary [[iDir]]
    tModNew <- lStatsNew$modSummary [[iDir]]

    # NOTE(review): the other functions in this file read the module mean from
    # a column called "mean"; "mean.measuredValue" is still preferred when present
    sMeanCol <- if ("mean.measuredValue" %in% names (tModOld)) "mean.measuredValue" else "mean"

    # levels() would be NULL for a non-factor column
    vNameColumn <- tModOld$moduleName
    vModules <- if (is.factor (vNameColumn)) levels (vNameColumn) else levels (factor (vNameColumn))

    for (sMod in vModules) {
      fOld <- tModOld [tModOld$moduleName == sMod, sMeanCol]
      # the new table has to be indexed by its own module names, the rows of
      # the two tables need not be in the same order
      fNew <- tModNew [tModNew$moduleName == sMod, sMeanCol]

      if (length (fOld) != 1 || length (fNew) != 1) {
        next
      }

      fRatio <- fOld / fNew
      if (!is.na (fRatio) && (fRatio < 0.95 || fRatio > 1.05)) {
        sClass <- tModOld [tModOld$moduleName == sMod, "classname"]
        print (paste (sMod, sClass, fRatio))
      }
    }
  }
}

# determine how good is response time prediction over multiple architectures
# @param tStats - table with columns appRespMean, appRespCILo, appRespCIHi,
#        pcmRespMean, pcmRespCILo, pcmRespCIHi (e.g. the 'main' element
#        returned by getStatsAcrossArchitectures with bPCM=TRUE)
# @return numeric vector, one element per row:
#         1 - the mean of one side lies within the CI of the other side
#         2 - no mean lies within the other CI, but the two CI's intersect
#         0 - neither
determineRespMatch <- function (tStats) {
  with (tStats, {
    # where mean of pred/meas is within CI of meas/pred
    vAppMeanInPcmCI <- liesInInterval (appRespMean, pcmRespCILo, pcmRespCIHi)
    vPcmMeanInAppCI <- liesInInterval (pcmRespMean, appRespCILo, appRespCIHi)
    vMeansInIntervals <- vAppMeanInPcmCI | vPcmMeanInAppCI
    # CI's intersect
    vIntervalsIntersect <- intervalsIntersect (appRespCILo, appRespCIHi, pcmRespCILo, pcmRespCIHi)
    vMeansInIntervals * 1 + (!vMeansInIntervals & vIntervalsIntersect) * 2
  })
}
