modules::import(openxlsx) modules::import(bnlearn) modules::import(stringr) modules::import(graph) modules::import(ggplot2) modules::import(stats) modules::import(plotly) modules::import(utils) #Improvements needed: make the selection of first row/column of nodes programmatic FIRST_NODE_COL <- 3 mappings <- c('TestScenario', 'Map_P_BA', 'Map_BA_OP', 'Map_OP_ES') nodeTypes <- c('Input.Nodes', 'Internal.Nodes', 'Published.Nodes') states <- c('impact', 'confidence', 'growth', 'recovery', 'layer') refs <-c(1:length(mappings)) setEmpties <- function(val) { if (is.na(val)) return(0) else return(val) } readXL <- function(fName, sheetN, startRow=1) { xl <- read.xlsx(fName, sheet = sheetN, startRow) #, rowNames = import) return(data.frame(xl, stringsAsFactors = FALSE, row.names = NULL)) } delNA <- function(vec) { return(vec[!is.na(vec)]) } buildExpr <- function(pressStatus) { #pressStatus is a two column DF of name of pressure and status Ii.e. on or off) MEANPRESS = 0 expr <- "(" for (p in 1:nrow(pressStatus)) { if (pressStatus$status[p] == 'On') symbol='>=' else symbol='<=' expr <- paste0(expr, "(\"", pressStatus$code[p], "\"", symbol, MEANPRESS, ") & ") } expr<-substr(expr, 1, nchar(expr)-2) expr<-paste0(expr, ')') return(expr) } parseScenario <- function(press, prefix = 'p') { pressNames <- colnames(press)[2:length(colnames(press))] coefs <- matrix(data=NA, nrow=length(pressNames), ncol=3, dimnames=list(NULL, c('growth', 'confidence', 'layer'))) for (col in 2:ncol(press)) { coefs[col-1,] <- as.numeric(split(press[1, col]))[match(c('growth', 'confidence', 'layer'), states)] } press[is.na(press)] <- 0 if (sum(duplicated(pressNames))>0) { cat('Duplicated pressure node names found') print(pressNodes[duplicated(pressNames)]) } return(list( timeSeq=press, nodes=data.frame(name = pressNames, code=paste0(prefix, seq(1:length(pressNames))), growth = coefs[,'growth'], confidence=coefs[,'confidence'], layer=coefs[,'layer'], stringsAsFactors = FALSE), edges=data.frame(input=NULL, output=NULL, impact=NULL) )) } getInitial <- function(string, letter) { return(tolower(substr(string, start=1, stop=1))) } split <- function(cell) { params <- unlist(strsplit(cell, ',')) values <- rep(0, length(states)) for (n in 1:length(params)) { kvp <- unlist(strsplit(params[n], '=')) ref <- match(getInitial(trimws(kvp[1])), getInitial(states)) if ((ref>0) & (ref<=length(values))) { values[ref] <- kvp[2] } else { print(paste('Unrecognised parameter(s):',params[n])) } } return(values) } cleanTitles <- function(titleV) { return(str_replace_all(titleV, c(' ' = '.', '-' = ''))) } getOutNodes <- function(codes, codeList) { v <- vector(mode='logical', length=length(codes)) for (idx in 1:length(codes)) { v[idx] <- (sum(startsWith(codes[idx], codeList))>0) } return(v) } buildGraph <- function(model, desc) { #model contains the following # node table, edge table #descriptor (desc) contains: #inputCode - the top layer of the model #outputCodes - all subsequent layers to be included in the model inputNodes <- model$nodes$code[which(startsWith(model$nodes$code, desc$inputCode))] inputText <- paste0("[", inputNodes, "]", collapse ="") #do the internal nodes edges <- "" outNodes <- model$nodes$code[getOutNodes(model$nodes$code, desc$outputCodes)] outDist <- vector(mode="list", length=length(outNodes)) for (idx in 1:length(outNodes)) { nodeRef <- match(outNodes[idx], model$nodes$code) rows <- which(model$edges$output == outNodes[idx]) inputsStr <- paste0(model$edges$input[which(model$edges$output == outNodes[idx])], sep=":", collapse="") edges <- paste0(edges, paste0("[", outNodes[idx], "|", substr(inputsStr, start=1, stop=(nchar(inputsStr)-1)), "]")) #Make the coefficient of the distribution coefVal <- setNames(c(model$nodes$growth[nodeRef], model$edges$values[rows]), c("(Intercept)", model$edges$input[rows]) ) #str(coefVal) outDist[[idx]] <- list(coef = coefVal, sd = model$nodes$confidence[nodeRef]) } print('about to build network') print(paste0(inputText, edges)) net <- model2network(paste0(inputText, edges), debug=TRUE) print('network build successful') inDist <- vector(mode="list", length=length(inputNodes)) for (idx in 1:length(inputNodes)) { inRef <- match(inputNodes[idx], model$nodes$code) coefVal <- setNames(model$nodes$growth[inRef], "(Intercept)") inDist[[idx]] <- list(coef = coefVal, sd = model$nodes$confidence[inRef]) } allDists = as.list(setNames(c(inDist, outDist), c(inputNodes, outNodes))) cfit = custom.fit(net, allDists) cat('about to calculate sample distributions') print(outNodes) sampleDists <- cpdist(cfit, nodes = outNodes, evidence = TRUE, n = 10000, method = "lw") summDists <- summary(sampleDists) #stdDev <- sd(sampleDists) print('sample distribution build successful') model$edges$input <- model$nodes$name[match(model$edges$input, model$nodes$code)] model$edges$output <- model$nodes$name[match(model$edges$output, model$nodes$code)] return( list( nodes = model$nodes, edges = model$edges, net = net, cfit = cfit, allDists = allDists, summDists = summDists ) ) } getValidNodes <- function(mapping, prevOutputs, prefix) { #Find row id for input nodes, internal and published inputNodes <- mapping[2:nrow(mapping),1] #check that all input nodes are in the previous table inputNodes <- delNA(mapping[mapping[,"Node.Type"] == 'input', "Nodes"]) if (length(inputNodes)>0) { if (sum(inputNodes %in% prevOutputs$name)0) { cat('Duplicated input node names found') print(inputNodes[duplicated(inputNodes)]) } outNodes <- delNA(colnames(mapping)[FIRST_NODE_COL:ncol(mapping)]) if (sum(duplicated(outNodes))>0) { cat('Duplicated output node names found') print(outNodes[duplicated(outNodes)]) } #check that all internal nodes are in the columns intNodes <- delNA(mapping[mapping[,"Node.Type"] == 'internal', "Nodes"]) if (length(intNodes)>0) { if (sum(intNodes %in% outNodes)0) { sheets <- sort(delNA(match(names, mappings))) cat('starting sheet parse') print(sheets) if (sum(sheets==refs)==length(refs)) { #read all mapping tables scenario <- parseScenario(readXL(fName,mappings[1], startRow=1), prefix='p') p_ba <- parseMapping(readXL(fName,mappings[2], startRow=1), scenario, prefix='ba') p_op <- parseMapping(readXL(fName,mappings[3], startRow=1), p_ba, prefix='op') p_es <- parseMapping(readXL(fName,mappings[4], startRow=1), p_op, prefix='es') #print('building graphs') #p_baNet <- buildGraph(p_ba, desc=list(inputCode='p', outputCodes='ba')) #p_opNet <- buildGraph(p_op, desc=list(inputCode='p', outputCodes=c('ba', 'op'))) #p_esNet <- buildGraph(p_es, desc=list(inputCode='p', outputCodes=c('ba', 'op', 'es'))) print('sheet load completed') return( #list( #pressBioAss = p_baNet, #pressOpProc = p_opNet, #pressEcoServ = p_esNet, p_esMap = p_es #) ) } else { print(paste('Sheets found include', mappings[sheets])) cat('Missing sheets are:') print(refs[-sheets]) } } } }