Syntax conformation
This commit is contained in:
243
Parses.R
243
Parses.R
@@ -1,27 +1,27 @@
|
||||
|
||||
modules::import(openxlsx)
|
||||
modules::import(bnlearn)
|
||||
modules::import(openxlsx)
|
||||
modules::import(stringr)
|
||||
modules::import(graph)
|
||||
modules::import(ggplot2)
|
||||
modules::import(stats)
|
||||
modules::import(plotly)
|
||||
modules::import(utils)
|
||||
|
||||
|
||||
#Improvements needed: make the selection of first row/column of nodes programmatic
|
||||
# Index of the first column of a mapping table that holds an output node;
# the parsers iterate over FIRST_NODE_COL:ncol(mapping).
FIRST_NODE_COL <- 3

# Worksheet names that are matched against the sheet names of the workbook.
mappings <- c("TestScenario", "Map_P_BA", "Map_BA_OP", "Map_OP_ES")
nodeTypes <- c("Input.Nodes", "Internal.Nodes", "Published.Nodes")
# Parameter names recognised inside a cell; split() returns its values in
# this order.
states <- c("impact", "confidence", "growth", "recovery", "layer")
# Positions 1..length(mappings), compared against the sheets actually found.
refs <- seq_along(mappings)
|
||||
|
||||
# Replace missing values with 0.
#
# val: a single value or a vector.
# Returns 0 for a single NA, the value itself for a single non-NA value,
# and for longer vectors the same vector with every NA replaced by 0.
# Zero-length input is returned unchanged.
setEmpties <- function(val) {
  if (length(val) == 0) {
    return(val)
  }

  # Scalar case: a single NA of any type becomes numeric 0.
  if (length(val) == 1) {
    if (is.na(val)) {
      return(0)
    }
    return(val)
  }

  # Vector case: `if (is.na(val))` would fail on length > 1, so replace
  # element-wise instead.
  val[is.na(val)] <- 0
  val
}
|
||||
|
||||
# Read one worksheet of an xlsx workbook into a plain data frame.
#
# fName:    path of the workbook.
# sheetN:   sheet to read (passed to read.xlsx as `sheet`).
# startRow: first row to read (defaults to 1).
# Returns a data.frame with default row names and strings kept as character.
readXL <- function(fName, sheetN, startRow = 1) {
  sheetData <- read.xlsx(fName, sheet = sheetN, startRow = startRow)
  data.frame(sheetData, stringsAsFactors = FALSE, row.names = NULL)
}
|
||||
@@ -32,73 +32,87 @@ delNA <- function(vec) {
|
||||
|
||||
# Build a logical expression string with one clause per pressure.
#
# pressStatus: a data frame with a `code` column (pressure code) and a
#   `status` column (i.e. "On" or anything else, treated as off).
# Each row becomes ("<code>">=0) when its status is "On" and ("<code>"<=0)
# otherwise; the clauses are joined with " & " and wrapped in parentheses.
# Stops with an error when there are no rows or when a status is missing.
buildExpr <- function(pressStatus) {
  MEANPRESS <- 0

  if (nrow(pressStatus) == 0) {
    stop("buildExpr: pressStatus has no rows", call. = FALSE)
  }

  isOn <- pressStatus$status == "On"
  if (length(isOn) != nrow(pressStatus) || anyNA(isOn)) {
    stop("buildExpr: pressStatus$status is missing or contains NA", call. = FALSE)
  }

  symbol <- ifelse(isOn, ">=", "<=")
  clauses <- paste0("(\"", pressStatus$code, "\"", symbol, MEANPRESS, ")")

  # The space before the closing parenthesis is kept so the result matches
  # the string produced by the previous loop-and-trim implementation.
  paste0("(", paste0(clauses, collapse = " & "), " )")
}
|
||||
|
||||
# Parse the scenario table into the node/edge structure used by the
# mapping parsers.
#
# press:  data frame of the scenario sheet. The first column is skipped;
#         every other column is one pressure node, and row 1 of each of
#         those columns holds the parameter string parsed by split().
# prefix: prefix of the generated node codes (codes are <prefix>1, <prefix>2, ...).
#
# Returns a list with
#   timeSeq - `press` with every NA replaced by 0,
#   nodes   - data frame with name, code, growth, confidence and layer,
#   edges   - an empty data frame.
parseScenario <- function(press, prefix = "p") {
  pressNames <- colnames(press)[-1]
  coefNames <- c("growth", "confidence", "layer")

  coefs <- matrix(
    data = NA,
    nrow = length(pressNames),
    ncol = length(coefNames),
    dimnames = list(NULL, coefNames)
  )

  # seq_along() keeps the loop empty when there are no pressure columns
  # (2:ncol(press) would run backwards in that case).
  for (idx in seq_along(pressNames)) {
    cellValues <- as.numeric(split(press[1, idx + 1]))
    coefs[idx, ] <- cellValues[match(coefNames, states)]
  }

  press[is.na(press)] <- 0

  if (anyDuplicated(pressNames) > 0) {
    cat("Duplicated pressure node names found")
    # Previously printed the undefined `pressNodes`, which raised an error
    # exactly when duplicates were present.
    print(pressNames[duplicated(pressNames)])
  }

  list(
    timeSeq = press,
    nodes = data.frame(
      name = pressNames,
      # rep() makes the code vector zero-length when there are no nodes
      code = paste0(rep(prefix, length(pressNames)), seq_along(pressNames)),
      growth = coefs[, "growth"],
      confidence = coefs[, "confidence"],
      layer = coefs[, "layer"],
      stringsAsFactors = FALSE
    ),
    edges = data.frame(input = NULL, output = NULL, impact = NULL)
  )
}
|
||||
|
||||
# Lower-cased first character of each element of `string`.
#
# string: character vector.
# letter: accepted but not used by the function body.
getInitial <- function(string, letter) {
  firstChar <- substr(string, start = 1, stop = 1)
  tolower(firstChar)
}
|
||||
|
||||
# Parse a cell of the form "key=value, key=value" into one value per state.
#
# cell: a single character string; keys are matched to `states` by their
#       lower-cased first letter.
# Returns a vector with one element per entry of `states`, in that order.
# Unset entries are 0; as soon as any parameter is stored the vector is
# character (callers convert with as.numeric()). A missing (NA) or empty
# cell yields all zeros. Parameters whose key matches no state are reported
# with print() and skipped.
#
# NOTE(review): this definition masks base::split for code in this file.
split <- function(cell) {
  values <- rep(0, length(states))

  # A missing cell carries no parameters.
  if (length(cell) == 0 || all(is.na(cell))) {
    return(values)
  }

  params <- unlist(strsplit(cell, ","))
  stateInitials <- getInitial(states)

  for (param in params) {
    kvp <- unlist(strsplit(param, "="))
    ref <- match(getInitial(trimws(kvp[1])), stateInitials)

    # match() returns NA for an unknown key; testing `ref > 0` on NA raised
    # an error instead of reaching the message below.
    if (!is.na(ref)) {
      values[ref] <- kvp[2]
    } else {
      print(paste("Unrecognised parameter(s):", param))
    }
  }

  values
}
|
||||
|
||||
# Normalise titles: every space becomes "." and every "-" is removed.
#
# titleV: character vector of titles.
# Returns the cleaned character vector.
cleanTitles <- function(titleV) {
  replacements <- c(" " = ".", "-" = "")
  str_replace_all(titleV, replacements)
}
|
||||
|
||||
# Flag the codes that start with at least one of the given prefixes.
#
# codes:    character vector of node codes.
# codeList: character vector of prefixes.
# Returns a logical vector of the same length as `codes` (logical(0) for
# empty `codes`).
getOutNodes <- function(codes, codeList) {
  # vapply() over the codes themselves avoids the 1:length(codes) trap,
  # which produced a spurious NA element for empty input.
  vapply(
    codes,
    function(code) sum(startsWith(code, codeList)) > 0,
    logical(1),
    USE.NAMES = FALSE
  )
}
|
||||
|
||||
@@ -112,38 +126,38 @@ buildGraph <- function(model, desc) {
|
||||
#outputCodes - all subsequent layers to be included in the model
|
||||
|
||||
inputNodes <- model$nodes$code[which(startsWith(model$nodes$code, desc$inputCode))]
|
||||
inputText <- paste0("[", inputNodes, "]", collapse ="")
|
||||
inputText <- paste0("[", inputNodes, "]", collapse = "")
|
||||
|
||||
#do the internal nodes
|
||||
edges <- ""
|
||||
|
||||
outNodes <- model$nodes$code[getOutNodes(model$nodes$code, desc$outputCodes)]
|
||||
outDist <- vector(mode="list", length=length(outNodes))
|
||||
outDist <- vector(mode = "list", length = length(outNodes))
|
||||
|
||||
for (idx in 1:length(outNodes)) {
|
||||
nodeRef <- match(outNodes[idx], model$nodes$code)
|
||||
|
||||
rows <- which(model$edges$output == outNodes[idx])
|
||||
inputsStr <- paste0(model$edges$input[which(model$edges$output == outNodes[idx])], sep=":", collapse="")
|
||||
edges <- paste0(edges, paste0("[", outNodes[idx], "|", substr(inputsStr, start=1, stop=(nchar(inputsStr)-1)), "]"))
|
||||
inputsStr <- paste0(model$edges$input[which(model$edges$output == outNodes[idx])], sep = ":", collapse = "")
|
||||
edges <- paste0(edges, paste0("[", outNodes[idx], "|", substr(inputsStr, start = 1, stop = (nchar(inputsStr)-1)), "]"))
|
||||
|
||||
#Make the coefficient of the distribution
|
||||
coefVal <- setNames(c(model$nodes$growth[nodeRef], model$edges$values[rows]),
|
||||
c("(Intercept)", model$edges$input[rows])
|
||||
)
|
||||
coefVal <- setNames(
|
||||
c(model$nodes$growth[nodeRef], model$edges$values[rows]),
|
||||
c("(Intercept)", model$edges$input[rows])
|
||||
)
|
||||
#str(coefVal)
|
||||
outDist[[idx]] <- list(coef = coefVal,
|
||||
sd = model$nodes$confidence[nodeRef])
|
||||
outDist[[idx]] <- list(coef = coefVal, sd = model$nodes$confidence[nodeRef])
|
||||
}
|
||||
|
||||
print('about to build network')
|
||||
print("about to build network")
|
||||
print(paste0(inputText, edges))
|
||||
|
||||
net <- model2network(paste0(inputText, edges), debug=TRUE)
|
||||
net <- model2network(paste0(inputText, edges), debug = TRUE)
|
||||
|
||||
print('network build successful')
|
||||
print("network build successful")
|
||||
|
||||
inDist <- vector(mode="list", length=length(inputNodes))
|
||||
inDist <- vector(mode = "list", length = length(inputNodes))
|
||||
|
||||
for (idx in 1:length(inputNodes)) {
|
||||
inRef <- match(inputNodes[idx], model$nodes$code)
|
||||
@@ -151,17 +165,17 @@ buildGraph <- function(model, desc) {
|
||||
inDist[[idx]] <- list(coef = coefVal, sd = model$nodes$confidence[inRef])
|
||||
}
|
||||
|
||||
allDists = as.list(setNames(c(inDist, outDist), c(inputNodes, outNodes)))
|
||||
cfit = custom.fit(net, allDists)
|
||||
allDists <- as.list(setNames(c(inDist, outDist), c(inputNodes, outNodes)))
|
||||
cfit <- custom.fit(net, allDists)
|
||||
|
||||
cat('about to calculate sample distributions')
|
||||
cat("about to calculate sample distributions")
|
||||
print(outNodes)
|
||||
|
||||
sampleDists <- cpdist(cfit, nodes = outNodes, evidence = TRUE, n = 10000, method = "lw")
|
||||
summDists <- summary(sampleDists)
|
||||
#stdDev <- sd(sampleDists)
|
||||
|
||||
print('sample distribution build successful')
|
||||
print("sample distribution build successful")
|
||||
|
||||
model$edges$input <- model$nodes$name[match(model$edges$input, model$nodes$code)]
|
||||
model$edges$output <- model$nodes$name[match(model$edges$output, model$nodes$code)]
|
||||
@@ -185,58 +199,62 @@ getValidNodes <- function(mapping, prevOutputs, prefix) {
|
||||
inputNodes <- mapping[2:nrow(mapping),1]
|
||||
|
||||
#check that all input nodes are in the previous table
|
||||
inputNodes <- delNA(mapping[mapping[,"Node.Type"] == 'input', "Nodes"])
|
||||
if (length(inputNodes)>0) {
|
||||
if (sum(inputNodes %in% prevOutputs$name)<length(inputNodes)) {
|
||||
cat('Missing entries for input nodes in previous output columns')
|
||||
inputNodes <- delNA(mapping[mapping[,"Node.Type"] == "input", "Nodes"])
|
||||
if (length(inputNodes) > 0) {
|
||||
if (sum(inputNodes %in% prevOutputs$name) < length(inputNodes)) {
|
||||
cat("Missing entries for input nodes in previous output columns")
|
||||
print(inputNodes[!inputNodes %in% prevOutputs$name])
|
||||
}
|
||||
} else print('Invalid sheet - table must have at least one input row containing names from previous table')
|
||||
} else {
|
||||
print("Invalid sheet - table must have at least one input row containing names from previous table")
|
||||
}
|
||||
|
||||
|
||||
#Check the row headings concur with previous names
|
||||
validInputs <- delNA(inputNodes[which(unique(inputNodes) %in% prevOutputs$name)])
|
||||
if (length(validInputs)==0) print('Invalid sheet - table must have at least one input row containing names from previous table')
|
||||
if (length(validInputs) == 0) {
|
||||
print("Invalid sheet - table must have at least one input row containing names from previous table")
|
||||
}
|
||||
|
||||
|
||||
inputInts <- delNA(inputNodes[mapping$Node.Type!='link'])
|
||||
inputInts <- delNA(inputNodes[mapping$Node.Type != "link"])
|
||||
|
||||
if (sum(duplicated(inputInts))>0) {
|
||||
cat('Duplicated input node names found')
|
||||
cat("Duplicated input node names found")
|
||||
print(inputNodes[duplicated(inputNodes)])
|
||||
}
|
||||
|
||||
outNodes <- delNA(colnames(mapping)[FIRST_NODE_COL:ncol(mapping)])
|
||||
if (sum(duplicated(outNodes))>0) {
|
||||
cat('Duplicated output node names found')
|
||||
if (sum(duplicated(outNodes)) > 0) {
|
||||
cat("Duplicated output node names found")
|
||||
print(outNodes[duplicated(outNodes)])
|
||||
}
|
||||
|
||||
|
||||
#check that all internal nodes are in the columns
|
||||
intNodes <- delNA(mapping[mapping[,"Node.Type"] == 'internal', "Nodes"])
|
||||
if (length(intNodes)>0) {
|
||||
intNodes <- delNA(mapping[mapping[,"Node.Type"] == "internal", "Nodes"])
|
||||
if (length(intNodes) > 0) {
|
||||
if (sum(intNodes %in% outNodes)<length(intNodes)) {
|
||||
cat('Missing entries for internal nodes in output columns')
|
||||
print(intNodes[!intNodes %in% outNodes])
|
||||
cat("Missing entries for internal nodes in output columns")
|
||||
print(intNodes[!(intNodes %in% outNodes)])
|
||||
}
|
||||
}
|
||||
|
||||
coefs <- matrix(data=NA, nrow=length(outNodes), ncol=3, dimnames=list(NULL, c('growth', 'confidence', 'layer')))
|
||||
coefs <- matrix(data = NA, nrow = length(outNodes), ncol = 3, dimnames = list(NULL, c("growth", "confidence", "layer")))
|
||||
for (idx in 1:length(outNodes)) {
|
||||
col <- match(outNodes[idx], colnames(mapping))
|
||||
coefs[idx,] <- as.numeric(split(mapping[1, col]))[match(c('growth', 'confidence', 'layer'), states)]
|
||||
coefs[idx,] <- as.numeric(split(mapping[1, col]))[match(c("growth", "confidence", "layer"), states)]
|
||||
}
|
||||
|
||||
print(coefs)
|
||||
|
||||
return(data.frame(
|
||||
code=c(prevOutputs$code, paste0(prefix, seq(1:length(outNodes)))),
|
||||
name=c(prevOutputs$name, outNodes),
|
||||
growth=c(prevOutputs$growth, coefs[,"growth"]),
|
||||
confidence=c(prevOutputs$confidence, coefs[,"confidence"]),
|
||||
layer=c(prevOutputs$layer, coefs[,"layer"]),
|
||||
stringsAsFactors=FALSE
|
||||
code = c(prevOutputs$code, paste0(prefix, seq(1:length(outNodes)))),
|
||||
name = c(prevOutputs$name, outNodes),
|
||||
growth = c(prevOutputs$growth, coefs[,"growth"]),
|
||||
confidence = c(prevOutputs$confidence, coefs[,"confidence"]),
|
||||
layer = c(prevOutputs$layer, coefs[,"layer"]),
|
||||
stringsAsFactors = FALSE
|
||||
))
|
||||
}
|
||||
|
||||
@@ -244,14 +262,15 @@ getCode <- function(name, nodeDF) {
|
||||
nodeDF$code[match(name, nodeDF$name)]
|
||||
}
|
||||
|
||||
getValidEdges <- function(mapping, nodeDF, prevEdge=NULL, prefix) {
|
||||
str(nodeDF)
|
||||
edgeCols <- c('inputNode', 'outputNode', 'impact')
|
||||
edgeM <- matrix(data=NA, nrow=0, ncol=length(edgeCols), dimnames=list(NULL, edgeCols))
|
||||
getValidEdges <- function(mapping, nodeDF, prevEdge = NULL, prefix) {
|
||||
utils::str(nodeDF)
|
||||
|
||||
edgeCols <- c("inputNode", "outputNode", "impact")
|
||||
edgeM <- matrix(data = NA, nrow = 0, ncol = length(edgeCols), dimnames = list(NULL, edgeCols))
|
||||
|
||||
#to start, let's just get the statements and print them out....
|
||||
for (col in FIRST_NODE_COL:ncol(mapping)) {
|
||||
count=0
|
||||
count <- 0
|
||||
|
||||
for (row in 2:nrow(mapping)) {
|
||||
|
||||
@@ -259,76 +278,74 @@ getValidEdges <- function(mapping, nodeDF, prevEdge=NULL, prefix) {
|
||||
edgeM <- rbind(edgeM,
|
||||
c(getCode(mapping[row, 1], nodeDF),
|
||||
getCode(colnames(mapping)[col], nodeDF),
|
||||
split(mapping[row,col])[match('impact', states)]
|
||||
)
|
||||
split(mapping[row,col])[match("impact", states)]
|
||||
)
|
||||
)
|
||||
count=count+1
|
||||
count <- count + 1
|
||||
}
|
||||
#if (count==0) print(paste('No edges found for output', colnames(mapping)[col]))
|
||||
#if (count == 0) print(paste("No edges found for output", colnames(mapping)[col]))
|
||||
}
|
||||
}
|
||||
if (is.null(prevEdge)) return (
|
||||
data.frame(
|
||||
if (is.null(prevEdge)) {
|
||||
return (data.frame(
|
||||
input = edgeM[,"inputNode"],
|
||||
output = edgeM[,"outputNode"],
|
||||
impact = edgeM[,"impact"],
|
||||
stringsAsFactors = FALSE
|
||||
)
|
||||
) else return (
|
||||
data.frame(
|
||||
))
|
||||
} else {
|
||||
return (data.frame(
|
||||
input = c(prevEdge$input, edgeM[,"inputNode"]),
|
||||
output = c(prevEdge$output, edgeM[,"outputNode"]),
|
||||
impact = c(prevEdge$impact, edgeM[,"impact"]),
|
||||
stringsAsFactors = FALSE
|
||||
)
|
||||
)
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
# Parse one mapping sheet into the accumulated node and edge tables.
#
# mapping:     data frame of the sheet; its first column is dropped and the
#              titles in the next column are normalised with cleanTitles().
# prevOutputs: result of the previous parsing step; its `nodes` and `edges`
#              elements are passed on to getValidNodes() / getValidEdges().
# prefix:      code prefix for the nodes introduced by this sheet.
# Returns a list with elements `nodes` and `edges`.
parseMapping <- function(mapping, prevOutputs, prefix) {
  sheet <- mapping[, -1]
  sheet[, 1] <- cleanTitles(sheet[, 1])

  nodeTable <- getValidNodes(sheet, prevOutputs$nodes, prefix)
  edgeTable <- getValidEdges(sheet, nodeTable, prevEdge = prevOutputs$edges, prefix)

  list(
    nodes = nodeTable,
    edges = edgeTable
  )
}
|
||||
|
||||
parseSheet <- function(fName) {
|
||||
#get sheet names
|
||||
|
||||
print(paste('starting sheet load', fName))
|
||||
print(paste("starting sheet load", fName))
|
||||
|
||||
if (file.exists(fName)) {
|
||||
names <- openxlsx::getSheetNames(fName)
|
||||
|
||||
if (length(names)>0) {
|
||||
if (length(names) > 0) {
|
||||
|
||||
sheets <- sort(delNA(match(names, mappings)))
|
||||
|
||||
cat('starting sheet parse')
|
||||
cat("starting sheet parse")
|
||||
print(sheets)
|
||||
|
||||
if (sum(sheets==refs)==length(refs)) {
|
||||
if (sum(sheets == refs) == length(refs)) {
|
||||
#read all mapping tables
|
||||
scenario <- parseScenario(readXL(fName,mappings[1], startRow=1), prefix='p')
|
||||
p_ba <- parseMapping(readXL(fName,mappings[2], startRow=1), scenario, prefix='ba')
|
||||
p_op <- parseMapping(readXL(fName,mappings[3], startRow=1), p_ba, prefix='op')
|
||||
p_es <- parseMapping(readXL(fName,mappings[4], startRow=1), p_op, prefix='es')
|
||||
scenario <- parseScenario(readXL(fName,mappings[1], startRow = 1), prefix = "p")
|
||||
p_ba <- parseMapping(readXL(fName,mappings[2], startRow = 1), scenario, prefix = "ba")
|
||||
p_op <- parseMapping(readXL(fName,mappings[3], startRow = 1), p_ba, prefix = "op")
|
||||
p_es <- parseMapping(readXL(fName,mappings[4], startRow = 1), p_op, prefix = "es")
|
||||
|
||||
#print('building graphs')
|
||||
#print("building graphs")
|
||||
|
||||
#p_baNet <- buildGraph(p_ba, desc=list(inputCode='p', outputCodes='ba'))
|
||||
#p_opNet <- buildGraph(p_op, desc=list(inputCode='p', outputCodes=c('ba', 'op')))
|
||||
#p_esNet <- buildGraph(p_es, desc=list(inputCode='p', outputCodes=c('ba', 'op', 'es')))
|
||||
#p_baNet <- buildGraph(p_ba, desc = list(inputCode = "p", outputCodes = "ba"))
|
||||
#p_opNet <- buildGraph(p_op, desc = list(inputCode = "p", outputCodes = c("ba", "op")))
|
||||
#p_esNet <- buildGraph(p_es, desc = list(inputCode = "p", outputCodes = c("ba", "op", "es")))
|
||||
|
||||
print('sheet load completed')
|
||||
print("sheet load completed")
|
||||
return(
|
||||
#list(
|
||||
#pressBioAss = p_baNet,
|
||||
@@ -339,8 +356,8 @@ parseSheet <- function(fName) {
|
||||
)
|
||||
|
||||
} else {
|
||||
print(paste('Sheets found include', mappings[sheets]))
|
||||
cat('Missing sheets are:')
|
||||
print(paste("Sheets found include", mappings[sheets]))
|
||||
cat("Missing sheets are:")
|
||||
print(refs[-sheets])
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user