Syntax conformation

2019-04-11 12:47:53 +01:00
parent 7752b73bc6
commit 29e0bd0cf2
2 changed files with 325 additions and 297 deletions
--- a/Parses.R
+++ b/Parses.R
@@ -1,27 +1,27 @@

-modules::import(openxlsx)
 modules::import(bnlearn)
+modules::import(openxlsx)
 modules::import(stringr)
-modules::import(graph)
-modules::import(ggplot2)
 modules::import(stats)
-modules::import(plotly)
-modules::import(utils)


 #Improvements needed: make the selection of first row/column of nodes programmatic
 FIRST_NODE_COL <- 3

-mappings <- c('TestScenario', 'Map_P_BA', 'Map_BA_OP', 'Map_OP_ES')
-nodeTypes <- c('Input.Nodes', 'Internal.Nodes', 'Published.Nodes')
-states <- c('impact', 'confidence', 'growth', 'recovery', 'layer')
-refs <-c(1:length(mappings))
+mappings <- c("TestScenario", "Map_P_BA", "Map_BA_OP", "Map_OP_ES")
+nodeTypes <- c("Input.Nodes", "Internal.Nodes", "Published.Nodes")
+states <- c("impact", "confidence", "growth", "recovery", "layer")
+refs <- c(1:length(mappings))

 setEmpties <- function(val) {
-  if (is.na(val)) return(0) else return(val)
+  if (is.na(val)) {
+    return(0)
+  } else {
+    return(val)
+  }
 }

-readXL <- function(fName, sheetN, startRow=1) {
+readXL <- function(fName, sheetN, startRow = 1) {
  xl <- read.xlsx(fName, sheet = sheetN, startRow)    #, rowNames = import)
  return(data.frame(xl, stringsAsFactors = FALSE, row.names = NULL))
 }
@@ -32,73 +32,87 @@ delNA <- function(vec) {

 buildExpr <- function(pressStatus) {
  #pressStatus is a two column DF of name of pressure and status Ii.e. on or off)
-  MEANPRESS = 0
+  MEANPRESS <- 0
  expr <- "("
  for (p in 1:nrow(pressStatus)) {
-    if (pressStatus$status[p] == 'On') symbol='>=' else symbol='<='
+    if (pressStatus$status[p] == "On") {
+      symbol <- ">="
+    } else {
+      symbol <- "<="
+    }
+
    expr <- paste0(expr, "(\"", pressStatus$code[p], "\"", symbol, MEANPRESS, ") & ")
  }
-  expr<-substr(expr, 1, nchar(expr)-2)
-  expr<-paste0(expr, ')')
+  expr <- substr(expr, 1, nchar(expr) - 2)
+  expr <- paste0(expr, ")")

  return(expr)
 }

-parseScenario <- function(press, prefix = 'p') {
+parseScenario <- function(press, prefix = "p") {
  pressNames <- colnames(press)[2:length(colnames(press))]
-  coefs <- matrix(data=NA, nrow=length(pressNames), ncol=3, dimnames=list(NULL, c('growth', 'confidence', 'layer')))
+  coefs <- matrix(
+    data = NA,
+    nrow = length(pressNames),
+    ncol = 3,
+    dimnames = list(NULL, c("growth", "confidence", "layer"))
+  )
  for (col in 2:ncol(press)) {
-    coefs[col-1,] <-  as.numeric(split(press[1, col]))[match(c('growth', 'confidence', 'layer'), states)]
+    coefs[col-1,] <-  as.numeric(split(press[1, col]))[match(c("growth", "confidence", "layer"), states)]
  }
  press[is.na(press)] <- 0
-  if (sum(duplicated(pressNames))>0) {
-    cat('Duplicated pressure node names found')
+  if (sum(duplicated(pressNames)) > 0) {
+    cat("Duplicated pressure node names found")
    print(pressNodes[duplicated(pressNames)])
  }

  return(list(
-    timeSeq=press,
-    nodes=data.frame(name = pressNames,
-                     code=paste0(prefix, seq(1:length(pressNames))),
-                     growth = coefs[,'growth'],
-                     confidence=coefs[,'confidence'],
-                     layer=coefs[,'layer'],
-                     stringsAsFactors = FALSE),
-    edges=data.frame(input=NULL, output=NULL, impact=NULL)
+    timeSeq = press,
+    nodes = data.frame(
+      name = pressNames,
+      code = paste0(prefix, seq(1:length(pressNames))),
+      growth = coefs[,"growth"],
+      confidence = coefs[,"confidence"],
+      layer = coefs[,"layer"],
+      stringsAsFactors = FALSE
+    ),
+    edges = data.frame(input = NULL, output = NULL, impact = NULL)
  ))
 }

 getInitial <- function(string, letter) {
-  return(tolower(substr(string, start=1, stop=1)))
+  return(tolower(substr(string, start = 1, stop = 1)))
 }

 split <- function(cell) {
-  params <- unlist(strsplit(cell, ','))
+  params <- unlist(strsplit(cell, ","))
  values <- rep(0, length(states))

  for (n in 1:length(params)) {
-     kvp <- unlist(strsplit(params[n], '='))
+     kvp <- unlist(strsplit(params[n], "="))
     ref <- match(getInitial(trimws(kvp[1])), getInitial(states))
-     if ((ref>0)  & (ref<=length(values))) {
+
+     if ((ref > 0) & (ref <= length(values))) {
       values[ref] <- kvp[2]
     } else {
-       print(paste('Unrecognised parameter(s):',params[n]))
+       print(paste("Unrecognised parameter(s):",params[n]))
     }
-
  }
-  return(values)

+  return(values)
 }

 cleanTitles <- function(titleV) {
-  return(str_replace_all(titleV, c(' ' = '.', '-' = '')))
+  return(str_replace_all(titleV, c(" " = ".", "-" = "")))
 }

 getOutNodes <- function(codes, codeList) {
-  v <- vector(mode='logical', length=length(codes))
+  v <- vector(mode = "logical", length = length(codes))
+
  for (idx in 1:length(codes)) {
-    v[idx] <- (sum(startsWith(codes[idx], codeList))>0)
+    v[idx] <- (sum(startsWith(codes[idx], codeList)) > 0)
  }
+
  return(v)
 }

@@ -112,38 +126,38 @@ buildGraph <- function(model, desc) {
  #outputCodes - all subsequent layers to be included in the model

  inputNodes <- model$nodes$code[which(startsWith(model$nodes$code, desc$inputCode))]
-  inputText <- paste0("[", inputNodes, "]", collapse ="")
+  inputText <- paste0("[", inputNodes, "]", collapse = "")

  #do the internal nodes
  edges <- ""

  outNodes <- model$nodes$code[getOutNodes(model$nodes$code, desc$outputCodes)]
-  outDist <- vector(mode="list", length=length(outNodes))
+  outDist <- vector(mode = "list", length = length(outNodes))

  for (idx in 1:length(outNodes)) {
    nodeRef <- match(outNodes[idx], model$nodes$code)

    rows <- which(model$edges$output == outNodes[idx])
-    inputsStr <- paste0(model$edges$input[which(model$edges$output == outNodes[idx])], sep=":", collapse="")
-    edges <- paste0(edges, paste0("[", outNodes[idx], "|", substr(inputsStr, start=1, stop=(nchar(inputsStr)-1)), "]"))
+    inputsStr <- paste0(model$edges$input[which(model$edges$output == outNodes[idx])], sep = ":", collapse = "")
+    edges <- paste0(edges, paste0("[", outNodes[idx], "|", substr(inputsStr, start = 1, stop = (nchar(inputsStr)-1)), "]"))

    #Make the coefficient of the distribution
-    coefVal <- setNames(c(model$nodes$growth[nodeRef], model$edges$values[rows]),
-                        c("(Intercept)", model$edges$input[rows])
-                        )
+    coefVal <- setNames(
+      c(model$nodes$growth[nodeRef], model$edges$values[rows]),
+      c("(Intercept)", model$edges$input[rows])
+    )
    #str(coefVal)
-    outDist[[idx]] <- list(coef = coefVal,
-                         sd = model$nodes$confidence[nodeRef])
+    outDist[[idx]] <- list(coef = coefVal, sd = model$nodes$confidence[nodeRef])
  }

-  print('about to build network')
+  print("about to build network")
  print(paste0(inputText, edges))

-  net <- model2network(paste0(inputText, edges), debug=TRUE)
+  net <- model2network(paste0(inputText, edges), debug = TRUE)

-  print('network build successful')
+  print("network build successful")

-  inDist <- vector(mode="list", length=length(inputNodes))
+  inDist <- vector(mode = "list", length = length(inputNodes))

  for (idx in 1:length(inputNodes)) {
    inRef <- match(inputNodes[idx], model$nodes$code)
@@ -151,17 +165,17 @@ buildGraph <- function(model, desc) {
    inDist[[idx]] <- list(coef = coefVal, sd = model$nodes$confidence[inRef])
  }

-  allDists = as.list(setNames(c(inDist, outDist), c(inputNodes, outNodes)))
-  cfit = custom.fit(net, allDists)
+  allDists <- as.list(setNames(c(inDist, outDist), c(inputNodes, outNodes)))
+  cfit <- custom.fit(net, allDists)

-  cat('about to calculate sample distributions')
+  cat("about to calculate sample distributions")
  print(outNodes)

  sampleDists <- cpdist(cfit, nodes = outNodes, evidence = TRUE, n = 10000, method = "lw")
  summDists <- summary(sampleDists)
  #stdDev <- sd(sampleDists)

-  print('sample distribution build successful')
+  print("sample distribution build successful")

  model$edges$input <- model$nodes$name[match(model$edges$input, model$nodes$code)]
  model$edges$output <- model$nodes$name[match(model$edges$output, model$nodes$code)]
@@ -185,58 +199,62 @@ getValidNodes <- function(mapping, prevOutputs, prefix) {
  inputNodes <- mapping[2:nrow(mapping),1]

  #check that all input nodes are in the previous table
-  inputNodes <- delNA(mapping[mapping[,"Node.Type"] == 'input', "Nodes"])
-  if (length(inputNodes)>0) {
-    if (sum(inputNodes %in% prevOutputs$name)<length(inputNodes)) {
-      cat('Missing entries for input nodes in previous output columns')
+  inputNodes <- delNA(mapping[mapping[,"Node.Type"] == "input", "Nodes"])
+  if (length(inputNodes) > 0) {
+    if (sum(inputNodes %in% prevOutputs$name) < length(inputNodes)) {
+      cat("Missing entries for input nodes in previous output columns")
      print(inputNodes[!inputNodes %in% prevOutputs$name])
    }
-  } else print('Invalid sheet - table must have at least one input row containing names from previous table')
+  } else {
+    print("Invalid sheet - table must have at least one input row containing names from previous table")
+  }


  #Check the row headings concur with previous names
  validInputs <- delNA(inputNodes[which(unique(inputNodes) %in% prevOutputs$name)])
-  if (length(validInputs)==0) print('Invalid sheet - table must have at least one input row containing names from previous table')
+  if (length(validInputs) == 0) {
+    print("Invalid sheet - table must have at least one input row containing names from previous table")
+  }


-  inputInts <- delNA(inputNodes[mapping$Node.Type!='link'])
+  inputInts <- delNA(inputNodes[mapping$Node.Type != "link"])

  if (sum(duplicated(inputInts))>0) {
-    cat('Duplicated input node names found')
+    cat("Duplicated input node names found")
    print(inputNodes[duplicated(inputNodes)])
  }

  outNodes <- delNA(colnames(mapping)[FIRST_NODE_COL:ncol(mapping)])
-  if (sum(duplicated(outNodes))>0) {
-    cat('Duplicated output node names found')
+  if (sum(duplicated(outNodes)) > 0) {
+    cat("Duplicated output node names found")
    print(outNodes[duplicated(outNodes)])
  }


  #check that all internal nodes are in the columns
-  intNodes <- delNA(mapping[mapping[,"Node.Type"] == 'internal', "Nodes"])
-  if (length(intNodes)>0) {
+  intNodes <- delNA(mapping[mapping[,"Node.Type"] == "internal", "Nodes"])
+  if (length(intNodes) > 0) {
    if (sum(intNodes %in% outNodes)<length(intNodes)) {
-      cat('Missing entries for internal nodes in output columns')
-      print(intNodes[!intNodes %in% outNodes])
+      cat("Missing entries for internal nodes in output columns")
+      print(intNodes[!(intNodes %in% outNodes)])
    }
  }

-  coefs <- matrix(data=NA, nrow=length(outNodes), ncol=3, dimnames=list(NULL, c('growth', 'confidence', 'layer')))
+  coefs <- matrix(data = NA, nrow = length(outNodes), ncol = 3, dimnames = list(NULL, c("growth", "confidence", "layer")))
  for (idx in 1:length(outNodes)) {
    col <- match(outNodes[idx], colnames(mapping))
-    coefs[idx,] <-  as.numeric(split(mapping[1, col]))[match(c('growth', 'confidence', 'layer'), states)]
+    coefs[idx,] <-  as.numeric(split(mapping[1, col]))[match(c("growth", "confidence", "layer"), states)]
  }

  print(coefs)

  return(data.frame(
-    code=c(prevOutputs$code, paste0(prefix, seq(1:length(outNodes)))),
-    name=c(prevOutputs$name, outNodes),
-    growth=c(prevOutputs$growth, coefs[,"growth"]),
-    confidence=c(prevOutputs$confidence, coefs[,"confidence"]),
-    layer=c(prevOutputs$layer, coefs[,"layer"]),
-    stringsAsFactors=FALSE
+    code = c(prevOutputs$code, paste0(prefix, seq(1:length(outNodes)))),
+    name = c(prevOutputs$name, outNodes),
+    growth = c(prevOutputs$growth, coefs[,"growth"]),
+    confidence = c(prevOutputs$confidence, coefs[,"confidence"]),
+    layer = c(prevOutputs$layer, coefs[,"layer"]),
+    stringsAsFactors = FALSE
  ))
 }

@@ -244,14 +262,15 @@ getCode <- function(name, nodeDF) {
  nodeDF$code[match(name, nodeDF$name)]
 }

-getValidEdges <- function(mapping, nodeDF, prevEdge=NULL, prefix) {
-  str(nodeDF)
-  edgeCols <- c('inputNode', 'outputNode', 'impact')
-  edgeM <- matrix(data=NA, nrow=0, ncol=length(edgeCols), dimnames=list(NULL, edgeCols))
+getValidEdges <- function(mapping, nodeDF, prevEdge = NULL, prefix) {
+  utils::str(nodeDF)
+
+  edgeCols <- c("inputNode", "outputNode", "impact")
+  edgeM <- matrix(data = NA, nrow = 0, ncol = length(edgeCols), dimnames = list(NULL, edgeCols))

  #to start let just get the statements and print them out....
  for (col in FIRST_NODE_COL:ncol(mapping)) {
-    count=0
+    count <- 0

    for (row in 2:nrow(mapping)) {

@@ -259,76 +278,74 @@ getValidEdges <- function(mapping, nodeDF, prevEdge=NULL, prefix) {
        edgeM <- rbind(edgeM,
          c(getCode(mapping[row, 1], nodeDF),
            getCode(colnames(mapping)[col], nodeDF),
-            split(mapping[row,col])[match('impact', states)]
-           )
+            split(mapping[row,col])[match("impact", states)]
+          )
        )
-        count=count+1
+        count <- count + 1
      }
-      #if (count==0) print(paste('No edges found for output', colnames(mapping)[col]))
+      #if (count == 0) print(paste("No edges found for output", colnames(mapping)[col]))
    }
  }
-  if (is.null(prevEdge)) return (
-    data.frame(
+  if (is.null(prevEdge)) {
+    return (data.frame(
      input = edgeM[,"inputNode"],
      output = edgeM[,"outputNode"],
      impact = edgeM[,"impact"],
      stringsAsFactors = FALSE
-    )
-  ) else return (
-    data.frame(
+    ))
+  } else {
+    return (data.frame(
      input = c(prevEdge$input, edgeM[,"inputNode"]),
      output = c(prevEdge$output, edgeM[,"outputNode"]),
      impact = c(prevEdge$impact, edgeM[,"impact"]),
      stringsAsFactors = FALSE
-    )
-  )
+    ))
+  }
 }

 parseMapping <- function(mapping, prevOutputs, prefix) {
-
  mapping <- mapping[,-1]
  mapping[,1] <- cleanTitles(mapping[,1])

  nodeDF <- getValidNodes(mapping, prevOutputs$nodes, prefix)
-  edgeDF <- getValidEdges(mapping, nodeDF, prevEdge=prevOutputs$edges, prefix)
+  edgeDF <- getValidEdges(mapping, nodeDF, prevEdge = prevOutputs$edges, prefix)

  return(list(
    #New structure
-    nodes=nodeDF,
-    edges=edgeDF
+    nodes = nodeDF,
+    edges = edgeDF
  ))
-
 }

 parseSheet <- function(fName) {
  #get sheet names

-  print(paste('starting sheet load', fName))
+  print(paste("starting sheet load", fName))

  if (file.exists(fName)) {
    names <- openxlsx::getSheetNames(fName)

-    if (length(names)>0) {
+    if (length(names) > 0) {

      sheets <- sort(delNA(match(names, mappings)))

-      cat('starting sheet parse')
+      cat("starting sheet parse")
      print(sheets)

-      if (sum(sheets==refs)==length(refs)) {
+      if (sum(sheets == refs) == length(refs)) {
        #read all mapping tables
-        scenario <-  parseScenario(readXL(fName,mappings[1], startRow=1), prefix='p')
-        p_ba <-  parseMapping(readXL(fName,mappings[2], startRow=1), scenario, prefix='ba')
-        p_op <- parseMapping(readXL(fName,mappings[3], startRow=1), p_ba, prefix='op')
-        p_es <- parseMapping(readXL(fName,mappings[4], startRow=1), p_op, prefix='es')
+        scenario <- parseScenario(readXL(fName,mappings[1], startRow = 1), prefix = "p")
+        p_ba <- parseMapping(readXL(fName,mappings[2], startRow = 1), scenario, prefix = "ba")
+        p_op <- parseMapping(readXL(fName,mappings[3], startRow = 1), p_ba, prefix = "op")
+        p_es <- parseMapping(readXL(fName,mappings[4], startRow = 1), p_op, prefix = "es")

-        #print('building graphs')
+        #print("building graphs")

-        #p_baNet <- buildGraph(p_ba, desc=list(inputCode='p', outputCodes='ba'))
-        #p_opNet <- buildGraph(p_op, desc=list(inputCode='p', outputCodes=c('ba', 'op')))
-        #p_esNet <- buildGraph(p_es, desc=list(inputCode='p', outputCodes=c('ba', 'op', 'es')))
+        #p_baNet <- buildGraph(p_ba, desc = list(inputCode = "p", outputCodes = "ba"))
+        #p_opNet <- buildGraph(p_op, desc = list(inputCode = "p", outputCodes = c("ba", "op")))
+        #p_esNet <- buildGraph(p_es, desc = list(inputCode = "p", outputCodes = c("ba", "op", "es")))

-        print('sheet load completed')
+        print("sheet load completed")
        return(
          #list(
            #pressBioAss = p_baNet,
@@ -339,8 +356,8 @@ parseSheet <- function(fName) {
        )

      } else {
-        print(paste('Sheets found include', mappings[sheets]))
-        cat('Missing sheets are:')
+        print(paste("Sheets found include", mappings[sheets]))
+        cat("Missing sheets are:")
        print(refs[-sheets])
      }
    }