StyleR run

2022-04-07 09:24:38 +01:00
parent be5319a423
commit 882f4cfb69
4 changed files with 507 additions and 492 deletions
--- a/Parses.R
+++ b/Parses.R
@@ -5,7 +5,7 @@ modules::import(stringr)
 modules::import(stats)


-#Improvements needed: make the selection of first row/column of nodes programmatic
+# Improvements needed: make the selection of first row/column of nodes programmatic
 FIRST_NODE_COL <- 3

 mappings <- c("TestScenario", "Map_P_BA", "Map_BA_OP", "Map_OP_ES", "Legend")
@@ -22,7 +22,7 @@ setEmpties <- function(val) {
 }

 readXL <- function(fName, sheetN, startRow = 1) {
-  xl <- read.xlsx(fName, sheet = sheetN, startRow)    #, rowNames = import)
+  xl <- read.xlsx(fName, sheet = sheetN, startRow) # , rowNames = import)
  return(data.frame(xl, stringsAsFactors = FALSE, row.names = NULL))
 }

@@ -31,7 +31,7 @@ delNA <- function(vec) {
 }

 buildExpr <- function(pressStatus) {
-  #pressStatus is a two column DF of name of pressure and status Ii.e. on or off)
+  # pressStatus is a two-column DF of pressure name and status (i.e. on or off)
  MEANPRESS <- 0
  expr <- "("
  for (p in 1:nrow(pressStatus)) {
@@ -58,7 +58,7 @@ parseScenario <- function(press, prefix = "p") {
    dimnames = list(NULL, c("growth", "confidence", "layer"))
  )
  for (col in 2:ncol(press)) {
-    coefs[col-1,] <-  as.numeric(split(press[1, col]))[match(c("growth", "confidence", "layer"), states)]
+    coefs[col - 1, ] <- as.numeric(split(press[1, col]))[match(c("growth", "confidence", "layer"), states)]
  }
  press[is.na(press)] <- 0
  if (sum(duplicated(pressNames)) > 0) {
@@ -71,9 +71,9 @@ parseScenario <- function(press, prefix = "p") {
    nodes = data.frame(
      name = pressNames,
      code = paste0(prefix, seq(1:length(pressNames))),
-      growth = coefs[,"growth"],
-      confidence = coefs[,"confidence"],
-      layer = coefs[,"layer"],
+      growth = coefs[, "growth"],
+      confidence = coefs[, "confidence"],
+      layer = coefs[, "layer"],
      stringsAsFactors = FALSE
    ),
    edges = data.frame(input = NULL, output = NULL, impact = NULL)
@@ -85,19 +85,18 @@ getInitial <- function(string, letter) {
 }

 split <- function(cell) {
-
  params <- unlist(strsplit(cell, ","))
  values <- rep(0, length(states))

  for (n in 1:length(params)) {
-     kvp <- unlist(strsplit(params[n], "="))
-     ref <- match(getInitial(trimws(kvp[1])), getInitial(states))
+    kvp <- unlist(strsplit(params[n], "="))
+    ref <- match(getInitial(trimws(kvp[1])), getInitial(states))

-     if ((ref > 0) & (ref <= length(values))) {
-       values[ref] <- kvp[2]
-     } else {
-       print(paste("Unrecognised parameter(s):",params[n]))
-     }
+    if ((ref > 0) & (ref <= length(values))) {
+      values[ref] <- kvp[2]
+    } else {
+      print(paste("Unrecognised parameter(s):", params[n]))
+    }
  }

  return(values)
@@ -119,18 +118,18 @@ getOutNodes <- function(codes, codeList) {

 buildGraph <- function(model, desc) {

-  #model contains the following
+  # model contains the following
  # node table, edge table

-  #descriptor (desc) contains:
-  #inputCode - the top layer of the model
-  #outputCodes - all subsequent layers to be included in the model
+  # descriptor (desc) contains:
+  # inputCode - the top layer of the model
+  # outputCodes - all subsequent layers to be included in the model


  inputNodes <- model$nodes$code[which(startsWith(model$nodes$code, desc$inputCode))]
  inputText <- paste0("[", inputNodes, "]", collapse = "")

-  #do the internal nodes
+  # do the internal nodes
  edges <- ""

  outNodes <- model$nodes$code[getOutNodes(model$nodes$code, desc$outputCodes)]
@@ -141,24 +140,24 @@ buildGraph <- function(model, desc) {

    rows <- which(model$edges$output == outNodes[idx])
    inputsStr <- paste0(model$edges$input[which(model$edges$output == outNodes[idx])], sep = ":", collapse = "")
-    edges <- paste0(edges, paste0("[", outNodes[idx], "|", substr(inputsStr, start = 1, stop = (nchar(inputsStr)-1)), "]"))
+    edges <- paste0(edges, paste0("[", outNodes[idx], "|", substr(inputsStr, start = 1, stop = (nchar(inputsStr) - 1)), "]"))

-    #Make the coefficient of the distribution
+    # Make the coefficient of the distribution
    coefVal <- setNames(
      c(model$nodes$growth[nodeRef], model$edges$values[rows]),
      c("(Intercept)", model$edges$input[rows])
    )
-    #str(coefVal)
+    # str(coefVal)
    outDist[[idx]] <- list(coef = coefVal, sd = model$nodes$confidence[nodeRef])
  }

  print("Saving model prior to network modelling")
  modelDefn <- paste0(inputText, edges)
-  save(modelDefn, file="buildGraph.RData")
+  save(modelDefn, file = "buildGraph.RData")


-  #print("about to build network")
-  #print(paste0(inputText, edges))
+  # print("about to build network")
+  # print(paste0(inputText, edges))



@@ -176,15 +175,15 @@ buildGraph <- function(model, desc) {

  allDists <- as.list(setNames(c(inDist, outDist), c(inputNodes, outNodes)))

-  #print(allDists)
+  # print(allDists)
  cfit <- custom.fit(net, allDists)

  cat("about to calculate sample distributions")
-  #print(outNodes)
+  # print(outNodes)

  sampleDists <- cpdist(cfit, nodes = outNodes, evidence = TRUE, n = 10000, method = "lw")
  summDists <- summary(sampleDists)
-  #stdDev <- sd(sampleDists)
+  # stdDev <- sd(sampleDists)

  print("sample distribution build successful")

@@ -206,11 +205,11 @@ buildGraph <- function(model, desc) {

 getValidNodes <- function(mapping, prevOutputs, prefix) {

-  #Find row id for input nodes, internal and published
-  inputNodes <- mapping[2:nrow(mapping),1]
+  # Find row id for input nodes, internal and published
+  inputNodes <- mapping[2:nrow(mapping), 1]

-  #check that all input nodes are in the previous table
-  inputNodes <- delNA(mapping[mapping[,"Node.Type"] == "input", "Nodes"])
+  # check that all input nodes are in the previous table
+  inputNodes <- delNA(mapping[mapping[, "Node.Type"] == "input", "Nodes"])
  if (length(inputNodes) > 0) {
    if (sum(inputNodes %in% prevOutputs$name) < length(inputNodes)) {
      cat("Missing entries for input nodes in previous output columns")
@@ -221,7 +220,7 @@ getValidNodes <- function(mapping, prevOutputs, prefix) {
  }


-  #Check the row headings concur with previous names
+  # Check the row headings concur with previous names
  validInputs <- delNA(inputNodes[which(unique(inputNodes) %in% prevOutputs$name)])
  if (length(validInputs) == 0) {
    print("Invalid sheet - table must have at least one input row containing names from previous table")
@@ -230,7 +229,7 @@ getValidNodes <- function(mapping, prevOutputs, prefix) {

  inputInts <- delNA(inputNodes[mapping$Node.Type != "link"])

-  if (sum(duplicated(inputInts))>0) {
+  if (sum(duplicated(inputInts)) > 0) {
    cat("Duplicated input node names found")
    print(inputNodes[duplicated(inputNodes)])
  }
@@ -242,10 +241,10 @@ getValidNodes <- function(mapping, prevOutputs, prefix) {
  }


-  #check that all internal nodes are in the columns
-  intNodes <- delNA(mapping[mapping[,"Node.Type"] == "internal", "Nodes"])
+  # check that all internal nodes are in the columns
+  intNodes <- delNA(mapping[mapping[, "Node.Type"] == "internal", "Nodes"])
  if (length(intNodes) > 0) {
-    if (sum(intNodes %in% outNodes)<length(intNodes)) {
+    if (sum(intNodes %in% outNodes) < length(intNodes)) {
      cat("Missing entries for internal nodes in output columns")
      print(intNodes[!(intNodes %in% outNodes)])
    }
@@ -254,15 +253,15 @@ getValidNodes <- function(mapping, prevOutputs, prefix) {
  coefs <- matrix(data = NA, nrow = length(outNodes), ncol = 3, dimnames = list(NULL, c("growth", "confidence", "layer")))
  for (idx in 1:length(outNodes)) {
    col <- match(outNodes[idx], colnames(mapping))
-    coefs[idx,] <-  as.numeric(split(mapping[1, col]))[match(c("growth", "confidence", "layer"), states)]
+    coefs[idx, ] <- as.numeric(split(mapping[1, col]))[match(c("growth", "confidence", "layer"), states)]
  }

  return(data.frame(
    code = c(prevOutputs$code, paste0(prefix, seq(1:length(outNodes)))),
    name = c(prevOutputs$name, outNodes),
-    growth = c(prevOutputs$growth, coefs[,"growth"]),
-    confidence = c(prevOutputs$confidence, coefs[,"confidence"]),
-    layer = c(prevOutputs$layer, coefs[,"layer"]),
+    growth = c(prevOutputs$growth, coefs[, "growth"]),
+    confidence = c(prevOutputs$confidence, coefs[, "confidence"]),
+    layer = c(prevOutputs$layer, coefs[, "layer"]),
    stringsAsFactors = FALSE
  ))
 }
@@ -272,66 +271,67 @@ getCode <- function(name, nodeDF) {
 }

 getValidEdges <- function(mapping, nodeDF, prevEdge = NULL, prefix) {
-  #utils::str(nodeDF)
+  # utils::str(nodeDF)

-  #save(mapping, nodeDF, prevEdge, prefix, file="validEdges.RData")
+  # save(mapping, nodeDF, prevEdge, prefix, file="validEdges.RData")

  edgeCols <- c("inputNode", "outputNode", "impact")
  edgeM <- matrix(data = NA, nrow = 0, ncol = length(edgeCols), dimnames = list(NULL, edgeCols))

-  #to start let just get the statements and print them out....
+  # to start, let's just get the statements and print them out...
  for (col in FIRST_NODE_COL:ncol(mapping)) {
    count <- 0

    for (row in 2:nrow(mapping)) {
-
      if (!is.na(mapping[row, col])) {
-        edgeM <- rbind(edgeM,
-          c(getCode(mapping[row, 1], nodeDF),
+        edgeM <- rbind(
+          edgeM,
+          c(
+            getCode(mapping[row, 1], nodeDF),
            getCode(colnames(mapping)[col], nodeDF),
-            split(mapping[row,col])[match("impact", states)]
+            split(mapping[row, col])[match("impact", states)]
          )
        )
        count <- count + 1
      }
-      #if (count == 0) print(paste("No edges found for output", colnames(mapping)[col]))
+      # if (count == 0) print(paste("No edges found for output", colnames(mapping)[col]))
    }
  }
  if (is.null(prevEdge)) {
-    return (data.frame(
-      input = edgeM[,"inputNode"],
-      output = edgeM[,"outputNode"],
-      impact = edgeM[,"impact"],
+    return(data.frame(
+      input = edgeM[, "inputNode"],
+      output = edgeM[, "outputNode"],
+      impact = edgeM[, "impact"],
      stringsAsFactors = FALSE
    ))
  } else {
-    return (data.frame(
-      input = c(prevEdge$input, edgeM[,"inputNode"]),
-      output = c(prevEdge$output, edgeM[,"outputNode"]),
-      impact = c(prevEdge$impact, edgeM[,"impact"]),
+    return(data.frame(
+      input = c(prevEdge$input, edgeM[, "inputNode"]),
+      output = c(prevEdge$output, edgeM[, "outputNode"]),
+      impact = c(prevEdge$impact, edgeM[, "impact"]),
      stringsAsFactors = FALSE
    ))
  }
 }

 parseMapping <- function(mapping, prevOutputs, prefix) {
-  mapping <- mapping[,-1]
-  mapping[,1] <- cleanTitles(mapping[,1])
+  mapping <- mapping[, -1]
+  mapping[, 1] <- cleanTitles(mapping[, 1])

  nodeDF <- getValidNodes(mapping, prevOutputs$nodes, prefix)
  edgeDF <- getValidEdges(mapping, nodeDF, prevEdge = prevOutputs$edges, prefix)

-  #save(nodeDF, edgeDF, file="mapping.RData")
+  # save(nodeDF, edgeDF, file="mapping.RData")

  return(list(
-    #New structure
+    # New structure
    nodes = nodeDF,
    edges = edgeDF
  ))
 }

 parseSheet <- function(fName) {
-  #get sheet names
+  # get sheet names

  print(paste("starting sheet load", fName))

@@ -339,19 +339,18 @@ parseSheet <- function(fName) {
    names <- openxlsx::getSheetNames(fName)

    if (length(names) > 0) {
-
      sheets <- sort(delNA(match(names, mappings)))

      cat("starting sheet parse")
-      #print(sheets)
+      # print(sheets)

      if (sum(sheets == refs) == length(refs)) {
-        #read all mapping tables
-        scenario <- parseScenario(readXL(fName,mappings[1], startRow = 1), prefix = "p")
-        p_ba <- parseMapping(readXL(fName,mappings[2], startRow = 1), scenario, prefix = "ba")
-        p_op <- parseMapping(readXL(fName,mappings[3], startRow = 1), p_ba, prefix = "op")
-        p_es <- parseMapping(readXL(fName,mappings[4], startRow = 1), p_op, prefix = "es")
-        legend <- readXL(fName,mappings[5], startRow = 1)
+        # read all mapping tables
+        scenario <- parseScenario(readXL(fName, mappings[1], startRow = 1), prefix = "p")
+        p_ba <- parseMapping(readXL(fName, mappings[2], startRow = 1), scenario, prefix = "ba")
+        p_op <- parseMapping(readXL(fName, mappings[3], startRow = 1), p_ba, prefix = "op")
+        p_es <- parseMapping(readXL(fName, mappings[4], startRow = 1), p_op, prefix = "es")
+        legend <- readXL(fName, mappings[5], startRow = 1)

        print("sheet load completed")
        return(
@@ -360,7 +359,6 @@ parseSheet <- function(fName) {
            legend = legend
          )
        )
-
      } else {
        print(paste("Sheets found include", mappings[sheets]))
        cat("Missing sheets are:")