Implement changes as requested by JNCC

2022-03-30 17:51:19 +01:00
parent eec5f07cfc
commit 5fc290e832
5 changed files with 369 additions and 44 deletions
--- a/extract.R
+++ b/extract.R
@@ -0,0 +1,111 @@
+#R script to upload the existing spreadsheets and homologise them
+library(magrittr)
+# list.files() interprets `pattern` as a regular expression, not a shell glob.
+# Anchor on the literal ".xlsx" extension so that names which merely contain
+# the letters "xlsx" somewhere are not picked up.
+fList <- list.files("data", pattern = "\\.xlsx$")
+
+#Objective: create data tables (a node table and an edge table, described below)
+# Report entries of nodeString that are absent from nodeStringCheck$Nodes.
+#   nodeType        - label inserted into the printed message
+#   nodeString      - character vector of names; dots are replaced by spaces
+#                     before the lookup
+#   nodeStringCheck - object whose `Nodes` element holds the reference names
+# Prints one message per unmatched entry; prints nothing when all entries match.
+linkCheck <- function(nodeType, nodeString, nodeStringCheck) {
+  nodeString <- stringr::str_replace_all(nodeString, "\\.", " ")
+  # Position of each name in the reference list (NA when it is not there).
+  positions <- vapply(
+    nodeString,
+    function(nm) match(nm, nodeStringCheck$Nodes),
+    integer(1)
+  )
+  unmatched <- which(is.na(positions))
+  if (length(unmatched) > 0) {
+    print(paste("Clean up error found in", nodeType, "mapping at", names(unmatched)))
+  }
+}
+
+# Parse a "key=value, key=value" parameter string into a named character vector.
+#   nodeStr - character; parameters separated by commas, each written key=value
+# Returns a character vector of the values, named by their keys, or NULL when
+# there is nothing to parse. An entry without "=" yields NA under that entry's
+# text as its name.
+getNodeVals <- function(nodeStr) {
+  params <- stringr::str_split(nodeStr, ",") %>% unlist() %>% trimws()
+  if (length(params) == 0) return(NULL)
+  # Split on the first "=" only, so a value that itself contains "=" is kept
+  # whole instead of being cut off at the second "=".
+  paramVals <- stringr::str_split(params, "=", n = 2)
+  keys <- vapply(paramVals, function(l) l[1], character(1))
+  vals <- vapply(paramVals, function(l) l[2], character(1))
+  # Build the result in one step rather than growing a vector through `<<-`.
+  stats::setNames(vals, keys)
+}
+
+#We want to build a node table and an impact table.
+#Colnames of the node table will be
+#Hab,  Node Type, Node, Node Layer, Growth,  ....
+
+#The edges table will be
+#Hab, In Node, Out Node, Params, ....
+
+
+# Worksheets read from every workbook; the loop below addresses them by position.
+sheetNames <- c(
+  "TestScenario",
+  "Map_P_BA",
+  "Map_BA_OP",
+  "Map_OP_ES",
+  "Legend"
+)
+
+# Normalise a vector of names: every dot becomes a space, leading and trailing
+# whitespace is stripped, and the result is lower-cased.
+cleanNames <- function(namVec) {
+  spaced <- stringr::str_replace_all(namVec, "\\.", " ")
+  tolower(trimws(spaced))
+}
+
+# Collects the parsed node rows of every workbook found in data/.
+nodeTable <- tibble::tibble()
+
+# seq_along() instead of 1:length(): when no workbook was found the loop is
+# skipped, whereas 1:0 would run the body for the indices 1 and 0.
+for (wbIdx in seq_along(fList)) {
+  wb <- openxlsx::loadWorkbook(file.path("data", fList[wbIdx]))
+  # Habitat label: the file name up to its first dot.
+  hab <- stringr::str_split(fList[wbIdx], "\\.")[[1]][1]
+  #get pressure names
+
+  #Drop the time column no use at all....
+  # drop = FALSE keeps a data frame even when a single column remains.
+  sheet <- openxlsx::readWorkbook(wb, sheet=sheetNames[1])[, -1, drop = FALSE]
+  pressures <- cleanNames(colnames(sheet))
+  pressure_nodes <- sheet[1,]
+
+
+  # Second sheet: after the first column is dropped, columns 1-2 are kept as
+  # the check table; the remaining columns give the names (header), the node
+  # row (first row) and the impact rows (all other rows).
+  sheet <- openxlsx::readWorkbook(wb, sheet=sheetNames[2])[, -1, drop = FALSE]
+  pressure_check <- na.omit(sheet[,1:2])
+  sheet2 <- na.omit(sheet[, -c(1,2), drop = FALSE])
+  ba <- cleanNames(colnames(sheet2))
+  ba_nodes <- sheet2[1,]
+  pressImpact <- sheet2[-1,]
+
+  #linkCheck("pressures", pressures, pressure_check)
+
+
+  # Third sheet, same layout as the second.
+  sheet <- openxlsx::readWorkbook(wb, sheet=sheetNames[3])[, -1, drop = FALSE]
+  ba_check <- na.omit(sheet[,1:2])
+  sheet2 <- na.omit(sheet[, -c(1,2), drop = FALSE])
+  op <- cleanNames(colnames(sheet2))
+  op_nodes <- sheet2[1,]
+  baImpact <-  sheet2[-1,]
+
+  #linkCheck("bioassemblages", ba, ba_check)
+
+  # Fourth sheet, same layout as the second.
+  sheet <- openxlsx::readWorkbook(wb, sheet=sheetNames[4])[, -1, drop = FALSE]
+  op_check <- na.omit(sheet[,1:2])
+  sheet2 <- na.omit(sheet[, -c(1,2), drop = FALSE])
+  es <- cleanNames(colnames(sheet2))
+  es_nodes <- sheet2[1,]
+  opImpact <-  sheet2[-1,]
+
+  #linkCheck("outputprocesses", op, op_check)
+
+  legend <- openxlsx::readWorkbook(wb, sheet=sheetNames[5])
+
+  # One type label per node name, in the order pressures, ba, op, es.
+  nodeType <- c(
+    rep("pressure", length(pressures)),
+    rep("bioassemblage", length(ba)),
+    rep("outputprocess", length(op)),
+    rep("ecosystemservice", length(es))
+  )
+
+
+
+  # Parse the parameter string held in each column of the es_nodes row.
+  res <- t(sapply(es_nodes[1,], getNodeVals)) %>% as.data.frame()
+  names(res) <- cleanNames(names(res))
+  # mutate() must be namespaced: only magrittr is attached in this script.
+  # NOTE(review): names(res) are the column names of res, so their count only
+  # fits the row count by coincidence; rownames(res) may be what was meant -
+  # confirm against a real workbook.
+  res <- res %>% dplyr::mutate(nodeName=names(res))
+
+  # NOTE(review): nodeType covers pressures, ba, op and es, while res is built
+  # from es_nodes alone, so the two lengths will normally differ and tibble()
+  # will refuse to combine them - confirm which nodes res should contain.
+  nodeTable <- nodeTable %>% dplyr::bind_rows(
+    tibble::tibble(
+      hab=hab,
+      nodeType=nodeType,
+      res
+    )
+  )
+
+}
+
+# Read the node renaming workbook and store the selected columns as an .RData
+# file.
+#   inFile  - workbook to read; defaults to the previously hard-coded file
+#   outFile - .RData file to write; defaults to the previously hard-coded file
+# The saved object is always called `newNameMap`, so load(outFile) restores it
+# under that name. Returns the name map invisibly.
+mapNewNames <- function(inFile = "MBA_MESO_Nodes.xlsx", outFile = "nameMap.RData") {
+  newNameMap <- openxlsx::read.xlsx(inFile) %>%
+     dplyr::select(hab, nodeType, Suggestion, node, newname)
+  save(newNameMap, file = outFile)
+  invisible(newNameMap)
+}
+
+
+