Implement changes as requested by JNCC

This commit is contained in:
2022-03-30 17:51:19 +01:00
parent eec5f07cfc
commit 5fc290e832
5 changed files with 369 additions and 44 deletions

111
extract.R Normal file
View File

@@ -0,0 +1,111 @@
# R script to load the existing spreadsheets and harmonise them
library(magrittr)
# Workbooks to process: every file in data/ whose name ends in ".xlsx".
# `pattern` is a regular expression, not a glob, so the extension is escaped
# and anchored; the previous "*.xlsx" also matched names that merely contain
# "xlsx" after some character (e.g. "notes.xlsx.bak").
fList <- list.files("data", pattern = "\\.xlsx$")
# Objective: create data tables (nodes and impacts) from these workbooks.
# Report node names that cannot be found in a reference node list.
#
# nodeType         Label inserted into the printed message.
# nodeString       Character vector of node names; every "." is replaced by a
#                  space before the lookup.
# nodeStringCheck  Object whose `Nodes` element holds the reference names.
#
# Prints one message per unmatched name and nothing when every name matches.
linkCheck <- function(nodeType, nodeString, nodeStringCheck) {
  spacedNames <- stringr::str_replace_all(nodeString, "\\.", " ")
  # sapply() labels each lookup result with the name that was looked up, so
  # the names of `unmatched` are the offending node names.
  positions <- sapply(spacedNames, match, nodeStringCheck$Nodes)
  unmatched <- which(is.na(positions))
  if (length(unmatched) > 0) {
    print(paste("Clean up error found in", nodeType, "mapping at", names(unmatched)))
  }
}
# Parse a "key=value, key=value" parameter string into a named vector.
#
# nodeStr  Character vector; each element is split on "," and the pieces are
#          trimmed of surrounding whitespace, then split on "=".
#
# Returns a named character vector: the text before the first "=" becomes the
# name, the text between the first and second "=" becomes the value (NA when
# there is no "="). Returns NULL when there is nothing to parse (zero-length
# input), as before.
getNodeVals <- function(nodeStr) {
  params <- trimws(unlist(stringr::str_split(nodeStr, ",")))
  if (length(params) == 0) {
    return(NULL)
  }
  paramVals <- stringr::str_split(params, "=")
  # Extract keys and values directly instead of growing a vector through `<<-`
  # from inside lapply(); vapply() also guarantees character output.
  keys <- vapply(paramVals, function(p) p[1], character(1))
  vals <- vapply(paramVals, function(p) p[2], character(1))
  names(vals) <- keys
  vals
}
# Target outputs: a node table and an impact (edge) table.
# Node table columns:
#   Hab, Node Type, Node, Node Layer, Growth, ....
# Edge table columns:
#   Hab, In Node, Out Node, Params, ....
#
# Worksheets read from each workbook, in the order they are indexed below.
sheetNames <- c(
  "TestScenario",
  "Map_P_BA",
  "Map_BA_OP",
  "Map_OP_ES",
  "Legend"
)
# Normalise a vector of names: replace every "." with a space, strip leading
# and trailing whitespace, and convert to lower case.
#
# namVec  Character vector of names.
# Returns a character vector of the same length.
cleanNames <- function(namVec) {
  spaced <- stringr::str_replace_all(namVec, "\\.", " ")
  tolower(trimws(spaced))
}
# Build the node table by reading each workbook listed in `fList`.
# For every workbook the four mapping sheets are read, their first column is
# dropped, and three things are derived per sheet: the cleaned column names
# (node names), the first row (node parameter strings) and the remaining rows
# (impact values).
nodeTable <- tibble::tibble()
# seq_along() rather than 1:length(): with no workbooks the body must not run
# at all (1:0 would iterate over c(1, 0)).
for (wbIdx in seq_along(fList)) {
  wb <- openxlsx::loadWorkbook(file.path("data", fList[wbIdx]))
  # Habitat label = file name up to the first dot.
  hab <- stringr::str_split(fList[wbIdx], "\\.")[[1]][1]

  # `drop = FALSE` throughout keeps single-column selections as data frames,
  # so colnames() and row subsetting behave the same for one node as for many.

  # get pressure names
  # Drop the time column no use at all....
  sheet <- openxlsx::readWorkbook(wb, sheet = sheetNames[1])[, -1, drop = FALSE]
  pressures <- cleanNames(colnames(sheet))
  pressure_nodes <- sheet[1, , drop = FALSE]

  # Pressure -> bioassemblage mapping.
  sheet <- openxlsx::readWorkbook(wb, sheet = sheetNames[2])[, -1, drop = FALSE]
  pressure_check <- na.omit(sheet[, 1:2])
  sheet2 <- na.omit(sheet[, -c(1, 2), drop = FALSE])
  ba <- cleanNames(colnames(sheet2))
  ba_nodes <- sheet2[1, , drop = FALSE]
  pressImpact <- sheet2[-1, , drop = FALSE]
  #linkCheck("pressures", pressures, pressure_check)

  # Bioassemblage -> output process mapping.
  sheet <- openxlsx::readWorkbook(wb, sheet = sheetNames[3])[, -1, drop = FALSE]
  ba_check <- na.omit(sheet[, 1:2])
  sheet2 <- na.omit(sheet[, -c(1, 2), drop = FALSE])
  op <- cleanNames(colnames(sheet2))
  op_nodes <- sheet2[1, , drop = FALSE]
  baImpact <- sheet2[-1, , drop = FALSE]
  #linkCheck("bioassemblages", ba, ba_check)

  # Output process -> ecosystem service mapping.
  sheet <- openxlsx::readWorkbook(wb, sheet = sheetNames[4])[, -1, drop = FALSE]
  op_check <- na.omit(sheet[, 1:2])
  sheet2 <- na.omit(sheet[, -c(1, 2), drop = FALSE])
  es <- cleanNames(colnames(sheet2))
  es_nodes <- sheet2[1, , drop = FALSE]
  opImpact <- sheet2[-1, , drop = FALSE]
  #linkCheck("outputprocesses", op, op_check)

  legend <- openxlsx::readWorkbook(wb, sheet = sheetNames[5])

  nodeType <- c(
    rep("pressure", length(pressures)),
    rep("bioassemblage", length(ba)),
    rep("outputprocess", length(op)),
    rep("ecosystemservice", length(es))
  )

  # Parse the parameter string of each ecosystem-service node; after t() the
  # rows are nodes and the columns are parameters.
  # NOTE(review): sapply() only yields a matrix when every node has the same
  # number of parameters; otherwise it returns a list and t() will not give a
  # node-by-parameter table -- confirm the sheets guarantee this.
  res <- as.data.frame(t(sapply(es_nodes[1, , drop = FALSE], getNodeVals)))
  names(res) <- cleanNames(names(res))
  # dplyr is not attached in this script, so mutate() must be namespaced.
  # NOTE(review): names(res) are the parameter (column) names, not the node
  # names; rownames(res) looks like what nodeName is meant to hold -- confirm.
  res <- dplyr::mutate(res, nodeName = names(res))
  # NOTE(review): nodeType has one entry per pressure, bioassemblage, output
  # process and ecosystem service node, while res only has rows for the
  # ecosystem-service nodes; tibble() will presumably reject the size mismatch
  # unless the other node rows are parsed into res as well -- confirm intent.
  nodeTable <- dplyr::bind_rows(
    nodeTable,
    tibble::tibble(
      hab = hab,
      nodeType = nodeType,
      res
    )
  )
}
# Read the node renaming sheet from "MBA_MESO_Nodes.xlsx", keep the columns
# hab, nodeType, Suggestion, node and newname, and store the result as the
# object `newNameMap` in "nameMap.RData".
mapNewNames <- function() {
  nameSheet <- openxlsx::read.xlsx("MBA_MESO_Nodes.xlsx")
  # The object must be called `newNameMap`: save() stores it under that name.
  newNameMap <- dplyr::select(nameSheet, hab, nodeType, Suggestion, node, newname)
  save(newNameMap, file = "nameMap.RData")
}