# R script to upload the existing spreadsheets and homologise them library(magrittr) fList <- list.files("data", pattern = "*.xlsx") # Objective to create data tables with linkCheck <- function(nodeType, nodeString, nodeStringCheck) { nodeString <- stringr::str_replace_all(nodeString, "\\.", " ") res <- sapply(nodeString, match, nodeStringCheck$Nodes) %>% is.na() %>% which() if (length(res) > 0) print(paste("Clean up error found in", nodeType, "mapping at", names(res))) } getNodeVals <- function(nodeStr) { params <- stringr::str_split(nodeStr, ",") %>% unlist() %>% trimws() paramVals <- stringr::str_split(params, "=") vals <- c() lapply(paramVals, function(l) { val <- l[2] names(val) <- l[1] vals <<- c(vals, val) }) vals } # We want to build a node table and an impact table. # Colnames of the node table will be # Hab, Node Type, Node, Node Layer, Growth, .... # The edges table will be # Hab, In Node, Out Node, Params, .... sheetNames <- c("TestScenario", "Map_P_BA", "Map_BA_OP", "Map_OP_ES", "Legend") cleanNames <- function(namVec) { stringr::str_replace_all(namVec, "\\.", " ") %>% trimws() %>% tolower() } nodeTable <- tibble::tibble() for (wbIdx in 1:length(fList)) { wb <- openxlsx::loadWorkbook(paste0("data/", fList[wbIdx])) hab <- stringr::str_split(fList[wbIdx], "\\.")[[1]][1] # get pressure names # Drop the time column no use at all.... sheet <- openxlsx::readWorkbook(wb, sheet = sheetNames[1])[, -1] pressures <- cleanNames(colnames(sheet)) pressure_nodes <- sheet[1, ] sheet <- openxlsx::readWorkbook(wb, sheet = sheetNames[2])[, -1] pressure_check <- na.omit(sheet[, 1:2]) sheet2 <- na.omit(sheet[, -c(1, 2)]) ba <- cleanNames(colnames(sheet2)) ba_nodes <- sheet2[1, ] pressImpact <- sheet2[-1, ] # linkCheck("pressures", pressures, pressure_check) sheet <- openxlsx::readWorkbook(wb, sheet = sheetNames[3])[, -1] ba_check <- na.omit(sheet[, 1:2]) sheet2 <- na.omit(sheet[, -c(1, 2)]) op <- cleanNames(colnames(sheet2)) op_nodes <- sheet2[1, ] baImpact <- sheet2[-1, ] # linkCheck("bioassemblages", ba, ba_check) sheet <- openxlsx::readWorkbook(wb, sheet = sheetNames[4])[, -1] op_check <- na.omit(sheet[, 1:2]) sheet2 <- na.omit(sheet[, -c(1, 2)]) es <- cleanNames(colnames(sheet2)) es_nodes <- sheet2[1, ] opImpact <- sheet2[-1, ] # linkCheck("outputprocesses", op, op_check) legend <- openxlsx::readWorkbook(wb, sheet = sheetNames[5]) nodeType <- c( rep("pressure", length(pressures)), rep("bioassemblage", length(ba)), rep("outputprocess", length(op)), rep("ecosystemservice", length(es)) ) res <- t(sapply(es_nodes[1, ], getNodeVals)) %>% as.data.frame() names(res) <- cleanNames(names(res)) res <- res %>% mutate(nodeName = names(res)) nodeTable <- nodeTable %>% dplyr::bind_rows( tibble::tibble( hab = hab, nodeType = nodeType, res ) ) } mapNewNames <- function() { newNameMap <- openxlsx::read.xlsx("MBA_MESO_Nodes.xlsx") %>% dplyr::select(hab, nodeType, Suggestion, node, newname) save(newNameMap, file = "nameMap.RData") }