# MESO/extract.R

# R script to upload the existing spreadsheets and homologise them
modules::import(magrittr)

# Workbooks to process. `pattern` is a regular expression rather than a shell
# glob, so match the literal ".xlsx" extension at the end of the file name.
fList <- list.files("data", pattern = "\\.xlsx$")

# Objective: create the data tables described further down (node table and edges table).
# Report node names that have no counterpart in a mapping sheet.
#
# nodeType         Label of the node group; only used in the printed message.
# nodeString       Character vector of node names. Every "." is replaced by a
#                  space before the lookup.
# nodeStringCheck  Object whose `Nodes` element holds the valid node names.
#
# Prints one "Clean up error found in ..." message per unmatched name and
# returns those messages invisibly. Returns NULL invisibly when every name
# is found.
linkCheck <- function(nodeType, nodeString, nodeStringCheck) {
  nodeString <- gsub(".", " ", nodeString, fixed = TRUE)

  # match() is vectorised, so the whole lookup is a single call; indexing by
  # the NA positions yields the offending names directly.
  unmatched <- nodeString[is.na(match(nodeString, nodeStringCheck$Nodes))]

  if (length(unmatched) == 0L) {
    return(invisible(NULL))
  }

  print(paste("Clean up error found in", nodeType, "mapping at", unmatched))
}

# Parse a "key=value, key=value" description into a named character vector.
#
# nodeStr  Character vector of descriptions; all elements are split on ","
#          and pooled into one result.
#
# Returns a character vector with one element per "key=value" segment, named
# by the text before the first "=" and holding the text between the first and
# second "=". A segment without "=" yields NA under its own name. Only the
# whitespace around a whole segment is trimmed, not the whitespace around "=".
# Blank or NA segments (empty cell, trailing comma) are skipped; NULL is
# returned when nothing is left.
getNodeVals <- function(nodeStr) {
  params <- trimws(unlist(strsplit(as.character(nodeStr), ",", fixed = TRUE)))
  params <- params[!is.na(params) & nzchar(params)]
  if (length(params) == 0L) {
    return(NULL)
  }

  # strsplit() drops a trailing empty field, so "key=" would lose its empty
  # value. Appending one more "=" keeps that field and adds nothing else.
  pieces <- strsplit(paste0(params, "="), "=", fixed = TRUE)

  vals <- vapply(pieces, `[`, character(1), 2L)
  names(vals) <- vapply(pieces, `[`, character(1), 1L)
  vals
}

# We want to build a node table and an impact table.
# Colnames of the node table will be
# Hab,  Node Type, Node, Node Layer, Growth,  ....

# The edges table will be
# Hab, In Node, Out Node, Params, ....


# Worksheets read from every workbook, in the order the loop below indexes
# them (sheetNames[1] ... sheetNames[5]).
sheetNames <- c(
  "TestScenario",
  "Map_P_BA",
  "Map_BA_OP",
  "Map_OP_ES",
  "Legend"
)

# Normalise names for comparison: every "." becomes a space, surrounding
# whitespace is removed and the result is lower-cased.
#
# namVec  Character vector of names (e.g. column names of a sheet).
#
# Returns a character vector of the same length as `namVec`.
cleanNames <- function(namVec) {
  spaced <- stringr::str_replace_all(namVec, "\\.", " ")
  trimmed <- trimws(spaced)
  tolower(trimmed)
}

# Node table accumulated across all workbooks (one row per parsed node).
nodeTable <- tibble::tibble()

# seq_along() makes the loop a no-op when no workbook was found;
# 1:length(fList) would iterate over c(1, 0) in that case.
for (wbIdx in seq_along(fList)) {
  wb <- openxlsx::loadWorkbook(file.path("data", fList[wbIdx]))
  # The habitat name is the part of the file name before the first ".".
  hab <- stringr::str_split(fList[wbIdx], "\\.")[[1]][1]


  # Scenario sheet: the first (time) column is not used and is dropped.
  # drop = FALSE keeps a data frame, and with it the column names, even when
  # only a single column or row remains.
  sheet <- openxlsx::readWorkbook(wb, sheet = sheetNames[1])[, -1, drop = FALSE]
  pressures <- cleanNames(colnames(sheet))
  pressure_nodes <- sheet[1, , drop = FALSE]


  # Mapping sheet 2: after dropping the first column, the next two columns are
  # kept as the check table; the remaining columns give the bioassemblage
  # names. Their first complete row is kept as `ba_nodes`, the rows below it
  # as `pressImpact`.
  sheet <- openxlsx::readWorkbook(wb, sheet = sheetNames[2])[, -1, drop = FALSE]
  pressure_check <- na.omit(sheet[, 1:2])
  sheet2 <- na.omit(sheet[, -c(1, 2), drop = FALSE])
  ba <- cleanNames(colnames(sheet2))
  ba_nodes <- sheet2[1, , drop = FALSE]
  pressImpact <- sheet2[-1, , drop = FALSE]

  # linkCheck("pressures", pressures, pressure_check)


  # Mapping sheet 3: same layout, columns give the output-process names.
  sheet <- openxlsx::readWorkbook(wb, sheet = sheetNames[3])[, -1, drop = FALSE]
  ba_check <- na.omit(sheet[, 1:2])
  sheet2 <- na.omit(sheet[, -c(1, 2), drop = FALSE])
  op <- cleanNames(colnames(sheet2))
  op_nodes <- sheet2[1, , drop = FALSE]
  baImpact <- sheet2[-1, , drop = FALSE]

  # linkCheck("bioassemblages", ba, ba_check)


  # Mapping sheet 4: same layout, columns give the ecosystem-service names.
  sheet <- openxlsx::readWorkbook(wb, sheet = sheetNames[4])[, -1, drop = FALSE]
  op_check <- na.omit(sheet[, 1:2])
  sheet2 <- na.omit(sheet[, -c(1, 2), drop = FALSE])
  es <- cleanNames(colnames(sheet2))
  es_nodes <- sheet2[1, , drop = FALSE]
  opImpact <- sheet2[-1, , drop = FALSE]

  # linkCheck("outputprocesses", op, op_check)


  legend <- openxlsx::readWorkbook(wb, sheet = sheetNames[5])

  # One node-type label per node column read above, in sheet order.
  nodeType <- c(
    rep("pressure", length(pressures)),
    rep("bioassemblage", length(ba)),
    rep("outputprocess", length(op)),
    rep("ecosystemservice", length(es))
  )

  # Parse the description string of every ecosystem-service node. When all
  # nodes share the same parameters sapply() returns a parameters x nodes
  # matrix, so transpose it to get one row per node.
  res <- t(sapply(es_nodes, getNodeVals)) %>% as.data.frame()
  names(res) <- cleanNames(names(res))
  # After the transpose the node names are the row names of `res`; its column
  # names are the parameter names.
  res$nodeName <- cleanNames(rownames(res))

  # NOTE(review): only the ecosystem-service nodes are parsed so far, so the
  # rows are labelled with that type. The full `nodeType` vector also covers
  # pressures, bioassemblages and output processes and is therefore longer
  # than `res`; it cannot be used as a column until the other `*_nodes` rows
  # are parsed as well - confirm whether they should be.
  nodeTable <- nodeTable %>% dplyr::bind_rows(
    tibble::tibble(
      hab = hab,
      nodeType = "ecosystemservice",
      res
    )
  )
}

# Read the node name-mapping workbook.
#
# path  Path of the workbook to read; defaults to "MBA_MESO_Nodes.xlsx" in the
#       working directory.
#
# Returns the columns hab, nodeType, Suggestion, node and newname of the sheet
# that openxlsx::read.xlsx() reads by default.
mapNewNames <- function(path = "MBA_MESO_Nodes.xlsx") {
  newNameMap <- openxlsx::read.xlsx(path)
  dplyr::select(newNameMap, hab, nodeType, Suggestion, node, newname)
}