#####
#
# Convert SIMO simulated data to CSV files
# 2020-02-21
#
#####

# rm(list = ls())  # remove all objects from the workspace if needed


library(dplyr)


### Set the working path to this R project
path <- getwd()



### Define the input & output folders
# ------------------------------------

#   Input: contains all .db files, which will be converted
#   to .csv files in the 'output' folder
inputFolder  <- paste(path, "input_data", sep = "/")
outputFolder <- paste(path, "output",     sep = "/")
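
# Optional step, not part of the original workflow: if the 'output' folder
# might not exist yet, it can be created up front, e.g.:
# dir.create(outputFolder, showWarnings = FALSE, recursive = TRUE)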




### The corresponding db files need to be stored in folder "input_data"
### The output .csv files will be stored in folder "output" 

# Database names can be chosen freely.
# !!! The ONLY rule is the naming of simulations for set aside without deadwood extraction:

# Examples: "without.db"     no climate change (CC)
#           "without_SA.db"  no CC and set aside without deadwood extraction (no other regimes!)
#           "CC45.db"        CC with RCP scenario 4.5
#           "CC45_SA.db"     RCP 4.5 and set aside without deadwood extraction (no other regimes!)

# Reason:   Databases containing only SA simulations require a special SQL query for data extraction.
#           These are identified by "_SA" in the db name.
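
# Illustrative check only: grepl() (used further below) flags the databases that need the SA query
# grepl("_SA", c("without.db", "without_SA.db", "CC45.db", "CC45_SA.db"))
# #> [1] FALSE  TRUE FALSE  TRUE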




# -------------------
# Input db :
# -------------------


# List all .db files in 'input_data'
db_input <- list.files(inputFolder,
                       pattern = "\\.db$")


# Remove the ".db" extension from the file names to create the database names
db_names <- gsub("\\.db$", "", db_input)
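
# Example (illustrative): if 'input_data' holds "CC45.db" and "CC45_SA.db",
# then db_input is c("CC45.db", "CC45_SA.db") and db_names is c("CC45", "CC45_SA")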



# -----------------------------------------
# Identify columns from the UNIT table to export to .csv
# -----------------------------------------
# Different columns are stored either in 'stratum' or in 'comp_unit'.
# If more variables are required, the SQL query in the script
# 'structure_SIMO_rslDB.R' needs to be updated.
# An overview of all available SIMO outcomes can be found under: params/Overview_outcomes_SIMO.xlsx

# !!! Column "branching_group" is required to merge the regime abbreviations (.../params/regimes.csv) !!!


source(paste(path, "MultiForest/columnsMF.R", sep = "/"))

# columns <-  paste0("id,
#                    year,
#                    branch,
#                    branching_group,
#                    Age,
#                    area,
#                    cash_flow,
#                    V_total_deadwood,
#                    BA,
#                    V,
#                    SC,
#                    N,
#                    THIN,
#                    PEAT,
#                    H_dom,
#                    D_gm,
#                    Harvested_V,
#                    Biomass,
#                    income_biomass,
#                    CARBON_STORAGE,
#                    MAIN_SP")

 

# --------------------
#   Process: QUERY
# --------------------

# Query each file in the folder depending on the management (set aside only or all regimes)
# Create the UNIT table (stored in the corresponding .db), which contains the indicators over time and under the management regimes


# !!! Only needed when a DB is loaded for the first time (this may take some time):        "first_load = TRUE"
# !!! Afterwards, it can be set to FALSE:                                                  "first_load = FALSE"
# !!! This functionality can also be used to extract new columns from a huge DB


first_load <- TRUE


####
if (first_load) {

  for (name in db_names) {
   
    # Check if the name contains "_SA" or "_initial"; both were simulated without management and require a specific query
    # if YES - run SA query
    # if NO  - run normal query
    
    
    if(grepl(pattern = '_SA|_initial', name)){
      
       # This needs a specific SQL query, since the database does not contain harvest information
       source(paste0(path, "/structure_SIMO_rslDB_SA.R"))
       
     } else {
       
       # Run the script with the SQL query for all management regimes besides SA
       source(paste0(path, "/structure_SIMO_rslDB_FBE.R")) 
       
     }
  }
  
}
 

# ---------------------
# Process: Export .csv
# ---------------------


# Create the final CSV for each database
# from the UNIT table

# Store all results in one large data frame:      all_in_one = TRUE
# Create one .csv file per db:                    all_in_one = FALSE
 


all_in_one <- TRUE

source(paste(path, "loadDB.R", sep = "/"))