Skip to content
Snippets Groups Projects
Commit c1fe5c0d authored by clblatte's avatar clblatte
Browse files

scripts to validate optimzation data

parent ddce4978
No related branches found
No related tags found
No related merge requests found
---
title: "check_opt_data"
author: "CB"
date: "7/28/2020"
output: html_document
---
# RCP0 whole Finland - version 7
### (rslt_RCP0_FIN_V7.csv)
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
### Running on cPouta or WIN local
on_cPouta <- TRUE
### Set path to input files
if(on_cPouta == TRUE) {
# for pouta
datapath <- "/media/volume/outp_rcp0/"
path <- "/home/ubuntu/workspace/MultiForest2/"
# path <- paste0(getwd(),"/")
} else {
# for Windows
datapath <- "C:/MyTemp/r_SIMO_output/output/MF_FIN/RCP0_test/"
}
### load libraries
library(dplyr)
library(tidyr)
library(ggplot2)
library(data.table)
library(gridExtra)
library(egg)
library(kableExtra)
```
```{r , echo=FALSE}
poutaxy <- read.csv(paste0(datapath, "rslt_RCP0_FIN_V7_XY.csv"), sep = ";", header = TRUE, stringsAsFactors = FALSE)
pouta0 <- read.csv(paste0(datapath, "rslt_RCP0_FIN_V7.csv"), sep = ";", header = TRUE, stringsAsFactors = FALSE)
#poutaxy <- read.csv(paste0(datapath, "rslt_test_XY.csv"), sep = ";", header = TRUE, stringsAsFactors = FALSE)
#pouta0 <- read.csv(paste0(datapath, "rslt_test.csv"), sep = ";", header = TRUE, stringsAsFactors = FALSE)
pouta <- pouta0 %>%
left_join(poutaxy[,c("id", "year", "regime", "standid", "CARBON_STORAGE")], by = c("id", "year", "regime"))
```
## How many stands?
```{r, echo=TRUE}
length(unique(pouta$id))
```
## Simulated regimes
```{r, echo=TRUE}
unique(pouta$regime)
```
## Quick summary of the data
```{r, echo=TRUE}
summary(pouta)
```
# Check for outliers of certain indicators
Based on summary, the following are looked at due to high max-min values: </br>
- V </br>
- i_Vm3 </br>
- Harvested_V (not under bark volumes and Biomass) </br>
- ALL_MARKETED_MUSHROOMS </br>
- V_total_deadwood </br>
- N_where_D_gt_40 </br>
- prc_V_deciduous </br>
- CARBON SINK </br>
- Recreation </br>
- Scenic </br>
## Indicator: V
```{r , echo=TRUE}
boxplot(pouta$V, main="Indicator: V")
V_out <- pouta %>% filter(V > 1500) %>% # Standing volume above xx m3/ha
mutate(error = "V")
# how many stands effected
length(unique(V_out$id))
# which input stands
unique(V_out$standid)
# which region
unique(V_out$region)
# which regime
unique(V_out$regime)
# overall 0.995 quantile
pouta %>% summarise(V = quantile(V, c(0.995)))
# replace the value that are above the threshold by regional and regime dependened quantile
pouta <- pouta %>%
group_by(region, regime) %>%
mutate(V = ifelse(V > 1500, quantile(V,0.995), V)) %>%
ungroup()
boxplot(pouta$V, main="Indicator after smoothing: V ")
```
## Indicator: i_Vm3
```{r , echo=TRUE}
boxplot(pouta$i_Vm3, main="Indicator: i_Vm3")
i_Vm3_out <- pouta %>% filter(i_Vm3 > 50) %>% # annual volume increment above xx m3/ha
mutate(error = "i_Vm3")
# how many stands effected
length(unique(i_Vm3_out$id))
# which input stands
unique(i_Vm3_out$standid)
# which region
unique(i_Vm3_out$region)
# which regime
unique(i_Vm3_out$regime)
# overall 0.995 quantile
pouta %>% summarise(i_Vm3 = quantile(i_Vm3, c(0.995)))
```
### What are the high values
```{r , echo=TRUE}
i_Vm3_high <- pouta %>%
select(c("id", "standid", "year", "regime", "i_Vm3", "V","Harvested_V")) %>%
filter(id %in% unique(i_Vm3_out$id)) %>%
filter(i_Vm3 > 50 )
# first 20 rows
i_Vm3_high[1:10,] %>%
knitr::kable() %>%
kable_styling()
```
```{r , echo=TRUE}
# replace the value that are above the threshold by regional and regime dependened quantile
pouta <- pouta %>%
group_by(region, regime) %>%
mutate(i_Vm3 = ifelse(i_Vm3 > 50, quantile(i_Vm3,0.995), i_Vm3)) %>%
ungroup()
boxplot(pouta$i_Vm3, main="Indicator after smoothing: i_Vm3")
```
## Indicator: Harvested_V
```{r , echo=TRUE}
boxplot(pouta[pouta$Harvested_V != 0, ]$Harvested_V, main="Harvested_V")
Harvested_V_out <- pouta %>% filter(Harvested_V > 1000) %>% # Harvested volume above xx m3/ha
mutate(error = "Harvested_V")
# how many stands effected
length(unique(Harvested_V_out$id))
# which input stands
unique(Harvested_V_out$standid)
# which region
unique(Harvested_V_out$region)
# which regime
unique(Harvested_V_out$regime)
# overall 0.995 quantile
pouta %>% summarise(Harvested_V = quantile(Harvested_V, c(0.995)))
# replace the value that are above the threshold by regional and regime dependened quantile
pouta <- pouta %>%
group_by(region, regime) %>%
mutate(Harvested_V = ifelse(Harvested_V > 1500, quantile(Harvested_V,0.995), Harvested_V)) %>%
ungroup()
boxplot(pouta[pouta$Harvested_V != 0, ]$Harvested_V, main="Indicator after smoothing: Harvested_V ")
```
## Indicator: ALL_MARKETED_MUSHROOMS
```{r , echo=TRUE}
boxplot(pouta$ALL_MARKETED_MUSHROOMS, main="ALL_MARKETED_MUSHROOMS" )
plot_mushroom <- pouta %>%
filter(ALL_MARKETED_MUSHROOMS < 10000) # filter out the extrem high one for plotting
boxplot(plot_mushroom$ALL_MARKETED_MUSHROOMS, main="ALL_MARKETED_MUSHROOMS without main outlier")
mushroom_out <- pouta %>% filter(ALL_MARKETED_MUSHROOMS > 700) %>% # Mushrooms above xx kg/ha
mutate(error = "ALL_MARKEDET_MUSHROOMS")
# how many stands effected
length(unique(mushroom_out$id))
# which input stands
unique(mushroom_out$standid)
# which region
unique(mushroom_out$region)
# which regime
unique(mushroom_out$regime)
# overall 0.995 quantile
pouta %>% summarise(ALL_MARKETED_MUSHROOMS = quantile(ALL_MARKETED_MUSHROOMS, c(0.995)))
# replace the value that are above the threshold by regional and regime dependened quantile
pouta <- pouta %>%
group_by(region, regime) %>%
mutate(ALL_MARKETED_MUSHROOMS = ifelse(ALL_MARKETED_MUSHROOMS > 700, quantile(ALL_MARKETED_MUSHROOMS,0.995), ALL_MARKETED_MUSHROOMS)) %>%
ungroup()
boxplot(pouta$ALL_MARKETED_MUSHROOMS, main="Indicator after smoothing: ALL_MARKETED_MUSHROOMS ")
```
## Indicator: V_total_deadwood
```{r , echo=TRUE}
boxplot(pouta$V_total_deadwood, main="V_total_deadwood")
deadwood_out <- pouta %>% filter(V_total_deadwood > 700) %>% # Volume of Deadwood above xx m3/ha
mutate(error = "V_total_deadwood")
# how many stands effected
length(unique(deadwood_out$id))
# which input stands
unique(deadwood_out$standid)
# which region
unique(deadwood_out$region)
# which regime
unique(deadwood_out$regime)
# overall 0.995 quantile
pouta %>% summarise(V_total_deadwood = quantile(V_total_deadwood, c(0.995)))
# replace the value that are above the threshold by regional and regime dependened quantile
pouta <- pouta %>%
group_by(region, regime) %>%
mutate(V_total_deadwood = ifelse(V_total_deadwood > 700, quantile(V_total_deadwood,0.995), V_total_deadwood)) %>%
ungroup()
boxplot(pouta$V_total_deadwood, main="Indicator after smoothing: V_total_deadwood ")
```
## Indicator: N_where_D_gt_40
```{r , echo=TRUE}
boxplot(pouta$N_where_D_gt_40, main="N_where_D_gt_40")
llt_out <- pouta %>% filter(N_where_D_gt_40 > 500) %>% # Number of big trees (> 40) above xx N/ha
mutate(error = "N_where_D_gt_40")
# how many stands effected
length(unique(llt_out$id))
# which input stands
unique(llt_out$standid)
# which region
unique(llt_out$region)
# which regime
unique(llt_out$regime)
# overall 0.995 quantile
pouta %>% summarise(N_where_D_gt_40 = quantile(N_where_D_gt_40, c(0.995)))
# replace the value that are above the threshold by regional and regime dependened quantile
pouta <- pouta %>%
group_by(region, regime) %>%
mutate(N_where_D_gt_40 = ifelse(N_where_D_gt_40 > 500, quantile(N_where_D_gt_40,0.995), N_where_D_gt_40)) %>%
ungroup()
boxplot(pouta$N_where_D_gt_40, main="Indicator after smoothing: N_where_D_gt_40 ")
```
## Indicator: prc_V_deciduous
```{r , echo=TRUE}
boxplot(pouta$prc_V_deciduous, main="prc_V_deciduous")
```
## Indicator: CARBON SINK
```{r , echo=TRUE}
plot_carbon <- pouta %>%
filter(CARBON_SINK > -5000000)
boxplot(plot_carbon$CARBON_SINK, main="CARBON_SINK without the huge outlier")
carbon_sink_out <- pouta %>% filter(CARBON_SINK < -1000000 | CARBON_SINK > 1000000) %>% # source/sink of more than 1000 t CO2/ha
mutate(error = "CARBON_SINK")
# how many stands
length(unique(carbon_sink_out$id))
# which input stands
unique(carbon_sink_out$standid)
# which regime
unique(carbon_sink_out$regime)
# which region
unique(carbon_sink_out$region)
```
### What are the high values
```{r , echo=TRUE}
carbononly <- pouta %>%
select(c("id", "standid", "year", "regime", "V", "Harvested_V" ,"CARBON_STORAGE", "CARBON_SINK")) %>%
filter(id %in% unique(carbon_sink_out$id))
carbononly %>%
filter(CARBON_SINK < -1000000 | CARBON_SINK > 1000000) %>%
knitr::kable() %>%
kable_styling()
```
### Plot some example stands
```{r , echo=TRUE}
carbonplot <- carbononly %>%
filter(regime %in% c("SA","initial_state") & id %in% 99037593 |
regime %in% c("BAU_30","initial_state") & id %in% 99034584 |
regime %in% c("CCF_2", "CCF_1", "CCF_3", "initial_state") & id %in% 99032541) %>%
mutate(deltaCarbonstorage = round(CARBON_SINK / 3.67, digits = 2))
# for scenarios BAU_30 and SA
p1 = ggplot(carbonplot[carbonplot$regime %in% c("BAU_30", "SA"),], aes(year, V)) +
geom_line() +
facet_grid(.~regime) +
theme(axis.text.x = element_text(angle = 90))
p2 = ggplot(carbonplot[carbonplot$regime %in% c("BAU_30", "SA"),], aes(year, CARBON_STORAGE)) +
geom_line() +
facet_grid(.~regime) +
theme(axis.text.x = element_text(angle = 90)) +
scale_y_continuous(labels = function(x) format(x, scientific = FALSE))
p3 = ggplot(carbonplot[carbonplot$regime %in% c("BAU_30", "SA"),], aes(year, deltaCarbonstorage)) +
geom_line() +
facet_grid(.~regime) +
theme(axis.text.x = element_text(angle = 90)) +
scale_y_continuous(labels = function(x) format(x, scientific = FALSE))
ggarrange(p1, p2, p3,
top = "BAU_30 99034584 standid: 9932854 - SA 99037593 standid: 10628286")
```
!!! HIGH Carbon sinks due to extremly high simulated Volumes !!!
```{r , echo=TRUE}
# for scenarios CC_2, _1 and _3
p4 = ggplot(carbonplot[carbonplot$regime %in% c("CCF_2", "CCF_1", "CCF_3"),], aes(year, V)) +
geom_line() +
facet_grid(.~regime) +
theme(axis.text.x = element_text(angle = 90))
p5 = ggplot(carbonplot[carbonplot$regime %in% c("CCF_2", "CCF_1", "CCF_3"),], aes(year, CARBON_STORAGE)) +
geom_line() +
facet_grid(.~regime) +
theme(axis.text.x = element_text(angle = 90)) +
scale_y_continuous(labels = function(x) format(x, scientific = FALSE))
p6 = ggplot(carbonplot[carbonplot$regime %in% c("CCF_2", "CCF_1", "CCF_3"),], aes(year, deltaCarbonstorage)) +
geom_line() +
facet_grid(.~regime) +
theme(axis.text.x = element_text(angle = 90)) +
scale_y_continuous(labels = function(x) format(x, scientific = FALSE))
ggarrange(p4, p5, p6,
top = "99032541 - standid 23271031")
```
!!! HIGH Carbon sinks due to extremly high simulated Volumes !!!
### Replace the high values by qunatile
```{r , echo=TRUE}
# overall 0.995 quantile
pouta %>% summarise(CARBON_SINK = quantile(CARBON_SINK, c(0.995,0.005), na.rm = TRUE))
# replace the value that are above the threshold by regional and regime dependened quantile
pouta <- pouta %>%
group_by(region, regime) %>%
mutate(CARBON_SINK = ifelse(CARBON_SINK > quantile(CARBON_SINK,0.995,na.rm = TRUE), quantile(CARBON_SINK,0.995,na.rm = TRUE), CARBON_SINK)) %>%
mutate(CARBON_SINK = ifelse(CARBON_SINK < quantile(CARBON_SINK,0.005,na.rm = TRUE), quantile(CARBON_SINK,0.005,na.rm = TRUE), CARBON_SINK)) %>%
ungroup()
boxplot(pouta$CARBON_SINK, main="Indicator after smoothing: CARBON_SINK ")
```
## Indicator: Recreation
```{r , echo=TRUE}
boxplot(pouta$Recreation, main="Recreation")
recreation_out <- pouta %>% filter(Recreation < 0) %>% # values below 0
mutate(error = "Recreation")
# how many below 0
length(unique(recreation_out$id))
recreation_out_q <- pouta %>% filter(Recreation < quantile(Recreation,0.005,na.rm = TRUE))
# how many stands
length(unique(recreation_out_q$id))
# which input stands
unique(recreation_out_q$standid)
# which regime
unique(recreation_out_q$regime)
```
## Indicator: Scenic
```{r , echo=TRUE}
boxplot(pouta$Scenic, main="Scenic")
scenic_out <- pouta %>% filter(Scenic < 0) %>% # values below 0
mutate(error = "Scenic")
# how many below 0
length(unique(scenic_out$id))
scenic_out_q <- pouta %>% filter(Scenic < quantile(Scenic,0.005,na.rm = TRUE))
# how many stands
length(unique(scenic_out_q$id))
# which input stands
unique(scenic_out_q$standid)
# which regime
unique(scenic_out_q$regime)
```
## Combine all excluded cases (based on the thresholds)
### How many stands are effected by thresholds? (Recreation and Scenic not included!)
```{r , echo=TRUE}
out_all <- rbind(V_out, i_Vm3_out, Harvested_V_out, mushroom_out, deadwood_out, llt_out, carbon_sink_out
#,scenic_out, recreation_out
)
length(unique(out_all$id))
```
### How many stands have multiple times be filtered out?
```{r , echo=TRUE}
length(unique(V_out$id)) +
length(unique(i_Vm3_out$id)) +
length(unique(Harvested_V_out$id)) +
length(unique(mushroom_out$id)) +
length(unique(deadwood_out$id)) +
length(unique(llt_out$id))+
length(unique(carbon_sink_out$id)) - length(unique(out_all$id))
```
### Which stands caused unlogical values for several indicators?
```{r , echo=TRUE}
out_all_spread <- out_all %>%
group_by(standid, error) %>%
count(error) %>%
spread(error, n)
out_all_spread[1:10,] %>%
knitr::kable() %>%
kable_styling()
```
### Example stand that caused errors for 5 indicators
### Which regimes?
```{r , echo=TRUE}
# 5 indicators are: ALL_MARKETED_MUSHROOMS, Harvested_V, i_Vm3, N_where_D_gt_40, V
unique(out_all[out_all$standid == 14505106,]$regime)
# How does the input look like?
# 1;14505106;498288.75825802;6917281.65776072;1.228;2018-08-05;1;2;3;60;9;T1;1984;-1;1;1;1;1;0;0;1;1;4
# 2;2234390572;1;2;8;0;1800;0;1.14;;;;;;;;;;;;;;
# 2;2234390573;1;29;5;0;6240;0;1.3;;;;;;;;;;;;;;
```
### Example stand that caused errors for 4 indicators
### Which regimes?
```{r , echo=TRUE}
# 4 indicators are: CARBON_SINK, Harvested_V, i_Vm3, V
unique(out_all[out_all$standid == 23271031,]$regime)
# How does the input look like?
# 1;23271031;549478.4914827;7027894.3495342;0.104;2019-09-01;1;1;2;20;1;S0;-1;-1;1;1;1;1;0;0;1;1;4
# 2;2282172490;3;4;92;5.11;69;30.73;21.12;;;;;;;;;;;;;;
# 2;2282172491;3;5;92;2.06;18;38.8;21.12;;;;;;;;;;;;;;
# 2;2282172492;2;2;18;0.48;391;3.94;5.6;;;;;;;;;;;;;;
# 2;2282172493;2;9;12;4.57;2249;5.09;3.64;;;;;;;;;;;;;;
```
### ALL_MARKETED_MUSHROOMS occured often in combination with i_Vm3
### Under which regimes?
```{r , echo=TRUE}
unique(out_all[out_all$error %in% c("i_Vm3", "ALL_MARKEDET_MUSHROOMS"),]$regime)
```
No clear systematic !
# Plot the individual indicators over the time (data preparation)
```{r , echo=TRUE}
# # Use a subsample or all data
# standid <- unique(pouta$id)
# standidsample <- sample(standid, 100)
# rslt.final <- pouta %>% filter(id %in% standidsample)
# Use all data
# remove the ID that have a match in out_all
rslt.final <- pouta # %>%
# filter(id != unique(out_all$id)) # does not work!?!?
# anti_join(out_all, by = "id")
length(unique(rslt.final$id))
# -------------------
# percentage of broadleave contains still NA ! Remove it.
# -------------------
rslt.final$prc_V_deciduous[is.na(rslt.final$prc_V_deciduous)] <- 0
# -------------------
# Add the geographical region (South, Central, North, Lappland)
# -------------------
if(on_cPouta == TRUE) {
# for pouta
province <- read.csv(paste0(path,"params/provinceInfo/provinces2.csv"), sep = ";", header = TRUE, stringsAsFactors = FALSE)
} else {
# for Windows
province <- read.csv("C:/MyTemp/r_SIMO_output/params/provinceInfo/provinces2.csv", sep = ";", header = TRUE, stringsAsFactors = FALSE)
}
province <- province %>% rename(region = code_metsa)
rslt.final <- rslt.final %>%
left_join(province[,c("region", "NFIGrid")], by= "region")
rslt.final$NFIGrid <- as.factor(rslt.final$NFIGrid)
rslt.final$NFIGrid <- factor(rslt.final$NFIGrid, levels = c("South", "Central", "North", "Lapland"))
# -------------------
# Scale 5 year periodic values to yearly values (5 year simulation time steps)
# -------------------
rslt.final <- rslt.final %>%
mutate(Harvested_V = Harvested_V / 5,
Harvested_V_log_under_bark = Harvested_V_log_under_bark / 5,
Harvested_V_pulp_under_bark = Harvested_V_pulp_under_bark / 5,
Harvested_V_under_bark = Harvested_V_under_bark / 5,
Biomass = Biomass / 5,
# divide additionally by 1000 -> policy talks about million "tonnes" of CO2
# Currently, it is still kg CO2
CARBON_SINK = CARBON_SINK / 5 / 1000 )
```
## Function: plot MEAN values by regime and geographical region
```{r , echo=TRUE}
# column <- "V"
plotfkt <- function(column){
if(column %in% c("CARBON_SINK","i_Vm3")){
# Wihtout initial Values in 2016 as not available
meanovertime <- rslt.final[rslt.final$regime != "initial_state",] %>%
group_by(year, regime, NFIGrid) %>%
summarise(mean = mean(get(column))) %>%
ggplot(aes(year, mean)) +
geom_line( aes(color = regime)) +
theme(axis.text.x = element_text(angle = 90),
legend.position="bottom") +
scale_y_continuous(labels = function(x) format(x, scientific = FALSE)) +
scale_x_continuous(breaks=c(2016,2036,2056,2076,2096,2116)) +
facet_grid(.~NFIGrid) +
labs(title = paste("Indicator",column, "over simulation time"),
subtitle = "(initial state 2016 not available)")
} else {
# start timely developtment from initial value
# get values for 2016
mean2016 <- rslt.final[rslt.final$year %in% 2016,] %>%
group_by(year, regime, NFIGrid) %>%
summarise(mean2016 = mean(get(column)))
# get all regimes and extend by 2016 values
extendregimesfor2016 <- rslt.final[rslt.final$year %in% 2021,] %>%
group_by(regime, NFIGrid) %>%
summarise(mean2021 = mean(get(column))) %>%
left_join(mean2016[,c("NFIGrid","mean2016")], by = "NFIGrid") %>%
mutate(year = 2016) %>%
select(-mean2021) %>%
rename(mean = mean2016) %>%
select(year, regime, NFIGrid, mean)
meanovertime <- rslt.final[rslt.final$regime != "initial_state",] %>%
group_by(year, regime, NFIGrid) %>%
summarise(mean = mean(get(column))) %>%
rbind(extendregimesfor2016) %>%
ggplot(aes(year, mean)) +
geom_line( aes(color = regime)) +
geom_hline(data = mean2016, aes(yintercept = mean2016), linetype="dashed", color = "red") +
theme(axis.text.x = element_text(angle = 90),
legend.position="bottom") +
scale_y_continuous(labels = function(x) format(x, scientific = FALSE)) +
scale_x_continuous(breaks=c(2016,2036,2056,2076,2096,2116)) +
facet_grid(.~NFIGrid) +
labs(title = paste("Indicator",column, "over simulation time"),
subtitle = "(red dashed line: initial state 2016)")
}
plot(meanovertime)
# ggsave(plot = meanovertime, paste0(path,"/MultiForest/checkdata/graphics/plot_",column,".tiff"), width=8, height=6)
}
```
## Indicator: V
```{r , message=FALSE}
plotfkt("V")
```
## Indicator: i_Vm3
```{r , message=FALSE}
plotfkt("i_Vm3")
```
## Indicator: DEVEL_CLASS
```{r , message=FALSE}
plotfkt("DEVEL_CLASS")
```
## Indicator: ALL_MARKETED_MUSHROOMS
```{r , message=FALSE}
plotfkt("ALL_MARKETED_MUSHROOMS")
```
## Indicator: BILBERRY
```{r , message=FALSE}
plotfkt("BILBERRY")
```
## Indicator: COWBERRY
```{r , message=FALSE}
plotfkt("COWBERRY")
```
## Indicator: HSI_MOOSE
```{r , message=FALSE}
plotfkt("HSI_MOOSE")
```
## Indicator: CAPERCAILLIE
### according Mönkkönen et al. (2014), Indice also used to calculate the HSI of 6 umbrella species
```{r , message=FALSE}
plotfkt("CAPERCAILLIE")
```
## Indicator: HSI_CAPERCAILLIE
### according Väisänen, R. Metson (2008) (FIN only)
```{r , message=FALSE}
plotfkt("HSI_CAPERCAILLIE")
```
## Indicator: HAZEL_GROUSE
```{r , message=FALSE}
plotfkt("HAZEL_GROUSE")
```
## Indicator: COMBINED_HSI
```{r , message=FALSE}
plotfkt("COMBINED_HSI")
```
## Indicator: weighted_mean_HSI27
```{r , message=FALSE}
plotfkt("weighted_mean_HSI27")
```
## Indicator: V_total_deadwood
```{r , message=FALSE}
plotfkt("V_total_deadwood")
```
## Indicator: N_where_D_gt_40
```{r , message=FALSE}
plotfkt("N_where_D_gt_40")
```
## Indicator: prc_V_deciduous
```{r , message=FALSE}
plotfkt("prc_V_deciduous")
```
## Indicator: clearcut
```{r , message=FALSE}
plotfkt("clearcut")
```
## Indicator: CARBON_SINK
```{r , message=FALSE}
plotfkt("CARBON_SINK")
```
## Indicator: Recreation
```{r , message=FALSE}
plotfkt("Recreation")
```
## Indicator: Scenic
```{r , message=FALSE}
plotfkt("Scenic")
```
## Funtion: Plot CUMSUM values by regime and geographical region
## For harvested values
```{r , echo=TRUE}
# column <- "Harvested_V"
plotfkt_harvest <- function(column){
# get values for 2016 -> all zero
mean2016 <- rslt.final[rslt.final$year %in% 2016,] %>%
group_by(year, regime, NFIGrid) %>%
summarise(mean2016 = mean(get(column)))
# get all regimes and extend by 2016 values
extendregimesfor2016 <- rslt.final[rslt.final$year %in% 2021,] %>%
group_by(regime, NFIGrid) %>%
summarise(mean2021 = mean(get(column))) %>%
left_join(mean2016[,c("NFIGrid","mean2016")], by = "NFIGrid") %>%
mutate(year = 2016) %>%
select(-mean2021) %>%
rename(mean = mean2016) %>%
select(year, regime, NFIGrid, mean)
meanovertime <- rslt.final[rslt.final$regime != "initial_state",] %>%
group_by(year, regime, NFIGrid) %>%
summarise(mean = mean(get(column))) %>%
rbind(extendregimesfor2016) %>%
# order by year !!
arrange(year) %>%
group_by(regime, NFIGrid) %>%
mutate(cumsum =cumsum(mean)) %>%
ggplot(aes(year, cumsum)) +
geom_line( aes(color = regime)) +
theme(axis.text.x = element_text(angle = 90),
legend.position="bottom") +
scale_y_continuous(labels = function(x) format(x, scientific = FALSE)) +
scale_x_continuous(breaks=c(2016,2036,2056,2076,2096,2116)) +
facet_grid(.~NFIGrid) +
labs(title = paste("Indicator",column, ", cumulative sum of mean values per year"),
subtitle = "(red dashed line: initial state 2016)")
plot(meanovertime)
# ggsave(plot = meanovertime, paste0(path,"/MultiForest/checkdata/graphics/plot_cumsum_of_mean_",column,".tiff"), width=8, height=6)
}
```
## Indicator: Harvested_V
```{r , message=FALSE}
plotfkt_harvest("Harvested_V")
```
## Indicator: Harvested_V_under_bark
```{r , message=FALSE}
plotfkt_harvest("Harvested_V_under_bark")
```
## Indicator: Biomass
```{r , message=FALSE}
plotfkt_harvest("Biomass")
```
## Indicator: Harvested_V_log_under_bark
```{r , message=FALSE}
plotfkt_harvest("Harvested_V_log_under_bark")
```
## Indicator: Harvested_V_pulp_under_bark
```{r , message=FALSE}
plotfkt_harvest("Harvested_V_pulp_under_bark")
```
## How does it look for CARBON_SINK ???
```{r , message=FALSE}
plotfkt_harvest("CARBON_SINK")
```
# Save as data set for the Optimization framework
```{r , echo=TRUE}
# For the Optimization (Jupyter Notebook) the data can be further ZIP compressed.
# csv for optimization
write.table(rslt.final, paste0(datapath, "rslt_RCP0_FIN_V7_smooth3.csv" ),
sep = ";",
row.names = F,
col.names = TRUE)
```
---
title: "check_opt_data"
author: "CB"
date: "7/28/2020"
output: html_document
---
# RCP0 whole Finland - version 7
### (rslt_RCP0_FIN_V7.csv)
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
### Running on cPouta or WIN local
on_cPouta <- TRUE
### Set path to input files
if(on_cPouta == TRUE) {
# for pouta
datapath <- "/media/volume/outp_rcp0/"
path <- "/home/ubuntu/workspace/MultiForest2/"
# path <- paste0(getwd(),"/")
} else {
# for Windows
datapath <- "C:/MyTemp/r_SIMO_output/output/MF_FIN/RCP0_test/"
}
### load libraries
library(dplyr)
library(tidyr)
library(ggplot2)
library(data.table)
library(gridExtra)
library(egg)
library(kableExtra)
```
```{r , echo=FALSE}
poutaxy <- read.csv(paste0(datapath, "rslt_RCP0_FIN_V7_XY.csv"), sep = ";", header = TRUE, stringsAsFactors = FALSE)
pouta0 <- read.csv(paste0(datapath, "rslt_RCP0_FIN_V7.csv"), sep = ";", header = TRUE, stringsAsFactors = FALSE)
# poutaxy <- read.csv(paste0(datapath, "rslt_test_XY.csv"), sep = ";", header = TRUE, stringsAsFactors = FALSE)
# pouta0 <- read.csv(paste0(datapath, "rslt_test.csv"), sep = ";", header = TRUE, stringsAsFactors = FALSE)
pouta <- pouta0 %>%
dplyr::left_join(poutaxy[,c("id", "year", "regime", "standid", "CARBON_STORAGE")], by = c("id", "year", "regime"))
```
## How many stands?
```{r, echo=TRUE}
length(unique(pouta$id))
```
## Simulated regimes
```{r, echo=TRUE}
unique(pouta$regime)
```
# Check for outliers of certain indicators
Based on summary, the following are looked at due to high max-min values: </br>
- V </br>
- i_Vm3 </br>
- CARBON SINK </br>
## Indicator: V
```{r , echo=TRUE}
boxplot(pouta$V, main="Indicator: V")
V_out <- pouta %>% dplyr::filter(V > 1500) %>% # Standing volume above xx m3/ha
dplyr::mutate(error = "V")
# how many stands effected
length(unique(V_out$standid))
# ---------------
# which stands
# ---------------
unique(V_out$standid)
# which region
unique(V_out$region)
# which regime
unique(V_out$regime)
```
## Indicator: i_Vm3
```{r , echo=TRUE}
boxplot(pouta$i_Vm3, main="Indicator: i_Vm3")
i_Vm3_out <- pouta %>% filter(i_Vm3 > 50) %>% # annual volume increment above xx m3/ha
mutate(error = "i_Vm3")
# how many stands effected
length(unique(i_Vm3_out$id))
# ---------------
# which stands
# ---------------
unique(i_Vm3_out$standid)
# which region
unique(i_Vm3_out$region)
# which regime
unique(i_Vm3_out$regime)
```
### What are the high values
```{r , echo=TRUE}
i_Vm3_high <- pouta %>%
select(c("id", "standid", "year", "regime", "i_Vm3", "V","Harvested_V")) %>%
filter(id %in% unique(i_Vm3_out$id)) %>%
filter(i_Vm3 > 50 )
# first 20 rows
i_Vm3_high[1:20,] %>%
knitr::kable(caption = "FIRST 20 ROWS ONLY !!") %>%
kable_styling()
```
```{r , echo=TRUE}
## Indicator: CARBON SINK
```{r , echo=TRUE}
plot_carbon <- pouta %>%
filter(CARBON_SINK > -5000000)
boxplot(plot_carbon$CARBON_SINK, main="CARBON_SINK without the huge outlier")
carbon_sink_out <- pouta %>% filter(CARBON_SINK < -1000000 | CARBON_SINK > 1000000) %>% # source/sink of more than 1000 t CO2/ha
mutate(error = "CARBON_SINK")
# how many stands
length(unique(carbon_sink_out$id))
# ---------------
# which stands
# ---------------
unique(carbon_sink_out$standid)
# which regime
unique(carbon_sink_out$regime)
# which region
unique(carbon_sink_out$region)
```
### What are the high values
```{r , echo=TRUE}
carbononly <- pouta %>%
select(c("id", "standid", "year", "regime", "V", "Harvested_V" ,"CARBON_STORAGE", "CARBON_SINK")) %>%
filter(id %in% unique(carbon_sink_out$id))
carbononly %>%
filter(CARBON_SINK < -1000000 | CARBON_SINK > 1000000) %>%
knitr::kable(caption = "ALL ROWS !!") %>%
kable_styling()
```
### Plot some example stands
```{r , echo=TRUE}
carbonplot <- carbononly %>%
filter(regime %in% c("SA","initial_state") & id %in% 99037593 |
regime %in% c("BAU_30","initial_state") & id %in% 99034584 |
regime %in% c("CCF_2", "CCF_1", "CCF_3", "initial_state") & id %in% 99032541) %>%
mutate(deltaCarbonstorage = round(CARBON_SINK / 3.67, digits = 2))
# for scenarios BAU_30 and SA
p1 = ggplot(carbonplot[carbonplot$regime %in% c("BAU_30", "SA"),], aes(year, V)) +
geom_line() +
facet_grid(.~regime) +
theme(axis.text.x = element_text(angle = 90))
p2 = ggplot(carbonplot[carbonplot$regime %in% c("BAU_30", "SA"),], aes(year, CARBON_STORAGE)) +
geom_line() +
facet_grid(.~regime) +
theme(axis.text.x = element_text(angle = 90)) +
scale_y_continuous(labels = function(x) format(x, scientific = FALSE))
p3 = ggplot(carbonplot[carbonplot$regime %in% c("BAU_30", "SA"),], aes(year, deltaCarbonstorage)) +
geom_line() +
facet_grid(.~regime) +
theme(axis.text.x = element_text(angle = 90)) +
scale_y_continuous(labels = function(x) format(x, scientific = FALSE))
ggarrange(p1, p2, p3,
top = "BAU_30 99034584 standid: 9932854 - SA 99037593 standid: 10628286")
```
!!! HIGH Carbon sinks due to extremly high simulated Volumes !!!
```{r , echo=TRUE}
# for scenarios CC_2, _1 and _3
p4 = ggplot(carbonplot[carbonplot$regime %in% c("CCF_2", "CCF_1", "CCF_3"),], aes(year, V)) +
geom_line() +
facet_grid(.~regime) +
theme(axis.text.x = element_text(angle = 90))
p5 = ggplot(carbonplot[carbonplot$regime %in% c("CCF_2", "CCF_1", "CCF_3"),], aes(year, CARBON_STORAGE)) +
geom_line() +
facet_grid(.~regime) +
theme(axis.text.x = element_text(angle = 90)) +
scale_y_continuous(labels = function(x) format(x, scientific = FALSE))
p6 = ggplot(carbonplot[carbonplot$regime %in% c("CCF_2", "CCF_1", "CCF_3"),], aes(year, deltaCarbonstorage)) +
geom_line() +
facet_grid(.~regime) +
theme(axis.text.x = element_text(angle = 90)) +
scale_y_continuous(labels = function(x) format(x, scientific = FALSE))
ggarrange(p4, p5, p6,
top = "99032541 - standid 23271031")
```
......@@ -123,7 +123,7 @@ length(unique(V_out_q$id))
boxplot(pouta$i_Vm3, main="Indicator: i_Vm3")
i_Vm3_out <- pouta %>% filter(i_Vm3 > 50) %>% # annual volume increment above xx m3/ha
i_Vm3_out <- pouta %>% filter(i_Vm3 > 100) %>% # annual volume increment above xx m3/ha
mutate(error = "i_Vm3")
length(unique(i_Vm3_out$id))
unique(i_Vm3_out$regime)
......@@ -143,7 +143,7 @@ plotharvest <- pouta %>% filter(Harvested_V != 0)
boxplot(plotharvest$Harvested_V, main="Harvested_V")
Harvested_V_out <- pouta %>% filter(Harvested_V > 1000) %>% # Harvested volume above xx m3/ha
Harvested_V_out <- pouta %>% filter(Harvested_V > 1500) %>% # Harvested volume above xx m3/ha
mutate(error = "Harvested_V")
length(unique(Harvested_V_out$id))
......@@ -161,7 +161,7 @@ length(unique(Harvested_V_out_q$id))
boxplot(pouta$ALL_MARKETED_MUSHROOMS, main="ALL_MARKETED_MUSHROOMS" )
mushroom_out <- pouta %>% filter(ALL_MARKETED_MUSHROOMS > 700) %>% # Mushrooms above xx kg/ha
mushroom_out <- pouta %>% filter(ALL_MARKETED_MUSHROOMS > 1000) %>% # Mushrooms above xx kg/ha
mutate(error = "ALL_MARKEDET_MUSHROOMS")
length(unique(mushroom_out$id))
......@@ -178,7 +178,7 @@ length(unique(mushroom_out_q$id))
boxplot(pouta$V_total_deadwood, main="V_total_deadwood")
deadwood_out <- pouta %>% filter(V_total_deadwood > 700) %>% # Volume of Deadwood above xx m3/ha
deadwood_out <- pouta %>% filter(V_total_deadwood > 1000) %>% # Volume of Deadwood above xx m3/ha
mutate(error = "V_total_deadwood")
length(unique(deadwood_out$id))
......@@ -222,7 +222,7 @@ boxplot(pouta$prc_V_deciduous, main="prc_V_deciduous")
boxplot(pouta$CARBON_SINK, main="CARBON_SINK")
carbon_sink_out <- pouta %>% filter(CARBON_SINK < -1000000 | CARBON_SINK > 1000000) %>% # source/sink of more than 1000 t CO2/ha
carbon_sink_out <- pouta %>% filter(CARBON_SINK < -1500000 | CARBON_SINK > 1500000) %>% # source/sink of more than 1000 t CO2/ha
mutate(error = "CARBON_SINK")
length(unique(carbon_sink_out$id))
unique(carbon_sink_out$regime)
......
......@@ -134,7 +134,7 @@ plot_i_Vm3 <- pouta %>%
boxplot(plot_i_Vm3$i_Vm3, main="i_Vm3 without main outlier")
i_Vm3_out <- pouta %>% filter(i_Vm3 > 50) %>% # annual volume increment above xx m3/ha
i_Vm3_out <- pouta %>% filter(i_Vm3 > 100) %>% # annual volume increment above xx m3/ha
mutate(error = "i_Vm3")
length(unique(i_Vm3_out$id))
unique(i_Vm3_out$regime)
......@@ -154,7 +154,7 @@ plotharvest <- pouta %>% filter(Harvested_V != 0)
boxplot(plotharvest$Harvested_V, main="Harvested_V")
Harvested_V_out <- pouta %>% filter(Harvested_V > 1000) %>% # Harvested volume above xx m3/ha
Harvested_V_out <- pouta %>% filter(Harvested_V > 1500) %>% # Harvested volume above xx m3/ha
mutate(error = "Harvested_V")
length(unique(Harvested_V_out$id))
......@@ -177,7 +177,7 @@ plot_MUSHROOMS <- pouta %>%
boxplot(plot_MUSHROOMS$ALL_MARKETED_MUSHROOMS, main="ALL_MARKETED_MUSHROOMS without main outlier")
mushroom_out <- pouta %>% filter(ALL_MARKETED_MUSHROOMS > 700) %>% # Mushrooms above xx kg/ha
mushroom_out <- pouta %>% filter(ALL_MARKETED_MUSHROOMS > 1000) %>% # Mushrooms above xx kg/ha
mutate(error = "ALL_MARKEDET_MUSHROOMS")
length(unique(mushroom_out$id))
......@@ -199,7 +199,7 @@ plot_V_total_deadwood <- pouta %>%
boxplot(plot_V_total_deadwood$V_total_deadwood, main="V_total_deadwood without main outlier")
deadwood_out <- pouta %>% filter(V_total_deadwood > 700) %>% # Volume of Deadwood above xx m3/ha
deadwood_out <- pouta %>% filter(V_total_deadwood > 1000) %>% # Volume of Deadwood above xx m3/ha
mutate(error = "V_total_deadwood")
length(unique(deadwood_out$id))
......@@ -249,7 +249,7 @@ plot_carbon <- pouta %>%
boxplot(plot_carbon$CARBON_SINK, main="CARBON_SINK without the huge outlier")
carbon_sink_out <- pouta %>% filter(CARBON_SINK < -1000000 | CARBON_SINK > 1000000) %>% # source/sink of more than 1000 t CO2/ha
carbon_sink_out <- pouta %>% filter(CARBON_SINK < -1500000 | CARBON_SINK > 1500000) %>% # source/sink of more than 1000 t CO2/ha
mutate(error = "CARBON_SINK")
length(unique(carbon_sink_out$id))
unique(carbon_sink_out$regime)
......
......@@ -27,7 +27,6 @@ if(on_cPouta == TRUE) {
} else {
# for Windows
.libPaths("C:/Devel/R/library")
path <- paste0(getwd(),"/")
datapath <- "C:/MyTemp/r_SIMO_output/output/MF_FIN/"
......@@ -135,7 +134,7 @@ plot_i_Vm3 <- pouta %>%
boxplot(plot_i_Vm3$i_Vm3, main="i_Vm3 without main outlier")
i_Vm3_out <- pouta %>% filter(i_Vm3 > 50) %>% # annual volume increment above xx m3/ha
i_Vm3_out <- pouta %>% filter(i_Vm3 > 100) %>% # annual volume increment above xx m3/ha
mutate(error = "i_Vm3")
length(unique(i_Vm3_out$id))
unique(i_Vm3_out$regime)
......@@ -155,7 +154,7 @@ plotharvest <- pouta %>% filter(Harvested_V != 0)
boxplot(plotharvest$Harvested_V, main="Harvested_V")
Harvested_V_out <- pouta %>% filter(Harvested_V > 1000) %>% # Harvested volume above xx m3/ha
Harvested_V_out <- pouta %>% filter(Harvested_V > 1500) %>% # Harvested volume above xx m3/ha
mutate(error = "Harvested_V")
length(unique(Harvested_V_out$id))
......@@ -173,7 +172,7 @@ length(unique(Harvested_V_out_q$id))
boxplot(pouta$ALL_MARKETED_MUSHROOMS, main="ALL_MARKETED_MUSHROOMS" )
mushroom_out <- pouta %>% filter(ALL_MARKETED_MUSHROOMS > 700) %>% # Mushrooms above xx kg/ha
mushroom_out <- pouta %>% filter(ALL_MARKETED_MUSHROOMS > 1000) %>% # Mushrooms above xx kg/ha
mutate(error = "ALL_MARKEDET_MUSHROOMS")
length(unique(mushroom_out$id))
......@@ -190,7 +189,7 @@ length(unique(mushroom_out_q$id))
boxplot(pouta$V_total_deadwood, main="V_total_deadwood")
deadwood_out <- pouta %>% filter(V_total_deadwood > 700) %>% # Volume of Deadwood above xx m3/ha
deadwood_out <- pouta %>% filter(V_total_deadwood > 1000) %>% # Volume of Deadwood above xx m3/ha
mutate(error = "V_total_deadwood")
length(unique(deadwood_out$id))
......@@ -240,7 +239,7 @@ plot_carbon <- pouta %>%
boxplot(plot_carbon$CARBON_SINK, main="CARBON_SINK without the huge outlier")
carbon_sink_out <- pouta %>% filter(CARBON_SINK < -1000000 | CARBON_SINK > 1000000) %>% # source/sink of more than 1000 t CO2/ha
carbon_sink_out <- pouta %>% filter(CARBON_SINK < -1500000 | CARBON_SINK > 1500000) %>% # source/sink of more than 1000 t CO2/ha
mutate(error = "CARBON_SINK")
length(unique(carbon_sink_out$id))
unique(carbon_sink_out$regime)
......
......@@ -21,13 +21,13 @@ on_cPouta <- TRUE
if(on_cPouta == TRUE) {
# for pouta
datapath <- "/media/volume/outp_rcp0_test/"
datapath <- "/media/volume/outp_rcp0/"
path <- "/home/ubuntu/workspace/MultiForest2/"
# path <- paste0(getwd(),"/")
} else {
# for Windows
path <- "C:/MyTemp/r_SIMO_output/output/MF_FIN/final_RCP0_FIN_V3/"
datapath <- "C:/MyTemp/r_SIMO_output/output/MF_FIN/RCP0_test/"
}
......@@ -40,6 +40,7 @@ library(ggplot2)
library(data.table)
library(gridExtra)
library(egg)
library(kableExtra)
```
......@@ -121,7 +122,7 @@ length(unique(V_out_q$id))
boxplot(pouta$i_Vm3, main="Indicator: i_Vm3")
i_Vm3_out <- pouta %>% filter(i_Vm3 > 50) %>% # annual volume increment above xx m3/ha
i_Vm3_out <- pouta %>% filter(i_Vm3 > 100) %>% # annual volume increment above xx m3/ha
mutate(error = "i_Vm3")
length(unique(i_Vm3_out$id))
......@@ -132,6 +133,26 @@ length(unique(i_Vm3_out_q$id))
### What are the high values
```{r , echo=TRUE}
i_Vm3_high <- pouta %>%
left_join(poutaxy[,c("id", "year", "regime", "standid")], by = c("id", "year", "regime")) %>%
select(c("id", "standid", "year", "regime", "i_Vm3", "V","Harvested_V")) %>%
filter(id %in% unique(i_Vm3_out$id)) %>%
filter(i_Vm3 > 100 )
# first 20 rows
i_Vm3_high[1:30,] %>%
knitr::kable() %>%
kable_styling()
```
## Indicator: Harvested_V
```{r , echo=TRUE}
......@@ -140,7 +161,7 @@ plotharvest <- pouta %>% filter(Harvested_V != 0)
boxplot(plotharvest$Harvested_V, main="Harvested_V")
Harvested_V_out <- pouta %>% filter(Harvested_V > 1000) %>% # Harvested volume above xx m3/ha
Harvested_V_out <- pouta %>% filter(Harvested_V > 1500) %>% # Harvested volume above xx m3/ha
mutate(error = "Harvested_V")
length(unique(Harvested_V_out$id))
......@@ -229,7 +250,7 @@ plot_carbon <- pouta %>%
boxplot(plot_carbon$CARBON_SINK, main="CARBON_SINK without the huge outlier")
carbon_sink_out <- pouta %>% filter(CARBON_SINK < -1000000 | CARBON_SINK > 1000000) %>% # source/sink of more than 1000 t CO2/ha
carbon_sink_out <- pouta %>% filter(CARBON_SINK < -1500000 | CARBON_SINK > 1500000) %>% # source/sink of more than 1000 t CO2/ha
mutate(error = "CARBON_SINK")
length(unique(carbon_sink_out$id))
unique(carbon_sink_out$regime)
......@@ -245,13 +266,20 @@ length(unique(carbon_sink_out_q$id))
```{r , echo=TRUE}
carbononly <- pouta %>%
left_join(poutaxy[,c("id", "year", "regime", "standid", "CARBON_STORAGE")], by = c("id", "year", "regime")) %>%
select(c("id", "standid", "year", "regime", "V", "Harvested_V" ,"CARBON_STORAGE", "CARBON_SINK")) %>%
filter(id %in% unique(carbon_sink_out$id))
carbon_sink_out2 <- pouta %>% filter(CARBON_SINK < -1000000 | CARBON_SINK > 1000000) %>% # source/sink of more than 1000 t CO2/ha
mutate(error = "CARBON_SINK")
length(unique(carbon_sink_out2$id))
unique(carbon_sink_out2$regime)
carbononly %>%
filter(CARBON_SINK < -1000000 | CARBON_SINK > 1000000)
carbononly <- pouta %>%
left_join(poutaxy[,c("id", "year", "regime", "standid", "CARBON_STORAGE")], by = c("id", "year", "regime")) %>%
select(c("id", "standid", "year", "regime", "V", "Harvested_V" ,"CARBON_STORAGE", "CARBON_SINK")) %>%
filter(id %in% unique(carbon_sink_out2$id))
carbononly %>%
knitr::kable() %>%
kable_styling()
```
......@@ -396,7 +424,10 @@ length(unique(V_out$id)) +
out_all %>%
group_by(standid, error) %>%
count(error) %>%
spread(error, n)
spread(error, n) %>%
knitr::kable() %>%
kable_styling()
```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment