---
title: "How to use RSDA 3.3"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Introduction to RSDA packages}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
# Default chunk options applied to every code chunk in this vignette.
knitr::opts_chunk$set(
  collapse = TRUE, comment = "#>",
  fig.width = 6, fig.height = 5
)
# Attach RSDA so its functions and datasets can be used unqualified below.
library(RSDA)
```


## RSDA Package version 3.3
## Oldemar Rodríguez R.

## Installing the package

### CRAN

```{r, eval=FALSE}
# Install RSDA from CRAN together with its dependencies
# (shown only; this chunk is not evaluated when the vignette is built).
install.packages("RSDA", dependencies = TRUE)
```
### Github

```{r, eval=FALSE}
# Install RSDA from the PROMiDAT/RSDA GitHub repository; requires the
# devtools package (shown only; this chunk is not evaluated).
devtools::install_github("PROMiDAT/RSDA")
```

## How to read a Symbolic Table from a CSV file with RSDA?

```{r}
# Read the semicolon-separated file "tsym1.csv" as a symbolic table,
# taking the header from the first line and row names from column 1.
ex3 <- read.sym.table(
  file = "tsym1.csv",
  header = TRUE,
  sep = ";",
  dec = ".",
  row.names = 1
)
ex3
```

## How to save a Symbolic Table in a CSV file with RSDA?

```{r, eval=FALSE}
# Write the symbolic table to a semicolon-separated CSV file, keeping
# row and column names (shown only; this chunk is not evaluated).
write.sym.table(ex3,
  file = "tsymtemp.csv", sep = ";", dec = ".",
  row.names = TRUE, col.names = TRUE
)
```


## Symbolic Data Frame Example in RSDA

```{r}
# Load the `example3` dataset and print it.
data(example3)
example3
```

```{r}
# Rows and columns are selected with the usual data-frame syntax.
example3[2, ]     # second row
example3[, 3]     # third column
example3[2:3, 5]  # rows 2 and 3 of the fifth column
example3$F1       # column F1, selected by name
```

## How to generate a symbolic data table from a classic data table in RSDA?

```{r}
# Load and print the classic data table that the conversions below start from.
data(ex1_db2so)
ex1_db2so
```

The `classic.to.sym` function converts a traditional (classic) table into a symbolic one. To do this, we must specify the following parameters:

+ `x` = a data.frame
+ `concept` = variables to be used as a concept
+ `variables` = variables to be used, compatible with tidyselect options
+ `default.numeric` = function that will be used by default for numerical values (sym.interval)
+ `default.categorical` = function to be used by default for categorical values (sym.modal)

#### Example 1

```{r}
# Build a symbolic table from ex1_db2so using state and sex as the
# concept and keeping county, group and age as variables.
result <- classic.to.sym(
  x = ex1_db2so,
  concept = c(state, sex),
  variables = c(county, group, age)
)
result
```


We can add new variables indicating the type we want them to be.

```{r}
# Same conversion, plus an extra variable `age_hist` built with
# sym.histogram(); the breaks come from pretty() applied to the raw ages.
result <- classic.to.sym(
  x = ex1_db2so,
  concept = c("state", "sex"),
  variables = c(county, group, age),
  age_hist = sym.histogram(age, breaks = pretty(ex1_db2so$age, 5))
)
result
```

#### Example 2

```{r}
# Load the USCrime dataset and preview its first rows.
data(USCrime)
head(USCrime)
```

```{r}
# Use `state` as the concept, keep three variables, and add a histogram
# variable derived from ViolentCrimesPerPop.
result <- classic.to.sym(
  x = USCrime,
  concept = state,
  variables = c(NumInShelters, NumImmig, ViolentCrimesPerPop),
  ViolentCrimesPerPop_hist = sym.histogram(
    ViolentCrimesPerPop,
    breaks = pretty(USCrime$ViolentCrimesPerPop, 5)
  )
)
result
```

#### Example 3


```{r}
# Load the ex_mcfa1 dataset and preview its first rows.
data("ex_mcfa1")
head(ex_mcfa1)
```

```{r}
# Use `suspect` as the concept, keep hair, eyes and region, and convert
# categorical variables with sym.set instead of the default function.
sym.table <- classic.to.sym(
  x = ex_mcfa1,
  concept = suspect,
  variables = c(hair, eyes, region),
  default.categorical = sym.set
)
sym.table
```

#### Example 4

We can modify the function that will be applied by default to the categorical variables

```{r}
# No `variables` argument is given here, so the function's default
# selection applies; categorical variables are converted with sym.set.
sym.table <- classic.to.sym(
  x = ex_mcfa1,
  concept = suspect,
  default.categorical = sym.set
)
sym.table
```


### Converting SODAS 1.0 *.SDS files to RSDA files

```{r}
# Convert the SODAS 1.0 file "hani3101.sds" to an RSDA object and print it.
hani3101 <- SDS.to.RSDA(file.path = "hani3101.sds")
hani3101
```


```{r, eval=FALSE}
# We can save the file in CSV to RSDA format as follows
# (shown only; this chunk is not evaluated when the vignette is built):
write.sym.table(hani3101,
  file = "hani3101.csv",
  sep = ";",
  dec = ".",
  row.names = TRUE,
  col.names = TRUE
)
```

### Converting SODAS 2.0 *.XML files to RSDA files

```{r}
# Convert the SODAS 2.0 file "abalone.xml" to an RSDA object and print it.
abalone <- SODAS.to.RSDA("abalone.xml")
abalone
```


```{r, eval=FALSE}
# Save the converted table as a semicolon-separated CSV file, keeping
# row and column names (shown only; this chunk is not evaluated).
write.sym.table(abalone,
  file = "abalone.csv",
  sep = ";",
  dec = ".",
  row.names = TRUE,
  col.names = TRUE
)
```


### Basic statistics

#### Symbolic Mean

```{r}
data(example3)
# Columns can be selected by name (`$F1`) or by position (`[, 1]`).
mean(example3$F1)
mean(example3[, 1])
```

```{r}
mean(example3$F2)
mean(example3[, 2])
```

```{r}
# Request the "interval" method explicitly.
mean(example3$F2, method = "interval")
mean(example3[, 2], method = "interval")
```

#### Symbolic median

```{r}
# Columns can be selected by name (`$F1`) or by position (`[, 1]`).
median(example3$F1)
median(example3[, 1])
```

```{r}
median(example3$F2)
median(example3[, 2])
```

```{r}
# Request the "interval" method explicitly.
median(example3$F6, method = "interval")
median(example3[, 6], method = "interval")
```

#### Variance and standard deviation

```{r}
# Variance of individual columns; for F6 the default method is shown
# next to the "interval" and "billard" methods.
var(example3[, 1])
var(example3[, 2])
var(example3$F6)
var(example3$F6, method = "interval")
var(example3$F6, method = "billard")

# Standard deviation, with the same choice of methods for F6.
sd(example3$F1)
sd(example3$F2)
sd(example3$F6)
sd(example3$F6, method = "interval")
sd(example3$F6, method = "billard")
```

### Symbolic correlation

```{r}
# Correlation between two columns, selected by name or by position.
cor(example3$F1, example3$F4)
cor(example3[, 1], example3[, 4])

# Correlation between F2 and F6 with two explicitly chosen methods.
cor(example3$F2, example3$F6, method = "centers")
cor(example3$F2, example3$F6, method = "billard")
```

### Radar plot for intervals

```{r}
library(ggpolypath)

data(oils)
# NOTE(review): this calls unexported RSDA internals through `:::`;
# presumably it round-trips `oils` through the older format to obtain an
# up-to-date symbolic table -- confirm, and prefer an exported helper if
# one exists.
oils <- RSDA:::to.v3(RSDA:::to.v2(oils))

# Radar plots for two different row subsets.
sym.radar.plot(oils[2:3, ])
sym.radar.plot(oils[2:5, ])

# Interval histogram of the second column with four bins.
res <- interval.histogram.plot(
  oils[, 2],
  n.bins = 4,
  col = c(2, 3, 4, 5)
)
res

# Interval histogram of the third column with three bins and a title.
res <- interval.histogram.plot(
  oils[, 3],
  n.bins = 3,
  main = "Histogram",
  col = c(2, 3, 4)
)
res
```

### Distances for intervals

#### Gowda-Diday

```{r}
data("oils")
# Gowda-Diday distances computed on the first four columns.
DM <- sym.dist.interval(
  sym.data = oils[, 1:4],
  method = "Gowda.Diday"
)
# Hierarchical clustering on those distances, drawn as a dendrogram;
# a negative `hang` makes the labels hang down from 0.
model <- hclust(DM)
plot(model, hang = -1)
```

#### Ichino

```{r}
# Ichino distances computed on the first four columns.
DM <- sym.dist.interval(
  sym.data = oils[, 1:4],
  method = "Ichino"
)
# Hierarchical clustering on those distances, drawn as a dendrogram.
model <- hclust(DM)
plot(model, hang = -1)
```

#### Hausdorff

```{r}
# Hausdorff distances on columns 1, 2 and 4, with the normalisation and
# Euclidean options set explicitly.
DM <- sym.dist.interval(
  sym.data = oils[, c(1, 2, 4)],
  gamma = 0.5,
  method = "Hausdorff",
  normalize = FALSE,
  SpanNormalize = TRUE,
  euclidea = TRUE,
  q = 2
)
# Hierarchical clustering on those distances, drawn as a dendrogram.
model <- hclust(DM)
plot(model, hang = -1)
```

### Linear regression for intervals

#### Training 

```{r}
data(int_prost_train)
data(int_prost_test)
# Regress lpsa on all other variables of the training set with the
# "cm" method.
res.cm <- sym.lm(
  formula = lpsa ~ .,
  sym.data = int_prost_train,
  method = "cm"
)
res.cm
```

#### Prediction

```{r}
# Apply the fitted model to the test set.
pred.cm <- sym.predict(model = res.cm, new.sym.data = int_prost_test)
```

#### Testing

```{r}
# Compare the observed lpsa intervals of the test set with the fitted
# values (presumably `.L` / `.U` refer to the lower / upper interval
# bounds -- confirm against the RSDA reference).
RMSE.L(int_prost_test$lpsa, pred.cm$Fitted)
RMSE.U(int_prost_test$lpsa, pred.cm$Fitted)
R2.L(int_prost_test$lpsa, pred.cm$Fitted)
R2.U(int_prost_test$lpsa, pred.cm$Fitted)
deter.coefficient(int_prost_test$lpsa, pred.cm$Fitted)
```

### LASSO regression for intervals 

```{r}
# Load the training and test sets used for the LASSO example.
data(int_prost_train)
data(int_prost_test)
```

#### Training 

```{r}
# LASSO fit (alpha = 1) with column 9 as the response.
# NOTE(review): nfolds and grouped are presumably forwarded to glmnet's
# cross-validation -- confirm.
res.cm.lasso <- sym.glm(
  sym.data = int_prost_train,
  response = 9,
  method = "cm",
  alpha = 1,
  nfolds = 10,
  grouped = TRUE
)
```

#### Prediction

```{r}
# Predict the response (column 9) for the test set with the LASSO model.
pred.cm.lasso <- sym.predict(
  res.cm.lasso,
  response = 9,
  int_prost_test,
  method = "cm"
)
```

#### Testing 

```{r}
# Plot the fitted object, then the coefficient paths of its `glmnet.fit`
# component against lambda with labelled curves.
plot(res.cm.lasso)
plot(res.cm.lasso$glmnet.fit, "lambda", label = TRUE)
```

```{r}
# Compare the observed lpsa intervals with the LASSO predictions.
RMSE.L(int_prost_test$lpsa, pred.cm.lasso)
RMSE.U(int_prost_test$lpsa, pred.cm.lasso)
R2.L(int_prost_test$lpsa, pred.cm.lasso)
R2.U(int_prost_test$lpsa, pred.cm.lasso)
deter.coefficient(int_prost_test$lpsa, pred.cm.lasso)
```

### RIDGE regression for intervals

#### Training

```{r}
data(int_prost_train)
data(int_prost_test)

# Ridge fit (alpha = 0) with column 9 as the response.
res.cm.ridge <- sym.glm(
  sym.data = int_prost_train,
  response = 9,
  method = "cm",
  alpha = 0,
  nfolds = 10,
  grouped = TRUE
)
```

#### Prediction 

```{r}
# Predict the response (column 9) for the test set with the ridge model.
pred.cm.ridge <- sym.predict(
  res.cm.ridge,
  response = 9,
  int_prost_test,
  method = "cm"
)
```

#### Testing

```{r}
# Plot the fitted object, then the coefficient paths of its `glmnet.fit`
# component against lambda with labelled curves.
plot(res.cm.ridge)
plot(res.cm.ridge$glmnet.fit, "lambda", label = TRUE)

# Compare the observed lpsa intervals with the ridge predictions.
RMSE.L(int_prost_test$lpsa, pred.cm.ridge)
RMSE.U(int_prost_test$lpsa, pred.cm.ridge)
R2.L(int_prost_test$lpsa, pred.cm.ridge)
R2.U(int_prost_test$lpsa, pred.cm.ridge)
deter.coefficient(int_prost_test$lpsa, pred.cm.ridge)
```

### PCA for intervals

#### Example 1

```{r}
data("oils")
# Interval PCA with the "centers" method; plot individuals, then variables.
res <- sym.pca(oils, "centers")
plot(res, choix = "ind")
plot(res, choix = "var")
```


#### Example 2

```{r}
# Interval PCA with the "tops" method; plot the individuals.
res <- sym.pca(oils, "tops")
plot(res, choix = "ind")
```

#### Example 3

```{r}
# Interval PCA with the "principal.curves" method; plot the individuals.
res <- sym.pca(oils, "principal.curves")
plot(res, choix = "ind")
```

#### Example 4

```{r}
# Interval PCA with the "optimized.distance" method; plot individuals,
# then variables.
res <- sym.pca(oils, "optimized.distance")
plot(res, choix = "ind")
plot(res, choix = "var")
```

#### Example 5

```{r}
# Interval PCA with the "optimized.variance" method; plot individuals,
# then variables.
res <- sym.pca(oils, "optimized.variance")
plot(res, choix = "ind")
plot(res, choix = "var")
```

### Symbolic Multiple Correspondence Analysis

#### Example 1

```{r}
# Load the ex_mcfa1 dataset and print it in full.
data("ex_mcfa1")
ex_mcfa1
```

```{r}
# Build the symbolic table with `suspect` as the concept and sym.set for
# categorical variables.
sym.table <- classic.to.sym(
  x = ex_mcfa1,
  concept = suspect,
  default.categorical = sym.set
)
sym.table
```

```{r}
# Correspondence analysis on variables 2 and 3, then a scatterplot of
# columns 2 and 3 of the result.
res <- sym.mcfa(sym.table, c(2, 3))
mcfa.scatterplot(
  res[, 2], res[, 3],
  sym.data = sym.table,
  pos.var = c(2, 3)
)
```

```{r}
# Same analysis with variable 4 added.
res <- sym.mcfa(sym.table, c(2, 3, 4))
mcfa.scatterplot(
  res[, 2], res[, 3],
  sym.data = sym.table,
  pos.var = c(2, 3, 4)
)
```

# Symbolic UMAP

## Oils example

```{r}
# Use the `oils` symbolic table (loaded in an earlier chunk) as input
# and print it.
datos <- oils
datos
```

```{r}
# Compute the symbolic UMAP of the table and print the result.
x <- sym.umap(datos)
x
```

```{r}
# Plot the UMAP result.
plot(x)
```

## Cardiological example

```{r}
# Use the `Cardiological` dataset as input and print it.
# NOTE(review): there is no data(Cardiological) call here, so this
# presumably relies on the package lazy-loading its datasets -- confirm.
datos <- Cardiological
datos
```

```{r}
# Compute the symbolic UMAP of the table and print the result.
x <- sym.umap(datos)
x
```

```{r}
# Plot the UMAP result.
plot(x)
```