diff --git a/DESCRIPTION b/DESCRIPTION index 34185bf..e8c3a93 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -19,6 +19,7 @@ Description: Compute multiple types of correlation analyses, including Imports: Rcpp (>= 1.0.13-1), RcppArmadillo (>= 14.2.2-1), + corrplot (>= 0.95), lsr (>= 0.5.2), parallel (>= 4.4.1), stats (>= 4.4.1), @@ -28,7 +29,6 @@ Imports: ppsr (>= 0.0.2), DescTools (>= 0.99.40) Suggests: - corrplot, energy, knitr, rmarkdown, diff --git a/NAMESPACE b/NAMESPACE index b995b61..2edd4ae 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -21,6 +21,7 @@ export(corr_rm) export(corrp) export(dcorT_test) export(ptest) +export(set_arguments) export(sil_acca) importFrom(Rcpp,evalCpp) importFrom(RcppArmadillo,armadillo_version) diff --git a/R/acca.R b/R/acca.R index 2e206f1..79cdee7 100644 --- a/R/acca.R +++ b/R/acca.R @@ -23,7 +23,7 @@ #' final result of the clustering method. #' That is, the name of the variables belonging to each cluster k. #' -#' @author Igor D.S. Siciliani +#' @author Igor D.S. Siciliani, Paulo H. dos Santos #' #' @keywords correlation , acca #' diff --git a/R/best_acca.R b/R/best_acca.R index adc6b4c..5f3415b 100644 --- a/R/best_acca.R +++ b/R/best_acca.R @@ -20,7 +20,7 @@ #' the optimal number of clusters `$best.k`. #' @seealso \code{\link{sil_acca}} #' -#' @author Igor D.S. Siciliani +#' @author Igor D.S. Siciliani, Paulo H. dos Santos #' #' @keywords silhouette , acca , optimal , k #' diff --git a/R/corr_fun.R b/R/corr_fun.R index e3047d3..6a22b4a 100644 --- a/R/corr_fun.R +++ b/R/corr_fun.R @@ -8,38 +8,8 @@ #' #' @name corr_fun #' -#' @section Details (Types): +#' @inheritSection corrp Pair Types #' -#' - \code{integer/numeric pair} Pearson Correlation using -#' \code{\link[stats]{cor}} function. The -#' value lies between -1 and 1.\cr -#' - \code{integer/numeric pair} Distance Correlation -#' using \code{\link[energy]{dcorT.test}} function. 
The -#' value lies between 0 and 1.\cr -#' - \code{integer/numeric pair} Maximal Information Coefficient using -#' \code{\link[minerva]{mine}} function. The -#' value lies between 0 and 1.\cr -#' - \code{integer/numeric pair} Predictive Power Score using -#' \code{\link[ppsr]{score}} function. The -#' value lies between 0 and 1.\cr\cr -#' - \code{integer/numeric - factor/categorical pair} correlation coefficient or -#' squared root of R^2 coefficient of linear regression of integer/numeric -#' variable over factor/categorical variable using -#' \code{\link[stats]{lm}} function. The value -#' lies between 0 and 1.\cr -#' - \code{integer/numeric - factor/categorical pair} -#' Predictive Power Score using \code{\link[ppsr]{score}} function. -#' The value lies between 0 and 1.\cr\cr -#' - \code{factor/categorical pair} Cramer's V value is -#' computed based on chisq test and using -#' \code{\link[lsr]{cramersV}} function. The value lies -#' between 0 and 1.\cr -#' - \code{factor/categorical pair} Uncertainty coefficient -#' using \code{\link[DescTools]{UncertCoef}} function. The -#' value lies between 0 and 1.\cr -#' - \code{factor/categorical pair} Predictive Power Score -#' using \code{\link[ppsr]{score}} function. -#' The value lies between 0 and 1.\cr #' #' @return list with all statistical results.\cr #' - All statistical tests are controlled by the confidence internal of @@ -52,44 +22,12 @@ #' default the association measure(`infer.value`) will be `NA`. #' #' -#' @param df \[\code{data.frame(1)}]\cr input data frame. +#' @inheritParams corrp #' @param nx \[\code{character(1)}]\cr first variable column name: independent/predictor variable. #' @param ny \[\code{character(1)}]\cr second variable column name: dependent/target variable. -#' @param p.value \[\code{logical(1)}]\cr -#' P-value probability of obtaining the observed results of a test, -#' assuming that the null hypothesis is correct. By default p.value=0.05 (Cutoff value for p-value.). 
-#' @param comp \[\code{character(1)}]\cr The param \code{p.value} must be greater -#' or less than those estimated in tests and correlations. -#' @param alternative \[\code{character(1)}]\cr a character string specifying the alternative hypothesis, -#' must be one of "greater" (default), "less" or "two.sided". You can specify just the initial letter. -#' You can specify just the initial letter. -#' @param verbose \[\code{logical(1)}]\cr Activate verbose mode. -#' @param num.s \[\code{numeric(1)}]\cr Used in permutation test. The number of samples with -#' replacement created with y numeric vector. -#' @param rk \[\code{logical(1)}]\cr Used in permutation test. -#' if its TRUE transform x, y numeric vectors with samples ranks. -#' @param cor.nn \[\code{character(1)}]\cr -#' Choose correlation type to be used in integer/numeric pair inference. -#' The options are `pearson: Pearson Correlation`,`mic: Maximal Information Coefficient`, -#' `dcor: Distance Correlation`,`pps: Predictive Power Score`.Default is `Pearson Correlation`. -#' @param cor.nc \[\code{character(1)}]\cr -#' Choose correlation type to be used in integer/numeric - factor/categorical pair inference. -#' The option are `lm: Linear Model`,`pps: Predictive Power Score`. Default is `Linear Model`. -#' @param cor.cc \[\code{character(1)}]\cr -#' Choose correlation type to be used in factor/categorical pair inference. -#' The option are `cramersV: Cramer's V`,`uncoef: Uncertainty coefficient`, -#' `pps: Predictive Power Score`. Default is ` Cramer's V`. -#' @param lm.args \[\code{list(1)}]\cr additional parameters for the specific method. -#' @param pearson.args \[\code{list(1)}]\cr additional parameters for the specific method. -#' @param dcor.args \[\code{list(1)}]\cr additional parameters for the specific method. -#' @param mic.args \[\code{list(1)}]\cr additional parameters for the specific method. -#' @param pps.args \[\code{list(1)}]\cr additional parameters for the specific method. 
-#' @param uncoef.args \[\code{list(1)}]\cr additional parameters for the specific method. -#' @param cramersV.args \[\code{list(1)}]\cr additional parameters for the specific method. -#' @param ... Additional arguments. #' #' -#' @author Igor D.S. Siciliani +#' @author Igor D.S. Siciliani, Paulo H. dos Santos #' #' @keywords correlation , power predictive score , linear model , distance correlation , #' mic , point biserial , pearson , cramer'sV diff --git a/R/corr_matrix.R b/R/corr_matrix.R index e0e9332..4a7695f 100644 --- a/R/corr_matrix.R +++ b/R/corr_matrix.R @@ -9,7 +9,7 @@ #' be represented by NA or FALSE in the correlation matrix. #' @param ... Additional arguments (TODO). #' -#' @author Igor D.S. Siciliani +#' @author Igor D.S. Siciliani, Paulo H. dos Santos #' #' @keywords correlation matrix , corrp #' @@ -17,7 +17,7 @@ #' #' iris_cor <- corrp(iris) #' iris_m <- corr_matrix(iris_cor, isig = FALSE) -#' corrplot(iris_m) +#' corrplot::corrplot(iris_m) #' @export corr_matrix <- function(c, ...) { assert_required_argument(c, "The 'c' argument must be a clist object, which is the output from corrp.") diff --git a/R/corr_rm.R b/R/corr_rm.R index 18ddd9e..c810abd 100644 --- a/R/corr_rm.R +++ b/R/corr_rm.R @@ -6,8 +6,8 @@ #' #' @param df \[\code{data.frame(1)}]\cr input data frame. #' @param c \[\code{clist(1)} | \code{cmatrix(1)}]\cr correlation list output from the function \code{\link[corrp]{corrp}} -#' with class \code{\link[corrp]{clist}} or correlation matrix output -#' from \code{\link[corrp]{corr_matrix}} with class \code{\link[corrp]{cmatrix}}. +#' with class \code{clist} or correlation matrix output +#' from \code{\link[corrp]{corr_matrix}} with class \code{cmatrix}. #' @param cutoff \[\code{numeric(1)}]\cr A numeric value for the pair-wise absolute correlation cutoff. #' The default values is 0.75. 
#' @param col \[\code{character(1)}]\cr choose the column to be used in the correlation matrix @@ -22,7 +22,7 @@ #' corr_rm(df = iris, c = iris_clist, cutoff = 0.75, col = "infer.value", isig = FALSE) #' corr_rm(df = iris, c = iris_cmatrix, cutoff = 0.75, col = "infer.value", isig = FALSE) #' -#' @author Igor D.S. Siciliani +#' @author Igor D.S. Siciliani, Paulo H. dos Santos #' #' @keywords highly correlated , cmatrix , clist #' diff --git a/R/corrp.R b/R/corrp.R index d2d5d1a..6562da4 100644 --- a/R/corrp.R +++ b/R/corrp.R @@ -8,29 +8,25 @@ #' #' @name corrp #' -#' @section Details (Pair Types): +#' @section Pair Types: #' -#' - \code{integer/numeric pair} Pearson Correlation using \code{\link[stats]{cor}} function. The -#' value lies between -1 and 1.\cr -#' - \code{integer/numeric pair} Distance Correlation using \code{\link[energy]{dcorT.test}} function. The -#' value lies between 0 and 1.\cr -#' - \code{integer/numeric pair} Maximal Information Coefficient using \code{\link[minerva]{mine}} function. The -#' value lies between 0 and 1.\cr -#' - \code{integer/numeric pair} Predictive Power Score using \code{\link[ppsr]{score}} function. The -#' value lies between 0 and 1.\cr\cr -#' - \code{integer/numeric - factor/categorical pair} correlation coefficient or -#' squared root of R^2 coefficient of linear regression of integer/numeric -#' variable over factor/categorical variable using \code{\link[stats]{lm}} function. The value -#' lies between 0 and 1.\cr -#' - \code{integer/numeric - factor/categorical pair} Predictive Power Score using \code{\link[ppsr]{score}} function. The -#' value lies between 0 and 1.\cr\cr -#' - \code{factor/categorical pair} Cramer's V value is -#' computed based on chisq test and using \code{\link[lsr]{cramersV}} function. The value lies -#' between 0 and 1.\cr -#' - \code{factor/categorical pair} Uncertainty coefficient using \code{\link[DescTools]{UncertCoef}} function. 
The -#' value lies between 0 and 1.\cr -#' - \code{factor/categorical pair} Predictive Power Score using \code{\link[ppsr]{score}} function. The -#' value lies between 0 and 1.\cr +#' **Numeric pairs (integer/numeric):** +#' +#' - **Pearson Correlation Coefficient:** A widely used measure of the strength and direction of linear relationships. Implemented using \code{\link[stats]{cor}}. For more details, see \url{https://doi.org/10.1098/rspl.1895.0041}. The value lies between -1 and 1.\cr +#' - **Distance Correlation:** Based on the idea of expanding covariance to distances, it measures both linear and nonlinear associations between variables. Implemented using \code{\link[energy]{dcorT.test}}. For more details, see \url{https://doi.org/10.1214/009053607000000505}. The value lies between 0 and 1.\cr +#' - **Maximal Information Coefficient (MIC):** An information-based nonparametric method that can detect both linear and non-linear relationships between variables. Implemented using \code{\link[minerva]{mine}}. For more details, see \url{https://doi.org/10.1126/science.1205438}. The value lies between 0 and 1.\cr +#' - **Predictive Power Score (PPS):** A metric used to assess predictive relations between variables. Implemented using \code{\link[ppsr]{score}}. For more details, see \url{https://zenodo.org/record/4091345}. The value lies between 0 and 1.\cr\cr +#' +#' **Numeric and categorical pairs (integer/numeric - factor/categorical):** +#' +#' - **Square Root of R² Coefficient:** From linear regression of the numeric variable over the categorical variable. Implemented using \code{\link[stats]{lm}}. For more details, see \url{https://doi.org/10.4324/9780203774441}. The value lies between 0 and 1.\cr +#' - **Predictive Power Score (PPS):** A metric used to assess predictive relations between numeric and categorical variables. Implemented using \code{\link[ppsr]{score}}. For more details, see \url{https://zenodo.org/record/4091345}. 
The value lies between 0 and 1.\cr\cr +#' +#' **Categorical pairs (factor/categorical):** +#' +#' - **Cramér's V:** A measure of association between nominal variables. Computed based on a chi-squared test and implemented using \code{\link[lsr]{cramersV}}. For more details, see \url{https://doi.org/10.1515/9781400883868}. The value lies between 0 and 1.\cr +#' - **Uncertainty Coefficient:** A measure of nominal association between two variables. Implemented using \code{\link[DescTools]{UncertCoef}}. For more details, see \url{https://doi.org/10.1016/j.jbi.2010.02.001}. The value lies between 0 and 1.\cr +#' - **Predictive Power Score (PPS):** A metric used to assess predictive relations between categorical variables. Implemented using \code{\link[ppsr]{score}}. For more details, see \url{https://zenodo.org/record/4091345}. The value lies between 0 and 1.\cr #' #' @return #' A list with two tables: `data` and `index`. @@ -54,9 +50,11 @@ #' All statistical tests are controlled by the confidence internal of #' p.value param. If the statistical tests do not obtain a significance greater/less #' than p.value the value of variable `isig` will be `FALSE`.\cr -#' By default there is no statistical significance test for the pps algorithm. By default `isig` is NA, you can enable in the pps.args.\cr + #' If any errors occur during operations the association measure (`infer.value`) will be `NA`.\cr #' The result `data` and `index` will have \eqn{N^2} rows, where N is the number of variables of the input data. +#' By default there is no statistical significance test for the pps algorithm, so by default `isig` is NA; you can enable the test by setting `ptest = TRUE` in `pps.args`.\cr +#' Each `*.args` list can override the parameters (`p.value`, `comp`, `alternative`, `num.s`, `rk`, `ptest`) for the method named by its prefix. #' #' @param df \[\code{data.frame(1)}]\cr input data frame. #' @param parallel \[\code{logical(1)}]\cr If its TRUE run the operations in parallel backend. 
@@ -85,16 +83,16 @@ #' Choose correlation type to be used in factor/categorical pair inference. #' The option are `cramersV: Cramer's V`,`uncoef: Uncertainty coefficient`, #' `pps: Predictive Power Score`. Default is ` Cramer's V`. -#' @param lm.args \[\code{list(1)}]\cr additional parameters for the specific method. -#' @param pearson.args \[\code{list(1)}]\cr additional parameters for the specific method. -#' @param dcor.args \[\code{list(1)}]\cr additional parameters for the specific method. -#' @param mic.args \[\code{list(1)}]\cr additional parameters for the specific method. -#' @param pps.args \[\code{list(1)}]\cr additional parameters for the specific method. -#' @param uncoef.args \[\code{list(1)}]\cr additional parameters for the specific method. -#' @param cramersV.args \[\code{list(1)}]\cr additional parameters for the specific method. +#' @param lm.args \[\code{list(1)}]\cr additional parameters for linear model to be passed to \code{\link[stats]{lm}}. +#' @param pearson.args \[\code{list(1)}]\cr additional parameters for Pearson correlation to be passed to \code{\link[stats]{cor.test}}. +#' @param dcor.args \[\code{list(1)}]\cr additional parameters for the distance correlation to be passed to \code{\link[corrp]{dcorT_test}}. +#' @param mic.args \[\code{list(1)}]\cr additional parameters for the maximal information coefficient to be passed to \code{\link[minerva]{mine}}. +#' @param pps.args \[\code{list(1)}]\cr additional parameters for the predictive power score to be passed to \code{\link[ppsr]{score}}. +#' @param uncoef.args \[\code{list(1)}]\cr additional parameters for the uncertainty coefficient to be passed to \code{\link[DescTools]{UncertCoef}}. +#' @param cramersV.args \[\code{list(1)}]\cr additional parameters for the Cramer's V to be passed to \code{\link[lsr]{cramersV}}. #' @param ... Additional arguments. #' -#' @author Igor D.S. Siciliani +#' @author Igor D.S. Siciliani, Paulo H. 
dos Santos #' #' @keywords correlation , power predictive score , linear model , distance correlation , #' mic , point biserial , pearson , cramer'sV @@ -110,7 +108,7 @@ #' @examples #' iris_c <- corrp(iris) #' iris_m <- corr_matrix(iris_c, isig = FALSE) -#' corrplot(iris_m) +#' corrplot::corrplot(iris_m) #' #' #' @export diff --git a/R/sil_acca.R b/R/sil_acca.R index 395b470..bd3e9c4 100644 --- a/R/sil_acca.R +++ b/R/sil_acca.R @@ -14,7 +14,7 @@ #' are very well clustered. #' #' -#' @author Igor D.S. Siciliani +#' @author Igor D.S. Siciliani, Paulo H. dos Santos #' #' @keywords silhouette , acca #' diff --git a/R/utils.R b/R/utils.R index 2434a6f..779dcfc 100644 --- a/R/utils.R +++ b/R/utils.R @@ -457,7 +457,7 @@ set_arguments = function(args_list) { list_name <- deparse(substitute(args_list)) for (name_arg in names(args_list)) { - if (name_arg %in% c("p.value", "comp", "verbose", "alternative", "num.s", "rk", "ptest")) { + if (name_arg %in% c("p.value", "comp", "alternative", "num.s", "rk", "ptest")) { assign(name_arg, args_list[[name_arg]], envir = parent.frame()) args_list[[name_arg]] = NULL } diff --git a/man/acca.Rd b/man/acca.Rd index 7aedb2c..0695aca 100644 --- a/man/acca.Rd +++ b/man/acca.Rd @@ -51,7 +51,7 @@ genes with similar pattern of variation in their expression values." Journal of Biomedical Informatics 43.4 (2010): 560-568. } \author{ -Igor D.S. Siciliani +Igor D.S. Siciliani, Paulo H. dos Santos } \keyword{,} \keyword{acca} diff --git a/man/best_acca.Rd b/man/best_acca.Rd index 97df1c4..899b684 100644 --- a/man/best_acca.Rd +++ b/man/best_acca.Rd @@ -61,7 +61,7 @@ and Soft Computing. Springer, Cham, 2015. \code{\link{sil_acca}} } \author{ -Igor D.S. Siciliani +Igor D.S. Siciliani, Paulo H. 
dos Santos } \keyword{,} \keyword{acca} diff --git a/man/corr_fun.Rd b/man/corr_fun.Rd index 244a93d..afed51f 100644 --- a/man/corr_fun.Rd +++ b/man/corr_fun.Rd @@ -21,7 +21,7 @@ corr_fun( pearson.args = list(), dcor.args = list(), mic.args = list(), - pps.args = list(), + pps.args = list(ptest = FALSE), cramersV.args = list(), uncoef.args = list(), ... @@ -49,8 +49,8 @@ if its TRUE transform x, y numeric vectors with samples ranks.} \item{comp}{[\code{character(1)}]\cr The param \code{p.value} must be greater or less than those estimated in tests and correlations.} -\item{alternative}{[\code{character(1)}]\cr a character string specifying the alternative hypothesis, -must be one of "greater" (default), "less" or "two.sided". You can specify just the initial letter. +\item{alternative}{[\code{character(1)}]\cr a character string specifying the alternative hypothesis for +the correlation inference. It must be one of "two.sided" (default), "greater" or "less". You can specify just the initial letter.} \item{cor.nn}{[\code{character(1)}]\cr @@ -67,19 +67,19 @@ Choose correlation type to be used in factor/categorical pair inference. The option are \verb{cramersV: Cramer's V},\verb{uncoef: Uncertainty coefficient}, \verb{pps: Predictive Power Score}. 
Default is \verb{ Cramer's V}.} -\item{lm.args}{[\code{list(1)}]\cr additional parameters for the specific method.} +\item{lm.args}{[\code{list(1)}]\cr additional parameters for linear model to be passed to \code{\link[stats]{lm}}.} -\item{pearson.args}{[\code{list(1)}]\cr additional parameters for the specific method.} +\item{pearson.args}{[\code{list(1)}]\cr additional parameters for Pearson correlation to be passed to \code{\link[stats]{cor.test}}.} -\item{dcor.args}{[\code{list(1)}]\cr additional parameters for the specific method.} +\item{dcor.args}{[\code{list(1)}]\cr additional parameters for the distance correlation to be passed to \code{\link[corrp]{dcorT_test}}.} -\item{mic.args}{[\code{list(1)}]\cr additional parameters for the specific method.} +\item{mic.args}{[\code{list(1)}]\cr additional parameters for the maximal information coefficient to be passed to \code{\link[minerva]{mine}}.} -\item{pps.args}{[\code{list(1)}]\cr additional parameters for the specific method.} +\item{pps.args}{[\code{list(1)}]\cr additional parameters for the predictive power score to be passed to \code{\link[ppsr]{score}}.} -\item{cramersV.args}{[\code{list(1)}]\cr additional parameters for the specific method.} +\item{cramersV.args}{[\code{list(1)}]\cr additional parameters for the Cramer's V to be passed to \code{\link[lsr]{cramersV}}.} -\item{uncoef.args}{[\code{list(1)}]\cr additional parameters for the specific method.} +\item{uncoef.args}{[\code{list(1)}]\cr additional parameters for the uncertainty coefficient to be passed to \code{\link[DescTools]{UncertCoef}}.} \item{...}{Additional arguments.} } @@ -103,39 +103,28 @@ The dataframe is allowed to have columns of these four classes: integer, numeric, factor and character. The character column is considered as categorical variable. 
} -\section{Details (Types)}{ +\section{Pair Types}{ + +\strong{Numeric pairs (integer/numeric):} +\itemize{ +\item \strong{Pearson Correlation Coefficient:} A widely used measure of the strength and direction of linear relationships. Implemented using \code{\link[stats]{cor}}. For more details, see \url{https://doi.org/10.1098/rspl.1895.0041}. The value lies between -1 and 1.\cr +\item \strong{Distance Correlation:} Based on the idea of expanding covariance to distances, it measures both linear and nonlinear associations between variables. Implemented using \code{\link[energy]{dcorT.test}}. For more details, see \url{https://doi.org/10.1214/009053607000000505}. The value lies between 0 and 1.\cr +\item \strong{Maximal Information Coefficient (MIC):} An information-based nonparametric method that can detect both linear and non-linear relationships between variables. Implemented using \code{\link[minerva]{mine}}. For more details, see \url{https://doi.org/10.1126/science.1205438}. The value lies between 0 and 1.\cr +\item \strong{Predictive Power Score (PPS):} A metric used to assess predictive relations between variables. Implemented using \code{\link[ppsr]{score}}. For more details, see \url{https://zenodo.org/record/4091345}. The value lies between 0 and 1.\cr\cr +} + +\strong{Numeric and categorical pairs (integer/numeric - factor/categorical):} +\itemize{ +\item \strong{Square Root of R² Coefficient:} From linear regression of the numeric variable over the categorical variable. Implemented using \code{\link[stats]{lm}}. For more details, see \url{https://doi.org/10.4324/9780203774441}. The value lies between 0 and 1.\cr +\item \strong{Predictive Power Score (PPS):} A metric used to assess predictive relations between numeric and categorical variables. Implemented using \code{\link[ppsr]{score}}. For more details, see \url{https://zenodo.org/record/4091345}. 
The value lies between 0 and 1.\cr\cr +} + +\strong{Categorical pairs (factor/categorical):} \itemize{ -\item \code{integer/numeric pair} Pearson Correlation using -\code{\link[stats]{cor}} function. The -value lies between -1 and 1.\cr -\item \code{integer/numeric pair} Distance Correlation -using \code{\link[energy]{dcorT.test}} function. The -value lies between 0 and 1.\cr -\item \code{integer/numeric pair} Maximal Information Coefficient using -\code{\link[minerva]{mine}} function. The -value lies between 0 and 1.\cr -\item \code{integer/numeric pair} Predictive Power Score using -\code{\link[ppsr]{score}} function. The -value lies between 0 and 1.\cr\cr -\item \code{integer/numeric - factor/categorical pair} correlation coefficient or -squared root of R^2 coefficient of linear regression of integer/numeric -variable over factor/categorical variable using -\code{\link[stats]{lm}} function. The value -lies between 0 and 1.\cr -\item \code{integer/numeric - factor/categorical pair} -Predictive Power Score using \code{\link[ppsr]{score}} function. -The value lies between 0 and 1.\cr\cr -\item \code{factor/categorical pair} Cramer's V value is -computed based on chisq test and using -\code{\link[lsr]{cramersV}} function. The value lies -between 0 and 1.\cr -\item \code{factor/categorical pair} Uncertainty coefficient -using \code{\link[DescTools]{UncertCoef}} function. The -value lies between 0 and 1.\cr -\item \code{factor/categorical pair} Predictive Power Score -using \code{\link[ppsr]{score}} function. -The value lies between 0 and 1.\cr +\item \strong{Cramér's V:} A measure of association between nominal variables. Computed based on a chi-squared test and implemented using \code{\link[lsr]{cramersV}}. For more details, see \url{https://doi.org/10.1515/9781400883868}. The value lies between 0 and 1.\cr +\item \strong{Uncertainty Coefficient:} A measure of nominal association between two variables. Implemented using \code{\link[DescTools]{UncertCoef}}. 
For more details, see \url{https://doi.org/10.1016/j.jbi.2010.02.001}. The value lies between 0 and 1.\cr +\item \strong{Predictive Power Score (PPS):} A metric used to assess predictive relations between categorical variables. Implemented using \code{\link[ppsr]{score}}. For more details, see \url{https://zenodo.org/record/4091345}. The value lies between 0 and 1.\cr } } @@ -153,7 +142,7 @@ Paul van der Laken, ppsr,2021. URL \url{https://github.com/paulvanderlaken/ppsr}. } \author{ -Igor D.S. Siciliani +Igor D.S. Siciliani, Paulo H. dos Santos } \keyword{,} \keyword{biserial} diff --git a/man/corr_matrix.Rd b/man/corr_matrix.Rd index 725b9d0..5104dc1 100644 --- a/man/corr_matrix.Rd +++ b/man/corr_matrix.Rd @@ -30,10 +30,10 @@ create a correlation matrix. iris_cor <- corrp(iris) iris_m <- corr_matrix(iris_cor, isig = FALSE) -corrplot(iris_m) +corrplot::corrplot(iris_m) } \author{ -Igor D.S. Siciliani +Igor D.S. Siciliani, Paulo H. dos Santos } \keyword{,} \keyword{correlation} diff --git a/man/corr_rm.Rd b/man/corr_rm.Rd index cc0bec3..5471b42 100644 --- a/man/corr_rm.Rd +++ b/man/corr_rm.Rd @@ -36,8 +36,8 @@ corr_rm(df, c, ...) \item{df}{[\code{data.frame(1)}]\cr input data frame.} \item{c}{[\code{clist(1)} | \code{cmatrix(1)}]\cr correlation list output from the function \code{\link[corrp]{corrp}} -with class \code{\link[corrp]{clist}} or correlation matrix output -from \code{\link[corrp]{corr_matrix}} with class \code{\link[corrp]{cmatrix}}.} +with class \code{clist} or correlation matrix output +from \code{\link[corrp]{corr_matrix}} with class \code{cmatrix}.} \item{...}{Additional arguments.} @@ -62,7 +62,7 @@ corr_rm(df = iris, c = iris_cmatrix, cutoff = 0.75, col = "infer.value", isig = } \author{ -Igor D.S. Siciliani +Igor D.S. Siciliani, Paulo H. 
dos Santos } \keyword{,} \keyword{clist} diff --git a/man/corrp.Rd b/man/corrp.Rd index f511ea8..a85c229 100644 --- a/man/corrp.Rd +++ b/man/corrp.Rd @@ -21,7 +21,7 @@ corrp( pearson.args = list(), dcor.args = list(), mic.args = list(), - pps.args = list(), + pps.args = list(ptest = FALSE), cramersV.args = list(), uncoef.args = list(), ... @@ -67,19 +67,19 @@ Choose correlation type to be used in factor/categorical pair inference. The option are \verb{cramersV: Cramer's V},\verb{uncoef: Uncertainty coefficient}, \verb{pps: Predictive Power Score}. Default is \verb{ Cramer's V}.} -\item{lm.args}{[\code{list(1)}]\cr additional parameters for the specific method.} +\item{lm.args}{[\code{list(1)}]\cr additional parameters for linear model to be passed to \code{\link[stats]{lm}}.} -\item{pearson.args}{[\code{list(1)}]\cr additional parameters for the specific method.} +\item{pearson.args}{[\code{list(1)}]\cr additional parameters for Pearson correlation to be passed to \code{\link[stats]{cor.test}}.} -\item{dcor.args}{[\code{list(1)}]\cr additional parameters for the specific method.} +\item{dcor.args}{[\code{list(1)}]\cr additional parameters for the distance correlation to be passed to \code{\link[corrp]{dcorT_test}}.} -\item{mic.args}{[\code{list(1)}]\cr additional parameters for the specific method.} +\item{mic.args}{[\code{list(1)}]\cr additional parameters for the maximal information coefficient to be passed to \code{\link[minerva]{mine}}.} -\item{pps.args}{[\code{list(1)}]\cr additional parameters for the specific method.} +\item{pps.args}{[\code{list(1)}]\cr additional parameters for the predictive power score to be passed to \code{\link[ppsr]{score}}.} -\item{cramersV.args}{[\code{list(1)}]\cr additional parameters for the specific method.} +\item{cramersV.args}{[\code{list(1)}]\cr additional parameters for the Cramer's V to be passed to \code{\link[lsr]{cramersV}}.} -\item{uncoef.args}{[\code{list(1)}]\cr additional parameters for the specific method.} 
+\item{uncoef.args}{[\code{list(1)}]\cr additional parameters for the uncertainty coefficient to be passed to \code{\link[DescTools]{UncertCoef}}.} \item{...}{Additional arguments.} } @@ -103,9 +103,10 @@ A list with two tables: \code{data} and \code{index}. All statistical tests are controlled by the confidence internal of p.value param. If the statistical tests do not obtain a significance greater/less than p.value the value of variable \code{isig} will be \code{FALSE}.\cr -There is no statistical significance test for the pps algorithm. By default \code{isig} is TRUE.\cr If any errors occur during operations the association measure (\code{infer.value}) will be \code{NA}.\cr The result \code{data} and \code{index} will have \eqn{N^2} rows, where N is the number of variables of the input data. +By default there is no statistical significance test for the pps algorithm, so by default \code{isig} is NA; you can enable the test by setting \code{ptest = TRUE} in \code{pps.args}.\cr +Each \verb{*.args} list can override the parameters (\code{p.value}, \code{comp}, \code{alternative}, \code{num.s}, \code{rk}, \code{ptest}) for the method named by its prefix. } \description{ Compute correlations type analysis on mixed classes columns of larges dataframes @@ -114,37 +115,35 @@ The dataframe is allowed to have columns of these four classes: integer, numeric, factor and character. The character column is considered as categorical variable. } -\section{Details (Pair Types)}{ +\section{Pair Types}{ + +\strong{Numeric pairs (integer/numeric):} +\itemize{ +\item \strong{Pearson Correlation Coefficient:} A widely used measure of the strength and direction of linear relationships. Implemented using \code{\link[stats]{cor}}. For more details, see \url{https://doi.org/10.1098/rspl.1895.0041}. The value lies between -1 and 1.\cr +\item \strong{Distance Correlation:} Based on the idea of expanding covariance to distances, it measures both linear and nonlinear associations between variables. 
Implemented using \code{\link[energy]{dcorT.test}}. For more details, see \url{https://doi.org/10.1214/009053607000000505}. The value lies between 0 and 1.\cr +\item \strong{Maximal Information Coefficient (MIC):} An information-based nonparametric method that can detect both linear and non-linear relationships between variables. Implemented using \code{\link[minerva]{mine}}. For more details, see \url{https://doi.org/10.1126/science.1205438}. The value lies between 0 and 1.\cr +\item \strong{Predictive Power Score (PPS):} A metric used to assess predictive relations between variables. Implemented using \code{\link[ppsr]{score}}. For more details, see \url{https://zenodo.org/record/4091345}. The value lies between 0 and 1.\cr\cr +} + +\strong{Numeric and categorical pairs (integer/numeric - factor/categorical):} +\itemize{ +\item \strong{Square Root of R² Coefficient:} From linear regression of the numeric variable over the categorical variable. Implemented using \code{\link[stats]{lm}}. For more details, see \url{https://doi.org/10.4324/9780203774441}. The value lies between 0 and 1.\cr +\item \strong{Predictive Power Score (PPS):} A metric used to assess predictive relations between numeric and categorical variables. Implemented using \code{\link[ppsr]{score}}. For more details, see \url{https://zenodo.org/record/4091345}. The value lies between 0 and 1.\cr\cr +} + +\strong{Categorical pairs (factor/categorical):} \itemize{ -\item \code{integer/numeric pair} Pearson Correlation using \code{\link[stats]{cor}} function. The -value lies between -1 and 1.\cr -\item \code{integer/numeric pair} Distance Correlation using \code{\link[energy]{dcorT.test}} function. The -value lies between 0 and 1.\cr -\item \code{integer/numeric pair} Maximal Information Coefficient using \code{\link[minerva]{mine}} function. The -value lies between 0 and 1.\cr -\item \code{integer/numeric pair} Predictive Power Score using \code{\link[ppsr]{score}} function. 
The -value lies between 0 and 1.\cr\cr -\item \code{integer/numeric - factor/categorical pair} correlation coefficient or -squared root of R^2 coefficient of linear regression of integer/numeric -variable over factor/categorical variable using \code{\link[stats]{lm}} function. The value -lies between 0 and 1.\cr -\item \code{integer/numeric - factor/categorical pair} Predictive Power Score using \code{\link[ppsr]{score}} function. The -value lies between 0 and 1.\cr\cr -\item \code{factor/categorical pair} Cramer's V value is -computed based on chisq test and using \code{\link[lsr]{cramersV}} function. The value lies -between 0 and 1.\cr -\item \code{factor/categorical pair} Uncertainty coefficient using \code{\link[DescTools]{UncertCoef}} function. The -value lies between 0 and 1.\cr -\item \code{factor/categorical pair} Predictive Power Score using \code{\link[ppsr]{score}} function. The -value lies between 0 and 1.\cr +\item \strong{Cramér's V:} A measure of association between nominal variables. Computed based on a chi-squared test and implemented using \code{\link[lsr]{cramersV}}. For more details, see \url{https://doi.org/10.1515/9781400883868}. The value lies between 0 and 1.\cr +\item \strong{Uncertainty Coefficient:} A measure of nominal association between two variables. Implemented using \code{\link[DescTools]{UncertCoef}}. For more details, see \url{https://doi.org/10.1016/j.jbi.2010.02.001}. The value lies between 0 and 1.\cr +\item \strong{Predictive Power Score (PPS):} A metric used to assess predictive relations between categorical variables. Implemented using \code{\link[ppsr]{score}}. For more details, see \url{https://zenodo.org/record/4091345}. The value lies between 0 and 1.\cr } } \examples{ iris_c <- corrp(iris) iris_m <- corr_matrix(iris_c, isig = FALSE) - corrplot(iris_m) + corrplot::corrplot(iris_m) } @@ -156,7 +155,7 @@ Paul van der Laken, ppsr,2021. URL \url{https://github.com/paulvanderlaken/ppsr}. } \author{ -Igor D.S. 
Siciliani +Igor D.S. Siciliani, Paulo H. dos Santos } \keyword{,} \keyword{biserial} diff --git a/man/set_arguments.Rd b/man/set_arguments.Rd new file mode 100644 index 0000000..06326be --- /dev/null +++ b/man/set_arguments.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{set_arguments} +\alias{set_arguments} +\title{Set Argument} +\usage{ +set_arguments(args_list) +} +\arguments{ +\item{args_list}{[\code{list}]\cr +A named list of arguments to be assigned to the parent environment.} +} +\value{ +A modified \code{args_list} with the arguments that were assigned to the parent environment removed. +} +\description{ +Assigns provided arguments from the \code{args_list} to the parent environment. If an argument is inside the arguments of the methods that calculate statistics, it assigns it on the parent environment, and removes the argument from the list. +} diff --git a/man/sil_acca.Rd b/man/sil_acca.Rd index 4cd0770..cbfdd94 100644 --- a/man/sil_acca.Rd +++ b/man/sil_acca.Rd @@ -47,7 +47,7 @@ Starczewski, Artur, and Adam Krzyżak. "Performance evaluation of the silhouette " International Conference on Artificial Intelligence and Soft Computing. Springer, Cham, 2015. } \author{ -Igor D.S. Siciliani +Igor D.S. Siciliani, Paulo H. dos Santos } \keyword{,} \keyword{acca}