General comments for review #27

update documentation end
meantrix · Jan 10, 2025 · f1ad814 · f1ad814
1 parent aaaeeb2
commit f1ad814
Show file tree

Hide file tree

Showing 18 changed files with 133 additions and 190 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -19,6 +19,7 @@ Description: Compute multiple types of correlation analyses, including
 Imports: 
     Rcpp (>= 1.0.13-1),
     RcppArmadillo (>= 14.2.2-1),
+    corrplot (>= 0.95),
     lsr (>= 0.5.2),
     parallel (>= 4.4.1),
     stats (>= 4.4.1),
@@ -28,7 +29,6 @@ Imports:
     ppsr (>= 0.0.2),
     DescTools (>= 0.99.40)
 Suggests: 
-    corrplot,
     energy,
     knitr,
     rmarkdown,

diff --git a/NAMESPACE b/NAMESPACE
@@ -21,6 +21,7 @@ export(corr_rm)
 export(corrp)
 export(dcorT_test)
 export(ptest)
+export(set_arguments)
 export(sil_acca)
 importFrom(Rcpp,evalCpp)
 importFrom(RcppArmadillo,armadillo_version)

diff --git a/R/acca.R b/R/acca.R
@@ -23,7 +23,7 @@
 #' final result of the clustering method.
 #'  That is, the name of the variables belonging to each cluster k.
 #'
-#' @author Igor D.S. Siciliani
+#' @author Igor D.S. Siciliani, Paulo H. dos Santos
 #'
 #' @keywords correlation , acca
 #'

diff --git a/R/best_acca.R b/R/best_acca.R
@@ -20,7 +20,7 @@
 #' the optimal number of clusters `$best.k`.
 #' @seealso \code{\link{sil_acca}}
 #'
-#' @author Igor D.S. Siciliani
+#' @author Igor D.S. Siciliani, Paulo H. dos Santos
 #'
 #' @keywords silhouette , acca , optimal , k
 #'

diff --git a/R/corr_fun.R b/R/corr_fun.R
@@ -8,38 +8,8 @@
 #'
 #' @name corr_fun
 #'
-#' @section Details (Types):
+#' @inheritSection corrp Pair Types
 #'
-#' - \code{integer/numeric pair} Pearson Correlation using
-#' \code{\link[stats]{cor}} function. The
-#'   value lies between -1 and 1.\cr
-#' - \code{integer/numeric pair} Distance Correlation
-#' using \code{\link[energy]{dcorT.test}} function. The
-#'   value lies between 0 and 1.\cr
-#' - \code{integer/numeric pair} Maximal Information Coefficient using
-#' \code{\link[minerva]{mine}} function. The
-#'   value lies between 0 and 1.\cr
-#' - \code{integer/numeric pair} Predictive Power Score using 
-#' \code{\link[ppsr]{score}} function. The
-#'   value lies between 0 and 1.\cr\cr
-#' - \code{integer/numeric - factor/categorical pair} correlation coefficient or
-#'   squared root of R^2 coefficient of linear regression of integer/numeric
-#'   variable over factor/categorical variable using 
-#' \code{\link[stats]{lm}} function. The value
-#'   lies between 0 and 1.\cr
-#' - \code{integer/numeric - factor/categorical pair} 
-#' Predictive Power Score using \code{\link[ppsr]{score}} function. 
-#' The value lies between 0 and 1.\cr\cr
-#' - \code{factor/categorical pair} Cramer's V value is
-#'   computed based on chisq test and using 
-#' \code{\link[lsr]{cramersV}} function. The value lies
-#'   between 0 and 1.\cr
-#' - \code{factor/categorical pair} Uncertainty coefficient 
-#' using \code{\link[DescTools]{UncertCoef}} function. The
-#'   value lies between 0 and 1.\cr
-#' - \code{factor/categorical pair} Predictive Power Score 
-#' using \code{\link[ppsr]{score}} function. 
-#' The value lies between 0 and 1.\cr
 #'
 #' @return list with all statistical results.\cr
 #' - All statistical tests are controlled by the confidence internal of
@@ -52,44 +22,12 @@
 #' default the association measure(`infer.value`) will be `NA`.
 #'
 #'
-#' @param df \[\code{data.frame(1)}]\cr input data frame.
+#' @inheritParams corrp
 #' @param nx \[\code{character(1)}]\cr first variable column name: independent/predictor variable. 
 #' @param ny \[\code{character(1)}]\cr second variable column name: dependent/target variable.
-#' @param p.value \[\code{logical(1)}]\cr
-#' P-value probability of obtaining the observed results of a test,
-#' assuming that the null hypothesis is correct. By default p.value=0.05 (Cutoff value for p-value.).
-#' @param comp \[\code{character(1)}]\cr The param \code{p.value} must be greater
-#'  or less than those estimated in tests and correlations.
-#' @param alternative \[\code{character(1)}]\cr a character string specifying the alternative hypothesis,
-#' must be one of "greater" (default), "less" or "two.sided". You can specify just the initial letter.
-#' You can specify just the initial letter.
-#' @param verbose \[\code{logical(1)}]\cr Activate verbose mode.
-#' @param num.s \[\code{numeric(1)}]\cr Used in permutation test. The number of samples with
-#' replacement created with y numeric vector.
-#' @param rk \[\code{logical(1)}]\cr Used in permutation test.
-#' if its TRUE transform x, y numeric vectors with samples ranks.
-#' @param cor.nn \[\code{character(1)}]\cr
-#' Choose correlation type to be used in integer/numeric pair inference.
-#' The options are `pearson: Pearson Correlation`,`mic: Maximal Information Coefficient`,
-#' `dcor: Distance Correlation`,`pps: Predictive Power Score`.Default is `Pearson Correlation`.
-#' @param cor.nc \[\code{character(1)}]\cr
-#' Choose correlation type to be used in integer/numeric - factor/categorical pair inference.
-#' The option are `lm: Linear Model`,`pps: Predictive Power Score`. Default is `Linear Model`.
-#' @param cor.cc  \[\code{character(1)}]\cr
-#' Choose correlation type to be used in factor/categorical pair inference.
-#' The option are `cramersV: Cramer's V`,`uncoef: Uncertainty coefficient`,
-#' `pps: Predictive Power Score`. Default is ` Cramer's V`.
-#' @param lm.args \[\code{list(1)}]\cr additional parameters for the specific method.
-#' @param pearson.args \[\code{list(1)}]\cr additional parameters for the specific method.
-#' @param dcor.args \[\code{list(1)}]\cr additional parameters for the specific method.
-#' @param mic.args \[\code{list(1)}]\cr additional parameters for the specific method.
-#' @param pps.args \[\code{list(1)}]\cr additional parameters for the specific method.
-#' @param uncoef.args \[\code{list(1)}]\cr additional parameters for the specific method.
-#' @param cramersV.args \[\code{list(1)}]\cr additional parameters for the specific method.
-#' @param ... Additional arguments.
 #'
 #'
-#' @author Igor D.S. Siciliani
+#' @author Igor D.S. Siciliani, Paulo H. dos Santos
 #'
 #' @keywords correlation , power predictive score , linear model , distance correlation ,
 #' mic , point biserial , pearson , cramer'sV

diff --git a/R/corr_matrix.R b/R/corr_matrix.R
@@ -9,15 +9,15 @@
 #' be represented by NA or FALSE in the correlation matrix.
 #' @param ... Additional arguments (TODO).
 #'
-#' @author Igor D.S. Siciliani
+#' @author Igor D.S. Siciliani, Paulo H. dos Santos
 #'
 #' @keywords correlation matrix , corrp
 #'
 #' @examples
 #'
 #' iris_cor <- corrp(iris)
 #' iris_m <- corr_matrix(iris_cor, isig = FALSE)
-#' corrplot(iris_m)
+#' corrplot::corrplot(iris_m)
 #' @export
 corr_matrix <- function(c, ...) {
   assert_required_argument(c, "The 'c' argument must be a clist object, which is the output from corrp.")

diff --git a/R/corr_rm.R b/R/corr_rm.R
@@ -6,8 +6,8 @@
 #'
 #' @param df \[\code{data.frame(1)}]\cr input data frame.
 #' @param c \[\code{clist(1)} | \code{cmatrix(1)}]\cr correlation list output from the function \code{\link[corrp]{corrp}} 
-#' with class \code{\link[corrp]{clist}} or correlation matrix output 
-#' from \code{\link[corrp]{corr_matrix}} with class \code{\link[corrp]{cmatrix}}.
+#' with class \code{clist} or correlation matrix output 
+#' from \code{\link[corrp]{corr_matrix}} with class \code{cmatrix}.
 #' @param cutoff \[\code{numeric(1)}]\cr A numeric value for the pair-wise absolute correlation cutoff.
 #' The default values is 0.75.
 #' @param col \[\code{character(1)}]\cr choose the column to be used in the correlation matrix
@@ -22,7 +22,7 @@
 #' corr_rm(df = iris, c = iris_clist, cutoff = 0.75, col = "infer.value", isig = FALSE)
 #' corr_rm(df = iris, c = iris_cmatrix, cutoff = 0.75, col = "infer.value", isig = FALSE)
 #'
-#' @author Igor D.S. Siciliani
+#' @author Igor D.S. Siciliani, Paulo H. dos Santos
 #'
 #' @keywords highly correlated , cmatrix , clist
 #'

diff --git a/R/corrp.R b/R/corrp.R
@@ -8,29 +8,25 @@
 #'
 #' @name corrp
 #'
-#' @section Details (Pair Types):
+#' @section Pair Types:
 #'
-#' - \code{integer/numeric pair} Pearson Correlation using \code{\link[stats]{cor}} function. The
-#'   value lies between -1 and 1.\cr
-#' - \code{integer/numeric pair} Distance Correlation using \code{\link[energy]{dcorT.test}} function. The
-#'   value lies between 0 and 1.\cr
-#' - \code{integer/numeric pair} Maximal Information Coefficient using \code{\link[minerva]{mine}} function. The
-#'   value lies between 0 and 1.\cr
-#' - \code{integer/numeric pair} Predictive Power Score using \code{\link[ppsr]{score}} function. The
-#'   value lies between 0 and 1.\cr\cr
-#' - \code{integer/numeric - factor/categorical pair} correlation coefficient or
-#'   squared root of R^2 coefficient of linear regression of integer/numeric
-#'   variable over factor/categorical variable using \code{\link[stats]{lm}} function. The value
-#'   lies between 0 and 1.\cr
-#' - \code{integer/numeric - factor/categorical pair} Predictive Power Score using \code{\link[ppsr]{score}} function. The
-#'   value lies between 0 and 1.\cr\cr
-#' - \code{factor/categorical pair} Cramer's V value is
-#'   computed based on chisq test and using \code{\link[lsr]{cramersV}} function. The value lies
-#'   between 0 and 1.\cr
-#' - \code{factor/categorical pair} Uncertainty coefficient using \code{\link[DescTools]{UncertCoef}} function. The
-#'   value lies between 0 and 1.\cr
-#' - \code{factor/categorical pair} Predictive Power Score using \code{\link[ppsr]{score}} function. The
-#'   value lies between 0 and 1.\cr
+#' **Numeric pairs (integer/numeric):**
+#' 
+#' - **Pearson Correlation Coefficient:** A widely used measure of the strength and direction of linear relationships. Implemented using \code{\link[stats]{cor}}. For more details, see \url{https://doi.org/10.1098/rspl.1895.0041}. The value lies between -1 and 1.\cr
+#' - **Distance Correlation:** Based on the idea of expanding covariance to distances, it measures both linear and nonlinear associations between variables. Implemented using \code{\link[energy]{dcorT.test}}. For more details, see \url{https://doi.org/10.1214/009053607000000505}. The value lies between 0 and 1.\cr
+#' - **Maximal Information Coefficient (MIC):** An information-based nonparametric method that can detect both linear and non-linear relationships between variables. Implemented using \code{\link[minerva]{mine}}. For more details, see \url{https://doi.org/10.1126/science.1205438}. The value lies between 0 and 1.\cr
+#' - **Predictive Power Score (PPS):** A metric used to assess predictive relations between variables. Implemented using \code{\link[ppsr]{score}}. For more details, see \url{https://zenodo.org/record/4091345}. The value lies between 0 and 1.\cr\cr
+#'
+#' **Numeric and categorical pairs (integer/numeric - factor/categorical):**
+#' 
+#' - **Square Root of R² Coefficient:** From linear regression of the numeric variable over the categorical variable. Implemented using \code{\link[stats]{lm}}. For more details, see \url{https://doi.org/10.4324/9780203774441}. The value lies between 0 and 1.\cr
+#' - **Predictive Power Score (PPS):** A metric used to assess predictive relations between numeric and categorical variables. Implemented using \code{\link[ppsr]{score}}. For more details, see \url{https://zenodo.org/record/4091345}. The value lies between 0 and 1.\cr\cr
+#'
+#' **Categorical pairs (factor/categorical):**
+#' 
+#' - **Cramér's V:** A measure of association between nominal variables. Computed based on a chi-squared test and implemented using \code{\link[lsr]{cramersV}}. For more details, see \url{https://doi.org/10.1515/9781400883868}. The value lies between 0 and 1.\cr
+#' - **Uncertainty Coefficient:** A measure of nominal association between two variables. Implemented using \code{\link[DescTools]{UncertCoef}}. For more details, see \url{https://doi.org/10.1016/j.jbi.2010.02.001}. The value lies between 0 and 1.\cr
+#' - **Predictive Power Score (PPS):** A metric used to assess predictive relations between categorical variables. Implemented using \code{\link[ppsr]{score}}. For more details, see \url{https://zenodo.org/record/4091345}. The value lies between 0 and 1.\cr
 #'
 #' @return 
 #' A list with two tables: `data` and `index`.
@@ -54,9 +50,11 @@
 #' All statistical tests are controlled by the confidence internal of
 #'   p.value param. If the statistical tests do not obtain a significance greater/less
 #'   than p.value the value of variable `isig` will be `FALSE`.\cr
-#' By default there is no statistical significance test for the pps algorithm. By default `isig` is NA, you can enable in the pps.args.\cr
+#'
 #' If any errors occur during operations the association measure (`infer.value`) will be `NA`.\cr
 #' The result `data` and `index` will have \eqn{N^2} rows, where N is the number of variables of the input data.
+#' By default there is no statistical significance test for the pps algorithm, so `isig` is NA; you can enable the test by setting `ptest = TRUE` in `pps.args`.\cr
+#' All the `*.args` lists can modify the parameters (`p.value`, `comp`, `alternative`, `num.s`, `rk`, `ptest`) for the method named in their prefix.
 #'
 #' @param df \[\code{data.frame(1)}]\cr input data frame.
 #' @param parallel \[\code{logical(1)}]\cr If its TRUE run the operations in parallel backend.
@@ -85,16 +83,16 @@
 #' Choose correlation type to be used in factor/categorical pair inference.
 #' The option are `cramersV: Cramer's V`,`uncoef: Uncertainty coefficient`,
 #' `pps: Predictive Power Score`. Default is ` Cramer's V`.
-#' @param lm.args \[\code{list(1)}]\cr additional parameters for the specific method.
-#' @param pearson.args \[\code{list(1)}]\cr additional parameters for the specific method.
-#' @param dcor.args \[\code{list(1)}]\cr additional parameters for the specific method.
-#' @param mic.args \[\code{list(1)}]\cr additional parameters for the specific method.
-#' @param pps.args \[\code{list(1)}]\cr additional parameters for the specific method.
-#' @param uncoef.args \[\code{list(1)}]\cr additional parameters for the specific method.
-#' @param cramersV.args \[\code{list(1)}]\cr additional parameters for the specific method.
+#' @param lm.args \[\code{list(1)}]\cr additional parameters for linear model to be passed to \code{\link[stats]{lm}}. 
+#' @param pearson.args \[\code{list(1)}]\cr additional parameters for Pearson correlation to be passed to \code{\link[stats]{cor.test}}.
+#' @param dcor.args \[\code{list(1)}]\cr additional parameters for the distance correlation to be passed to \code{\link[corrp]{dcorT_test}}. 
+#' @param mic.args \[\code{list(1)}]\cr additional parameters for the maximal information coefficient to be passed to \code{\link[minerva]{mine}}.
+#' @param pps.args \[\code{list(1)}]\cr additional parameters for the predictive power score to be passed to \code{\link[ppsr]{score}}.
+#' @param uncoef.args \[\code{list(1)}]\cr additional parameters for the uncertainty coefficient to be passed to \code{\link[DescTools]{UncertCoef}}.
+#' @param cramersV.args \[\code{list(1)}]\cr additional parameters for the Cramer's V to be passed to \code{\link[lsr]{cramersV}}.
 #' @param ... Additional arguments.
 #'
-#' @author Igor D.S. Siciliani
+#' @author Igor D.S. Siciliani, Paulo H. dos Santos
 #'
 #' @keywords correlation , power predictive score , linear model , distance correlation ,
 #' mic , point biserial , pearson , cramer'sV
@@ -110,7 +108,7 @@
 #' @examples
 #'  iris_c <- corrp(iris)
 #'  iris_m <- corr_matrix(iris_c, isig = FALSE)
-#'  corrplot(iris_m)
+#'  corrplot::corrplot(iris_m)
 #' 
 #'
 #' @export

diff --git a/R/sil_acca.R b/R/sil_acca.R
@@ -14,7 +14,7 @@
 #'  are very well clustered.
 #'
 #'
-#' @author Igor D.S. Siciliani
+#' @author Igor D.S. Siciliani, Paulo H. dos Santos
 #'
 #' @keywords silhouette , acca
 #'

diff --git a/R/utils.R b/R/utils.R
@@ -457,7 +457,7 @@ set_arguments = function(args_list) {
   list_name <- deparse(substitute(args_list))
 
   for (name_arg in names(args_list)) {
-    if (name_arg %in% c("p.value", "comp", "verbose", "alternative", "num.s", "rk", "ptest")) {
+    if (name_arg %in% c("p.value", "comp", "alternative", "num.s", "rk", "ptest")) {
       assign(name_arg, args_list[[name_arg]], envir = parent.frame())
       args_list[[name_arg]] = NULL
     }

diff --git a/man/acca.Rd b/man/acca.Rd
diff --git a/man/best_acca.Rd b/man/best_acca.Rd