diff --git a/R/exploratoryfactoranalysis.R b/R/exploratoryfactoranalysis.R index e5b225bb..642d999a 100644 --- a/R/exploratoryfactoranalysis.R +++ b/R/exploratoryfactoranalysis.R @@ -253,7 +253,7 @@ exploratoryFactorAnalysisInternal <- function(jaspResults, dataset, options, ... kmo <- psych::KMO(cov2cor(as.matrix(dataset))) } - kmoTable[["col"]] <- c(gettext("Overall MSA\n"), names(kmo$MSAi)) + kmoTable[["col"]] <- c(gettext("Overall MSA"), names(kmo$MSAi)) kmoTable[["val"]] <- c(kmo$MSA, kmo$MSAi) } @@ -364,7 +364,7 @@ exploratoryFactorAnalysisInternal <- function(jaspResults, dataset, options, ... .efaGoodnessOfFitTable <- function(modelContainer, dataset, options, ready) { if (!is.null(modelContainer[["goodnessOfFitTable"]])) return() - goodnessOfFitTable <- createJaspTable(title = gettext("Chi-squared Test")) + goodnessOfFitTable <- createJaspTable(title = gettext("Chi-Squared Test")) goodnessOfFitTable$addColumnInfo(name = "model", title = "", type = "string") goodnessOfFitTable$addColumnInfo(name = "chisq", title = gettext("Value"), type = "number", format = "dp:3") goodnessOfFitTable$addColumnInfo(name = "df", title = gettext("df"), type = "integer") @@ -482,11 +482,10 @@ exploratoryFactorAnalysisInternal <- function(jaspResults, dataset, options, ... # if a rotation is used, the table needs more columns rotate <- options[[if (options[["rotationMethod"]] == "orthogonal") "orthogonalSelector" else "obliqueSelector"]] - eigenTable$addColumnInfo(name = "eigen", title = gettext("Eigenvalues"), type = "number") if (rotate != "none") { overTitleA <- gettext("Unrotated solution") overTitleB <- gettext("Rotated solution") - eigenTable$addColumnInfo(name = "sslU", title = gettext("SumSq. 
Loadings"), type = "number", overtitle = overTitleA) + eigenTable$addColumnInfo(name = "sslU", title = gettext("Eigenvalue"), type = "number", overtitle = overTitleA) eigenTable$addColumnInfo(name = "propU", title = gettext("Proportion var."), type = "number", overtitle = overTitleA) eigenTable$addColumnInfo(name = "cumpU", title = gettext("Cumulative"), type = "number", overtitle = overTitleA) eigenTable$addColumnInfo(name = "sslR", title = gettext("SumSq. Loadings"), type = "number", overtitle = overTitleB) @@ -512,7 +511,6 @@ exploratoryFactorAnalysisInternal <- function(jaspResults, dataset, options, ... Vaccounted <- efaResults[["Vaccounted"]] idx <- seq_len(efaResults[["factors"]]) - eigenTable[["eigen"]] <- eigv_init[idx] eigenTable[["comp"]] <- paste("Factor", idx) eigenTable[["sslU"]] <- eigv[idx] eigenTable[["propU"]] <- eigv[idx] / sum(eigv_init) diff --git a/R/principalcomponentanalysis.R b/R/principalcomponentanalysis.R index 3b24c1ab..ba27065c 100644 --- a/R/principalcomponentanalysis.R +++ b/R/principalcomponentanalysis.R @@ -281,7 +281,7 @@ principalComponentAnalysisInternal <- function(jaspResults, dataset, options, .. 
.pcaGoodnessOfFitTable <- function(modelContainer, dataset, options, ready) { if (!is.null(modelContainer[["goodnessOfFitTable"]])) return() - goodnessOfFitTable <- createJaspTable(title = gettext("Chi-squared Test")) + goodnessOfFitTable <- createJaspTable(title = gettext("Chi-Squared Test")) goodnessOfFitTable$addColumnInfo(name = "model", title = "", type = "string") goodnessOfFitTable$addColumnInfo(name = "chisq", title = gettext("Value"), type = "number", format = "dp:3") goodnessOfFitTable$addColumnInfo(name = "df", title = gettext("df"), type = "integer") diff --git a/inst/help/ExploratoryFactorAnalysis.md b/inst/help/ExploratoryFactorAnalysis.md deleted file mode 100644 index febcafb8..00000000 --- a/inst/help/ExploratoryFactorAnalysis.md +++ /dev/null @@ -1,167 +0,0 @@ -Exploratory Factor Analysis -=== - -With Exploratory Factor Analysis it is possible to identify one or more factors underlying the data. The factors are chosen such that they capture the common variance in the data. - -### Assumptions (Yong & Pearce, 2013) -- The variables included in the analysis are continuous*. - - If variables are ordinal, this assumption can be overcome when basing the analysis on the polychoric or tetrachoric correlation matrix (Bandalos & Finney, 2018). -- The data follow a multivariate normal distribution. -- There is a linear relation between the variables and the factors. -- There is no multicollinearity and singularity in the data. - -### Input ---- -#### Assignment Box -- Included Variables: In this box, the variables to perform the exploratory factor analysis on are selected. - -#### Number of Factors based on -- _NB: eigenvalues for EFA are different from eigenvalues for PCA. See Dinno (2014) for more information._ -- Here, the number of factors that the rotation is applied to is specified. Several methods to determine this number can be chosen from: - - Parallel Analysis: Factors are selected on the basis of parallel analysis. 
With this method, factors are selected when their eigenvalue is greater than the parallel average random eigenvalue. This method is selected by default. Can be based on principal component eigenvalues (PC) or factor eigenvalues (FA). A seed (1234) is chosen by default so that the results from the parallel analysis are equal across the EFA. - - Eigenvalues: Factors are selected when they have a certain eigenvalue. By default factors are selected that have an eigenvalue of 0 or higher. This is called the Kaiser criterion. - - Manual: The number of factors can be specified manually. By default this is set to 1. - -#### Factoring Method: -- Choose the estimation method used within the psych package to find the factor solution, options are: - - Minimum residual (default), maximum likelihood, principal axis factoring, ordinal least squares, - weighted least squares, generalized least squares, minimum chi-square, minimum rank - -#### Rotation -- Here, the rotation method to apply to the factors can be specified. - - Orthogonal: This method produces factors that are uncorrelated. For this method, there are several possibilities that can be selected: - - None: No rotation method is selected. - - varimax: Orthogonal rotation method varimax. This rotation is based on maximizing the variance of the loadings. - - quartimax: Orthogonal rotation method quartimax. For this method, the number of factors that is necessary to explain each variable is minimized. - - bentlerT: Orthogonal rotation method bentlerT. - - equamax: Orthogonal rotation method equamax. This is a combination of varimax and quartimax. - - varimin: Orthogonal rotation method varimin. - - Oblique: This method produces factors that allow for correlation between the factors. This method is selected by default. Several possibilities are available: - - promax: Oblique rotation method promax. This method is selected by default. - - oblimin: Oblique rotation method oblimin. 
- - simplimax: Oblique rotation method simplimax. - - bentlerQ: Oblique rotation method bentlerQ. - - biquartimin: Oblique rotation method biquartimin. - - cluster: Oblique rotation method cluster. - -#### Base analysis on -- Correlation: Bases the PCA on the correlation matrix of the data -- Covariance: Bases the PCA on the covariance matrix of the data -- Polychoric/tetrachoric: Bases the PCA on the poly/tetrachoric (mixed) correlation matrix of the data. This is sometimes unstable when sample size is small and when some variables do not contain all response categories. - -### Output Options -- Display loadings above: cutoff for the loadings to be displayed in the output table -- Order factor loadings by: Loadings in table are either ordered by size or variable names -- Include Tables: - - Factor correlations: When selecting this option, a table with the correlations between the factors will be displayed. - - Additional fit indices: This option displays the Root Mean Squared Error of Approximation (RMSEA) with 90% confidence interval, the Tucker Lewis Index (TLI), and the Bayesian Information Criterion (BIC) to test the fit of the model. - - Residual matrix: Displays a table containing the residual variances and correlations - - Parallel analysis: If this option is selected, a table will be generated exhibiting a detailed output of the parallel analysis. Can be based on principal component eigenvalues (PC) or factor eigenvalues (FA). The seed is taken from the parallel analysis for determining the number of factors above -- Plots: - - Path diagram: By selecting this option, a visual representation of the direction and strength of the relation between the variable and factor will be displayed. - - Scree plot: When selecting this option, a scree plot will be displayed. The scree plot provides information on how much variance in the data, indicated by the eigenvalue, is explained by each factor. A scree plot can be used to decide how many factors should be selected. 
- - Parallel analysis results: Display the results of the parallel analysis in the scree plot. The parallel analysis will be based on PC or FA as defined by the option for the parallel analysis table -- Assumption Checks: - - Kaiser-Meyer-Olkin Test (KMO): Determines how well variables are suited for factor analysis by computing the proportion of common variance between variables - - Bartlett's Test (of sphericity): Determines if the data correlation matrix is the identity matrix, meaning, if the variables are related or not - - Mardia's Test of Multivariate Normality: Assesses the degree of the departure from multivariate normality of the included variables in terms of multivariate skewness and kurtosis. The Mardia's test will always include the listwise complete cases. -- Missing values: - - Exclude cases pairwise: If one observation from a variable is missing, all the other variable observations from the same case will still be used for the analysis. In this scenario, it is not necessary to have an observation for all the variables to include the case in the analysis. This option is selected by default. - - Exclude cases listwise: If one observation from a variable is missing, the whole case, so all the other connected variable observations, will be dismissed from the analysis. In this scenario, observations for every variable are needed to include the case in the analysis. -- Add FA scores to data: Adds the estimated factor scores as new columns to the data set - - -### Output ---- -#### Assumption Checks -- Kaiser-Meyer-Olkin Test (KMO): Measure of sampling adequacy (MSA) as the proportion of common variance among variables is computed for all variables; values closer to 1 are desired. -- Bartlett's Test (of sphericity): A significant result means the correlation matrix is unlike the identity matrix. -- Mardia's Test of Multivariate Normality: - - Tests: The first column shows all the tests performed. 
- - Value: The values of `b1p` (multivariate skewness) and `b2p` (multivariate kurtosis), as denoted in Mardia (1970). - - Statistic: The two chi-squared test statistics of multivariate skewness (both standard and corrected for small samples) and the standard normal test statistic of multivariate kurtosis. - - df: Degrees of freedom. - - p: P-value. - -#### Exploratory Factor Analysis -- Factor Loadings: - - Variables: The first column shows all the variables included in the analysis. - - PC (1, 2, 3, etc.): This column shows the factor loadings on the variable. - - Uniqueness: The percentage of the variance of each variable that is not explained by the factor. - -- Factor Correlations: - - The correlations between the factors. - -- Chi-squared Test: - The fit of the model is tested. When the test is significant, the model is rejected. Bear in mind that a chi-squared approximation may be unreliable for small sample sizes, and the chi-squared test may too readily reject the model with very large sample sizes. Additional information about the fit of the model can be obtained by selecting the option `Additional fit indices` in the `Output options`. See, for example, Saris, Satorra, & van der Veld (2009) for more discussions on overall fit metrics. - - Model: The model obtained from the exploratory factor analysis. - - Value: The chi-squared test statistic. - - df: Degrees of freedom. - - p: P-value. - -- Factor Characteristics: - - Unrotated solution: - - SumSq. Loadings: Sum of squared loadings, variance explained by each unrotated factor - - Proportion var.: The proportion of variance in the dataset explained by each unrotated factor - - Cumulative: The proportion of variance in the dataset explained by the unrotated factor up to and including the current factor. - - Rotated solution: - - SumSq. 
Loadings: Sum of squared loadings, variance explained by each rotated factor - - Proportion var.: The proportion of variance in the dataset explained by each rotated factor - - Cumulative: The proportion of variance in the dataset explained by the rotated factor up to and including the current factor. - -- Additional Fit Indices: - These fit indices provide information about the fit of the model. - - Model: The model obtained from the exploratory factor analysis. - - RMSEA: Root Mean Square Error of Approximation. Corrects for parsimony. When models perform the same, but model 1 has more degrees of freedom than model 2, model 1 will be recommended. Browne and Cudeck (1993) advise a value less than 0.08 for an acceptable model fit, less than 0.05 a good model fit, and advise to reject models with values of 0.1 or higher. However, there is no absolute agreement on these cutoffs. - - RMSEA 90% confidence interval: The 90% confidence interval of the Root Mean Square Error of Approximation. - - SRMR: Standardized root mean square residual. Cutoffs similar to RMSEA - - TLI: Tucker-Lewis Index. Evaluates the fit compared to a more restricted, nested baseline model. Hopwood and Donnellan (2010) suggested that a value higher than .9 indicates a good fit. However, there is no consensus about this cutoff. - - CFI: Comparative fit index. Cutoffs similar to TLI - - BIC: Bayesian Information Criterion. This measure is useful for comparing the performances of different models on the same data, where a lower value indicates a better fitting model. - -- Parallel Analysis: The table displays as many factors as variables selected for analysis, eigenvalues corresponding to the real-data factor, and the eigenvalue corresponding to the parallel mean resampled value. It will display an asterisk along the names of the factors advised to be retained (whose real-data eigenvalue is greater than the resampled-data mean value). 
Note that, even when selecting a PC-based parallel analysis, the table will refer to "factors" as the ones advised to be retained instead of "components"; this is due to common usage of the PC-based parallel analysis method for assessing the number of factors within EFA (e.g., Golino et al., 2020). - -#### Path Diagram -- F(1,2,3,...): The factors in the model are represented by the circles. -- Variables: The variables are represented by the boxes. -- Arrows: Going from the factors to the variables, representing the loading from the factor on the variable. Red indicates a negative loading, green a positive loading. The wider the arrows, the higher the loading. This highlight can be adjusted at `Highlight` in the `Output Options`. - -#### Screeplot -The scree plot provides information on how much variance in the data, indicated by the eigenvalue, is explained by each factor. The scree plot can be used to decide how many factors should be selected in the model. - -- Factors: On the x-axis, all possible factors. -- Eigenvalue: On the y-axis, the eigenvalue that indicates the variance explained by each factor. -- Data: The dotted line represents the data. -- Simulated: The triangle line represents the simulated data. This line is indicative for the parallel analysis. When the points from the dotted line (real data) are above this line, these factors will be included in the model by parallel analysis. -- Kaiser criterion: The horizontal line at the eigenvalue of 1 represents the Kaiser criterion. According to this criterion, only factors with values above this line (at an eigenvalue of 1) should be included in the model. - -### References ---- -- Bandalos, D. L., & Finney, S. J. (2018). Factor analysis: Exploratory and confirmatory. In G. R. Hancock, L. M. Stapleton, & R. O. Mueller, *The reviewer’s guide to quantitative methods in the social sciences* (pp. 98-122). Routledge. https://doi.org/10.4324/9781315755649 -- Dinno, A. 
(2014) Gently clarifying the application of Horn’s parallel analysis to principal component analysis versus factor analysis. *Working paper*. https://alexisdinno.com/Software/files/PA_for_PCA_vs_FA.pdf -- Dziuban, C. D., & Shirkey, E. C. (1974). When is a correlation matrix appropriate for factor analysis? Some decision rules. *Psychological Bulletin, 81*(6), 358–361. https://doi.org/10.1037/h0036316 -- Golino, H., Shi, D., Christensen, A. P., Garrido, L. E., Nieto, M. D., Sadana, R., ... & Martinez-Molina, A. (2020). Investigating the performance of exploratory graph analysis and traditional techniques to identify the number of latent factors: A simulation and tutorial. *Psychological Methods*, *25*(3), 292. https://doi.org/10.1037/met0000255 -- Hayton, J. C., Allen, D. G., & Scarpello, V. (2004). Factor retention - decisions in exploratory factor analysis: A tutorial on parallel analysis. *Organizational Research Methods, 7*(2), 191-205. https://doi.org/10.1177/1094428104263675 -- Hopwood, C. J., & Donnellan, M. B. (2010). How should the internal structure - of personality inventories be evaluated? *Personality and Social Psychology Review, 14*(3), 332–346. https://doi.org/10.1177/1088868310361240 -- Horn, J. L. (1965). A rationale and test for the number of factors in factor analysis. *Psychometrika, 30*(2), 179–185. https://doi.org/10.1007%2Fbf02289447 -- Hu, L.-t., & Bentler, P. M. (1998). Fit indices in covariance structure modeling: Sensitivity to underparameterized model misspecification. *Psychological Methods, 3*(4), 424–453. https://doi.org/10.1037/1082-989X.3.4.424 -- Mardia, K. V. (1970). Measures of multivariate skewness and kurtosis with applications. *Biometrika*, *57*(3), 519-530. https://doi.org/10.2307/2334770 -- Osborne, J. W., Costello, A. B., & Kellow, J. T. (2008). Best practices in - exploratory factor analysis. In J. Osborne (Ed.), *Best practices in quantitative methods* (pp. 86-99). SAGE Publications, Inc. 
https://doi.org/10.4135/9781412995627.d8 -- Saris, W. E., Satorra, A., & Van der Veld, W. M. (2009). Testing structural equation models or detection of misspecifications?. *Structural Equation Modeling: A Multidisciplinary Journal, 16*(4), 561-582. https://doi.org/10.1080/10705510903203433 -- Timmerman, M. E., & Lorenzo-Seva, U. (2011). Dimensionality assessment of ordered polytomous items with parallel analysis. *Psychological Methods*, *16*(2), 209. https://doi.org/10.1037/a0023353 -- Yong, A. G., & Pearce, S. (2013). A beginner’s guide to factor analysis: Focusing on exploratory factor analysis. *Tutorials in Quantitative Methods for Psychology*, *9*(2), 79-94. https://doi.org/10.20982/tqmp.09.2.p079 - -### R Packages ---- -- ggplot2 -- psych -- qgraph -- stats - -### Example ---- -- For an example go to `File`-->`Data library`-->`Factor`-->`G Factor`. -- For more details about Exploratory Factor Analysis in JASP, watch this video. diff --git a/inst/help/ExploratoryFactorAnalysis_nl.md b/inst/help/ExploratoryFactorAnalysis_nl.md deleted file mode 100644 index babafce1..00000000 --- a/inst/help/ExploratoryFactorAnalysis_nl.md +++ /dev/null @@ -1,141 +0,0 @@ -Exploratieve Factoranalyse -=== - -Met Exploratieve Factoranalyse kunt u één of meer onderliggende factoren van de data identificeren. De factoren zijn dusdanig gekozen dat zij gemeenschappelijke variantie dekken. - -### Assumpties (Yong & Pearce, 2013) -- De variabelen in de analyse zijn continu. -- De data heeft een multivariate normale verdeling. -- Er is een lineaire relatie tussen de variabelen en de factoren. -- Er is geen multicollineariteit en singulariteit in de data. - -### Invoer ---- -#### Invoerveld -- Ingevoegde Variabelen: In dit veld selecteert u de variabelen waarmee u de exploratieve factoranalyse uitvoert. - -#### Aantal Factoren -_NB: eigenwaarden voor EFA zijn anders dan eigenwaarden voor PCA. 
Meer informatie hierover is te vinden in Dinno (2014)._ -- Hier specificeert u het aantal factoren waar de rotatie op wordt toegepast. Er zijn verschillende methoden om te bepalen hoe dit wordt gedaan: - - Parallel Analyse: Factoren worden gekozen op basis van parallel analyse. Met deze methode worden factoren geselecteerd met een eigenwaarde die hoger is dan de parallel gemiddelde willekeurige eigenwaarde. Dit is de standaardoptie. - - Eigenwaardes: Factoren worden gekozen indien zij een bepaalde eigenwaarde hebben. Als standaardoptie worden factoren met een eigenwaarde van 0 of hoger gekozen. Dit wordt het Kaiser criterium genoemd. - - Handmatig: Het aantal factoren kan handmatig gespecificeerd worden. De standaardoptie is 1. - -#### Rotatie -- De rotatiemethode die wordt toegepast op de factoren, kan hier gespecificeerd worden. - - Orthogonaal: Deze methode produceert ongecorreleerde factoren. Diverse mogelijkheden zijn beschikbaar: - - Geen: Geen rotatiemethode wordt geselecteerd. - - varimax: Orthogonale rotatiemethode varimax. Deze rotatie is gebaseerd op het maximaliseren van de variantie van de ladingen. - - quartimax: Orthogonale rotatiemethode quartimax. Voor deze methode wordt het aantal factoren dat nodig is om iedere variabele te verklaren geminimaliseerd. - - bentlerT: Orthogonale rotatiemethode bentlerT. - - equamax: Orthogonale rotatiemethode equamax. Dit is een combinatie van varimax en quartimax. - - varimin: Orthogonale rotatiemethode varimin. - - Oblique: Deze methode produceert factoren die correlaties tussen factoren toestaan. Dit is de standaardoptie. Diverse mogelijkheden zijn beschikbaar: - - promax: Oblique rotatiemethode promax. Dit is de standaardoptie. - - oblimin: Oblique rotatiemethode oblimin. - - simplimax: Oblique rotatiemethode simplimax. - - bentlerQ: Oblique rotatiemethode bentlerQ. - - biquartimin: Oblique rotatiemethode biquartimin. - - cluster: Oblique rotatiemethode cluster. 
- -#### Basis decompositie op -- Correlatie: Baseert de PCA op de correlatiematrix van de gegevens -- Covariantie: Baseert de PCA op de covariantiematrix van de gegevens -- Polychorisch/tetrachorisch: Baseert de PCA op de poly/tetrachorische (gemengde) correlatiematrix van de gegevens. Dit is soms onstabiel wanneer de steekproefomvang klein is en wanneer sommige variabelen niet alle antwoordcategorieën bevatten. - -### Uitvoeropties -- Markeer: Deze optie zet de waarde vanaf waar de paden schalen in breedte. Paden met absolute gewichten hoger dan deze waarde zullen steeds breder worden terwijl waardes eronder een vaste dunne breedte hebben. Alle paden krijgen een sterkere of zwakkere kleurintensiteit naarmate ze een sterker gewicht hebben. Als de waarde op 0 gezet wordt zullen alle paden een verschillende breedte krijgen. -- Voeg Tabellen Toe: - - Factorcorrelaties: Bij het selecteren van deze optie, wordt een tabel met de correlaties tussen factoren weergegeven. - - Aanvullende fit indices: Deze optie toont de wortel van de gemiddelde kwadraatsom fout (RMSEA) met 90% betrouwbaarheidsinterval, de Tucker Lewis Index (TLI), en de Bayesian Information Criterion (BIC) om de model fit te testen. -- Plots: - - Paddiagram: Bij het selecteren van deze optie wordt een visuele representatie van de richting en de sterkte van de relatie tussen de variabele en de factor weergegeven. - - Screeplot: Bij het selecteren van deze optie, wordt een screeplot getoond. De screeplot geeft informatie over hoeveel variantie in de data, aangegeven door de eigenwaarde, wordt verklaard door elke factor. Een screeplot kan gebruikt worden om te beslissen over de hoeveelheid van de factoren. - - Resultaten van parallelle analyse: Toont de resultaten van de parallelle analyse in de scree plot. De parallelle analyse wordt gebaseerd op PC of FA, zoals bepaald door de optie voor de parallelle analysetabel. 
-- Aannamecontroles: - - Kaiser-Meyer-Olkin Test (KMO): Bepaalt hoe goed variabelen geschikt zijn voor factoranalyse door het aandeel gemeenschappelijke variantie tussen variabelen te berekenen. - - Bartlett's Test (van sfericiteit): Bepaalt of de correlatiematrix van de gegevens de identiteitsmatrix is, d.w.z. of de variabelen aan elkaar gerelateerd zijn of niet. - - Mardia's test van multivariate normaliteit: Beoordeelt de mate van afwijking van de multivariate normaliteit van de opgenomen variabelen in termen van multivariate scheefheid en kurtose. De Mardia's test omvat altijd de lijstvolledige gevallen. -- Ontbrekende waarden: - - Sluit paarsgewijs uit: Indien 1 observatie van een variabele mist, worden de observaties van de andere variabelen nog wel gebruikt voor de analyse. In dit scenario is het niet nodig om voor elke variabele een observatie te hebben. Dit is de standaardoptie. - - Sluit lijstgewijs uit: Indien 1 observatie van een variabele mist, wordt de gehele casus (dus alle andere variabelen van dezelfde casus) uitgesloten voor analyse. In dit scenario is voor elke variabele een observatie nodig. - -### Uitvoer ---- -#### Aannamecontroles -- Kaiser-Meyer-Olkin Test (KMO): Maat voor steekproeftoereikendheid (MSA) als het aandeel van de gemeenschappelijke variantie tussen variabelen wordt berekend voor alle variabelen; waarden dichter bij 1 zijn gewenst. -- Bartlett's Test (of sphericity): Een significant resultaat betekent dat de correlatiematrix afwijkt van de identiteitsmatrix. -- Mardia's Test van multivariate normaliteit: - - Tests: In de eerste kolom staan alle uitgevoerde testen. - - Waarde: De waarden van `b1p` (multivariate scheefheid) en `b2p` (multivariate kurtosis), zoals aangegeven in Mardia (1970). - - Statistiek: De twee chi-kwadraat teststatistieken van multivariate scheefheid (zowel standaard als gecorrigeerd voor kleine steekproeven) en de standaard normale teststatistiek van multivariate kurtosis. - - df: Vrijheidsgraden. 
- - p: P-waarde. - -#### Exploratieve Factoranalyse -Factorladingen: -- Variabelen: De eerste kolom toont alle variabelen die zijn meegenomen in de analyse. -- PC (1, 2, 3, etc.): Deze kolom toont de factorladingen op de variabele. -- Uniciteit: Het percentage van de variantie van elke variabele dat niet verklaard wordt door de factor. - -Factor Correlaties: -- De correlaties tussen de factoren. - -Chi-squared Toets: -De fit van het model wordt getoetst. Als de toets significant is, dan wordt het model verworpen. Onthoud dat een chi-kwadraat schatting onbetrouwbaar kan zijn voor kleine steekproeven, en bij hele grote steekproeven kan de chi-kwadraattoets het model te snel verwerpen. Aanvullende informatie over de fit van het model kan verkregen worden door de optie `Aanvullende pas indexen` onder `Uitvoeropties` te selecteren. Voor een verdere discussie over fit indices kan bijvoorbeeld Saris, Satorra, & van der Veld (2009) geraadpleegd worden. -- Model: Het verkregen model van de exploratieve factoranalyse. -- Value: De chi-squared toetsstatistiek. -- vg: Vrijheidsgraden. -- p: P-waarde. - -Aanvullende Fit Indices: -Deze fit indices geven informatie over de fit van het model. -- Model: Het verkregen model van de exploratieve factoranalyse. -- RMSEA: De wortel van de gemiddelde kwadraatsom fout van de schatting (RMSEA). Corrigeert voor spaarzaamheid. Wanneer een model hetzelfde presteert, maar het model 1 vrijheidsgraad dan model 2, wordt model 1 aangeraden. Browne and Cudeck (1993) benoemt een waarde kleiner dan 0.08 als acceptabele model fit, kleiner dan 0.05 een goede model fit, en adviseert om modellen met een waarde van 0.1 of hoger te verwerpen. Er is echter geen overeenstemming over deze grens. -- RMSEA 90% betrouwbaarheidsinterval: Het 90% betrouwbaarheidsinterval van de wortel van de gemiddelde kwadraatsom fout van de schatting. -- TLI: Tucker-Lewis Index. Evalueert de fit vergeleken met een striktere, genestelde baseline model. 
Hopwood and Donnallan (2010) suggereerde dat een waarde hoger dan .9 een goede fit aangeeft. Er is echter geen consensus over deze grens. -- BIC: Bayesian Information Criterion. Deze maat is nuttig voor het vergelijken van de prestatie van verschillende modellen op dezelfde data. Een lage waarde impliceert een betere fit. - -#### Paddiagram -- F(1,2,3,...): De factoren in het model zijn weergegeven als cirkels. -- Variabelen: De variabelen zijn weergegeven als rechthoeken. -- Pijlen: Gaan van de factoren naar de variabelen, toont de lading van de factor op de variabele. Rood betekent een negatieve lading, groen een positieve lading. Hoe wijder de pijlen, hoe hoger de lading. Deze markering kan aangepast worden bij `Markeren` in de `Uitvoeropties`. - -#### Screeplot -De screeplot geeft informatie over hoeveel variantie in de data, aangegeven door de eigenwaarde, wordt verklaard door elke factor. Een screeplot kan gebruikt worden om te beslissen over de hoeveelheid van de factoren. -- Factors: Op de x-as, alle mogelijke factoren. -- Eigenvalue: Op de y-as, de eigenwaarde die de verklaarde variantie van elke factor aangeeft. -- Data: De gestippelde lijn staat voor de data. -- Gesimuleerd: De driehoekslijn staat voor de gesimuleerde data. Deze lijn is indicatief voor de parallel analyse. Als de punten van de gestippelde lijn (werkelijke data) boven deze lijn zijn, worden deze factoren meegenomen in het model door parallel analyse. -- Kaiser criterium: De horizontale lijn op de eigenwaarde van 1 staat voor het Kaiser criterium. Volgens dit criterium dienen enkel factoren met waarden boven deze lijn (eigenwaarde van 1) mee te worden genomen in het model. - -### Referenties ---- -- Bandalos, D. L., & Finney, S. J. (2018). Factor analysis: Exploratory and confirmatory. In G. R. Hancock, L. M. Stapleton, & R. O. Mueller, *The reviewer’s guide to quantitative methods in the social sciences* (pp. 98-122). Routledge. https://doi.org/10.4324/9781315755649 -- Dinno, A. 
(2014) Gently clarifying the application of Horn’s parallel analysis to principal component analysis versus factor analysis. *Working paper*. https://alexisdinno.com/Software/files/PA_for_PCA_vs_FA.pdf -- Dziuban, C. D., & Shirkey, E. C. (1974). When is a correlation matrix appropriate for factor analysis? Some decision rules. *Psychological Bulletin, 81*(6), 358–361. https://doi.org/10.1037/h0036316 -- Golino, H., Shi, D., Christensen, A. P., Garrido, L. E., Nieto, M. D., Sadana, R., ... & Martinez-Molina, A. (2020). Investigating the performance of exploratory graph analysis and traditional techniques to identify the number of latent factors: A simulation and tutorial. *Psychological Methods*, *25*(3), 292. https://doi.org/10.1037/met0000255 -- Hayton, J. C., Allen, D. G., & Scarpello, V. (2004). Factor retention - decisions in exploratory factor analysis: A tutorial on parallel analysis. *Organizational Research Methods, 7*(2), 191-205. https://doi.org/10.1177/1094428104263675 -- Hopwood, C. J., & Donnellan, M. B. (2010). How should the internal structure - of personality inventories be evaluated? *Personality and Social Psychology Review, 14*(3), 332–346. https://doi.org/10.1177/1088868310361240 -- Horn, J. L. (1965). A rationale and test for the number of factors in factor analysis. *Psychometrika, 30*(2), 179–185. https://doi.org/10.1007%2Fbf02289447 -- Hu, L.-t., & Bentler, P. M. (1998). Fit indices in covariance structure modeling: Sensitivity to underparameterized model misspecification. *Psychological Methods, 3*(4), 424–453. https://doi.org/10.1037/1082-989X.3.4.424 -- Mardia, K. V. (1970). Measures of multivariate skewness and kurtosis with applications. *Biometrika*, *57*(3), 519-530. https://doi.org/10.2307/2334770 -- Osborne, J. W., Costello, A. B., & Kellow, J. T. (2008). Best practices in - exploratory factor analysis. In J. Osborne (Ed.), *Best practices in quantitative methods* (pp. 86-99). SAGE Publications, Inc. 
https://doi.org/10.4135/9781412995627.d8 -- Saris, W. E., Satorra, A., & Van der Veld, W. M. (2009). Testing structural equation models or detection of misspecifications?. *Structural Equation Modeling: A Multidisciplinary Journal, 16*(4), 561-582. https://doi.org/10.1080/10705510903203433 -- Timmerman, M. E., & Lorenzo-Seva, U. (2011). Dimensionality assessment of ordered polytomous items with parallel analysis. *Psychological Methods*, *16*(2), 209. https://doi.org/10.1037/a0023353 -- Yong, A. G., & Pearce, S. (2013). A beginner’s guide to factor analysis: Focusing on exploratory factor analysis. *Tutorials in Quantitative Methods for Psychology*, *9*(2), 79-94. https://doi.org/10.20982/tqmp.09.2.p079 - -### R Packages ---- -- ggplot2 -- psych -- qgraph -- stats - -### Voorbeeld ---- -- Voor een voorbeeld ga naar `File`-->`Data library`-->`Factor`-->`G Factor`. -- Voor meer details over Exploratieve Factoranalyse in JASP, zie video. diff --git a/inst/help/PrincipalComponentAnalysis.md b/inst/help/PrincipalComponentAnalysis.md deleted file mode 100755 index 665fdd26..00000000 --- a/inst/help/PrincipalComponentAnalysis.md +++ /dev/null @@ -1,126 +0,0 @@ -Principal Component Analysis -=== - -Principal Component Analysis is used to represent the data in fewer components than the dataset originally consists of. The components are chosen such that they explain most of the variance in the original dataset. - -### Assumptions -- The variables included in the analysis are correlated (Shlens, 2014). -- The variables included in the analysis are linearly related (Shlens, 2014). - -### Input ---- -#### Assignment Box -- Included Variables: In this box, the variables to perform the principal component analysis on are selected. - -#### Number of Components -- Here, the number of components that the rotation is applied to is specified. Several methods to determine this number can be chosen from: - - Parallel Analysis: Factors are selected on the basis of parallel analysis. 
With this method, factors are selected when their eigenvalue is greater than the parallel average random eigenvalue. This method is selected by default. Can be based on principal component eigenvalues (PC) or factor eigenvalues (FA). A seed (1234) is chosen by default so that the results from the parallel analysis are equal across the PCA. - - Eigenvalues: Components are selected when they have a certain eigenvalue. By default components are selected that have an eigenvalue above 1. - - Manual: The number of components can be specified manually. By default this is set to 1. - -#### Rotation -- Here, the rotation method to apply to the components can be specified. Rotation ensures a simpler understanding of the data structure. - - Orthogonal: This method produces components that are uncorrelated. For this method, there are several possibilities that can be selected: - - None: No rotation method is selected. - - varimax: Orthogonal rotation method varimax. Rotation based on maximizing the variance of the loadings. - - quartimax: Orthogonal rotation method quartimax. In this rotation method, the number of components that is necessary to explain each variable is minimized. - - bentlerT: Orthogonal rotation method bentlerT. - - equamax: Orthogonal rotation method equamax. This is a combination of varimax and quartimax. - - varimin: Orthogonal rotation method varimin. - - Oblique: This method produces components that allow for correlation between the components. This method is selected by default. Several possibilities are available: - - promax: Oblique rotation method promax. This method is selected by default. - - oblimin: Oblique rotation method oblimin. - - simplimax: Oblique rotation method simplimax. - - bentlerQ: Oblique rotation method bentlerQ. - - biquartimin: Oblique rotation method biquartimin. - - cluster: Oblique rotation method cluster. 
- -#### Base decomposition on -- Correlation: Bases the PCA on the correlation matrix of the data -- Covariance: Bases the PCA on the covariance matrix of the data -- Polychoric/tetrachoric: Bases the PCA on the poly/tetrachoric (mixed) correlation matrix of the data. - This is sometimes unstable when sample size is small and when some variables do not contain all response categories - - -### Output Options -- Highlight: This option cuts the scaling of paths in width and color saturation. Paths with absolute weights over this value will have the strongest color intensity and become wider the stronger they are, and paths with absolute weights under this value will have the smallest width and become vaguer the weaker the weight. If set to 0, no cutoff is used and all paths vary in width and color. -- Include Tables: - - Component correlations: When selecting this option, a table with the correlations between the components will be displayed. - - Residual matrix: Displays a table containing the residual variances and correlations - - Parallel analysis: If this option is selected, a table will be generated exhibiting a detailed output of the parallel analysis. Can be based on principal component eigenvalues (PC) or factor eigenvalues (FA). The seed is taken from the parallel analysis for determining the number of factors above. - - Path diagram: By selecting this option, a visual representation of the direction and strength of the relation between the variable and component will be displayed. - - Scree plot: When selecting this option, a scree plot will be displayed. The scree plot provides information on how much variance in the data, indicated by the eigenvalue, is explained by each component. A scree plot can be used to decide how many components should be selected. 
-- Assumption Checks: - - Kaiser-Meyer-Olkin Test (KMO): Determines how well variables are suited for factor analysis by computing the proportion of common variance between variables - - Bartlett's Test (of sphericity): Determines if the data correlation matrix is the identity matrix, meaning, if the variables are related or not - - Mardia's Test of Multivariate Normality: Assesses the degree of the departure from multivariate normality of the included variables in terms of multivariate skewness and kurtosis. The Mardia's test will always include the listwise complete cases. -- Missing values: - - Exclude cases pairwise: If one observation from a variable is missing, all the other variable observations from the same case will still be used for the analysis. In this scenario, it is not necessary to have an observation for all the variables to include the case in the analysis. This option is selected by default. - - Exclude cases listwise: If one observation from a variable is missing, the whole case, so all the other connected variable observations, will be dismissed from the analysis. In this scenario, observations for every variable are needed to include the case in the analysis. - -### Output ---- -#### Assumption Checks -- Kaiser-Meyer-Olkin Test (KMO): Measure of sampling adequacy (MSA) as the proportion of common variance among variables is computed for all variables; values closer to 1 are desired. -- Bartlett's Test (of sphericity): A significant result means the correlation matrix is unlike the identity matrix. -- Mardia's Test of Multivariate Normality: - - Tests: The first column shows all the tests performed. - - Value: The values of `b1p` (multivariate skewness) and `b2p` (multivariate kurtosis), as denoted in Mardia (1970). - - Statistic: The two chi-squared test statistics of multivariate skewness (both standard and corrected for small samples) and the standard normal test statistic of multivariate kurtosis. - - df: Degrees of freedom. - - p: P-value. 
- -#### Chi-squared Test: -The fit of the model is tested. When the test is significant, the model is rejected. Bear in mind that a chi-squared approximation may be unreliable for small sample sizes, and the chi-squared test may too readily reject the model with very large sample sizes. See, for example, Saris, Satorra, & van der Veld (2009) for more discussions on overall fit metrics. - - Model: The model obtained from the principal component analysis. - - Value: The chi-squared test statistic. - - df: Degrees of freedom. - - p: P-value. - -#### Component Loadings: -- Variables: The first column shows all the variables included in the analysis. -- PC (1, 2, 3, ...): This column shows the variable loadings on the components. -- Uniqueness: The percentage of the variance of each variable that is not explained by the component. - -#### Component Characteristics: -- Unrotated solution: - - Eigenvalues: The eigenvalue for each component - - Proportion var.: The proportion of variance in the dataset explained by each unrotated component - - Cumulative: The proportion of variance in the dataset explained by the unrotated components up to and including the current component. -- Rotated solution: - - SumSq. Loadings: Sum of squared loadings, variance explained by each rotated component - - Proportion var.: The proportion of variance in the dataset explained by each rotated component - - Cumulative: The proportion of variance in the dataset explained by the rotated components up to and including the current component. - -- Correlations: - The correlation between the principal components. - -#### Path Diagram -- PC: The principal components are represented by the circles. -- Variables: The variables loadings on the components are represented by the boxes. -- Arrows: Going from the variables to the principal components, representing the loading from the variable on the component. Red indicates a negative loading, green a positive loading. 
The wider the arrows, the higher the loading. This highlight can be adjusted at `highlight` in the `Output Options`. - -#### Screeplot -The scree plot provides information on how much variance in the data, indicated by the eigenvalue, is explained by each component. The scree plot can be used to decide how many components should be selected in the model. -- Components: On the x-axis, the components. -- Eigenvalue: On the y-axis, the eigenvalue that indicates the variance explained by each component. -- Data: The dotted line represents the data. -- Simulated: The triangle line represents the simulated data. This line is indicative for the parallel analysis. When the points from the dotted line (real data) are above this line, these components will be included in the model by parallel analysis. -- Kaiser criterion: The horizontal line at the eigenvalue of 1 represents the Kaiser criterion. According to this criterion, only components with values above this line (at an eigenvalue of 1) should be included in the model. - -### References ---- -- Dziuban, C. D., & Shirkey, E. C. (1974). When is a correlation matrix appropriate for factor analysis? Some decision rules. *Psychological Bulletin, 81*(6), 358–361. https://doi.org/10.1037/h0036316 -- Hayton, J. C., Allen, D. G., & Scarpello, V. (2004). Factor retention decisions in exploratory factor analysis: A tutorial on parallel analysis. *Organizational Research Methods, 7*(2), 191-205. https://doi.org/10.1177/1094428104263675 -- Horn, J. L. (1965). A rationale and test for the number of factors in factor analysis. *Psychometrika, 30*(2), 179–185. https://doi.org/10.1007%2Fbf02289447 -- James, G., Witten, D., Hastie, T., & Tibshirani, R. (2013). *An introduction to statistical learning* (2nd ed.). Springer. -- Mardia, K. V. (1970). Measures of multivariate skewness and kurtosis with applications. *Biometrika*, *57*(3), 519-530. https://doi.org/10.2307/2334770 -- Osborne, J. W., Costello, A. B., & Kellow, J. T. (2008). 
Best practices in exploratory factor analysis. In J. Osborne (Ed.), *Best practices in quantitative methods* (pp. 86-99). SAGE Publications, Inc. https://doi.org/10.4135/9781412995627.d8 -- Saris, W. E., Satorra, A., & Van der Veld, W. M. (2009). Testing structural equation models or detection of misspecifications?. *Structural Equation Modeling: A Multidisciplinary Journal, 16*(4), 561-582. https://doi.org/10.1080/10705510903203433 -- Shlens, J. (2014). A tutorial on principal component analysis. *arXiv preprint arXiv:1404.1100*. https://doi.org/10.48550/arXiv.1404.1100 - -### R Packages ---- -- psych -- qgraph - diff --git a/inst/help/PrincipalComponentAnalysis_nl.md b/inst/help/PrincipalComponentAnalysis_nl.md deleted file mode 100644 index 5db2555a..00000000 --- a/inst/help/PrincipalComponentAnalysis_nl.md +++ /dev/null @@ -1,119 +0,0 @@ -Principale Componentenanalyse -=== - -Principale componentenanalyse wordt gebruikt om data weer te geven in minder componenten dan waaruit de dataset oorspronkelijk bestaat. De componenten worden zo gekozen dat ze zoveel mogelijk variantie in de originele dataset verklaren. - -### Assumpties -- De variabelen die worden meegenomen in de analyse zijn gecorreleerd (Shlens, 2014). -- De variabelen die worden meegenomen in de analyse zijn lineair gerelateerd (Shlens, 2014). - -### Invoer ---- -#### Invoerveld -- Meegenomen variabelen: In dit veld kunnen de variabelen worden ingevuld om een principale componentenanalyse op uit te voeren. - -#### Aantal Componenten -- Hier wordt het aantal componenten gespecificeerd waarop de rotatie wordt toegepast. Er kunnen verschillende modellen worden gekozen om dit kunnen te bepalen: - - Parallelle Analyse: Componenten worden geselecteerd op basis van parallelle analyse. Met deze methode worden componenten geselecteerd wanneer hun eigenwaarde groter is dan de parallelle gemiddelde willekeurige eigenwaarde. Dit is de standaardoptie. 
- - Eigenwaardes: Componenten worden geselecteerd wanneer ze een bepaalde eigenwaarde hebben. De standaardoptie is een eigenwaarde boven 1. - - Handmatig: Het aantal componenten kan handmatig worden ingesteld. De standaardoptie is 1. - -#### Rotatie -- Hier wordt de rotatiemethode gespecificeerd. Rotatie zorgt ervoor dat de structuur van de data makkelijker te interpreteren is. - - Orthogonaal: deze methode produceert componenten die niet gecorreleerd zijn. Voor deze methode kunnen verschillende opties worden geselecteerd: - - Geen: Er wordt geen rotatiemethode geselecteerd. - - Varimax: De varimax orthogonale rotatiemethode. Rotatie gebaseerd op het maximaliseren van de variantie van factorladingen. - - Quartimax: De quartimax orthogonale rotatiemethode. In deze methode wordt het aantal componenten die nodig zijn om elke variabele te verklaren geminimaliseerd. - - BentlerT: De bentlerT orthogonale rotatiemethode. - - Equamax: De equamax orthogonale rotatiemethode. Dit is een combinatie van varimax en quartimax. - - Varimin: De varimin orthogonale rotatiemethode. - - Oblique: Deze methode produceert componenten waartussen wel een correlatie is toegestaan. Dit is de standaardoptie. Verschillende opties zijn beschikbaar: - - Promax: De promax oblique rotatiemethode. Dit is de standaardoptie. - - Oblimin: De oblimin oblique rotatiemethode. - - Simplimax: De simplimax oblique rotatiemethode. - - BentlerQ: De bentlerQ oblique rotatiemethode. - - Biquartimin: De biquartimin oblique rotatiemethode. - - Cluster: De cluster oblique rotatiemethode. - -#### Basis decompositie op -- Correlatie: Baseert de PCA op de correlatiematrix van de gegevens -- Covariantie: Baseert de PCA op de covariantiematrix van de gegevens -- Polychorisch/tetrachorisch: Baseert de PCA op de poly/tetrachorische (gemengde) correlatiematrix van de gegevens. 
- Dit is soms onstabiel wanneer de steekproefomvang klein is en wanneer sommige variabelen niet alle antwoordcategorieën bevatten - - -### Uitvoeropties -- Markeer: Deze optie zet de waarde vanaf waar de paden schalen in breedte. Paden met absolute gewichten hoger dan deze waarde zullen steeds breder worden terwijl waardes eronder een vaste dunne breedte hebben. Alle paden krijgen een sterkere of zwakkere kleurintensiteit naarmate ze een sterker gewicht hebben. Als de waarde op 0 gezet wordt zullen alle paden een verschillende breedte krijgen. -- Neem tabellen mee: - - Component correlaties: Als u deze optie selecteert wordt er een tabel met de correlaties tussen componenten weergegeven. - - Pad diagram: Als u deze optie selecteert wordt er een visuele representatie van de richting en sterkte van de relatie tussen variabelen en componenten weergegeven. - - Screeplot: Als u deze optie selecteert wordt er een screeplot weergegeven. Deze grafiek geeft informatie over de variantie in de data die wordt verklaard door elke component, door middel van de eigenwaarde. Een scree-grafiek kunt u gebruiken om het aantal componenten te selecteren. -- Aannamecontroles: - - Kaiser-Meyer-Olkin Test (KMO): Bepaalt hoe goed variabelen geschikt zijn voor factoranalyse door het aandeel gemeenschappelijke variantie tussen variabelen te berekenen. - - Bartlett's Test (van sfericiteit): Bepaalt of de correlatiematrix van de gegevens de identiteitsmatrix is, d.w.z. of de variabelen aan elkaar gerelateerd zijn of niet. - - Mardia's test van multivariate normaliteit: Beoordeelt de mate van afwijking van de multivariate normaliteit van de opgenomen variabelen in termen van multivariate scheefheid en kurtose. De Mardia's test omvat altijd de volledige gevallen in de lijst. -- Ontbrekende waarden: - - Sluit waarnemingen paarwijs uit: Als een observatie van een variabele mist worden de andere observaties op andere variabelen van hetzelfde geval nog steeds gebruikt voor de analyse. 
In dit scenario is het niet nodig om een observatie voor alle variabelen te hebben om een geval mee te nemen in de analyse. Dit is de standaardoptie. - - Sluit waarnemingen op lijstwijze uit: Als een observatie van een variabele mist wordt het hele geval, dus alle verbonden observaties op andere variabelen, uitgesloten van de analyse. In dit scenario moet een geval observaties op elke variabele hebben om meegenomen te worden in de analyse. - -### Uitvoer ---- -#### Veronderstellingen Controles -- Kaiser-Meyer-Olkin Test (KMO): Maat voor steekproeftoereikendheid (MSA) als het aandeel van de gemeenschappelijke variantie tussen variabelen wordt berekend voor alle variabelen; waarden dichter bij 1 zijn gewenst. -- Bartlett's Test (of sphericity): Een significant resultaat betekent dat de correlatiematrix afwijkt van de identiteitsmatrix. -- Mardia's Test van multivariate normaliteit: - - Tests: In de eerste kolom staan alle uitgevoerde testen. - - Waarde: De waarden van `b1p` (multivariate scheefheid) en `b2p` (multivariate kurtosis), zoals aangegeven in Mardia (1970). - - Statistiek: De twee chi-kwadraat teststatistieken van multivariate scheefheid (zowel standaard als gecorrigeerd voor kleine steekproeven) en de standaard normale teststatistiek van multivariate kurtosis. - - df: Vrijheidsgraden. - - p: P-waarde. - -### Principale Componentenanalyse -Chi-kwadraat toets: -De passing van het model wordt getoetst. Wanneer de toets significant is wordt het model verworpen. Houd in gedachten dat een chi-kwadraat benadering onbetrouwbaar kan zijn voor kleine steekproeven, en bij hele grote steekproeven kan de chi-kwadraattoets het model te snel verwerpen. Voor een verdere discussie over pas indices kan bijvoorbeeld Saris, Satorra, & van der Veld (2009) geraadpleegd worden. -- Model: Het model dat uit de principale componentenanalyse is gekomen. -- Waarde: De chi-kwadraat toetsstatistiek. -- vg: Vrijheidsgraden. -- P: De p-waarde. 
- -#### Componentladingen: -- Variabelen: De eerste kolom geeft alle variabelen die zijn meegenomen. -- PC (1, 2, 3, ...): Deze kolom geeft de ladingen van de variabelen op de componenten. -- Uniciteit: Het percentage van de variantie van iedere variabele dat niet wordt verklaard door de component. - -#### Component karakteristieken: -- Eigenwaardes: De eigenwaarde van elk geselecteerd component. -- Proportie var.: De proportie van variantie in de dataset die door iedere component wordt verklaard. -- Cumulatief: De proportie van variantie in de dataset die wordt verklaard door de componenten tot en met deze component. - -#### Component correlaties: -- De correlatie tussen de principale componenten. - -#### Pad Diagram -- PC: De principale componenten worden weergegeven in de cirkels. -- Variabelen: De variabelen worden weergegeven als rechthoeken. -- Pijlen: Gaan van de variabelen naar de componenten en representeren de lading van een variabele op een component. Rood is een negatieve lading, groen een positieve. Hoe breder de strepen, hoe sterker de lading. Deze markering kan worden aangepast bij `markeren` in de `uitvoer opties`. - -#### Screeplot -De screeplot geeft informatie over hoeveel variantie in de data wordt verklaard door elke component, door middel van de eigenwaarde. De screeplot kan worden gebruikt om over het aantal componenten in het model te beslissen. -- Componenten: De componenten staan op de x-as. -- Eigenwaarde: De eigenwaarden staan op de y-as, en geven aan hoeveel variantie door elke component wordt verklaard. -- Data: De stippellijn representeert de data. -- Gesimuleerd: De driehoekslijn representeert de gesimuleerde data. Deze lijn is indicatief voor de parallelle analyse. Wanneer de punten van de stippellijn (echte data) boven deze lijn liggen worden deze componenten meegenomen in het model door parallelle analyse. -- Kaiser criterium: De horizontale lijn bij een eigenwaarde van 1 representeert het Kaiser criterium. 
Volgens dit criterium moeten componenten met een eigenwaarde boven de 1 worden meegenomen. - -### Referenties ---- -- Hayton, J. C., Allen, D. G., & Scarpello, V. (2004). Factor retention decisions in exploratory factor analysis: A tutorial on parallel analysis. *Organizational Research Methods, 7*(2), 191-205. https://doi.org/10.1177/1094428104263675 -- Horn, J. L. (1965). A rationale and test for the number of factors in factor analysis. *Psychometrika, 30*(2), 179–185. https://doi.org/10.1007%2Fbf02289447 -- James, G., Witten, D., Hastie, T., & Tibshirani, R. (2013). *An introduction to statistical learning* (2nd ed.). Springer. -- Mardia, K. V. (1970). Measures of multivariate skewness and kurtosis with applications. *Biometrika*, *57*(3), 519-530. https://doi.org/10.2307/2334770 -- Osborne, J. W., Costello, A. B., & Kellow, J. T. (2008). Best practices in exploratory factor analysis. In J. Osborne (Ed.), *Best practices in quantitative methods* (pp. 86-99). SAGE Publications, Inc. https://doi.org/10.4135/9781412995627.d8 -- Saris, W. E., Satorra, A., & Van der Veld, W. M. (2009). Testing structural equation models or detection of misspecifications?. *Structural Equation Modeling: A Multidisciplinary Journal, 16*(4), 561-582. https://doi.org/10.1080/10705510903203433 -- Shlens, J. (2014). A tutorial on principal component analysis. *arXiv preprint arXiv:1404.1100*. 
https://doi.org/10.48550/arXiv.1404.1100 - -### R Packages ---- -- psych -- qgraph - diff --git a/inst/qml/ConfirmatoryFactorAnalysis.qml b/inst/qml/ConfirmatoryFactorAnalysis.qml index e10695e6..e0a08ba5 100644 --- a/inst/qml/ConfirmatoryFactorAnalysis.qml +++ b/inst/qml/ConfirmatoryFactorAnalysis.qml @@ -169,13 +169,13 @@ Form title: qsTr("Additional Output") Group { - CheckBox { name: "fitMeasures"; label: qsTr("Additional fit measures") } - CheckBox { name: "kaiserMeyerOlkinTest"; label: qsTr("Kaiser-Meyer-Olkin (KMO) test") } - CheckBox { name: "bartlettTest"; label: qsTr("Bartlett's test of sphericity") } - CheckBox { name: "rSquared"; label: qsTr("R-Squared") } - CheckBox { name: "ave"; label: qsTr("Average variance extracted (AVE)") } - CheckBox { name: "htmt"; label: qsTr("Heterotrait-monotrait ratio (HTMT)") } - CheckBox { name: "reliability"; label: qsTr("Reliability") } + CheckBox { label: qsTr("Additional fit measures") ; name: "fitMeasures" } + CheckBox { label: qsTr("Kaiser-Meyer-Olkin (KMO) test"); name: "kaiserMeyerOlkinTest"} + CheckBox { label: qsTr("Bartlett's test of sphericity"); name: "bartlettTest"} + CheckBox { label: qsTr("R-Squared") ; name: "rSquared" } + CheckBox { name: "ave"; label: qsTr("Average variance extracted (AVE)") } + CheckBox { name: "htmt"; label: qsTr("Heterotrait-monotrait ratio (HTMT)") } + CheckBox { name: "reliability"; label: qsTr("Reliability") } } Group { @@ -191,23 +191,7 @@ Form defaultValue: 3.84 } } - CheckBox { name: "lavaanSyntax"; label: qsTr("Show lavaan syntax") } - - CheckBox - { - id: addScores - name: "addScores" - label: qsTr("Add factor scores to data") - enabled: variables.count > 1 & dataType.value == "raw" - - TextField { - name: "addedScoresPrefix" - label: qsTr("Prefix") - defaultValue: "FS" - fieldWidth: 80 - enabled: addScores.checked - } - } + CheckBox { label: qsTr("Show lavaan syntax") ; name: "lavaanSyntax" } } } @@ -313,31 +297,13 @@ Form label: qsTr("Missing data handling") values: [ - - { 
label: qsTr("Listwise deletion"), value: "listwise"}, - { label: qsTr("FIML") , value: "fiml"}, + { label: qsTr("Listwise deletion") , value: "listwise" }, + { label: qsTr("FIML") , value: "fiml" }, { label: qsTr("Pairwise") , value: "pairwise" }, { label: qsTr("Two-stage") , value: "twoStage" }, { label: qsTr("Robust two-stage") , value: "twoStageRobust" }, ] } - // DropDown - // { - // name: "naAction" - // label: qsTr("Missing data handling") - // values: - // [ - // { factors.columnsTypes.includes("ordinal") ? - // {label: qsTr("Listwise deletion"), value: "listwise"} : {label: qsTr("FIML"), value: "fiml"} - // }, - // { factors.columnsTypes.includes("ordinal") ? - // {label: qsTr("FIML") , value: "fiml"} : {label: qsTr("Listwise deletion"), value: "listwise"} - // }, - // { label: qsTr("Pairwise") , value: "pairwise" }, - // { label: qsTr("Two-stage") , value: "twoStage" }, - // { label: qsTr("Robust two-stage") , value: "twoStageRobust" }, - // ] - // } } RadioButtonGroup diff --git a/inst/qml/ExploratoryFactorAnalysis.qml b/inst/qml/ExploratoryFactorAnalysis.qml index f8907599..3d0004a9 100644 --- a/inst/qml/ExploratoryFactorAnalysis.qml +++ b/inst/qml/ExploratoryFactorAnalysis.qml @@ -25,47 +25,19 @@ import "./common" as Common Form { - VariablesForm - { - // preferredHeight: jaspTheme.smallDefaultVariablesFormHeight - AvailableVariablesList { name: "allVariablesList" } - AssignedVariablesList - { - id: variables - name: "variables" - title: qsTr("Variables") - allowedColumns: ["scale"] - } - Group - { - // columns: 4 - title: qsTr("Data") - RadioButtonGroup - { - name: "dataType" - id: dataType - columns: 2 - RadioButton { value: "raw"; label: qsTr("Raw"); checked: true } - RadioButton - { - value: "varianceCovariance"; label: qsTr("Variance-covariance matrix") - IntegerField { name: "sampleSize"; label: qsTr("Sample size"); defaultValue: 200 } - } - } - } - } + Common.PcaEfaVariables{} - Common.NumberFactors{ + Common.PcaEfaNumberFactors{ pca: false 
variablesCount: variables.count } - Common.AnalysisOptions{ + Common.PcaEfaAnalysisOptions{ pca: false dataRaw: dataType.value == "raw" } - Common.OutputOptions{ + Common.PcaEfaOutputOptions{ pca: false dataRaw: dataType.value == "raw" variablesCount: variables.count diff --git a/inst/qml/PrincipalComponentAnalysis.qml b/inst/qml/PrincipalComponentAnalysis.qml index 2ff481e7..8ec765da 100644 --- a/inst/qml/PrincipalComponentAnalysis.qml +++ b/inst/qml/PrincipalComponentAnalysis.qml @@ -24,48 +24,20 @@ import "./common" as Common Form { - VariablesForm - { - // preferredHeight: jaspTheme.smallDefaultVariablesFormHeight - AvailableVariablesList { name: "allVariablesList" } - AssignedVariablesList - { - id: variables - name: "variables" - title: qsTr("Variables") - allowedColumns: ["scale"] - } - Group - { - // columns: 4 - title: qsTr("Data") - RadioButtonGroup - { - name: "dataType" - id: dataType - columns: 2 - RadioButton { value: "raw"; label: qsTr("Raw"); checked: true } - RadioButton - { - value: "varianceCovariance"; label: qsTr("Variance-covariance matrix") - IntegerField { name: "sampleSize"; label: qsTr("Sample size"); defaultValue: 200 } - } - } - } - } + Common.PcaEfaVariables{} - Common.NumberFactors{ + Common.PcaEfaNumberFactors{ pca: true variablesCount: variables.count } - Common.AnalysisOptions{ + Common.PcaEfaAnalysisOptions{ pca: true dataRaw: dataType.value == "raw" } - Common.OutputOptions{ + Common.PcaEfaOutputOptions{ pca: true dataRaw: dataType.value == "raw" variablesCount: variables.count diff --git a/inst/qml/common/AnalysisOptions.qml b/inst/qml/common/AnalysisOptions.qml deleted file mode 100755 index 9ea6257b..00000000 --- a/inst/qml/common/AnalysisOptions.qml +++ /dev/null @@ -1,112 +0,0 @@ -// -// Copyright (C) 2013-2018 University of Amsterdam -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as -// published by the Free Software Foundation, 
either version 3 of the -// License, or (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public -// License along with this program. If not, see -// . -// - -import QtQuick -import QtQuick.Layouts -import JASP -import JASP.Controls - -Section -{ - property bool pca: true - property bool dataRaw: true - - id: analysisoptions - title: qsTr("Analysis Options") - expanded: true - - - RadioButtonGroup - { - name: "rotationMethod" - title: qsTr("Rotation") - RadioButton - { - value : "orthogonal" - label : qsTr("Orthogonal") - DropDown - { - name: "orthogonalSelector" - values: [ - { label: qsTr("none") , value: "none" }, - { label: "varimax" , value: "varimax" }, - { label: "quartimax" , value: "quartimax" }, - { label: "bentlerT" , value: "bentlerT" }, - { label: "equamax" , value: "equamax" }, - { label: "geominT" , value: "geominT" } - ] - } - } - RadioButton - { - value : "oblique" - label : qsTr("Oblique") - checked : true - DropDown { name: "obliqueSelector"; - values: [ "promax", "oblimin", "simplimax", "bentlerQ", "biquartimin", "cluster", "geominQ" ] } - } - } - - RadioButtonGroup - { - name: "analysisBasedOn" - title: qsTr("Base Decomposition on") - RadioButton - { - value: "correlationMatrix" - label: qsTr("Correlation matrix") - checked: true - } - RadioButton - { - value: "covarianceMatrix" - label: qsTr("Covariance matrix") - } - RadioButton - { - enabled: dataRaw - value: "polyTetrachoricCorrelationMatrix" - label: qsTr("Polychoric/tetrachoric correlation matrix") - } - } - - - Group - { - visible: !pca - title: qsTr("Factoring Method") - DropDown - { - name: "factoringMethod" - indexDefaultValue: 0 - values: - [ - { label: qsTr("Minimum 
residual"), value: "minimumResidual" }, - { label: qsTr("Maximum likelihood"), value: "maximumLikelihood" }, - { label: qsTr("Principal axis factoring"), value: "principalAxis" }, - { label: qsTr("Ordinary least squares"), value: "ordinaryLeastSquares" }, - { label: qsTr("Weighted least squares"), value: "weightedLeastSquares" }, - { label: qsTr("Generalized least squares"), value: "generalizedLeastSquares"}, - { label: qsTr("Minimum chi-square"), value: "minimumChiSquare" }, - { label: qsTr("Minimum rank"), value: "minimumRank" } - ] - } - } - - -} \ No newline at end of file diff --git a/inst/qml/common/OutputOptions.qml b/inst/qml/common/OutputOptions.qml deleted file mode 100755 index fbae318d..00000000 --- a/inst/qml/common/OutputOptions.qml +++ /dev/null @@ -1,135 +0,0 @@ -// -// Copyright (C) 2013-2018 University of Amsterdam -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as -// published by the Free Software Foundation, either version 3 of the -// License, or (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public -// License along with this program. If not, see -// . -// - -import QtQuick -import QtQuick.Layouts -import JASP -import JASP.Controls - -Section -{ - property bool pca: true - property bool dataRaw: true - property int variablesCount: 1 - - property string correlationsName: pca ? "componentCorrelations" : "factorCorrelations" - property string correlationsLabel: pca ? 
qsTr("Component correlations") : qsTr("Factor correlations") - - - title: qsTr("Output Options") - - Slider - { - name: "loadingsDisplayLimit" - label: qsTr("Display loadings above") - value: 0.4 - } - - RadioButtonGroup - { - name: "loadingsOrder" - title: qsTr("Order Loadings By") - RadioButton { name: "sortBySize"; label: qsTr("Size"); checked: true } - RadioButton { name: "sortByVariables"; label: qsTr("Variables") } - } - - Group - { - title: qsTr("Tables") - CheckBox { visible: !pca; name: "factorStructure"; label: qsTr("Structure matrix") } - CheckBox { - name: pca ? "componentCorrelations" : "factorCorrelations"; - label: pca ? qsTr("Component correlations") : qsTr("Factor correlations") - } - CheckBox { visible: !pca; name: "fitIndices"; label: qsTr("Additional fit indices") } - CheckBox { name: "residualMatrix"; label: qsTr("Residual matrix") } - CheckBox { - name: "parallelAnalysisTable"; - label: qsTr("Parallel analysis") - RadioButtonGroup - { - name: "parallelAnalysisTableMethod" - title: "" - - RadioButton - { - value: "principalComponentBased" - label: qsTr("Based on PC") - checked: true - } - RadioButton - { - value: "factorBased" - label: qsTr("Based on FA") - } - } - } - } - Group - { - title: qsTr("Plots") - CheckBox { - name: "pathDiagram" - label: qsTr("Path diagram") - } - CheckBox { - name: "screePlot"; - label: qsTr("Scree plot") - - CheckBox { - name: "screePlotParallelAnalysisResults" - label: qsTr("Parallel analysis results") - checked: true - } - } - } - - Group - { - title: qsTr("Assumption checks") - CheckBox { name: "kaiserMeyerOlkinTest"; label: qsTr("KMO test") } - CheckBox { name: "bartlettTest"; label: qsTr("Bartlett's test") } - CheckBox { name: "mardiaTest"; label: qsTr("Mardia's test") ; enabled: dataType.value == "raw" } - CheckBox { name: "antiImageCorrelationMatrix"; label: qsTr("Anti-image correlation matrix") } - } - - RadioButtonGroup - { - name: "naAction" - title: qsTr("Missing Values") - RadioButton { value: 
"pairwise"; label: qsTr("Exclude cases pairwise"); checked: true } - RadioButton { value: "listwise"; label: qsTr("Exclude cases listwise") } - } - - CheckBox - { - id: addScores - name: "addScores" - label: qsTr("Add PC scores to data") - enabled: variablesCount > 1 & dataRaw - - TextField { - name: "addedScoresPrefix" - label: qsTr("Prefix") - defaultValue: pca ? qsTr("PC") : qsTr("FA") - fieldWidth: 80 - enabled: addScores.checked - } - } - -} \ No newline at end of file diff --git a/inst/qml/common/PcaEfaAnalysisOptions.qml b/inst/qml/common/PcaEfaAnalysisOptions.qml new file mode 100755 index 00000000..a4414d6f --- /dev/null +++ b/inst/qml/common/PcaEfaAnalysisOptions.qml @@ -0,0 +1,129 @@ +// +// Copyright (C) 2013-2018 University of Amsterdam +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public +// License along with this program. If not, see +// . +// + +import QtQuick +import QtQuick.Layouts +import JASP +import JASP.Controls + +Section +{ + property bool pca: true + property bool dataRaw: true + + id: analysisoptions + title: qsTr("Analysis Options") + info: qsTr("Here, options for the analysis can be specified.") + expanded: true + + + RadioButtonGroup + { + name: "rotationMethod" + title: qsTr("Rotation") + info: qsTr("Here, the rotation method to apply to the components can be specified. 
Rotation ensures a simpler understanding of the data structure.") + RadioButton + { + value : "orthogonal" + label : qsTr("Orthogonal") + info: qsTr("This method produces components that are uncorrelated. For this method, there are several possibilities that can be selected.") + DropDown + { + name: "orthogonalSelector" + values: [ + { label: qsTr("none") , value: "none" , info: qsTr("No rotation method is selected.")}, + { label: "varimax" , value: "varimax", info: qsTr("Maximizes the variance of squared loadings for each factor, simplifying interpretations.")}, + { label: "quartimax" , value: "quartimax", info: qsTr("Minimizes the number of factors needed to explain each variable, simplifying structure.") }, + { label: "bentlerT" , value: "bentlerT", info: qsTr("Orthogonal rotation method by Bentler, used for simplifying factor loadings.")}, + { label: "equamax" , value: "equamax", info: qsTr("Combines Varimax and Quartimax to balance factor simplicity and variable explanations.")}, + { label: "geominT" , value: "geominT", info: qsTr("Applies an orthogonal rotation minimizing a weighted sum of squared loadings.")} + ] + } + } + RadioButton + { + value : "oblique" + label : qsTr("Oblique") + info: qsTr("This method produces components that allow for correlation between the components. This method is selected by default. Several possibilities are available. 
The default is promax.") + checked : true + DropDown { name: "obliqueSelector"; + values: [ + { name: "promax", label: "promax", info: qsTr("Starts with Varimax and applies a power transformation to allow correlated factors.")}, + { name: "oblimin", label: "oblimin", info: qsTr("An oblique rotation that minimizes the correlation among factors while allowing flexibility.")}, + { name: "simplimax", label: "simplimax", info: qsTr("Aims to simplify factor loadings by minimizing nonzero entries.") }, + { name: "bentlerQ", label: "bentlerQ", info: qsTr("Oblique rotation by Bentler, simplifying factor correlations.")}, + { name: "biquartimin", label: "biquartimin", info: qsTr("An oblique rotation balancing between simple and interpretable factor loadings.")}, + { name: "cluster", label: "cluster", info: qsTr("Targets a cluster rotation for factor analysis, emphasizing group separation.") }, + { name: "geominQ", label: "geominQ", info: qsTr("An oblique version of Geomin, allowing factor correlations and reducing complex loadings.")} + ] + } + } + } + + RadioButtonGroup + { + name: "analysisBasedOn" + title: qsTr("Base Decomposition on") + info: qsTr("What to base the decomposition of the data into components/factors on.") + RadioButton + { + value: "correlationMatrix" + label: qsTr("Correlation matrix") + info: qsTr("The correlation matrix is used to decompose the data into components/factors.") + checked: true + } + RadioButton + { + value: "covarianceMatrix" + label: qsTr("Covariance matrix") + info: qsTr("The covariance matrix is used to decompose the data into components/factors.") + } + RadioButton + { + enabled: dataRaw + value: "polyTetrachoricCorrelationMatrix" + label: qsTr("Polychoric/tetrachoric correlation matrix") + info: qsTr("The polychoric/tetrachoric correlation matrix is used to decompose the data into components/factors. 
This is sometimes unstable when sample size is small and when some variables do not contain all response categories") + } + } + + + Group + { + visible: !pca + title: qsTr("Factoring Method") + DropDown + { + name: "factoringMethod" + info: qsTr("Which factoring method to use for the decomposition.") + indexDefaultValue: 0 + values: + [ + { label: qsTr("Minimum residual"), value: "minimumResidual", info: qsTr("Perform a minimum residual factor analysis (minres) using the first derivative.")}, + { label: qsTr("Maximum likelihood"), value: "maximumLikelihood", info: qsTr("Perform a maximum likelihood factor analysis (ml).") }, + { label: qsTr("Principal axis factoring"), value: "principalAxis", info: qsTr("Perform a principal factor solution (pa).") }, + { label: qsTr("Ordinary least squares"), value: "ordinaryLeastSquares", info: qsTr("Minimize the residual matrix using an OLS procedure, slower but uses the empirical first derivative.") }, + { label: qsTr("Weighted least squares"), value: "weightedLeastSquares", info: qsTr("Perform a weighted least squares (WLS) solution.") }, + { label: qsTr("Generalized least squares"), value: "generalizedLeastSquares", info: qsTr("Perform a generalized least squares (GLS) solution.")}, + { label: qsTr("Minimum chi-square"), value: "minimumChiSquare", info: qsTr("Minimize the sample size-weighted chi-square using pairwise correlations.") }, + { label: qsTr("Minimum rank"), value: "minimumRank", info: qsTr("Perform a minimum rank factor analysis (minrank).") } + ] + } + } + +} \ No newline at end of file diff --git a/inst/qml/common/NumberFactors.qml b/inst/qml/common/PcaEfaNumberFactors.qml similarity index 66% rename from inst/qml/common/NumberFactors.qml rename to inst/qml/common/PcaEfaNumberFactors.qml index 406f464b..f172f865 100755 --- a/inst/qml/common/NumberFactors.qml +++ b/inst/qml/common/PcaEfaNumberFactors.qml @@ -29,17 +29,18 @@ Section id: numberof title: pca ? 
qsTr("Number of Components") : qsTr("Number of Factors") expanded: true - + info: qsTr("Here, the number of components/factors that are used in the analysis is determined. Several methods to determine this number can be chosen from:") RadioButtonGroup { name: pca ? "componentCountMethod" : "factorCountMethod" - title: pca ? qsTr("Number of Components Based on") : qsTr("Number of Factors Based on") + title: pca ? qsTr("Based on") : qsTr("Based on") RadioButton { value: "parallelAnalysis"; label: qsTr("Parallel analysis"); checked: true + info: qsTr("Components/factors are selected on the basis of parallel analysis. With this method, factors are selected when their eigenvalue is greater than the parallel average random eigenvalue. This method is selected by default. Can be based on principal component eigenvalues (PC) or factor eigenvalues (FA). A seed (1234) is chosen by default so that the results from the parallel analysis are equal across the PCA") RadioButtonGroup { @@ -64,7 +65,9 @@ Section } RadioButton { - value: "eigenValues"; label: qsTr("Eigenvalues") + value: "eigenValues" + label: qsTr("Eigenvalues") + info: qsTr("Components are selected when they have a certain eigenvalue. By default components are selected that have an eigenvalue above 1.") DoubleField { name: "eigenValuesAbove" label: qsTr("Eigenvalues above") @@ -74,7 +77,9 @@ Section } RadioButton { - value: "manual"; label: qsTr("Manual") + value: "manual" + label: qsTr("Manual") + info: qsTr("The number of components can be specified manually. By default this is set to 1.") IntegerField { name: pca ? "manualNumberOfComponents" : "manualNumberOfFactors" label: pca? 
qsTr("Number of components") : qsTr("Number of factors") diff --git a/inst/qml/common/PcaEfaOutputOptions.qml b/inst/qml/common/PcaEfaOutputOptions.qml new file mode 100755 index 00000000..23038784 --- /dev/null +++ b/inst/qml/common/PcaEfaOutputOptions.qml @@ -0,0 +1,175 @@ +// +// Copyright (C) 2013-2018 University of Amsterdam +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public +// License along with this program. If not, see +// . +// + +import QtQuick +import QtQuick.Layouts +import JASP +import JASP.Controls + +Section +{ + property bool pca: true + property bool dataRaw: true + property int variablesCount: 1 + + title: qsTr("Output Options") + info: qsTr("Additional output options for the PCA/EFA analysis.") + + Slider + { + name: "loadingsDisplayLimit" + label: qsTr("Display loadings above") + info: qsTr("Loadings below this value will not be displayed in the output table.") + value: 0.4 + } + + RadioButtonGroup + { + name: "loadingsOrder" + title: qsTr("Order Loadings By") + info: qsTr("Either order the loadings by their size from large to small, or by variables, meaning according to their occurence in the variables list.") + RadioButton { name: "sortBySize"; label: qsTr("Size"); checked: true } + RadioButton { name: "sortByVariables"; label: qsTr("Variables") } + } + + Group + { + title: qsTr("Tables") + info: qsTr("Display addition tables in the output.") + CheckBox { visible: !pca; name: "factorStructure"; label: 
qsTr("Structure matrix"); info: qsTr("An item by factor structure matrix. This is just the loadings (pattern) matrix times the factor intercorrelation matrix.")} + CheckBox { + name: pca ? "componentCorrelations" : "factorCorrelations"; + label: pca ? qsTr("Component correlations") : qsTr("Factor correlations") + info: qsTr("When selecting this option, a table with the correlations between the components/factors will be displayed.") + } + CheckBox { + visible: !pca; + name: "fitIndices"; + label: qsTr("Additional fit indices") + info: qsTr("This option displays the Root Mean Squared Error of Approximation (RMSEA) with 90% confidence interval, the Tucker Lewis Index (TLI), and the Bayesian Information Criterion (BIC) to test the fit of the model.") + } + CheckBox { name: "residualMatrix"; label: qsTr("Residual matrix"); info: qsTr("Displays a table containing the residual variances and correlations.")} + CheckBox { + name: "parallelAnalysisTable"; + label: qsTr("Parallel analysis") + info: qsTr("If this option is selected, a table will be generated exhibiting a detailed output of the parallel analysis. Can be based on principal component eigenvalues (PC) or factor eigenvalues (FA). The seed is taken from the parallel analysis for determining the number of components/factors above.") + RadioButtonGroup + { + name: "parallelAnalysisTableMethod" + title: "" + + RadioButton + { + value: "principalComponentBased" + label: qsTr("Based on PC") + checked: true + } + RadioButton + { + value: "factorBased" + label: qsTr("Based on FA") + } + } + } + } + Group + { + title: qsTr("Plots") + info: qsTr("Display plots.") + CheckBox { + name: "pathDiagram" + label: qsTr("Path diagram") + info: qsTr("By selecting this option, a visual representation of the direction and strength of the relation between the variable and factor will be displayed.") + } + CheckBox { + name: "screePlot"; + label: qsTr("Scree plot") + info: qsTr("When selecting this option, a scree plot will be displayed. 
The scree plot provides information on how much variance in the data, indicated by the eigenvalue, is explained by each factor. A scree plot can be used to decide how many factors should be selected.") + + CheckBox { + name: "screePlotParallelAnalysisResults" + label: qsTr("Parallel analysis results") + checked: true + info: qsTr("Display the results of the parallel analysis in the scree plot. The parallel analysis will be based on PC or FA as defined by the option for the parallel analysis table.") + } + } + } + + Group + { + title: qsTr("Assumption checks") + info: qsTr("Assumptions: The variables included in the analysis are correlated; the variables included in the analysis are linearly related (Shlens, 2014)") + CheckBox { + name: "kaiserMeyerOlkinTest"; + label: qsTr("KMO test"); + info: qsTr("Determines how well variables are suited for factor analysis by computing the proportion of common variance between variables. Produces a measure of sampling adequacy (MSA) as the proportion of common variance among variables is computed for all variables; values closer to 1 are desired.") + } + CheckBox { + name: "bartlettTest"; + label: qsTr("Bartlett's test") + info: qsTr("Determines if the data correlation matrix is the identity matrix, meaning, if the variables are related or not. A significant result means the correlation matrix is unlike the identity matrix.") + } + CheckBox { + name: "mardiaTest"; + label: qsTr("Mardia's test") + enabled: dataType.value == "raw" + info: qsTr("Assesses the degree of the departure from multivariate normality of the included variables in terms of multivariate skewness and kurtosis. The Mardia's test will always include the listwise complete cases.") + } + CheckBox { + name: "antiImageCorrelationMatrix" + label: qsTr("Anti-image correlation matrix") + info: qsTr("Contains the negative partial correlations between pairs of variables after accounting for the effects of all other variables in the dataset. 
High values in the anti-image correlation matrix indicate that a variable might be redundant or have too much shared variance with other variables.") + } + } + + RadioButtonGroup + { + name: "naAction" + title: qsTr("Missing Values") + info: qsTr("Select how to handle missing values.") + RadioButton { + value: "pairwise"; + label: qsTr("Exclude cases pairwise"); + checked: true; + info: qsTr("If one observation from a variable is missing, all the other variable observations from the same case will still be used for the analysis. In this scenario, it is not necessary to have an observation for all the variables to include the case in the analysis. This option is selected by default.") + } + RadioButton { + value: "listwise"; + label: qsTr("Exclude cases listwise") + info: qsTr("If one observation from a variable is missing, the whole case, so all the other connected variable observations, will be dismissed from the analysis. In this scenario, observations for every variable are needed to include the case in the analysis. ") + } + } + + CheckBox + { + id: addScores + name: "addScores" + label: pca ? qsTr("Add PC scores to data") : qsTr("Add FA scores to data") + info: qsTr("Adds the estimated component/factor scores as new columns to the data set") + enabled: variablesCount > 1 & dataRaw + + TextField { + name: "addedScoresPrefix" + label: qsTr("Prefix") + defaultValue: pca ? 
qsTr("PC") : qsTr("FA") + fieldWidth: 80 + enabled: addScores.checked + } + } + +} \ No newline at end of file diff --git a/inst/qml/common/PcaEfaVariables.qml b/inst/qml/common/PcaEfaVariables.qml new file mode 100755 index 00000000..954b9ad4 --- /dev/null +++ b/inst/qml/common/PcaEfaVariables.qml @@ -0,0 +1,52 @@ +// +// Copyright (C) 2013-2018 University of Amsterdam +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public +// License along with this program. If not, see +// . +// + +import QtQuick +import QtQuick.Layouts +import JASP +import JASP.Controls + +VariablesForm +{ + // preferredHeight: jaspTheme.smallDefaultVariablesFormHeight + AvailableVariablesList { name: "allVariablesList" } + AssignedVariablesList + { + id: variables + name: "variables" + title: qsTr("Variables") + allowedColumns: ["scale"] + info: qsTr("In this box, the variables to perform the analysis on are selected") + } + + RadioButtonGroup + { + name: "dataType" + title: qsTr("Data") + id: dataType + columns: 2 + info: qsTr("Specifies whether the data is raw, meaning observations in rows and variables in columns, or whether the data is a variance-covariance matrix. 
For the latter, the sample size is required.") + RadioButton { value: "raw"; label: qsTr("Raw"); checked: true } + RadioButton + { + value: "varianceCovariance"; label: qsTr("Variance-covariance matrix") + IntegerField { name: "sampleSize"; label: qsTr("Sample size"); defaultValue: 200 } + } + } + +} \ No newline at end of file diff --git a/tests/testthat/_snaps/principalcomponentanalysis/path-diagram.svg b/tests/testthat/_snaps/principalcomponentanalysis/path-diagram.svg deleted file mode 100644 index 4833e460..00000000 --- a/tests/testthat/_snaps/principalcomponentanalysis/path-diagram.svg +++ /dev/null @@ -1,81 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -RC1 -RC2 -contNormal -contcor1 -facFifty -contGamma -debCollin1 - - diff --git a/tests/testthat/test-exploratoryfactoranalysis.R b/tests/testthat/test-exploratoryfactoranalysis.R index eb1d565b..e9f0bd5b 100644 --- a/tests/testthat/test-exploratoryfactoranalysis.R +++ b/tests/testthat/test-exploratoryfactoranalysis.R @@ -22,6 +22,7 @@ defaultOptions <- list( parallelAnalysisTable = FALSE, pathDiagram = FALSE, screePlot = FALSE, + antiImageCorrelationMatrix = FALSE, screePlotParallelAnalysisResults = TRUE, kaiserMeyerOlkinTest = FALSE, bartlettTest = FALSE, @@ -59,7 +60,7 @@ options$loadingsOrder <- "sortByVariables" options$variables <- list("contWide", "contcor1", "contcor2", "facFifty", "contExpon", "debCollin1", "debEqual1") set.seed(1) -results <- jaspTools::runAnalysis("exploratoryFactorAnalysis", "test.csv", options) +results <- jaspTools::runAnalysis("exploratoryFactorAnalysis", "test.csv", options, makeTests = F) test_that("Factor Correlations table results match", { @@ -71,11 +72,10 @@ test_that("Factor Correlations table results match", { test_that("Factor Characteristics table results match", { table <- results[["results"]][["modelContainer"]][["collection"]][["modelContainer_eigenTable"]][["data"]] 
jaspTools::expect_equal_tables(table, - list("Factor 1", 0.211560139237577, 0.21520386846338, 1.76545396982125, - 0.211560139237577, 0.21520386846338, 1.48092097466304, 1.50642707924366, - "Factor 2", 0.366100386048402, 0.366966592875575, 1.31015305219849, - 0.154540246810825, 0.151762724412195, 1.08178172767577, 1.06233907088537 - )) + list("Factor 1", 0.211560139237578, 0.215203868463381, 0.211560139237578, + 0.215203868463381, 1.48092097466305, 1.50642707924367, "Factor 2", + 0.366100386048402, 0.366966592875577, 0.154540246810824, 0.151762724412196, + 1.08178172767577, 1.06233907088537)) }) test_that("Additional fit indices table results match", { @@ -163,7 +163,6 @@ test_that("Missing values works", { options$loadingsOrder <- "sortByVariables" - test_that("loadingsOrder sort the factor loadings table", { options <- defaultOptions @@ -237,16 +236,15 @@ options$orthogonalSelector <- "none" options$rotationMethod <- "orthogonal" options$variables <- paste0("x", 1:9) set.seed(1) -results <- runAnalysis("exploratoryFactorAnalysis", testthat::test_path("holzingerswineford.csv"), options) - +results <- runAnalysis("exploratoryFactorAnalysis", testthat::test_path("holzingerswineford.csv"), options, makeTests = F) test_that("Factor Characteristics table results match", { table <- results[["results"]][["modelContainer"]][["collection"]][["modelContainer_eigenTable"]][["data"]] jaspTools::expect_equal_tables(table, - list("Factor 1", 0.314163998816933, 3.21634418143771, 0.314163998816933, - 2.8274759893524, "Factor 2", 0.449126711506194, 1.63871322152606, - 0.134962712689261, 1.21466441420335, "Factor 3", 0.539738017727465, - 1.36515934778625, 0.0906113062212709, 0.815501755991438)) + list("Factor 1", 0.314163998816934, 0.314163998816934, 2.8274759893524, + "Factor 2", 0.449126711506193, 0.13496271268926, 1.21466441420334, + "Factor 3", 0.539738017727469, 0.090611306221276, 0.815501755991485 + )) }) test_that("Chi-squared Test table results match with parallel analysis 
based on PCs", { @@ -297,14 +295,13 @@ options$variables <- list("contcor1", "contcor2", "facFifty", "facFive","contNor set.seed(1) results <- runAnalysis("exploratoryFactorAnalysis", "test.csv", options, makeTests = F) -test_that("Factor Characteristics table results match with poly cor", { +test_that("Factor Characteristics table results match", { table <- results[["results"]][["modelContainer"]][["collection"]][["modelContainer_eigenTable"]][["data"]] jaspTools::expect_equal_tables(table, - list("Factor 1", 0.237444621938795, 0.238183139137491, 1.78311572348898, - 0.237444621938795, 0.238183139137491, 1.42466773163277, 1.42909883482494, - "Factor 2", 0.411139518227707, 0.411182023092321, 1.28924116893078, - 0.173694896288912, 0.17299888395483, 1.04216937773347, 1.03799330372898 - )) + list("Factor 1", 0.237444621938795, 0.238183139137491, 0.237444621938795, + 0.238183139137491, 1.42466773163277, 1.42909883482494, "Factor 2", + 0.411139518227707, 0.411182023092321, 0.173694896288912, 0.17299888395483, + 1.04216937773347, 1.03799330372898)) }) test_that("Mardia's Test of Multivariate Normality table results match with poly cor", { @@ -347,15 +344,13 @@ test_that("Anti-Image Correlation Matrix table results match", { test_that("Kaiser-Meyer-Olkin Test table results match", { table <- results[["results"]][["modelContainer"]][["collection"]][["modelContainer_kmoTable"]][["data"]] jaspTools::expect_equal_tables(table, - list("Overall MSA - ", 0.472338374588124, "contcor1", 0.490151695858873, + list("Overall MSA", 0.472338374588124, "contcor1", 0.490151695858873, "contcor2", 0.49029987989214, "facFifty", 0.473417708976819, "facFive", 0.515909446356412, "contNormal", 0.364284277677057, "debMiss1", 0.421070371345991)) }) - options <- defaultOptions options$factorCountMethod <- "parallelAnalysis" options$parallelAnalysisMethod <- "principalComponentBased" @@ -412,6 +407,7 @@ options <- list( rotationMethod = "orthogonal", sampleSize = 200, screePlot = FALSE, + 
antiImageCorrelationMatrix = FALSE, screePlotParallelAnalysisResults = TRUE, variables = c("x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9") ) @@ -422,8 +418,8 @@ results <- runAnalysis("exploratoryFactorAnalysis", cdt, options, makeTests = F) test_that("Factor Characteristics table results match", { table <- results[["results"]][["modelContainer"]][["collection"]][["modelContainer_eigenTable"]][["data"]] jaspTools::expect_equal_tables(table, - list("Factor 1", 0.292468690109419, 3.21634418143771, 0.292468690109419, - 2.63221821098477)) + list("Factor 1", 0.292468690109419, 0.292468690109419, 2.63221821098477 + )) }) test_that("Chi-squared Test table results match", { diff --git a/tests/testthat/test-principalcomponentanalysis.R b/tests/testthat/test-principalcomponentanalysis.R index ec263c00..b006bb05 100644 --- a/tests/testthat/test-principalcomponentanalysis.R +++ b/tests/testthat/test-principalcomponentanalysis.R @@ -22,6 +22,7 @@ defaultOptions <- list( bartlettTest = FALSE, mardiaTest = FALSE, addScores = FALSE, + antiImageCorrelationMatrix = FALSE, addedScoresPrefix = "", dataType = "raw", componentCountMethod = "parallelAnalysis", @@ -398,6 +399,7 @@ options <- list( parallelAnalysisMethod = "principalComponentBased", parallelAnalysisSeed = 1234, parallelAnalysisTable = FALSE, + antiImageCorrelationMatrix = FALSE, parallelAnalysisTableMethod = "principalComponentBased", pathDiagram = FALSE, plotHeight = 320, diff --git a/tests/testthat/test-verified-exploratoryfactoranalysis.R b/tests/testthat/test-verified-exploratoryfactoranalysis.R index 00755f9e..fba5f45e 100644 --- a/tests/testthat/test-verified-exploratoryfactoranalysis.R +++ b/tests/testthat/test-verified-exploratoryfactoranalysis.R @@ -24,6 +24,7 @@ defaultOptions <- list( pathDiagram = FALSE, screePlot = FALSE, screePlotParallelAnalysisResults = TRUE, + antiImageCorrelationMatrix = FALSE, kaiserMeyerOlkinTest = FALSE, bartlettTest = FALSE, mardiaTest = FALSE, @@ -56,7 +57,7 @@ 
options$loadingsDisplayLimit <- 0.4 options$obliqueSelector <- "oblimin" set.seed(1) -results <- jaspTools::runAnalysis("exploratoryFactorAnalysis", "EFA.csv", options) +results <- jaspTools::runAnalysis("exploratoryFactorAnalysis", testthat::test_path("EFA.csv"), options, makeTests = F) # https://jasp-stats.github.io/jasp-verification-project/factor.html#exploratory-factor-analysis @@ -65,8 +66,7 @@ test_that("Kaiser-Meyer-Olkin test match R, SPSS, SAS, MiniTab", { resultTable <- results$results$modelContainer$collection$modelContainer_kmoTable$data jaspTools::expect_equal_tables( "test"=resultTable, - "ref"=list("Overall MSA -", 0.930224499116479, "Question_01", 0.929761029851962, + "ref"=list("Overall MSA", 0.930224499116479, "Question_01", 0.929761029851962, "Question_02", 0.87477543869641, "Question_03", 0.951037837608159, "Question_04", 0.955340346281847, "Question_05", 0.960089249619342, "Question_06", 0.891331391519047, "Question_07", 0.941679983971416, @@ -139,14 +139,13 @@ test_that("Factor Loadings table results match R, SPSS, SAS, MiniTab", { test_that("Factor Characteristics table results match", { table <- results[["results"]][["modelContainer"]][["collection"]][["modelContainer_eigenTable"]][["data"]] jaspTools::expect_equal_tables(table, - list("Factor 1", 0.131897550845728, 0.293227816104412, 7.29004706361899, - 0.131897550845728, 0.293227816104412, 3.03364366945174, 6.74423977040149, - "Factor 2", 0.256005905753789, 0.342246352158825, 1.73882874685703, - 0.124108354908061, 0.0490185360544126, 2.85449216288541, 1.12742632925149, - "Factor 3", 0.342351093761207, 0.377617942230257, 1.31675152787573, - 0.0863451880074178, 0.0353715900714321, 1.98593932417061, 0.813546571642938, - "Factor 4", 0.404746469846354, 0.404746469846354, 1.22719815361453, - 0.0623953760851471, 0.0271285276160969, 1.43509364995838, 0.623956135170229 + list("Factor 1", 0.131897550845728, 0.293227816104414, 0.131897550845728, + 0.293227816104414, 3.03364366945174, 
6.74423977040153, "Factor 2", + 0.25600590575379, 0.342246352158826, 0.124108354908063, 0.0490185360544119, + 2.85449216288545, 1.12742632925147, "Factor 3", 0.342351093761208, + 0.377617942230258, 0.0863451880074171, 0.035371590071432, 1.98593932417059, + 0.813546571642936, "Factor 4", 0.404746469846355, 0.404746469846355, + 0.0623953760851476, 0.0271285276160967, 1.43509364995839, 0.623956135170225 )) }) @@ -173,7 +172,7 @@ options$rotationMethod <- "oblique" options$variables <- list("contWide", "contcor1", "contcor2", "facFifty", "contExpon", "debCollin1", "debEqual1") set.seed(1) -results <- jaspTools::runAnalysis("exploratoryFactorAnalysis", "test.csv", options) +results <- jaspTools::runAnalysis("exploratoryFactorAnalysis", "test.csv", options, makeTests = F) @@ -187,11 +186,10 @@ test_that("Factor Correlations table results match", { test_that("Factor Characteristics table results match", { table <- results[["results"]][["modelContainer"]][["collection"]][["modelContainer_eigenTable"]][["data"]] jaspTools::expect_equal_tables(table, - list("Factor 1", 0.211560139237577, 0.21520386846338, 1.76545396982125, - 0.211560139237577, 0.21520386846338, 1.48092097466304, 1.50642707924366, - "Factor 2", 0.366100386048402, 0.366966592875575, 1.31015305219849, - 0.154540246810825, 0.151762724412195, 1.08178172767577, 1.06233907088537 - )) + list("Factor 1", 0.211560139237578, 0.215203868463381, 0.211560139237578, + 0.215203868463381, 1.48092097466305, 1.50642707924367, "Factor 2", + 0.366100386048402, 0.366966592875577, 0.154540246810824, 0.151762724412196, + 1.08178172767577, 1.06233907088537)) }) test_that("Additional fit indices table results match", {