diff --git a/README.md b/README.md index c7b9687..a5788bf 100644 --- a/README.md +++ b/README.md @@ -107,45 +107,45 @@ The number of clusters that were found, as well as the number of members for eac Below there is an example of how this information is printed: ``` $ clusttraj trajectory.xyz -rmsd 3.2 -np 4 -p -n -cc xyz +2024-12-12 17:48:19,268 INFO [distmat.py:34] Calculating distance matrix using 4 threads -2023-08-25 11:37:35,201 INFO [distmat.py:22] Calculating distance matrix using 4 threads +2024-12-12 17:48:23,800 INFO [distmat.py:38] Saving condensed distance matrix to distmat.npy -2023-08-25 11:37:35,770 INFO [distmat.py:26] Saving condensed distance matrix to distmat.npy +2024-12-12 17:48:23,801 INFO [classify.py:97] Clustering using 'average' method to join the clusters -2023-08-25 11:37:35,771 INFO [classify.py:8] Starting clustering using 'average' method to join the clusters +2024-12-12 17:48:23,803 INFO [classify.py:105] Saving clustering classification to clusters.dat -2023-08-25 11:37:35,772 INFO [classify.py:15] Saving clustering classification to clusters.dat +2024-12-12 17:48:23,804 INFO [main.py:59]
Writing superposed configurations per cluster to files clusters_confs_*.xyz -2023-08-25 11:37:35,772 INFO [main.py:41]
Writing superposed configurations per cluster to files clusters_confs_*.xyz - -2023-08-25 11:37:37,797 INFO [main.py:71]
A total 100 snapshots were read and 16 cluster(s) was(were) found. +2024-12-12 17:48:26,729 INFO [main.py:102]
A total 100 snapshots were read and 7 cluster(s) was(were) found. The cluster sizes are: -Cluster Size -1 3 -2 6 -3 6 -4 8 -5 7 -6 6 -7 11 -8 2 -9 6 -10 11 -11 6 -12 5 -13 4 -14 7 -15 4 -16 8 +Cluster Size +1 3 +2 3 +3 31 +4 30 +5 18 +6 3 +7 12 + +2024-12-12 17:48:26,729 INFO [main.py:126]
Total wall time: 7.462641 s + ``` In the cluster output file (`-oc` option, default filename `clusters.dat`) the classification for each structure in the trajectory is printed. For example, if the first structure of the trajectory belongs to the cluster number *2*, the second structure belongs to cluster *1*, the third to cluster *2* and so on, the file `clusters.dat` will start with ``` -2 -1 -2 -... +$ head clusters.dat +7 +4 +5 +3 +4 +7 +6 +7 +4 +3 ``` The plot of the multidimensional representation (when the `-p` option is used) have each cluster colored in one color as the following picture: diff --git a/clusttraj/distmat.py b/clusttraj/distmat.py index 6b4e633..200fbae 100644 --- a/clusttraj/distmat.py +++ b/clusttraj/distmat.py @@ -192,9 +192,10 @@ def compute_distmat_line( Paview = Paview[prr] # build the total structure reordering just these atoms - whereins = np.where( - np.isin(np.arange(natoms), reorderexcl[soluexcl]) is True - ) + # whereins = np.where( + # np.isin(np.arange(natoms), reorderexcl[soluexcl]) is True + # ) + whereins = np.where(np.atleast_1d(np.isin(np.arange(natoms), reorderexcl[soluexcl]))) Psolu = np.insert( Pview, [x - whereins[0].tolist().index(x) for x in whereins[0]], @@ -263,7 +264,8 @@ def compute_distmat_line( Pview = Pview[prr] # build the total molecule with the reordered atoms - whereins = np.where(np.isin(np.arange(len(P)), reorderexcl) is True) + # whereins = np.where(np.isin(np.arange(len(P)), reorderexcl) is True) + whereins = np.where(np.atleast_1d(np.isin(np.arange(len(P)), reorderexcl))) Pr = np.insert( Pview, [x - whereins[0].tolist().index(x) for x in whereins[0]], diff --git a/clusttraj/io.py b/clusttraj/io.py index 69817a2..e1063f6 100644 --- a/clusttraj/io.py +++ b/clusttraj/io.py @@ -774,9 +774,10 @@ def save_clusters_config( Paview = Paview[prr] # build the total molecule reordering just these atoms - whereins = np.where( - np.isin(np.arange(natoms), reorderexcl[soluexcl]) is True - ) + # whereins = np.where( + 
# np.isin(np.arange(natoms), reorderexcl[soluexcl]) is True + # ) + whereins = np.where(np.atleast_1d(np.isin(np.arange(natoms), reorderexcl))) Psolu = np.insert( Pview, [x - whereins[0].tolist().index(x) for x in whereins[0]], @@ -812,7 +813,6 @@ def save_clusters_config( # prr = reorder(Qa[solvview], Paview, Q[solvview], Pview) # Pview = Pview[prr] # Paview = Paview[prr] - # # build the total molecule with the reordered atoms # whereins = np.where( # np.isin(np.arange(natoms, len(P)), reorderexcl[solvexcl]) == True @@ -849,7 +849,8 @@ def save_clusters_config( Pview = Pview[prr] # build the total molecule with the reordered atoms - whereins = np.where(np.isin(np.arange(len(P)), reorderexcl) is True) + # whereins = np.where(np.isin(np.arange(len(P)), reorderexcl) is True) + whereins = np.where(np.atleast_1d(np.isin(np.arange(len(P)), reorderexcl))) Pr = np.insert( Pview, [x - whereins[0].tolist().index(x) for x in whereins[0]], @@ -882,3 +883,4 @@ def save_clusters_config( # closes the file for the cnum cluster outfile.close() + # type: ignore diff --git a/clusttraj/main.py b/clusttraj/main.py index 5bd1cd1..1e49810 100644 --- a/clusttraj/main.py +++ b/clusttraj/main.py @@ -83,7 +83,7 @@ def main(args: List[str] = None) -> None: start_time = time.monotonic() plot_clust_evo(clust_opt, clusters) - plot_dendrogram(clust_opt, Z) + plot_dendrogram(clust_opt, clusters, Z) plot_mds(clust_opt, clusters, distmat) @@ -104,7 +104,7 @@ def main(args: List[str] = None) -> None: # Compute the evaluation metrics if clust_opt.metrics: start_time = time.monotonic() - ss, ch, db, cpcc = compute_metrics(clust_opt, distmat, Z, clusters) + ss, ch, db, cpcc = compute_metrics(distmat, Z, clusters) end_time = time.monotonic() outclust_str += f"\nSilhouette score: {ss:.3f}\n" diff --git a/clusttraj/metrics.py b/clusttraj/metrics.py index 0275c8d..909eb54 100644 --- a/clusttraj/metrics.py +++ b/clusttraj/metrics.py @@ -9,11 +9,11 @@ from scipy.cluster.hierarchy import cophenet from typing 
import Tuple import numpy as np -from .io import ClustOptions +# from .io import ClustOptions def compute_metrics( - clust_opt: ClustOptions, + # clust_opt: ClustOptions, distmat: np.ndarray, z_matrix: np.ndarray, clusters: np.ndarray, @@ -21,8 +21,10 @@ def compute_metrics( """Compute metrics to assess the performance of the clustering procedure. Args: - clust_opt (ClustOptions): The clustering options. + # clust_opt (ClustOptions): The clustering options. + distmat: The distance matrix. z_matrix (np.ndarray): The Z-matrix from hierarchical clustering procedure. + clusters (np.ndarray): The cluster classifications for each sample. Returns: ss (np.float64): The silhouette score. @@ -41,6 +43,6 @@ def compute_metrics( db = davies_bouldin_score(squareform(distmat), clusters) # Compute the cophenetic correlation coefficient - cpcc = cophenet(z_matrix)[0] + cpcc, _ = cophenet(z_matrix, distmat) return ss, ch, db, cpcc diff --git a/clusttraj/plot.py b/clusttraj/plot.py index 226a1a3..67c9f7c 100644 --- a/clusttraj/plot.py +++ b/clusttraj/plot.py @@ -3,12 +3,19 @@ from sklearn import manifold from scipy.spatial.distance import squareform import scipy.cluster.hierarchy as hcl +from scipy.cluster import hierarchy import matplotlib.pyplot as plt +import matplotlib.cm as cm +from matplotlib.colors import to_hex +from matplotlib.ticker import MaxNLocator import numpy as np from .io import ClustOptions -def plot_clust_evo(clust_opt: ClustOptions, clusters: np.ndarray) -> None: +def plot_clust_evo( + clust_opt: ClustOptions, + clusters: np.ndarray +) -> None: """Plot the evolution of cluster classification over the given samples. 
Args: @@ -18,52 +25,91 @@ def plot_clust_evo(clust_opt: ClustOptions, clusters: np.ndarray) -> None: Returns: None """ + + # Define a color for the lines + line_color = (0, 0, 0, 0.5) + # plot evolution with o cluster in trajectory - plt.figure(figsize=(25, 10)) - plt.plot(range(1, len(clusters) + 1), clusters, "o-", markersize=4) - plt.xlabel("Sample Index") - plt.ylabel("Cluster classification") + plt.figure(figsize=(10, 6)) + + # Set the y-axis to only show integers + plt.gca().yaxis.set_major_locator(MaxNLocator(integer=True)) + + # Increase tick size and font size + plt.tick_params(axis='both', which='major', direction='in', labelsize=12) + + plt.plot(range(1, len(clusters) + 1), clusters, markersize=4, color=line_color) + plt.scatter(range(1, len(clusters) + 1), clusters, marker="o", c=clusters, cmap=plt.cm.nipy_spectral) + plt.xlabel("Sample Index", fontsize=14) + plt.ylabel("Cluster classification", fontsize=14) plt.savefig(clust_opt.evo_name, bbox_inches="tight") -def plot_dendrogram(clust_opt: ClustOptions, Z: np.ndarray) -> None: +def plot_dendrogram( + clust_opt: ClustOptions, + clusters: np.ndarray, + Z: np.ndarray +) -> None: """Plot a dendrogram based on hierarchical clustering. Parameters: clust_opt (ClustOptions): The options for clustering. + clusters (np.ndarray): The cluster labels. Z (np.ndarray): The linkage matrix. 
Returns: None """ # Plot the dendrogram - plt.figure(figsize=(25, 10)) - plt.title("Hierarchical Clustering Dendrogram") - plt.xlabel("Sample Index") - plt.ylabel(r"RMSD ($\AA$)") + plt.figure(figsize=(18, 6)) + plt.title("Hierarchical Clustering Dendrogram", fontsize=20) + # plt.xlabel("Sample Index", fontsize=14) + plt.ylabel(r"RMSD ($\AA$)", fontsize=18) + plt.tick_params(axis='y', labelsize=18) - hcl.dendrogram( - Z, - leaf_rotation=90.0, # Rotates the x axis labels - leaf_font_size=8.0, # Font size for the x axis labels - ) + # Define a color for the dashed and non-cluster lines + line_color = (0, 0, 0, 0.5) - # Add a horizontal line at the minimum RMSD value + # Add a horizontal line at the minimum RMSD value and set the threshold if clust_opt.silhouette_score: if isinstance(clust_opt.optimal_cut, (np.ndarray, list)): - plt.axhline(clust_opt.optimal_cut[0], linestyle="--") + plt.axhline(clust_opt.optimal_cut[0], linestyle="--", linewidth=2, color=line_color) + threshold = clust_opt.optimal_cut[0] elif isinstance(clust_opt.optimal_cut, (float, np.float32, np.float64)): - plt.axhline(clust_opt.optimal_cut, linestyle="--") + plt.axhline(clust_opt.optimal_cut, linestyle="--", linewidth=2, color=line_color) + threshold = clust_opt.optimal_cut else: raise ValueError("optimal_cut must be a float or np.ndarray") else: - plt.axhline(clust_opt.min_rmsd, linestyle="--") + plt.axhline(clust_opt.min_rmsd, linestyle="--", linewidth=2, color=line_color) + threshold = clust_opt.min_rmsd + + # Use the 'nipy_spectral' cmap to color the dendrogram + unique_clusters = np.unique(clusters) + cmap = cm.get_cmap('nipy_spectral', len(unique_clusters)) + colors = [to_hex(cmap(i)) for i in range(cmap.N)] + + hierarchy.set_link_color_palette(colors) + + # Plot the dendrogram + hcl.dendrogram( + Z, + # leaf_rotation=90.0, # Rotates the x axis labels + # leaf_font_size=8.0, # Font size for the x axis labels + no_labels=True, + color_threshold=threshold, + above_threshold_color=line_color 
+ ) # Save the dendrogram to a file plt.savefig(clust_opt.dendrogram_name, bbox_inches="tight") -def plot_mds(clust_opt: ClustOptions, clusters: np.ndarray, distmat: np.ndarray) -> None: +def plot_mds( + clust_opt: ClustOptions, + clusters: np.ndarray, + distmat: np.ndarray +) -> None: """Plot the multidimensional scaling (MDS) of the distance matrix. Args: @@ -92,6 +138,9 @@ def plot_mds(clust_opt: ClustOptions, clusters: np.ndarray, distmat: np.ndarray) # Perform MDS and get the 2D representation coords = mds.fit_transform(squareform(distmat)) + # Set the figure size + plt.figure(figsize=(6,6)) + # Configure tick parameters plt.tick_params( axis="both", @@ -109,14 +158,16 @@ def plot_mds(clust_opt: ClustOptions, clusters: np.ndarray, distmat: np.ndarray) coords[:, 0], coords[:, 1], marker="o", c=clusters, cmap=plt.cm.nipy_spectral ) - plt.title("MDS Visualization") + plt.title("MDS Visualization", fontsize=14) # Save the plot plt.savefig(clust_opt.mds_name, bbox_inches="tight") def plot_tsne( - clust_opt: ClustOptions, clusters: np.ndarray, distmat: np.ndarray + clust_opt: ClustOptions, + clusters: np.ndarray, + distmat: np.ndarray ) -> None: """Plot the t-distributed Stochastic Neighbor Embedding 2D plot of the clustering. 
@@ -143,10 +194,11 @@ def plot_tsne( # Define a list of unique colors for each cluster unique_clusters = np.unique(clusters) - colors = plt.cm.tab20(np.linspace(0, 1, len(unique_clusters))) + cmap = cm.get_cmap('nipy_spectral', len(unique_clusters)) + colors = [cmap(i) for i in range(len(unique_clusters))] - # Create a new figure - plt.figure() + # Set the figure size + plt.figure(figsize=(6,6)) # Configure tick parameters plt.tick_params( diff --git a/docs/source/examples.rst b/docs/source/examples.rst index 2fd7553..562af65 100644 --- a/docs/source/examples.rst +++ b/docs/source/examples.rst @@ -1,16 +1,19 @@ Examples ======== -Before following the steps presented in this section, make sure to install the ``clusttraj`` package as presented in :doc:`install`. +Before following the steps presented in this section, make sure to install the +``clusttraj`` package as presented in :doc:`install`. +.. _water-example: Clustering of water molecules ***************************** -Here we perform the clustering of water molecules from a molecular dynamics simulation. The ``h2o_traj.xyz`` file has the molecular trajectory of 5 water molecules: +Here we perform the clustering of water molecules from a molecular dynamics simulation. +The ``h2o_traj.xyz`` file has the configurations of 5 water molecules: .. code-block:: console - ╰─○ head h2o_traj.xyz + $ head h2o_traj.xyz 15 Frame 1 @@ -24,26 +27,29 @@ Here we perform the clustering of water molecules from a molecular dynamics simu H 2.46676 23.16482 10.69619 -To perform the clustering procedure we need to provide the trajectory file and the RMSD threshold distance. This cutoff distance establishs the maximum accepted distance between clusters and can be determined in two ways. +To perform the clustering procedure we need to provide the file with configurations +and the Root Mean Square Deviation (RMSD) threshold distance. 
This cutoff distance establishes the +maximum accepted distance between clusters and can be determined in two ways. Manual threshold ^^^^^^^^^^^^^^^^ -We can fix the maximum RMSD deviation between units in the same cluster up to a certain number, `e.g.`, 2.0 Angstrons: +We can fix the maximum RMSD deviation between units in the same cluster +up to a certain number, `e.g.`, 2.0 Angstroms: .. code-block:: console - python -m clusttraj h2o_traj.xyz -rmsd 2.0 + $ python -m clusttraj h2o_traj.xyz -rmsd 2.0 As a result, we obtained 4 output files, `i.e.`, ``distmat.npy``, ``clusters.dat``, ``clusters.out`` and ``clusttraj.log``. - ``distmat.npy`` file has the condensed distance matrix in the ``numpy`` file format. -- ``clusters.dat`` file has the labels of each configuration in the trajectory file. +- ``clusters.dat`` file has the labels of each configuration in the configurations file. .. code-block:: console - ╰─○ head clusters.dat + $ head clusters.dat 3 3 1 @@ -60,16 +66,16 @@ As a result, we obtained 4 output files, `i.e.`, ``distmat.npy``, ``clusters.dat .. code-block:: console - ╰─○ cat clusters.out + $ cat clusters.out - Full command: /Users/Rafael/Coisas/Doutorado/clusttraj/clusttraj/clusttraj/__main__.py h2o_traj.xyz -rmsd 2.0 -i distmat.npy + Full command: /Users/rafael/Documents/Física/projetos/clusttraj/clusttraj/__main__.py h2o_traj.xyz -rmsd 2.0 Clusterized from trajectory file: h2o_traj.xyz Method: average RMSD criterion: 2.0 Ignoring hydrogens?: False - Distance matrix was read from: distmat.npy + Distance matrix was written in: distmat.npy The classification of each configuration was written in: clusters.dat A total 100 snapshots were read and 3 cluster(s) was(were) found. The cluster sizes are: @@ -82,75 +88,95 @@ As a result, we obtained 4 output files, `i.e.`, ``distmat.npy``, ``clusters.dat ..
code-block:: console - ╰─○ cat clusttraj.log - 2023-09-30 16:39:02,100 INFO [distmat.py:28] Reading condensed distance matrix from distmat.npy + $ cat clusttraj.log + 2024-12-10 20:03:47,369 INFO [distmat.py:34] Calculating distance matrix using 4 threads - 2023-09-30 16:39:02,102 INFO [classify.py:97] Clustering using 'average' method to join the clusters + 2024-12-10 20:03:49,416 INFO [distmat.py:38] Saving condensed distance matrix to distmat.npy - 2023-09-30 16:39:02,103 INFO [classify.py:105] Saving clustering classification to clusters.dat + 2024-12-10 20:03:49,418 INFO [classify.py:97] Clustering using 'average' method to join the clusters - 2023-09-30 16:39:02,105 INFO [main.py:75]
A total 100 snapshots were read and 3 cluster(s) was(were) found. + 2024-12-10 20:03:49,420 INFO [classify.py:105] Saving clustering classification to clusters.dat + + 2024-12-10 20:03:49,422 INFO [main.py:102]
A total 100 snapshots were read and 3 cluster(s) was(were) found. The cluster sizes are: Cluster Size 1 44 2 22 3 34 + 2024-12-10 20:03:49,422 INFO [main.py:126]
Total wall time: 2.053868 s + Automatic threshold ^^^^^^^^^^^^^^^^^^^ -Instead of manually fixing the maximum RMSD, we can run the ``-ss`` flag to determine the threshold as the value that maximizes the silhouette coefficient. The coefficient varies between -1 and 1, such that higher values indicate a better clustering procedure. Further details can be found `here `_. +Instead of manually fixing the maximum RMSD, we can run the ``-ss`` flag to +determine the threshold as the value that maximizes the silhouette coefficient. +The coefficient varies between -1 and 1, such that higher values indicate a better +clustering procedure. Further details can be found `here `_. .. code-block:: console - python -m clusttraj h2o_traj.xyz -ss -i distmat.npy -p + $ python -m clusttraj h2o_traj.xyz -ss -i distmat.npy -p -Since we already computed the distance matrix, we can provide it as input using the ``-i`` flag. Additionally, the ``-p`` flag generates 3 new output files for visualization. +Since we already computed the distance matrix, we can provide it as +input using the ``-i`` flag. Additionally, the ``-p`` flag generates +3 new output files for visualization. -- ``clusters.pdf`` plots the multidimensional scaling (MDS) of the distance matrix. +- ``clusters_mds.pdf`` plots the multidimensional scaling (MDS) of the distance matrix. .. image:: images/average_full_mds.pdf + :align: center :width: 300pt - ``clusters_dendrogram.pdf`` plots the hierarchical clustering dendrogram. .. image:: images/average_full_dend.pdf - :width: 300pt + :align: center + :width: 500pt - ``clusters_evo.pdf`` plots the evolution of cluster populations during the simulation. .. image:: images/average_full_evo.pdf - :width: 300pt + :align: center + :width: 500pt -The highest silhouette score is printed in the ``clusttraj.log`` file, along with the corresponding RMSD threshold: +The highest silhouette score is printed in the ``clusttraj.log`` file, along +with the corresponding RMSD threshold: .. 
code-block:: console - ╰─○ cat clusttraj.log - 2023-09-30 17:04:14,908 INFO [distmat.py:28] Reading condensed distance matrix from distmat.npy + $ cat clusttraj.log + 2024-12-10 20:06:50,323 INFO [distmat.py:28] Reading condensed distance matrix from distmat.npy - 2023-09-30 17:04:14,916 INFO [classify.py:27] Clustering using 'average' method to join the clusters + 2024-12-10 20:06:50,324 INFO [classify.py:27] Clustering using 'average' method to join the clusters - 2023-09-30 17:04:15,064 INFO [classify.py:61] Highest silhouette score: 0.21741836027295444 + 2024-12-10 20:06:50,338 INFO [classify.py:61] Highest silhouette score: 0.21741836027295453 - 2023-09-30 17:04:15,065 INFO [classify.py:65] The following RMSD threshold values yielded the same optimial silhouette score: 2.160840752745414, 2.2608407527454135 + 2024-12-10 20:06:50,338 INFO [classify.py:65] The following RMSD threshold values yielded the same optimial silhouette score: 2.160840752745414, 2.2608407527454135 - 2023-09-30 17:04:15,065 INFO [classify.py:68] The smallest RMSD of 2.160840752745414 has been adopted + 2024-12-10 20:06:50,338 INFO [classify.py:68] The smallest RMSD of 2.160840752745414 has been adopted - 2023-09-30 17:04:15,065 INFO [classify.py:76] Saving clustering classification to clusters.dat + 2024-12-10 20:06:50,338 INFO [classify.py:76] Saving clustering classification to clusters.dat - 2023-09-30 17:04:21,562 INFO [main.py:75]
A total 100 snapshots were read and 2 cluster(s) was(were) found. + 2024-12-10 20:06:52,172 INFO [main.py:102]
A total 100 snapshots were read and 2 cluster(s) was(were) found. The cluster sizes are: Cluster Size 1 44 - 2 56 + 2 56 -To determine the optimal threshold the silhouette coefficient is computed for all values in in the `linkage matrix `_ with the default step of 0.1. In this case more than one value yields the same optimal threshold (2.16 and 2.26), and the smallest one is adopted to enhance the within cluster similarity. + 2024-12-10 20:06:52,172 INFO [main.py:126]
Total wall time: 1.850199 s + +To determine the optimal threshold the silhouette coefficient is computed for +all values in the `linkage matrix `_ +with the default step of 0.1. In this case more than one threshold (2.16 and 2.26) yields the +same optimal silhouette score, and the smallest one is adopted to +enhance the within cluster similarity. Working with distance methods ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -To investigate the performance of different cluster distance methods we can use the ``-m`` flag. +To investigate the performance of different cluster distance methods we can use +the ``-m`` flag. Ward ++++ @@ -159,33 +185,41 @@ In the case of following the Ward variance minimization algorithm: .. code-block:: console - python -m clusttraj -ss -i distmat.npy -p -m ward -f + $ python -m clusttraj h2o_traj.xyz -ss -i distmat.npy -p -m ward -f -In this approach the ``ward`` method is adopted instead of the default ``average`` method. The ``-f`` flag is also included to force the output overwrite with the new data. From the log file: +In this approach the ``ward`` method is adopted instead of the default ``average`` +method. The ``-f`` flag is also included to force the output overwrite with the new data. +From the log file: .. code-block:: console - ╰─○ cat clusttraj.log - 2023-09-30 18:07:18,729 INFO [distmat.py:28] Reading condensed distance matrix from distmat.npy + $ tail -n 22 clusttraj.log + 2024-12-10 20:23:31,014 INFO [main.py:126]
Total wall time: 2.051024 s + + 2024-12-10 20:24:58,651 INFO [distmat.py:28] Reading condensed distance matrix from distmat.npy - 2023-09-30 18:07:18,730 INFO [classify.py:27] Clustering using 'ward' method to join the clusters + 2024-12-10 20:24:58,652 INFO [classify.py:27] Clustering using 'ward' method to join the clusters - 2023-09-30 18:07:18,943 INFO [classify.py:61] Highest silhouette score: 0.23037242401157293 + 2024-12-10 20:24:58,712 INFO [classify.py:61] Highest silhouette score: 0.23037242401157287 - 2023-09-30 18:07:18,943 INFO [classify.py:65] The following RMSD threshold values yielded the same optimial silhouette score: 6.060840752745413, 6.160840752745413, 6.260840752745413, 6.360840752745413, 6.460840752745413, 6.5608407527454125, 6.660840752745413, 6.760840752745413, 6.860840752745412, 6.960840752745413, 7.0608407527454125, 7.160840752745413, 7.260840752745413, 7.360840752745412, 7.460840752745413, 7.5608407527454125, 7.660840752745413, 7.760840752745413, 7.860840752745412, 7.960840752745413, 8.060840752745412, 8.160840752745411, 8.260840752745413, 8.360840752745412, 8.460840752745412, 8.560840752745412, 8.660840752745411, 8.760840752745413, 8.860840752745412, 8.960840752745412, 9.060840752745412, 9.160840752745413, 9.260840752745413, 9.360840752745412, 9.460840752745412, 9.560840752745412, 9.660840752745411, 9.760840752745413, 9.860840752745412, 9.960840752745412, 10.060840752745412, 10.160840752745411, 10.260840752745413, 10.360840752745412, 10.460840752745412, 10.560840752745412, 10.660840752745411, 10.760840752745413 + 2024-12-10 20:24:58,712 INFO [classify.py:65] The following RMSD threshold values yielded the same optimial silhouette score: 6.0608407527454125, 6.160840752745413, 6.260840752745413, 6.360840752745412, 6.460840752745413, 6.5608407527454125, 6.660840752745413, 6.760840752745413, 6.860840752745412, 6.960840752745413, 7.0608407527454125, 7.160840752745413, 7.260840752745413, 7.360840752745412, 7.460840752745413, 
7.5608407527454125, 7.660840752745412, 7.760840752745413, 7.860840752745412, 7.960840752745412, 8.060840752745412, 8.160840752745413, 8.260840752745413, 8.360840752745412, 8.460840752745412, 8.560840752745412, 8.660840752745413, 8.760840752745413, 8.860840752745412, 8.960840752745412, 9.060840752745412, 9.160840752745411, 9.260840752745413, 9.360840752745412, 9.460840752745412, 9.560840752745412, 9.660840752745411, 9.760840752745413, 9.860840752745412, 9.960840752745412, 10.060840752745412, 10.160840752745411, 10.260840752745413, 10.360840752745412, 10.460840752745412, 10.560840752745412, 10.660840752745411, 10.760840752745413 - 2023-09-30 18:07:18,943 INFO [classify.py:68] The smallest RMSD of 6.060840752745413 has been adopted + 2024-12-10 20:24:58,712 INFO [classify.py:68] The smallest RMSD of 6.0608407527454125 has been adopted - 2023-09-30 18:07:18,943 INFO [classify.py:76] Saving clustering classification to clusters.dat + 2024-12-10 20:24:58,712 INFO [classify.py:76] Saving clustering classification to clusters.dat - 2023-09-30 18:07:25,197 INFO [main.py:75]
A total 100 snapshots were read and 2 cluster(s) was(were) found. + 2024-12-10 20:25:00,661 INFO [main.py:102]
A total 100 snapshots were read and 2 cluster(s) was(were) found. The cluster sizes are: Cluster Size 1 46 2 54 + 2024-12-10 20:25:00,661 INFO [main.py:126]
Total wall time: 2.011472 s -The ``ward`` method also resulted in two clusters with similar populations (46/54 vs 44/56) and with higher silhouette coefficient (0.230 vs 0.217). On the other hand, the smallest RMSD threshold is 6.06, indicating higher deviation between the geometries in each cluster. +The ``ward`` method also resulted in two clusters with similar populations +(46/54 vs 44/56) and with higher silhouette coefficient (0.230 vs 0.217). +On the other hand, the smallest RMSD threshold is 6.06, indicating higher deviation +between the geometries in each cluster. Median ++++++ @@ -194,32 +228,35 @@ To adopt the ``median`` method we can run: .. code-block:: console - python -m clusttraj h2o_traj.xyz -ss -i distmat.npy -p -m median -f - - ╰─○ cat clusttraj.log - 2023-09-30 18:23:54,842 INFO [distmat.py:28] Reading condensed distance matrix from distmat.npy + $ python -m clusttraj h2o_traj.xyz -ss -i distmat.npy -p -m median -f + 2024-12-10 20:27:55,765 INFO [distmat.py:28] Reading condensed distance matrix from distmat.npy - 2023-09-30 18:23:54,843 INFO [classify.py:27] Clustering using 'median' method to join the clusters + 2024-12-10 20:27:55,766 INFO [classify.py:27] Clustering using 'median' method to join the clusters - 2023-09-30 18:23:54,870 INFO [classify.py:61] Highest silhouette score: 0.07527635729544939 + 2024-12-10 20:27:55,775 INFO [classify.py:61] Highest silhouette score: 0.07527635729544939 - 2023-09-30 18:23:54,870 INFO [classify.py:65] The following RMSD threshold values yielded the same optimial silhouette score: 1.8608407527454136, 1.9608407527454137, 2.060840752745414 + 2024-12-10 20:27:55,775 INFO [classify.py:65] The following RMSD threshold values yielded the same optimial silhouette score: 1.8608407527454136, 1.9608407527454137, 2.060840752745414 - 2023-09-30 18:23:54,870 INFO [classify.py:68] The smallest RMSD of 1.8608407527454136 has been adopted + 2024-12-10 20:27:55,775 INFO [classify.py:68] The smallest RMSD of 
1.8608407527454136 has been adopted - 2023-09-30 18:23:54,870 INFO [classify.py:76] Saving clustering classification to clusters.dat + 2024-12-10 20:27:55,775 INFO [classify.py:76] Saving clustering classification to clusters.dat - 2023-09-30 18:24:00,293 INFO [main.py:75]
A total 100 snapshots were read and 2 cluster(s) was(were) found. + 2024-12-10 20:27:58,152 INFO [main.py:102]
A total 100 snapshots were read and 2 cluster(s) was(were) found. The cluster sizes are: Cluster Size 1 99 2 1 + 2024-12-10 20:27:58,153 INFO [main.py:126]
Total wall time: 2.388923 s -In this case the highest silhouette score of 0.075 indicates that the points are located near the edge of the clusters. The distribution of population among the 2 clusters (1/99) also indicates the limitations of the method. Finally, visual inspection of the dendrogram shows anomalous behavior. +In this case the highest silhouette score of 0.075 indicates that the points are +located near the edge of the clusters. The distribution of population among the +2 clusters (1/99) also indicates the limitations of the method. Finally, visual +inspection of the dendrogram shows anomalous behavior. .. image:: images/anomalous_dend.pdf - :width: 300pt + :align: center + :width: 500pt .. .. raw:: html @@ -227,34 +264,36 @@ In this case the highest silhouette score of 0.075 indicates that the points are The reader is encouraged to verify that the addition of ``-odl`` for `optimal visualization `_ flag cannot avoid the dendrogram crossings. - -Accouting for molecule permutation -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Accounting for molecule permutation +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -As an attempt to avoid separating similar configurations due to permutation of identical molecules, we can reorder the atoms using the ``-e`` flag. +As an attempt to avoid separating similar configurations due to permutation of identical +molecules, we can reorder the atoms using the ``-e`` flag. .. code-block:: console - python -m clusttraj h2o_traj.xyz -ss -p -m average -e -f + $ python -m clusttraj h2o_traj.xyz -ss -p -m average -e -f -For this system the reordering compromised the statistical quality of the clustering. The number of clusters was increased from 2 to 35 while the optimal silhouette score was reduced from 0.217 to 0.119: +For this system the reordering compromised the statistical quality of the clustering. +The number of clusters was increased from 2 to 35 while the optimal silhouette score +was reduced from 0.217 to 0.119: ..
code-block:: console - ╰─○ cat clusttraj.log - 2023-10-02 19:53:20,618 INFO [distmat.py:34] Calculating distance matrix using 4 threads + $ python -m clusttraj h2o_traj.xyz -ss -p -m average -e -f + 2024-12-10 20:44:05,214 INFO [distmat.py:34] Calculating distance matrix using 4 threads - 2023-10-02 19:54:00,821 INFO [distmat.py:38] Saving condensed distance matrix to distmat.npy + 2024-12-10 20:44:07,216 INFO [distmat.py:38] Saving condensed distance matrix to distmat.npy - 2023-10-02 19:54:00,823 INFO [classify.py:27] Clustering using 'average' method to join the clusters + 2024-12-10 20:44:07,217 INFO [classify.py:27] Clustering using 'average' method to join the clusters - 2023-10-02 19:54:00,855 INFO [classify.py:61] Highest silhouette score: 0.11873407875769024 + 2024-12-10 20:44:07,229 INFO [classify.py:61] Highest silhouette score: 0.11873407875769022 - 2023-10-02 19:54:00,856 INFO [classify.py:71] Optimal RMSD threshold value: 1.237013337787396 + 2024-12-10 20:44:07,229 INFO [classify.py:71] Optimal RMSD threshold value: 1.2370133377873964 - 2023-10-02 19:54:00,856 INFO [classify.py:76] Saving clustering classification to clusters.dat + 2024-12-10 20:44:07,229 INFO [classify.py:76] Saving clustering classification to clusters.dat - 2023-10-02 19:54:06,676 INFO [main.py:75]
A total 100 snapshots were read and 35 cluster(s) was(were) found. + 2024-12-10 20:44:09,279 INFO [main.py:102]
A total 100 snapshots were read and 35 cluster(s) was(were) found. The cluster sizes are: Cluster Size 1 2 @@ -293,48 +332,230 @@ For this system the reordering compromised the statistical quality of the cluste 34 1 35 1 -This functionality is especially useful in the case of solvated systems. In our case, we can treat one water molecule as the solute and the others as solvent. For example, considering the first water molecule as the solute: - -.. code-block:: console - - python -m clusttraj h2o_traj.xyz -ss -p -m average -e -f -ns 3 + 2024-12-10 20:44:09,280 INFO [main.py:126]
Total wall time: 4.066500 s -The number of solvent atoms must be specified using the ``-ns`` flag, and as a result we managed to increase the silhouette coefficient to 0.247 with a significant change in the cluster populations: +This functionality is especially useful in the case of solvated systems. In our case, +we can treat one water molecule as the solute and the others as solvent. For example, +considering the first water molecule as the solute: .. code-block:: console - ╰─○ cat clusttraj.log - 2023-10-02 20:13:52,041 INFO [distmat.py:38] Saving condensed distance matrix to distmat.npy + $ python -m clusttraj h2o_traj.xyz -ss -p -m average -e -f -ns 3 + 2024-12-10 20:46:41,192 INFO [distmat.py:34] Calculating distance matrix using 4 threads + + 2024-12-10 20:46:43,383 INFO [distmat.py:38] Saving condensed distance matrix to distmat.npy - 2023-10-02 20:13:52,044 INFO [classify.py:27] Clustering using 'average' method to join the clusters + 2024-12-10 20:46:43,385 INFO [classify.py:27] Clustering using 'average' method to join the clusters - 2023-10-02 20:13:52,101 INFO [classify.py:61] Highest silhouette score: 0.24735123044958368 + 2024-12-10 20:46:43,407 INFO [classify.py:61] Highest silhouette score: 0.24735123044958363 - 2023-10-02 20:13:52,102 INFO [classify.py:65] The following RMSD threshold values yielded the same optimial silhouette score: 3.035586843407412, 3.135586843407412, 3.235586843407412, 3.335586843407412 + 2024-12-10 20:46:43,407 INFO [classify.py:65] The following RMSD threshold values yielded the same optimial silhouette score: 3.035586843407412, 3.135586843407412, 3.235586843407412, 3.335586843407412 - 2023-10-02 20:13:52,102 INFO [classify.py:68] The smallest RMSD of 3.035586843407412 has been adopted + 2024-12-10 20:46:43,407 INFO [classify.py:68] The smallest RMSD of 3.035586843407412 has been adopted - 2023-10-02 20:13:52,102 INFO [classify.py:76] Saving clustering classification to clusters.dat + 2024-12-10 20:46:43,407 INFO 
[classify.py:76] Saving clustering classification to clusters.dat - 2023-10-02 20:13:57,498 INFO [main.py:75]
A total 100 snapshots were read and 2 cluster(s) was(were) found. + 2024-12-10 20:46:45,206 INFO [main.py:102]
A total 100 snapshots were read and 2 cluster(s) was(were) found. The cluster sizes are: Cluster Size 1 3 2 97 + 2024-12-10 20:46:45,206 INFO [main.py:126]
Total wall time: 4.015671 s + +The number of solute atoms must be specified using the ``-ns`` flag, and as a result +we managed to increase the silhouette coefficient to 0.247 with a significant change +in the cluster populations. + Final Kabsch rotation ^^^^^^^^^^^^^^^^^^^^^ -We can also add a final Kabsch rotation to minimize the RMSD after reordering the solvent atoms: +We can also add a final Kabsch rotation to minimize the RMSD after reordering the +solvent atoms: + +.. code-block:: console + + $ python -m clusttraj h2o_traj.xyz -ss -p -m average -e -f -ns 3 --final-kabsch + +For this system no significant changes were observed, as the silhouette coefficient +and cluster populations remain almost identical. + +.. _polymer-example: +Polymer solvated in aqueous mixture +*********************************** + +In this example we are going to consider a larger system with solute and solvent +molecules. From an MD simulation of a single oligomer chain comprising 10 monomers of PTQ10 +solvated in chloroform, we extracted 100 snapshots to perform the clustering procedure. The +classical simulations were performed for 50 ns in the NPT ensemble using the +`GROMACS software `_, and the trajectory is +stored in the ``olig_solv.gro`` file. Here is the first frame of the trajectory: + +.. image:: images/olig_solv.pdf + :align: center + :width: 200pt + +Since the code uses ``openbabel`` to read the +configurations, the trajectory file can be provided in any one of the `file formats supported +by the library `_. + +Standard clustering using RMSD +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +For comparison, we start by running the traditional hierarchical clustering scheme that +does not account for solvent permutation. Instead of calling the library via ``python +-m clusttraj``, as done in the :ref:`previous section `, one can directly +run the program: + +.. 
code-block:: console + + $ clusttraj olig_solv.gro -m average -ss -p --metrics + 2024-12-12 16:13:01,490 INFO [distmat.py:34] Calculating distance matrix using 4 threads + + 2024-12-12 16:13:43,838 INFO [distmat.py:38] Saving condensed distance matrix to distmat.npy + + 2024-12-12 16:13:43,840 INFO [classify.py:27] Clustering using 'average' method to join the clusters + + 2024-12-12 16:13:43,923 INFO [classify.py:61] Highest silhouette score: 0.13900364227503081 + + 2024-12-12 16:13:43,923 INFO [classify.py:65] The following RMSD threshold values yielded the same optimial silhouette score: 25.157769211396136, 25.257769211396134 + + 2024-12-12 16:13:43,923 INFO [classify.py:68] The smallest RMSD of 25.157769211396136 has been adopted + + 2024-12-12 16:13:43,924 INFO [classify.py:76] Saving clustering classification to clusters.dat + + 2024-12-12 16:13:46,184 INFO [main.py:102]
A total 100 snapshots were read and 2 cluster(s) was(were) found. + The cluster sizes are: + Cluster Size + 1 99 + 2 1 + + 2024-12-12 16:13:46,189 INFO [main.py:126]
Total wall time: 44.698860 s + + +For this case we obtain only two clusters with a drastic difference in population, +which is not very helpful for analysis. Adding the ``--metrics`` flag +computes 4 scores to quantitatively compare the models' performance. + +.. code-block:: console + + $ tail clusters.out + The cluster sizes are: + Cluster Size + 1 99 + 2 1 + + Silhouette score: 0.139 + Calinski Harabsz score: 2.476 + Davies-Bouldin score: 0.619 + Cophenetic correlation coefficient: 0.908 + + +Reordering solvent molecules and the final Kabsch rotation +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +To account for the molecular permutation we add the ``-e`` flag and pass the number of oligomer +atoms in the ``-ns`` flag, to be ignored during the reordering process. .. code-block:: console - python -m clusttraj h2o_traj.xyz -ss -p -m average -e -f -ns 3 --final-kabsch + $ clusttraj olig_solv.gro -m average -ss -p --metrics -e -ns 702 -f + 2024-12-12 16:09:08,619 INFO [distmat.py:34] Calculating distance matrix using 4 threads + + 2024-12-12 16:12:08,573 INFO [distmat.py:38] Saving condensed distance matrix to distmat.npy + + 2024-12-12 16:12:08,576 INFO [classify.py:27] Clustering using 'average' method to join the clusters + + 2024-12-12 16:12:08,675 INFO [classify.py:61] Highest silhouette score: 0.4420374106917728 + + 2024-12-12 16:12:08,676 INFO [classify.py:65] The following RMSD threshold values yielded the same optimial silhouette score: 11.532116467337541, 11.632116467337543, 11.73211646733754, 11.832116467337542, 11.93211646733754, 12.032116467337541, 12.13211646733754, 12.23211646733754, 12.332116467337539, 12.43211646733754, 12.532116467337538, 12.63211646733754, 12.732116467337537, 12.832116467337539, 12.932116467337536, 13.032116467337538, 13.132116467337536, 13.232116467337537, 13.332116467337535, 13.432116467337536, 13.532116467337534, 13.632116467337536, 13.732116467337534, 13.832116467337535, 13.932116467337533, 
14.032116467337534, 14.132116467337532, 14.232116467337534, 14.332116467337531, 14.432116467337533, 14.53211646733753, 14.632116467337532, 14.73211646733753, 14.832116467337531, 14.93211646733753, 15.03211646733753, 15.132116467337529, 15.23211646733753, 15.332116467337528, 15.43211646733753, 15.532116467337527, 15.632116467337529, 15.732116467337526, 15.832116467337528, 15.932116467337526, 16.032116467337527, 16.132116467337525, 16.232116467337526, 16.332116467337524, 16.432116467337526, 16.532116467337524 + + 2024-12-12 16:12:08,676 INFO [classify.py:68] The smallest RMSD of 11.532116467337541 has been adopted + + 2024-12-12 16:12:08,676 INFO [classify.py:76] Saving clustering classification to clusters.dat + + 2024-12-12 16:12:11,256 INFO [main.py:102]
A total 100 snapshots were read and 2 cluster(s) was(were) found. + The cluster sizes are: + Cluster Size + 1 30 + 2 70 + + 2024-12-12 16:12:11,262 INFO [main.py:126]
Total wall time: 182.642790 s + + +.. code-block:: console + + $ tail clusters.out + The cluster sizes are: + Cluster Size + 1 30 + 2 70 + + Silhouette score: 0.442 + Calinski Harabsz score: 256.998 + Davies-Bouldin score: 0.482 + Cophenetic correlation coefficient: 0.845 + + +In addition to the wrong interpretation when not considering the permutation between +identical molecules, the metrics are generally worse. Given the summary presented below, +the three scores are significantly better when accounting for the permutation. + +.. image:: images/summary.png + :align: center + :width: 500pt + + +Moreover, the difference in the Cophenetic correlation coefficient is small, indicating an +overall statistically better clustering approach with the reordering process. Finally, +one can perform the final Kabsch rotation by running: + +.. code-block:: console + + $ clusttraj olig_solv.gro -m average -ss -p --metrics -e -ns 702 -f --final-kabsch + 2024-12-12 16:35:25,405 INFO [distmat.py:34] Calculating distance matrix using 4 threads + + 2024-12-12 16:38:24,715 INFO [distmat.py:38] Saving condensed distance matrix to distmat.npy + + 2024-12-12 16:38:24,718 INFO [classify.py:27] Clustering using 'average' method to join the clusters + + 2024-12-12 16:38:24,798 INFO [classify.py:61] Highest silhouette score: 0.4505111680708198 + + 2024-12-12 16:38:24,798 INFO [classify.py:65] The following RMSD threshold values yielded the same optimial silhouette score: 11.504201293638701, 11.6042012936387, 11.7042012936387, 11.8042012936387, 11.9042012936387, 12.0042012936387, 12.104201293638699, 12.204201293638699, 12.304201293638698, 12.404201293638698, 12.504201293638697, 12.604201293638697, 12.704201293638697, 12.804201293638696, 12.904201293638696, 13.004201293638696, 13.104201293638695, 13.204201293638695, 13.304201293638695, 13.404201293638694, 13.504201293638694, 13.604201293638694, 13.704201293638693, 13.804201293638693, 13.904201293638693, 14.004201293638692, 14.104201293638692, 
14.204201293638691, 14.304201293638691, 14.40420129363869, 14.50420129363869, 14.60420129363869, 14.70420129363869, 14.80420129363869, 14.904201293638689, 15.004201293638689, 15.104201293638688, 15.204201293638688, 15.304201293638688, 15.404201293638687, 15.504201293638687, 15.604201293638686, 15.704201293638686, 15.804201293638686, 15.904201293638685, 16.004201293638687, 16.104201293638685, 16.204201293638683, 16.304201293638684, 16.404201293638685, 16.504201293638683 + + 2024-12-12 16:38:24,798 INFO [classify.py:68] The smallest RMSD of 11.504201293638701 has been adopted + + 2024-12-12 16:38:24,798 INFO [classify.py:76] Saving clustering classification to clusters.dat + + 2024-12-12 16:38:26,979 INFO [main.py:102]
A total 100 snapshots were read and 2 cluster(s) was(were) found. + The cluster sizes are: + Cluster Size + 1 30 + 2 70 + + 2024-12-12 16:38:26,981 INFO [main.py:126]
Total wall time: 181.576293 s + + +.. code-block:: console + + $ tail clusters.out + The cluster sizes are: + Cluster Size + 1 30 + 2 70 + + Silhouette score: 0.451 + Calinski Harabsz score: 268.899 + Davies-Bouldin score: 0.463 + Cophenetic correlation coefficient: 0.845 + +In this case, we obtained the same clustering evolution but with small differences +in the coefficients that are consistently better, `i.e.`, with a slight increase in silhouette and +Calinski-Harabasz scores and a decrease in the Davies-Bouldin score. + + -For this system no significant changes were observed, as the silhouette coefficient and cluster populations remain almost identical. -Removing hydrogen atoms -^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/docs/source/images/anomalous_dend.pdf b/docs/source/images/anomalous_dend.pdf index ff94386..d90e6b9 100644 Binary files a/docs/source/images/anomalous_dend.pdf and b/docs/source/images/anomalous_dend.pdf differ diff --git a/docs/source/images/average_full_dend.pdf b/docs/source/images/average_full_dend.pdf index 884ff81..dcc9a12 100644 Binary files a/docs/source/images/average_full_dend.pdf and b/docs/source/images/average_full_dend.pdf differ diff --git a/docs/source/images/average_full_evo.pdf b/docs/source/images/average_full_evo.pdf index 8221aa2..33e7966 100644 Binary files a/docs/source/images/average_full_evo.pdf and b/docs/source/images/average_full_evo.pdf differ diff --git a/docs/source/images/average_full_mds.pdf b/docs/source/images/average_full_mds.pdf index 7cfad2b..b38fdd1 100644 Binary files a/docs/source/images/average_full_mds.pdf and b/docs/source/images/average_full_mds.pdf differ diff --git a/docs/source/images/average_full_tsne.pdf b/docs/source/images/average_full_tsne.pdf new file mode 100644 index 0000000..6d54ffa Binary files /dev/null and b/docs/source/images/average_full_tsne.pdf differ diff --git a/docs/source/images/olig_solv.pdf b/docs/source/images/olig_solv.pdf new file mode 100644 index 0000000..c8f33b2 Binary files 
/dev/null and b/docs/source/images/olig_solv.pdf differ diff --git a/docs/source/images/summary.png b/docs/source/images/summary.png new file mode 100644 index 0000000..8135a31 Binary files /dev/null and b/docs/source/images/summary.png differ diff --git a/docs/source/index.rst b/docs/source/index.rst index 05f0354..45aefcd 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -15,7 +15,7 @@ Welcome to ClustTraj's documentation! clusttraj usage examples - + Indices and tables ================== diff --git a/imgs/example_dendrogram.png b/imgs/example_dendrogram.png index 0e900ee..f09e26c 100644 Binary files a/imgs/example_dendrogram.png and b/imgs/example_dendrogram.png differ diff --git a/imgs/example_evo.png b/imgs/example_evo.png index 7af469f..653759c 100644 Binary files a/imgs/example_evo.png and b/imgs/example_evo.png differ diff --git a/imgs/example_mds.png b/imgs/example_mds.png index e73cf21..3256a77 100644 Binary files a/imgs/example_mds.png and b/imgs/example_mds.png differ