diff --git a/CHANGELOG.md b/CHANGELOG.md
index bc717c3..f2da3d9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,8 @@
 # Change Log
 
+## v2.1.0
+- Add class weight support
+
 ## v2.0.1
 - Fix documentation inconsistency
 
diff --git a/CITATION.cff b/CITATION.cff
index 641cadd..ab321ae 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -55,5 +55,5 @@ keywords:
 license: "Apache-2.0"
 message: "If you use this software, please cite it using these metadata."
 title: "mcfly: deep learning for time series"
-version: "2.0.1"
+version: "2.1.0"
 ...
diff --git a/mcfly/_version.py b/mcfly/_version.py
index 75b7c3f..e6e2e89 100644
--- a/mcfly/_version.py
+++ b/mcfly/_version.py
@@ -15,4 +15,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-__version__ = '2.0.1'
+__version__ = '2.1.0'
diff --git a/mcfly/find_architecture.py b/mcfly/find_architecture.py
index 7ba5843..228681d 100644
--- a/mcfly/find_architecture.py
+++ b/mcfly/find_architecture.py
@@ -42,7 +42,7 @@
 def train_models_on_samples(X_train, y_train, X_val, y_val, models,
                             nr_epochs=5, subset_size=100, verbose=True, outputfile=None,
                             model_path=None, early_stopping=False,
-                            batch_size=20, metric='accuracy'):
+                            batch_size=20, metric='accuracy', class_weight=None):
     """
     Given a list of compiled models, this function trains
     them all on a subset of the train data. If the given size of the subset is
@@ -76,6 +76,8 @@ def train_models_on_samples(X_train, y_train, X_val, y_val, models,
         nr of samples per batch
     metric : str
         metric to store in the history object
+    class_weight : dict, optional
+        Dictionary containing class weights (example: {0: 0.5, 1: 2.})
 
     Returns
     ----------
@@ -86,7 +88,6 @@ def train_models_on_samples(X_train, y_train, X_val, y_val, models,
     val_losses : list of floats
         validation losses of the models
     """
-    # if subset_size is smaller then X_train, this will work fine
     X_train_sub = X_train[:subset_size, :, :]
     y_train_sub = y_train[:subset_size, :]
 
@@ -112,7 +113,8 @@ def train_models_on_samples(X_train, y_train, X_val, y_val, models,
                             # see comment on subsize_set
                             validation_data=(X_val, y_val),
                             verbose=verbose,
-                            callbacks=callbacks)
+                            callbacks=callbacks,
+                            class_weight=class_weight)
         histories.append(history)
 
         val_metrics.append(_get_from_history('val_' + metric_name, history.history)[-1])
@@ -198,6 +200,7 @@ def _cast_to_primitive_type(obj):
 def find_best_architecture(X_train, y_train, X_val, y_val, verbose=True,
                            number_of_models=5, nr_epochs=5, subset_size=100,
                            outputpath=None, model_path=None, metric='accuracy',
+                           class_weight=None,
                            **kwargs):
     """
     Tries out a number of models on a subsample of the data,
@@ -230,6 +233,8 @@ def find_best_architecture(X_train, y_train, X_val, y_val, verbose=True,
         File location to store the model results
     model_path: str, optional
         Directory to save the models as HDF5 files
+    class_weight: dict, optional
+        Dictionary containing class weights (example: {0: 0.5, 1: 2.})
     metric: str, optional
         metric that is used to evaluate the model on the validation set.
         See https://keras.io/metrics/ for possible metrics
@@ -262,7 +267,8 @@ def find_best_architecture(X_train, y_train, X_val, y_val, verbose=True,
                                                                     verbose=verbose,
                                                                     outputfile=outputpath,
                                                                     model_path=model_path,
-                                                                    metric=metric)
+                                                                    metric=metric,
+                                                                    class_weight=class_weight)
     best_model_index = np.argmax(val_accuracies)
     best_model, best_params, best_model_type = models[best_model_index]
     knn_acc = kNN_accuracy(
diff --git a/tests/test_find_architecture.py b/tests/test_find_architecture.py
index 1d69117..058853f 100644
--- a/tests/test_find_architecture.py
+++ b/tests/test_find_architecture.py
@@ -2,15 +2,14 @@
 import numpy as np
 from pytest import approx, raises
 from tensorflow.keras.utils import to_categorical
+import tensorflow as tf
 import os
 import unittest
 
 from test_tools import safe_remove
 
 
-class FindArchitectureSuite(unittest.TestCase):
-    """Basic test cases."""
-
+class FindArchitectureBasicSuite(unittest.TestCase):
     def test_kNN_accuracy_1(self):
         """
         The accuracy for this single-point dataset should be 1.
@@ -79,10 +78,51 @@ def train_models_on_samples_empty(self):
                 batch_size=20, metric='accuracy')
         assert len(histories) == 0
 
+    @unittest.skip('Needs tensorflow API v2. Also, quite a slow test of 15s.')
+    def test_find_best_architecture_with_class_weights(self):
+        """Model should not ignore tiny class with huge class weight. Note that this test is non-deterministic,
+        even though a seed was set. Also note that this test is very slow, taking up 40% of all mcfly test time."""
+        tf.random.set_seed(1234)  # Needs tensorflow API v2
+
+        X_train, y_train = _create_2_class_labeled_dataset(1, 999)  # very unbalanced
+        X_val, y_val = _create_2_class_labeled_dataset(1, 99)
+        X_test, y_test = _create_2_class_labeled_dataset(10, 10)
+        class_weight = {0: 2, 1: 0.002}
+
+        best_model, best_params, best_model_type, knn_acc = find_architecture.find_best_architecture(
+            X_train, y_train, X_val, y_val, verbose=False, subset_size=1000,
+            number_of_models=5, nr_epochs=1, model_type='CNN', class_weight=class_weight)
+
+        probabilities = best_model.predict_proba(X_test)
+        predicted = probabilities.argmax(axis=1)
+        np.testing.assert_array_equal(predicted, y_test.argmax(axis=1))
+
     def setUp(self):
         np.random.seed(1234)
 
 
+def _create_2_class_labeled_dataset(num_samples_class_a, num_samples_class_b):
+    X = _create_2_class_noisy_data(num_samples_class_a, num_samples_class_b)
+    y = _create_2_class_labels(num_samples_class_a, num_samples_class_b)
+    return X, y
+
+
+def _create_2_class_noisy_data(num_samples_class_a, num_samples_class_b):
+    num_channels = 1
+    num_time_steps = 10
+    data_class_a = np.zeros((num_samples_class_a, num_time_steps, num_channels))
+    data_class_b = np.ones((num_samples_class_b, num_time_steps, num_channels))
+    signal = np.vstack((data_class_a, data_class_b))
+    noise = 0.1 * np.random.randn(signal.shape[0], signal.shape[1], signal.shape[2])
+    return signal + noise
+
+
+def _create_2_class_labels(num_samples_class_a, num_samples_class_b):
+    labels_class_a = np.zeros(num_samples_class_a)
+    labels_class_b = np.ones(num_samples_class_b)
+    return to_categorical(np.hstack((labels_class_a, labels_class_b)))
+
+
 class MetricNamingSuite(unittest.TestCase):
     @staticmethod
     def test_get_metric_name_accuracy():
@@ -144,6 +184,7 @@ def test_accuracy_get_from_history_none_raise():
         with raises(KeyError):
             find_architecture._get_from_history('accuracy', history_history)
 
+
 class HistoryStoringSuite(unittest.TestCase):
     def test_store_train_history_as_json(self):
         """The code should produce a json file."""