diff --git a/pysegcnn/main/eval.py b/pysegcnn/main/eval.py
index 2ecfe6055aa079b876a95158e55d1aae420648a5..d97deec7e44fb5a80527901c9640b4c09761af13 100644
--- a/pysegcnn/main/eval.py
+++ b/pysegcnn/main/eval.py
@@ -1,9 +1,9 @@
 """Main script to evaluate a model.
+This is a command line script, which can be customized on the terminal.
 Steps to run a model evaluation:
-    1. Configure the model evaluation in :py:mod:`pysegcnn.main.eval_config.py`
-    2. Save :py:mod:`pysegcnn.main.eval_config.py`
-    3. In a terminal, navigate to the repository's root directory
-    4. Run
+    1. In a terminal, navigate to the repository's root directory
+    2. Run
@@ -11,6 +11,8 @@ Steps to run a model evaluation:
         python pysegcnn/main/eval.py
+    This will print a list of options for the evaluation.
@@ -26,15 +28,59 @@ License
 # !/usr/bin/env python
 # -*- coding: utf-8 -*-
+# builtins
+import sys
 # locals
+from pysegcnn.core.cli import evaluation_parser
 from pysegcnn.core.trainer import NetworkInference
-from pysegcnn.main.eval_config import eval_config
+from pysegcnn.core.utils import search_files
+from pysegcnn.main.eval_config import trg_ds, trg_ds_split
 if __name__ == '__main__':
-    # instanciate the network inference class
-    inference = NetworkInference(**eval_config)
-    # evaluate model
-    output = inference.evaluate()
+    # define command line argument parser
+    parser = evaluation_parser()
+    # parse command line arguments
+    args = sys.argv[1:]
+    if not args:
+        # called without any arguments: show the available options and stop
+        parser.print_help()
+        sys.exit()
+    else:
+        args = parser.parse_args(args)
+    # check whether the input path exists
+    if args.source.exists():
+        # get the model state files matching the pattern below the source path
+        state_files = search_files(args.source, args.pattern)
+        # check whether to evaluate on datasets defined at training time or
+        # on explicitly defined datasets
+        # NOTE: two distinct dictionaries are created on purpose, a chained
+        #       assignment (ds = ds_split = {}) would bind both names to the
+        #       same object, such that modifying one modifies the other
+        ds, ds_split = {}, {}
+        if not args.implicit:
+            # explicit evaluation: use the dataset configuration defined in
+            # pysegcnn/main/eval_config.py
+            ds = trg_ds
+            ds_split = trg_ds_split
+        # instantiate the network inference class
+        inference = NetworkInference(
+            state_files=state_files,
+            implicit=args.implicit,
+            domain=args.domain,
+            test=args.subset,
+            aggregate=args.aggregate,
+            ds=ds,
+            ds_split=ds_split,
+            map_labels=args.map_labels,
+            predict_scene=args.predict_scene,
+            plot_scenes=args.plot_scenes,
+            cm=args.confusion_matrix)
+        # evaluate models
+        output = inference.evaluate()
+    else:
+        print('{} does not exist.'.format(str(args.source)))
+        # exit with a non-zero status to signal the failure to the caller
+        sys.exit(1)
diff --git a/pysegcnn/main/eval_config.py b/pysegcnn/main/eval_config.py
index ad6a5ada5de3d5b68d5b03ac332159e99a8288d2..da215db0120dda742a5fa721b2a8a07fd835f8c6 100644
--- a/pysegcnn/main/eval_config.py
+++ b/pysegcnn/main/eval_config.py
@@ -1,8 +1,6 @@
-"""The configuration file to train and evaluate a model.
+"""The configuration file to evaluate a model on an explicitly defined dataset.
-The configuration is handled by the configuration dictionaries.
-Modify the values to your needs, but DO NOT modify the keys.
+See pysegcnn/main/eval.py for more details.
@@ -21,25 +19,19 @@ License
 # builtins
 import pathlib
-# locals
-from pysegcnn.core.utils import search_files
-# path to this file
-HERE = pathlib.Path(__file__).resolve().parent
 # path to the datasets on the current machine
 DRIVE_PATH = pathlib.Path('C:/Eurac/Projects/CCISNOW/Datasets/')
 # DRIVE_PATH = pathlib.Path('/mnt/CEPH_PROJECTS/cci_snow/dfrisinghelli/Datasets/')  # nopep8
 # name and paths to the datasets
 DATASETS = {'Sparcs': DRIVE_PATH.joinpath('Sparcs'),
-            'Alcd': DRIVE_PATH.joinpath('Alcd/60m')
+            'Alcd': DRIVE_PATH.joinpath('Alcd')
 # name of the target dataset
 TRG_DS = 'Alcd'
-# spectral bands to use for training
+# spectral bands to use for evaluation
 BANDS = ['red', 'green', 'blue', 'nir', 'swir1', 'swir2']
 # tile size of a single sample
@@ -72,99 +64,3 @@ trg_ds_split = {
     'tvratio': 0.8,
-# the evaluation configuration
-eval_config = {
-    # -------------------------------------------------------------------------
-    # ----------------------------- Evaluation --------------------------------
-    # -------------------------------------------------------------------------
-    # these options are only used for evaluating a trained model using
-    # pysegcnn.main.eval.py
-    # the model(s) to evaluate
-    'state_files': search_files(HERE, '*.pt'),
-    # Evaluate on datasets defined at training time ---------------------------
-    # implicit=True,  models are evaluated on the training, validation
-    #                 and test datasets defined at training time
-    # implicit=False, models are evaluated on an explicitly defined dataset
-    #                 'ds'
-    'implicit': True,
-    # 'implicit': False,
-    # The options 'domain' and 'test' define on which domain (source, target)
-    # and on which set (training, validation, test) to evaluate the model.
-    # NOTE: If the specified set was not available at training time, an error
-    #       is raised.
-    # whether to evaluate the model on the labelled source domain or the
-    # (un)labelled target domain
-    # if domain='trg',  target domain
-    # if domain='src',  source domain
-    # 'domain': 'src',
-    'domain': 'trg',
-    # the subset to evaluate the model on
-    # test=False, 0 means evaluating on the validation set
-    # test=True, 1 means evaluating on the test set
-    # test=None means evaluating on the training set
-    # 'test': True,
-    'test': None,
-    # 'test': False,
-    # whether to map the model labels from the model source domain to the
-    # defined 'domain'
-    # For models trained via unsupervised domain adaptation, the classes of the
-    # source domain, i.e. the classes the model is trained with, may differ
-    # from the classes of the target domain. Setting 'map_labels'=True, means
-    # mapping the source classes to the target classes. Obviously, this is only
-    # possible if the target classes are a subset of the source classes.
-    'map_labels': False,
-    # Evaluate on an explicitly defined dataset -------------------------------
-    # OPTIONAL: If 'trg_ds' is specified and 'implicit'=False, the model is not
-    #           evaluated on the datasets defined at training time, but on the
-    #           dataset defined by 'trg_ds'.
-    # the dataset to evaluate the model on (optional)
-    'ds': trg_ds,
-    # the dataset split to use for 'ds'
-    'ds_split': trg_ds_split,
-    # Evaluation options ------------------------------------------------------
-    # whether to compute and plot the confusion matrix
-    # output path is: pysegcnn/main/_graphics/
-    # 'cm': True,
-    'cm': False,
-    # whether to predict each sample or each scene individually
-    # False: each sample is predicted individually and the scenes are not
-    #        reconstructed
-    # True: each scene is first reconstructed and then the whole scene is
-    #       predicted at once
-    # NOTE: this option works only for datasets split by split_mode="scene"
-    'predict_scene': True,
-    # whether to save plots of (input, ground truth, prediction) for each scene
-    # in the train/validation/test dataset to disk, applies if
-    # predict_scene=True
-    # output path is: pysegcnn/main/_scenes/
-    'plot_scenes': True,
-    # plot_bands defines the bands used to plot a false color composite of
-    # the input scene: red': bands[0], green': bands[1], blue': bands[2]
-    'plot_bands': ['nir', 'red', 'green'],
-    # size of the figures
-    'figsize': (16, 9),
-    # degree of constrast stretching for false color composite
-    'alpha': 5