diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 86be92d9ad90d7fcf732a94960e02ef2ede0a5e1..017cfc5cbfa6fa67cb4eabc367a32021923def23 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -1,15 +1,18 @@
 image: $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_NAME
 
 variables:
-    OTB_BUILD: /src/otb/build/OTB/build  # Local OTB build directory
-    OTBTF_SRC: /src/otbtf  # Local OTBTF source directory
-    OTB_TEST_DIR: $OTB_BUILD/Testing/Temporary  # OTB testing directory
-    ARTIFACT_TEST_DIR: $CI_PROJECT_DIR/testing
-    CRC_BOOK_TMP: /tmp/crc_book_tests_tmp
-    DOCKER_BUILDKIT: 1
-    DOCKER_DRIVER: overlay2
-    CACHE_IMAGE_BASE: $CI_REGISTRY_IMAGE:otbtf-base
-    CACHE_IMAGE_BUILDER: $CI_REGISTRY_IMAGE:builder
+  OTBTF_VERSION: "3.2.1"  # Release version; quoted so it is always read as a string
+  OTB_BUILD: /src/otb/build/OTB/build  # Local OTB build directory
+  OTBTF_SRC: /src/otbtf  # Local OTBTF source directory
+  OTB_TEST_DIR: $OTB_BUILD/Testing/Temporary  # OTB testing directory
+  ARTIFACT_TEST_DIR: $CI_PROJECT_DIR/testing
+  CRC_BOOK_TMP: /tmp/crc_book_tests_tmp
+  DOCKER_BUILDKIT: 1
+  DOCKER_DRIVER: overlay2
+  CACHE_IMAGE_BASE: $CI_REGISTRY_IMAGE:otbtf-base  # Pushed and reused via --cache-from
+  CACHE_IMAGE_BUILDER: $CI_REGISTRY_IMAGE:builder  # Pushed and reused via --cache-from
+  BRANCH_IMAGE: $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_NAME  # Image tagged with the current ref name
+  DEV_IMAGE: $CI_REGISTRY_IMAGE:cpu-basic-dev-testing  # Tag the deploy job re-pushes the branch image under
     
 workflow:
   rules:
@@ -44,6 +47,7 @@ docker image:
       --build-arg BASE_IMG="ubuntu:20.04"
       --build-arg BUILDKIT_INLINE_CACHE=1
       "."
+    - docker push $CACHE_IMAGE_BASE
     - >
       docker build
       --target builder
@@ -57,24 +61,22 @@ docker image:
       --build-arg BASE_IMG="ubuntu:20.04"
       --build-arg BUILDKIT_INLINE_CACHE=1
       "."
+    - docker push $CACHE_IMAGE_BUILDER
     - >
       docker build
       --network="host"
       --cache-from $CACHE_IMAGE_BASE
       --cache-from $CACHE_IMAGE_BUILDER
-      --cache-from $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_NAME
-      --cache-from $CI_REGISTRY_IMAGE:cpu-basic-dev-testing
-      --tag $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_NAME
+      --cache-from $BRANCH_IMAGE
+      --cache-from $DEV_IMAGE
+      --tag $BRANCH_IMAGE
       --build-arg OTBTESTS="true"
       --build-arg KEEP_SRC_OTB="true"
       --build-arg BZL_CONFIGS=""
       --build-arg BASE_IMG="ubuntu:20.04"
       --build-arg BUILDKIT_INLINE_CACHE=1
       "."
-  after_script:
-    - docker push $CACHE_IMAGE_BASE
-    - docker push $CACHE_IMAGE_BUILDER
-    - docker push $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_NAME
+    - docker push $BRANCH_IMAGE
 
 .static_analysis_base:
   stage: Static Analysis
@@ -155,24 +157,26 @@ deploy:
   image: docker/compose:latest
   services:
     - name: docker:dind
+  variables:
+    CI_REGISTRY_PUBIMG: $CI_REGISTRY_IMAGE/$OTBTF_VERSION
   before_script:
     - echo -n $CI_REGISTRY_PASSWORD | docker login -u $CI_REGISTRY_USER --password-stdin $CI_REGISTRY
   timeout: 10 hours
   script:
     - echo "Shippping!"
-    - docker pull $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_NAME
-    - docker tag $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_NAME $CI_REGISTRY_IMAGE:cpu-basic-dev-testing
-    - docker push $CI_REGISTRY_IMAGE:cpu-basic-dev-testing
-    - docker build --network='host' --tag $CI_REGISTRY_IMAGE:cpu-basic --build-arg BASE_IMG=ubuntu:20.04 --build-arg BZL_CONFIGS="" .  # cpu-basic
-    - docker push $CI_REGISTRY_IMAGE:cpu-basic
-    - docker build --network='host' --tag $CI_REGISTRY_IMAGE:cpu-basic-dev --build-arg BASE_IMG=ubuntu:20.04 --build-arg BZL_CONFIGS="" --build-arg KEEP_SRC_OTB=true .  # cpu-basic-dev
-    - docker push $CI_REGISTRY_IMAGE:cpu-basic-dev
-    - docker build --network='host' --tag $CI_REGISTRY_IMAGE:gpu --build-arg BASE_IMG=nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04 .  # gpu
-    - docker push $CI_REGISTRY_IMAGE:gpu
-    - docker build --network='host' --tag $CI_REGISTRY_IMAGE:gpu-dev --build-arg BASE_IMG=nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04 --build-arg KEEP_SRC_OTB=true .  # gpu-dev
-    - docker push $CI_REGISTRY_IMAGE:gpu-dev
-    - docker build --network='host' --tag $CI_REGISTRY_IMAGE:gpu-basic --build-arg BASE_IMG=nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04 --build-arg BZL_CONFIGS="" .  # gpu-basic
-    - docker push $CI_REGISTRY_IMAGE:gpu-basic
-    - docker build --network='host' --tag $CI_REGISTRY_IMAGE:gpu-basic-dev --build-arg BZL_CONFIGS="" --build-arg BASE_IMG=nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04 --build-arg KEEP_SRC_OTB=true .  # gpu-basic-dev
-    - docker push $CI_REGISTRY_IMAGE:gpu-basic-dev
+    - docker pull $BRANCH_IMAGE
+    - docker tag $BRANCH_IMAGE $DEV_IMAGE
+    - docker push $DEV_IMAGE
+    - docker build --network='host' --tag $CI_REGISTRY_PUBIMG:cpu-basic --build-arg BASE_IMG=ubuntu:20.04 --build-arg BZL_CONFIGS="" .  # cpu-basic
+    - docker push $CI_REGISTRY_PUBIMG:cpu-basic
+    - docker build --network='host' --tag $CI_REGISTRY_PUBIMG:cpu-basic-dev --build-arg BASE_IMG=ubuntu:20.04 --build-arg BZL_CONFIGS="" --build-arg KEEP_SRC_OTB=true .  # cpu-basic-dev
+    - docker push $CI_REGISTRY_PUBIMG:cpu-basic-dev
+    - docker build --network='host' --tag $CI_REGISTRY_PUBIMG:gpu --build-arg BASE_IMG=nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04 .  # gpu
+    - docker push $CI_REGISTRY_PUBIMG:gpu
+    - docker build --network='host' --tag $CI_REGISTRY_PUBIMG:gpu-dev --build-arg BASE_IMG=nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04 --build-arg KEEP_SRC_OTB=true .  # gpu-dev
+    - docker push $CI_REGISTRY_PUBIMG:gpu-dev
+    - docker build --network='host' --tag $CI_REGISTRY_PUBIMG:gpu-basic --build-arg BASE_IMG=nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04 --build-arg BZL_CONFIGS="" .  # gpu-basic
+    - docker push $CI_REGISTRY_PUBIMG:gpu-basic
+    - docker build --network='host' --tag $CI_REGISTRY_PUBIMG:gpu-basic-dev --build-arg BZL_CONFIGS="" --build-arg BASE_IMG=nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04 --build-arg KEEP_SRC_OTB=true .  # gpu-basic-dev
+    - docker push $CI_REGISTRY_PUBIMG:gpu-basic-dev
 
diff --git a/Dockerfile b/Dockerfile
index d5a644f7bcc1c16251ba27b887eca984608339fb..da634cea3fb7dbbce68b3660b9900e2d60a0d837 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -25,7 +25,8 @@ RUN if $GUI; then \
 RUN ln -s /usr/bin/python3 /usr/local/bin/python && ln -s /usr/bin/pip3 /usr/local/bin/pip
 # NumPy version is conflicting with system's gdal dep and may require venv
 ARG NUMPY_SPEC="==1.22.*"
-RUN pip install --no-cache-dir -U pip wheel mock six future tqdm deprecated "numpy$NUMPY_SPEC" \
+ARG PROTO_SPEC="==3.20.*"
+RUN pip install --no-cache-dir -U pip wheel mock six future tqdm deprecated "numpy$NUMPY_SPEC" "protobuf$PROTO_SPEC" \
  && pip install --no-cache-dir --no-deps keras_applications keras_preprocessing
 
 # ----------------------------------------------------------------------------
diff --git a/RELEASE_NOTES.txt b/RELEASE_NOTES.txt
index f9934e43937077b67096608f5aba02c1b1e6e1ee..d039a2e38bc8d53e188944d66196733102651ccf 100644
--- a/RELEASE_NOTES.txt
+++ b/RELEASE_NOTES.txt
@@ -1,3 +1,10 @@
+Version 3.2.1 (1 jun 2022)
+----------------------------------------------------------------
+* Enhance CI with automatic builds of docker images
+* Fix a bug in `dataset.py` (output type was always Float32)
+* Fix a bug in `otbTensorflowSampler.hxx` (see https://github.com/remicres/otbtf/issues/81)
+* Change the behavior of the `_read_extract_as_np_arr` method: it now returns 3D arrays even for single-band data
+
 Version 3.1 (5 apr 2022)
 ----------------------------------------------------------------
 * Add the "split" strategy in the `PatchesSelection` application. Now the application can select points for 3 independent data splits (train, valid, test).
diff --git a/include/otbTensorflowSampler.hxx b/include/otbTensorflowSampler.hxx
index 966a37969c43ffdb9b7df32ea21dc8a0c7330dd2..323cdfbe455dff56bbe6ff1e062524be9bf691bb 100644
--- a/include/otbTensorflowSampler.hxx
+++ b/include/otbTensorflowSampler.hxx
@@ -179,57 +179,60 @@ TensorflowSampler<TInputImage, TVectorData>::Update()
     if (!itVector.Get()->IsRoot() && !itVector.Get()->IsDocument() && !itVector.Get()->IsFolder())
     {
       DataNodePointer currentGeometry = itVector.Get();
-      PointType       point = currentGeometry->GetPoint();
-
-      // Get the label value
-      labelPix[0] = static_cast<InternalPixelType>(currentGeometry->GetFieldAsInt(m_Field));
-
-      bool hasBeenSampled = true;
-      for (unsigned int i = 0; i < nbInputs; i++)
+      if (currentGeometry->HasField(m_Field))
       {
-        // Get input
-        ImagePointerType inputPtr = const_cast<ImageType *>(this->GetInput(i));
+        PointType       point = currentGeometry->GetPoint();
 
-        // Try to sample the image
-        if (!tf::SampleImage<ImageType>(inputPtr, m_OutputPatchImages[i], point, count, m_PatchSizes[i]))
-        {
-          // If not, reject this sample
-          hasBeenSampled = false;
-        }
-        // If NoData is provided, check if the sampled patch contains a no-data value
-        if (m_NoDataValues.count(i) > 0 && hasBeenSampled)
+        // Get the label value
+        labelPix[0] = static_cast<InternalPixelType>(currentGeometry->GetFieldAsInt(m_Field));
+
+        bool hasBeenSampled = true;
+        for (unsigned int i = 0; i < nbInputs; i++)
         {
-          IndexType outIndex;
-          outIndex[0] = 0;
-          outIndex[1] = count * m_PatchSizes[i][1];
-          RegionType region(outIndex, m_PatchSizes[i]);
+          // Get input
+          ImagePointerType inputPtr = const_cast<ImageType *>(this->GetInput(i));
 
-          IteratorType it(m_OutputPatchImages[i], region);
-          for (it.GoToBegin(); !it.IsAtEnd(); ++it)
+          // Try to sample the image
+          if (!tf::SampleImage<ImageType>(inputPtr, m_OutputPatchImages[i], point, count, m_PatchSizes[i]))
+          {
+            // If not, reject this sample
+            hasBeenSampled = false;
+          }
+          // If NoData is provided, check if the sampled patch contains a no-data value
+          if (m_NoDataValues.count(i) > 0 && hasBeenSampled)
           {
-            PixelType pix = it.Get();
-            for (unsigned int band = 0; band < pix.Size(); band++)
-              if (pix[band] == m_NoDataValues[i])
-                hasBeenSampled = false;
+            IndexType outIndex;
+            outIndex[0] = 0;
+            outIndex[1] = count * m_PatchSizes[i][1];
+            RegionType region(outIndex, m_PatchSizes[i]);
+
+            IteratorType it(m_OutputPatchImages[i], region);
+            for (it.GoToBegin(); !it.IsAtEnd(); ++it)
+            {
+              PixelType pix = it.Get();
+              for (unsigned int band = 0; band < pix.Size(); band++)
+                if (pix[band] == m_NoDataValues[i])
+                  hasBeenSampled = false;
+            }
           }
+        } // Next input
+        if (hasBeenSampled)
+        {
+          // Fill label
+          labelIndex[1] = count;
+          m_OutputLabelImage->SetPixel(labelIndex, labelPix);
+
+          // update count
+          count++;
+        }
+        else
+        {
+          rejected++;
         }
-      } // Next input
-      if (hasBeenSampled)
-      {
-        // Fill label
-        labelIndex[1] = count;
-        m_OutputLabelImage->SetPixel(labelIndex, labelPix);
 
-        // update count
-        count++;
+        // Update progress
+        progress.CompletedPixel();
       }
-      else
-      {
-        rejected++;
-      }
-
-      // Update progress
-      progress.CompletedPixel();
     }
 
     ++itVector;