diff --git a/src/ipa/raspberrypi/raspberrypi.cpp b/src/ipa/raspberrypi/raspberrypi.cpp
index ebe3013a..ba63480e 100644
--- a/src/ipa/raspberrypi/raspberrypi.cpp
+++ b/src/ipa/raspberrypi/raspberrypi.cpp
@@ -67,7 +67,7 @@ public:
 	IPARPi()
 		: lastMode_({}), controller_(), controllerInit_(false),
 		  frameCount_(0), checkCount_(0), mistrustCount_(0),
-		  lsTable_(nullptr)
+		  lsTable_(nullptr), firstStart_(true)
 	{
 	}
 
@@ -145,6 +145,9 @@ private:
 	/* LS table allocation passed in from the pipeline handler. */
 	FileDescriptor lsTableHandle_;
 	void *lsTable_;
+
+	/* True for the first start(), which uses the startup frame counts. */
+	bool firstStart_;
 };
 
 int IPARPi::init(const IPASettings &settings)
@@ -180,6 +183,27 @@ int IPARPi::start(const IPAOperationData &data, IPAOperationData *result)
 		result->operation |= RPi::IPA_CONFIG_SENSOR;
 	}
 
+	/*
+	 * Initialise frame counts, and decide how many frames must be hidden or
+	 * "mistrusted", which depends on whether this is a startup from cold,
+	 * or merely a mode switch in a running system.
+	 */
+	frameCount_ = 0;
+	checkCount_ = 0;
+	unsigned int dropFrame = 0;
+	if (firstStart_) {
+		dropFrame = helper_->HideFramesStartup();
+		mistrustCount_ = helper_->MistrustFramesStartup();
+	} else {
+		dropFrame = helper_->HideFramesModeSwitch();
+		mistrustCount_ = helper_->MistrustFramesModeSwitch();
+	}
+
+	result->data.push_back(dropFrame);
+	result->operation |= RPi::IPA_CONFIG_DROP_FRAMES;
+
+	firstStart_ = false;
+
 	return 0;
 }
 
@@ -305,25 +329,6 @@ void IPARPi::configure(const CameraSensorInfo &sensorInfo,
 	/* Pass the camera mode to the CamHelper to setup algorithms. */
 	helper_->SetCameraMode(mode_);
 
-	/*
-	 * Initialise frame counts, and decide how many frames must be hidden or
-	 *"mistrusted", which depends on whether this is a startup from cold,
-	 * or merely a mode switch in a running system.
-	 */
-	frameCount_ = 0;
-	checkCount_ = 0;
-	unsigned int dropFrame = 0;
-	if (controllerInit_) {
-		dropFrame = helper_->HideFramesModeSwitch();
-		mistrustCount_ = helper_->MistrustFramesModeSwitch();
-	} else {
-		dropFrame = helper_->HideFramesStartup();
-		mistrustCount_ = helper_->MistrustFramesStartup();
-	}
-
-	result->data.push_back(dropFrame);
-	result->operation |= RPi::IPA_CONFIG_DROP_FRAMES;
-
 	if (!controllerInit_) {
 		/* Load the tuning file for this sensor. */
 		controller_.Read(tuningFile_.c_str());
diff --git a/src/libcamera/pipeline/raspberrypi/raspberrypi.cpp b/src/libcamera/pipeline/raspberrypi/raspberrypi.cpp
index 593fd7ac..439c21ce 100644
--- a/src/libcamera/pipeline/raspberrypi/raspberrypi.cpp
+++ b/src/libcamera/pipeline/raspberrypi/raspberrypi.cpp
@@ -745,13 +745,6 @@ int PipelineHandlerRPi::start(Camera *camera, [[maybe_unused]] ControlList *cont
 		return ret;
 	}
 
-	ret = queueAllBuffers(camera);
-	if (ret) {
-		LOG(RPI, Error) << "Failed to queue buffers";
-		stop(camera);
-		return ret;
-	}
-
 	/* Check if a ScalerCrop control was specified. */
 	if (controls)
 		data->applyScalerCrop(*controls);
@@ -779,6 +772,19 @@ int PipelineHandlerRPi::start(Camera *camera, [[maybe_unused]] ControlList *cont
 			LOG(RPI, Error) << "V4L2 staggered set failed";
 	}
 
+	if (result.operation & RPi::IPA_CONFIG_DROP_FRAMES) {
+		/* Configure the number of dropped frames required on startup. */
+		data->dropFrameCount_ = result.data[0];
+	}
+
+	/* dropFrameCount_ must be set before queueing buffers. */
+	ret = queueAllBuffers(camera);
+	if (ret) {
+		LOG(RPI, Error) << "Failed to queue buffers";
+		stop(camera);
+		return ret;
+	}
+
 	/*
 	 * IPA configure may have changed the sensor flips - hence the bayer
 	 * order. Get the sensor format and set the ISP input now.
@@ -1237,11 +1243,6 @@ int RPiCameraData::configureIPA(const CameraConfiguration *config)
 			LOG(RPI, Error) << "V4L2 staggered set failed";
 	}
 
-	if (result.operation & RPi::IPA_CONFIG_DROP_FRAMES) {
-		/* Configure the number of dropped frames required on startup. */
-		dropFrameCount_ = result.data[resultIdx++];
-	}
-
 	/*
 	 * Configure the H/V flip controls based on the combination of
 	 * the sensor and user transform.
