diff --git a/vamb/aamb_encode.py b/vamb/aamb_encode.py
index d7ce663d..e6867a16 100644
--- a/vamb/aamb_encode.py
+++ b/vamb/aamb_encode.py
@@ -122,9 +122,6 @@ def _reparameterization(self, mu, logvar):
         std = torch.exp(logvar / 2)
         sampled_z = Variable(Tensor(self.rng.normal(0, 1, (mu.size(0), self.ld))))
-
-        if self.usecuda:
-            sampled_z = sampled_z.cuda()
         z = sampled_z * std + mu

         return z
@@ -248,7 +245,7 @@ def trainmodel(
         # Define adversarial loss for the discriminators
         adversarial_loss = torch.nn.BCELoss()
         if self.usecuda:
-            adversarial_loss.cuda()
+            adversarial_loss = adversarial_loss.cuda()

         #### Optimizers
         optimizer_E = torch.optim.Adam(enc_params, lr=lr)
@@ -290,16 +287,15 @@ def trainmodel(
                 )

                 # Sample noise as discriminator Z,Y ground truth
+                device = "cuda" if self.usecuda else "cpu"
                 if self.usecuda:
                     z_prior = torch.cuda.FloatTensor(nrows, self.ld).normal_()
-                    z_prior.cuda()
                     ohc = RelaxedOneHotCategorical(
-                        torch.tensor([T], device="cuda"),
-                        torch.ones([nrows, self.y_len], device="cuda"),
+                        torch.tensor([T], device=device),
+                        torch.ones([nrows, self.y_len], device=device),
                     )
                     y_prior = ohc.sample()
-                    y_prior = y_prior.cuda()

                 else:
                     z_prior = Variable(Tensor(self.rng.normal(0, 1, (nrows, self.ld))))
@@ -308,9 +304,8 @@ def trainmodel(

                 del ohc

-                if self.usecuda:
-                    depths_in = depths_in.cuda()
-                    tnfs_in = tnfs_in.cuda()
+                depths_in = depths_in.to(device)
+                tnfs_in = tnfs_in.to(device)

                 # -----------------
                 #  Train Generator
@@ -461,15 +456,15 @@ def get_latents(
         with torch.no_grad():
             for depths_in, tnfs_in, _, _ in new_data_loader:
                 nrows, _ = depths_in.shape
+                device = "cuda" if self.usecuda else "cpu"
+
                 if self.usecuda:
                     z_prior = torch.cuda.FloatTensor(nrows, self.ld).normal_()
-                    z_prior.cuda()
                     ohc = RelaxedOneHotCategorical(
-                        torch.tensor([0.15], device="cuda"),
-                        torch.ones([nrows, self.y_len], device="cuda"),
+                        torch.tensor([0.15], device=device),
+                        torch.ones([nrows, self.y_len], device=device),
                     )
                     y_prior = ohc.sample()
-                    y_prior = y_prior.cuda()

                 else:
                     z_prior = Variable(Tensor(self.rng.normal(0, 1, (nrows, self.ld))))
@@ -478,9 +473,8 @@ def get_latents(
                 )
                 y_prior = ohc.sample()

-                if self.usecuda:
-                    depths_in = depths_in.cuda()
-                    tnfs_in = tnfs_in.cuda()
+                depths_in = depths_in.to(device)
+                tnfs_in = tnfs_in.to(device)

                 mu, _, _, _, y_sample = self(depths_in, tnfs_in)[0:5]
diff --git a/vamb/cluster.py b/vamb/cluster.py
index c6bf9b84..8db575f4 100644
--- a/vamb/cluster.py
+++ b/vamb/cluster.py
@@ -224,9 +224,9 @@ def _check_params(

     def _init_histogram_kept_mask(self, N: int) -> tuple[_Tensor, _Tensor]:
         "N is number of contigs"
-        kept_mask = _torch.ones(N, dtype=_torch.bool)
-        if self.cuda:
-            kept_mask = kept_mask.cuda()
+        kept_mask = _torch.ones(
+            N, dtype=_torch.bool, device="cuda" if self.cuda else "cpu"
+        )
         histogram = _torch.empty(_ceil(_XMAX / _DELTA_X))
         return histogram, kept_mask
@@ -258,10 +258,9 @@ def __init__(
         _normalize(torch_matrix, inplace=True)

         # Move to GPU
-        torch_lengths = _torch.Tensor(lengths)
-        if cuda:
-            torch_matrix = torch_matrix.cuda()
-            torch_lengths = torch_lengths.cuda()
+        device = "cuda" if cuda else "cpu"
+        torch_matrix = torch_matrix.to(device)
+        torch_lengths = _torch.tensor(lengths, dtype=_torch.float32, device=device)

         self.maxsteps: int = maxsteps
         self.minsuccesses: int = minsuccesses
@@ -274,9 +273,7 @@ def __init__(
         self.indices = _torch.arange(len(matrix))
         self.order = _np.argsort(lengths)[::-1]
         self.order_index = 0
-        self.lengths = _torch.Tensor(lengths)
-        if cuda:
-            self.lengths = self.lengths.cuda()
+        self.lengths = torch_lengths
         self.n_emitted_clusters = 0
         self.n_remaining_points = len(torch_matrix)
         self.peak_valley_ratio = 0.1
@@ -321,7 +318,7 @@ def pack(self):
             cpu_kept_mask = self.kept_mask.cpu()
             self.matrix = _vambtools.torch_inplace_maskarray(
                 self.matrix.cpu(), cpu_kept_mask
-            ).cuda()
+            ).to("cuda")
             self.indices = self.indices[cpu_kept_mask]
         else:
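Editor's note on the hunks above: the common pattern in aamb_encode.py and cluster.py is to compute the device string once and allocate tensors directly on it, instead of allocating on the CPU and calling .cuda() afterwards. A minimal self-contained sketch of that pattern (illustrative only; make_buffers and use_cuda are hypothetical names, not vamb code):

import torch

def make_buffers(n: int, use_cuda: bool) -> tuple[torch.Tensor, torch.Tensor]:
    # Pick the device once; every allocation below lands there directly.
    device = "cuda" if use_cuda else "cpu"
    kept_mask = torch.ones(n, dtype=torch.bool, device=device)  # no CPU-then-.cuda() round trip
    noise = torch.randn(n, 32, device=device)  # sampled directly on the target device
    return kept_mask, noise

Allocating on the target device in one step saves a host allocation and a host-to-device copy per tensor, and removes the easy-to-miss bug class where one tensor is left behind on the CPU.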
diff --git a/vamb/encode.py b/vamb/encode.py
index 67894d33..2260d52d 100644
--- a/vamb/encode.py
+++ b/vamb/encode.py
@@ -274,10 +274,9 @@ def _encode(self, tensor: Tensor) -> Tensor:

     # sample with gaussian noise
     def reparameterize(self, mu: Tensor) -> Tensor:
-        epsilon = _torch.randn(mu.size(0), mu.size(1))
-
-        if self.usecuda:
-            epsilon = epsilon.cuda()
+        epsilon = _torch.randn(
+            mu.size(0), mu.size(1), device="cuda" if self.usecuda else "cpu"
+        )

         epsilon.requires_grad = True
@@ -392,11 +391,11 @@ def trainepoch(
             tnf_in.requires_grad = True
             abundance_in.requires_grad = True

-            if self.usecuda:
-                depths_in = depths_in.cuda()
-                tnf_in = tnf_in.cuda()
-                abundance_in = abundance_in.cuda()
-                weights = weights.cuda()
+            device = "cuda" if self.usecuda else "cpu"
+            depths_in = depths_in.to(device)
+            tnf_in = tnf_in.to(device)
+            abundance_in = abundance_in.to(device)
+            weights = weights.to(device)

             optimizer.zero_grad()
@@ -465,10 +464,10 @@ def encode(self, data_loader) -> _np.ndarray:
         with _torch.no_grad():
             for depths, tnf, ab, _ in new_data_loader:
                 # Move input to GPU if requested
-                if self.usecuda:
-                    depths = depths.cuda()
-                    tnf = tnf.cuda()
-                    ab = ab.cuda()
+                device = "cuda" if self.usecuda else "cpu"
+                depths = depths.to(device)
+                tnf = tnf.to(device)
+                ab = ab.to(device)

                 # Evaluate
                 _, _, _, mu = self(depths, tnf, ab)
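The encode.py hunks lean on the fact that Tensor.to(device) returns the tensor unchanged when it already lives on that device, which is what lets an unconditional move replace the old `if self.usecuda:` branch. A sketch under that assumption (encode_batches, model, and loader are placeholders, not the vamb API):

import torch

def encode_batches(model: torch.nn.Module, loader, use_cuda: bool) -> list[torch.Tensor]:
    # Hoisted out of the loop; the diff recomputes the string per batch,
    # which is harmless but slightly redundant.
    device = torch.device("cuda" if use_cuda else "cpu")
    outputs = []
    with torch.no_grad():
        for depths, tnf, ab in loader:
            # .to(device) is a no-op for tensors already on `device`.
            outputs.append(model(depths.to(device), tnf.to(device), ab.to(device)))
    return outputs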
"cuda" if self.usecuda else "cpu" + depths_in = depths_in.to(device) + tnf_in = tnf_in.to(device) + abundance_in = abundance_in.to(device) + weights = weights.to(device) + labels_in = labels_in.to(device) optimizer.zero_grad() @@ -677,11 +671,11 @@ def encode(self, data_loader): with _torch.no_grad(): for depths, tnf, ab, weights, labels in new_data_loader: # Move input to GPU if requested - if self.usecuda: - depths = depths.cuda() - tnf = tnf.cuda() - ab = ab.cuda() - labels = labels.cuda() + device = "cuda" if self.usecuda else "cpu" + depths = depths.to(device) + tnf = tnf.to(device) + ab = ab.to(device) + labels = labels.to(device) # Evaluate _, _, _, _, mu, _ = self(depths, tnf, ab, labels) @@ -882,17 +876,17 @@ def trainepoch(self, data_loader, epoch, optimizer, batchsteps): abundance_in_unsup.requires_grad = True labels_in_unsup.requires_grad = True - if self.VAEVamb.usecuda: - depths_in_sup = depths_in_sup.cuda() - tnf_in_sup = tnf_in_sup.cuda() - abundance_in_sup = abundance_in_sup.cuda() - weights_in_sup = weights_in_sup.cuda() - labels_in_sup = labels_in_sup.cuda() - depths_in_unsup = depths_in_unsup.cuda() - tnf_in_unsup = tnf_in_unsup.cuda() - abundance_in_unsup = abundance_in_unsup.cuda() - weights_in_unsup = weights_in_unsup.cuda() - labels_in_unsup = labels_in_unsup.cuda() + device = "cuda" if self.VAEVamb.usecuda else "cpu" + depths_in_sup = depths_in_sup.to(device) + tnf_in_sup = tnf_in_sup.to(device) + abundance_in_sup = abundance_in_sup.to(device) + weights_in_sup = weights_in_sup.to(device) + labels_in_sup = labels_in_sup.to(device) + depths_in_unsup = depths_in_unsup.to(device) + tnf_in_unsup = tnf_in_unsup.to(device) + abundance_in_unsup = abundance_in_unsup.to(device) + weights_in_unsup = weights_in_unsup.to(device) + labels_in_unsup = labels_in_unsup.to(device) optimizer.zero_grad() @@ -913,15 +907,15 @@ def trainepoch(self, data_loader, epoch, optimizer, batchsteps): abundance_out_unsup, mu_vamb_unsup, ) = self.VAEVamb(depths_in_unsup, tnf_in_unsup, abundance_in_unsup) - logsigma_vamb_unsup = _torch.zeros(mu_vamb_unsup.size()) - if self.usecuda: - logsigma_vamb_unsup = logsigma_vamb_unsup.cuda() + logsigma_vamb_unsup = _torch.zeros( + mu_vamb_unsup.size(), device="cuda" if self.VAEVamb.usecuda else "cpu" + ) _, _, _, mu_vamb_sup_s = self.VAEVamb( depths_in_sup, tnf_in_sup, abundance_in_sup ) - logsigma_vamb_sup_s = _torch.zeros(mu_vamb_sup_s.size()) - if self.usecuda: - logsigma_vamb_sup_s = logsigma_vamb_sup_s.cuda() + logsigma_vamb_sup_s = _torch.zeros( + mu_vamb_sup_s.size(), device="cuda" if self.VAEVamb.usecuda else "cpu" + ) labels_out_unsup, mu_labels_unsup, logsigma_labels_unsup = self.VAELabels( labels_in_unsup ) diff --git a/vamb/taxvamb_encode.py b/vamb/taxvamb_encode.py index 924ca6c0..5ea9d4f8 100644 --- a/vamb/taxvamb_encode.py +++ b/vamb/taxvamb_encode.py @@ -900,11 +900,11 @@ def predict(self, data_loader) -> Iterable[tuple[_np.ndarray, _np.ndarray]]: with _torch.no_grad(): for depths, tnf, abundances, weights in new_data_loader: # Move input to GPU if requested - if self.usecuda: - depths = depths.cuda() - tnf = tnf.cuda() - abundances = abundances.cuda() - weights = weights.cuda() + device = "cuda" if self.usecuda else "cpu" + depths = depths.to(device) + tnf = tnf.to(device) + abundances = abundances.to(device) + weights = weights.to(device) # Evaluate labels = self(depths, tnf, abundances, weights) @@ -931,12 +931,12 @@ def predict_with_ground_truth( with _torch.no_grad(): for depths_in, tnf_in, abundances_in, weights, labels_in in 
diff --git a/vamb/taxvamb_encode.py b/vamb/taxvamb_encode.py
index 924ca6c0..5ea9d4f8 100644
--- a/vamb/taxvamb_encode.py
+++ b/vamb/taxvamb_encode.py
@@ -900,11 +900,11 @@ def predict(self, data_loader) -> Iterable[tuple[_np.ndarray, _np.ndarray]]:
         with _torch.no_grad():
             for depths, tnf, abundances, weights in new_data_loader:
                 # Move input to GPU if requested
-                if self.usecuda:
-                    depths = depths.cuda()
-                    tnf = tnf.cuda()
-                    abundances = abundances.cuda()
-                    weights = weights.cuda()
+                device = "cuda" if self.usecuda else "cpu"
+                depths = depths.to(device)
+                tnf = tnf.to(device)
+                abundances = abundances.to(device)
+                weights = weights.to(device)

                 # Evaluate
                 labels = self(depths, tnf, abundances, weights)
@@ -931,12 +931,12 @@ def predict_with_ground_truth(
         with _torch.no_grad():
             for depths_in, tnf_in, abundances_in, weights, labels_in in new_data_loader:
-                if self.usecuda:
-                    depths_in = depths_in.cuda()
-                    tnf_in = tnf_in.cuda()
-                    abundances_in = abundances_in.cuda()
-                    weights = weights.cuda()
-                    labels_in = labels_in.cuda()
+                device = "cuda" if self.usecuda else "cpu"
+                depths_in = depths_in.to(device)
+                tnf_in = tnf_in.to(device)
+                abundances_in = abundances_in.to(device)
+                weights = weights.to(device)
+                labels_in = labels_in.to(device)

                 labels_out = self(depths_in, tnf_in, abundances_in, weights)
                 loss, correct_labels = self.calc_loss(labels_in, labels_out)
@@ -1004,12 +1004,12 @@ def trainepoch(
             abundances_in.requires_grad = True
             labels_in.requires_grad = True

-            if self.usecuda:
-                depths_in = depths_in.cuda()
-                tnf_in = tnf_in.cuda()
-                abundances_in = abundances_in.cuda()
-                weights = weights.cuda()
-                labels_in = labels_in.cuda()
+            device = "cuda" if self.usecuda else "cpu"
+            depths_in = depths_in.to(device)
+            tnf_in = tnf_in.to(device)
+            abundances_in = abundances_in.to(device)
+            weights = weights.to(device)
+            labels_in = labels_in.to(device)

             optimizer.zero_grad()
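The per-tensor .to(device) lines recur in several loops across taxvamb_encode.py (and in the files above). A small helper, hypothetical and not part of this diff, would collapse each block to a single call:

import torch
from collections.abc import Iterable

def to_device(tensors: Iterable[torch.Tensor], device: str) -> tuple[torch.Tensor, ...]:
    # Moves every tensor in one pass; .to() is a no-op for tensors
    # already on `device`.
    return tuple(t.to(device) for t in tensors)

# Usage inside one of the loops above:
# depths_in, tnf_in, abundances_in, weights, labels_in = to_device(
#     (depths_in, tnf_in, abundances_in, weights, labels_in), device
# )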