diff --git a/R/dictionary.R b/R/dictionary.R
index 57228d9d..dbb40a35 100644
--- a/R/dictionary.R
+++ b/R/dictionary.R
@@ -2,3 +2,5 @@
 
 scaled_string = "_scaled"
 adjusted_string = "_adjusted"
+
+warning_for_scaling_with_few_genes = "tidybulk says: There are < 100 features/genes that are present in all you samples. Because edgeR::calcNormFactors does not allow NAs, the scaling is performed on that limited set of features.genes. The scaling could not be accurate, it is adivasble to perform impute_missing_abundance() before scaling. It is possible to filter the imputed counts after scaling."
diff --git a/R/functions.R b/R/functions.R
index e26d2942..18a9f915 100755
--- a/R/functions.R
+++ b/R/functions.R
@@ -3174,7 +3174,8 @@ fill_NA_using_formula = function(.data,
 																 .sample = NULL,
 																 .transcript = NULL,
 																 .abundance = NULL,
-																 .abundance_scaled = NULL){
+																 .abundance_scaled = NUL,
+																 suffix = "_imputed"){
 
 	# Get column names
 	.sample = enquo(.sample)
@@ -3194,7 +3195,6 @@ fill_NA_using_formula = function(.data,
 
 
 	# Create NAs for missing sample/transcript pair
-
  .data_completed =
 		.data %>%
 
@@ -3203,9 +3203,29 @@ fill_NA_using_formula = function(.data,
  		mutate(ct_data = map(ct_data, ~ .x %>% droplevels() %>% complete(!!as.symbol(quo_name(.sample)), !!.transcript) )) %>%
  		unnest(ct_data)
 
+ # For non scaled counts create a pseudo scale based on library size, then calculate imputed and scale back
+ abundance_is_int = .data %>% slice(1) %>% pull(!!.abundance) %>% class() %>% equals("integer")
+ .data =
+   .data %>%
+   group_by(!!.sample) %>%
+   mutate(library_size__ = sum(!!.abundance)) %>%
+   ungroup() %>%
+   mutate(!!.abundance := !!.abundance / library_size__)
+
+ imputed_column = sprintf("%s%s", quo_name(.abundance), suffix )
+ imputed_column_scaled = sprintf("%s%s", quo_name(.abundance_scaled), suffix )
+
+ # Divide the dataset
  .data_OK =
  	.data %>%
- 	anti_join(.data_completed %>% filter(!!.abundance %>% is.na) %>% select( !!.transcript, col_formula) %>% distinct(), by = c(quo_name(.transcript), col_formula))
+ 	anti_join(.data_completed %>% filter(!!.abundance %>% is.na) %>% select( !!.transcript, col_formula) %>% distinct(), by = c(quo_name(.transcript), col_formula)) %>%
+
+   # Add the imputed column
+   mutate(!!as.symbol(imputed_column) := !!.abundance) %>%
+   when(
+     quo_is_symbol(.abundance_scaled) ~ .x %>%
+       mutate(!!as.symbol(imputed_column_scaled) := !!.abundance_scaled)
+   )
 
  .data_FIXED =
  .data %>%
@@ -3216,16 +3236,22 @@ fill_NA_using_formula = function(.data,
 	.data_completed %>%
 		filter(!!.abundance %>% is.na) %>%
 		select(!!.sample, !!.transcript) %>%
-		left_join(.data %>% pivot_sample(!!.sample)) %>%
-		left_join(.data %>% pivot_transcript(!!.transcript))
-	) %>%
+		left_join(.data %>% pivot_sample(!!.sample), by = quo_name(.sample)) %>%
+		left_join(.data %>% pivot_transcript(!!.transcript), by = quo_name(.transcript))
+	)
+
+ # Clean environment
+ rm(.data_completed)
+ gc()
+
+ .data_FIXED %>%
 
 	# Group by covariate
 	nest(cov_data = -c(col_formula, !!.transcript)) %>%
 	mutate(cov_data = map(cov_data, ~
 											.x %>%
 											mutate(
-												!!.abundance := ifelse(
+											  !!as.symbol(imputed_column) := ifelse(
 													!!.abundance %>% is.na,
 													median(!!.abundance, na.rm = TRUE),!!.abundance
 												)
@@ -3235,7 +3261,7 @@ fill_NA_using_formula = function(.data,
 											ifelse_pipe(
 												quo_is_symbol(.abundance_scaled),
 												~ .x %>% mutate(
-													!!.abundance_scaled := ifelse(
+												  !!as.symbol(imputed_column_scaled) := ifelse(
 														!!.abundance_scaled %>% is.na,
 														median(!!.abundance_scaled, na.rm = TRUE),!!.abundance_scaled
 													)
@@ -3250,6 +3276,11 @@ fill_NA_using_formula = function(.data,
 	.data_OK %>%
 		bind_rows(.data_FIXED) %>%
 
+	  # Scale back the pseudoscaling
+	  mutate(!!.abundance := !!.abundance * library_size__) %>%
+	  select(-library_size__) %>%
+	  when(abundance_is_int ~ mutate(., !!.abundance := as.integer(!!.abundance)), ~ (.)) %>%
+
 		# Reattach internals
 		reattach_internals(.data)
 
@@ -3290,6 +3321,9 @@ fill_NA_using_value = function(.data,
 	.feature = enquo(.transcript)
 	.value = enquo(.abundance)
 
+	# Scale based on library size
+
+
 	# Create NAs for missing element/feature pair
 	df_to_impute =
 		.data %>%
diff --git a/R/methods.R b/R/methods.R
index a9f92ee3..f0fa6588 100755
--- a/R/methods.R
+++ b/R/methods.R
@@ -3685,7 +3685,8 @@ setGeneric("impute_missing_abundance", function(.data,
 																				.formula,
 																				.sample = NULL,
 																				.transcript = NULL,
-																				.abundance = NULL)
+																				.abundance = NULL,
+																				suffix = "")
 	standardGeneric("impute_missing_abundance"))
 
 # Set internal
@@ -3693,7 +3694,8 @@ setGeneric("impute_missing_abundance", function(.data,
 															.formula,
 															.sample = NULL,
 															.transcript = NULL,
-															.abundance = NULL)
+															.abundance = NULL,
+															suffix = "")
 {
 	# Get column names
 	.sample = enquo(.sample)
@@ -3724,7 +3726,8 @@ setGeneric("impute_missing_abundance", function(.data,
 			.sample = !!.sample,
 			.transcript = !!.transcript,
 			.abundance = !!.abundance,
-			.abundance_scaled = !!.abundance_scaled) %>%
+			.abundance_scaled = !!.abundance_scaled,
+			suffix = suffix) %>%
 
 		# Reattach internals
 		reattach_internals(.data)
diff --git a/R/methods_SE.R b/R/methods_SE.R
index a1432700..96f269ab 100755
--- a/R/methods_SE.R
+++ b/R/methods_SE.R
@@ -151,8 +151,13 @@ setMethod("tidybulk", "RangedSummarizedExperiment", .tidybulk_se)
 	  when(nrow(.) == 0 ~ stop("tidybulk says: there are 0 genes that passes the filters (.abundant and/or .subset_for_scaling). Please check your filtering or your data."), ~ (.))
 
 	my_assay = assays(.data_filtered) %>% as.list() %>% .[1]
-	my_counts_filtered = my_assay[[1]]
-	library_size_filtered = my_counts_filtered %>% colSums()
+
+	# Drop genes with NAs, as edgeR::calcNormFactors does not accept them
+	my_counts_filtered = my_assay[[1]] %>% na.omit()
+	library_size_filtered = my_counts_filtered %>% colSums(na.rm  = TRUE)
+
+	# If not enough genes, warning
+	if(nrow(my_counts_filtered)<100) warning(warning_for_scaling_with_few_genes)
 
 	# Set column name for value scaled
 	value_scaled = my_assay %>% names() %>% paste0(scaled_string)
@@ -1315,7 +1320,8 @@ setMethod("keep_variable",
 		edgeR::filterByExpr(
 			min.count = minimum_counts,
 			group = string_factor_of_interest,
-			min.prop = minimum_proportion
+			min.prop = minimum_proportion,
+			lib.size = colSums(., na.rm=TRUE)
 		) %>%
 		not() %>%
 		which %>%
@@ -1956,89 +1962,82 @@ setMethod("pivot_transcript",
 
 
 .impute_missing_abundance_se = function(.data,
-																				.formula) {
+																				.formula,
+																				.sample = NULL,
+																				.transcript = NULL,
+																				.abundance  = NULL,
+																				suffix = "") {
+
+  .abundance = enquo(.abundance)
+
+  .assay_to_impute =
+    .abundance %>%
+    when(
+      quo_is_symbolic(.) ~ assays(.data)[quo_names(.abundance)],
+      ~ assays(.data)
+    )
 
 
 
+  # Split data by formula and impute
+  imputed_dataframe =
+    map2(
 
+      # Capture assay names as we need to know if scaled is in the name
+      as.list(.assay_to_impute), names(.assay_to_impute),
+      ~ {
 
-	col_formula =
-		colData(.data) %>%
-		as_tibble() %>%
-		select(parse_formula(.formula)) %>%
-		distinct() %>%
-		select_if(function(x) is.character(x) | is.logical(x) | is.factor(x)) %>%
-		colnames
+        # Pseudo-scale if not scaled
+        if(!grepl("_scaled", .y)) library_size = colSums(.x, na.rm = TRUE)
+        if(!grepl("_scaled", .y)) .x = .x / library_size
 
-	# Create NAs for missing sample/transcript pair
-	assays(.data) =
-		assays(.data) %>%
-		as.list() %>%
-		map(~{
-			.my_data =
-				.x %>%
-				as.matrix() %>%
-				as_tibble(rownames = "transcript") %>%
-				gather(sample, abundance, -transcript) %>%
-
-				# Attach annotation
-				left_join(
-					colData(.data) %>%
-						as_tibble(rownames="sample") %>%
-						select(sample, col_formula),
-					by="sample"
-				)
+        # Log
+        need_log = max(.x, na.rm=T) > 50
+        if(need_log) .x = log1p(.x)
 
-			# Data used for filtering
-			NA_data =
-				.my_data %>%
-				filter(abundance %>% is.na) %>%
-				select( transcript, col_formula) %>%
-				distinct()
-
-			# If no missing just return the same matrix
-			if(nrow(NA_data) == 0) return(.x)
-
-			.data_OK =
-				.my_data %>%
-				anti_join(NA_data, by = c("transcript", col_formula))
-
-
-			.data_FIXED =
-				.my_data %>%
-				inner_join(NA_data, by = c("transcript", col_formula)) %>%
-
-				# Group by covariate
-				nest(cov_data = -c(col_formula, transcript)) %>%
-				mutate(cov_data = map(cov_data, ~
-																.x %>%
-																mutate(abundance =
-																			 	case_when(
-																			 		is.na(abundance) ~ median(abundance, na.rm=TRUE),
-																			 		TRUE ~ abundance
-																			 	)
-																) %>%
-
-																# Through warning if group of size 1
-																when(
-																	nrow(.) %>% st(2) ~ warning("tidybulk says: According to your design matrix, u have sample groups of size < 2, so you your dataset could still be sparse."),
-																	~ (.)
-																)
-				)) %>%
-				unnest(cov_data)
-
-			.data_OK %>%
-				bind_rows(.data_FIXED) %>%
-				select(-col_formula) %>%
-				spread(sample, abundance) %>%
-				as_matrix(rownames = transcript)
-
-		})
+        # Imputation
+        .x = fill_NA_matrix_with_factor_colwise(
+          .x,
+          # I split according to the formula
+          colData(.data)[,parse_formula(.formula)]
+        )
 
-	.data
+        # Exp back
+        if(need_log) .x = exp(.x)-1
+
+        # Scale back if pseudoscaled
+        if(!grepl("_scaled", .y)) .x = .x * library_size
+
+        # Return
+        .x
+      }
+    ) %>%
+
+    # Add imputed to the name
+    setNames(sprintf("%s%s", names(.assay_to_impute), suffix))
+
+  .assays_name_to_port = names(assays(.data)) %>% setdiff(names(.assay_to_impute))
+
+  assays(.data) =
+    as.list(assays(.data))[.assays_name_to_port] %>%
+    c(imputed_dataframe ) %>%
+
+    # Add .imputed column
+    c(list(.imputed =  which_NA_matrix(.assay_to_impute[[1]] ))) %>%
+
+    # Make names unique
+    setNames(names(.) %>% make.unique())
+
+
+  .data %>%
+
+    # Reattach internals
+    reattach_internals(.data)
 
 }
 
+
+
 #' impute_missing_abundance
 #' @inheritParams impute_missing_abundance
 #'
diff --git a/R/utilities.R b/R/utilities.R
index 5c1f956d..4ce61b46 100755
--- a/R/utilities.R
+++ b/R/utilities.R
@@ -257,7 +257,7 @@ scale_design = function(df, .formula) {
 }
 
 get_tt_columns = function(.data){
-  if(.data %>% attr("internals") %>% is.list())
+  if(.data %>% attr("internals") %>% is.list() ) #& "internals" %in% (.data %>% attr("internals") %>% names()))
     .data %>% attr("internals") %$% tt_columns
   else NULL
 }
@@ -1215,6 +1215,15 @@ add_scaled_counts_bulk.calcNormFactor <- function(.data,
 		droplevels() %>%
 		select(!!.sample, !!.transcript, !!.abundance)
 
+	df.filt.spread =
+	  df.filt %>%
+	  tidyr::spread(!!.sample,!!.abundance) %>%
+	  tidyr::drop_na() %>%
+	  dplyr::select(-!!.transcript)
+
+	# If not enough genes, warning
+	if(nrow(df.filt.spread)<100) warning(warning_for_scaling_with_few_genes)
+
 	# scaled data set
 	nf =
 		tibble::tibble(
@@ -1223,10 +1232,7 @@ add_scaled_counts_bulk.calcNormFactor <- function(.data,
 
 			# scaled data frame
 			nf = edgeR::calcNormFactors(
-				df.filt %>%
-					tidyr::spread(!!.sample,!!.abundance) %>%
-					tidyr::drop_na() %>%
-					dplyr::select(-!!.transcript),
+			  df.filt.spread,
 				refColumn = which(reference == factor(levels(
 					df.filt %>% pull(!!.sample)
 				))),
@@ -1658,3 +1664,53 @@ filter_genes_on_condition = function(.data, .subset_for_scaling){
   .data[rownames(.data) %in% my_genes,]
 
 }
+
+
+which_NA_matrix = function(.data){
+  is_na <- which(is.na(.data), arr.ind=TRUE)
+  which_is_NA = fill_matrix_with_FALSE(.data )
+  which_is_NA[is_na] <- TRUE
+  which_is_NA
+}
+
+fill_matrix_with_FALSE = function(.data){
+  .data[,] = FALSE
+  .data
+}
+
+rowMedians = function(.data, na.rm){
+  apply(.data, 1, median, na.rm=na.rm)
+}
+
+fill_NA_matrix_with_factor_colwise = function(.data, factor){
+
+  rn = rownames(.data)
+  cn = colnames(.data)
+
+  .data %>%
+    t %>%
+    split.data.frame(factor) %>%
+    map(~ t(.x)) %>%
+
+    # Fill
+    map(
+      ~ {
+        k <- which(is.na(.x), arr.ind=TRUE)
+        .x[k] <- rowMedians(.x, na.rm=TRUE)[k[,1]]
+        .x
+      }
+    ) %>%
+
+    # Add NA factors if any
+    when(
+      is.na(factor) %>% length() %>% gt(0) ~ (.) %>% c(list(.data[,is.na(factor)])),
+      ~ (.)
+    ) %>%
+
+    # Merge
+    reduce(cbind) %>%
+
+    # Reorder rows and column as it was
+    .[rn, cn]
+
+}
diff --git a/man/fill_NA_using_formula.Rd b/man/fill_NA_using_formula.Rd
index f44f40e2..4824695a 100644
--- a/man/fill_NA_using_formula.Rd
+++ b/man/fill_NA_using_formula.Rd
@@ -10,7 +10,8 @@ fill_NA_using_formula(
   .sample = NULL,
   .transcript = NULL,
   .abundance = NULL,
-  .abundance_scaled = NULL
+  .abundance_scaled = NUL,
+  suffix = "_imputed"
 )
 }
 \arguments{
diff --git a/man/impute_missing_abundance-methods.Rd b/man/impute_missing_abundance-methods.Rd
index fdb77e74..20ef0c04 100644
--- a/man/impute_missing_abundance-methods.Rd
+++ b/man/impute_missing_abundance-methods.Rd
@@ -15,7 +15,8 @@ impute_missing_abundance(
   .formula,
   .sample = NULL,
   .transcript = NULL,
-  .abundance = NULL
+  .abundance = NULL,
+  suffix = ""
 )
 
 \S4method{impute_missing_abundance}{spec_tbl_df}(
@@ -23,7 +24,8 @@ impute_missing_abundance(
   .formula,
   .sample = NULL,
   .transcript = NULL,
-  .abundance = NULL
+  .abundance = NULL,
+  suffix = ""
 )
 
 \S4method{impute_missing_abundance}{tbl_df}(
@@ -31,7 +33,8 @@ impute_missing_abundance(
   .formula,
   .sample = NULL,
   .transcript = NULL,
-  .abundance = NULL
+  .abundance = NULL,
+  suffix = ""
 )
 
 \S4method{impute_missing_abundance}{tidybulk}(
@@ -39,12 +42,27 @@ impute_missing_abundance(
   .formula,
   .sample = NULL,
   .transcript = NULL,
-  .abundance = NULL
+  .abundance = NULL,
+  suffix = ""
 )
 
-\S4method{impute_missing_abundance}{SummarizedExperiment}(.data, .formula)
+\S4method{impute_missing_abundance}{SummarizedExperiment}(
+  .data,
+  .formula,
+  .sample = NULL,
+  .transcript = NULL,
+  .abundance = NULL,
+  suffix = ""
+)
 
-\S4method{impute_missing_abundance}{RangedSummarizedExperiment}(.data, .formula)
+\S4method{impute_missing_abundance}{RangedSummarizedExperiment}(
+  .data,
+  .formula,
+  .sample = NULL,
+  .transcript = NULL,
+  .abundance = NULL,
+  suffix = ""
+)
 }
 \arguments{
 \item{.data}{A `tbl` (with at least three columns for sample, feature and transcript abundance) or `SummarizedExperiment` (more convenient if abstracted to tibble with library(tidySummarizedExperiment))}