Python | Replace NaN values with column mean

A NumPy array may contain missing entries represented as NaN. One possible way to handle them is to replace each NaN value with the mean of its column. Here are several ways to do this.

Method # 1: Using np.nanmean and np.take

# Python code demonstrating how to
# replace NaN values
# with the column means

 

import numpy as np

 
# Initialize the NumPy array; it contains one NaN in the last column
# and one NaN in the third column.
ini_array = np.array([
    [1.3, 2.5, 3.6, np.nan],
    [2.6, 3.3, np.nan, 5.5],
    [2.1, 3.2, 5.4, 6.5],
])

# Print the original array.
print("initial array", ini_array)

# Compute the mean of each column, ignoring NaN entries.
col_mean = np.nanmean(ini_array, axis=0)

# Print the column means.
print("columns mean", col_mean)

# Find the (row, column) indices where NaN is present.
inds = np.where(np.isnan(ini_array))

# Replace each NaN with the mean of its column: inds[1] holds the column
# index of every NaN, so np.take selects the matching mean for each one.
ini_array[inds] = np.take(col_mean, inds[1])

# Print the final array.
print("final array", ini_array)

Output:

 initial array [[1.3 2.5 3.6 nan] [2.6 3.3 nan 5.5] [2.1 3.2 5.4 6.5]] columns mean [2. 3. 4.5 6.] final array [[1.3 2.5 3.6 6.] [2.6 3.3 4.5 5.5] [2.1 3.2 5.4 6.5]] 

Method # 2: Using np.ma and np.where

# Python code demonstrating how to
# replace NaN values
# with the column means

 

import numpy as np

 
# Initialize the NumPy array; it contains one NaN in the last column
# and one NaN in the third column.
ini_array = np.array([
    [1.3, 2.5, 3.6, np.nan],
    [2.6, 3.3, np.nan, 5.5],
    [2.1, 3.2, 5.4, 6.5],
])

# Print the original array.
print("initial array", ini_array)

# Boolean mask of the NaN positions, computed once and reused below.
nan_mask = np.isnan(ini_array)

# Column means with the NaN entries masked out. filled(np.nan) converts the
# masked result to a plain array explicitly, so a column with no valid values
# yields NaN rather than whatever data sits under the mask.
col_mean = np.ma.array(ini_array, mask=nan_mask).mean(axis=0).filled(np.nan)

# Build a new array: take the column mean where the input is NaN and the
# original value everywhere else (col_mean broadcasts across the rows).
res = np.where(nan_mask, col_mean, ini_array)

# Print the final array.
print("final array", res)

Output:

 initial array [[1.3 2.5 3.6 nan] [2.6 3.3 nan 5.5] [2.1 3.2 5.4 6.5]] final array [[1.3 2.5 3.6 6.] [2.6 3.3 4.5 5.5] [2.1 3.2 5.4 6.5]] 

Method # 3: Using a naive loop and zip

# Python code demonstrating how to
# replace NaN values
# with the column means

 

import numpy as np

 
# Initialize the NumPy array; it contains one NaN in the last column
# and one NaN in the third column.
ini_array = np.array([
    [1.3, 2.5, 3.6, np.nan],
    [2.6, 3.3, np.nan, 5.5],
    [2.1, 3.2, 5.4, 6.5],
])

# Print the original array.
print("initial array", ini_array)

# (row indices, column indices) of the entries that are NaN.
indices = np.where(np.isnan(ini_array))

# Visit each NaN position and overwrite it with the mean of the
# non-NaN values in the same column.
for row, col in zip(*indices):
    column = ini_array[:, col]
    ini_array[row, col] = np.mean(column[~np.isnan(column)])

# Print the final array.
print("final array", ini_array)

Output:

 initial array [[1.3 2.5 3.6 nan] [2.6 3.3 nan 5.5] [2.1 3.2 5.4 6.5]] final array [[1.3 2.5 3.6 6.] [2.6 3.3 4.5 5.5] [2.1 3.2 5.4 6.5]]