# Quick inspection of the raw dataset before any preprocessing.

# Print the data type of each telco_raw column (similar to .info(),
# which additionally reports non-null counts)
print(telco_raw.dtypes)

# Print the header (first rows) of the telco_raw dataset
print(telco_raw.head())

# Print the number of unique values in each telco_raw column; these counts
# show which columns are low-cardinality
print(telco_raw.nunique())
# Separate the identifier and target variable names as lists
custid = ['customerID']
target = ['Churn']

# Separate categorical and numeric column names as lists.
# A column with fewer than 10 distinct values is treated as categorical.
unique_counts = telco_raw.nunique()
categorical = unique_counts[unique_counts < 10].keys().tolist()

# The target must not be encoded as a feature. remove() assumes the target is
# among the low-cardinality columns and raises ValueError otherwise.
categorical.remove(target[0])

# Everything that is not the identifier, the target, or categorical is numeric.
numerical = [
    col for col in telco_raw.columns
    if col not in custid + target + categorical
]
# Transform each categorical variable into a set of boolean indicator variables.
# Perform one-hot encoding on the categorical columns; drop_first=True keeps
# k-1 indicators for a variable with k levels by dropping the first level.
telco_raw = pd.get_dummies(data=telco_raw, columns=categorical, drop_first=True)
# Import the StandardScaler class
from sklearn.preprocessing import StandardScaler

# Initialize StandardScaler instance
scaler = StandardScaler()

# Fit the scaler to the numerical columns and scale them in one step.
# fit_transform returns a plain NumPy array by default, so wrap the result in
# a DataFrame that keeps the original column names and row index: merge()
# only accepts DataFrame/Series operands, and the index is what the merge
# below aligns on.
scaled_numerical = pd.DataFrame(
    scaler.fit_transform(telco_raw[numerical]),
    columns=numerical,
    index=telco_raw.index,
)

# Drop the non-scaled numerical columns
telco_raw = telco_raw.drop(columns=numerical)

# Merge the non-numerical data with the scaled numerical data, row by row
# on the shared index
telco = telco_raw.merge(
    right=scaled_numerical,
    how='left',
    left_index=True,
    right_index=True,
)