STEP 1 - Importing Packages and Performing Basic Data Exploration

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import ttest_ind

# Load the data
# Please note - I have changed the name of the original .csv file to "Marketing_Dataset.csv"
df = pd.read_csv("Marketing_Dataset.csv")

# Display the first few rows of the dataset
df.head()
Out[1]:
ID Year_Birth Education Marital_Status Income Kidhome Teenhome Dt_Customer Recency MntWines ... NumStorePurchases NumWebVisitsMonth AcceptedCmp3 AcceptedCmp4 AcceptedCmp5 AcceptedCmp1 AcceptedCmp2 Response Complain Country
0 1826 1970 Graduation Divorced $84,835.00 0 0 6/16/14 0 189 ... 6 1 0 0 0 0 0 1 0 SP
1 1 1961 Graduation Single $57,091.00 0 0 6/15/14 0 464 ... 7 5 0 0 0 0 1 1 0 CA
2 10476 1958 Graduation Married $67,267.00 0 1 5/13/14 0 134 ... 5 2 0 0 0 0 0 0 0 US
3 1386 1967 Graduation Together $32,474.00 1 1 5/11/14 0 10 ... 2 7 0 0 0 0 0 0 0 AUS
4 5371 1989 Graduation Single $21,474.00 1 0 4/8/14 0 6 ... 2 7 1 0 0 0 0 1 0 SP

5 rows × 28 columns

In [2]:
# Display no. of rows & columns
df.shape
Out[2]:
(2240, 28)
In [3]:
# Check data types and missing values
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2240 entries, 0 to 2239
Data columns (total 28 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   ID                   2240 non-null   int64 
 1   Year_Birth           2240 non-null   int64 
 2   Education            2240 non-null   object
 3   Marital_Status       2240 non-null   object
 4    Income              2216 non-null   object
 5   Kidhome              2240 non-null   int64 
 6   Teenhome             2240 non-null   int64 
 7   Dt_Customer          2240 non-null   object
 8   Recency              2240 non-null   int64 
 9   MntWines             2240 non-null   int64 
 10  MntFruits            2240 non-null   int64 
 11  MntMeatProducts      2240 non-null   int64 
 12  MntFishProducts      2240 non-null   int64 
 13  MntSweetProducts     2240 non-null   int64 
 14  MntGoldProds         2240 non-null   int64 
 15  NumDealsPurchases    2240 non-null   int64 
 16  NumWebPurchases      2240 non-null   int64 
 17  NumCatalogPurchases  2240 non-null   int64 
 18  NumStorePurchases    2240 non-null   int64 
 19  NumWebVisitsMonth    2240 non-null   int64 
 20  AcceptedCmp3         2240 non-null   int64 
 21  AcceptedCmp4         2240 non-null   int64 
 22  AcceptedCmp5         2240 non-null   int64 
 23  AcceptedCmp1         2240 non-null   int64 
 24  AcceptedCmp2         2240 non-null   int64 
 25  Response             2240 non-null   int64 
 26  Complain             2240 non-null   int64 
 27  Country              2240 non-null   object
dtypes: int64(23), object(5)
memory usage: 490.1+ KB
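Two issues are visible in the info() output and are handled in the next steps: the 'Income' column name carries a leading space (note the extra indent at index 4), and its dtype is object because the values are currency-formatted strings. A minimal reproduction on a toy frame:

```python
import pandas as pd

# Toy frame mimicking the two issues seen in df.info():
# a leading space in the 'Income' column name and currency-formatted strings
toy = pd.DataFrame({" Income": ["$84,835.00", "$57,091.00", None]})

# repr() of the labels makes stray whitespace visible
print([repr(c) for c in toy.columns])  # ["' Income'"]

# dtype 'object' here means the values are strings, not numbers
print(toy[" Income"].dtype)  # object
```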

STEP 2 - Fixing the "Dt_Customer" column

In [4]:
# Convert 'Dt_Customer' to datetime using the file's month/day/two-digit-year format
# (specifying the format avoids pandas' "Could not infer format" warning)
df['Dt_Customer'] = pd.to_datetime(df['Dt_Customer'], format='%m/%d/%y', errors='coerce')

# Calculate the number of days since enrollment
# Here I am taking "today's date" to be 31st Dec 2023 as a fixed reference
reference_date = pd.to_datetime('31-12-2023', format='%d-%m-%Y')
df['Days_Since_Enrollment'] = (reference_date - df['Dt_Customer']).dt.days

# Display the updated DataFrame
df.head(50)
Out[4]:
ID Year_Birth Education Marital_Status Income Kidhome Teenhome Dt_Customer Recency MntWines ... NumWebVisitsMonth AcceptedCmp3 AcceptedCmp4 AcceptedCmp5 AcceptedCmp1 AcceptedCmp2 Response Complain Country Days_Since_Enrollment
0 1826 1970 Graduation Divorced $84,835.00 0 0 2014-06-16 0 189 ... 1 0 0 0 0 0 1 0 SP 3485
1 1 1961 Graduation Single $57,091.00 0 0 2014-06-15 0 464 ... 5 0 0 0 0 1 1 0 CA 3486
2 10476 1958 Graduation Married $67,267.00 0 1 2014-05-13 0 134 ... 2 0 0 0 0 0 0 0 US 3519
3 1386 1967 Graduation Together $32,474.00 1 1 2014-05-11 0 10 ... 7 0 0 0 0 0 0 0 AUS 3521
4 5371 1989 Graduation Single $21,474.00 1 0 2014-04-08 0 6 ... 7 1 0 0 0 0 1 0 SP 3554
5 7348 1958 PhD Single $71,691.00 0 0 2014-03-17 0 336 ... 2 0 0 0 0 0 1 0 SP 3576
6 4073 1954 2n Cycle Married $63,564.00 0 0 2014-01-29 0 769 ... 6 1 0 0 0 0 1 0 GER 3623
7 1991 1967 Graduation Together $44,931.00 0 1 2014-01-18 0 78 ... 5 0 0 0 0 0 0 0 SP 3634
8 4047 1954 PhD Married $65,324.00 0 1 2014-01-11 0 384 ... 4 0 0 0 0 0 0 0 US 3641
9 9477 1954 PhD Married $65,324.00 0 1 2014-01-11 0 384 ... 4 0 0 0 0 0 0 0 IND 3641
10 2079 1947 2n Cycle Married $81,044.00 0 0 2013-12-27 0 450 ... 1 0 0 0 0 0 0 0 US 3656
11 5642 1979 Master Together $62,499.00 1 0 2013-12-09 0 140 ... 4 0 0 0 0 0 0 0 SP 3674
12 10530 1959 PhD Widow $67,786.00 0 0 2013-12-07 0 431 ... 1 0 0 0 0 0 1 0 IND 3676
13 2964 1981 Graduation Married $26,872.00 0 0 2013-10-16 0 3 ... 6 0 0 0 0 0 0 0 CA 3728
14 10311 1969 Graduation Married $4,428.00 0 1 2013-10-05 0 16 ... 1 0 0 0 0 0 0 0 SP 3739
15 837 1977 Graduation Married $54,809.00 1 1 2013-09-11 0 63 ... 4 0 0 0 0 0 0 0 SP 3763
16 10521 1977 Graduation Married $54,809.00 1 1 2013-09-11 0 63 ... 4 0 0 0 0 0 1 0 SP 3763
17 10175 1958 PhD Divorced $32,173.00 0 1 2013-08-01 0 18 ... 4 0 0 0 0 0 0 0 SP 3804
18 1473 1960 2n Cycle Single $47,823.00 0 1 2013-07-23 0 53 ... 8 0 0 0 0 0 0 0 CA 3813
19 2795 1958 Master Single $30,523.00 2 1 2013-07-01 0 5 ... 7 0 0 0 0 0 0 0 CA 3835
20 2285 1954 Master Together $36,634.00 0 1 2013-05-28 0 213 ... 7 0 0 0 0 0 0 0 SA 3869
21 115 1966 Master Single $43,456.00 0 1 2013-03-26 0 275 ... 5 0 0 0 0 0 0 0 IND 3932
22 10470 1979 Master Married $40,662.00 1 0 2013-03-15 0 40 ... 4 0 0 0 0 0 0 0 GER 3943
23 4065 1976 PhD Married $49,544.00 1 0 2013-02-12 0 308 ... 7 0 0 0 0 0 0 0 SP 3974
24 10968 1969 Graduation Single $57,731.00 0 1 2012-11-23 0 266 ... 6 0 0 0 0 0 0 0 IND 4055
25 5985 1965 Master Single $33,168.00 0 1 2012-10-13 0 80 ... 7 0 0 0 0 0 0 0 SP 4096
26 5430 1956 Graduation Together $54,450.00 1 1 2012-09-14 0 454 ... 8 0 0 0 0 0 0 0 SP 4125
27 8432 1956 Graduation Together $54,450.00 1 1 2012-09-14 0 454 ... 8 0 0 0 0 0 0 0 SP 4125
28 453 1956 PhD Widow $35,340.00 1 1 2014-06-29 1 27 ... 5 0 0 0 0 0 0 0 SP 3472
29 9687 1975 Graduation Single $73,170.00 0 0 2014-05-31 1 184 ... 2 0 0 0 0 0 0 0 CA 3501
30 8890 1971 PhD Divorced $65,808.00 1 1 2014-05-30 1 155 ... 6 0 0 0 0 0 0 0 SP 3502
31 9264 1986 Graduation Married $79,529.00 0 0 2014-04-27 1 423 ... 2 0 0 0 0 0 0 0 CA 3535
32 5824 1972 PhD Together $34,578.00 2 1 2014-04-11 1 7 ... 6 0 0 0 0 0 0 0 AUS 3551
33 5794 1974 PhD Married $46,374.00 0 1 2014-03-17 1 408 ... 8 0 1 0 1 0 1 0 IND 3576
34 3068 1990 Graduation Married $18,351.00 0 0 2013-10-29 1 1 ... 7 0 0 0 0 0 0 0 SP 3715
35 7962 1987 PhD Single $95,169.00 0 0 2013-10-09 1 1285 ... 1 0 0 1 1 0 1 0 SP 3735
36 2681 1984 2n Cycle Married $65,370.00 0 0 2013-08-01 1 71 ... 1 0 0 0 0 0 0 0 SP 3804
37 10141 1960 Master Divorced $39,228.00 0 0 2013-05-10 1 7 ... 4 0 0 0 0 0 0 0 SA 3887
38 3725 1961 PhD Single $84,865.00 0 0 2013-05-09 1 1248 ... 4 0 1 1 1 1 1 0 SP 3888
39 3767 1968 Graduation Married $61,314.00 0 1 2013-04-25 1 378 ... 3 0 0 0 0 0 0 0 SP 3902
40 5585 1972 Graduation Single $21,359.00 1 0 2013-04-20 1 12 ... 8 0 0 0 0 0 1 0 CA 3907
41 7030 1955 PhD Married $66,465.00 0 1 2013-03-30 1 1200 ... 6 0 0 0 1 0 0 0 SP 3928
42 1524 1983 2n Cycle Single $81,698.00 0 0 2013-03-01 1 709 ... 5 0 0 0 1 0 1 0 SP 3957
43 3657 1986 Graduation Single $39,146.00 1 0 2013-02-14 1 94 ... 8 0 0 0 0 0 0 0 SP 3972
44 5740 1970 2n Cycle Divorced $25,959.00 1 1 2013-02-14 1 4 ... 6 0 0 0 0 0 1 0 SP 3972
45 9595 1961 Graduation Together $64,260.00 0 0 2013-01-11 1 539 ... 3 0 0 0 0 0 1 0 SP 4006
46 3158 1973 Graduation Married $32,300.00 1 0 2013-01-03 1 13 ... 8 0 0 0 0 0 0 0 SP 4014
47 5114 1965 Master Married $74,806.00 0 1 2012-12-19 1 670 ... 4 0 0 0 0 0 0 0 AUS 4029
48 340 1970 Graduation Divorced $72,967.00 0 1 2012-12-15 1 158 ... 5 1 0 0 0 0 1 0 GER 4033
49 8805 1960 Graduation Single $48,904.00 0 1 2012-12-02 1 283 ... 8 0 0 0 0 0 0 0 US 4046

50 rows × 29 columns
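For reference, parsing with an explicit format string pins the interpretation of dates like "6/16/14" to month/day/two-digit-year and avoids pandas' "Could not infer format" warning:

```python
import pandas as pd

# Raw strings in the file use month/day/two-digit-year
raw = pd.Series(["6/16/14", "5/13/14", "4/8/14"])

# An explicit format guarantees consistent parsing across all rows
parsed = pd.to_datetime(raw, format="%m/%d/%y")
print(parsed.dt.year.tolist())  # [2014, 2014, 2014]
```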

STEP 3 - Fixing the "Income" column

In [5]:
# Remove leading and trailing whitespaces from all column names
# This will rename the ' Income' column to 'Income' without the leading space
df.columns = df.columns.str.strip()
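A quick illustration of what str.strip() does to the column labels, on a toy frame:

```python
import pandas as pd

# Toy example: one column label with a stray leading space
toy = pd.DataFrame({" Income": [1, 2], "Kidhome": [0, 1]})

# Strip whitespace from every column label in one pass
toy.columns = toy.columns.str.strip()
print(list(toy.columns))  # ['Income', 'Kidhome']
```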
In [6]:
# Check for missing values in 'Income'
df[df['Income'].isnull()]
Out[6]:
ID Year_Birth Education Marital_Status Income Kidhome Teenhome Dt_Customer Recency MntWines ... NumWebVisitsMonth AcceptedCmp3 AcceptedCmp4 AcceptedCmp5 AcceptedCmp1 AcceptedCmp2 Response Complain Country Days_Since_Enrollment
134 8996 1957 PhD Married NaN 2 1 2012-11-19 4 230 ... 9 0 0 0 0 0 0 0 GER 4059
262 1994 1983 Graduation Married NaN 1 0 2013-11-15 11 5 ... 7 0 0 0 0 0 0 0 US 3698
394 3769 1972 PhD Together NaN 1 0 2014-03-02 17 25 ... 7 0 0 0 0 0 0 0 AUS 3591
449 5255 1986 Graduation Single NaN 1 0 2013-02-20 19 5 ... 1 0 0 0 0 0 0 0 AUS 3966
525 8268 1961 PhD Married NaN 0 1 2013-07-11 23 352 ... 6 0 0 0 0 0 0 0 CA 3825
590 10629 1973 2n Cycle Married NaN 1 0 2012-09-14 25 25 ... 8 0 0 0 0 0 0 0 GER 4125
899 10475 1970 Master Together NaN 0 1 2013-04-01 39 187 ... 5 0 0 0 0 0 0 0 US 3926
997 9235 1957 Graduation Single NaN 1 1 2014-05-27 45 7 ... 7 0 0 0 0 0 0 0 GER 3505
1096 4345 1964 2n Cycle Single NaN 1 1 2014-01-12 49 5 ... 7 0 0 0 0 0 0 0 AUS 3640
1185 7187 1969 Master Together NaN 1 1 2013-05-18 52 375 ... 3 0 0 0 0 0 0 0 AUS 3879
1213 8720 1978 2n Cycle Together NaN 0 0 2012-08-12 53 32 ... 0 0 1 0 0 0 0 0 IND 4158
1312 8557 1982 Graduation Single NaN 1 0 2013-06-17 57 11 ... 6 0 0 0 0 0 0 0 AUS 3849
1515 2863 1970 Graduation Single NaN 1 2 2013-08-23 67 738 ... 7 0 1 0 1 0 0 0 SP 3782
1558 2437 1989 Graduation Married NaN 0 0 2013-06-03 69 861 ... 3 0 1 0 1 0 0 0 SP 3863
1693 5250 1943 Master Widow NaN 0 0 2013-10-30 75 532 ... 1 0 0 1 0 0 1 0 AUS 3714
1804 7281 1959 PhD Single NaN 0 0 2013-11-05 80 81 ... 2 0 0 0 0 0 0 0 AUS 3708
1858 1612 1981 PhD Single NaN 1 0 2013-05-31 82 23 ... 6 0 0 0 0 0 0 0 AUS 3866
1863 5079 1971 Graduation Married NaN 1 1 2013-03-03 82 71 ... 8 0 0 0 0 0 0 0 AUS 3955
1880 10339 1954 Master Together NaN 0 1 2013-06-23 83 161 ... 6 0 0 0 0 0 0 0 AUS 3843
1967 5798 1973 Master Together NaN 0 0 2013-11-23 87 445 ... 1 0 0 0 0 0 0 0 GER 3690
1983 2902 1958 Graduation Together NaN 1 1 2012-09-03 87 19 ... 5 0 0 0 0 0 0 0 AUS 4136
2139 3117 1955 Graduation Single NaN 0 1 2013-10-18 95 264 ... 7 0 0 0 0 0 0 0 AUS 3726
2165 7244 1951 Graduation Single NaN 2 1 2014-01-01 96 48 ... 6 0 0 0 0 0 0 0 AUS 3651
2170 1295 1963 Graduation Married NaN 0 1 2013-08-11 96 231 ... 4 0 0 0 0 0 0 0 CA 3794

24 rows × 29 columns

In [7]:
# Remove currency formatting ('$' and ',') from the 'Income' column and convert to float
# (a raw string avoids the invalid-escape-sequence warning for '\$' in newer Python)
df['Income'] = df['Income'].replace(r'[\$,]', '', regex=True).astype(float)

# Verify the changes
df.head()
Out[7]:
ID Year_Birth Education Marital_Status Income Kidhome Teenhome Dt_Customer Recency MntWines ... NumWebVisitsMonth AcceptedCmp3 AcceptedCmp4 AcceptedCmp5 AcceptedCmp1 AcceptedCmp2 Response Complain Country Days_Since_Enrollment
0 1826 1970 Graduation Divorced 84835.0 0 0 2014-06-16 0 189 ... 1 0 0 0 0 0 1 0 SP 3485
1 1 1961 Graduation Single 57091.0 0 0 2014-06-15 0 464 ... 5 0 0 0 0 1 1 0 CA 3486
2 10476 1958 Graduation Married 67267.0 0 1 2014-05-13 0 134 ... 2 0 0 0 0 0 0 0 US 3519
3 1386 1967 Graduation Together 32474.0 1 1 2014-05-11 0 10 ... 7 0 0 0 0 0 0 0 AUS 3521
4 5371 1989 Graduation Single 21474.0 1 0 2014-04-08 0 6 ... 7 1 0 0 0 0 1 0 SP 3554

5 rows × 29 columns

In [8]:
# Group by 'Education' and 'Marital_Status', then fill missing 'Income' values with the mean of each group
df['Income'] = df.groupby(['Education', 'Marital_Status'])['Income'].transform(lambda x: x.fillna(x.mean()))

# Verify if missing values are filled
df[df['Income'].isnull()]
Out[8]:
ID Year_Birth Education Marital_Status Income Kidhome Teenhome Dt_Customer Recency MntWines ... NumWebVisitsMonth AcceptedCmp3 AcceptedCmp4 AcceptedCmp5 AcceptedCmp1 AcceptedCmp2 Response Complain Country Days_Since_Enrollment

0 rows × 29 columns
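The group-based imputation above can be illustrated on a toy frame: each NaN is replaced by the mean income of its (Education, Marital_Status) group, leaving other groups untouched.

```python
import pandas as pd
import numpy as np

# Toy frame: one missing income inside the ('PhD', 'Married') group
toy = pd.DataFrame({
    "Education": ["PhD", "PhD", "Master"],
    "Marital_Status": ["Married", "Married", "Single"],
    "Income": [40000.0, np.nan, 60000.0],
})

# Fill each missing value with its group's mean income
toy["Income"] = (
    toy.groupby(["Education", "Marital_Status"])["Income"]
       .transform(lambda x: x.fillna(x.mean()))
)
print(toy["Income"].tolist())  # [40000.0, 40000.0, 60000.0]
```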

In [9]:
# Check for missing values in the entire dataset
missing_values = df.isnull().sum()

# Display the count of missing values for each column
print("Missing Values in Each Column:")
print(missing_values)

# Check if there are any missing values in the entire dataset
if missing_values.sum() == 0:
    print("No missing values in the dataset.")
else:
    print("There are missing values in the dataset.")
Missing Values in Each Column:
ID                       0
Year_Birth               0
Education                0
Marital_Status           0
Income                   0
Kidhome                  0
Teenhome                 0
Dt_Customer              0
Recency                  0
MntWines                 0
MntFruits                0
MntMeatProducts          0
MntFishProducts          0
MntSweetProducts         0
MntGoldProds             0
NumDealsPurchases        0
NumWebPurchases          0
NumCatalogPurchases      0
NumStorePurchases        0
NumWebVisitsMonth        0
AcceptedCmp3             0
AcceptedCmp4             0
AcceptedCmp5             0
AcceptedCmp1             0
AcceptedCmp2             0
Response                 0
Complain                 0
Country                  0
Days_Since_Enrollment    0
dtype: int64
No missing values in the dataset.

STEP 4 - Creating new "Age", "Total_Children" and "Total_Spending" columns

In [10]:
# Create a variable for the total number of children
df['Total_Children'] = df['Kidhome'] + df['Teenhome']

# Create a variable for age (assuming the current year is 2023)
df['Age'] = 2023 - df['Year_Birth']

# Create a variable for total spending
df['Total_Spending'] = (
    df['MntWines'] + df['MntFruits'] + df['MntMeatProducts'] +
    df['MntFishProducts'] + df['MntSweetProducts'] + df['MntGoldProds']
)

# Display the updated DataFrame with new variables
df.head()
Out[10]:
ID Year_Birth Education Marital_Status Income Kidhome Teenhome Dt_Customer Recency MntWines ... AcceptedCmp5 AcceptedCmp1 AcceptedCmp2 Response Complain Country Days_Since_Enrollment Total_Children Age Total_Spending
0 1826 1970 Graduation Divorced 84835.0 0 0 2014-06-16 0 189 ... 0 0 0 1 0 SP 3485 0 53 1190
1 1 1961 Graduation Single 57091.0 0 0 2014-06-15 0 464 ... 0 0 1 1 0 CA 3486 0 62 577
2 10476 1958 Graduation Married 67267.0 0 1 2014-05-13 0 134 ... 0 0 0 0 0 US 3519 1 65 251
3 1386 1967 Graduation Together 32474.0 1 1 2014-05-11 0 10 ... 0 0 0 0 0 AUS 3521 2 56 11
4 5371 1989 Graduation Single 21474.0 1 0 2014-04-08 0 6 ... 0 0 0 1 0 SP 3554 1 34 91

5 rows × 32 columns

STEP 5 - Removing outliers from the Income column

In [11]:
# Set up the plotting environment
sns.set(style="whitegrid")
plt.figure(figsize=(16, 4))

# Create box plot for 'Income'
plt.subplot(1, 2, 1)
sns.boxplot(y=df['Income'])
plt.title('Boxplot of Income')

# Create histogram for 'Income'
plt.subplot(1, 2, 2)
sns.histplot(df['Income'], kde=True, bins=30)
plt.title('Histogram of Income')

# Display the plots
plt.show()
[Figure: box plot and histogram of 'Income']
In [12]:
# Calculate the IQR (Interquartile Range) for 'Income'
Q1 = df['Income'].quantile(0.25)
Q3 = df['Income'].quantile(0.75)
IQR = Q3 - Q1

# Define the upper and lower bounds to identify outliers
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR

# Check for outliers
outliers = df[(df['Income'] < lower_bound) | (df['Income'] > upper_bound)]

# Display the outliers
print("Potential outliers in 'Income' column:")
print(outliers[['Income']])
Potential outliers in 'Income' column:
        Income
325   157146.0
497   160803.0
527   666666.0
731   162397.0
853   157733.0
1826  153924.0
1925  156924.0
2204  157243.0
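The same IQR-fence logic, shown on a small toy series for reference:

```python
import pandas as pd

# Toy series with one obvious outlier
s = pd.Series([10, 12, 11, 13, 12, 11, 500])

# Quartiles and the 1.5 * IQR fence
q1, q3 = s.quantile(0.25), s.quantile(0.75)
iqr = q3 - q1
lower, upper = q1 - 1.5 * iqr, q3 + 1.5 * iqr

# Values outside the fence are flagged as potential outliers
outliers = s[(s < lower) | (s > upper)]
print(outliers.tolist())  # [500]
```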
In [13]:
# List of potential outlier values
outlier_values = [157146.0, 160803.0, 666666.0, 162397.0, 157733.0, 153924.0, 156924.0, 157243.0]

# Remove rows with potential outliers in 'Income'
df = df[~df['Income'].isin(outlier_values)]

# Verify that the rows have been removed
df[df['Income'].isin(outlier_values)]
Out[13]:
ID Year_Birth Education Marital_Status Income Kidhome Teenhome Dt_Customer Recency MntWines ... AcceptedCmp5 AcceptedCmp1 AcceptedCmp2 Response Complain Country Days_Since_Enrollment Total_Children Age Total_Spending

0 rows × 32 columns
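Hard-coding the outlier values works for this snapshot of the data, but it silently breaks if the file changes. A sketch of a less brittle alternative that filters on the IQR fence directly, shown here on a toy frame:

```python
import pandas as pd

# Toy frame with one extreme income
toy = pd.DataFrame({"Income": [30000.0, 52000.0, 666666.0, 48000.0]})

# Recompute the IQR fence from the data itself
q1, q3 = toy["Income"].quantile([0.25, 0.75])
iqr = q3 - q1
lower_bound, upper_bound = q1 - 1.5 * iqr, q3 + 1.5 * iqr

# Keep only rows whose Income falls inside the fence
toy = toy[toy["Income"].between(lower_bound, upper_bound)]
print(len(toy))  # 3
```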

In [14]:
# Set up the plotting environment
sns.set(style="whitegrid")
plt.figure(figsize=(16, 4))

# Create box plot for 'Income'
plt.subplot(1, 2, 1)
sns.boxplot(y=df['Income'])
plt.title('Boxplot of Income')

# Create histogram for 'Income'
plt.subplot(1, 2, 2)
sns.histplot(df['Income'], kde=True, bins=30)
plt.title('Histogram of Income')

# Display the plots
plt.show()
[Figure: box plot and histogram of 'Income' after outlier removal]

STEP 6 - Performing Ordinal Encoding on the Education column

In [15]:
from sklearn.preprocessing import OrdinalEncoder

# Create an OrdinalEncoder instance
ordinal_encoder = OrdinalEncoder()

# Define the columns to be ordinal encoded
ordinal_column = ['Education']
In [16]:
# Unique values in 'Education' column
unique_education = df['Education'].unique()
print("Unique values in 'Education':", unique_education)
Unique values in 'Education': ['Graduation' 'PhD' '2n Cycle' 'Master' 'Basic']
In [17]:
# Define the education levels in ascending order (lowest first),
# so that 'Basic' maps to 0 and 'PhD' maps to 4
education_order = ['Basic', '2n Cycle', 'Graduation', 'Master', 'PhD']

# Create a copy of the DataFrame
df_encoded = df.copy()

# Create an OrdinalEncoder instance with the reversed order
ordinal_encoder_education = OrdinalEncoder(categories=[education_order])

# Apply ordinal encoding to the new 'Education Encoded' column
df_encoded['Education Encoded'] = ordinal_encoder_education.fit_transform(df_encoded[['Education']])

# Display the DataFrame with the new column
df_encoded.head(50)
Out[17]:
ID Year_Birth Education Marital_Status Income Kidhome Teenhome Dt_Customer Recency MntWines ... AcceptedCmp1 AcceptedCmp2 Response Complain Country Days_Since_Enrollment Total_Children Age Total_Spending Education Encoded
0 1826 1970 Graduation Divorced 84835.0 0 0 2014-06-16 0 189 ... 0 0 1 0 SP 3485 0 53 1190 2.0
1 1 1961 Graduation Single 57091.0 0 0 2014-06-15 0 464 ... 0 1 1 0 CA 3486 0 62 577 2.0
2 10476 1958 Graduation Married 67267.0 0 1 2014-05-13 0 134 ... 0 0 0 0 US 3519 1 65 251 2.0
3 1386 1967 Graduation Together 32474.0 1 1 2014-05-11 0 10 ... 0 0 0 0 AUS 3521 2 56 11 2.0
4 5371 1989 Graduation Single 21474.0 1 0 2014-04-08 0 6 ... 0 0 1 0 SP 3554 1 34 91 2.0
5 7348 1958 PhD Single 71691.0 0 0 2014-03-17 0 336 ... 0 0 1 0 SP 3576 0 65 1192 4.0
6 4073 1954 2n Cycle Married 63564.0 0 0 2014-01-29 0 769 ... 0 0 1 0 GER 3623 0 69 1215 1.0
7 1991 1967 Graduation Together 44931.0 0 1 2014-01-18 0 78 ... 0 0 0 0 SP 3634 1 56 96 2.0
8 4047 1954 PhD Married 65324.0 0 1 2014-01-11 0 384 ... 0 0 0 0 US 3641 1 69 544 4.0
9 9477 1954 PhD Married 65324.0 0 1 2014-01-11 0 384 ... 0 0 0 0 IND 3641 1 69 544 4.0
10 2079 1947 2n Cycle Married 81044.0 0 0 2013-12-27 0 450 ... 0 0 0 0 US 3656 0 76 1208 1.0
11 5642 1979 Master Together 62499.0 1 0 2013-12-09 0 140 ... 0 0 0 0 SP 3674 1 44 222 3.0
12 10530 1959 PhD Widow 67786.0 0 0 2013-12-07 0 431 ... 0 0 1 0 IND 3676 0 64 1156 4.0
13 2964 1981 Graduation Married 26872.0 0 0 2013-10-16 0 3 ... 0 0 0 0 CA 3728 0 42 72 2.0
14 10311 1969 Graduation Married 4428.0 0 1 2013-10-05 0 16 ... 0 0 0 0 SP 3739 1 54 359 2.0
15 837 1977 Graduation Married 54809.0 1 1 2013-09-11 0 63 ... 0 0 0 0 SP 3763 2 46 174 2.0
16 10521 1977 Graduation Married 54809.0 1 1 2013-09-11 0 63 ... 0 0 1 0 SP 3763 2 46 174 2.0
17 10175 1958 PhD Divorced 32173.0 0 1 2013-08-01 0 18 ... 0 0 0 0 SP 3804 1 65 22 4.0
18 1473 1960 2n Cycle Single 47823.0 0 1 2013-07-23 0 53 ... 0 0 0 0 CA 3813 1 63 72 1.0
19 2795 1958 Master Single 30523.0 2 1 2013-07-01 0 5 ... 0 0 0 0 CA 3835 3 65 13 3.0
20 2285 1954 Master Together 36634.0 0 1 2013-05-28 0 213 ... 0 0 0 0 SA 3869 1 69 335 3.0
21 115 1966 Master Single 43456.0 0 1 2013-03-26 0 275 ... 0 0 0 0 IND 3932 1 57 393 3.0
22 10470 1979 Master Married 40662.0 1 0 2013-03-15 0 40 ... 0 0 0 0 GER 3943 1 44 92 3.0
23 4065 1976 PhD Married 49544.0 1 0 2013-02-12 0 308 ... 0 0 0 0 SP 3974 1 47 404 4.0
24 10968 1969 Graduation Single 57731.0 0 1 2012-11-23 0 266 ... 0 0 0 0 IND 4055 1 54 704 2.0
25 5985 1965 Master Single 33168.0 0 1 2012-10-13 0 80 ... 0 0 0 0 SP 4096 1 58 122 3.0
26 5430 1956 Graduation Together 54450.0 1 1 2012-09-14 0 454 ... 0 0 0 0 SP 4125 2 67 684 2.0
27 8432 1956 Graduation Together 54450.0 1 1 2012-09-14 0 454 ... 0 0 0 0 SP 4125 2 67 684 2.0
28 453 1956 PhD Widow 35340.0 1 1 2014-06-29 1 27 ... 0 0 0 0 SP 3472 2 67 45 4.0
29 9687 1975 Graduation Single 73170.0 0 0 2014-05-31 1 184 ... 0 0 0 0 CA 3501 0 48 726 2.0
30 8890 1971 PhD Divorced 65808.0 1 1 2014-05-30 1 155 ... 0 0 0 0 SP 3502 2 52 272 4.0
31 9264 1986 Graduation Married 79529.0 0 0 2014-04-27 1 423 ... 0 0 0 0 CA 3535 0 37 1638 2.0
32 5824 1972 PhD Together 34578.0 2 1 2014-04-11 1 7 ... 0 0 0 0 AUS 3551 3 51 8 4.0
33 5794 1974 PhD Married 46374.0 0 1 2014-03-17 1 408 ... 1 0 1 0 IND 3576 1 49 446 4.0
34 3068 1990 Graduation Married 18351.0 0 0 2013-10-29 1 1 ... 0 0 0 0 SP 3715 0 33 43 2.0
35 7962 1987 PhD Single 95169.0 0 0 2013-10-09 1 1285 ... 1 0 1 0 SP 3735 0 36 1901 4.0
36 2681 1984 2n Cycle Married 65370.0 0 0 2013-08-01 1 71 ... 0 0 0 0 SP 3804 0 39 461 1.0
37 10141 1960 Master Divorced 39228.0 0 0 2013-05-10 1 7 ... 0 0 0 0 SA 3887 0 63 20 3.0
38 3725 1961 PhD Single 84865.0 0 0 2013-05-09 1 1248 ... 1 1 1 0 SP 3888 0 62 1688 4.0
39 3767 1968 Graduation Married 61314.0 0 1 2013-04-25 1 378 ... 0 0 0 0 SP 3902 1 55 1008 2.0
40 5585 1972 Graduation Single 21359.0 1 0 2013-04-20 1 12 ... 0 0 1 0 CA 3907 1 51 48 2.0
41 7030 1955 PhD Married 66465.0 0 1 2013-03-30 1 1200 ... 1 0 0 0 SP 3928 1 68 1485 4.0
42 1524 1983 2n Cycle Single 81698.0 0 0 2013-03-01 1 709 ... 1 0 1 0 SP 3957 0 40 1104 1.0
43 3657 1986 Graduation Single 39146.0 1 0 2013-02-14 1 94 ... 0 0 0 0 SP 3972 1 37 165 2.0
44 5740 1970 2n Cycle Divorced 25959.0 1 1 2013-02-14 1 4 ... 0 0 1 0 SP 3972 2 53 56 1.0
45 9595 1961 Graduation Together 64260.0 0 0 2013-01-11 1 539 ... 0 0 1 0 SP 4006 0 62 1574 2.0
46 3158 1973 Graduation Married 32300.0 1 0 2013-01-03 1 13 ... 0 0 0 0 SP 4014 1 50 39 2.0
47 5114 1965 Master Married 74806.0 0 1 2012-12-19 1 670 ... 0 0 0 0 AUS 4029 1 58 965 3.0
48 340 1970 Graduation Divorced 72967.0 0 1 2012-12-15 1 158 ... 0 0 1 0 GER 4033 1 53 497 2.0
49 8805 1960 Graduation Single 48904.0 0 1 2012-12-02 1 283 ... 0 0 0 0 US 4046 1 63 371 2.0

50 rows × 33 columns

In [18]:
df_encoded[50:100]
Out[18]:
ID Year_Birth Education Marital_Status Income Kidhome Teenhome Dt_Customer Recency MntWines ... AcceptedCmp1 AcceptedCmp2 Response Complain Country Days_Since_Enrollment Total_Children Age Total_Spending Education Encoded
50 1241 1984 2n Cycle Married 14796.0 1 0 2012-09-17 1 13 ... 0 0 1 0 GER 4122 1 39 51 1.0
51 1402 1954 Master Married 66991.0 0 0 2012-09-11 1 496 ... 0 0 0 0 GER 4128 0 69 1253 3.0
52 7264 1978 2n Cycle Single 52195.0 2 1 2014-05-12 2 12 ... 0 0 0 0 SA 3520 3 45 17 1.0
53 1619 1956 Graduation Married 90369.0 0 0 2014-04-28 2 292 ... 0 0 1 0 SP 3534 0 67 1588 2.0
54 6398 1974 Basic Married 18393.0 1 0 2014-03-29 2 7 ... 0 0 0 0 SP 3564 1 49 50 0.0
55 1857 1952 Graduation Single 47139.0 1 1 2014-03-06 2 46 ... 0 0 1 0 SP 3587 2 71 83 2.0
56 4877 1973 Master Married 38576.0 0 1 2014-03-04 2 34 ... 0 0 0 0 IND 3589 1 50 41 3.0
57 3066 1975 PhD Together 61905.0 0 1 2014-02-04 2 167 ... 0 0 0 0 SA 3617 1 48 231 4.0
58 10286 1962 Graduation Married 83715.0 0 0 2014-02-03 2 318 ... 0 0 0 0 SA 3618 0 61 926 2.0
59 1992 1964 Graduation Married 60597.0 0 1 2014-01-01 2 522 ... 1 0 1 0 SP 3651 1 59 893 2.0
60 4246 1982 Master Single 6560.0 0 0 2013-12-12 2 67 ... 0 0 0 0 SP 3671 0 41 373 3.0
61 10623 1961 Master Together 48330.0 0 1 2013-11-15 2 28 ... 0 0 0 0 SP 3698 1 62 32 3.0
62 4867 1968 PhD Single 38236.0 1 1 2013-09-20 2 58 ... 0 0 0 0 SP 3754 2 55 88 4.0
63 3112 1977 Master Married 22701.0 1 0 2013-09-05 2 2 ... 0 0 0 0 SP 3769 1 46 24 3.0
64 4865 1974 Master Divorced 53367.0 1 1 2013-08-31 2 229 ... 0 0 1 0 AUS 3774 2 49 400 3.0
65 6287 1986 Graduation Together 34728.0 1 0 2013-07-30 2 14 ... 0 0 1 0 SP 3806 1 37 36 2.0
66 4405 1956 Master Married 63915.0 0 2 2013-07-30 2 622 ... 0 0 0 0 AUS 3806 2 67 789 3.0
67 5332 1960 2n Cycle Married 82504.0 0 0 2013-07-27 2 362 ... 0 0 0 0 IND 3809 0 63 1066 1.0
68 1519 1972 PhD Single 38578.0 1 1 2013-06-22 2 38 ... 0 0 1 0 SP 3844 2 51 73 4.0
69 9080 1972 PhD Single 38578.0 1 1 2013-06-22 2 38 ... 0 0 0 0 SP 3844 2 51 73 4.0
70 1772 1975 PhD Married 79174.0 0 0 2013-01-11 2 1074 ... 1 0 1 0 CA 4006 0 48 2043 4.0
71 5341 1962 2n Cycle Divorced 81975.0 0 1 2013-01-05 2 983 ... 0 0 0 0 SA 4012 1 61 1588 1.0
72 5510 1977 Master Married 43263.0 0 1 2012-11-21 2 262 ... 0 0 0 0 SP 4057 1 46 441 3.0
73 3887 1970 Graduation Single 27242.0 1 0 2012-11-11 2 3 ... 0 0 1 0 IND 4067 1 53 106 2.0
74 7022 1971 Graduation Married 76445.0 1 0 2012-09-28 2 739 ... 0 0 0 0 SA 4111 1 52 1410 2.0
75 9999 1965 Graduation Together 75276.0 0 0 2012-09-27 2 610 ... 0 0 0 0 SP 4112 0 58 1040 2.0
76 10352 1963 Graduation Widow 34213.0 1 1 2012-09-07 2 50 ... 0 0 1 0 SA 4132 2 60 117 2.0
77 7919 1976 PhD Together 72335.0 0 0 2012-08-13 2 1285 ... 0 0 1 0 SP 4157 0 47 2092 4.0
78 4114 1964 Master Married 79143.0 0 0 2012-08-11 2 650 ... 0 0 0 0 AUS 4159 0 59 1693 3.0
79 7990 1947 Graduation Married 27469.0 0 0 2012-08-02 2 9 ... 0 0 0 0 CA 4168 0 76 17 2.0
80 9888 1969 Graduation Together 68695.0 0 0 2014-06-25 3 458 ... 0 0 0 0 SP 3476 0 54 1091 2.0
81 4399 1969 Graduation Together 68695.0 0 0 2014-06-25 3 458 ... 0 0 0 0 CA 3476 0 54 1091 2.0
82 4452 1957 Graduation Single 50388.0 0 1 2014-05-28 3 292 ... 1 0 1 0 GER 3504 1 66 372 2.0
83 4785 1970 PhD Together 77622.0 0 2 2014-04-14 3 520 ... 0 0 0 0 SA 3548 2 53 714 4.0
84 8461 1962 Graduation Divorced 46102.0 2 1 2014-03-10 3 14 ... 0 0 0 0 SP 3583 3 61 16 2.0
85 3878 1980 2n Cycle Single 31859.0 1 0 2014-02-27 3 3 ... 0 0 0 0 SP 3594 1 43 48 1.0
86 9612 1987 2n Cycle Single 23830.0 0 0 2014-02-07 3 1 ... 0 0 0 0 SP 3614 0 36 43 1.0
87 4098 1973 Graduation Married 24639.0 1 1 2014-01-28 3 20 ... 0 0 0 0 AUS 3624 2 50 44 2.0
88 158 1945 PhD Together 71604.0 0 0 2013-11-17 3 345 ... 0 0 1 0 SP 3696 0 78 1196 4.0
89 3896 1984 Graduation Married 27255.0 1 0 2013-11-07 3 22 ... 0 0 0 0 SP 3706 1 39 37 2.0
90 9970 1977 Graduation Together 55375.0 0 1 2013-10-17 3 42 ... 0 0 0 0 CA 3727 1 46 162 2.0
91 4002 1960 PhD Married 77037.0 0 1 2013-10-13 3 463 ... 0 0 0 0 SP 3731 1 63 1123 4.0
92 10914 1970 Graduation Single 24163.0 1 1 2013-10-12 3 4 ... 0 0 0 0 SP 3732 2 53 17 2.0
93 7279 1969 PhD Together 69476.0 0 0 2013-09-30 3 260 ... 0 0 0 0 US 3744 0 54 1044 4.0
94 10582 1979 Graduation Married 72063.0 0 1 2013-07-03 3 180 ... 0 0 0 0 GER 3833 1 44 758 2.0
95 4470 1962 Master Married 58646.0 0 1 2013-06-10 3 62 ... 0 0 0 0 SP 3856 1 61 140 3.0
96 6183 1962 Master Married 58646.0 0 1 2013-06-10 3 62 ... 0 0 0 0 GER 3856 1 61 140 3.0
97 6379 1949 Master Widow 47570.0 1 1 2013-05-29 3 67 ... 0 0 1 0 US 3868 2 74 121 3.0
98 8601 1980 Graduation Married 80011.0 0 1 2013-04-29 3 421 ... 0 0 0 0 AUS 3898 1 43 1395 2.0
99 4827 1956 PhD Single 54998.0 0 1 2013-03-10 3 154 ... 0 0 1 0 SP 3948 1 67 455 4.0

50 rows × 33 columns
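The mapping applied above (Basic → 0, 2n Cycle → 1, Graduation → 2, Master → 3, PhD → 4) can be verified on a toy column:

```python
import pandas as pd
from sklearn.preprocessing import OrdinalEncoder

# Explicit category order: lowest education level first
order = ["Basic", "2n Cycle", "Graduation", "Master", "PhD"]
enc = OrdinalEncoder(categories=[order])

toy = pd.DataFrame({"Education": ["PhD", "Basic", "Graduation"]})
codes = enc.fit_transform(toy[["Education"]])
print(codes.ravel().tolist())  # [4.0, 0.0, 2.0]
```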

STEP 7 - Performing one-hot encoding on the 'Marital_Status' column

In [19]:
# Unique values in 'Marital_Status' column
unique_marital_status = df_encoded['Marital_Status'].unique()
print("Unique values in 'Marital_Status':", unique_marital_status)
Unique values in 'Marital_Status': ['Divorced' 'Single' 'Married' 'Together' 'Widow' 'YOLO' 'Alone' 'Absurd']
In [20]:
# Performing one-hot encoding on 'Marital_Status' column
df_encoded = pd.get_dummies(df_encoded, columns=['Marital_Status'], prefix='Marital')

# Display the updated DataFrame
df_encoded.head()
Out[20]:
ID Year_Birth Education Income Kidhome Teenhome Dt_Customer Recency MntWines MntFruits ... Total_Spending Education Encoded Marital_Absurd Marital_Alone Marital_Divorced Marital_Married Marital_Single Marital_Together Marital_Widow Marital_YOLO
0 1826 1970 Graduation 84835.0 0 0 2014-06-16 0 189 104 ... 1190 2.0 False False True False False False False False
1 1 1961 Graduation 57091.0 0 0 2014-06-15 0 464 5 ... 577 2.0 False False False False True False False False
2 10476 1958 Graduation 67267.0 0 1 2014-05-13 0 134 11 ... 251 2.0 False False False True False False False False
3 1386 1967 Graduation 32474.0 1 1 2014-05-11 0 10 0 ... 11 2.0 False False False False False True False False
4 5371 1989 Graduation 21474.0 1 0 2014-04-08 0 6 16 ... 91 2.0 False False False False True False False False

5 rows × 40 columns
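One-hot encoding with get_dummies can be sanity-checked on a toy column; recent pandas versions emit boolean dummy columns, which matches the True/False values in the output above:

```python
import pandas as pd

# Toy column with two marital statuses
toy = pd.DataFrame({"Marital_Status": ["Single", "Married", "Single"]})

# One dummy column per category, named with the 'Marital' prefix
dummies = pd.get_dummies(toy, columns=["Marital_Status"], prefix="Marital")
print(list(dummies.columns))  # ['Marital_Married', 'Marital_Single']
```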

STEP 8 - Creating a heatmap to show the correlation between all variables

In [21]:
# Printing list of all columns in the dataset
print(df_encoded.columns)
Index(['ID', 'Year_Birth', 'Education', 'Income', 'Kidhome', 'Teenhome',
       'Dt_Customer', 'Recency', 'MntWines', 'MntFruits', 'MntMeatProducts',
       'MntFishProducts', 'MntSweetProducts', 'MntGoldProds',
       'NumDealsPurchases', 'NumWebPurchases', 'NumCatalogPurchases',
       'NumStorePurchases', 'NumWebVisitsMonth', 'AcceptedCmp3',
       'AcceptedCmp4', 'AcceptedCmp5', 'AcceptedCmp1', 'AcceptedCmp2',
       'Response', 'Complain', 'Country', 'Days_Since_Enrollment',
       'Total_Children', 'Age', 'Total_Spending', 'Education Encoded',
       'Marital_Absurd', 'Marital_Alone', 'Marital_Divorced',
       'Marital_Married', 'Marital_Single', 'Marital_Together',
       'Marital_Widow', 'Marital_YOLO'],
      dtype='object')
In [22]:
# List of columns to be dropped
columns_to_drop = ['ID', 'Year_Birth', 'Dt_Customer', 'Country', 'Education']

# Drop the unnecessary columns
df_new1 = df_encoded.drop(columns=columns_to_drop)

# Display the updated DataFrame
df_new1.head()
Out[22]:
Income Kidhome Teenhome Recency MntWines MntFruits MntMeatProducts MntFishProducts MntSweetProducts MntGoldProds ... Total_Spending Education Encoded Marital_Absurd Marital_Alone Marital_Divorced Marital_Married Marital_Single Marital_Together Marital_Widow Marital_YOLO
0 84835.0 0 0 0 189 104 379 111 189 218 ... 1190 2.0 False False True False False False False False
1 57091.0 0 0 0 464 5 64 7 0 37 ... 577 2.0 False False False False True False False False
2 67267.0 0 1 0 134 11 59 15 2 30 ... 251 2.0 False False False True False False False False
3 32474.0 1 1 0 10 0 1 0 0 0 ... 11 2.0 False False False False False True False False
4 21474.0 1 0 0 6 16 24 11 0 34 ... 91 2.0 False False False False True False False False

5 rows × 35 columns

In [23]:
df_new1.shape
Out[23]:
(2232, 35)
In [24]:
# Calculate the correlation matrix
correlation_matrix = df_new1.corr()

# Set up the matplotlib figure
plt.figure(figsize=(20, 18))

# Create a heatmap with the correlation matrix
sns.heatmap(correlation_matrix, annot=True, cmap="coolwarm", fmt=".2f", linewidths=.5, square=True, cbar_kws={"shrink": 0.8}, annot_kws={"size": 8})

# Show the plot
plt.show()
[Figure: annotated correlation heatmap of all df_new1 variables]

STEP 9.1 - Testing Hypothesis 1: Older people are not as tech-savvy and probably prefer shopping in-store

In [25]:
# 'Age' is the column representing age and 'NumStorePurchases' is the column representing in-store purchases
age_young = df_encoded[df_encoded['Age'] < 40]['NumStorePurchases']
age_old = df_encoded[df_encoded['Age'] >= 40]['NumStorePurchases']

# Perform t-test
t_stat, p_value = ttest_ind(age_young, age_old, equal_var=False)

# Print the results
print(f'T-statistic: {t_stat}\nP-value: {p_value}')

# Check if the p-value is below a significance level (e.g., 0.05) to reject the null hypothesis
if p_value < 0.05:
    print("There is a significant difference in in-store purchases between younger and older people.")
    if t_stat < 0:
        print('Older customers average more in-store purchases, supporting the hypothesis that "Older people are not as tech-savvy and probably prefer shopping in-store".')
    else:
        print("Younger customers actually average more in-store purchases, which does not support the hypothesis.")
else:
    print("There is no significant difference in in-store purchases between younger and older people.")
T-statistic: -2.414029744659807
P-value: 0.01632747760333936
There is a significant difference in in-store purchases between younger and older people.
Older customers average more in-store purchases, supporting the hypothesis that "Older people are not as tech-savvy and probably prefer shopping in-store".
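A small p-value by itself only says the group means differ; the direction comes from the sign of the t-statistic (first group's mean minus second group's). A hedged sketch with synthetic counts (the Poisson rates are assumptions standing in for `NumStorePurchases` by age group):

```python
import numpy as np
from scipy.stats import ttest_ind

# Synthetic stand-ins for NumStorePurchases by age group (assumed rates)
rng = np.random.default_rng(42)
young = rng.poisson(5.0, 400)    # age < 40
old = rng.poisson(6.0, 1600)     # age >= 40

t_stat, p_value = ttest_ind(young, old, equal_var=False)

# A small p-value says the means differ; the sign of the
# t-statistic (first group minus second) gives the direction
direction = "older buy more in-store" if t_stat < 0 else "younger buy more in-store"
print(f"t={t_stat:.2f}, p={p_value:.4f}, {direction}")
```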

STEP 9.2 - Testing hypothesis 2: Customers with kids probably have less time to visit a store and would prefer to shop online.¶

In [26]:
# 'NumWebPurchases' is the column representing the number of purchases made through the company's website
# 'Kidhome' is the column representing the presence of kids (1 if there are kids, 0 otherwise)

# Split the data into two groups: customers with kids and customers without kids
web_purchases_with_kids = df_encoded[df_encoded['Kidhome'] == 1]['NumWebPurchases']
web_purchases_without_kids = df_encoded[df_encoded['Kidhome'] == 0]['NumWebPurchases']

# Perform t-test
t_statistic, p_value = ttest_ind(web_purchases_with_kids, web_purchases_without_kids, equal_var=False)

# Print the results
print(f'T-Statistic: {t_statistic}')
print(f'P-Value: {p_value}')

# Determine the significance level (e.g., 0.05)
alpha = 0.05

# Check if the p-value is less than the significance level
if p_value < alpha:
    print("There is a significant difference in online purchases between customers with and without kids.")
    if t_statistic < 0:
        print("Customers with kids actually make fewer web purchases on average, so the hypothesis that they prefer to shop online is not supported.")
    else:
        print("Customers with kids make more web purchases on average, supporting the hypothesis that they prefer to shop online.")
else:
    print("There is no significant difference in online purchases between customers with and without kids.")
T-Statistic: -19.408653381527323
P-Value: 2.5808735484888766e-77
There is a significant difference in online purchases between customers with and without kids.
Customers with kids actually make fewer web purchases on average, so the hypothesis that they prefer to shop online is not supported.
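Purchase counts are typically right-skewed, so a rank-based test is a useful cross-check on the t-test. A minimal sketch with the Mann-Whitney U test on synthetic counts (the rates and sample sizes are assumptions, not the real `NumWebPurchases` data):

```python
import numpy as np
from scipy.stats import mannwhitneyu

# Purchase counts are skewed, so a rank-based test is a useful
# cross-check on the t-test; synthetic counts stand in for the data
rng = np.random.default_rng(7)
with_kids = rng.poisson(3.0, 500)     # stand-in for NumWebPurchases, Kidhome == 1
without_kids = rng.poisson(5.0, 500)  # stand-in for Kidhome == 0

u_stat, p_value = mannwhitneyu(with_kids, without_kids, alternative="two-sided")
print(f"U={u_stat}, p={p_value:.3g}")
```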

STEP 9.3 - Testing hypothesis 3: Other distribution channels may cannibalize sales at the store.¶

In [27]:
# 'NumStorePurchases' is the column representing the number of purchases made directly in the store
# 'NumWebPurchases', 'NumCatalogPurchases', 'NumDealsPurchases' represent purchases through other channels

# Split the data into two groups: store purchases and purchases through other channels
store_purchases = df_encoded['NumStorePurchases']
other_channel_purchases = df_encoded['NumWebPurchases'] + df_encoded['NumCatalogPurchases'] + df_encoded['NumDealsPurchases']

# Perform t-test
t_statistic, p_value = ttest_ind(store_purchases, other_channel_purchases, equal_var=False)

# Print the results
print(f'T-Statistic: {t_statistic}')
print(f'P-Value: {p_value}')

# Determine the significance level (e.g., 0.05)
alpha = 0.05

# Check if the p-value is less than the significance level
if p_value < alpha:
    print("There is a significant difference in purchase counts between the store and the other channels combined.")
    print('This is consistent with the hypothesis that "Other distribution channels may cannibalize sales at the store."')
else:
    print("There is no significant difference in purchase counts between the store and the other channels combined.")
    print('So the data does not support the hypothesis that "Other distribution channels may cannibalize sales at the store."')
T-Statistic: -24.856413539505034
P-Value: 2.6421518760038623e-126
There is a significant difference in purchase counts between the store and the other channels combined.
This is consistent with the hypothesis that "Other distribution channels may cannibalize sales at the store."
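One caveat: every customer contributes both a store count and an other-channel count, so the two samples are paired rather than independent, and `ttest_ind` ignores that pairing. A paired t-test on the per-customer differences respects it. A sketch on synthetic, deliberately correlated counts (the rates are assumptions):

```python
import numpy as np
from scipy.stats import ttest_rel

# Each customer contributes both a store count and an other-channel count,
# so the two samples are paired; ttest_rel tests the per-customer difference
rng = np.random.default_rng(1)
store = rng.poisson(6.0, 300)
other = store + rng.poisson(3.0, 300)  # correlated and systematically higher

t_stat, p_value = ttest_rel(store, other)
print(f"paired t={t_stat:.2f}, p={p_value:.3g}")
```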

STEP 9.4 - Testing hypothesis 4: Does the US fare significantly better than the rest of the world in terms of total purchases?¶

In [28]:
# 'Country' is the column representing the country of the customer
# 'Total_Spending' is the column representing the total spending

# Split the data into two groups: US and other countries
us_spending = df_encoded[df_encoded['Country'] == 'US']['Total_Spending']
other_countries_spending = df_encoded[df_encoded['Country'] != 'US']['Total_Spending']

# Perform t-test
t_statistic, p_value = ttest_ind(us_spending, other_countries_spending, equal_var=False)

# Print the results
print(f'T-Statistic: {t_statistic}')
print(f'P-Value: {p_value}')

# Determine the significance level (e.g., 0.05)
alpha = 0.05

# Check if the p-value is less than the significance level
if p_value < alpha:
    print("There is a significant difference in total purchases between the US and other countries.")
    print('This supports the claim that "The US fares significantly better than the rest of the world in terms of total purchases".')
else:
    print("There is no significant difference in total purchases between the US and other countries.")
    print('So the data does not support the claim that "The US fares significantly better than the rest of the world in terms of total purchases".')
T-Statistic: 0.13566705169049106
P-Value: 0.892310891071058
There is no significant difference in total purchases between the US and other countries.
So the data does not support the claim that "The US fares significantly better than the rest of the world in terms of total purchases".
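A per-country summary complements the two-group US-vs-rest test by showing all countries at once. A sketch with `groupby` on hypothetical spending data (`df_demo`, the country mix, and the gamma spending distribution are all invented):

```python
import numpy as np
import pandas as pd

# Hypothetical spending data; a per-country summary complements the
# two-group US-vs-rest test by showing all countries at once
rng = np.random.default_rng(3)
df_demo = pd.DataFrame({
    "Country": rng.choice(["US", "SP", "CA", "AUS"], size=400),
    "Total_Spending": rng.gamma(2.0, 300.0, size=400),
})
summary = df_demo.groupby("Country")["Total_Spending"].agg(["mean", "median", "count"])
print(summary)
```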

STEP 10.1 - Visualization to help analyze: Which products are performing the best, and which the least, in terms of revenue?¶

In [29]:
# Mnt* columns represent spending on different products
product_columns = ['MntWines', 'MntFruits', 'MntMeatProducts', 'MntFishProducts', 'MntSweetProducts', 'MntGoldProds']

# Calculate total spending for each product category
total_spending_per_product = df_encoded[product_columns].sum()

# Plot the bar chart
plt.figure(figsize=(10, 6))
total_spending_per_product.sort_values().plot(kind='bar', color='skyblue')
plt.title('Total Spending on Different Products')
plt.xlabel('Product Categories')
plt.ylabel('Total Spending')
plt.show()
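Converting the category totals to a percentage share makes "best vs. least performing" explicit. A sketch with made-up totals (these are illustrative numbers, not the real dataset figures; in the notebook the same pattern would apply to `total_spending_per_product`):

```python
import pandas as pd

# Made-up category totals (not the real dataset figures) to show the idea:
# a percentage share makes "best vs least performing" explicit
totals = pd.Series({
    "MntWines": 500, "MntMeatProducts": 280, "MntGoldProds": 90,
    "MntFishProducts": 70, "MntSweetProducts": 35, "MntFruits": 25,
})
share_pct = (totals / totals.sum() * 100).sort_values(ascending=False)
print(share_pct.round(1))
```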

STEP 10.2 - Visualization to help analyze: Is there any pattern between the age of customers and the last campaign acceptance rate?¶

In [30]:
# 'Age' is the column representing age and 'AcceptedCmp4' is the column representing the last campaign acceptance
plt.figure(figsize=(12, 8))
sns.barplot(x='Age', y='AcceptedCmp4', data=df_encoded, errorbar=None)
plt.title('Bar Plot: Age vs. Mean Last Campaign Acceptance Rate')
plt.xlabel('Age')
plt.ylabel('Mean Last Campaign Acceptance Rate')
plt.tight_layout()
plt.show()

STEP 10.3 - Visualization to help analyze: Which country has the greatest number of customers who accepted the last campaign?¶

In [31]:
# 'Country' is the column representing the country of the customer and 'AcceptedCmp4' is the column representing the last campaign acceptance
accepted_campaign_by_country = df_encoded[df_encoded['AcceptedCmp4'] == 1]['Country'].value_counts()

# Plot the bar chart
plt.figure(figsize=(10, 6))
accepted_campaign_by_country.plot(kind='bar', color='green')
plt.title('Number of Customers Accepted Last Campaign by Country')
plt.xlabel('Country')
plt.ylabel('Number of Customers')
plt.show()
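Raw counts favor countries that simply have more customers. Since `AcceptedCmp4` is a 0/1 flag, its per-country mean gives an acceptance *rate*, which normalizes for market size. A sketch on synthetic data (the country mix and acceptance probabilities are invented):

```python
import numpy as np
import pandas as pd

# Raw counts favor countries with many customers; the mean of the 0/1
# acceptance flag gives a per-country *rate* (synthetic data shown)
rng = np.random.default_rng(5)
df_demo = pd.DataFrame({
    "Country": rng.choice(["SP", "US", "CA"], size=300, p=[0.6, 0.2, 0.2]),
    "AcceptedCmp4": rng.integers(0, 2, size=300),
})
rates = df_demo.groupby("Country")["AcceptedCmp4"].mean().sort_values(ascending=False)
print(rates)
```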

STEP 10.4 - Visualization to help analyze: Is there any pattern between the number of children at home and total spend?¶

In [32]:
# 'Kidhome' is the column representing the number of children at home and 'Total_Spending' is the column representing total spending
plt.figure(figsize=(10, 6))
sns.barplot(x='Kidhome', y='Total_Spending', data=df_encoded)
plt.title('Bar Plot: Number of Children at Home vs. Total Spend')
plt.xlabel('Number of Children at Home')
plt.ylabel('Total Spend')
plt.show()

STEP 10.5 - Visualization to help analyze: Education background of the customers who complained in the last 2 years.¶

In [33]:
# 'Education' is the column representing education level and 'Complain' is the column representing whether a customer complained
complained_education = df_encoded[df_encoded['Complain'] == 1]['Education'].value_counts()

# Plot the bar chart
plt.figure(figsize=(10, 6))
complained_education.plot(kind='bar', color='orange')
plt.title('Education Background of Customers Who Complained in Last 2 Years')
plt.xlabel('Education Level')
plt.ylabel('Number of Customers')
plt.show()
In [34]:
# Viewing all the 5 graphs in one slide

# Set the overall figure size
plt.figure(figsize=(18, 12))

# Plot 1
plt.subplot(2, 3, 1)
product_columns = ['MntWines', 'MntFruits', 'MntMeatProducts', 'MntFishProducts', 'MntSweetProducts', 'MntGoldProds']
total_spending_per_product = df_encoded[product_columns].sum()
total_spending_per_product.sort_values().plot(kind='bar', color='skyblue')
plt.title('Total Spending on Different Products')
plt.xlabel('Product Categories')
plt.ylabel('Total Spending')

# Plot 2
plt.subplot(2, 3, 2)
sns.barplot(x='Age', y='AcceptedCmp4', data=df_encoded, errorbar=None)
plt.title('Bar Plot: Age vs. Mean Last Campaign Acceptance Rate')
plt.xlabel('Age')
plt.ylabel('Mean Last Campaign Acceptance Rate')

# Plot 3
plt.subplot(2, 3, 3)
accepted_campaign_by_country = df_encoded[df_encoded['AcceptedCmp4'] == 1]['Country'].value_counts()
accepted_campaign_by_country.plot(kind='bar', color='green')
plt.title('Number of Customers Accepted Last Campaign by Country')
plt.xlabel('Country')
plt.ylabel('Number of Customers')

# Plot 4
plt.subplot(2, 3, 4)
sns.barplot(x='Kidhome', y='Total_Spending', data=df_encoded)
plt.title('Bar Plot: Number of Children at Home vs. Total Spend')
plt.xlabel('Number of Children at Home')
plt.ylabel('Total Spend')

# Plot 5
plt.subplot(2, 3, 5)
complained_education = df_encoded[df_encoded['Complain'] == 1]['Education'].value_counts()
complained_education.plot(kind='bar', color='orange')
plt.title('Education Background of Customers Who Complained in Last 2 Years')
plt.xlabel('Education Level')
plt.ylabel('Number of Customers')

# Adjust layout for better appearance
plt.tight_layout()

# Show the plots
plt.show()