# View the heat map of correlations between variables
plt.figure(figsize=(12, 7))
# Correlate only the numeric/boolean columns: since pandas 2.0, corr() no
# longer drops non-numeric columns silently and raises on them instead.
correlation_data = df.select_dtypes(include=["number", "bool"]).corr()
# Express the coefficients as whole percentages. The values stay float so a
# NaN correlation (e.g. from a constant column) does not crash an int cast;
# seaborn leaves NaN cells blank. Adding 0.0 turns -0.0 into 0.0 so the
# annotation reads "0" rather than "-0".
correlation_data = (correlation_data * 100).round(0) + 0.0
sns.heatmap(correlation_data, annot=True, fmt=".0f", cmap="YlGnBu")
plt.show()
# Box plot of df with a title, axis labels, a dotted grid, a y axis shown in
# dollar-millions and two reference lines marking the preferred range.
# (This import can be moved to the file's top-level import section.)
from matplotlib.ticker import FuncFormatter

ax = sns.boxplot(data=df, width=0.5, palette="Blues")
ax.set_title('Plot Title')
ax.set_xlabel("Label for x axis")
ax.set_ylabel("Label for y axis")
# Format ticks through a formatter instead of set_yticklabels(): fixed label
# text is frozen at the tick positions current at call time and goes stale
# when the axis limits change later (set_ylim below). A formatter is applied
# to whatever ticks are finally drawn.
ax.yaxis.set_major_formatter(
    FuncFormatter(lambda value, _pos: '${:,}m'.format(int(value / 1000000)))
)
ax.grid(which='major', linestyle=':', linewidth=1, color='grey')
# Make both axis labels larger and blue.
for item in (ax.xaxis.label, ax.yaxis.label):
    item.set_fontsize(14)
    item.set_color("blue")
# Draw 2 red lines showing the preferred range
ax.axhline(5000000, ls='-', linewidth=1, color='red')
ax.axhline(15000000, ls='-', linewidth=1, color='red')
# Set the y axis range; the trailing semicolon only suppresses the echoed
# return value when this is the last line of a notebook cell.
ax.set_ylim(0, 80000000);
# Rename columns (returns a new frame, so assign the result back):
df = df.rename(columns={"col1": "column1", "col2": "column2"})
# Drop columns in place; `columns=` already selects the column axis, so no
# separate `axis` argument is needed:
df.drop(columns=["column1", "column2"], inplace=True)
# Count the distinct values in each column (missing values are not counted):
df.nunique(axis=0, dropna=True)
# Count the missing values in each column:
df.isna().sum()
# Convert a string column to date (day comes before month, e.g. 31/12/2020):
df["date_column"] = pd.to_datetime(df["string_column"], dayfirst=True)
# Get the day-of-month element from a date column:
df["day"] = df["date_column"].dt.day
# Get the weekday name (e.g. Monday, Tuesday) from a date column.
# day_name() returns English names by default; the locale name 'English' is
# only recognised on Windows and raises locale.Error on Linux/macOS.
df["weekday_name"] = df["date_column"].dt.day_name()
# Count the rows in every (column1, column2) group:
df.groupby(by=["column1", "column2"]).size()
# Join 2 data frames side by side (column-wise, aligned on the index):
df = pd.concat(objs=[df1, df2], axis="columns")