# Display the first few rows
print(df.head())

# Display basic information
# info() prints its report itself and returns None, so it is not wrapped in print()
df.info()

# Display summary statistics
print(df.describe())
Data Selection
Selecting Columns
# Select a single column
df['Column1']

# Select multiple columns (note the list inside the brackets)
df[['Column1', 'Column2']]
Selecting Rows
# Select rows by integer position
df.iloc[0]    # First row
df.iloc[0:3]  # First three rows

# Select rows by index label
df.loc[0]    # Row whose label is 0
df.loc[0:2]  # Rows with labels 0 to 2 (end label included)
Filtering Data
# Filter rows based on a condition
df[df['Column1'] > 2]
Data Cleaning
Handling Missing Data
# Drop rows with missing values (returns a new DataFrame; df itself is unchanged)
df.dropna()

# Fill missing values; `value` is a placeholder for the replacement you choose
df.fillna(value)
# Group by a column and calculate mean
# (pass numeric_only=True to mean() if df contains non-numeric columns)
df.groupby('Column1').mean()

# Group by multiple columns and calculate sum
df.groupby(['Column1', 'Column2']).sum()
Sorting Data
# Sort by a single column
df.sort_values(by='Column1')

# Sort by multiple columns (Column1 ascending, then Column2 descending)
df.sort_values(by=['Column1', 'Column2'], ascending=[True, False])
Merging and Joining
Concatenation
# Concatenate two DataFrames vertically
pd.concat([df1, df2])

# Concatenate two DataFrames horizontally
pd.concat([df1, df2], axis=1)
Merging
# Merge two DataFrames on a key
pd.merge(df1, df2, on='Key')

# Merge with different join types
pd.merge(df1, df2, on='Key', how='left')   # Left join
pd.merge(df1, df2, on='Key', how='right')  # Right join
pd.merge(df1, df2, on='Key', how='inner')  # Inner join
pd.merge(df1, df2, on='Key', how='outer')  # Outer join
Input/Output
Reading and Writing CSV Files
# Read a CSV file
df = pd.read_csv('path_to_csv_file.csv')

# Write a DataFrame to a CSV file (index=False leaves the row index out of the file)
df.to_csv('path_to_output_csv_file.csv', index=False)
Reading and Writing Excel Files
# Read an Excel file
df = pd.read_excel('path_to_excel_file.xlsx')

# Write a DataFrame to an Excel file (index=False leaves the row index out of the file)
df.to_excel('path_to_output_excel_file.xlsx', index=False)
Example
Complete Example
import pandas as pd

# Create a DataFrame from a dictionary
data = {
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [25, 30, 35],
    'City': ['New York', 'Los Angeles', 'Chicago'],
}
df = pd.DataFrame(data)

# Display the first few rows
print(df.head())

# Add a new column
df['Age in 10 Years'] = df['Age'] + 10

# Filter rows where Age is greater than 30
filtered_df = df[df['Age'] > 30]

# Group by City and calculate mean Age
# numeric_only=True skips the text 'Name' column; without it pandas >= 2.0
# raises TypeError when asked to average strings
grouped_df = df.groupby('City').mean(numeric_only=True)

# Save the DataFrame to a CSV file
df.to_csv('output.csv', index=False)