import numpy as np
import pandas as pd
from math import log2
# Load the dataset
df = pd.read_csv('tennisdata.csv')  # Make sure the file is in the same folder
print(df)
# Function to calculate entropy
def calculate_entropy(data_column):
    counts = data_column.value_counts()
    probabilities = counts / len(data_column)
    entropy = -sum(probabilities * np.log2(probabilities))
    return entropy
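# Quick sanity check for calculate_entropy (illustrative values only, not read
# from the CSV): a 9-yes / 5-no split, as in the classic play-tennis dataset,
# should give an entropy of roughly 0.940 bits.
_example_column = pd.Series(['yes'] * 9 + ['no'] * 5)
print("Entropy of 9 yes / 5 no:", calculate_entropy(_example_column))  # ~0.940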
# Function to calculate information gain
def calculate_information_gain(data, feature, target="playTennis"):
    # Total entropy of the target column
    total_entropy = calculate_entropy(data[target])
    # Weighted entropy for each value of the feature
    values = data[feature].unique()
    weighted_entropy = 0
    for value in values:
        subset = data[data[feature] == value]
        subset_entropy = calculate_entropy(subset[target])
        weighted_entropy += (len(subset) / len(data)) * subset_entropy
    # Information gain
    information_gain = total_entropy - weighted_entropy
    return information_gain
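# Illustrative check for calculate_information_gain (toy data, not the CSV; the
# column names here are made up): a perfectly predictive feature should recover
# the full target entropy, 1.0 bit for a 2-yes / 2-no split.
_toy = pd.DataFrame({
    "windy":      ["yes", "yes", "no", "no"],
    "playTennis": ["no",  "no",  "yes", "yes"],
})
print("Gain of 'windy' on toy data:", calculate_information_gain(_toy, "windy"))  # 1.0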
# Simple ID3 function to build the decision tree
def build_tree(data, features, target="playTennis"):
    # If all target values are the same, return that value
    if len(data[target].unique()) == 1:
        return data[target].iloc[0]
    # If no features are left, return the most common target value
    if len(features) == 0:
        return data[target].mode()[0]
    # Find the best feature based on information gain
    best_feature = max(features, key=lambda f: calculate_information_gain(data, f, target))
    # Initialize the tree with the best feature
    tree = {best_feature: {}}
    remaining_features = [f for f in features if f != best_feature]
    # Build branches for each value of the best feature
    for value in data[best_feature].unique():
        subset = data[data[best_feature] == value]
        tree[best_feature][value] = build_tree(subset, remaining_features, target)
    return tree
# Build the decision tree
features = list(df.columns[:-1])  # All columns except the target
decision_tree = build_tree(df, features)
print("Decision Tree:", decision_tree)
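# Minimal usage sketch (an addition, not part of the original script): build_tree
# returns nested dicts of the form {feature: {value: subtree_or_label}}, so a new
# row can be classified by walking from the root until a leaf label is reached.
# The sample values in the commented call are hypothetical; adjust them to match
# the actual column names and values in tennisdata.csv.
def classify(tree, sample):
    # A leaf is anything that is not a dict (e.g. "yes" / "no")
    if not isinstance(tree, dict):
        return tree
    feature = next(iter(tree))        # the feature this node splits on
    value = sample.get(feature)       # the sample's value for that feature
    subtree = tree[feature].get(value)
    if subtree is None:               # unseen value: no matching branch
        return None
    return classify(subtree, sample)

# Example call (hypothetical feature names/values):
# print(classify(decision_tree, {"outlook": "sunny", "humidity": "normal"}))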