# ID3 decision tree classifier for the "play tennis" dataset.

import numpy as np

import pandas as pd

from math import log2

 

# Load the dataset; tennisdata.csv must sit in the same folder as this script.
# (The original used curly quotes ‘…’, which are a SyntaxError in Python.)
df = pd.read_csv('tennisdata.csv')
print(df)

 

# Function to calculate entropy
def calculate_entropy(data_column):
    """Return the Shannon entropy (in bits) of a categorical column.

    Parameters
    ----------
    data_column : pandas.Series
        Column of class labels (any hashable values).

    Returns
    -------
    float
        ``-sum(p * log2(p))`` over the distinct values; 0.0 when the
        column contains a single distinct value.
    """
    counts = data_column.value_counts()
    probabilities = counts / len(data_column)
    # value_counts() never reports a zero count, so log2 is always defined.
    entropy = -sum(probabilities * np.log2(probabilities))
    return entropy

 

# Function to calculate information gain
def calculate_information_gain(data, feature, target="playTennis"):
    """Return the information gain of splitting ``data`` on ``feature``.

    Parameters
    ----------
    data : pandas.DataFrame
        Dataset containing both the feature and the target column.
    feature : str
        Column name to evaluate as a split candidate.
    target : str, optional
        Name of the class-label column (default ``"playTennis"``).

    Returns
    -------
    float
        Entropy of the target minus the size-weighted entropy of the
        target within each partition induced by ``feature``.
    """
    # Total entropy of the target column before splitting.
    total_entropy = calculate_entropy(data[target])

    # Weighted entropy over the partitions.  The original pasted code ran
    # this loop twice — the first pass computed subset entropies and threw
    # them away; a single accumulating pass is equivalent and correct.
    weighted_entropy = 0.0
    for value in data[feature].unique():
        subset = data[data[feature] == value]
        subset_entropy = calculate_entropy(subset[target])
        weighted_entropy += (len(subset) / len(data)) * subset_entropy

    # Information gain (plain ASCII minus; the source had an en-dash here).
    information_gain = total_entropy - weighted_entropy
    return information_gain

 

# Simple ID3 function to build the decision tree
def build_tree(data, features, target="playTennis"):
    """Recursively build an ID3 decision tree as nested dicts.

    Parameters
    ----------
    data : pandas.DataFrame
        (Sub)set of training rows.
    features : sequence of str
        Candidate feature column names still available for splitting.
    target : str, optional
        Name of the class-label column (default ``"playTennis"``).

    Returns
    -------
    dict or label
        ``{feature: {value: subtree_or_label, ...}}``, or a bare class
        label at a leaf.
    """
    # Base case 1: the node is pure — every row has the same label.
    if len(data[target].unique()) == 1:
        return data[target].iloc[0]

    # Base case 2: no features left — predict the majority label.
    if len(features) == 0:
        return data[target].mode()[0]

    # Greedy step: split on the feature with the highest information gain.
    best_feature = max(features, key=lambda f: calculate_information_gain(data, f))

    tree = {best_feature: {}}
    remaining_features = [f for f in features if f != best_feature]

    # One branch per observed value of the chosen feature.
    for value in data[best_feature].unique():
        subset = data[data[best_feature] == value]
        tree[best_feature][value] = build_tree(subset, remaining_features, target)

    return tree

 

# Build the decision tree: every column except the last (the target) is a
# feature.  (Straight quotes in print — the source had curly “smart” quotes,
# which are a SyntaxError.)
features = list(df.columns[:-1])
decision_tree = build_tree(df, features)
print("Decision Tree:", decision_tree)

# End of script.