Decision Tree
hard · Machine Learning · runs: 0. Implement a CART-style decision tree classifier from scratch. At each node, pick the feature and threshold that maximize the reduction in Gini impurity. Recurse until a depth limit, a minimum sample threshold, or a pure node is reached. Predict by walking the tree.
sign in to paste and practice your own solution
wpm 0 · acc 100% · time 0:00 · 0 / 2619
import numpy as np
class Node:
    """A single node of the decision tree.

    Internal nodes carry a ``(feature, threshold)`` split plus left/right
    children; leaf nodes carry only ``label``. An internal node's ``label``
    stays ``None`` — ``_predict_one`` relies on that to detect leaves.
    """

    def __init__(self, feature=None, threshold=None, left=None, right=None, label=None):
        # Generalization (backward compatible): all fields may now be
        # supplied at construction time; ``Node()`` behaves exactly as before.
        self.feature = feature      # column index used for the split (internal nodes)
        self.threshold = threshold  # split value: go left when x[feature] <= threshold
        self.left = left            # child for samples with feature value <= threshold
        self.right = right          # child for samples with feature value > threshold
        self.label = label          # predicted class (leaf nodes only)
class DecisionTree:
    """CART-style decision tree classifier using Gini impurity.

    At each node the (feature, threshold) pair that maximizes the reduction
    in Gini impurity is chosen by exhaustive search over every unique value
    of every feature. Recursion stops at ``max_depth``, when a node holds
    fewer than ``min_samples_split`` samples, when the node is pure, or when
    no split yields a strictly positive gain.
    """

    def __init__(self, max_depth: int = 10, min_samples_split: int = 2):
        self.max_depth = max_depth                  # maximum tree depth (root = 0)
        self.min_samples_split = min_samples_split  # minimum samples to attempt a split
        self.root = None                            # set by fit()

    def _gini(self, y):
        """Gini impurity of label vector ``y``: 1 - sum_k p_k**2."""
        _, counts = np.unique(y, return_counts=True)
        p = counts / len(y)
        return 1 - np.sum(p ** 2)

    def _majority_label(self, y):
        """Most frequent label in ``y`` (first one on ties, via np.argmax)."""
        values, counts = np.unique(y, return_counts=True)
        return values[np.argmax(counts)]

    def _best_split(self, X, y):
        """Return the (feature, threshold) with the largest positive Gini gain.

        Returns ``None`` when no candidate split strictly reduces impurity.
        """
        best = None
        best_gain = 0
        parent_gini = self._gini(y)
        n_samples, n_features = X.shape
        for feature in range(n_features):
            for t in np.unique(X[:, feature]):
                left = y[X[:, feature] <= t]
                right = y[X[:, feature] > t]
                # A split that sends every sample one way is useless
                # (and would make _gini divide by zero).
                if len(left) == 0 or len(right) == 0:
                    continue
                weighted = (
                    len(left) * self._gini(left)
                    + len(right) * self._gini(right)
                ) / n_samples
                gain = parent_gini - weighted
                if gain > best_gain:
                    best_gain = gain
                    best = (feature, t)
        return best

    def _build(self, X, y, depth: int):
        """Recursively grow the tree; return the subtree root for (X, y)."""
        node = Node()
        # Stopping criteria: depth limit, too few samples, or a pure node.
        if (
            depth >= self.max_depth
            or len(y) < self.min_samples_split
            or len(np.unique(y)) == 1
        ):
            node.label = self._majority_label(y)
            return node
        split = self._best_split(X, y)
        if split is None:
            # No split improves impurity -> make this a leaf.
            node.label = self._majority_label(y)
            return node
        node.feature, node.threshold = split
        left_mask = X[:, node.feature] <= node.threshold
        node.left = self._build(X[left_mask], y[left_mask], depth + 1)
        node.right = self._build(X[~left_mask], y[~left_mask], depth + 1)
        return node

    def fit(self, X, y):
        """Fit the tree on training data and return ``self``.

        Robustness fix: inputs are coerced with ``np.asarray`` so plain
        Python lists work too (the original crashed on ``X.shape`` and
        boolean-mask indexing when given non-ndarray input).
        """
        X = np.asarray(X)
        y = np.asarray(y)
        self.root = self._build(X, y, 0)
        return self

    def _predict_one(self, x, node):
        """Walk from ``node`` down to a leaf for sample ``x``; return its label."""
        if node.label is not None:
            return node.label
        if x[node.feature] <= node.threshold:
            return self._predict_one(x, node.left)
        return self._predict_one(x, node.right)

    def predict(self, X):
        """Return an array with the predicted label for each row of ``X``."""
        return np.array([self._predict_one(x, self.root) for x in X])
click the box to focus · tab inserts 4 spaces · backspace to correct · esc to pause
desktop only
codedrill is a typing game and needs a real keyboard. open this on a laptop or desktop to practice.
you can still browse problems and sections from your phone.