Machine Learning (ML) is a subfield of artificial intelligence (AI) in which computers learn patterns from data and make predictions or decisions without being explicitly programmed for each task. ML powers applications such as recommendation systems, image recognition, and language translation.
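As a minimal sketch (toy synthetic data, not a real application), a model infers the input-output rule from examples alone:
from sklearn.linear_model import LinearRegression
import numpy as np
# Toy data following y = 2x; the model learns the rule from examples, not explicit code
X = np.array([[1], [2], [3], [4]])
y = np.array([2, 4, 6, 8])
model = LinearRegression().fit(X, y)
print(model.predict([[5]]))  # approximately [10.]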
import numpy as np
A = np.array([[1, 2], [3, 4]])
B = np.array([[5, 6], [7, 8]])
C = A @ B  # Matrix multiplication (same as np.dot(A, B)); result [[19, 22], [43, 50]]
# Derivative of f(x) = x^2
import sympy as sp
x = sp.symbols('x')
f = x**2
f_prime = sp.diff(f, x)
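The result, f_prime, is the symbolic expression 2*x; it can be evaluated directly or compiled into a numeric function:
print(f_prime)             # 2*x
print(f_prime.subs(x, 3))  # 6
f_num = sp.lambdify(x, f_prime)  # compile to a plain Python function
print(f_num(3.0))          # 6.0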
import numpy as np
x = np.random.normal(loc=0, scale=1, size=1000)  # 1000 samples from a standard normal
mean = np.mean(x)  # close to 0 for this distribution
std = np.std(x)    # close to 1
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder  # OneHotEncoder for categorical columns
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)  # fit on the training set only
X_test_scaled = scaler.transform(X_test)        # transform test data with training statistics
from sklearn.preprocessing import PolynomialFeatures
poly = PolynomialFeatures(degree=2)
X_poly = poly.fit_transform(X)
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
X_reduced = pca.fit_transform(X)
print(pca.explained_variance_ratio_)  # fraction of variance captured by each component
from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(X_train, y_train)
predictions = model.predict(X_test)
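To quantify the fit, standard regression metrics compare the predictions with the held-out targets:
from sklearn.metrics import mean_squared_error, r2_score
mse = mean_squared_error(y_test, predictions)  # average squared error, lower is better
r2 = r2_score(y_test, predictions)             # 1.0 is a perfect fit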
from sklearn.linear_model import Ridge
from sklearn.svm import SVR
ridge = Ridge(alpha=1.0)  # L2-regularized linear regression
svr = SVR(kernel='rbf')   # support vector regression with an RBF kernel
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
rf = RandomForestClassifier(n_estimators=100)  # ensemble of 100 decision trees
svm = SVC(kernel='rbf')                        # kernelized support vector classifier
from sklearn.metrics import accuracy_score, f1_score
accuracy = accuracy_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred, average='weighted')
from sklearn.model_selection import cross_val_score
scores = cross_val_score(model, X, y, cv=5)
mean_score = scores.mean()
std_score = scores.std()  # report the spread across folds, not just the mean
# High bias (underfitting): model too simple, e.g. a linear model on non-linear data
# High variance (overfitting): model too complex, e.g. a deep tree on a small dataset
# One remedy: regularization
from sklearn.linear_model import Ridge
ridge = Ridge(alpha=1.0)  # larger alpha shrinks coefficients, reducing variance
# Another remedy: limit model complexity directly
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(max_depth=5)  # shallow trees are less prone to overfitting
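One quick diagnostic, sketched here assuming the train/test split from above: sweep the regularization strength and compare train vs. test scores.
for alpha in [0.01, 1.0, 100.0]:
    r = Ridge(alpha=alpha).fit(X_train, y_train)
    # A large train/test gap suggests high variance; two low scores suggest high bias
    print(alpha, r.score(X_train, y_train), r.score(X_test, y_test))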
from sklearn.cluster import KMeans
kmeans = KMeans(n_clusters=3, random_state=42)  # fix the seed for reproducible centroids
clusters = kmeans.fit_predict(X)
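With no ground-truth labels, cluster quality can be sanity-checked with the silhouette score (closer to 1 means tighter, better-separated clusters):
from sklearn.metrics import silhouette_score
score = silhouette_score(X, clusters)  # in [-1, 1]
print(kmeans.cluster_centers_)         # one centroid per cluster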
from sklearn.cluster import DBSCAN, AgglomerativeClustering
dbscan = DBSCAN(eps=0.5, min_samples=5)               # density-based; no cluster count needed, flags outliers
hierarchical = AgglomerativeClustering(n_clusters=3)  # bottom-up merging of nearest clusters
from mlxtend.frequent_patterns import apriori, association_rules
# df must be one-hot encoded: one boolean column per item, one row per transaction
frequent_itemsets = apriori(df, min_support=0.1, use_colnames=True)
rules = association_rules(frequent_itemsets, metric="confidence")
rules = rules.sort_values('lift', ascending=False)  # strongest associations first
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import GradientBoostingClassifier
gb = GradientBoostingClassifier(n_estimators=100)
voting = VotingClassifier(estimators=[('rf', rf), ('gb', gb)])  # majority ('hard') voting by default
import tensorflow as tf
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1)  # single linear output, e.g. for regression
])
import numpy as np
def relu(x): return np.maximum(0, x)
def sigmoid(x): return 1 / (1 + np.exp(-x))
def softmax(x):
    e = np.exp(x - np.max(x))  # subtract the max for numerical stability
    return e / e.sum()
# Automatic backpropagation in TensorFlow
with tf.GradientTape() as tape:
    predictions = model(x)
    loss = loss_function(y_true, predictions)
gradients = tape.gradient(loss, model.trainable_variables)
from tensorflow.keras.optimizers import Adam, SGD
optimizer = Adam(learning_rate=0.001)
sgd = SGD(learning_rate=0.01, momentum=0.9)
model.compile(optimizer=optimizer, loss='mse')
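Training then runs through fit; a sketch assuming X_train and y_train are NumPy arrays, with Keras early stopping to halt when validation loss stops improving:
early_stop = tf.keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)
history = model.fit(X_train, y_train,
                    epochs=100,
                    validation_split=0.2,  # hold out 20% to monitor overfitting
                    callbacks=[early_stop])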
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
nltk.download('punkt')  # one-time download of the tokenizer model
tokens = word_tokenize("Hello world!")
stemmer = PorterStemmer()
stems = [stemmer.stem(token) for token in tokens]
import re
from nltk.corpus import stopwords  # requires nltk.download('stopwords')
text = "Hello World! This is a test."
text = re.sub(r'[^\w\s]', '', text.lower())  # lowercase and strip punctuation
tokens = [t for t in text.split() if t not in stopwords.words('english')]
from gensim.models import Word2Vec
sentences = [['hello', 'world'], ['machine', 'learning']]
# min_count=1 keeps rare words; the default (5) would drop every word in this toy corpus
model = Word2Vec(sentences, vector_size=100, window=5, min_count=1)
vector = model.wv['hello']
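The trained vectors support similarity queries; on this four-word toy corpus the neighbors are meaningless, but the calls are the same on a real corpus:
print(model.wv.most_similar('hello', topn=2))      # nearest words by cosine similarity
print(model.wv.similarity('machine', 'learning'))  # pairwise cosine similarity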
import gymnasium as gym  # maintained successor of the original gym package
env = gym.make('CartPole-v1')
state, info = env.reset()
action = env.action_space.sample()  # sample a random action
next_state, reward, terminated, truncated, info = env.step(action)
done = terminated or truncated  # episode ends on termination or time-limit truncation
import numpy as np
Q = np.zeros((n_states, n_actions))  # one value per (state, action) pair
alpha = 0.1  # learning rate
gamma = 0.9  # discount factor
# Q-learning update: nudge Q(s, a) toward reward plus discounted best next-state value
Q[state, action] += alpha * (reward + gamma * np.max(Q[next_state]) - Q[state, action])
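Putting the update inside an episode loop gives full tabular Q-learning; a sketch on FrozenLake with epsilon-greedy exploration (gymnasium API):
import gymnasium as gym
import numpy as np

env = gym.make('FrozenLake-v1')
Q = np.zeros((env.observation_space.n, env.action_space.n))
alpha, gamma, epsilon = 0.1, 0.9, 0.1

for episode in range(5000):
    state, info = env.reset()
    done = False
    while not done:
        # Epsilon-greedy: explore randomly with probability epsilon, else exploit
        if np.random.rand() < epsilon:
            action = env.action_space.sample()
        else:
            action = np.argmax(Q[state])
        next_state, reward, terminated, truncated, info = env.step(action)
        done = terminated or truncated
        Q[state, action] += alpha * (reward + gamma * np.max(Q[next_state]) - Q[state, action])
        state = next_state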
import joblib  # preferred over pickle for models holding large numpy arrays
# Save model
joblib.dump(model, 'model.pkl')
# Load model
loaded_model = joblib.load('model.pkl')
# Core ML stack
import pandas as pd
import numpy as np
import sklearn
import tensorflow as tf
import torch
df = pd.read_csv('data.csv')
X = df.drop(columns='target').to_numpy()  # feature matrix
y = df['target'].to_numpy()               # target vector
# Typical ML workflow
import pandas as pd
import matplotlib.pyplot as plt
# 1. Load data
df = pd.read_csv('data.csv')
# 2. Exploratory data analysis
print(df.describe())      # summary statistics
print(df.isnull().sum())  # missing values per column
# 3. Preprocess, 4. Train, 5. Evaluate (sketched below)
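A minimal sketch of steps 3-5, assuming df holds numeric features plus a 'target' label column:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# 3. Preprocess: separate features and target
X = df.drop(columns='target')
y = df['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 4. Train
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# 5. Evaluate on the held-out split
print(accuracy_score(y_test, clf.predict(X_test)))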
# Example: Image classification
from tensorflow.keras.applications import ResNet50
model = ResNet50(weights='imagenet')
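Using the pretrained weights requires ResNet50's own input preprocessing; a sketch with a hypothetical image file cat.jpg:
from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions
import tensorflow as tf
import numpy as np

img = tf.keras.utils.load_img('cat.jpg', target_size=(224, 224))  # hypothetical input image
x = preprocess_input(np.expand_dims(tf.keras.utils.img_to_array(img), axis=0))
preds = model.predict(x)
print(decode_predictions(preds, top=3)[0])  # top-3 (class_id, class_name, probability)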
# Example: Sentiment analysis
from textblob import TextBlob
blob = TextBlob("I love this product!")
sentiment = blob.sentiment.polarity  # ranges from -1 (negative) to 1 (positive)
# Best practices checklist
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
# 1. Split data before any fitting (fix random_state for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# 2. Scale features: fit on train only, then transform both splits
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
# 3. Evaluate on the held-out test set (y_pred from a model trained on X_train_scaled)
print(classification_report(y_test, y_pred))
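Going further, a Pipeline bundles scaling and the estimator so cross-validation refits the scaler inside each fold and no test statistics leak into preprocessing; a sketch using LogisticRegression as a stand-in classifier:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('clf', LogisticRegression(max_iter=1000)),
])
# The scaler is re-fit inside each CV fold, preventing leakage
scores = cross_val_score(pipe, X, y, cv=5)
print(scores.mean(), scores.std())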
| Algorithm | Training time | Space |
|---|---|---|
| Linear Regression (normal equation) | O(nd² + d³) | O(d²) |
| Logistic Regression (gradient descent) | O(ndi) | O(d) |
| Decision Tree | O(nd log n) | O(n) |
| Random Forest | O(t × nd log n) | O(t × n) |
| SVM | O(n²) to O(n³) | O(n²) |
| K-Means | O(nkdi) | O(n + kd) |
| Neural Network | O(npi) | O(p) |

n = samples, d = features, k = clusters, t = trees, i = iterations/epochs, p = parameters