Every concept, topic by topic. From your first variable to building data structures, ML-ready.
Python is an interpreted, dynamically typed, garbage-collected language.
x = 5 then x = "hi" — totally fine.
# How Python runs your code behind the scenes:
# 1. Lexer tokenizes your source code
# 2. Parser builds an AST (Abstract Syntax Tree)
# 3. Compiler turns AST → bytecode (.pyc files)
# 4. Python Virtual Machine (PVM) executes bytecode
# You can actually SEE the bytecode:
import dis
dis.dis(lambda: print("hello"))
# LOAD_GLOBAL 0 (print)
# LOAD_CONST 1 ('hello')
# CALL_FUNCTION 1
# RETURN_VALUE
A variable is a name that points to an object in memory. It's a label stuck on a box, not a box itself.
# ── Basic types ──
name = "Yatin" # str
age = 25 # int
height = 5.9 # float
is_dev = True # bool
nothing = None # NoneType
print(type(name)) # <class 'str'>
print(type(age)) # <class 'int'>
# ── Everything is an object ──
x = 42
print(id(x)) # memory address
print(type(x)) # <class 'int'>
# Two names can point to the SAME object
y = x
print(id(x) == id(y)) # True
# Python caches small integers (-5 to 256)
a = 256
b = 256
print(a is b) # True — same cached object
Immutable objects can't be changed after creation — modifying them creates a new object. Mutable objects can be changed in-place.
| Immutable (can't change) | Mutable (can change in-place) |
|---|---|
int, float, bool, str, tuple, frozenset, bytes | list, dict, set, bytearray
# Immutable — "changing" creates a NEW object
s = "hello"
print(id(s)) # 4377001200
s = s + " world"
print(id(s)) # 4377055600 — different object!
# Mutable — you modify the SAME object
lst = [1, 2, 3]
print(id(lst)) # 4377102400
lst.append(4)
print(id(lst)) # 4377102400 — same object!
Symbols that perform operations on values — arithmetic, comparison, logic, and assignment.
# ── Arithmetic ──
print(10 / 3) # 3.333 (true division — ALWAYS float)
print(10 // 3) # 3 (floor division)
print(10 % 3) # 1 (modulo)
print(10 ** 3) # 1000 (power)
# ── Identity: is vs == ──
a = [1, 2]
b = [1, 2]
print(a == b) # True — same VALUE
print(a is b) # False — different OBJECTS
# ── Short-circuit ──
print("" or "default") # "default" — common pattern!
print("hello" and "world") # "world"
# ── Walrus operator := (3.8+) ──
data = [1, 2, 3, 4, 5]
if (n := len(data)) > 3:
print(f"Too many: {n}") # Too many: 5
# ── Truthy / Falsy ──
# Falsy: False, 0, 0.0, "", [], {}, set(), None
# Everything else is Truthy
print(bool([])) # False
print(bool([0])) # True — list has an element
Convert between types using built-in functions like int(), str(), float(), and list().
x = "42"
print(int(x)) # 42
print(float(x)) # 42.0
print(str(42)) # "42"
print(bool(0)) # False
print(list("abc")) # ['a', 'b', 'c']
Strings are immutable sequences of Unicode characters. Every operation creates a new string.
# ── Creation ──
s1 = 'single'
s2 = "double" # identical
s3 = """multi
line"""
s4 = r"raw: \n stays literal" # no escape processing
# ── f-strings (THE way to format) ──
name = "Yatin"
print(f"Hello {name}") # Hello Yatin
print(f"{3.14159:.2f}") # 3.14
print(f"{1000000:,}") # 1,000,000
print(f"{'hello':*^20}") # *******hello********
# ── Indexing & Slicing ──
s = "Python"
print(s[0]) # P
print(s[-1]) # n
print(s[0:3]) # Pyt (stop is EXCLUDED)
print(s[::-1]) # nohtyP (reverse)
# ── Common Methods ──
s = " Hello, World! "
s.strip() # "Hello, World!"
s.lower() # " hello, world! "
s.upper() # " HELLO, WORLD! "
s.strip().split(",") # ['Hello', ' World!']
"-".join(["a","b"]) # "a-b"
"hello".replace("l", "L") # "heLLo"
"hello".startswith("he") # True
"hello".find("ll") # 2 (index, -1 if not found)
"42".isdigit() # True
# ── if / elif / else ──
score = 85
if score >= 90:
grade = "A"
elif score >= 80:
grade = "B"
else:
grade = "F"
# ── Ternary ──
status = "adult" if age >= 18 else "minor"
# ── match/case (3.10+) — Structural Pattern Matching ──
point = (3, 4)
match point:
case (0, 0):
print("Origin")
case (x, 0):
print(f"On x-axis at {x}")
case (x, y):
print(f"Point at {x}, {y}") # Point at 3, 4
# ── Loops ──
for fruit in ["apple", "banana"]:
print(fruit)
for i in range(5): # 0, 1, 2, 3, 4
print(i)
for i in range(2, 10, 3): # 2, 5, 8 (start, stop, step)
print(i)
# ── enumerate & zip ──
fruits = ["apple", "banana"]
for i, f in enumerate(fruits):
print(f"{i}: {f}") # 0: apple, 1: banana
names = ["Alice", "Bob"]
scores = [90, 85]
for name, score in zip(names, scores):
print(f"{name}: {score}")
# ── while ──
count = 0
while count < 5:
count += 1
# ── break, continue, for...else ──
for n in range(10):
if n == 3: continue # skip 3
if n == 7: break # stop at 7
print(n)
# for...else — else runs if loop completed WITHOUT break
for n in range(2, 10):
for x in range(2, n):
if n % x == 0: break
else:
print(f"{n} is prime")
# ── Defining ──
def greet(name):
    """Build and return a friendly greeting for *name*."""
    message = f"Hello, {name}!"
    return message
print(greet("Yatin")) # Hello, Yatin!
# ── Default parameters ──
def power(base, exp=2):
    """Raise *base* to *exp*; squares by default."""
    result = base ** exp
    return result
print(power(3)) # 9
print(power(3, 3)) # 27
# ── Multiple return values (actually a tuple) ──
def divide(a, b):
    """Return (quotient, remainder) of a divided by b as a tuple."""
    return divmod(a, b)
q, r = divide(17, 5) # q=3, r=2
# ── Keyword arguments ──
def create_user(name, age, role="viewer"):
    """Assemble a user record as a plain dict; role defaults to viewer."""
    record = {"name": name, "age": age}
    record["role"] = role
    return record
user = create_user(age=25, name="Yatin", role="admin")
# ── THE BUG ──
def append_to(element, target=[]):  # mutable default — intentional bug demo, DON'T DO THIS
    """Append *element* to *target*.

    Deliberately keeps the classic pitfall: the default list is created
    once at definition time and shared across every call.
    """
    target.append(element)
    return target
print(append_to(1)) # [1]
print(append_to(2)) # [1, 2] — WHAT?! Remembered the old list!
# ── THE FIX ──
def append_to(element, target=None):
    """Safe version: each call without *target* gets its own fresh list."""
    bucket = [] if target is None else target
    bucket.append(element)
    return bucket
Small one-line functions without a name — perfect for quick inline operations like sorting or filtering. Syntax: lambda arguments: expression
# lambda is just a shortcut for simple functions
square = lambda x: x ** 2
square(5) # 25
# Equivalent to:
def square(x):
    """Return the square of *x* — the def equivalent of the lambda above."""
    value = x ** 2
    return value
# Multiple arguments
add = lambda a, b: a + b
add(3, 4) # 7
# No arguments
greet = lambda: "Hello!"
greet() # "Hello!"
# With default values
power = lambda x, n=2: x ** n
power(3) # 9 (default n=2)
power(3, 3) # 27
# Sort by string length
names = ["Charlie", "Alice", "Bob"]
names.sort(key=lambda n: len(n))
# ['Bob', 'Alice', 'Charlie']
# Sort list of tuples by second element
students = [("Alice", 90), ("Bob", 75), ("Carol", 85)]
students.sort(key=lambda s: s[1])
# [('Bob', 75), ('Carol', 85), ('Alice', 90)]
# Sort dicts by a specific key
users = [
{"name": "Yatin", "age": 25},
{"name": "Alice", "age": 30},
{"name": "Bob", "age": 20}
]
users.sort(key=lambda u: u["age"])
# sorted by age: Bob(20), Yatin(25), Alice(30)
# Sort dict by value
scores = {"Alice": 90, "Bob": 85, "Carol": 95}
ranked = sorted(scores.items(), key=lambda x: x[1], reverse=True)
# [('Carol', 95), ('Alice', 90), ('Bob', 85)]
# Multi-key sort: first by age, then by name
people = [("Bob", 25), ("Alice", 25), ("Carol", 20)]
people.sort(key=lambda p: (p[1], p[0]))
# [('Carol', 20), ('Alice', 25), ('Bob', 25)]
nums = [1, 2, 3, 4, 5]
# map — apply function to every element
squares = list(map(lambda x: x**2, nums))
# [1, 4, 9, 16, 25]
# filter — keep elements that return True
evens = list(filter(lambda x: x % 2 == 0, nums))
# [2, 4]
# reduce — combine all elements into one value
from functools import reduce
total = reduce(lambda a, b: a + b, nums)
# 15 (1+2+3+4+5)
product = reduce(lambda a, b: a * b, nums)
# 120 (1*2*3*4*5)
# But list comprehensions are usually cleaner!
squares = [x**2 for x in nums] # prefer over map + lambda
evens = [x for x in nums if x % 2 == 0] # prefer over filter + lambda
# min/max with key
words = ["python", "go", "javascript", "rust"]
shortest = min(words, key=lambda w: len(w)) # "go"
longest = max(words, key=lambda w: len(w)) # "javascript"
# Find user with highest score
users = [{"name": "Alice", "score": 90}, {"name": "Bob", "score": 95}]
best = max(users, key=lambda u: u["score"])
# {'name': 'Bob', 'score': 95}
# Ternary expression inside lambda
check = lambda x: "even" if x % 2 == 0 else "odd"
check(4) # "even"
check(7) # "odd"
grade = lambda s: "A" if s >= 90 else "B" if s >= 80 else "C"
grade(95) # "A"
grade(85) # "B"
grade(70) # "C"
# ✓ USE lambda — simple, one-line, inline with sort/map/filter
names.sort(key=lambda n: n.lower())
# ✗ DON'T assign lambda to a variable (use def instead)
# Bad:
multiply = lambda x, y: x * y # PEP 8 discourages this
# Good:
def multiply(x, y):
    """Return the product of x and y (preferred over a named lambda)."""
    product = x * y
    return product
# ✗ DON'T use lambda for complex logic
# Lambda can only have ONE expression — no statements, loops, or assignments
# lambda x: for i in x: print(i) — SyntaxError!
In Python, functions can be stored in variables, passed as arguments, and put in data structures — just like any other value.
def add(a, b):
    """Return a plus b."""
    return a + b
def sub(a, b):
    """Return a minus b."""
    return a - b
# Assign to variable, put in dict, pass as argument
ops = {"+": add, "-": sub}
print(ops["+"](10, 3)) # 13
# Higher-order function — takes a function as argument
def apply(func, x, y):
    """Higher-order helper: invoke *func* with (x, y) and return its result."""
    result = func(x, y)
    return result
print(apply(add, 5, 3)) # 8
# map, filter
nums = [1, 2, 3, 4]
list(map(lambda x: x**2, nums)) # [1, 4, 9, 16]
list(filter(lambda x: x % 2 == 0, nums)) # [2, 4]
The most common data structure — you can add, remove, and change elements freely. Keeps insertion order.
# Different ways to create
empty = []
nums = [1, 2, 3, 4, 5]
mixed = [1, "hello", 3.14, True, None] # any type
nested = [[1, 2], [3, 4], [5, 6]] # 2D list
from_range = list(range(5)) # [0, 1, 2, 3, 4]
repeated = [0] * 5 # [0, 0, 0, 0, 0]
from_string = list("hello") # ['h', 'e', 'l', 'l', 'o']
nums = [10, 20, 30, 40, 50]
# Indexing
nums[0] # 10 — first
nums[-1] # 50 — last
nums[-2] # 40 — second to last
# Slicing — [start:stop:step]
nums[1:4] # [20, 30, 40] — index 1 to 3
nums[:3] # [10, 20, 30] — first 3
nums[2:] # [30, 40, 50] — from index 2 onwards
nums[::] # [10, 20, 30, 40, 50] — full copy
nums[::-1] # [50, 40, 30, 20, 10] — reversed
nums[::2] # [10, 30, 50] — every 2nd element
# Slice assignment (replace a range)
nums[1:3] = [200, 300] # [10, 200, 300, 40, 50]
nums[1:3] = [99] # [10, 99, 40, 50] — can change size!
fruits = ["apple", "banana"]
fruits.append("cherry") # add ONE item to end
# ['apple', 'banana', 'cherry']
fruits.insert(1, "mango") # insert at specific index
# ['apple', 'mango', 'banana', 'cherry']
fruits.extend(["grape", "kiwi"]) # add MULTIPLE items from iterable
# ['apple', 'mango', 'banana', 'cherry', 'grape', 'kiwi']
# + creates a NEW list (doesn't modify original)
new = fruits + ["melon"] # fruits is unchanged!
# Common mistake: append vs extend
a = [1, 2]
a.append([3, 4]) # [1, 2, [3, 4]] — adds list AS element!
b = [1, 2]
b.extend([3, 4]) # [1, 2, 3, 4] — adds each item
nums = [10, 20, 30, 20, 40]
nums.remove(20) # removes FIRST occurrence only
# [10, 30, 20, 40]
popped = nums.pop() # remove & return LAST
# popped = 40, nums = [10, 30, 20]
popped = nums.pop(1) # remove & return at INDEX
# popped = 30, nums = [10, 20]
del nums[0] # delete by index (no return)
# [20]
nums = [1, 2, 3, 4, 5]
del nums[1:3] # delete a slice
# [1, 4, 5]
nums.clear() # remove ALL elements
# []
letters = ["a", "b", "c", "b", "a"]
letters.index("b") # 1 — first occurrence index
letters.index("b", 2) # 3 — search starting from index 2
letters.count("a") # 2 — how many times "a" appears
"c" in letters # True — membership check
"z" in letters # False
len(letters) # 5
nums = [3, 1, 4, 1, 5]
# In-place sort (modifies original, returns None!)
nums.sort() # [1, 1, 3, 4, 5]
nums.sort(reverse=True) # [5, 4, 3, 1, 1]
# sorted() — returns NEW list, original unchanged
original = [3, 1, 4]
new_sorted = sorted(original) # [1, 3, 4]
# original is still [3, 1, 4]
# Custom sort with key
words = ["banana", "apple", "cherry"]
words.sort(key=len) # ['apple', 'banana', 'cherry']
users = [("Yatin", 25), ("Alice", 30), ("Bob", 20)]
users.sort(key=lambda u: u[1]) # sort by age
# Reverse (in-place)
nums = [1, 2, 3]
nums.reverse() # [3, 2, 1]
list(reversed(nums)) # returns NEW reversed list
# Basic unpacking
a, b, c = [1, 2, 3] # a=1, b=2, c=3
# Star unpacking
first, *middle, last = [1, 2, 3, 4, 5]
# first=1, middle=[2, 3, 4], last=5
head, *tail = [1, 2, 3, 4]
# head=1, tail=[2, 3, 4]
*init, last = [1, 2, 3, 4]
# init=[1, 2, 3], last=4
# Swap without temp variable
x, y = 10, 20
x, y = y, x # x=20, y=10
# Ignore values with _
_, b, _ = [1, 2, 3] # only care about b
import copy
# Shallow copy — 3 ways (all equivalent)
original = [[1, 2], [3, 4]]
a = original.copy()
b = list(original)
c = original[::]
# Shallow = new outer list, but inner lists are SHARED
original[0][0] = 999
print(a[0][0]) # 999 — changed too!
# Deep copy — fully independent at all levels
original = [[1, 2], [3, 4]]
deep = copy.deepcopy(original)
original[0][0] = 999
print(deep[0][0]) # 1 — independent!
# Enumerate — get index + value
for i, fruit in enumerate(["apple", "banana", "cherry"]):
print(f"{i}: {fruit}")
# Zip — iterate multiple lists together
names = ["Alice", "Bob"]
ages = [25, 30]
for name, age in zip(names, ages):
print(f"{name} is {age}")
# List as stack (LIFO)
stack = []
stack.append(1) # push
stack.append(2)
stack.pop() # 2 — last in, first out
# Flatten nested list — 3 ways
nested = [[1, 2], [3, 4], [5]]
flat = [x for sub in nested for x in sub] # comprehension (most Pythonic)
flat = sum(nested, []) # shortest but slow for large lists (O(n²))
from itertools import chain
flat = list(chain.from_iterable(nested)) # fastest for big data (O(n))
# all give [1, 2, 3, 4, 5]
# Recursive flatten — handles ANY nesting depth
def flatten(lst):
    """Recursively flatten arbitrarily nested lists/tuples into one flat list."""
    flat = []
    for element in lst:
        if isinstance(element, (list, tuple)):
            # Descend into the nested container and splice its items in.
            for inner in flatten(element):
                flat.append(inner)
        else:
            flat.append(element)
    return flat
flatten([1, [2, [3, 4]], (5, 6), 7])
# [1, 2, 3, 4, 5, 6, 7]
# Filter with list comprehension
nums = [1, 2, 3, 4, 5, 6]
evens = [n for n in nums if n % 2 == 0] # [2, 4, 6]
# min, max, sum
min(nums) # 1
max(nums) # 6
sum(nums) # 21
# Check if all/any match a condition
all(n > 0 for n in nums) # True — all positive
any(n > 5 for n in nums) # True — at least one > 5
# 1. Mutable default argument — CLASSIC bug
def add_item(item, lst=[]):  # BAD on purpose: the default list persists between calls
    """Demonstrates the mutable-default-argument bug — kept intentionally."""
    lst.append(item)
    return lst
add_item(1) # [1]
add_item(2) # [1, 2] — not [2]!
def add_item(item, lst=None):
    """Fixed version: use None as sentinel so each call gets a fresh list."""
    target = [] if lst is None else lst
    target.append(item)
    return target
# 2. Multiplying nested lists — shared references!
grid = [[0] * 3] * 3 # BAD!
grid[0][0] = 1
# [[1,0,0], [1,0,0], [1,0,0]] — all rows changed!
grid = [[0] * 3 for _ in range(3)] # GOOD!
grid[0][0] = 1
# [[1,0,0], [0,0,0], [0,0,0]] — only first row
# 3. sort() returns None, not the list
result = [3, 1, 2].sort() # None! not [1,2,3]
result = sorted([3, 1, 2]) # [1,2,3] ✓
Like lists, but you can't change values once created. Faster than lists, use less memory, and can be dictionary keys.
# Different ways to create
empty = ()
single = (42,) # MUST have trailing comma!
not_a_tuple = (42) # just an int in parentheses
pair = (3, 4)
mixed = (1, "hello", 3.14)
from_list = tuple([1, 2, 3])
# Parentheses are optional (packing)
coords = 10, 20, 30 # same as (10, 20, 30)
t = (10, 20, 30, 40, 50)
t[0] # 10
t[-1] # 50
t[1:4] # (20, 30, 40) — slice returns a tuple
t[::-1] # (50, 40, 30, 20, 10)
len(t) # 5
min(t) # 10
max(t) # 50
sum(t) # 150
t = (1, 2, 3, 2, 1)
t.count(2) # 2 — how many times 2 appears
t.index(3) # 2 — index of first occurrence
# That's it! No append, remove, sort — tuples are immutable
# 1. As dictionary keys (lists CAN'T do this)
locations = {}
locations[(40.7, -74.0)] = "New York"
locations[(51.5, -0.1)] = "London"
# 2. As set elements
points = {(0, 0), (1, 1), (2, 2)}
# 3. Function returning multiple values
def get_user():
    """Return (name, age) — Python's multiple-return idiom is really a tuple."""
    return ("Yatin", 25)
name, age = get_user() # unpack
# 4. Protect data from accidental changes
DAYS = ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
# 5. Slightly faster & less memory than lists
import sys
sys.getsizeof([1, 2, 3]) # 120 bytes
sys.getsizeof((1, 2, 3)) # 64 bytes — almost half!
# Unpacking works same as lists
a, b, c = (1, 2, 3)
first, *rest = (1, 2, 3, 4) # first=1, rest=[2,3,4]
# Swap values (this creates tuples under the hood)
x, y = 10, 20
x, y = y, x
# Tuple concatenation (creates NEW tuple)
a = (1, 2)
b = (3, 4)
c = a + b # (1, 2, 3, 4)
d = a * 3 # (1, 2, 1, 2, 1, 2)
from collections import namedtuple
# Create a "class-like" tuple
Point = namedtuple("Point", ["x", "y"])
p = Point(3, 4)
print(p.x, p.y) # 3 4 — access by name
print(p[0], p[1]) # 3 4 — still works by index
# Practical example
User = namedtuple("User", ["name", "age", "email"])
user = User("Yatin", 25, "y@dev.com")
print(user.name) # "Yatin"
# Convert to dict
user._asdict() # {'name': 'Yatin', 'age': 25, 'email': 'y@dev.com'}
# Create modified copy (tuples are immutable, so _replace returns new)
older = user._replace(age=26)
# User(name='Yatin', age=26, email='y@dev.com')
# 1. Single element needs comma!
t = (42,) # tuple
t = (42) # just int 42
type((42,)) # <class 'tuple'>
type((42)) # <class 'int'>
# 2. Immutable BUT can contain mutable objects
t = ([1, 2], [3, 4])
t[0].append(99) # works! inner list is mutable
# ([1, 2, 99], [3, 4])
# t[0] = [5, 6] # TypeError! can't reassign tuple element
Store data as key-value pairs for fast O(1) lookup. The most-used data structure after lists. Keys must be hashable (immutable).
# Different ways to create
empty = {}
user = {"name": "Yatin", "age": 25}
from_tuples = dict([("a", 1), ("b", 2)])
from_kwargs = dict(name="Yatin", age=25)
from_keys = dict.fromkeys(["a", "b", "c"], 0) # {'a': 0, 'b': 0, 'c': 0}
from_zip = dict(zip(["name", "age"], ["Yatin", 25]))
# Dict comprehension
squares = {n: n**2 for n in range(5)}
# {0: 0, 1: 1, 2: 4, 3: 9, 4: 16}
user = {"name": "Yatin", "age": 25, "skills": ["Python"]}
# Direct access — raises KeyError if missing!
user["name"] # "Yatin"
# user["email"] # KeyError!
# Safe access with .get()
user.get("email") # None — no error
user.get("email", "N/A") # "N/A" — custom default
# Check if key exists
"name" in user # True
"email" in user # False
user = {"name": "Yatin"}
# Add / update single key
user["age"] = 25 # add new key
user["name"] = "Yatin Dora" # update existing
# Update multiple at once
user.update({"email": "y@dev.com", "age": 26})
# setdefault — add ONLY if key doesn't exist
user.setdefault("name", "Unknown") # "Yatin Dora" — already exists, no change
user.setdefault("city", "NYC") # "NYC" — added because key was missing
# Merge with | operator (3.9+)
a = {"x": 1, "y": 2}
b = {"y": 3, "z": 4}
merged = a | b # {'x': 1, 'y': 3, 'z': 4}
# creates NEW dict — "y" exists in both, b's value (3) wins
# a and b are unchanged
a |= b # modifies a IN-PLACE (like += for dicts)
# a is now {'x': 1, 'y': 3, 'z': 4} — same as a.update(b)
# Key rule: when keys overlap, RIGHT side always wins
{"a": 1} | {"a": 99} # {'a': 99}
# Before 3.9 — old ways to merge
merged = {**a, **b} # unpacking (3.5+)
a.update(b) # in-place (like |=)
user = {"name": "Yatin", "age": 25, "city": "NYC"}
# del — raises KeyError if missing
del user["city"]
# pop — remove & return value (with optional default)
age = user.pop("age") # 25
email = user.pop("email", None) # None — no error
# popitem — remove & return last inserted pair
user["a"] = 1
user["b"] = 2
user.popitem() # ('b', 2)
# clear — remove everything
user.clear() # {}
user = {"name": "Yatin", "age": 25, "city": "NYC"}
# Loop through keys (default)
for key in user:
print(key) # name, age, city
# Loop through values
for val in user.values():
print(val) # Yatin, 25, NYC
# Loop through key-value pairs
for k, v in user.items():
print(f"{k}: {v}") # name: Yatin, age: 25, city: NYC
# Get all keys/values as lists
keys = list(user.keys()) # ['name', 'age', 'city']
vals = list(user.values()) # ['Yatin', 25, 'NYC']
users = {
"user1": {
"name": "Yatin",
"skills": ["Python", "JS"],
"address": {"city": "NYC", "zip": "10001"}
},
"user2": {
"name": "Alice",
"skills": ["Go", "Rust"],
"address": {"city": "LA", "zip": "90001"}
}
}
# Access nested data
users["user1"]["name"] # "Yatin"
users["user1"]["skills"][0] # "Python"
users["user1"]["address"]["city"] # "NYC"
# Safe nested access (avoid KeyError chains)
city = users.get("user3", {}).get("address", {}).get("city", "Unknown")
# "Unknown"
from collections import defaultdict, Counter
# defaultdict — auto-creates missing keys with a default value
word_count = defaultdict(int) # default = 0
for w in "hello world hello python hello".split():
word_count[w] += 1
# {'hello': 3, 'world': 1, 'python': 1}
# Group items
groups = defaultdict(list)
for name, dept in [("Alice", "eng"), ("Bob", "hr"), ("Carol", "eng")]:
groups[dept].append(name)
# {'eng': ['Alice', 'Carol'], 'hr': ['Bob']}
# Counter — count anything iterable
counts = Counter("mississippi")
# Counter({'i': 4, 's': 4, 'p': 2, 'm': 1}) — ties keep first-seen order, 'i' appears before 's'
counts.most_common(2) # [('i', 4), ('s', 4)]
counts["s"] # 4
counts["z"] # 0 — no KeyError!
# Counter arithmetic
a = Counter("aab") # {'a': 2, 'b': 1}
b = Counter("abc") # {'a': 1, 'b': 1, 'c': 1}
a + b # Counter({'a': 3, 'b': 2, 'c': 1})
a - b # Counter({'a': 1})
# 1. Keys must be hashable (immutable)
d = {[1, 2]: "val"} # TypeError! lists can't be keys
d = {(1, 2): "val"} # OK — tuples can be keys
# 2. Don't modify dict size while iterating
d = {"a": 1, "b": 2, "c": 3}
# BAD: for k in d: del d[k] — RuntimeError!
# GOOD: iterate over a copy
for k in list(d.keys()):
if d[k] < 3:
del d[k]
# 3. {} is an empty dict, NOT an empty set
type({}) # <class 'dict'>
type(set()) # <class 'set'>
No duplicates allowed and no guaranteed order. Lightning-fast membership testing with O(1) lookup.
empty = set() # NOT {} — that's a dict!
nums = {1, 2, 3, 3} # {1, 2, 3} — duplicate removed
from_list = set([1, 2, 2, 3]) # {1, 2, 3}
from_string = set("hello") # {'h', 'e', 'l', 'o'}
# Set comprehension
evens = {n for n in range(10) if n % 2 == 0}
# {0, 2, 4, 6, 8}
s = {1, 2, 3}
s.add(4) # {1, 2, 3, 4}
s.add(3) # {1, 2, 3, 4} — no effect, already exists
s.update([5, 6]) # add multiple: {1, 2, 3, 4, 5, 6}
s.remove(6) # raises KeyError if not found
s.discard(99) # no error if not found — safer!
popped = s.pop() # remove & return arbitrary element
s.clear() # empty the set
a = {1, 2, 3, 4}
b = {3, 4, 5, 6}
# Union — everything from both
a | b # {1, 2, 3, 4, 5, 6}
a.union(b) # same thing
# Intersection — common elements
a & b # {3, 4}
a.intersection(b) # same thing
# Difference — in a but NOT in b
a - b # {1, 2}
a.difference(b) # same thing
# Symmetric difference — in either but NOT both
a ^ b # {1, 2, 5, 6}
# Subset & superset checks
{1, 2} <= {1, 2, 3} # True — subset
{1, 2, 3} >= {1, 2} # True — superset
{1, 2}.isdisjoint({3, 4}) # True — no common elements
# Remove duplicates from a list
names = ["alice", "bob", "alice", "carol", "bob"]
unique = list(set(names)) # ['alice', 'bob', 'carol'] (order may vary)
# Remove duplicates preserving order (Python 3.7+)
unique_ordered = list(dict.fromkeys(names))
# ['alice', 'bob', 'carol'] — original order kept
# O(1) membership test — WAY faster than lists for large data
big_set = set(range(1_000_000))
999999 in big_set # instant!
999999 in list(big_set) # slow — has to check each element
# Find common elements between lists
list1 = [1, 2, 3, 4]
list2 = [3, 4, 5, 6]
common = list(set(list1) & set(list2)) # [3, 4]
# ┌──────────────┬─────────┬───────────┬─────────┬───────────┐
# │ │ List │ Tuple │ Dict │ Set │
# ├──────────────┼─────────┼───────────┼─────────┼───────────┤
# │ Syntax │ [1,2,3] │ (1,2,3) │ {k: v} │ {1,2,3} │
# │ Ordered │ ✓ │ ✓ │ ✓ (3.7+)│ ✗ │
# │ Mutable │ ✓ │ ✗ │ ✓ │ ✓ │
# │ Duplicates │ ✓ │ ✓ │ keys ✗ │ ✗ │
# │ Index access │ ✓ │ ✓ │ by key │ ✗ │
# │ Hashable │ ✗ │ ✓ │ keys ✓ │ items ✓ │
# │ Use case │ general │ fixed │ lookup │ unique │
# │ │ storage │ data │ mapping │ fast test │
# └──────────────┴─────────┴───────────┴─────────┴───────────┘
A class = blueprint. An object = instance created from that blueprint.
class Dog:
    """Blueprint for a dog: one shared species, per-instance name and age."""

    species = "Canis familiaris"  # class attribute — shared by ALL instances

    def __init__(self, name, age):
        # Instance attributes — unique to each dog.
        self.name = name
        self.age = age

    def bark(self):
        """Return this dog's bark line."""
        return f"{self.name} says Woof!"

    def human_years(self):
        """Rough dog-age-to-human-years conversion (x7)."""
        return 7 * self.age
rex = Dog("Rex", 5)
buddy = Dog("Buddy", 3)
print(rex.bark()) # Rex says Woof!
print(buddy.human_years()) # 21
print(rex.species) # Canis familiaris (shared)
Three ways to define methods — instance methods get self, class methods get cls, static methods get nothing. Each serves a different purpose.
Instance methods — self: The default. Has access to the instance (self) and its data. Can also access class attributes through self.
class User:
    """Shows an instance attribute alongside a shared class attribute."""

    platform = "Web"  # class attribute — shared by every user

    def __init__(self, name):
        self.name = name  # instance attribute — unique per user

    def greet(self):
        """Introduce this user; self reaches the shared platform too."""
        return f"Hi, I'm {self.name} on {self.platform}"
u = User("Yatin")
u.greet() # "Hi, I'm Yatin on Web"
# What actually happens when you call u.greet():
# Python converts it to → User.greet(u)
# That's why self is the first parameter!
Class methods — cls: Gets the class itself (not an instance). Can't access instance data (self). Mainly used as factory methods — alternative ways to create objects.
class User:
    """User with classmethod factory constructors for messy input formats."""

    def __init__(self, name, age):
        self.name = name
        self.age = age

    @classmethod
    def from_string(cls, data_str):
        """Parse 'name-age' and build a user via cls(...)."""
        name, age = data_str.split("-")
        return cls(name, int(age))

    @classmethod
    def from_dict(cls, data):
        """Build a user from a dict holding 'name' and 'age' keys."""
        return cls(data["name"], data["age"])
# Multiple ways to create a User
u1 = User("Yatin", 25) # normal
u2 = User.from_string("Alice-30") # from string
u3 = User.from_dict({"name": "Bob", "age": 20}) # from dict
The core idea: your __init__ takes clean, final values. But real data comes in messy formats — JSON strings, CSV rows, env variables, database rows. Class methods handle the parsing and conversion, then call cls() to create the object. Same object, different input sources.
1. Python's own datetime uses classmethods! — You've already used these. They're all factory methods that create a datetime object from different inputs.
from datetime import datetime
now = datetime.now() # from system clock
today = datetime.today() # from today's date
d = datetime.fromtimestamp(1679500000) # from unix timestamp
# All return a datetime object — just different ways to create one
2. Config loader — Your app needs config settings, but they come from different places depending on the environment. __init__ just stores the settings dict. Each classmethod knows how to read from one source.
class Config:
    """App settings holder; each classmethod parses one config source."""

    def __init__(self, settings):
        # __init__ only stores the already-parsed settings dict.
        self.settings = settings

    @classmethod
    def from_json(cls, filepath):
        """Production path: load settings from a JSON file on disk."""
        with open(filepath) as f:
            parsed = json.load(f)
        return cls(parsed)

    @classmethod
    def from_env(cls):
        """Docker/cloud path: read settings from environment variables."""
        settings = {
            "debug": os.getenv("DEBUG", "false"),
            "db_url": os.getenv("DATABASE_URL"),
        }
        return cls(settings)

    @classmethod
    def default(cls):
        """Development path: hardcoded defaults."""
        return cls({"debug": "false", "db_url": "sqlite:///app.db"})
# Pick whichever fits the situation
config = Config.from_json("config.json") # production — read from file
config = Config.from_env() # docker/cloud — read from env vars
config = Config.default() # development — use defaults
3. User from different data sources — APIs send JSON, files have CSV, databases return tuples. Each classmethod knows how to parse one format and extract name, email, age for __init__.
class User:
    """User whose alternate constructors each parse one raw data format."""

    def __init__(self, name, email, age):
        # Only clean, final values land here — parsing lives in the factories.
        self.name = name
        self.email = email
        self.age = age

    @classmethod
    def from_json(cls, json_str):
        """API path: parse a JSON string into a dict, then build."""
        data = json.loads(json_str)
        return cls(data["name"], data["email"], data["age"])

    @classmethod
    def from_csv_row(cls, row):
        """CSV path: split 'name,email,age' and coerce age to int."""
        name, email, age = row.split(",")
        return cls(name, email, int(age))

    @classmethod
    def from_db_row(cls, row):
        """DB path: row is (id, name, email, age); the id at index 0 is dropped."""
        return cls(row[1], row[2], row[3])
# All create the same User object — parsing logic stays in the classmethod
# __init__ stays clean and simple
Why use cls instead of just writing the class name? Because cls is dynamic — it becomes whatever class called the method. Hardcoding the class name breaks inheritance.
# ❌ Hardcoded class name — breaks for subclasses
class Animal:
    """Anti-pattern demo: the factory hardcodes Animal instead of using cls."""

    def __init__(self, name):
        self.name = name

    @classmethod
    def create(cls, name):
        # Deliberately wrong: always builds an Animal, even when a
        # subclass calls create() — that is the lesson being taught.
        return Animal(name)
class Dog(Animal):
    """Subclass used to show the hardcoded factory breaking inheritance."""

    def bark(self):
        return "Woof!"
d = Dog.create("Buddy")
type(d) # <class 'Animal'> — NOT a Dog!
d.bark() # AttributeError! Animal has no bark()
# ✓ Using cls — works for ALL subclasses
class Animal:
    """Correct factory: cls() builds whichever class invoked create()."""

    def __init__(self, name):
        self.name = name

    @classmethod
    def create(cls, name):
        # cls is the calling class, so subclasses get subclass instances.
        return cls(name)
class Dog(Animal):
    """Subclass proving the cls-based factory returns Dog instances."""

    def bark(self):
        return "Woof!"
d = Dog.create("Buddy") # cls = Dog → Dog("Buddy")
type(d) # <class 'Dog'> ✓
d.bark() # "Woof!" ✓
a = Animal.create("Cat") # cls = Animal → Animal("Cat")
type(a) # <class 'Animal'> ✓
self vs cls — what's the difference? self is a specific object (instance). cls is the class itself (the blueprint). They operate at different levels.
class Car:
    """Tracks a per-instance brand plus a class-wide production counter."""

    total_cars = 0  # class-level data — shared by all cars

    def __init__(self, brand):
        self.brand = brand  # instance-level data — unique to each car
        Car.total_cars += 1  # bump the shared tally

    def describe(self):
        """Instance method: speaks for ONE specific car (self)."""
        return f"I am a {self.brand}"

    @classmethod
    def get_count(cls):
        """Class method: reports on the class as a whole (cls)."""
        return f"{cls.total_cars} {cls.__name__}s made"

    @classmethod
    def create_default(cls):
        """Factory: cls() mints a new instance with a placeholder brand.

        self could never do this — self is already an existing instance.
        """
        return cls("Unknown")
c1 = Car("BMW")
c2 = Car("Tesla")
c1.describe() # "I am a BMW" — self = c1
c2.describe() # "I am a Tesla" — self = c2
Car.get_count() # "2 Cars made" — cls = Car
# ┌──────────────┬─────────────────────┬────────────────────────┐
# │ │ self │ cls │
# ├──────────────┼─────────────────────┼────────────────────────┤
# │ What is it? │ ONE specific object │ The class (blueprint) │
# │ Example │ self = my BMW │ cls = Car │
# │ Access │ self.brand (unique) │ cls.total_cars (shared)│
# │ Can create? │ No (already exists) │ Yes — cls() makes new │
# │ Used in │ instance methods │ @classmethod │
# └──────────────┴─────────────────────┴────────────────────────┘
Static methods — no self, no cls: Gets nothing — can't access instance or class data. It's just a regular function that lives inside the class for organization.
class MathUtils:
    """Namespace of pure helpers; static methods need neither self nor cls."""

    @staticmethod
    def add(a, b):
        """Sum two numbers."""
        return a + b

    @staticmethod
    def is_even(n):
        """True when n is divisible by 2."""
        return n % 2 == 0
# Call without creating an instance
MathUtils.add(3, 4) # 7
MathUtils.is_even(5) # False
# Also works on instances (but no point)
m = MathUtils()
m.add(3, 4) # 7 — same thing
class Pizza:
    """Shows all three method kinds side by side on one class."""

    base_price = 10  # shared base cost for every pizza

    def __init__(self, toppings):
        self.toppings = toppings

    def price(self):
        """Instance method: base price plus 2 per topping (needs self)."""
        return self.base_price + 2 * len(self.toppings)

    @classmethod
    def margherita(cls):
        """Class method: factory / alternative constructor."""
        return cls(["mozzarella", "tomato"])

    @staticmethod
    def is_valid_topping(topping):
        """Static method: utility check touching neither self nor cls."""
        return topping in ("cheese", "tomato", "mushroom")
p = Pizza.margherita() # classmethod — factory
print(p.price()) # 14 — instance method
print(Pizza.is_valid_topping("cheese")) # True — static method
# ┌────────────────┬───────────────┬────────────────┬───────────────┐
# │ │ Instance │ @classmethod │ @staticmethod │
# ├────────────────┼───────────────┼────────────────┼───────────────┤
# │ First param │ self │ cls │ nothing │
# │ Access instance│ ✓ │ ✗ │ ✗ │
# │ Access class │ ✓ (via self) │ ✓ (via cls) │ ✗ │
# │ Called on │ instance │ class or inst │ class or inst │
# │ Use case │ work with │ factory / │ utility │
# │ │ object data │ alt constructor│ function │
# └────────────────┴───────────────┴────────────────┴───────────────┘
__init__ is the initializer (NOT the constructor). It's called automatically after the object is created. Its job: set up initial state.
# WITHOUT __init__ — blank, fragile objects
class Dog:
pass
d = Dog()
d.name = "Rex" # manually set every time
d.age = 5 # easy to forget one → bugs
# WITH __init__ — guaranteed setup
class Dog:
def __init__(self, name, age):
self.name = name
self.age = age
d = Dog("Rex", 5) # one line, always has both attributes
self is a reference to the specific instance calling the method. When you call rex.bark(), Python actually does Dog.bark(rex). So self IS rex.
class Demo:
def __init__(self, name):
# self.name = the OBJECT's attribute
# name = the PARAMETER passed in
self.name = name
print(f"I am object at {id(self)}")
d = Demo("test")
print(f"d is at {id(d)}")
# Both print the SAME address — self IS the object
__new__ creates the object (allocates memory), then __init__ sets it up. Most of the time you only need __init__.
class Dog:
    # Demonstrates the two-phase object lifecycle: __new__ then __init__.
    def __new__(cls, *args, **kwargs):
        # __new__ runs FIRST: it allocates and returns the raw instance.
        print("1. __new__: CREATING object (allocating memory)")
        instance = super().__new__(cls)
        # Whatever __new__ returns is the object __init__ will receive.
        return instance

    def __init__(self, name):
        # __init__ runs SECOND, on the instance that __new__ returned.
        print("2. __init__: INITIALIZING object (setting up data)")
        self.name = name
d = Dog("Rex")
# 1. __new__: CREATING object (allocating memory)
# 2. __init__: INITIALIZING object (setting up data)
# So Dog("Rex") actually does:
# 1. obj = Dog.__new__(Dog, "Rex") ← blank object
# 2. Dog.__init__(obj, "Rex") ← set attributes
# 3. return obj
How __init__ is used in production — validating inputs, storing config, and setting up initial state.
class DatabaseConnection:
    """Holds connection settings and tracks (simulated) connection state.

    Validates required config up front so bad instances never exist.
    """

    def __init__(self, host, port, db, timeout=30):
        # Fail fast on missing required config.
        if not host:
            raise ValueError("Host required")
        # Immutable-by-convention settings.
        self.host = host
        self.port = port
        self.db = db
        self.timeout = timeout
        # Mutable runtime state, fresh for every instance.
        self.is_connected = False
        self._connection = None  # leading _ marks this as internal
        self.query_count = 0

    def connect(self):
        """Open the (simulated) connection and flag the instance as live."""
        print(f"Connecting to {self.host}:{self.port}/{self.db}")
        self.is_connected = True
Double-underscore methods let your objects work with Python's operators and built-in functions.
class Vector:
    """A 2-D vector demonstrating the operator/dunder protocols."""

    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __repr__(self):  # unambiguous form for developers (debugging)
        return f"Vector({self.x}, {self.y})"

    def __str__(self):  # friendly form for users (print)
        return f"({self.x}, {self.y})"

    def __add__(self, other):  # + operator
        if not isinstance(other, Vector):
            return NotImplemented  # let Python raise a clear TypeError
        return Vector(self.x + other.x, self.y + other.y)

    def __eq__(self, other):  # == operator
        if not isinstance(other, Vector):
            return NotImplemented  # FIX: don't crash comparing to non-Vectors
        return self.x == other.x and self.y == other.y

    def __hash__(self):
        # FIX: defining __eq__ sets __hash__ to None, making instances
        # unusable as dict keys / set members. Hash the same fields __eq__
        # compares so equal vectors hash equal.
        return hash((self.x, self.y))

    def __abs__(self):  # abs() function — Euclidean length
        return (self.x**2 + self.y**2)**0.5

    def __len__(self):  # len() function — truncated length
        return int(abs(self))

    def __getitem__(self, i):  # v[0], v[1]
        return (self.x, self.y)[i]

    def __iter__(self):  # for x in v, unpacking
        yield self.x
        yield self.y

    def __call__(self, scalar):  # v(3) — scale; makes the object callable
        return Vector(self.x * scalar, self.y * scalar)
v1 = Vector(3, 4)
v2 = Vector(1, 2)
print(v1) # (3, 4) __str__
print(repr(v1)) # Vector(3, 4) __repr__
print(v1 + v2) # (4, 6) __add__
print(abs(v1)) # 5.0 __abs__
print(v1[0]) # 3 __getitem__
print(v1(3)) # (9, 12) __call__
x, y = v1 # unpacking via __iter__
Quick lookup of all common dunder methods, grouped by what they do and what triggers them.
| Category | Methods | Triggered by |
|---|---|---|
| Lifecycle | __new__, __init__, __del__ | Object creation/destruction |
| String | __str__, __repr__, __format__ | str(), repr(), f-strings |
| Compare | __eq__, __lt__, __gt__, __le__, __ge__ | ==, <, > |
| Math | __add__, __sub__, __mul__, __truediv__ | + - * / |
| Container | __len__, __getitem__, __setitem__, __contains__ | len(), [], in |
| Iteration | __iter__, __next__ | for loops |
| Context | __enter__, __exit__ | with statement |
| Callable | __call__ | obj() |
| Hash | __hash__ | hash(), dict keys |
class Animal:
def __init__(self, name):
self.name = name
def speak(self):
raise NotImplementedError
def describe(self):
return f"I am {self.name}, an animal"
class Dog(Animal):
def speak(self):
return f"{self.name}: Woof!"
def describe(self):
return f"I am {self.name}, a dog"
class Cat(Animal):
def speak(self):
return f"{self.name}: Meow!"
# Polymorphism
for a in [Dog("Rex"), Cat("Whiskers")]:
print(a.speak())
# ── super() ──
# Animal (grandparent) → Dog (parent) → GuideDog (child)
class GuideDog(Dog):
def __init__(self, name, owner):
super().__init__(name) # call parent (Dog) __init__
self.owner = owner
def speak(self):
# call parent (Dog) method
parent_sound = super().speak() # "Buddy: Woof!"
# super().describe() → calls Dog.describe() (parent, one step up MRO)
parent_desc = super().describe() # "I am Buddy, a dog"
# Animal.describe(self) → directly calls grandparent, skips MRO
grandparent_desc = Animal.describe(self) # "I am Buddy, an animal"
return f"{parent_sound} | {parent_desc} | {grandparent_desc}"
def info(self):
# call grandparent (Animal) attribute — self.name set by Animal.__init__
# works because: GuideDog.__init__ → super (Dog) → Dog inherits Animal.__init__ → sets self.name
return f"Guide dog {self.name} belongs to {self.owner}"
g = GuideDog("Buddy", "Alice")
print(g.speak()) # "Buddy: Woof! | I am Buddy, a dog | I am Buddy, an animal"
print(g.info()) # "Guide dog Buddy belongs to Alice"
# To explicitly call grandparent skipping parent:
# Animal.speak(self) — works but breaks MRO, avoid this
# super() always follows MRO chain: GuideDog → Dog → Animal → object
# ── MRO (Method Resolution Order) ──
# MRO is the order Python searches for a method when you call it.
# When you call obj.method(), Python checks classes in MRO order
# and uses the FIRST match it finds. super() follows this same order.
#
# Single inheritance: simple chain
# GuideDog → Dog → Animal → object
# g.speak() → checks GuideDog first ✓ found, stop
# g.name → GuideDog? ✗ → Dog? ✗ → Animal.__init__ set it ✓
#
# Multiple inheritance: diamond problem
# Python uses C3 linearization to decide the order.
# Rules: 1) child before parent 2) left parent before right parent
class A:
def greet(self): return "A"
class B(A):
def greet(self): return "B"
class C(A):
def greet(self): return "C"
class D(B, C): # Diamond! D inherits B and C, both inherit A
pass
# A MRO: D → B → C → A → object
# / \ D.greet() → checks D? ✗ → B? ✓ "B" (stop)
# B C super() in B.greet() would go to C (not A!)
# \ / because MRO says C comes next after B
# D
print(D().greet()) # "B" — first match in MRO
print(D.__mro__) # D → B → C → A → object
# isinstance / issubclass
print(isinstance(Dog("Rex"), Animal)) # True
print(issubclass(Dog, Animal)) # True
Use @property to control attribute access — add validation, computed values, or read-only fields without changing the API.
Why do we need it? Without @property, anyone can set any value directly — even invalid ones like t.celsius = -500. You could write getter/setter methods like get_celsius() / set_celsius(), but then every user of your class has to change from t.celsius to t.get_celsius(). @property lets you add validation/logic behind the scenes while keeping the clean t.celsius syntax.
# ❌ Without @property — no control over what gets set
class Temperature:
def __init__(self, celsius):
self.celsius = celsius
t = Temperature(100)
t.celsius = -500 # no error! but -500°C is physically impossible
print(t.celsius) # -500 — bad data, no validation
# ✅ With @property — setter runs validation automatically
class Temperature:
    """Temperature with a validated `celsius` and derived `fahrenheit`."""

    def __init__(self, celsius):
        # FIX: assign through the property so the setter validates the
        # INITIAL value too. The original wrote self._celsius directly,
        # which let Temperature(-500) bypass the absolute-zero check.
        self.celsius = celsius

    @property  # GETTER: runs when you READ t.celsius
    def celsius(self):
        return self._celsius

    @celsius.setter  # SETTER: runs when you WRITE t.celsius = value
    def celsius(self, value):
        if value < -273.15:
            raise ValueError("Below absolute zero!")
        self._celsius = value  # _celsius = private storage (convention)

    @property  # read-only — no setter, so t.fahrenheit = x raises
    def fahrenheit(self):
        return self._celsius * 9/5 + 32  # computed on the fly from _celsius
t = Temperature(100)
print(t.celsius) # 100 — calls @property getter
print(t.fahrenheit) # 212.0 — computed, read-only
t.celsius = 0 # calls @celsius.setter → validates → sets _celsius = 0
print(t.fahrenheit) # 32.0 — auto-recomputed
t.celsius = -300 # ValueError: Below absolute zero!
t.fahrenheit = 50 # AttributeError: can't set — no setter defined
# The magic: caller uses simple t.celsius syntax
# but behind the scenes Python calls your getter/setter methods
# t.celsius → Temperature.celsius.fget(t) (getter)
# t.celsius = 0 → Temperature.celsius.fset(t, 0) (setter)
Python has no true private variables like Java/C++. Everything is accessible. But there are two conventions:
class BankAccount:
    """Demonstrates Python's three attribute-visibility conventions."""

    def __init__(self, balance):
        self.name = "Savings"    # public — anyone can read/write
        self._balance = balance  # _ = "protected" — convention only: don't touch from outside
        self.__pin = 1234        # __ = name-mangled by the compiler to _BankAccount__pin
acc = BankAccount(1000)
# public — works fine
print(acc.name) # "Savings"
# _ single underscore — works but "please don't"
print(acc._balance) # 1000 — still accessible! just a convention
# __ double underscore — name mangling
print(acc.__pin) # AttributeError: no attribute '__pin'
print(acc._BankAccount__pin) # 1234 — Python renamed it to _ClassName__var
# so it's STILL accessible, just harder to find
# Summary:
# name → public anyone can use
# _name → protected "don't touch" convention, still accessible
# __name → name-mangled Python renames to _ClassName__name, still accessible
#
# Bottom line: Python trusts developers — "we're all adults here"
# Use _ for internal attrs, __ only to avoid name clashes in subclasses
# ── try / except / else / finally ──
try:
result = 10 / 0
except ZeroDivisionError as e:
print(f"Error: {e}")
except (TypeError, ValueError):
print("Type or value error")
except Exception as e: # catch-all (use sparingly)
print(f"Unknown: {e}")
else: # runs ONLY if no exception
print("Success!")
finally: # runs ALWAYS
print("Cleanup")
# ── raise vs throw ──
# "raise" IS Python's "throw" — same concept, different keyword
# Python: raise | Java/C#/JS/C++: throw
#
# What raise does:
# 1. Creates an exception object
# 2. Immediately stops the current function
# 3. Walks UP the call stack looking for a matching except
# 4. If nothing catches it → program crashes with traceback
# ── Raising ──
def validate_age(age):
    """Reject negative ages with a ValueError; any other value passes silently."""
    if age < 0:
        raise ValueError(f"Age can't be negative: {age}")
# raise separates DETECTING an error from HANDLING it
def divide(a, b):
    """Return a / b, raising ZeroDivisionError explicitly when b == 0.

    Detection happens here; the CALLER decides how to handle the error.
    """
    if b == 0:
        raise ZeroDivisionError("can't divide by zero")
    return a / b
# Caller decides what to do with the error
try:
divide(10, 0)
except ZeroDivisionError as e:
print(e) # can't divide by zero
# Three forms of raise:
raise ValueError("msg") # raise a new exception
raise # re-raise current exception (inside except)
raise TypeError("x") from err # chain: new exception caused by original
# ── Custom exceptions ──
class InsufficientFunds(Exception):
    """Raised when a withdrawal exceeds the available balance.

    Carries both amounts so handlers can inspect them programmatically.
    """

    def __init__(self, balance, amount):
        super().__init__(f"Need {amount}, have {balance}")
        self.balance = balance
        self.amount = amount
class BankAccount:
    """Account whose withdraw() signals overdrafts via InsufficientFunds."""

    def __init__(self, balance):
        self.balance = balance

    def withdraw(self, amount):
        """Deduct amount; raise InsufficientFunds when it exceeds the balance."""
        if amount > self.balance:
            raise InsufficientFunds(self.balance, amount)
        self.balance -= amount
# ── Using the custom exception ──
acct = BankAccount(100)
try:
acct.withdraw(250)
except InsufficientFunds as e:
print(e) # Need 250, have 100
print(e.balance) # 100 ← access custom attrs
print(e.amount) # 250
# ── Exception hierarchies for your app ──
class AppError(Exception):
    """Base for all app errors — catch this to handle any app error."""
class NotFoundError(AppError):
    # Raised when a requested resource doesn't exist.
    pass
class PermissionError(AppError):
    # NOTE(review): this shadows the BUILTIN PermissionError — any
    # `except PermissionError` after this definition catches only the
    # app version, not OS permission failures. Consider a distinct name.
    pass
try:
raise NotFoundError("User 42 missing")
except AppError as e: # catches NotFoundError AND PermissionError
print(e)
# ── Re-raising & chaining ──
try:
int("abc")
except ValueError as e:
raise # re-raise same exception
try:
int("abc")
except ValueError as original:
raise RuntimeError("parse failed") from original # chain: keeps both tracebacks
# ── Practical pattern: retry with fallback ──
for attempt in range(3):
try:
result = risky_operation()
break # success — exit loop
except ConnectionError:
if attempt == 2:
raise # last attempt — give up
print(f"Retry {attempt + 1}...")
# BaseException
# ├── SystemExit, KeyboardInterrupt
# └── Exception ← catch THIS, not BaseException
# ├── ValueError
# ├── TypeError
# ├── KeyError
# ├── IndexError
# ├── FileNotFoundError
# ├── ZeroDivisionError
# ├── AttributeError
# └── StopIteration
# ALWAYS use `with` — auto-closes, even on exception
with open("data.txt", "w") as f:
f.write("Hello\nWorld\n")
with open("data.txt") as f:
content = f.read() # entire file
with open("data.txt") as f:
for line in f: # line by line (memory efficient)
print(line.strip())
# Modes: "r" read, "w" write (overwrite!), "a" append
# "x" create (fail if exists), "b" binary, "+" read+write
# ── Production: use pathlib (modern, cross-platform) ──
from pathlib import Path
config_dir = Path("config")
config_dir.mkdir(parents=True, exist_ok=True) # create dirs safely
file = config_dir / "app.txt" # build paths with / (not string concat!)
file.write_text("hello") # write + auto-close
content = file.read_text() # read + auto-close
file.exists() # True
file.name # "app.txt"
file.suffix # ".txt"
file.stem # "app"
file.parent # Path("config")
file.resolve() # absolute path
# Iterate files in a directory
for p in Path(".").glob("*.py"): # all .py in current dir
print(p.name)
for p in Path(".").rglob("*.py"): # recursive — all subdirs too
print(p)
# ── Production: handle errors gracefully ──
from pathlib import Path
def read_config(path: str) -> dict:
    """Load a JSON config file, failing loudly when it's missing.

    Raises FileNotFoundError naming the offending path; malformed JSON
    propagates whatever json.loads raises.
    """
    import json  # FIX: json was never imported in this scope → NameError on first call

    p = Path(path)
    if not p.exists():
        raise FileNotFoundError(f"Config missing: {p}")
    return json.loads(p.read_text())
# Or catch errors at call site
try:
data = Path("settings.json").read_text()
except FileNotFoundError:
data = "{}" # fallback to empty config
except PermissionError:
print("No read access!")
# ── Production: safe writes (atomic — no half-written files) ──
import tempfile, os
def safe_write(path: str, content: str):
    """Write content to path atomically — readers never see a partial file.

    Writes a sibling temp file first, then swaps it into place in a
    single filesystem operation.
    """
    p = Path(path)
    # FIX: append ".tmp" instead of replacing the suffix — with_suffix(".tmp")
    # would make "data.txt" and "data.json" collide on the same "data.tmp".
    tmp = p.with_suffix(p.suffix + ".tmp")
    tmp.write_text(content, encoding="utf-8")  # pin encoding; the OS default varies
    # FIX: replace(), not rename() — rename raises on Windows when the
    # target exists; replace() overwrites atomically on every platform.
    tmp.replace(p)
# ── Production: encoding matters ──
# ALWAYS specify encoding — default varies by OS!
with open("data.txt", "w", encoding="utf-8") as f:
f.write("café ☕")
Path("data.txt").read_text(encoding="utf-8")
# ── JSON ──
import json
with open("data.json", "w") as f:
json.dump({"name": "Yatin"}, f, indent=2)
with open("data.json") as f:
data = json.load(f)
# Production: validate JSON & handle bad data
def load_json_safe(path: str, default=None) -> dict:
    """Read JSON from path, returning a fallback on missing/corrupt files.

    Returns `default` when one was supplied, else a fresh empty dict.
    """
    try:
        return json.loads(Path(path).read_text(encoding="utf-8"))
    except (FileNotFoundError, json.JSONDecodeError):
        # FIX: the original `default or {}` silently discarded falsy but
        # intentional defaults (e.g. [] or 0); test identity against None.
        return {} if default is None else default
# json.dumps / json.loads — work with strings (no file)
text = json.dumps({"age": 25}) # dict → string
obj = json.loads(text) # string → dict
# ── CSV ──
import csv
# Reading
with open("data.csv", encoding="utf-8") as f:
for row in csv.DictReader(f): # each row is a dict
print(row["name"])
# Writing
users = [{"name": "Yatin", "age": 25}, {"name": "Bob", "age": 30}]
with open("users.csv", "w", newline="", encoding="utf-8") as f:
writer = csv.DictWriter(f, fieldnames=["name", "age"])
writer.writeheader() # name,age
writer.writerows(users) # write all rows at once
# ── Large files: stream instead of loading all into memory ──
def count_errors(log_path: str) -> int:
    """Count lines containing "ERROR" while streaming the file.

    Iterating the handle yields one line at a time, so even a 10GB log
    is processed in constant memory.
    """
    with open(log_path, encoding="utf-8") as f:
        return sum(1 for line in f if "ERROR" in line)
A module is just a .py file. import executes it and gives you access to its contents.
# ── math_utils.py ──
PI = 3.14159
def circle_area(r): return PI * r ** 2
# ── main.py ──
import math_utils # whole module
from math_utils import circle_area # specific function
from math_utils import circle_area as ca # with alias
Guards code so it only runs when the file is executed directly, not when it's imported as a module.
# When Python runs a file directly: __name__ = "__main__"
# When it's imported: __name__ = "module_name"
def circle_area(r):
return 3.14 * r ** 2
if __name__ == "__main__":
# Only runs when executed directly, NOT when imported
print(circle_area(5))
print("Tests passed!")
Directories become importable packages with __init__.py — controls what's publicly accessible.
# ── models/__init__.py ──
from .user import User
from .product import Product
# Now users can do:
from models import User, Product # clean!
# Instead of:
from models.user import User # verbose
# ── WHY __init__.py in every folder? ──
# Problem: Python sees folders as just folders, not packages.
# Without __init__.py, `import models` fails — Python doesn't
# know this folder contains importable code.
# __init__.py tells Python: "This folder is a package."
# Think of it like an index page for a chapter in a book.
# ── What happens when you import a package (full breakdown) ──
#
# Given this structure:
# models/
# ├── __init__.py → from .user import User
# └── user.py → class User: ...
#
# When you write: `from models import User`
#
# Step 1: FIND the package
# Python searches sys.path (list of directories) for "models"
# sys.path includes: current dir, installed packages, stdlib
# It finds models/ folder and checks for __init__.py → found!
#
# Step 2: EXECUTE __init__.py (this is the key!)
# Python runs models/__init__.py top to bottom, like any script.
# That file says: `from .user import User`
# → Python now runs models/user.py
# → class User is created and bound to the name "User"
# inside the models package namespace
#
# Step 3: BIND the name
# `from models import User` grabs "User" from the models
# namespace and binds it in YOUR file's namespace.
# Now you can use User directly.
#
# Step 4: CACHE it
# Python stores the module in sys.modules["models"].
# Next time ANYONE imports models, __init__.py does NOT
# run again — Python reuses the cached version.
# (This is why init code only runs ONCE.)
# You can see this yourself:
import sys
# Before import — "models" not in cache
print("models" in sys.modules) # False
from models import User # triggers Step 1-4
# After import — cached!
print("models" in sys.modules) # True
print(sys.modules["models"]) # <module 'models' from 'models/__init__.py'>
# See where Python searches for packages:
for p in sys.path:
print(p)
# /Users/you/project ← your current directory (first!)
# /usr/lib/python3.12 ← stdlib
# /usr/lib/python3.12/site-packages ← pip installed
# ── Different import styles — same mechanism ──
import models # runs __init__.py → models.User
from models import User # runs __init__.py → User directly
from models.user import User # runs __init__.py AND user.py → User
import models.user # runs __init__.py AND user.py → models.user.User
# KEY INSIGHT: __init__.py ALWAYS runs, no matter which style!
# Even `from models.user import User` triggers __init__.py first.
# ── 4 things __init__.py does ──
# 1. MARKS the directory as a package (can be empty!)
# utils/__init__.py → even an empty file works
# 2. CONTROLS the public API — hide internals, expose clean imports
# ── models/user.py ──
class User:
def __init__(self, name):
self.name = name
# ── models/__init__.py ──
from .user import User
from .product import Product
# Without __init__.py re-exports:
from models.user import User # ugly — exposes internal structure
from models.product import Product
# With __init__.py re-exports:
from models import User, Product # clean! users don't know about user.py
# 3. RUNS INIT CODE — setup that should happen once on import
#
# __init__.py runs ONCE when the package is first imported.
# Perfect for one-time setup. Think of it as the "boot up" for your package.
# Problem: Every file creates its own DB connection = wasteful
#
# routes/auth.py
db = Database("sqlite:///app.db") # connection #1
# routes/api.py
db = Database("sqlite:///app.db") # connection #2 ← duplicate!
# routes/admin.py
db = Database("sqlite:///app.db") # connection #3 ← duplicate!
# Solution: create it ONCE in __init__.py
#
# ── db/__init__.py ──
from .connection import Database
default_db = Database("sqlite:///app.db") # created ONCE on first import
# ── routes/auth.py ──
from db import default_db # reuses same connection
# ── routes/api.py ──
from db import default_db # same connection — not recreated!
# ── routes/admin.py ──
from db import default_db # same connection — shared across app
# WHY? Python caches modules after first import.
# __init__.py runs once → default_db created once → everyone shares it.
# ── myapp/__init__.py ──
import logging
# Runs ONCE when anyone does `import myapp` or `from myapp import ...`
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger("myapp")
logger.info("App package loaded!") # prints once on startup
# ── myapp/routes/auth.py ──
import logging
logger = logging.getLogger("myapp.auth") # inherits config from above!
logger.info("Auth route hit") # formatted the same way
# ── plugins/__init__.py ──
from .email_plugin import EmailPlugin
from .sms_plugin import SMSPlugin
# Auto-register all plugins into a registry
registry = {}
for plugin in [EmailPlugin, SMSPlugin]:
registry[plugin.name] = plugin # built once at import time
# ── anywhere else ──
from plugins import registry
registry["email"].send("hello") # use directly — no setup needed
# ── mylib/__init__.py ──
__version__ = "2.1.0"
__author__ = "Yatin"
# Now users can check:
import mylib
print(mylib.__version__) # "2.1.0"
# This is how real packages do it:
import flask
print(flask.__version__) # "3.0.0"
import requests
print(requests.__version__) # "2.31.0"
# ── payments/__init__.py ──
import os
# Fail FAST — crash at import time, not at runtime 10 minutes later
API_KEY = os.environ.get("STRIPE_API_KEY")
if not API_KEY:
raise RuntimeError(
"STRIPE_API_KEY not set! Add it to .env"
)
# If we get here, key exists — safe to use in any file
# ── payments/charge.py ──
from . import API_KEY # guaranteed to exist
# WHY in __init__.py?
# Without: app starts → runs for 10 min → user pays → CRASH (no API key)
# With: app starts → CRASH immediately → you fix it before deploying
# 4. CONTROLS __all__ — what `from package import *` includes
#
# When someone writes: `from models import *`
# Python asks: "what is EVERYTHING in models?"
# __all__ is the answer — a list of names to export.
# ── models/__init__.py ──
from .user import User
from .product import Product
from ._internal import _helper
__all__ = ["User", "Product"] # ← whitelist for * imports
# ── main.py ──
from models import * # imports User and Product ONLY
print(User) # ✅ works
print(Product) # ✅ works
print(_helper) # ❌ NameError — not in __all__!
# But explicit imports ALWAYS work, regardless of __all__:
from models import _helper # ✅ works — __all__ only restricts *
# ── What happens WITHOUT __all__? ──
#
# `from models import *` exports EVERYTHING in __init__.py's namespace.
# That includes User, Product, _helper, AND any imports like:
# os, sys, json, random internal functions...
# Your namespace gets polluted with stuff you didn't want.
#
# __all__ = ["User", "Product"] says:
# "Only these two are the PUBLIC API. Everything else is internal."
# ── Real example: a utils package ──
# utils/__init__.py
import os # needed internally
import hashlib # needed internally
from .security import hash_password
from .security import verify_password
from ._cache import _build_cache # internal helper
__all__ = ["hash_password", "verify_password"]
# Without __all__: `from utils import *` gives you:
# hash_password, verify_password, _build_cache, os, hashlib ← messy!
#
# With __all__: `from utils import *` gives you:
# hash_password, verify_password ← clean! only what you intended
# ── __all__ also works in regular .py files, not just __init__.py ──
# helpers.py
__all__ = ["format_date"]
def format_date(d): # public
return _pad(d.day)
def _pad(n): # internal — not in __all__
return str(n).zfill(2)
# from helpers import * → gets format_date only, not _pad
# ── "But it works without __init__.py!" — yes, partly. ──
#
# Structure WITHOUT __init__.py:
# models/
# ├── user.py
# └── product.py
#
# What WORKS without it (Python 3.3+):
from models.user import User # ✅ works — full path to file
from models.product import Product # ✅ works
# What BREAKS without it (and WHY):
# ─── BREAK 1: `from models import User` → ImportError ───
#
# Think of it this way:
# `from models import User` means "go to the models PACKAGE, find User"
# But WHERE in models? Python checks __init__.py for that answer.
# No __init__.py = Python has no idea User exists inside models/.
#
# It's like asking a receptionist for "John" but there's no receptionist.
# The building has John inside room 204 (user.py), but nobody at the
# front desk to point you there.
#
from models import User # ❌ ImportError: cannot import name 'User'
#
# FIX with __init__.py:
# models/__init__.py → from .user import User
# Now Python knows: "User? Yeah, it's in user.py, here you go."
# ─── BREAK 2: `import models` → empty, useless module ───
#
# Without __init__.py, `import models` gives you a namespace package
# that's basically an empty shell. Nothing is loaded.
#
import models
print(dir(models)) # ['__loader__', '__name__', ...] ← no User!
models.User # ❌ AttributeError: no attribute 'User'
models.user # ❌ AttributeError: no attribute 'user'
#
# Python found the folder, but didn't load ANY .py files inside it.
# Just because files exist in a folder doesn't mean Python reads them.
# __init__.py is the instruction sheet: "when someone imports me,
# load these things and make them available."
#
# FIX with __init__.py:
# models/__init__.py → from .user import User
# Now: models.User works!
# ─── BREAK 3: relative imports fail entirely ───
#
# Inside models/user.py, you want to import from product.py:
#
# models/user.py
from .product import Product # ❌ ImportError: no parent package
#
# The dot (.) means "from my parent package".
# But without __init__.py, Python doesn't recognize models/ as a
# real package — so there IS no "parent package" to be relative to.
#
# It's like saying "go to the room next door" when you're standing
# outside — there's no building (package) you're inside of.
#
# Without __init__.py, your only option is:
from models.product import Product # absolute import — works but fragile
#
# FIX: add __init__.py (even empty!) and relative imports work.
# ── So WHY bother with __init__.py if direct imports work? ──
#
# 1. CLEAN IMPORTS — your users write less, know less about internals
#
# Without: from models.user import User ← must know file name
# from models.product import Product
# from models.validators import validate ← if you move validate
# to another file,
# ALL imports break!
#
# With: from models import User, Product, validate ← clean, stable
# # You can move validate from validators.py to utils.py
# # and NOTHING breaks — __init__.py absorbs the change.
#
# 2. RELATIVE IMPORTS — files within package can import each other
#
# # models/user.py
# from .product import Product ← needs __init__.py!
# from .validators import validate ← needs __init__.py!
#
# 3. TOOLS BREAK — pytest, mypy, Flask, Django all expect it
#
# pytest models/ ← may not discover tests without __init__.py
# mypy models/ ← may skip type checking
# pip install . ← your package won't include the folder
#
# 4. REFACTORING IS SAFE — move files around, __init__.py hides the change
#
# # You split user.py into user_model.py + user_schema.py
# # Update __init__.py once:
# from .user_model import User
# from .user_schema import UserSchema
# # Every file that does `from models import User` still works!
#
# BOTTOM LINE:
# Without __init__.py = it works, but fragile — every import is a
# hard-coded path to a specific file. Rename a file → imports break.
# With __init__.py = stable public API — internal file structure can
# change freely. This is why every serious project uses them.
# ── Real project example ──
# myapp/
# ├── __init__.py ← "from myapp import create_app"
# ├── models/
# │ ├── __init__.py ← "from myapp.models import User"
# │ ├── user.py
# │ └── product.py
# ├── routes/
# │ ├── __init__.py ← "from myapp.routes import auth_bp"
# │ ├── auth.py
# │ └── api.py
# └── utils/
# ├── __init__.py ← "from myapp.utils import hash_password"
# └── security.py
#
# Every __init__.py re-exports just the public stuff.
# Users of your package never need to know your file structure.
# ── List ──
squares = [x**2 for x in range(10)]
evens = [x for x in range(20) if x % 2 == 0]
labels = ["even" if x%2==0 else "odd" for x in range(5)]
# Nested — flatten matrix
matrix = [[1,2], [3,4], [5,6]]
flat = [n for row in matrix for n in row]
# [1, 2, 3, 4, 5, 6]
# ── Dict ──
word_lens = {w: len(w) for w in ["hello", "world"]}
swapped = {v: k for k, v in {"a": 1}.items()}
# ── Set ──
unique_lens = {len(w) for w in ["hi", "hey", "hello"]} # {2, 3, 5}
# ── Generator expression (lazy — no memory for full list) ──
total = sum(x**2 for x in range(1_000_000)) # memory efficient!
How for loops actually work under the hood — using __iter__() and __next__() to walk through items one by one.
# How `for x in something` ACTUALLY works:
nums = [1, 2, 3]
iterator = iter(nums) # calls __iter__()
print(next(iterator)) # 1 — calls __next__()
print(next(iterator)) # 2
print(next(iterator)) # 3
# next(iterator) # StopIteration!
yield pauses the function, returns a value, and resumes on next call.
def countdown(n):
    """Lazily yield n, n-1, ... down to (but not including) 0.

    Each yield pauses the function; it resumes on the next next() call.
    """
    remaining = n
    while remaining > 0:
        yield remaining
        remaining -= 1
for i in countdown(5):
print(i) # 5, 4, 3, 2, 1
# WHY? Memory efficiency.
# This yields billion values without storing them all:
def infinite():
n = 0
while True:
yield n
n += 1
# Real-world: process huge files with constant memory
def read_large_file(path):
    """Stream a file lazily, yielding each line with surrounding whitespace stripped."""
    with open(path) as f:
        yield from (line.strip() for line in f)
# ── Generator pipeline (data processing pattern) ──
def parse(lines):
for line in lines:
yield line.split(",")
def filter_valid(rows):
for row in rows:
if len(row) == 3:
yield row
# Chain them — nothing runs until you iterate!
pipeline = filter_valid(parse(read_large_file("data.csv")))
for row in pipeline:
print(row)
# ── yield from ──
def chain(*iterables):
for it in iterables:
yield from it
list(chain([1,2], [3,4])) # [1, 2, 3, 4]
A decorator takes a function, wraps it, and returns the wrapped version. @ is just syntactic sugar.
import time
from functools import wraps
# ── Basic decorator ──
def timer(func):
    """Decorator that reports how long each call to func takes."""
    @wraps(func)  # keep func's original name/docstring on the wrapper
    def timed(*args, **kwargs):
        began = time.perf_counter()
        outcome = func(*args, **kwargs)
        print(f"{func.__name__} took {time.perf_counter()-began:.4f}s")
        return outcome
    return timed
@timer
def slow():
time.sleep(1)
slow() # slow took 1.0012s
# @timer on slow is the same as: slow = timer(slow)
# ── Decorator WITH arguments ──
def retry(max_attempts=3):
    """Decorator factory: call func, retrying up to max_attempts times.

    Intermediate failures are printed and retried; the final one propagates.
    """
    def decorator(func):
        @wraps(func)
        def retrying(*args, **kwargs):
            for attempt in range(1, max_attempts + 1):
                try:
                    return func(*args, **kwargs)
                except Exception as e:  # broad on purpose: retry ANY failure
                    print(f"Attempt {attempt} failed: {e}")
                    if attempt == max_attempts:
                        raise
        return retrying
    return decorator
@retry(max_attempts=5)
def flaky_api():
pass
# ── Built-in cache decorator ──
from functools import lru_cache
@lru_cache(maxsize=128)
def fib(n):
    """n-th Fibonacci number — memoized, so the recursion is linear, not exponential."""
    return n if n < 2 else fib(n - 1) + fib(n - 2)
print(fib(100)) # instant!
The order Python searches for variable names — Local, Enclosing, Global, then Built-in.
# Python looks up variables in this order:
# L — Local (inside current function)
# E — Enclosing (in outer function, for nested)
# G — Global (module level)
# B — Built-in (print, len, etc.)
x = "global"
def outer():
x = "enclosing"
def inner():
x = "local"
print(x) # "local"
inner()
# global / nonlocal keywords
count = 0
def inc():
global count # access module-level count
count += 1
def outer():
x = 0
def inner():
nonlocal x # access enclosing scope's x
x += 1
inner()
print(x) # 1
A function that remembers variables from the scope where it was created, even after that scope is gone.
# A function that "closes over" variables from its enclosing scope
def make_multiplier(factor):
    """Return a closure that multiplies its argument by `factor`."""
    def scale(value):
        return value * factor # `factor` is remembered from the enclosing call
    return scale
double = make_multiplier(2)
triple = make_multiplier(3)
print(double(5)) # 10
print(triple(5)) # 15
# Real-world: config factories
def make_logger(prefix):
    """Build a logger closure that tags every message with [prefix]."""
    tag = f"[{prefix}]"
    def log(msg):
        print(f"{tag} {msg}")
    return log
error = make_logger("ERROR")
error("Something broke") # [ERROR] Something broke
# ── Class-based ──
class Timer:
    """Context manager that prints the wall-clock time spent inside the block."""
    def __enter__(self):
        self.start = time.perf_counter() # record entry time
        return self
    def __exit__(self, exc_type, exc_val, exc_tb):
        elapsed = time.perf_counter() - self.start
        print(f"Elapsed: {elapsed:.4f}s")
        return False # let exceptions propagate out of the with-block
with Timer():
time.sleep(1)
# ── Generator-based (simpler) ──
from contextlib import contextmanager
@contextmanager
def timer(label):
    """Context manager: on exit, print `label` and the elapsed seconds."""
    t0 = time.perf_counter()
    try:
        yield # the body of the with-statement runs here
    finally:
        print(f"{label}: {time.perf_counter()-t0:.4f}s")
with timer("query"):
time.sleep(0.5)
# *args → extra POSITIONAL args as a tuple
# **kwargs → extra KEYWORD args as a dict
def add_all(*args):
    """Sum any number of positional arguments (collected into a tuple)."""
    return sum(args)

print(add_all(1,2,3,4)) # 10

def profile(**kwargs):
    """Return the extra keyword arguments as a plain dict."""
    return kwargs

profile(name="Yatin", age=25) # {'name':'Yatin','age':25}
# Combining — ORDER MATTERS
# Parameter kinds must appear in this order: required → defaults → *args → **kwargs
def full(required, default="hi", *args, **kwargs):
    print(required, default, args, kwargs)

# ── Unpacking ──
def add(a, b, c): return a + b + c
nums = [1, 2, 3]
print(add(*nums)) # 6 — * unpacks the list into positional args
print(add(**{"a":1,"b":2,"c":3})) # 6 — ** unpacks the dict into keyword args

# ── Keyword-only (everything after a bare *) ──
def connect(host, port, *, timeout=30, ssl=True):
    pass # timeout & ssl MUST be passed as keyword arguments

# ── Positional-only (everything before /) — Python 3.8+ ──
def greet(name, /, greeting="Hello"):
    pass # name MUST be passed positionally, never as greet(name=...)
Type hints don't affect runtime. They exist for docs, IDE support, and tools like mypy.
def greet(name: str, times: int = 1) -> str:
    """Repeat a greeting; annotations are hints only, never enforced at runtime."""
    return (name + "! ") * times

# Collections — builtin generic syntax (3.9+)
def process(
    items: list[int],
    mapping: dict[str, float],
    coords: tuple[float, float],
) -> None: ...

# Optional / Union — pipe syntax (3.10+)
def find(id: int) -> str | None: ...
def parse(data: int | str) -> str: ...

# Callable[[argument types], return type]
from typing import Callable
def apply(f: Callable[[int, int], int], a: int, b: int) -> int:
    """Apply a two-int function to a and b."""
    return f(a, b)

# Type aliases — name a compound type for readability
Vector = list[float]
Matrix = list[Vector]
Building classic data structures in Python teaches you OOP, references, recursion, and how the language actually works under the hood.
A chain of nodes where each one points to the next. Good for fast insertions/deletions, but slow random access.
class Node:
    """One element of a singly linked list."""
    def __init__(self, data):
        self.data = data
        self.next = None # pointer to the next node; None marks the tail

class LinkedList:
    """Singly linked list supporting append, prepend, delete, and iteration."""
    def __init__(self):
        self.head = None # first node, or None when the list is empty

    def append(self, data):
        """Add a node holding `data` at the tail (walks the list, O(n))."""
        node = Node(data)
        if self.head is None:
            self.head = node
            return
        tail = self.head
        while tail.next is not None:
            tail = tail.next
        tail.next = node

    def prepend(self, data):
        """Add a node holding `data` at the head (O(1))."""
        node = Node(data)
        node.next, self.head = self.head, node

    def delete(self, data):
        """Unlink the first node whose data equals `data`; no-op if absent."""
        if self.head is None:
            return
        if self.head.data == data:
            self.head = self.head.next
            return
        prev = self.head
        while prev.next is not None:
            if prev.next.data == data:
                prev.next = prev.next.next # skip over the matching node
                return
            prev = prev.next

    def __iter__(self):
        """Yield each node's data from head to tail (makes the list iterable)."""
        node = self.head
        while node is not None:
            yield node.data
            node = node.next

    def __repr__(self):
        return " → ".join(str(item) for item in self) + " → None"
# Usage
ll = LinkedList()
ll.append(1)
ll.append(2)
ll.append(3)
ll.prepend(0)
print(ll) # 0 → 1 → 2 → 3 → None
ll.delete(2)
print(ll) # 0 → 1 → 3 → None
# Iterate like any Python collection
for val in ll:
print(val) # 0, 1, 3
Last In, First Out — like a stack of plates. The last item you add is the first one you remove.
class Stack:
    """LIFO stack backed by a Python list (push/pop at the end are O(1))."""
    def __init__(self):
        self._items = []
    def push(self, item):
        """Place `item` on top of the stack."""
        self._items.append(item)
    def pop(self):
        """Remove and return the top item; raises IndexError when empty."""
        if not self._items:
            raise IndexError("Stack is empty")
        return self._items.pop()
    def peek(self):
        """Return the top item without removing it."""
        return self._items[-1]
    def is_empty(self):
        """True when the stack holds no items."""
        return not self._items
    def __len__(self):
        return len(self._items)
    def __repr__(self):
        return f"Stack({self._items})"
s = Stack()
s.push(1); s.push(2); s.push(3)
print(s.pop()) # 3 (last in, first out)
print(s.peek()) # 2
First In, First Out — like a real queue. The first item added is the first one processed.
from collections import deque
class Queue:
    """FIFO queue backed by deque — O(1) at both ends, unlike list.pop(0)."""
    def __init__(self):
        self._items = deque()
    def enqueue(self, item):
        """Add `item` to the back of the queue."""
        self._items.append(item)
    def dequeue(self):
        """Remove and return the front item (IndexError when empty)."""
        return self._items.popleft()
    def __len__(self):
        return len(self._items)
    def __repr__(self):
        return f"Queue({list(self._items)})"
q = Queue()
q.enqueue("first"); q.enqueue("second")
print(q.dequeue()) # "first" (first in, first out)
A hierarchical structure where each node has at most two children. A BST keeps values sorted for fast search.
class TreeNode:
    """A single node of a binary tree."""
    def __init__(self, val):
        self.val = val
        self.left = None
        self.right = None

class BinarySearchTree:
    """Binary search tree: values < node go left, values >= node go right."""

    # Private sentinel for "no subtree given, start at the root".
    # The original used the magic string "DEFAULT", which is fragile:
    # an object() compared with `is` can never collide with real data.
    _ROOT = object()

    def __init__(self):
        self.root = None

    def insert(self, val):
        """Insert `val`, preserving the BST ordering invariant."""
        if not self.root:
            self.root = TreeNode(val)
            return
        self._insert(self.root, val)

    def _insert(self, node, val):
        # Walk down until an empty child slot on the correct side is found.
        if val < node.val:
            if node.left:
                self._insert(node.left, val)
            else:
                node.left = TreeNode(val)
        else:
            if node.right:
                self._insert(node.right, val)
            else:
                node.right = TreeNode(val)

    def search(self, val):
        """Return True if `val` is stored in the tree."""
        return self._search(self.root, val)

    def _search(self, node, val):
        if not node:
            return False
        if val == node.val:
            return True
        if val < node.val:
            return self._search(node.left, val)
        return self._search(node.right, val)

    def inorder(self, node=_ROOT):
        """In-order traversal: left → root → right (yields sorted order!)."""
        if node is self._ROOT:
            node = self.root
        if not node:
            return []
        return self.inorder(node.left) + [node.val] + self.inorder(node.right)
# Usage
bst = BinarySearchTree()
for val in [5, 3, 7, 1, 4, 6, 8]:
bst.insert(val)
print(bst.inorder()) # [1, 3, 4, 5, 6, 7, 8] — sorted!
print(bst.search(4)) # True
print(bst.search(99)) # False
# The tree looks like:
# 5
# / \
# 3 7
# / \ / \
# 1 4 6 8
How Python's dict works under the hood — hashing keys to bucket indices for O(1) average lookup.
class HashMap:
    """Toy hash map with separate chaining — a simplified model of Python's dict."""
    def __init__(self, size=16):
        self.size = size
        self.buckets = [[] for _ in range(size)] # one chain of (key, value) pairs per slot
    def _hash(self, key):
        """Map a hashable key to a bucket index."""
        return hash(key) % self.size
    def __setitem__(self, key, value): # hm[key] = value
        bucket = self.buckets[self._hash(key)]
        for position, (existing, _) in enumerate(bucket):
            if existing == key:
                bucket[position] = (key, value) # overwrite the existing entry
                return
        bucket.append((key, value)) # new key — extend the chain
    def __getitem__(self, key): # hm[key]
        bucket = self.buckets[self._hash(key)]
        for existing, value in bucket:
            if existing == key:
                return value
        raise KeyError(key)
hm = HashMap()
hm["name"] = "Yatin"
print(hm["name"]) # Yatin
# ── Threading — for I/O-bound ──
import threading, time
def download(url):
    """Fake download: block ~2s to simulate network I/O (url itself is unused)."""
    time.sleep(2) # simulate I/O — threads can overlap these waits (GIL is released)
# Sequential: 6s | Threaded: ~2s
threads = [threading.Thread(target=download, args=(u,))
for u in ["u1", "u2", "u3"]]
for t in threads: t.start()
for t in threads: t.join()
# ── Multiprocessing — for CPU-bound (bypasses GIL) ──
from multiprocessing import Pool
def compute(n):
    """CPU-bound demo workload: the sum of squares 0..n-1."""
    return sum(i * i for i in range(n))
with Pool(4) as pool:
results = pool.map(compute, [10**6]*4)
# ── asyncio — modern I/O-bound ──
import asyncio
async def fetch(url):
    """Simulate an async network call: non-blocking 2s delay, then a result."""
    await asyncio.sleep(2) # other coroutines run while this one waits
    return f"Data from {url}"

async def main():
    # gather schedules both fetches concurrently → total ~2s, not ~4s
    results = await asyncio.gather(
        fetch("api/users"), fetch("api/posts")
    )
    # NOTE(review): `results` is collected but never returned or printed — confirm intent
asyncio.run(main()) # ~2s, not ~4s
| Approach | Best For | GIL? |
|---|---|---|
| Threading | I/O-bound | Limited |
| asyncio | Many I/O connections | Single thread |
| Multiprocessing | CPU-bound | Bypassed |
import sys
# Everything is heap-allocated
print(sys.getsizeof(0)) # 28 bytes — even an int!
print(sys.getsizeof("")) # 49 bytes — empty string!
print(sys.getsizeof([])) # 56 bytes — empty list!
# Reference counting — primary GC
a = [1, 2] # refcount = 1
b = a # refcount = 2
del b # refcount = 1
del a # refcount = 0 → freed immediately
# Circular refs → generational GC handles them
import gc
gc.collect()
# ── __slots__ — save memory ──
class Point:
    """2-D point; __slots__ drops the per-instance __dict__ to save memory."""
    __slots__ = ("x", "y") # only these attribute names may exist on an instance
    def __init__(self, x, y):
        self.x, self.y = x, y
# Regular class: ~152 bytes/instance
# With __slots__: ~56 bytes/instance
# 1M points = ~100MB saved!
A metaclass is the "class of a class." type is the default metaclass.
print(type(42)) # <class 'int'>
print(type(int)) # <class 'type'> — int is an instance of type!
print(type(type)) # <class 'type'> — type is its own metaclass
# Singleton metaclass
class SingletonMeta(type):
    """Metaclass that hands back a single cached instance per class."""
    _instances = {} # class → its one instance
    def __call__(cls, *args, **kwargs):
        # First construction goes through type.__call__; later calls hit the cache.
        try:
            return cls._instances[cls]
        except KeyError:
            instance = super().__call__(*args, **kwargs)
            cls._instances[cls] = instance
            return instance

class Database(metaclass=SingletonMeta):
    pass
print(Database() is Database()) # True — always same object
The low-level mechanism behind @property, @classmethod, and @staticmethod — controls how attribute access works.
# The mechanism behind @property, @classmethod, @staticmethod
class Positive:
    """Data descriptor that rejects values <= 0 — the mechanism behind @property."""
    def __set_name__(self, owner, name):
        # Called once, when the owning class body finishes executing.
        self.name = name
        self.storage = f"_{name}" # per-instance attribute used for real storage
    def __get__(self, obj, objtype=None):
        # Default of None when the attribute was never assigned.
        return getattr(obj, self.storage, None)
    def __set__(self, obj, value):
        if value <= 0:
            raise ValueError(f"{self.name} must be positive")
        setattr(obj, self.storage, value)

class Product:
    # Descriptors live on the CLASS; attribute access on instances routes through them.
    price = Positive()
    quantity = Positive()
    def __init__(self, name, price, qty):
        self.name = name
        self.price = price # assignment triggers Positive.__set__
        self.quantity = qty
# ── Strategy (first-class functions) ──
def sort_by_name(items):
    """Strategy: order a list of dicts alphabetically by their 'name' key."""
    return sorted(items, key=lambda item: item["name"])

def display(items, strategy):
    """Render `items` through whichever strategy callable is injected."""
    return strategy(items)
# ── Observer ──
class EventEmitter:
    """Observer pattern: register callbacks with on(), fire them with emit()."""
    def __init__(self):
        self._listeners = {} # event name → list of callbacks
    def on(self, event, cb):
        """Subscribe callback `cb` to `event`."""
        self._listeners.setdefault(event, []).append(cb)
    def emit(self, event, *args):
        """Invoke every callback registered for `event`, passing *args along."""
        for callback in self._listeners.get(event, []):
            callback(*args)
# ── Registry (common in ML frameworks) ──
REGISTRY = {} # name → registered class (pattern common in ML frameworks)

def register(name):
    """Class-decorator factory: record the decorated class under `name`."""
    def decorator(cls):
        REGISTRY[name] = cls
        return cls # return the class unchanged so the decoration is transparent
    return decorator

@register("linear")
class LinearModel: pass
model = REGISTRY["linear"]() # create by name
import pytest
def add(a, b): return a + b

# Test functions must start with test_ for pytest to collect them
def test_add():
    assert add(2, 3) == 5
    assert add(-1, 1) == 0

# Asserting that a block raises a specific exception
def test_divide_zero():
    with pytest.raises(ZeroDivisionError):
        1 / 0

# Fixtures — reusable setup/teardown, injected by matching the parameter name
@pytest.fixture
def sample_data():
    data = {"users": ["Alice", "Bob"]}
    yield data # the test body runs at the yield point
    # code after the yield is the teardown, run once the test finishes

def test_users(sample_data):
    assert len(sample_data["users"]) == 2

# Parametrize — run the same test body once per (a, b, expected) tuple
@pytest.mark.parametrize("a,b,expected", [
    (1,2,3), (-1,1,0), (0,0,0),
])
def test_add_many(a, b, expected):
    assert add(a, b) == expected

# Run: pytest test_file.py -v
# Run: pytest test_file.py -v
from dataclasses import dataclass, field, asdict
# Auto-generates __init__, __repr__, __eq__
@dataclass
class Point:
    """Auto-generates __init__, __repr__, __eq__ from the field annotations."""
    x: float
    y: float
    z: float = 0.0 # default value → optional in the generated __init__

p = Point(1, 2)
print(p) # Point(x=1, y=2, z=0.0)
print(p == Point(1, 2)) # True — generated __eq__ compares field by field

# frozen=True → immutable (hashable, so instances can be dict keys / set members)
@dataclass(frozen=True)
class Color:
    r: int; g: int; b: int

# slots=True → memory efficient, no per-instance __dict__ (3.10+)
@dataclass(slots=True)
class Particle:
    x: float; y: float; mass: float

# Mutable defaults need field(default_factory=...) — a bare [] would be shared
@dataclass
class Config:
    name: str
    tags: list[str] = field(default_factory=list)
    def __post_init__(self):
        # Runs AFTER the auto-generated __init__ — the hook for validation
        if not self.name:
            raise ValueError("Name required")
print(asdict(Config("app", ["ml"])))
# {'name': 'app', 'tags': ['ml']}
from functools import reduce, partial
from itertools import chain, islice, groupby, product
# ── partial — lock in some arguments ──
def power(base, exp):
    """Return base raised to the exp power."""
    return base ** exp

square = partial(power, exp=2) # power() with exp frozen at 2
print(square(5)) # 25
# ── itertools ──
list(chain([1,2], [3,4])) # [1,2,3,4] flatten
list(islice(infinite(), 5)) # [0,1,2,3,4] lazy slice
list(product([1,2], ["a","b"])) # all combinations
# reduce — accumulate
print(reduce(lambda a,b: a+b, [1,2,3,4])) # 10
NumPy is the foundation of ALL Python ML/data science. Every ML library uses NumPy arrays internally.
import numpy as np
# ── Creation ──
a = np.array([1, 2, 3])
b = np.zeros((3, 4)) # 3x4 zeros
c = np.ones((2, 3)) # 2x3 ones
d = np.arange(0, 10, 2) # [0, 2, 4, 6, 8]
e = np.linspace(0, 1, 5) # [0, 0.25, 0.5, 0.75, 1.0]
f = np.random.randn(3, 3) # 3x3 random normal
g = np.eye(3) # identity matrix
# ── Shape ──
print(b.shape) # (3, 4)
print(b.dtype) # float64
print(b.ndim) # 2
x = np.arange(12).reshape(3, 4) # reshape
# ── Vectorized ops (100x faster than Python loops) ──
a = np.array([1, 2, 3])
b = np.array([10, 20, 30])
print(a + b) # [11, 22, 33]
print(a * b) # [10, 40, 90]
print(a ** 2) # [1, 4, 9]
# ── Boolean indexing ──
data = np.array([10, 25, 3, 40])
print(data[data > 10]) # [25, 40]
# ── Broadcasting ──
matrix = np.ones((3, 3))
row = np.array([1, 2, 3])
print(matrix + row) # row is broadcast across all rows
# ── Linear algebra ──
A = np.array([[1,2],[3,4]])
B = np.array([[5,6],[7,8]])
print(A @ B) # matrix multiply
print(np.linalg.inv(A)) # inverse
print(np.linalg.det(A)) # determinant
# ── Aggregations ──
m = np.array([[1,2,3],[4,5,6]])
print(m.sum(axis=0)) # [5,7,9] per column
print(m.sum(axis=1)) # [6, 15] per row
print(m.mean(), m.std())
import pandas as pd
df = pd.DataFrame({
"name": ["Alice", "Bob", "Charlie"],
"age": [25, 30, 35],
"salary": [50000, 60000, 70000],
"dept": ["Eng", "Mkt", "Eng"]
})
# ── Explore ──
df.head() # first 5 rows
df.info() # types, non-null counts
df.describe() # statistics
# ── Select ──
df["name"] # one column (Series)
df[["name", "age"]] # multiple columns
df.loc[0] # row by label
df.iloc[0:2] # rows by position
# ── Filter (SQL WHERE) ──
df[df["age"] > 28]
df[(df["dept"] == "Eng") & (df["salary"] > 55000)]
# Use & not 'and', | not 'or'
# ── New columns ──
df["bonus"] = df["salary"] * 0.1
# ── GroupBy (SQL GROUP BY) ──
df.groupby("dept")["salary"].mean()
# ── Missing data ──
df.dropna() # drop rows with NaN
df.fillna(0) # replace NaN
df.isna().sum() # count NaNs per column
# ── Merge (SQL JOIN) ──
orders = pd.DataFrame({"uid": [1,2], "product": ["A","B"]})
users = pd.DataFrame({"uid": [1,2], "name": ["Alice","Bob"]})
merged = pd.merge(orders, users, on="uid")
# ── Read/Write ──
# pd.read_csv("data.csv")
# df.to_csv("out.csv", index=False)
This example uses almost every Python concept from this guide to build Linear Regression from scratch.
import numpy as np
import pandas as pd
from dataclasses import dataclass
from abc import ABC, abstractmethod
from functools import wraps
import time
# Decorator
def log_time(func):
    """Decorator: print '[name] seconds' for every call, preserving metadata."""
    @wraps(func)
    def wrapper(*args, **kwargs):
        begin = time.perf_counter()
        value = func(*args, **kwargs)
        print(f"[{func.__name__}] {time.perf_counter()-begin:.2f}s")
        return value
    return wrapper
# Dataclass for config
@dataclass
class Config:
    """Hyperparameter bundle passed to every model."""
    lr: float = 0.01     # learning-rate used by gradient descent
    epochs: int = 100    # number of full passes over the training data
    reg: float = 0.001   # NOTE(review): regularization strength — not read by fit() below; confirm intent
# ABC for model interface
class BaseModel(ABC):
    """Abstract interface every model must implement: fit() and predict()."""
    def __init__(self, config: Config):
        self.config = config
        self.weights = None # populated by fit()
    @abstractmethod
    def fit(self, X, y): ...
    @abstractmethod
    def predict(self, X): ...
    def __repr__(self):
        return f"{self.__class__.__name__}(lr={self.config.lr})"
# Concrete model
class LinearRegression(BaseModel):
    """Linear regression trained by full-batch gradient descent."""
    @log_time
    def fit(self, X, y):
        """Learn weights minimizing MSE on (X, y); returns self for chaining."""
        # Prepend a column of ones so weights[0] acts as the bias/intercept term
        X_b = np.c_[np.ones(X.shape[0]), X]
        self.weights = np.zeros(X_b.shape[1])
        for _ in range(self.config.epochs):
            preds = X_b @ self.weights
            errors = preds - y
            # Gradient of mean squared error: (2/n) · Xᵀ(Xw − y)
            grad = (2 / len(y)) * X_b.T @ errors
            self.weights -= self.config.lr * grad
        return self
    def predict(self, X):
        """Predict targets for X using the learned weights (bias included)."""
        X_b = np.c_[np.ones(X.shape[0]), X]
        return X_b @ self.weights
# Generator for mini-batches
def batch_gen(X, y, size=32):
    """Yield (X_batch, y_batch) pairs in a fresh random order on each call."""
    order = np.random.permutation(len(X)) # shuffled row indices
    for start in range(0, len(X), size):
        picked = order[start:start + size]
        yield X[picked], y[picked]
# Run it
if __name__ == "__main__": # run only when executed directly, not on import
    np.random.seed(42) # reproducible synthetic data
    X = np.random.randn(100, 3)
    true_w = np.array([2.0, -1.0, 0.5]) # ground-truth weights to recover
    y = X @ true_w + np.random.randn(100) * 0.1 # targets plus small Gaussian noise
    model = LinearRegression(Config(lr=0.01, epochs=1000))
    model.fit(X, y)
    print(f"Learned: {model.weights[1:]}") # weights[0] is the bias — skip it
    print(f"True: {true_w}")
How every Python concept you learned maps directly to real machine learning workflows.
| Concept | Used in ML for |
|---|---|
__init__ | Model hyperparameters, weights |
| Inheritance / ABCs | Base model interfaces |
| Decorators | Timing, caching, validation |
| Generators | Data loading, mini-batches |
| Dataclasses | Config objects, experiment tracking |
| NumPy | ALL numerical computation |
| Pandas | Data loading, cleaning, features |
| Context managers | GPU memory, file handling |
| Closures | Loss functions, LR schedules |
| Comprehensions | Feature extraction, transforms |
The simplest layout — just a script, dependencies, and a gitignore.
The proper layout for reusable Python packages with src directory, tests, and config files.
Organized layout for ML work — separating data, notebooks, source code, and trained models.
Production-ready API structure with routes, models, services, and database layers separated cleanly.
Isolated Python environments per project — keeps dependencies separate so projects don't conflict.
# Create a virtual environment
python -m venv .venv
# Activate it
source .venv/bin/activate # macOS/Linux
.venv\Scripts\activate # Windows
# Install packages
pip install numpy pandas scikit-learn
pip freeze > requirements.txt # save dependencies
# Recreate environment elsewhere
pip install -r requirements.txt
# Deactivate
deactivate
The core extensions every Python developer needs — install these first.
Extensions that significantly boost productivity — you'll want these soon after starting.
Optional extras that add polish — install these when you want to fine-tune your workflow.
Copy-paste these settings to get the best Python development experience out of the box.
// Add to your VS Code settings.json (Cmd+Shift+P → "Open User Settings JSON")
{
// ── Python core ──
"python.analysis.typeCheckingMode": "basic", // catch type errors
"python.analysis.autoImportCompletions": true, // auto-import on autocomplete
"python.analysis.inlayHints.functionReturnTypes": true,
// ── Formatting (Ruff) ──
"[python]": {
"editor.defaultFormatter": "charliermarsh.ruff",
"editor.formatOnSave": true,
"editor.codeActionsOnSave": {
"source.fixAll": "explicit",
"source.organizeImports": "explicit"
}
},
// ── Editor comfort ──
"editor.rulers": [88], // line length guide
"editor.bracketPairColorization.enabled": true,
"editor.guides.bracketPairs": "active",
"editor.stickyScroll.enabled": true, // sticky class/function headers
"files.autoSave": "afterDelay",
// ── Testing ──
"python.testing.pytestEnabled": true,
"python.testing.pytestArgs": ["tests"]
}
The shortcuts that will save you hours — learn these and you'll fly through code.
| Shortcut | Action |
|---|---|
F5 | Start debugging |
F9 | Toggle breakpoint |
F12 | Go to definition |
Shift+F12 | Find all references |
Cmd+Shift+P | Command palette |
Cmd+P | Quick open file |
Cmd+D | Select next occurrence |
Cmd+Shift+L | Select all occurrences |
Alt+Up/Down | Move line up/down |
Shift+Alt+Up/Down | Duplicate line |
Cmd+/ | Toggle comment |
Cmd+Shift+K | Delete line |
Ctrl+` | Toggle terminal |
Cmd+B | Toggle sidebar |
Cmd+Shift+E | Explorer panel |
Cmd+Shift+F | Search across files |
Every method you'll reach for daily. Grouped by type so you can scan fast.
s = " Hello, World! "
# ── Cleaning ──
s.strip() # "Hello, World!" remove whitespace both ends
s.lstrip() # "Hello, World! " left only
s.rstrip() # " Hello, World!" right only
s.strip("! ") # "Hello, World" strip specific chars
# ── Case ──
"hello".upper() # "HELLO"
"HELLO".lower() # "hello"
"hello world".title() # "Hello World"
"hello world".capitalize() # "Hello world"
"Hello".swapcase() # "hELLO"
# ── Search ──
"hello".find("ll") # 2 index of first match, -1 if not found
"hello".index("ll") # 2 same but raises ValueError if not found
"hello".rfind("l") # 3 search from the right
"hello".count("l") # 2 count occurrences
"hello".startswith("he") # True
"hello".endswith("lo") # True
# ── Replace & Transform ──
"hello".replace("l", "L") # "heLLo" replace ALL occurrences
"hello".replace("l", "L", 1) # "heLlo" replace first N only
"a-b-c".split("-") # ['a','b','c']
"a b c".split() # ['a','b','c'] split on any whitespace
"line1\nline2".splitlines() # ['line1', 'line2']
"-".join(["a", "b", "c"]) # "a-b-c"
"hello".center(20, "*") # "*******hello********"
"42".zfill(5) # "00042" pad with zeros
# ── Checks ──
"42".isdigit() # True all digits?
"abc".isalpha() # True all letters?
"abc123".isalnum() # True letters or digits?
" ".isspace() # True all whitespace?
"Hello".isupper() # False
"hello".islower() # True
# ── Encoding ──
"hello".encode("utf-8") # b'hello' str → bytes
b"hello".decode("utf-8") # "hello" bytes → str
lst = [3, 1, 4, 1, 5, 9]
# ── Add ──
lst.append(2) # [3,1,4,1,5,9,2] add to end
lst.insert(0, 99) # [99,3,1,4,1,5,9,2] add at index
lst.extend([6, 7]) # [..., 6, 7] add multiple
# ── Remove ──
lst.remove(1) # remove FIRST occurrence of value 1
lst.pop() # remove & return LAST element
lst.pop(0) # remove & return at index 0
lst.clear() # remove everything
# ── Sort & Order ──
lst.sort() # in-place, ascending (returns None!)
lst.sort(reverse=True) # in-place, descending
lst.sort(key=len) # sort by custom key
lst.reverse() # reverse in-place
# ── Search ──
lst.index(4) # index of first occurrence (ValueError if missing)
lst.count(1) # count occurrences
# ── Copy ──
lst.copy() # shallow copy (same as lst[:])
# ── Built-in functions that work with lists ──
sorted(lst) # NEW sorted list (original unchanged)
reversed(lst) # iterator in reverse
len(lst) # length
sum(lst) # sum all elements
min(lst), max(lst) # min and max
any([False,True]) # True — any element truthy?
all([True,True]) # True — all elements truthy?
enumerate(lst) # pairs of (index, value)
zip(lst, other) # pairs from two lists
d = {"name": "Yatin", "age": 25, "role": "dev"}
# ── Access ──
d["name"] # "Yatin" KeyError if missing
d.get("name") # "Yatin" None if missing
d.get("email", "N/A") # "N/A" custom default
# ── Add / Update ──
d["email"] = "y@dev.com" # add or overwrite
d.update({"age": 26, "city": "NYC"}) # merge in another dict
d.setdefault("lang", "Python") # set only if key doesn't exist, return value
# ── Remove ──
d.pop("age") # remove & return value (KeyError if missing)
d.pop("age", None) # safe pop — returns None if missing
d.popitem() # remove & return last (key, value) pair
del d["role"] # delete key
d.clear() # remove everything
# ── Views ──
d.keys() # dict_keys(['name', 'age', 'role'])
d.values() # dict_values(['Yatin', 25, 'dev'])
d.items() # dict_items([('name','Yatin'), ...])
# ── Copy & Merge ──
d.copy() # shallow copy
{**d, "new": True} # merge via unpacking
d | {"new": True} # merge operator (3.9+)
# ── Useful patterns ──
if "name" in d: # check key exists (O(1))
print(d["name"])
# dict comprehension from two lists
keys = ["a", "b", "c"]
vals = [1, 2, 3]
dict(zip(keys, vals)) # {'a': 1, 'b': 2, 'c': 3}
a = {1, 2, 3, 4}
b = {3, 4, 5, 6}
# ── Add / Remove ──
a.add(5) # add one element
a.update([6, 7]) # add multiple
a.remove(5) # remove (KeyError if missing)
a.discard(99) # remove (no error if missing)
a.pop() # remove & return arbitrary element
# ── Set Math ──
a | b # union {1,2,3,4,5,6}
a.union(b) # same thing
a & b # intersection {3, 4}
a.intersection(b) # same thing
a - b # difference {1, 2}
a.difference(b) # same thing
a ^ b # symmetric diff {1,2,5,6}
a.symmetric_difference(b) # same thing
# ── Checks ──
a.issubset(b) # is a ⊆ b?
a.issuperset(b) # is a ⊇ b?
a.isdisjoint(b) # no common elements?
3 in a # membership test — O(1)!
# ── Type & Conversion ──
int("42") # 42
float("3.14") # 3.14
str(42) # "42"
bool(0) # False
list("abc") # ['a', 'b', 'c']
tuple([1,2]) # (1, 2)
set([1,1,2]) # {1, 2}
dict(a=1, b=2) # {'a': 1, 'b': 2}
# ── Math ──
abs(-5) # 5
round(3.14159, 2) # 3.14
pow(2, 10) # 1024
divmod(17, 5) # (3, 2) quotient and remainder
min(3, 1, 4) # 1
max(3, 1, 4) # 4
sum([1, 2, 3]) # 6
# ── Iteration ──
range(5) # 0,1,2,3,4
range(2, 10, 3) # 2,5,8
enumerate(lst) # (0,a), (1,b), ...
zip(a, b) # (a1,b1), (a2,b2), ...
map(func, lst) # apply func to each element
filter(func, lst) # keep elements where func returns True
sorted(lst) # new sorted list
reversed(lst) # reverse iterator
next(iterator) # get next value from iterator
iter(lst) # get iterator from iterable
# ── Introspection ──
type(obj) # what type is it?
isinstance(obj, int) # is it an int (or subclass)?
id(obj) # memory address
dir(obj) # list all attributes/methods
help(obj) # interactive help
hasattr(obj, "x") # does obj.x exist?
getattr(obj, "x") # get obj.x (can set default)
setattr(obj, "x", 5) # set obj.x = 5
callable(obj) # can you call obj()?
vars(obj) # obj.__dict__
# ── I/O ──
print("hello", end="") # no newline at end
print("a", "b", sep="-") # custom separator → "a-b"
input("Enter: ") # read user input as string
open("f.txt") # open file
# ── Logic ──
any([False,False,True]) # True at least one truthy?
all([True,True,False]) # False all truthy?
from pathlib import Path # modern way (preferred)
import os
# ── pathlib (use this!) ──
p = Path("data/raw/file.csv")
p.exists() # True/False
p.is_file() # is it a file?
p.is_dir() # is it a directory?
p.name # "file.csv"
p.stem # "file"
p.suffix # ".csv"
p.parent # Path("data/raw")
p.resolve() # absolute path
p.read_text() # read entire file as string
p.write_text("data") # write string to file
# Directory operations
Path("output").mkdir(parents=True, exist_ok=True)
list(Path(".").glob("*.py")) # all .py files
list(Path(".").rglob("*.py")) # recursive
# Path joining (/ operator!)
config_path = Path("project") / "config" / "settings.json"
# ── os module (older but still useful) ──
os.getcwd() # current working directory
os.listdir(".") # list directory contents
os.path.join("a", "b") # "a/b" (use Path / instead)
os.path.exists("f.txt") # True/False
os.environ["HOME"] # environment variable
os.environ.get("API_KEY", "default") # safe access
from itertools import (
chain, islice, cycle, repeat, count,
product, permutations, combinations,
groupby, accumulate, starmap, zip_longest
)
# ── Combining ──
list(chain([1,2], [3,4])) # [1,2,3,4] flatten iterables
list(chain.from_iterable([[1,2],[3]])) # [1,2,3] flatten nested
list(zip_longest([1,2], ["a"], fillvalue="-"))
# [(1,'a'), (2,'-')] — zip but pads shorter
# ── Slicing ──
list(islice(count(), 5)) # [0,1,2,3,4] take first 5 from infinite
list(islice(count(), 2, 6)) # [2,3,4,5] skip 2, take until 6
# ── Infinite ──
count(10) # 10, 11, 12, ... forever
cycle(["a", "b"]) # a, b, a, b, ... forever
repeat("hi", 3) # "hi", "hi", "hi"
# ── Combinatorics ──
list(product([1,2], ["a","b"])) # [(1,'a'),(1,'b'),(2,'a'),(2,'b')]
list(permutations([1,2,3], 2)) # [(1,2),(1,3),(2,1),(2,3),(3,1),(3,2)]
list(combinations([1,2,3], 2)) # [(1,2),(1,3),(2,3)]
# ── Grouping ──
data = [("a",1), ("a",2), ("b",3)]
for key, group in groupby(data, key=lambda x: x[0]):
print(key, list(group))
# a [('a',1), ('a',2)]
# b [('b',3)]
# NOTE: data must be sorted by grouping key first!
# ── Accumulate ──
list(accumulate([1,2,3,4])) # [1,3,6,10] running sum
list(accumulate([1,2,3], lambda a,b: a*b)) # [1,2,6] running product
from collections import (
Counter, defaultdict, OrderedDict,
deque, namedtuple, ChainMap
)
# ── Counter ──
c = Counter("abracadabra")
c.most_common(3) # [('a',5), ('b',2), ('r',2)]
c["a"] # 5
c["z"] # 0 (never KeyError!)
c.update("aaa") # add more counts
Counter("aab") + Counter("bcc") # Counter({'b':2,'a':2,'c':2})
# ── defaultdict ──
dd = defaultdict(list) # missing keys auto-create empty list
dd["fruits"].append("apple") # no KeyError!
dd = defaultdict(int) # missing keys = 0
dd["count"] += 1
dd = defaultdict(set) # missing keys = empty set
dd["tags"].add("python")
# ── deque (double-ended queue) ──
dq = deque([1, 2, 3])
dq.appendleft(0) # deque([0,1,2,3]) O(1) left append!
dq.popleft() # 0 O(1) left pop!
dq.rotate(1) # deque([3,1,2]) rotate right
dq.rotate(-1) # deque([1,2,3]) rotate left
# deque is O(1) for both ends, list is O(n) for left operations
# ── namedtuple ──
Point = namedtuple("Point", ["x", "y"])
p = Point(3, 4)
p.x, p.y # 3, 4 (access by name)
p[0], p[1] # 3, 4 (still works by index)
# ── ChainMap — search multiple dicts ──
defaults = {"color": "blue", "size": 10}
user_prefs = {"color": "red"}
config = ChainMap(user_prefs, defaults)
config["color"] # "red" (user overrides default)
config["size"] # 10 (falls through to default)
from functools import (
lru_cache, cache, partial, reduce, wraps, total_ordering
)
# ── lru_cache — memoization ──
@lru_cache(maxsize=128)
def fib(n):
if n < 2: return n
return fib(n-1) + fib(n-2)
fib(100) # instant! cached results
fib.cache_info() # hits, misses, size
fib.cache_clear() # reset cache
# @cache — same but unlimited (3.9+)
# ── partial — freeze some arguments ──
def power(base, exp): return base ** exp
square = partial(power, exp=2)
square(5) # 25
# ── reduce — accumulate ──
reduce(lambda a, b: a + b, [1,2,3,4]) # 10
reduce(lambda a, b: a * b, [1,2,3,4]) # 24
# ── wraps — preserve function metadata in decorators ──
def my_decorator(func):
    """Pass-through decorator showing why @wraps matters."""
    @wraps(func) # without this, wrapper.__name__ = "wrapper" and the docstring is lost
    def wrapper(*args, **kwargs):
        return func(*args, **kwargs)
    return wrapper
# ── total_ordering — auto-generate comparison methods ──
@total_ordering
class Score:
    """Comparable score; total_ordering derives <=, >, >= from __eq__ and __lt__."""
    def __init__(self, val):
        self.val = val
    def __eq__(self, other):
        return self.val == other.val
    def __lt__(self, other):
        return self.val < other.val
# Now <=, >, >= all work automatically!
from datetime import datetime, date, timedelta
# ── Now ──
now = datetime.now() # 2026-03-18 14:30:45.123456
today = date.today() # 2026-03-18
# ── Create ──
dt = datetime(2026, 3, 18, 14, 30)
d = date(2026, 3, 18)
# ── Format & Parse ──
now.strftime("%Y-%m-%d %H:%M") # "2026-03-18 14:30"
now.strftime("%B %d, %Y") # "March 18, 2026"
datetime.strptime("2026-03-18", "%Y-%m-%d") # parse string → datetime
# ── Arithmetic ──
tomorrow = today + timedelta(days=1)
last_week = today - timedelta(weeks=1)
diff = datetime(2026,12,31) - datetime.now()
print(diff.days) # days until new year
# ── Attributes ──
now.year, now.month, now.day
now.hour, now.minute, now.second
now.weekday() # 0=Monday, 6=Sunday
now.isoformat() # "2026-03-18T14:30:45.123456"
import re
text = "Call me at 123-456-7890 or 987-654-3210"
# ── Search (first match) ──
m = re.search(r"\d{3}-\d{3}-\d{4}", text)
if m:
print(m.group()) # "123-456-7890"
print(m.start(), m.end()) # 11 23
# ── Find all ──
phones = re.findall(r"\d{3}-\d{3}-\d{4}", text)
# ['123-456-7890', '987-654-3210']
# ── Replace ──
cleaned = re.sub(r"\d", "X", text)
# "Call me at XXX-XXX-XXXX or XXX-XXX-XXXX"
# ── Split ──
re.split(r"[,;\s]+", "a, b; c d")
# ['a', 'b', 'c', 'd']
# ── Groups ──
m = re.search(r"(\d{3})-(\d{3})-(\d{4})", text)
m.group(1) # "123" (area code)
m.groups() # ('123', '456', '7890')
# ── Named groups ──
m = re.search(r"(?P<area>\d{3})-(?P<rest>\d{3}-\d{4})", text)
m.group("area") # "123"
# ── Compile for reuse ──
phone_pattern = re.compile(r"\d{3}-\d{3}-\d{4}")
phone_pattern.findall(text) # faster when used multiple times
# ── Common patterns ──
# r"\d+" digits
# r"\w+" word chars (letters, digits, _)
# r"\s+" whitespace
# r"[a-zA-Z]+" letters only
# r"^...$" start and end of string
# r"\.py$" ends with .py
# r"(?i)hello" case insensitive
import json
# ── Python → JSON string ──
data = {"name": "Yatin", "scores": [90, 85], "active": True}
json_str = json.dumps(data) # compact string
json_str = json.dumps(data, indent=2) # pretty print
json_str = json.dumps(data, sort_keys=True) # sorted keys
# ── JSON string → Python ──
parsed = json.loads(json_str) # dict
# ── File I/O ──
with open("data.json", "w") as f:
json.dump(data, f, indent=2) # write to file
with open("data.json") as f:
loaded = json.load(f) # read from file
# dump/load = files, dumps/loads = strings (s = string)
List is mutable — you can add, remove, change. Tuple is immutable — once created, can't change.
Tuples can be dict keys and in sets (lists can't). Tuples are slightly faster and use less memory.
lst = [1, 2, 3]
lst[0] = 99 # OK
tup = (1, 2, 3)
tup[0] = 99 # TypeError!
== checks value equality. is checks if they're the same object in memory (same id()).
a = [1, 2]
b = [1, 2]
print(a == b) # True — same value
print(a is b) # False — different objects
c = a
print(a is c) # True — same object
# Always use `is` for None:
if x is None: # correct
*args collects extra positional arguments into a tuple. **kwargs collects extra keyword arguments into a dict.
def f(*args, **kwargs):
print(args) # (1, 2, 3)
print(kwargs) # {'x': 10}
f(1, 2, 3, x=10)
Common use: wrapper functions that forward all arguments to another function.
When Python runs a file directly, __name__ is "__main__". When imported, it's the module name. This guard runs code only when executed directly.
def helper(): ...
if __name__ == "__main__":
# Only with `python utils.py`, NOT on import
helper()
Default arguments are evaluated once at function definition, not each call. Mutable defaults are shared across calls.
# BUG:
def f(items=[]):
items.append(1)
return items
f() # [1]
f() # [1, 1] — same list!
# FIX:
def f(items=None):
    """Append 1 to items and return it.

    Uses the None-sentinel idiom so each call without an argument gets a
    FRESH list (defaults are evaluated once at definition time).
    """
    items = [] if items is None else items
    items.append(1)
    return items
__init__ is the initializer. Called automatically after object creation to set up attributes. Without it you'd manually set every attribute — fragile and error-prone.
class User:
def __init__(self, name, email):
self.name = name
self.email = email
self.is_active = True
__init__ is NOT the constructor — __new__ allocates memory. __init__ just fills in data.
A reference to the current instance. obj.method() becomes Class.method(obj) — self is that obj. It's how each instance knows which data belongs to it.
It's a convention — you could name it anything, but never do.
Regular — gets self (instance). @classmethod — gets cls (class), used for factories. @staticmethod — gets nothing, just a namespaced function.
class Date:
def display(self): # regular
return f"{self.y}-{self.m}"
@classmethod
def from_string(cls, s): # factory
return cls(*s.split("-"))
@staticmethod
def is_valid(s): # utility
return len(s.split("-")) == 3
MRO (Method Resolution Order) — the order Python searches for methods. Uses C3 linearization.
class A: pass
class B(A): pass
class C(A): pass
class D(B, C): pass
print(D.__mro__) # D → B → C → A → object
A class always appears before its parents. Multiple parents keep left-to-right order. super() follows MRO, not just the immediate parent.
A function that wraps another function. @dec is sugar for func = dec(func).
def loud(func):
    """Decorator that announces each call before and after running func.

    Demo version — production decorators should add @functools.wraps(func)
    to preserve the wrapped function's name and docstring.
    """
    def wrapper(*args, **kwargs):
        print("CALLING!")
        out = func(*args, **kwargs)
        print("DONE!")
        return out
    return wrapper
@loud
def greet(name):
print(f"Hi {name}")
Always use @functools.wraps(func) to preserve the original name and docstring.
Generators produce values lazily with yield. Lists store everything in memory. Generators are memory efficient but single-use.
# List: all in memory
[x**2 for x in range(10_000_000)]
# Generator: ~0 bytes
(x**2 for x in range(10_000_000))
def squares(n):
    """Lazily yield 0², 1², …, (n-1)² — one value at a time, no list built."""
    yield from (i * i for i in range(n))
The Global Interpreter Lock — a mutex allowing only one thread to run Python bytecode at a time. Exists because CPython's reference counting isn't thread-safe.
Workarounds:
- I/O-bound work → threading or asyncio (the GIL is released while waiting on I/O)
- CPU-bound work → multiprocessing (separate processes, each with its own GIL)

What is a closure? A function that remembers variables from its enclosing scope even after that scope finishes.
def make_counter():
    """Return a closure that increments and returns a private counter.

    The returned function keeps `total` alive after make_counter exits —
    that captured state lives in its __closure__ cells.
    """
    total = 0

    def bump():
        # nonlocal rebinds the enclosing `total` instead of shadowing it.
        nonlocal total
        total += 1
        return total

    return bump
c = make_counter()
c() # 1
c() # 2 — remembers count!
Stored in c.__closure__. Decorators are a common use of closures.
1. Reference counting: Each object tracks how many names point to it. Drops to 0 → freed immediately.
2. Generational GC: Handles circular references. 3 generations — new objects checked more often.
import sys
a = [1, 2]
print(sys.getrefcount(a)) # 2
b = a # refcount = 3
del b # refcount = 2
del a # refcount = 0 → freed
Even int(0) takes 28 bytes. Use __slots__ to save memory on many instances.
A class whose instances are classes. type is the default metaclass. class Foo: calls type("Foo", bases, namespace).
print(type(42)) # <class 'int'>
print(type(int)) # <class 'type'>
print(type(type)) # <class 'type'>
Rarely needed. __init_subclass__ and class decorators solve most cases more simply.
List for ordered, sequential access. Dict for O(1) key lookup.
(For counting, reach for collections.Counter.) Dict membership (`in`) is O(1); list `in` is O(n). For 1M elements, dict is ~100,000x faster for lookups.
It's a hash table. hash(key) → bucket index → store key-value pair. Collisions use open addressing.
Keys must be hashable (immutable). Lists can't be keys. Average O(1) for get/set/delete.
def reverse(head):
    """Reverse a singly linked list in place; return the new head.

    Walks the list once, repointing each node's .next at the node behind
    it. O(n) time, O(1) extra space. Returns None for an empty list.
    """
    prev, node = None, head
    while node is not None:
        # RHS is evaluated first, so node.next is captured before the flip.
        node.next, prev, node = prev, node, node.next
    return prev
Three pointers. At each step, flip current → next to current → prev. O(n) time, O(1) space.
Same result. Comprehensions are more Pythonic.
[x**2 for x in range(5)] # preferred over map
[x for x in range(10) if x > 3] # preferred over filter
Use map when you already have a named function: list(map(str, nums)).
Wraps code with setup/teardown via with. Implements __enter__/__exit__. Used for files, DB connections, locks, timing.
with open("f.txt") as f:
data = f.read()
# auto-closed, even on exception
from contextlib import contextmanager
@contextmanager
def timer():
start = time.time()
yield
print(f"Took {time.time()-start:.2f}s")
Shallow — new outer object, shared inner objects. Deep — fully independent at every level.
import copy
original = [[1, 2], [3, 4]]
shallow = original.copy()
deep = copy.deepcopy(original)
original[0][0] = 999
shallow[0][0] # 999 — shared!
deep[0][0] # 1 — independent
Implement __iter__ as a generator. Easiest way.
class Range:
    """Minimal custom iterable: yields start, start+1, … while below end.

    __iter__ is written as a generator method — the easiest way to make a
    class iterable. Each iter() call produces a fresh, independent iterator.
    """

    def __init__(self, start, end):
        self.start = start
        self.end = end

    def __iter__(self):
        n = self.start
        while n < self.end:
            yield n
            n += 1
for i in Range(1, 5):
print(i) # 1, 2, 3, 4
@property lets you define computed attributes with getter/setter logic while keeping the obj.attr syntax. Useful for validation, caching, and derived values.
class Circle:
    """Circle with a validated radius and a derived, on-demand area."""

    def __init__(self, radius):
        self._radius = radius

    @property
    def radius(self):
        """Radius, exposed as plain attribute access (obj.radius)."""
        return self._radius

    @radius.setter
    def radius(self, val):
        # Validation hook: the setter rejects negative values.
        if val < 0:
            raise ValueError("Negative")
        self._radius = val

    @property
    def area(self):
        """Area recomputed on every access (derived value, never stored)."""
        return 3.14 * self._radius ** 2
"If it walks like a duck and quacks like a duck, it's a duck." Python doesn't care about an object's type — only that it has the right methods. You don't need to inherit from an interface; just implement the expected methods.
class Cat:
def speak(self): return "Meow"
class Dog:
def speak(self): return "Woof"
# No shared base class needed:
for a in [Cat(), Dog()]:
print(a.speak()) # just works
Threading — I/O-bound tasks (API calls, file reads). GIL limits CPU parallelism but releases during I/O.
Multiprocessing — CPU-bound tasks (number crunching). Separate processes bypass the GIL entirely.
asyncio — Many concurrent I/O operations (web servers, scrapers). Single-threaded, event loop, very low overhead.
Rule of thumb: waiting on network? → asyncio. Crunching numbers? → multiprocessing. Simple I/O parallelism? → threading.
async def creates a coroutine. await pauses it until the awaited thing completes, letting other coroutines run. It's cooperative multitasking on a single thread.
import asyncio
async def fetch(url):
await asyncio.sleep(1) # yields control
return f"data from {url}"
async def main():
# Runs 3 fetches concurrently, not sequentially
results = await asyncio.gather(
fetch("a"), fetch("b"), fetch("c")
) # ~1s total, not ~3s
1. Profile first — cProfile, line_profiler, or timeit. Never guess.
2. Algorithm — O(n) vs O(n^2) matters more than any micro-optimization.
3. Data structures — dict/set for O(1) lookups instead of list scans.
4. Built-ins — sum(), map(), comprehensions are C-speed.
5. Caching — @lru_cache for expensive repeated computations.
6. Vectorize — NumPy instead of Python loops for numerical work.
7. Last resort — C extensions, Cython, or multiprocessing.
When two threads access shared state simultaneously and the result depends on timing. Even with the GIL, race conditions happen between bytecode instructions.
import threading
counter = 0
lock = threading.Lock()
def increment():
global counter
with lock: # only one thread at a time
counter += 1 # now safe
Prevention: Lock, RLock, Queue, or avoid shared state entirely (use multiprocessing).
Reference counting (primary): Each object tracks references. Drops to 0 → freed instantly.
Generational GC (cycle detector): Catches circular refs. 3 generations — gen0 (new, checked often), gen1, gen2 (old, checked rarely).
gc.collect() forces a collection. gc.disable() turns off the cycle detector (ref counting still works). weakref creates references that don't increase refcount.
S — Single Responsibility. One class = one job.
O — Open/Closed. Open for extension, closed for modification. Use inheritance or composition.
L — Liskov Substitution. Subclasses should work anywhere the parent does.
I — Interface Segregation. Many small protocols, not one giant ABC.
D — Dependency Inversion. Depend on abstractions (Protocol/ABC), not concrete classes.
Python favors duck typing and Protocols over heavy interface hierarchies. SOLID applies but with a lighter touch.
Almost always. Inheritance = "is-a" (Dog is an Animal). Composition = "has-a" (Car has an Engine). Composition is more flexible because you can swap components at runtime.
# Inheritance (rigid)
class ElectricCar(Car): ...
# Composition (flexible)
class Car:
def __init__(self, engine):
self.engine = engine # swap gas/electric/hybrid
Use inheritance for true "is-a" relationships and when you need polymorphism. Use composition for everything else.
Separate concerns into packages: models/, services/, api/, utils/. Keep __init__.py files to control public APIs. Use pyproject.toml for project config.
Key principles: avoid circular imports (use dependency injection), keep modules focused, use absolute imports, write tests alongside code, use src/ layout for installable packages.
ABC — nominal typing. You must class Foo(MyABC): explicitly. Enforced at instantiation.
Protocol — structural typing. Just implement the right methods. No inheritance needed. Checked by type checkers (mypy), not at runtime (unless @runtime_checkable).
from typing import Protocol
class Drawable(Protocol):
def draw(self) -> str: ...
class Circle: # no inheritance!
def draw(self) -> str: return "O"
# Circle satisfies Drawable structurally
Prefer Protocols for Python — they match duck typing philosophy.
An object with __get__, __set__, or __delete__. It's the mechanism behind @property, @classmethod, @staticmethod, and even plain methods.
When you access obj.x and x is a descriptor on the class, Python calls x.__get__(obj, type(obj)) instead of returning x directly.
class Validated:
    """Data descriptor that stores a non-negative value on each instance.

    __set_name__ runs at class-creation time so the descriptor learns which
    attribute name it was assigned to; reads/writes go through the owning
    instance's __dict__ under that same name.
    """

    def __set_name__(self, owner, attr):
        self.name = attr

    def __set__(self, instance, value):
        if value < 0:
            raise ValueError
        instance.__dict__[self.name] = value

    def __get__(self, instance, owner):
        return instance.__dict__.get(self.name)
1. Read the traceback — bottom is where the error happened, top is what called it.
2. print() / f-strings for quick checks.
3. breakpoint() (Python 3.7+) drops into pdb debugger.
4. VS Code debugger — set breakpoints, inspect variables, step through code.
5. logging module for production — levels (DEBUG, INFO, WARNING, ERROR).
6. pdb.post_mortem() to debug after an exception.
# Quick debug:
breakpoint() # drops into pdb
# In pdb:
# n (next), s (step into), c (continue)
# p variable (print), l (list code), q (quit)
Module — a single .py file.
Package — a directory with __init__.py containing modules.
Library — a collection of packages distributed together (e.g., requests, numpy). Installed via pip.
Framework — a library that controls the flow (Django, Flask). You write code that the framework calls.
Circular imports happen when A imports B and B imports A. Fixes:
1. Move the import inside the function that needs it (lazy import).
2. Restructure — extract shared code into a third module.
3. Use TYPE_CHECKING for type hints only:
from __future__ import annotations
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from .models import User # only for type checkers
Dynamically modifying a class or module at runtime. Possible because Python is dynamic, but use sparingly — makes code unpredictable.
import math
math.pi = 3 # monkey patched! (don't do this)
# Legitimate use: mocking in tests
from unittest.mock import patch
@patch("module.expensive_api_call")
def test_something(mock_call):
mock_call.return_value = "fake"
Assignment (b = a) — both names point to the same object. No copy at all.
Shallow copy (a.copy()) — new outer object, but inner objects are shared.
Deep copy (copy.deepcopy(a)) — everything is fully independent.
a = [[1]]
b = a # same object
c = a.copy() # new list, shared inner
d = copy.deepcopy(a) # fully new
a[0].append(2)
# b[0] = [1,2] — same object
# c[0] = [1,2] — shared inner list
# d[0] = [1] — independent
@dataclass auto-generates __init__, __repr__, __eq__. Use when you have a class that's mostly data. Cleaner than writing boilerplate.
from dataclasses import dataclass
@dataclass
class Point:
x: float
y: float
# Auto-generates __init__, __repr__, __eq__
# frozen=True for immutable, slots=True for memory
Use dataclass for data containers. Use regular class when you need heavy custom behavior. Use NamedTuple for immutable records.
1. Check sys.modules cache — if already imported, return cached version.
2. Search sys.path — list of directories to look in.
3. Found → execute the module file, store in sys.modules.
4. .pyc files (bytecode cache) in __pycache__/ speed up subsequent imports.
This is why the first import is slow and repeated imports are instant.
By default, each instance has a __dict__ (~100 bytes). __slots__ replaces it with a fixed struct, saving ~60% memory per instance.
class Point:
__slots__ = ("x", "y")
def __init__(self, x, y):
self.x = x
self.y = y
# Can't add arbitrary attributes:
# p.z = 3 → AttributeError
Use when creating millions of instances (particles, data points, graph nodes). Don't use everywhere — it limits flexibility.
__repr__ — always. Unambiguous string for debugging. Should ideally be valid Python.
__str__ — when you need a user-friendly display different from repr.
__eq__ — when equality should be value-based, not identity-based.
__hash__ — if you implement __eq__ and want instances as dict keys/set members.
__len__ — if your object has a meaningful size.
__iter__ — if your object is a collection.
__enter__/__exit__ — if your object manages a resource.
1. OS starts the CPython interpreter process.
2. Lexer tokenizes source code into tokens.
3. Parser builds an AST (Abstract Syntax Tree).
4. Compiler converts AST → bytecode (.pyc).
5. PVM (Python Virtual Machine) executes bytecode instruction by instruction.
You can inspect bytecode with dis.dis(func) and the AST with ast.parse(code).
Iterable — has __iter__ that returns an iterator. Examples: list, str, dict, file.
Iterator — has __next__ that returns the next value (and raises StopIteration when done). Also has __iter__ returning itself.
You can iterate an iterable multiple times (fresh iterator each time). An iterator is consumed — single use.
lst = [1, 2, 3] # iterable
it = iter(lst) # iterator
next(it) # 1
next(it) # 2
:= assigns AND returns a value in one expression. Avoids computing something twice or needing a separate line.
# Without:
line = f.readline()
while line:
process(line)
line = f.readline()
# With walrus:
while (line := f.readline()):
process(line)
# In comprehensions:
results = [y for x in data if (y := expensive(x)) > 0]
Python equivalent of JS infinite currying — use a class with __call__ and __repr__/__str__/__int__.
class add:
    """Infinite currying via a callable instance: add(1)(2)(3) → 6.

    __call__ makes instances callable; returning self lets the calls chain
    forever. __repr__/__int__ let print() and int() read the running total.
    """

    def __init__(self, val):
        self.val = val

    def __call__(self, n):
        # Fold the new operand into the total, then hand back self to chain.
        self.val += n
        return self

    def __repr__(self):
        return f"{self.val}"

    def __int__(self):
        return self.val
print(add(1)(2)(3)) # 6
print(add(5)(10)(15)(20)) # 50
print(int(add(1)(2)) + 3) # 6
Alternate — pure function approach with closures:
def add(x):
    """Currying with pure closures: each call folds its argument into a new total.

    Bug fixed: the original assigned `inner.__repr__ = lambda: str(x)` (and
    __str__), but dunder methods are looked up on the TYPE, not on the
    instance — those attribute assignments never affect print()/str(), so
    `print(add(1)(2)(3))` showed a function repr, not 6. Read the running
    total through the plain `.val` attribute instead (or use the __call__
    class version above for printable chaining).
    """
    def inner(y):
        return add(x + y)

    inner.val = x  # expose the running total as an ordinary attribute
    return inner

print(add(1)(2)(3).val)  # 6
Key concept: __call__ makes instances callable like functions. Returning self enables chaining.
funcs = []
for i in range(5):
funcs.append(lambda: i)
print([f() for f in funcs])
# Expected: [0, 1, 2, 3, 4]
# Actual: [4, 4, 4, 4, 4] 😱
Why? Closures capture the variable, not its value. By the time you call them, the loop is done and i = 4.
Fix 1 — default argument (captures value at creation):
funcs = [lambda i=i: i for i in range(5)]
print([f() for f in funcs]) # [0, 1, 2, 3, 4]
Fix 2 — functools.partial:
from functools import partial
funcs = [partial(lambda i: i, i) for i in range(5)]
Same trap exists in JS — exact same concept, exact same fix pattern.
a = 256
b = 256
print(a is b) # True
a = 257
b = 257
print(a is b) # False (in REPL) ⚠️
Why? CPython pre-caches integers from -5 to 256. Same value in that range → same object. Outside → new objects each time.
# These are all the SAME object:
print(id(100) == id(100)) # True — 100 lives in CPython's small-int cache
print(id(300) == id(300)) # usually True in CPython! equal literals within one
#   expression are merged into a single constant object, and even otherwise the
#   first temporary's address is often reused — id() results like this are an
#   implementation detail, never something to rely on
# String interning too:
a = "hello"
b = "hello"
print(a is b) # True — CPython interns short identifier-like strings
a = "hello world!"
b = "hello world!"
print(a is b) # False in the REPL (separate statements); in a script the compiler
#   may still merge equal constants — one more reason to compare with ==, not `is`
Lesson: Never use is to compare values. Always use ==. is is only for None, True, False.
class Student:
grades = [] # class variable — shared!
def __init__(self, name):
self.name = name
s1 = Student("Alice")
s2 = Student("Bob")
s1.grades.append(90)
print(s2.grades) # [90] 😱 — Bob has Alice's grade!
print(Student.grades) # [90] — it's shared
Why? grades = [] at class level creates ONE list shared by ALL instances. Mutating it via any instance changes it everywhere.
Fix — initialize in __init__:
class Student:
def __init__(self, name):
self.name = name
self.grades = [] # each instance gets its own list
Gotcha within the gotcha: Reassignment (s1.grades = [90]) creates a new instance variable and doesn't affect others. Only mutation (.append) is shared.
Compose multiple functions left-to-right — a common functional programming pattern.
from functools import reduce
def pipe(*fns):
    """Compose functions left-to-right: pipe(f, g)(x) == g(f(x)).

    With no functions, the result is the identity function.
    """
    def run(x):
        # Thread the value through each function in order.
        for fn in fns:
            x = fn(x)
        return x
    return run
# Usage:
add1 = lambda x: x + 1
double = lambda x: x * 2
square = lambda x: x ** 2
transform = pipe(add1, double, square)
print(transform(5)) # (5+1)*2 = 12, 12² = 144
Reverse (compose — right-to-left):
def compose(*fns):
    # Right-to-left composition: compose(f, g)(x) == f(g(x)).
    # Implemented by reversing the argument order and delegating to pipe().
    return pipe(*reversed(fns))
# Trap 1: Single-element tuple
a = (1)
b = (1,)
print(type(a)) # <class 'int'> — just parentheses!
print(type(b)) # <class 'tuple'> — the comma makes it
# Trap 2: Chained comparisons
print(1 < 2 < 3) # True — means (1<2) and (2<3)
print(1 < 2 > 0) # True — means (1<2) and (2>0)
print(True == 1 == 1.0) # True!
print(False == 0 == 0.0) # True!
# Trap 3: This is NOT a tuple comparison
print( (0, 1) == 0, 1 ) # False 1 — it's print((0,1)==0, 1)
Why? Python chains comparisons implicitly. a < b < c is a < b and b < c. And True/False are subclasses of int (True == 1, False == 0).
for i in range(5):
if i == 3:
break
else:
print("Completed!")
# Prints nothing! `else` runs only if loop didn't `break`
for i in range(5):
if i == 99:
break
else:
print("Completed!") # Prints! No break happened
Real use case — search pattern:
for item in items:
if item.matches(query):
result = item
break
else:
raise ValueError("Not found") # only if no break
Think of else as "no break". Works on while too.
def memoize(func):
    """Cache func's results keyed by its positional args (kwargs unsupported).

    Hand-rolled for teaching; production code should prefer
    functools.lru_cache, which also handles kwargs and bounds the cache.
    """
    cache = {}

    def wrapper(*args):
        if args in cache:
            return cache[args]
        result = func(*args)
        cache[args] = result
        return result

    wrapper.cache = cache  # expose for inspection
    return wrapper
@memoize
def fib(n):
if n < 2: return n
return fib(n - 1) + fib(n - 2)
print(fib(100)) # instant — without memoize, heat death of universe
print(fib.cache) # see all cached results
Production version: just use @functools.lru_cache(maxsize=128) — it handles kwargs, has max size, and is C-optimized.
# This works:
d = {(1, 2): "tuple key"}
# This crashes:
d = {[1, 2]: "list key"} # TypeError: unhashable type: 'list'
# This is the EVIL gotcha:
class BadKey:
def __init__(self, val):
self.val = val
def __hash__(self):
return hash(self.val)
def __eq__(self, other):
return self.val == other.val
key = BadKey(1)
d = {key: "found"}
key.val = 2 # mutate the key!
print(d[key]) # KeyError! hash changed, can't find it
print(d[BadKey(1)]) # KeyError! right hash, but original key ≠ BadKey(1) now
Rule: Dict keys must be immutable OR their hash must never change. Mutating a key corrupts the entire dict bucket.
def f(x, lst):
x = x + 1 # rebinds local x — original NOT affected
lst.append(99) # mutates the SAME list object
a = 10
b = [1, 2]
f(a, b)
print(a) # 10 — unchanged
print(b) # [1, 2, 99] — changed!
Why? Python is "pass by object reference." Reassigning a name (x = ...) creates a new local binding. Mutating an object (lst.append) changes it everywhere.
# Another gotcha:
def g(lst):
lst += [4] # for lists, += is .extend() — MUTATES in place!
def h(tup):
tup += (4,) # for tuples, += creates a NEW tuple — no mutation
a = [1, 2, 3]
g(a)
print(a) # [1, 2, 3, 4] — mutated!
b = (1, 2, 3)
h(b)
print(b) # (1, 2, 3) — unchanged
Like JS's optional chaining but for auto-creating deeply nested structures.
class AutoDict(dict):
    """dict that auto-creates nested AutoDicts for missing keys.

    dict.__getitem__ calls __missing__ only on a failed [] lookup — .get()
    and `in` never trigger it — so d["a"]["b"]["c"] = 42 just works.
    """

    def __missing__(self, key):
        child = AutoDict()
        self[key] = child
        return child
d = AutoDict()
d["a"]["b"]["c"] = 42 # no KeyError!
print(d) # {'a': {'b': {'c': 42}}}
# Same thing with collections:
from collections import defaultdict
tree = lambda: defaultdict(tree)
d = tree()
d["a"]["b"]["c"] = 42 # works!
Key concept: __missing__ is called by dict.__getitem__ when a key doesn't exist. Only works with [] access, not .get().
x = 10
def foo():
print(x) # UnboundLocalError! 😱
x = 20
foo()
Why? Python sees x = 20 anywhere in the function and marks x as local for the entire function. So the print(x) before the assignment references a local x that doesn't exist yet.
Fixes:
# Fix 1 — use global
def foo():
global x
print(x)
x = 20
# Fix 2 — use a different name
def foo():
print(x)
y = 20 # doesn't shadow x
# Nested scope version — use nonlocal
def outer():
x = 10
def inner():
nonlocal x
x += 1
inner()
print(x) # 11
# True/False are ints!
print(True + True) # 2
print(True * 10) # 10
print(sum([True, False, True])) # 2
# and/or return VALUES, not True/False
print(0 or "hello") # "hello"
print("hi" and "bye") # "bye"
print("" or "default") # "default"
print(None or 0 or [] or "found") # "found"
# Common pattern — default values before walrus operator:
name = user_input or "Anonymous"
How and/or actually work:
or returns the first truthy value, or the last valueand returns the first falsy value, or the last valuedef flatten(lst):
for item in lst:
if isinstance(item, list):
yield from flatten(item) # recursive generator!
else:
yield item
nested = [1, [2, [3, 4], 5], [6, [7, [8]]]]
print(list(flatten(nested)))
# [1, 2, 3, 4, 5, 6, 7, 8]
Key concept: yield from delegates to another generator — flattens one level of generator nesting. Without it you'd need a loop + yield.
One-liner (for fixed depth):
# Only 1 level deep:
flat = [x for sub in nested for x in sub]
# Python swap — no temp variable needed:
a, b = 1, 2
a, b = b, a
print(a, b) # 2 1
# But watch this:
a = [1, 2, 3]
a[0], a[a[0]] = a[a[0]], a[0]
print(a) # [2, 1, 3]? or [2, 2, 3]? 🤔
# The unpacking trick:
a, *b, c = [1, 2, 3, 4, 5]
print(a) # 1
print(b) # [2, 3, 4]
print(c) # 5
# Nested unpacking:
(a, b), c = [1, 2], 3
print(a, b, c) # 1 2 3
# Swallowing with *_:
first, *_, last = range(100)
print(first, last) # 0 99
class FuncList:
    """List-like container you extend by *calling* it: FuncList(1, 2)(3).

    Dunder protocols make it behave like a built-in: __call__ (callable),
    __getitem__ (indexable), __len__ (sizable), __repr__ (printable).
    """

    def __init__(self, *items):
        self._data = list(items)

    def __call__(self, *items):
        # Append each new item, then return self so calls chain fluently.
        for item in items:
            self._data.append(item)
        return self

    def __getitem__(self, idx):
        return self._data[idx]

    def __len__(self):
        return len(self._data)

    def __repr__(self):
        return repr(self._data)
fl = FuncList(1, 2)(3, 4)(5)
print(fl) # [1, 2, 3, 4, 5]
print(fl[2]) # 3
print(len(fl)) # 5
Key concept: Dunder methods let you make objects behave like any built-in type. __call__ = callable, __getitem__ = indexable, __len__ = sizable.