Advanced
Chapter 19 · 11 min read
Performance & Load Testing
Measure API response times, run concurrent load tests, validate performance SLAs, and integrate performance checks into your test suite.
API Performance & Load Testing
Functional tests verify correctness; performance tests verify speed and scalability. Every API has (or should have) performance SLAs — maximum response time, requests-per-second capacity, and error-rate thresholds.
Key Metrics
- Response time — How long a single request takes (p50, p95, p99)
- Throughput — Requests processed per second
- Error rate — Percentage of failed requests under load
- Concurrency — How many simultaneous users the API supports
Testing Approaches
Start with single-request response time validation in your functional tests. Then run concurrent load tests with increasing user counts to find the breaking point. Always measure percentiles, not just averages — an average can look healthy while the slowest 5% of requests are far over the SLA.
performance.test.js
// Time one GET request and compare it with the 2-second SLA.
const start = Date.now();
const response = await fetch('https://jsonplaceholder.typicode.com/posts/1');
const elapsed = Date.now() - start;
{
  // Evaluate both conditions first, then report them; console.assert only
  // logs on failure, it never throws.
  const statusIsOk = response.status === 200;
  const withinSla = elapsed < 2000;
  console.assert(statusIsOk);
  console.assert(withinSla, `Response time ${elapsed}ms exceeds 2000ms SLA`);
}
console.log(`Single request: ${elapsed}ms`);
// Concurrent load test
/**
 * Runs a small concurrent load test against a URL using the global `fetch`.
 *
 * `concurrency` workers pull from a shared request budget until
 * `totalRequests` requests have been started. Each request is timed with
 * `Date.now()` from just before `fetch` is called until it resolves or rejects.
 *
 * A request counts as an error when `fetch` rejects OR when the response
 * status is outside 200-299 (`fetch` does not reject on HTTP error statuses).
 * Latency statistics are computed over successful requests only.
 *
 * @param {string} url            Target URL.
 * @param {number} concurrency    Number of simultaneous workers.
 * @param {number} totalRequests  Total number of requests to issue.
 * @returns {Promise<{totalRequests: number, errors: number, totalTimeMs: number,
 *   rps: string, avgMs: string, p50Ms: number, p95Ms: number, p99Ms: number}>}
 *   `totalRequests` is the number of successful requests; percentiles and the
 *   average are 0 when no request succeeded.
 */
async function loadTest(url, concurrency, totalRequests) {
  const results = [];
  const errors = [];
  let started = 0;

  const worker = async () => {
    // The check and the increment run synchronously (no await in between),
    // so two workers can never claim the same request slot.
    while (started < totalRequests) {
      started++;
      const t0 = Date.now();
      try {
        const resp = await fetch(url);
        const time = Date.now() - t0;
        if (resp.status >= 200 && resp.status < 300) {
          results.push({ status: resp.status, time });
        } else {
          // An HTTP error status is a failed request, not a latency sample.
          errors.push({ error: `HTTP ${resp.status}`, status: resp.status, time });
        }
      } catch (error) {
        errors.push({ error: error.message, time: Date.now() - t0 });
      }
    }
  };

  const startTime = Date.now();
  await Promise.all(Array.from({ length: concurrency }, () => worker()));
  const totalTime = Date.now() - startTime;

  const times = results.map(r => r.time).sort((a, b) => a - b);

  // Nearest-rank percentile: the smallest sample such that at least
  // `percent`% of all samples are <= it. Returns 0 when there are no samples.
  const percentile = (percent) => {
    if (times.length === 0) return 0;
    const rank = Math.ceil((percent * times.length) / 100);
    return times[Math.min(Math.max(rank, 1), times.length) - 1];
  };

  const avg = times.length > 0
    ? times.reduce((a, b) => a + b, 0) / times.length
    : 0;

  return {
    totalRequests: results.length,
    errors: errors.length,
    totalTimeMs: totalTime,
    // Clamp the elapsed time to 1 ms so a sub-millisecond run cannot divide by zero.
    rps: (results.length / Math.max(totalTime, 1) * 1000).toFixed(1),
    avgMs: avg.toFixed(0),
    p50Ms: percentile(50),
    p95Ms: percentile(95),
    p99Ms: percentile(99)
  };
}
// Run the load test with 5 concurrent users issuing 20 requests in total.
const report = await loadTest('https://jsonplaceholder.typicode.com/posts', 5, 20);
console.log('Load Test Report:', report);
{
  // Check the two pass/fail criteria: no failed requests, P95 below 5 seconds.
  const { errors: errorCount, p95Ms: p95Latency } = report;
  console.assert(errorCount === 0, 'Should have no errors');
  console.assert(p95Latency < 5000, 'P95 should be under 5s');
}
PerformanceTest.java
import static io.restassured.RestAssured.*;
import static org.hamcrest.Matchers.*;
import io.restassured.response.Response;
import org.testng.annotations.Test;
import org.testng.Assert;
import java.util.*;
import java.util.concurrent.*;
/**
 * Response-time and small-scale load tests against the JSONPlaceholder demo API.
 *
 * <p>Contains a single-request SLA check and a fixed-size concurrent load test
 * that reports average, P50, P95 and throughput.
 */
public class PerformanceTest {
    private static final String BASE = "https://jsonplaceholder.typicode.com";

    /** Verifies that one GET of {@code /posts/1} returns 200 in under two seconds. */
    @Test
    public void testSingleRequestSLA() {
        // RestAssured has built-in response time assertion
        given().baseUri(BASE)
            .when().get("/posts/1")
            .then()
            .statusCode(200)
            .time(lessThan(2000L)); // SLA: under 2 seconds
    }

    /**
     * Issues 20 GET requests on a pool of 5 threads and asserts that none fail
     * and that the P95 latency stays under five seconds.
     *
     * <p>A request counts as an error when its task throws (including a non-200
     * status) or does not finish within 10 seconds of being waited on.
     *
     * @throws Exception if the test thread is interrupted while waiting for results
     */
    @Test
    public void testConcurrentLoad() throws Exception {
        int concurrency = 5;
        int totalRequests = 20;
        ExecutorService executor = Executors.newFixedThreadPool(concurrency);
        List<Long> times = new ArrayList<>(totalRequests);
        int errors = 0;
        long totalTime;
        try {
            List<Future<Long>> futures = new ArrayList<>(totalRequests);
            // nanoTime is monotonic, so durations are immune to wall-clock adjustments.
            long startTime = System.nanoTime();
            for (int i = 0; i < totalRequests; i++) {
                futures.add(executor.submit(() -> {
                    long t0 = System.nanoTime();
                    Response resp = given().baseUri(BASE)
                        .when().get("/posts/1");
                    Assert.assertEquals(resp.getStatusCode(), 200);
                    return TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - t0);
                }));
            }
            for (Future<Long> f : futures) {
                try {
                    times.add(f.get(10, TimeUnit.SECONDS));
                } catch (InterruptedException e) {
                    // Preserve the interrupt status instead of counting it as a request error.
                    Thread.currentThread().interrupt();
                    throw e;
                } catch (TimeoutException e) {
                    f.cancel(true); // do not leave a hung request running
                    errors++;
                } catch (ExecutionException e) {
                    errors++;
                }
            }
            totalTime = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTime);
        } finally {
            // Always release the pool threads, even if waiting was interrupted.
            executor.shutdownNow();
        }

        // Calculate stats
        Collections.sort(times);
        long p50 = percentile(times, 50);
        long p95 = percentile(times, 95);
        double avg = times.stream().mapToLong(Long::longValue).average().orElse(0);
        double rps = totalTime > 0 ? times.size() * 1000.0 / totalTime : 0.0;
        System.out.printf("Results: %d requests, %d errors%n", times.size(), errors);
        System.out.printf("Avg: %,.0fms | P50: %dms | P95: %dms%n", avg, p50, p95);
        System.out.printf("Throughput: %.1f req/s%n", rps);
        Assert.assertEquals(errors, 0, "No errors expected");
        Assert.assertTrue(p95 < 5000, "P95 should be under 5s");
    }

    /**
     * Returns the nearest-rank percentile of an ascending-sorted list.
     *
     * @param sorted  samples sorted in ascending order
     * @param percent percentile to compute, in the range 1-100
     * @return the smallest sample with at least {@code percent}% of samples at or
     *     below it, or 0 when the list is empty
     */
    private static long percentile(List<Long> sorted, int percent) {
        if (sorted.isEmpty()) {
            return 0L;
        }
        // Integer ceiling of percent * n / 100 avoids floating-point rounding.
        int rank = (percent * sorted.size() + 99) / 100;
        int index = Math.min(Math.max(rank, 1), sorted.size()) - 1;
        return sorted.get(index);
    }
}
test_performance.py
"""Performance and load testing"""
import requests
import time
import statistics
from concurrent.futures import ThreadPoolExecutor, as_completed
# Root URL of the API under test; the tests below append their endpoint paths to it.
BASE_URL = 'https://jsonplaceholder.typicode.com'
def test_single_request_sla():
    """Validate that a single GET of /posts/1 returns 200 within the 2-second SLA.

    The latency is taken from ``response.elapsed`` as reported by ``requests``.
    """
    # Bound the call so an unresponsive server fails the test instead of
    # hanging it; requests applies no timeout by default.
    response = requests.get(f'{BASE_URL}/posts/1', timeout=10)
    elapsed_seconds = response.elapsed.total_seconds()
    assert response.status_code == 200
    assert elapsed_seconds < 2.0, \
        f'Response time {elapsed_seconds:.2f}s exceeds 2s SLA'
    print(f"Single request: {elapsed_seconds*1000:.0f}ms")
def make_request(url):
    """Issue one timed GET request; helper for the concurrent load test.

    Args:
        url: Address to request.

    Returns:
        A dict with ``status`` (the HTTP status code, or the string ``'error'``
        if the request raised) and ``time`` (elapsed milliseconds). On failure
        the dict also carries ``error`` with the exception text.
    """
    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments the way time.time() can.
    start = time.perf_counter()
    try:
        resp = requests.get(url, timeout=10)
        return {'status': resp.status_code, 'time': (time.perf_counter() - start) * 1000}
    except Exception as e:
        # Deliberately broad: any failure is reported as an error result rather
        # than propagated into the worker thread.
        return {'status': 'error', 'time': (time.perf_counter() - start) * 1000, 'error': str(e)}
def test_concurrent_load():
    """Run 20 requests on 5 worker threads and check error count and P95 latency.

    Latency statistics are computed over responses with status 200 only; every
    other result (non-200 status or raised exception) counts as an error.
    Percentiles use the nearest-rank method and are 0 when nothing succeeded.
    """
    url = f'{BASE_URL}/posts'
    concurrency = 5
    total_requests = 20

    # perf_counter is monotonic, unlike time.time(), so it is safe for durations.
    start_time = time.perf_counter()
    with ThreadPoolExecutor(max_workers=concurrency) as executor:
        futures = [executor.submit(make_request, url) for _ in range(total_requests)]
        results = [f.result() for f in as_completed(futures)]
    total_time = (time.perf_counter() - start_time) * 1000

    # Analyze results
    times = sorted(r['time'] for r in results if r['status'] == 200)
    errors = [r for r in results if r['status'] != 200]

    def percentile(percent):
        """Nearest-rank percentile of ``times`` (0 when there are no samples)."""
        if not times:
            return 0
        # Integer ceiling of percent * n / 100; avoids float rounding.
        rank = (percent * len(times) + 99) // 100
        return times[min(max(rank, 1), len(times)) - 1]

    p50 = percentile(50)
    p95 = percentile(95)
    avg = statistics.mean(times) if times else 0
    rps = len(times) / total_time * 1000 if total_time > 0 else 0.0

    print(f"Results: {len(times)} OK, {len(errors)} errors")
    print(f"Avg: {avg:.0f}ms | P50: {p50:.0f}ms | P95: {p95:.0f}ms")
    print(f"Throughput: {rps:.1f} req/s")
    assert len(errors) == 0, 'Should have no errors'
    assert p95 < 5000, f'P95 ({p95:.0f}ms) should be under 5s'
API Testing
Advanced
Performance & Load Testing
Written by PV
© 2026 All Rights Reserved