Advanced Chapter 19 · 11 min read

Performance & Load Testing

Measure API response times, run concurrent load tests, validate performance SLAs, and integrate performance checks into your test suite.

API Performance & Load Testing

Functional tests verify correctness; performance tests verify speed and scalability. Every API has (or should have) performance SLAs — a maximum response time, a requests-per-second capacity, and an error-rate threshold.

Key Metrics

  • Response time — How long a single request takes (p50, p95, p99)
  • Throughput — Requests processed per second
  • Error rate — Percentage of failed requests under load
  • Concurrency — How many simultaneous users the API supports

Testing Approaches

Start with single-request response time validation in your functional tests. Then run concurrent load tests with increasing user counts to find the breaking point. Always measure percentiles, not just averages: an average can look healthy while the slowest 5% of requests are far over the SLA.

performance.test.js
// Single-request SLA check: time one GET of /posts/1 and compare the
// elapsed milliseconds with the 2000 ms budget.
const sentAt = Date.now();
const response = await fetch('https://jsonplaceholder.typicode.com/posts/1');
const elapsedMs = Date.now() - sentAt;

const statusIsOk = response.status === 200;
const withinSla = elapsedMs < 2000;

console.assert(statusIsOk);
console.assert(withinSla, `Response time ${elapsedMs}ms exceeds 2000ms SLA`);
console.log(`Single request: ${elapsedMs}ms`);

// Concurrent load test
/**
 * Runs `totalRequests` GET requests against `url` using `concurrency`
 * parallel workers and summarizes the timings.
 *
 * Each sample covers the span from just before `fetch(url)` is called until
 * its promise settles; the response body is not read.
 *
 * @param {string} url - Endpoint to request.
 * @param {number} concurrency - Number of workers issuing requests in parallel.
 * @param {number} totalRequests - Total number of requests shared by all workers.
 * @returns {Promise<{totalRequests: number, errors: number, totalTimeMs: number,
 *   rps: string, avgMs: string, p50Ms: number, p95Ms: number, p99Ms: number}>}
 *   `totalRequests` counts successful (2xx) responses, `errors` counts rejected
 *   fetches plus non-2xx responses, and the timing fields are computed from the
 *   successful responses only (0 when there were none).
 */
async function loadTest(url, concurrency, totalRequests) {
  const results = [];
  const errors = [];
  let started = 0;

  const worker = async () => {
    // The check and the increment run with no `await` in between, so two
    // workers can never claim the same request slot.
    while (started < totalRequests) {
      started++;
      const t0 = Date.now();
      try {
        const resp = await fetch(url);
        const time = Date.now() - t0;
        if (resp.ok) {
          results.push({ status: resp.status, time });
        } else {
          // fetch resolves for HTTP error statuses too; without this branch a
          // 4xx/5xx response would be reported as a success.
          errors.push({ error: `HTTP ${resp.status}`, status: resp.status, time });
        }
      } catch (error) {
        errors.push({ error: error.message, time: Date.now() - t0 });
      }
    }
  };

  const startTime = Date.now();
  await Promise.all(Array(concurrency).fill().map(() => worker()));
  const totalTime = Date.now() - startTime;

  // Calculate percentiles (nearest-rank: the smallest sample such that at
  // least `fraction` of all samples are <= it).
  const times = results.map(r => r.time).sort((a, b) => a - b);
  const percentile = (fraction) => {
    if (times.length === 0) return 0;
    const rank = Math.max(1, Math.ceil(times.length * fraction));
    return times[rank - 1];
  };
  const avg = times.length === 0
    ? 0
    : times.reduce((a, b) => a + b, 0) / times.length;

  return {
    totalRequests: results.length,
    errors: errors.length,
    totalTimeMs: totalTime,
    rps: (results.length / totalTime * 1000).toFixed(1),
    avgMs: avg.toFixed(0),
    p50Ms: percentile(0.5),
    p95Ms: percentile(0.95),
    p99Ms: percentile(0.99)
  };
}

// Drive the load test: 5 concurrent users sharing 20 total requests
// against the /posts collection, then check the summary.
const loadTestUrl = 'https://jsonplaceholder.typicode.com/posts';
const concurrentUsers = 5;
const requestCount = 20;

const report = await loadTest(loadTestUrl, concurrentUsers, requestCount);

console.log('Load Test Report:', report);

const noErrors = report.errors === 0;
const p95WithinBudget = report.p95Ms < 5000;
console.assert(noErrors, 'Should have no errors');
console.assert(p95WithinBudget, 'P95 should be under 5s');
PerformanceTest.java
import static io.restassured.RestAssured.*;
import static org.hamcrest.Matchers.*;
import io.restassured.response.Response;
import org.testng.annotations.Test;
import org.testng.Assert;
import java.util.*;
import java.util.concurrent.*;

/**
 * Response-time and concurrent-load checks for the API at {@code BASE}.
 */
public class PerformanceTest {

    private static final String BASE = "https://jsonplaceholder.typicode.com";

    /**
     * Verifies that a single GET of {@code /posts/1} returns status 200 in under 2 seconds.
     */
    @Test
    public void testSingleRequestSLA() {
        // RestAssured has built-in response time assertion
        given().baseUri(BASE)
        .when().get("/posts/1")
        .then()
            .statusCode(200)
            .time(lessThan(2000L)); // SLA: under 2 seconds
    }

    /**
     * Sends 20 GET requests to {@code /posts/1} through a pool of 5 threads, prints the
     * average, P50, P95 and throughput of the successful requests, and asserts that no
     * request failed and that P95 is below 5 seconds.
     *
     * @throws Exception if the test thread is interrupted while waiting for a result
     */
    @Test
    public void testConcurrentLoad() throws Exception {
        int concurrency = 5;
        int totalRequests = 20;
        ExecutorService executor = Executors.newFixedThreadPool(concurrency);

        // One timed GET. A non-200 status fails the task; that failure surfaces from
        // Future.get as an ExecutionException and is counted as an error below.
        Callable<Long> timedRequest = () -> {
            long t0 = System.nanoTime();
            Response resp = given().baseUri(BASE)
                .when().get("/posts/1");
            Assert.assertEquals(resp.getStatusCode(), 200);
            return TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - t0);
        };

        List<Long> times = new ArrayList<>(totalRequests);
        int errors = 0;
        long totalTime;

        // nanoTime is used for all intervals because it is not affected by
        // wall-clock adjustments.
        long startTime = System.nanoTime();
        try {
            List<Future<Long>> futures = new ArrayList<>(totalRequests);
            for (int i = 0; i < totalRequests; i++) {
                futures.add(executor.submit(timedRequest));
            }

            for (Future<Long> f : futures) {
                try {
                    times.add(f.get(10, TimeUnit.SECONDS));
                } catch (ExecutionException | TimeoutException e) {
                    // Failed or overdue request: count it and keep collecting the rest.
                    // InterruptedException is deliberately not caught here so that an
                    // interrupt aborts the test instead of being recorded as an error.
                    errors++;
                }
            }

            totalTime = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTime);
        } finally {
            // Attempts to stop requests still running after a timeout or an interrupt,
            // and releases the pool threads on every exit path.
            executor.shutdownNow();
        }

        // Calculate stats
        Collections.sort(times);
        long p50 = percentile(times, 0.5);
        long p95 = percentile(times, 0.95);
        double avg = times.stream().mapToLong(Long::longValue).average().orElse(0);
        double rps = totalTime > 0 ? times.size() * 1000.0 / totalTime : 0;

        System.out.printf("Results: %d requests, %d errors%n", times.size(), errors);
        System.out.printf("Avg: %,.0fms | P50: %dms | P95: %dms%n", avg, p50, p95);
        System.out.printf("Throughput: %.1f req/s%n", rps);

        Assert.assertEquals(errors, 0, "No errors expected");
        Assert.assertTrue(p95 < 5000, "P95 should be under 5s");
    }

    /**
     * Returns the nearest-rank percentile of an ascending list: the smallest sample such
     * that at least {@code fraction} of all samples are less than or equal to it.
     *
     * @param sorted   samples in ascending order
     * @param fraction percentile as a fraction, e.g. {@code 0.95}
     * @return the selected sample, or 0 when {@code sorted} is empty so that a run with
     *         no successful requests fails on the error-count assertion instead of an
     *         index error
     */
    private static long percentile(List<Long> sorted, double fraction) {
        if (sorted.isEmpty()) {
            return 0;
        }
        int rank = Math.max(1, (int) Math.ceil(fraction * sorted.size()));
        return sorted.get(rank - 1);
    }
}
test_performance.py
"""Performance and load testing"""
import requests
import time
import statistics
from concurrent.futures import ThreadPoolExecutor, as_completed

BASE_URL = 'https://jsonplaceholder.typicode.com'

def test_single_request_sla():
    """Validate single request meets SLA"""
    response = requests.get(f'{BASE_URL}/posts/1')

    assert response.status_code == 200
    assert response.elapsed.total_seconds() < 2.0, \
        f'Response time {response.elapsed.total_seconds():.2f}s exceeds 2s SLA'
    print(f"Single request: {response.elapsed.total_seconds()*1000:.0f}ms")

def make_request(url):
    """Issue one GET request and time it.

    Args:
        url: Address to request.

    Returns:
        A dict with ``status`` (the HTTP status code, or ``'error'`` when the
        request raised) and ``time`` (elapsed milliseconds). On failure the
        dict also carries ``error`` with the exception text.
    """
    # perf_counter is monotonic, so a wall-clock adjustment during the request
    # cannot produce a negative or inflated duration.
    start = time.perf_counter()
    try:
        resp = requests.get(url, timeout=10)
        return {'status': resp.status_code, 'time': (time.perf_counter() - start) * 1000}
    except Exception as e:
        # Deliberately broad: any failure is recorded as a result so one bad
        # request does not abort the whole load run.
        return {'status': 'error', 'time': (time.perf_counter() - start) * 1000, 'error': str(e)}

def test_concurrent_load():
    """Run concurrent load test"""
    url = f'{BASE_URL}/posts'
    concurrency = 5
    total_requests = 20

    start_time = time.time()

    with ThreadPoolExecutor(max_workers=concurrency) as executor:
        futures = [executor.submit(make_request, url) for _ in range(total_requests)]
        results = [f.result() for f in as_completed(futures)]

    total_time = (time.time() - start_time) * 1000

    # Analyze results
    times = [r['time'] for r in results if r['status'] == 200]
    errors = [r for r in results if r['status'] != 200]
    times.sort()

    p50 = times[int(len(times) * 0.5)] if times else 0
    p95 = times[int(len(times) * 0.95)] if times else 0
    avg = statistics.mean(times) if times else 0
    rps = len(times) / total_time * 1000

    print(f"Results: {len(times)} OK, {len(errors)} errors")
    print(f"Avg: {avg:.0f}ms | P50: {p50:.0f}ms | P95: {p95:.0f}ms")
    print(f"Throughput: {rps:.1f} req/s")

    assert len(errors) == 0, 'Should have no errors'
    assert p95 < 5000, f'P95 ({p95:.0f}ms) should be under 5s'

API Testing Advanced Performance & Load Testing

Written by PV

© 2026 All Rights Reserved