Skip to content

Commit c72141a

Browse files
authored
Revert "Remove server rate limit code" (#55020)
1 parent 1275538 commit c72141a

File tree

4 files changed

+319
-0
lines changed

4 files changed

+319
-0
lines changed

src/shielding/lib/fastly-ips.ts

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
// Logic to get and store the current list of public Fastly IPs from the Fastly API: https://www.fastly.com/documentation/reference/api/utils/public-ip-list/
2+
3+
// Default returned from ➜ curl "https://api.fastly.com/public-ip-list"
4+
export const DEFAULT_FASTLY_IPS: string[] = [
5+
'23.235.32.0/20',
6+
'43.249.72.0/22',
7+
'103.244.50.0/24',
8+
'103.245.222.0/23',
9+
'103.245.224.0/24',
10+
'104.156.80.0/20',
11+
'140.248.64.0/18',
12+
'140.248.128.0/17',
13+
'146.75.0.0/17',
14+
'151.101.0.0/16',
15+
'157.52.64.0/18',
16+
'167.82.0.0/17',
17+
'167.82.128.0/20',
18+
'167.82.160.0/20',
19+
'167.82.224.0/20',
20+
'172.111.64.0/18',
21+
'185.31.16.0/22',
22+
'199.27.72.0/21',
23+
'199.232.0.0/16',
24+
]
25+
26+
let ipCache: string[] = []
27+
28+
export async function getPublicFastlyIPs(): Promise<string[]> {
29+
// Don't fetch the list in dev & testing, just use the defaults
30+
if (process.env.NODE_ENV !== 'production') {
31+
ipCache = DEFAULT_FASTLY_IPS
32+
}
33+
34+
if (ipCache.length) {
35+
return ipCache
36+
}
37+
38+
const endpoint = 'https://api.fastly.com/public-ip-list'
39+
let ips: string[] = []
40+
let attempt = 0
41+
42+
while (attempt < 3) {
43+
try {
44+
const response = await fetch(endpoint)
45+
if (!response.ok) {
46+
throw new Error(`Failed to fetch: ${response.status}`)
47+
}
48+
const data = await response.json()
49+
if (data && Array.isArray(data.addresses)) {
50+
ips = data.addresses
51+
break
52+
} else {
53+
throw new Error('Invalid response structure')
54+
}
55+
} catch (error: any) {
56+
console.error(
57+
`Failed to fetch Fastly IPs: ${error.message}. Retrying ${3 - attempt} more times`,
58+
)
59+
attempt++
60+
if (attempt >= 3) {
61+
ips = DEFAULT_FASTLY_IPS
62+
}
63+
}
64+
}
65+
66+
ipCache = ips
67+
return ips
68+
}
69+
70+
// The IPs we check in the rate-limiter are in the form `X.X.X.X`
71+
// But the IPs returned from the Fastly API are in the form `X.X.X.X/Y`
72+
// For an IP in the rate-limiter, we want `X.X.X.*` to match `X.X.X.X/Y`
73+
export async function isFastlyIP(ip: string): Promise<boolean> {
74+
// If IPs aren't initialized, fetch them
75+
if (!ipCache.length) {
76+
await getPublicFastlyIPs()
77+
}
78+
const parts = ip.split('.')
79+
const prefix = parts.slice(0, 3).join('.')
80+
return ipCache.some((fastlyIP) => fastlyIP.startsWith(prefix))
81+
}

src/shielding/middleware/index.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,11 @@ import handleOldNextDataPaths from './handle-old-next-data-paths'
66
import handleInvalidQuerystringValues from './handle-invalid-query-string-values'
77
import handleInvalidNextPaths from './handle-invalid-nextjs-paths'
88
import handleInvalidHeaders from './handle-invalid-headers'
9+
import { createRateLimiter } from './rate-limit'
910

1011
const router = express.Router()
1112

13+
router.use(createRateLimiter())
1214
router.use(handleInvalidQuerystrings)
1315
router.use(handleInvalidPaths)
1416
router.use(handleOldNextDataPaths)
Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
import type { Request } from 'express'
2+
3+
import rateLimit from 'express-rate-limit'
4+
5+
import statsd from '@/observability/lib/statsd.js'
6+
import { noCacheControl } from '@/frame/middleware/cache-control.js'
7+
import { isFastlyIP } from '@/shielding/lib/fastly-ips'
8+
9+
const EXPIRES_IN_AS_SECONDS = 60
10+
11+
const MAX = process.env.RATE_LIMIT_MAX ? parseInt(process.env.RATE_LIMIT_MAX, 10) : 50
12+
if (isNaN(MAX)) {
13+
throw new Error(`process.env.RATE_LIMIT_MAX (${process.env.RATE_LIMIT_MAX}) not a number`)
14+
}
15+
16+
// We apply this rate limiter to _all_ routes in src/shielding/index.ts except for `/api/*` routes
17+
export function createRateLimiter(max = MAX, isAPILimiter = false) {
18+
return rateLimit({
19+
// 1 minute
20+
windowMs: EXPIRES_IN_AS_SECONDS * 1000,
21+
// limit each IP to X requests per windowMs
22+
// We currently have about 12 instances in production. That's routed
23+
// in Moda to spread the requests to each healthy instance.
24+
// So, the true rate limit, per `windowMs`, is this number multiplied
25+
// by the current number of instances.
26+
max: max,
27+
28+
// Return rate limit info in the `RateLimit-*` headers
29+
standardHeaders: true,
30+
// Disable the `X-RateLimit-*` headers
31+
legacyHeaders: false,
32+
33+
keyGenerator: (req) => {
34+
return getClientIPFromReq(req)
35+
},
36+
37+
skip: async (req) => {
38+
const ip = getClientIPFromReq(req)
39+
if (await isFastlyIP(ip)) {
40+
return true
41+
}
42+
// IP is empty when we are in a non-production (not behind Fastly) environment
43+
// In these environments, we don't want to rate limit (including tests)
44+
// However, if you want to test rate limiting locally, you can manually set
45+
// the `fastly-client-ip` header to your IP address to bypass this check set the
46+
if (ip === '') {
47+
return true
48+
}
49+
50+
// We handle /api/* routes with a separate rate limiter
51+
// When it is a separate rate limiter, isAPILimiter will be passed as true
52+
if (req.path.startsWith('/api/') || isAPILimiter) {
53+
return false
54+
}
55+
56+
// If the request is not suspicious, don't rate limit it
57+
if (!isSuspiciousRequest(req)) {
58+
return true
59+
}
60+
61+
// At this point, a request is suspicious. We want to track how many are in datadog
62+
const tags = [`url:${req.url}`, `ip:${ip}`, `path:${req.path}`, `qs:${req.url.split('?')[1]}`]
63+
statsd.increment('middleware.rate_limit_dont_skip', 1, tags)
64+
65+
return false
66+
},
67+
68+
handler: (req, res, next, options) => {
69+
const tags = [`url:${req.url}`, `ip:${req.ip}`, `path:${req.path}`]
70+
statsd.increment('middleware.rate_limit', 1, tags)
71+
noCacheControl(res)
72+
res.status(options.statusCode).send(options.message)
73+
},
74+
75+
// Temporary so that we can see what is coming from Fastly v app level
76+
statusCode: 418, // "i'm a teapot"
77+
})
78+
}
79+
80+
function getClientIPFromReq(req: Request) {
81+
// Moda forwards the client's IP using the `fastly-client-ip` header.
82+
// However, in non-fastly environments, this header is not present.
83+
// Staging is behind Okta, so we don't need to rate limit there.
84+
let ip = req?.headers?.['fastly-client-ip'] || ''
85+
// This is to satisfy TypeScript since a header could be a string array, but fastly-client-ip is not
86+
if (typeof ip !== 'string') {
87+
ip = ''
88+
}
89+
return ip
90+
}
91+
92+
const RECOGNIZED_KEYS_BY_PREFIX = {
93+
'/_next/data/': ['versionId', 'productId', 'restPage', 'apiVersion', 'category', 'subcategory'],
94+
'/api/search': ['query', 'language', 'version', 'page', 'product', 'autocomplete', 'limit'],
95+
'/api/anchor-redirect': ['hash', 'path'],
96+
'/api/webhooks': ['category', 'version'],
97+
'/api/pageinfo': ['pathname'],
98+
}
99+
100+
const RECOGNIZED_KEYS = {
101+
search: ['query', 'page'],
102+
}
103+
104+
const MISC_KEYS = [
105+
// Learning track pages
106+
'learn',
107+
'learnProduct',
108+
109+
// Platform picker
110+
'platform',
111+
112+
// Tool picker
113+
'tool',
114+
115+
// When apiVersion isn't the only one. E.g. ?apiVersion=XXX&tool=vscode
116+
'apiVersion',
117+
118+
// Lowercase for rest pages
119+
'apiversion',
120+
121+
// We use the query param "feature" to enable experiments in the browser
122+
'feature',
123+
]
124+
125+
/**
126+
* Return true if the request looks like a DoS request. I.e. suspicious.
127+
*
128+
* We've seen lots of requests slip past the CDN and its edge rate limiter
129+
* that clearly are not realistic URLs that you'd get in a browser.
130+
* For example `?action=octrh&api=h9vcd&immagine=jzs3c&lang=xb0kp&m=rrmek`
131+
* There are certain URLs that have query strings that are valid, but
132+
* have one more query string keys. In particular the `/api/..` endpoints.
133+
*
134+
* Remember, just because this function might return true, it doesn't mean
135+
* the request will be rate limited. It has to be both suspicious AND
136+
* have lots and lots of requests.
137+
*
138+
* @param {Request} req
139+
* @returns boolean
140+
*/
141+
function isSuspiciousRequest(req: Request) {
142+
const keys = Object.keys(req.query)
143+
144+
// Since this function can only speculate by query strings (at the
145+
// moment), if the URL doesn't have any query strings it's not suspicious.
146+
if (!keys.length) {
147+
return false
148+
}
149+
150+
// E.g. `/en/rest/actions?apiVersion=YYYY-MM-DD`
151+
if (keys.length === 1 && keys[0] === 'apiVersion') return false
152+
153+
// Now check what query string keys are *left* based on a list of
154+
// recognized keys per different prefixes.
155+
for (const [prefix, recognizedKeys] of Object.entries(RECOGNIZED_KEYS_BY_PREFIX)) {
156+
if (req.path.startsWith(prefix)) {
157+
return keys.filter((key) => !recognizedKeys.includes(key)).length > 0
158+
}
159+
}
160+
161+
// E.g. `/fr/search?query=foo
162+
if (req.path.split('/')[2] === 'search') {
163+
return keys.filter((key) => !RECOGNIZED_KEYS.search.includes(key)).length > 0
164+
}
165+
166+
const unrecognizedKeys = keys.filter((key) => !MISC_KEYS.includes(key))
167+
return unrecognizedKeys.length > 0
168+
}

src/shielding/tests/shielding.ts

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import { describe, expect, test } from 'vitest'
22

33
import { SURROGATE_ENUMS } from '@/frame/middleware/set-fastly-surrogate-key.js'
44
import { get } from '@/tests/helpers/e2etest.js'
5+
import { DEFAULT_FASTLY_IPS } from '@/shielding/lib/fastly-ips'
56

67
describe('honeypotting', () => {
78
test('any GET with survey-vote and survey-token query strings is 400', async () => {
@@ -94,6 +95,73 @@ describe('index.md and .md suffixes', () => {
9495
})
9596
})
9697

98+
describe('rate limiting', () => {
99+
// We can't actually trigger a full rate limit because
100+
// then all other tests will all fail. And we can't rely on this
101+
// test always being run last.
102+
103+
test('only happens if you have junk query strings', async () => {
104+
const res = await get('/robots.txt?foo=bar', {
105+
headers: {
106+
// Rate limiting only happens in production, so we need to
107+
// make the environment look like production.
108+
'fastly-client-ip': 'abc',
109+
},
110+
})
111+
expect(res.statusCode).toBe(200)
112+
const limit = parseInt(res.headers['ratelimit-limit'])
113+
const remaining = parseInt(res.headers['ratelimit-remaining'])
114+
expect(limit).toBeGreaterThan(0)
115+
expect(remaining).toBeLessThan(limit)
116+
117+
// A second request
118+
{
119+
const res = await get('/robots.txt?foo=buzz', {
120+
headers: {
121+
'fastly-client-ip': 'abc',
122+
},
123+
})
124+
expect(res.statusCode).toBe(200)
125+
const newLimit = parseInt(res.headers['ratelimit-limit'])
126+
const newRemaining = parseInt(res.headers['ratelimit-remaining'])
127+
expect(newLimit).toBe(limit)
128+
// Can't rely on `newRemaining == remaining - 1` because of
129+
// concurrency of test-running.
130+
expect(newRemaining).toBeLessThan(remaining)
131+
}
132+
})
133+
134+
test('nothing happens if no unrecognized query string', async () => {
135+
const res = await get('/robots.txt')
136+
expect(res.statusCode).toBe(200)
137+
expect(res.headers['ratelimit-limit']).toBeUndefined()
138+
expect(res.headers['ratelimit-remaining']).toBeUndefined()
139+
})
140+
141+
test('Fastly IPs are not rate limited', async () => {
142+
// Fastly IPs are in the form `X.X.X.X/Y`
143+
// Rate limited IPs are in the form `X.X.X.X`
144+
// Where the last X could be any 2-3 digit number
145+
const mockFastlyIP =
146+
DEFAULT_FASTLY_IPS[0].split('.').slice(0, 3).join('.') + `.${Math.floor(Math.random() * 100)}`
147+
// Cookies only allows 1 request per minute
148+
const res1 = await get('/api/cookies', {
149+
headers: {
150+
'fastly-client-ip': mockFastlyIP,
151+
},
152+
})
153+
expect(res1.statusCode).toBe(200)
154+
155+
// A second request shouldn't be rate limited because it's from a Fastly IP
156+
const res2 = await get('/api/cookies', {
157+
headers: {
158+
'fastly-client-ip': mockFastlyIP,
159+
},
160+
})
161+
expect(res2.statusCode).toBe(200)
162+
})
163+
})
164+
97165
describe('404 pages and their content-type', () => {
98166
const exampleNonLanguage404plain = ['/_next/image/foo']
99167
test.each(exampleNonLanguage404plain)(

0 commit comments

Comments
 (0)