Description. The function ptype##s_to_affine_row_wbits (defined in blst/src/multi_scalar.c) fails to correctly handle projective points at infinity (Z = 0).
Specifically, in the section where the accumulator is built:
for (i = 0; i < npoints; i++)
for (j = nwin; --src, --j; acc++)
mul_##field(acc[0], acc[-1], src->Z);
the Z coordinate is multiplied directly into the accumulator without guarding against the case where Z = 0. This means that if any point in the input batch is infinity, the accumulator becomes zero. Since batch inversion relies on this accumulator, the reciprocal computation:
--acc; reciprocal_##field(acc[0], acc[0]);
will fail silently, propagating invalid zeroes into the affine coordinates of all points.
By contrast, the related function ptype##s_to_affine correctly applies:
vec_select(acc++, BLS12_381_Rx.p, point->Z, sizeof(vec##bits),
vec_is_zero(point->Z, sizeof(point->Z)));
which substitutes a neutral Montgomery radix (1) when Z = 0, ensuring that infinity points do not corrupt the batch.
Fuzzer Context. To find this and another findings we developed and ran a custom fuzzer. The fuzzer targets the blst BLS12-381 scalar multiplication routines, including both single-point, wbits-precompute, and Pippenger multi-point paths. Inputs are variable-length byte arrays that are split into a scalar (up to 320 bits) and a compressed curve point.
For each input, the fuzzer:
- Parses and validates the scalar and curve point.
- Executes all three multiplication paths. Using the zero scalar and point for the second and third paths as required.
- Compares results to detect inconsistencies or crashes.
The fuzzer uses AFL++ in persistent mode but can also run standalone for testing. Any mismatches trigger an immediate abort to report potential bugs.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <assert.h>
#include <unistd.h>
#include "bindings/blst.h"
#ifndef __AFL_FUZZ_TESTCASE_LEN
ssize_t fuzz_len;
#define __AFL_FUZZ_TESTCASE_LEN fuzz_len
unsigned char fuzz_buf[1024*1024];
#define __AFL_FUZZ_TESTCASE_BUF fuzz_buf
#define __AFL_FUZZ_INIT() void sync(void);
#define __AFL_LOOP(x) ((fuzz_len = read(0, fuzz_buf, sizeof(fuzz_buf))) > 0 ? 1 : 0)
#define __AFL_INIT() sync()
#endif
__AFL_FUZZ_INIT();
#define SCALAR_SIZE 32
#define POINT_SIZE 48
#define MAX_SCALAR_SIZE 40
#define MAX_INPUT_SIZE (MAX_SCALAR_SIZE + POINT_SIZE)
#define PIPPENGER_NPOINTS 32
static limb_t *g_scratch;
static size_t g_scratch_size;
static byte *g_zero_scalars;
static blst_p1_affine *g_zero_points;
static blst_p1_affine *g_points_array;
static byte *g_scalars_array;
static const blst_p1_affine **g_point_ptrs;
static const byte **g_scalar_ptrs;
static void init_fuzzer() {
g_scratch_size = blst_p1s_mult_pippenger_scratch_sizeof(PIPPENGER_NPOINTS);
size_t wbits_scratch = blst_p1s_mult_wbits_scratch_sizeof(PIPPENGER_NPOINTS);
if (wbits_scratch > g_scratch_size) g_scratch_size = wbits_scratch;
g_scratch = malloc(g_scratch_size);
g_zero_scalars = calloc(PIPPENGER_NPOINTS, MAX_SCALAR_SIZE);
g_zero_points = calloc(PIPPENGER_NPOINTS, sizeof(blst_p1_affine));
g_points_array = malloc(PIPPENGER_NPOINTS * sizeof(blst_p1_affine));
g_scalars_array = malloc(PIPPENGER_NPOINTS * MAX_SCALAR_SIZE);
g_point_ptrs = malloc(PIPPENGER_NPOINTS * sizeof(blst_p1_affine *));
g_scalar_ptrs = malloc(PIPPENGER_NPOINTS * sizeof(byte *));
}
static size_t calc_scalar_nbits(const byte *scalar, size_t max_bytes) {
for (size_t i = max_bytes; i > 0; i--) {
if (scalar[i-1]) {
size_t bits = i*8;
byte b = scalar[i-1];
while ((b & 0x80) == 0 && bits > (i-1)*8) { bits--; b <<= 1; }
return bits;
}
}
return 0;
}
static int parse_input(const uint8_t *data, size_t size, byte *scalar, size_t *scalar_nbits, blst_p1_affine *point) {
byte tmp[MAX_INPUT_SIZE];
if (size > MAX_INPUT_SIZE) return 0;
memcpy(tmp, data, size); if (size < MAX_INPUT_SIZE) memset(tmp+size, 0, MAX_INPUT_SIZE-size);
memcpy(scalar, tmp, MAX_SCALAR_SIZE);
*scalar_nbits = calc_scalar_nbits(scalar, MAX_SCALAR_SIZE);
return blst_p1_uncompress(point, tmp + MAX_SCALAR_SIZE) == BLST_SUCCESS;
}
static void test_path(const byte *scalar, size_t nbits, const blst_p1_affine *point, blst_p1 *r1, blst_p1 *r2, blst_p1 *r3) {
const blst_p1_affine *p1[1] = {point};
const byte *s1[1] = {scalar};
blst_p1s_mult_pippenger(r1, p1, 1, s1, nbits, g_scratch);
g_point_ptrs[0]=point; g_point_ptrs[1]=g_zero_points;
g_scalar_ptrs[0]=scalar; g_scalar_ptrs[1]=g_zero_scalars;
blst_p1s_mult_pippenger(r2, g_point_ptrs, 2, g_scalar_ptrs, nbits, g_scratch);
memcpy(g_points_array, point, sizeof(blst_p1_affine));
memcpy(g_scalars_array, scalar, MAX_SCALAR_SIZE);
g_point_ptrs[0]=g_points_array; g_point_ptrs[1]=NULL;
g_scalar_ptrs[0]=g_scalars_array; g_scalar_ptrs[1]=NULL;
blst_p1s_mult_pippenger(r3, g_point_ptrs, PIPPENGER_NPOINTS, g_scalar_ptrs, nbits, g_scratch);
}
static int points_equal(const blst_p1 *a, const blst_p1 *b) { return blst_p1_is_equal(a,b); }
int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
byte scalar[MAX_SCALAR_SIZE]; blst_p1_affine point; size_t scalar_nbits;
if (!parse_input(data, size, scalar, &scalar_nbits, &point)) return 0;
size_t max_nbits = scalar_nbits + 10; if (max_nbits > MAX_SCALAR_SIZE*8) max_nbits = MAX_SCALAR_SIZE*8;
for (size_t nbits = scalar_nbits; nbits <= max_nbits; nbits++) {
blst_p1 r1,r2,r3; test_path(scalar, nbits, &point, &r1,&r2,&r3);
if (!points_equal(&r1,&r2) || !points_equal(&r1,&r3) || !points_equal(&r2,&r3)) abort();
}
return 0;
}
int main(int argc, char **argv) {
init_fuzzer();
unsigned char *buf = __AFL_FUZZ_TESTCASE_BUF;
while (__AFL_LOOP(10000)) { int len=__AFL_FUZZ_TESTCASE_LEN; if(len>0 && len<=MAX_INPUT_SIZE){memset(g_scratch,0,g_scratch_size); LLVMFuzzerTestOneInput(buf,len);}}
if(argc==2){ FILE *fp=fopen(argv[1],"rb"); uint8_t data[MAX_INPUT_SIZE]; size_t read=fread(data,1,MAX_INPUT_SIZE,fp); fclose(fp); LLVMFuzzerTestOneInput(data,read);}
free(g_scratch); free(g_zero_scalars); free(g_zero_points); free(g_points_array); free(g_scalars_array); free(g_point_ptrs); free(g_scalar_ptrs);
return 0;
}
Impact.
ptype##s_to_affine_row_wbits() is used in ptype##s_precompute_wbits() to batch-convert the computed point table from Jabobi to affine representation. In turn, ptype##s_precompute_wbits() is used as precomputation step of the main MSM routine prefix##s_mult_pippenger(), in the case that the number of points is between 2 and 31:
void prefix##s_mult_pippenger(ptype *ret, \
const ptype##_affine *const points[], \
size_t npoints, \
const byte *const scalars[], size_t nbits, \
ptype##xyzz scratch[]) \
{ \
// ...
if ((npoints * sizeof(ptype##_affine) * 8 * 3) <= SCRATCH_LIMIT && \
npoints < 32) { \
ptype##_affine *table = alloca(npoints * sizeof(ptype##_affine) * 8); \
ptype##s_precompute_wbits(table, 4, points, npoints); \
ptype##s_mult_wbits(ret, table, 4, npoints, scalars, nbits, NULL); \
return; \
} \
ptype##s_mult_pippenger(ret, points, npoints, scalars, nbits, scratch, 0); \
}
According to our analysis, the issue is unlikely to affect the KZG libraries that depend on prefix##s_mult_pippenger(). For example, g1_lincomb_fast() in c-kzg-4844 depends on the problematic code path when the number of points is between 8 and 31 input points. However, it avoids zero input points by filtering them out before calling into blst. The same is true for the Rust library.
Nevertheless, the issue is subtle and could affect the soundness of similar protocols or of the EIP-7594 verifier if point filtering was removed. Inherently, g1_lincomb_fast() is called on prover-supplied input points (the KZG cell proofs) and is therefore vulnerable to any mishandling of invalid points.
Recommendation. Adopt the same protective logic used in ptype##s_to_affine:
- Use
vec_select to substitute Z = 1 when Z = 0 before contributing to the batch product.
- Apply
vec_czero on the affine outputs to zero them out if the original point was infinity.
This ensures that infinity points are handled locally and do not corrupt other valid points in the batch.
Client Response. Fixed in https://github.com/supranational/blst/commit/f48500c1fdbefa7c0bf9800bccd65d28236799c1.