Loading CHANGES +5 −0 Original line number Diff line number Diff line Loading @@ -310,6 +310,11 @@ Changes between 1.0.1e and 1.0.2 [xx XXX xxxx] *) Fix ec_GFp_simple_points_make_affine (thus, EC_POINTs_mul etc.) for corner cases. (Certain input points at infinity could lead to bogus results, with non-infinity inputs mapped to infinity too.) [Bodo Moeller] *) Initial support for PowerISA 2.0.7, first implemented in POWER8. This covers AES, SHA256/512 and GHASH. "Initial" means that most common cases are optimized and there still is room for further Loading crypto/ec/ecp_smpl.c +76 −98 Original line number Diff line number Diff line Loading @@ -1175,9 +1175,8 @@ int ec_GFp_simple_make_affine(const EC_GROUP *group, EC_POINT *point, BN_CTX *ct int ec_GFp_simple_points_make_affine(const EC_GROUP *group, size_t num, EC_POINT *points[], BN_CTX *ctx) { BN_CTX *new_ctx = NULL; BIGNUM *tmp0, *tmp1; size_t pow2 = 0; BIGNUM **heap = NULL; BIGNUM *tmp, *tmp_Z; BIGNUM **prod_Z = NULL; size_t i; int ret = 0; Loading @@ -1192,110 +1191,90 @@ int ec_GFp_simple_points_make_affine(const EC_GROUP *group, size_t num, EC_POINT } BN_CTX_start(ctx); tmp0 = BN_CTX_get(ctx); tmp1 = BN_CTX_get(ctx); if (tmp0 == NULL || tmp1 == NULL) goto err; /* Before converting the individual points, compute inverses of all Z values. * Modular inversion is rather slow, but luckily we can do with a single * explicit inversion, plus about 3 multiplications per input value. */ pow2 = 1; while (num > pow2) pow2 <<= 1; /* Now pow2 is the smallest power of 2 satifsying pow2 >= num. * We need twice that. */ pow2 <<= 1; heap = OPENSSL_malloc(pow2 * sizeof heap[0]); if (heap == NULL) goto err; tmp = BN_CTX_get(ctx); tmp_Z = BN_CTX_get(ctx); if (tmp == NULL || tmp_Z == NULL) goto err; /* The array is used as a binary tree, exactly as in heapsort: * * heap[1] * heap[2] heap[3] * heap[4] heap[5] heap[6] heap[7] * heap[8]heap[9] heap[10]heap[11] heap[12]heap[13] heap[14] heap[15] * * We put the Z's in the last line; * then we set each other node to the product of its two child-nodes (where * empty or 0 entries are treated as ones); * then we invert heap[1]; * then we invert each other node by replacing it by the product of its * parent (after inversion) and its sibling (before inversion). */ heap[0] = NULL; for (i = pow2/2 - 1; i > 0; i--) heap[i] = NULL; prod_Z = OPENSSL_malloc(num * sizeof prod_Z[0]); if (prod_Z == NULL) goto err; for (i = 0; i < num; i++) heap[pow2/2 + i] = &points[i]->Z; for (i = pow2/2 + num; i < pow2; i++) heap[i] = NULL; /* set each node to the product of its children */ for (i = pow2/2 - 1; i > 0; i--) { heap[i] = BN_new(); if (heap[i] == NULL) goto err; prod_Z[i] = BN_new(); if (prod_Z[i] == NULL) goto err; } if (heap[2*i] != NULL) { if ((heap[2*i + 1] == NULL) || BN_is_zero(heap[2*i + 1])) /* Set each prod_Z[i] to the product of points[0]->Z .. points[i]->Z, * skipping any zero-valued inputs (pretend that they're 1). */ if (!BN_is_zero(&points[0]->Z)) { if (!BN_copy(heap[i], heap[2*i])) goto err; if (!BN_copy(prod_Z[0], &points[0]->Z)) goto err; } else { if (BN_is_zero(heap[2*i])) if (group->meth->field_set_to_one != 0) { if (!BN_copy(heap[i], heap[2*i + 1])) goto err; if (!group->meth->field_set_to_one(group, prod_Z[0], ctx)) goto err; } else { if (!group->meth->field_mul(group, heap[i], heap[2*i], heap[2*i + 1], ctx)) goto err; if (!BN_one(prod_Z[0])) goto err; } } for (i = 1; i < num; i++) { if (!BN_is_zero(&points[i]->Z)) { if (!group->meth->field_mul(group, prod_Z[i], prod_Z[i - 1], &points[i]->Z, ctx)) goto err; } else { if (!BN_copy(prod_Z[i], prod_Z[i - 1])) goto err; } } /* invert heap[1] */ if (!BN_is_zero(heap[1])) { if (!BN_mod_inverse(heap[1], heap[1], &group->field, ctx)) /* Now use a single explicit inversion to replace every * non-zero points[i]->Z by its inverse. */ if (!BN_mod_inverse(tmp, prod_Z[num - 1], &group->field, ctx)) { ECerr(EC_F_EC_GFP_SIMPLE_POINTS_MAKE_AFFINE, ERR_R_BN_LIB); goto err; } } if (group->meth->field_encode != 0) { /* in the Montgomery case, we just turned R*H (representing H) /* In the Montgomery case, we just turned R*H (representing H) * into 1/(R*H), but we need R*(1/H) (representing 1/H); * i.e. we have need to multiply by the Montgomery factor twice */ if (!group->meth->field_encode(group, heap[1], heap[1], ctx)) goto err; if (!group->meth->field_encode(group, heap[1], heap[1], ctx)) goto err; * i.e. we need to multiply by the Montgomery factor twice. */ if (!group->meth->field_encode(group, tmp, tmp, ctx)) goto err; if (!group->meth->field_encode(group, tmp, tmp, ctx)) goto err; } /* set other heap[i]'s to their inverses */ for (i = 2; i < pow2/2 + num; i += 2) for (i = num - 1; i > 0; --i) { /* i is even */ if ((heap[i + 1] != NULL) && !BN_is_zero(heap[i + 1])) /* Loop invariant: tmp is the product of the inverses of * points[0]->Z .. points[i]->Z (zero-valued inputs skipped). */ if (!BN_is_zero(&points[i]->Z)) { if (!group->meth->field_mul(group, tmp0, heap[i/2], heap[i + 1], ctx)) goto err; if (!group->meth->field_mul(group, tmp1, heap[i/2], heap[i], ctx)) goto err; if (!BN_copy(heap[i], tmp0)) goto err; if (!BN_copy(heap[i + 1], tmp1)) goto err; /* Set tmp_Z to the inverse of points[i]->Z (as product * of Z inverses 0 .. i, Z values 0 .. i - 1). */ if (!group->meth->field_mul(group, tmp_Z, prod_Z[i - 1], tmp, ctx)) goto err; /* Update tmp to satisfy the loop invariant for i - 1. */ if (!group->meth->field_mul(group, tmp, tmp, &points[i]->Z, ctx)) goto err; /* Replace points[i]->Z by its inverse. */ if (!BN_copy(&points[i]->Z, tmp_Z)) goto err; } else { if (!BN_copy(heap[i], heap[i/2])) goto err; } if (!BN_is_zero(&points[0]->Z)) { /* Replace points[0]->Z by its inverse. */ if (!BN_copy(&points[0]->Z, tmp)) goto err; } /* we have replaced all non-zero Z's by their inverses, now fix up all the points */ /* Finally, fix up the X and Y coordinates for all points. */ for (i = 0; i < num; i++) { EC_POINT *p = points[i]; Loading @@ -1304,11 +1283,11 @@ int ec_GFp_simple_points_make_affine(const EC_GROUP *group, size_t num, EC_POINT { /* turn (X, Y, 1/Z) into (X/Z^2, Y/Z^3, 1) */ if (!group->meth->field_sqr(group, tmp1, &p->Z, ctx)) goto err; if (!group->meth->field_mul(group, &p->X, &p->X, tmp1, ctx)) goto err; if (!group->meth->field_sqr(group, tmp, &p->Z, ctx)) goto err; if (!group->meth->field_mul(group, &p->X, &p->X, tmp, ctx)) goto err; if (!group->meth->field_mul(group, tmp1, tmp1, &p->Z, ctx)) goto err; if (!group->meth->field_mul(group, &p->Y, &p->Y, tmp1, ctx)) goto err; if (!group->meth->field_mul(group, tmp, tmp, &p->Z, ctx)) goto err; if (!group->meth->field_mul(group, &p->Y, &p->Y, tmp, ctx)) goto err; if (group->meth->field_set_to_one != 0) { Loading @@ -1328,15 +1307,14 @@ int ec_GFp_simple_points_make_affine(const EC_GROUP *group, size_t num, EC_POINT BN_CTX_end(ctx); if (new_ctx != NULL) BN_CTX_free(new_ctx); if (heap != NULL) if (prod_Z != NULL) { /* heap[pow2/2] .. heap[pow2-1] have not been allocated locally! */ for (i = pow2/2 - 1; i > 0; i--) for (i = 0; i < num; i++) { if (heap[i] != NULL) BN_clear_free(heap[i]); if (prod_Z[i] != NULL) BN_clear_free(prod_Z[i]); } OPENSSL_free(heap); OPENSSL_free(prod_Z); } return ret; } Loading crypto/ec/ectest.c +49 −14 Original line number Diff line number Diff line Loading @@ -199,6 +199,7 @@ static void group_order_tests(EC_GROUP *group) EC_POINT *P = EC_POINT_new(group); EC_POINT *Q = EC_POINT_new(group); BN_CTX *ctx = BN_CTX_new(); int i; n1 = BN_new(); n2 = BN_new(); order = BN_new(); fprintf(stdout, "verify group order ..."); Loading @@ -212,21 +213,55 @@ static void group_order_tests(EC_GROUP *group) if (!EC_POINT_mul(group, Q, order, NULL, NULL, ctx)) ABORT; if (!EC_POINT_is_at_infinity(group, Q)) ABORT; fprintf(stdout, " ok\n"); fprintf(stdout, "long/negative scalar tests ... "); fprintf(stdout, "long/negative scalar tests "); for (i = 1; i <= 2; i++) { const BIGNUM *scalars[6]; const EC_POINT *points[6]; fprintf(stdout, i == 1 ? "allowing precomputation ... " : "without precomputation ... "); if (!BN_set_word(n1, i)) ABORT; /* If i == 1, P will be the predefined generator for which * EC_GROUP_precompute_mult has set up precomputation. */ if (!EC_POINT_mul(group, P, n1, NULL, NULL, ctx)) ABORT; if (!BN_one(n1)) ABORT; /* n1 = 1 - order */ if (!BN_sub(n1, n1, order)) ABORT; if (!EC_POINT_mul(group, Q, NULL, P, n1, ctx)) ABORT; if (0 != EC_POINT_cmp(group, Q, P, ctx)) ABORT; /* n2 = 1 + order */ if (!BN_add(n2, order, BN_value_one())) ABORT; if (!EC_POINT_mul(group, Q, NULL, P, n2, ctx)) ABORT; if (0 != EC_POINT_cmp(group, Q, P, ctx)) ABORT; /* n2 = (1 - order) * (1 + order) */ /* n2 = (1 - order) * (1 + order) = 1 - order^2 */ if (!BN_mul(n2, n1, n2, ctx)) ABORT; if (!EC_POINT_mul(group, Q, NULL, P, n2, ctx)) ABORT; if (0 != EC_POINT_cmp(group, Q, P, ctx)) ABORT; /* n2 = order^2 - 1 */ BN_set_negative(n2, 0); if (!EC_POINT_mul(group, Q, NULL, P, n2, ctx)) ABORT; /* Add P to verify the result. */ if (!EC_POINT_add(group, Q, Q, P, ctx)) ABORT; if (!EC_POINT_is_at_infinity(group, Q)) ABORT; /* Exercise EC_POINTs_mul, including corner cases. */ scalars[0] = n1; points[0] = Q; /* => infinity */ scalars[1] = n2; points[1] = P; /* => -P */ scalars[2] = n1; points[2] = Q; /* => infinity */ scalars[3] = n2; points[3] = Q; /* => infinity */ scalars[4] = n1; points[4] = P; /* => P */ scalars[5] = n2; points[5] = Q; /* => infinity */ if (!EC_POINTs_mul(group, Q, NULL, 5, points, scalars, ctx)) ABORT; if (!EC_POINT_is_at_infinity(group, Q)) ABORT; } fprintf(stdout, "ok\n"); EC_POINT_free(P); EC_POINT_free(Q); BN_free(n1); Loading Loading
CHANGES +5 −0 Original line number Diff line number Diff line Loading @@ -310,6 +310,11 @@ Changes between 1.0.1e and 1.0.2 [xx XXX xxxx] *) Fix ec_GFp_simple_points_make_affine (thus, EC_POINTs_mul etc.) for corner cases. (Certain input points at infinity could lead to bogus results, with non-infinity inputs mapped to infinity too.) [Bodo Moeller] *) Initial support for PowerISA 2.0.7, first implemented in POWER8. This covers AES, SHA256/512 and GHASH. "Initial" means that most common cases are optimized and there still is room for further Loading
crypto/ec/ecp_smpl.c +76 −98 Original line number Diff line number Diff line Loading @@ -1175,9 +1175,8 @@ int ec_GFp_simple_make_affine(const EC_GROUP *group, EC_POINT *point, BN_CTX *ct int ec_GFp_simple_points_make_affine(const EC_GROUP *group, size_t num, EC_POINT *points[], BN_CTX *ctx) { BN_CTX *new_ctx = NULL; BIGNUM *tmp0, *tmp1; size_t pow2 = 0; BIGNUM **heap = NULL; BIGNUM *tmp, *tmp_Z; BIGNUM **prod_Z = NULL; size_t i; int ret = 0; Loading @@ -1192,110 +1191,90 @@ int ec_GFp_simple_points_make_affine(const EC_GROUP *group, size_t num, EC_POINT } BN_CTX_start(ctx); tmp0 = BN_CTX_get(ctx); tmp1 = BN_CTX_get(ctx); if (tmp0 == NULL || tmp1 == NULL) goto err; /* Before converting the individual points, compute inverses of all Z values. * Modular inversion is rather slow, but luckily we can do with a single * explicit inversion, plus about 3 multiplications per input value. */ pow2 = 1; while (num > pow2) pow2 <<= 1; /* Now pow2 is the smallest power of 2 satifsying pow2 >= num. * We need twice that. */ pow2 <<= 1; heap = OPENSSL_malloc(pow2 * sizeof heap[0]); if (heap == NULL) goto err; tmp = BN_CTX_get(ctx); tmp_Z = BN_CTX_get(ctx); if (tmp == NULL || tmp_Z == NULL) goto err; /* The array is used as a binary tree, exactly as in heapsort: * * heap[1] * heap[2] heap[3] * heap[4] heap[5] heap[6] heap[7] * heap[8]heap[9] heap[10]heap[11] heap[12]heap[13] heap[14] heap[15] * * We put the Z's in the last line; * then we set each other node to the product of its two child-nodes (where * empty or 0 entries are treated as ones); * then we invert heap[1]; * then we invert each other node by replacing it by the product of its * parent (after inversion) and its sibling (before inversion). */ heap[0] = NULL; for (i = pow2/2 - 1; i > 0; i--) heap[i] = NULL; prod_Z = OPENSSL_malloc(num * sizeof prod_Z[0]); if (prod_Z == NULL) goto err; for (i = 0; i < num; i++) heap[pow2/2 + i] = &points[i]->Z; for (i = pow2/2 + num; i < pow2; i++) heap[i] = NULL; /* set each node to the product of its children */ for (i = pow2/2 - 1; i > 0; i--) { heap[i] = BN_new(); if (heap[i] == NULL) goto err; prod_Z[i] = BN_new(); if (prod_Z[i] == NULL) goto err; } if (heap[2*i] != NULL) { if ((heap[2*i + 1] == NULL) || BN_is_zero(heap[2*i + 1])) /* Set each prod_Z[i] to the product of points[0]->Z .. points[i]->Z, * skipping any zero-valued inputs (pretend that they're 1). */ if (!BN_is_zero(&points[0]->Z)) { if (!BN_copy(heap[i], heap[2*i])) goto err; if (!BN_copy(prod_Z[0], &points[0]->Z)) goto err; } else { if (BN_is_zero(heap[2*i])) if (group->meth->field_set_to_one != 0) { if (!BN_copy(heap[i], heap[2*i + 1])) goto err; if (!group->meth->field_set_to_one(group, prod_Z[0], ctx)) goto err; } else { if (!group->meth->field_mul(group, heap[i], heap[2*i], heap[2*i + 1], ctx)) goto err; if (!BN_one(prod_Z[0])) goto err; } } for (i = 1; i < num; i++) { if (!BN_is_zero(&points[i]->Z)) { if (!group->meth->field_mul(group, prod_Z[i], prod_Z[i - 1], &points[i]->Z, ctx)) goto err; } else { if (!BN_copy(prod_Z[i], prod_Z[i - 1])) goto err; } } /* invert heap[1] */ if (!BN_is_zero(heap[1])) { if (!BN_mod_inverse(heap[1], heap[1], &group->field, ctx)) /* Now use a single explicit inversion to replace every * non-zero points[i]->Z by its inverse. */ if (!BN_mod_inverse(tmp, prod_Z[num - 1], &group->field, ctx)) { ECerr(EC_F_EC_GFP_SIMPLE_POINTS_MAKE_AFFINE, ERR_R_BN_LIB); goto err; } } if (group->meth->field_encode != 0) { /* in the Montgomery case, we just turned R*H (representing H) /* In the Montgomery case, we just turned R*H (representing H) * into 1/(R*H), but we need R*(1/H) (representing 1/H); * i.e. we have need to multiply by the Montgomery factor twice */ if (!group->meth->field_encode(group, heap[1], heap[1], ctx)) goto err; if (!group->meth->field_encode(group, heap[1], heap[1], ctx)) goto err; * i.e. we need to multiply by the Montgomery factor twice. */ if (!group->meth->field_encode(group, tmp, tmp, ctx)) goto err; if (!group->meth->field_encode(group, tmp, tmp, ctx)) goto err; } /* set other heap[i]'s to their inverses */ for (i = 2; i < pow2/2 + num; i += 2) for (i = num - 1; i > 0; --i) { /* i is even */ if ((heap[i + 1] != NULL) && !BN_is_zero(heap[i + 1])) /* Loop invariant: tmp is the product of the inverses of * points[0]->Z .. points[i]->Z (zero-valued inputs skipped). */ if (!BN_is_zero(&points[i]->Z)) { if (!group->meth->field_mul(group, tmp0, heap[i/2], heap[i + 1], ctx)) goto err; if (!group->meth->field_mul(group, tmp1, heap[i/2], heap[i], ctx)) goto err; if (!BN_copy(heap[i], tmp0)) goto err; if (!BN_copy(heap[i + 1], tmp1)) goto err; /* Set tmp_Z to the inverse of points[i]->Z (as product * of Z inverses 0 .. i, Z values 0 .. i - 1). */ if (!group->meth->field_mul(group, tmp_Z, prod_Z[i - 1], tmp, ctx)) goto err; /* Update tmp to satisfy the loop invariant for i - 1. */ if (!group->meth->field_mul(group, tmp, tmp, &points[i]->Z, ctx)) goto err; /* Replace points[i]->Z by its inverse. */ if (!BN_copy(&points[i]->Z, tmp_Z)) goto err; } else { if (!BN_copy(heap[i], heap[i/2])) goto err; } if (!BN_is_zero(&points[0]->Z)) { /* Replace points[0]->Z by its inverse. */ if (!BN_copy(&points[0]->Z, tmp)) goto err; } /* we have replaced all non-zero Z's by their inverses, now fix up all the points */ /* Finally, fix up the X and Y coordinates for all points. */ for (i = 0; i < num; i++) { EC_POINT *p = points[i]; Loading @@ -1304,11 +1283,11 @@ int ec_GFp_simple_points_make_affine(const EC_GROUP *group, size_t num, EC_POINT { /* turn (X, Y, 1/Z) into (X/Z^2, Y/Z^3, 1) */ if (!group->meth->field_sqr(group, tmp1, &p->Z, ctx)) goto err; if (!group->meth->field_mul(group, &p->X, &p->X, tmp1, ctx)) goto err; if (!group->meth->field_sqr(group, tmp, &p->Z, ctx)) goto err; if (!group->meth->field_mul(group, &p->X, &p->X, tmp, ctx)) goto err; if (!group->meth->field_mul(group, tmp1, tmp1, &p->Z, ctx)) goto err; if (!group->meth->field_mul(group, &p->Y, &p->Y, tmp1, ctx)) goto err; if (!group->meth->field_mul(group, tmp, tmp, &p->Z, ctx)) goto err; if (!group->meth->field_mul(group, &p->Y, &p->Y, tmp, ctx)) goto err; if (group->meth->field_set_to_one != 0) { Loading @@ -1328,15 +1307,14 @@ int ec_GFp_simple_points_make_affine(const EC_GROUP *group, size_t num, EC_POINT BN_CTX_end(ctx); if (new_ctx != NULL) BN_CTX_free(new_ctx); if (heap != NULL) if (prod_Z != NULL) { /* heap[pow2/2] .. heap[pow2-1] have not been allocated locally! */ for (i = pow2/2 - 1; i > 0; i--) for (i = 0; i < num; i++) { if (heap[i] != NULL) BN_clear_free(heap[i]); if (prod_Z[i] != NULL) BN_clear_free(prod_Z[i]); } OPENSSL_free(heap); OPENSSL_free(prod_Z); } return ret; } Loading
crypto/ec/ectest.c +49 −14 Original line number Diff line number Diff line Loading @@ -199,6 +199,7 @@ static void group_order_tests(EC_GROUP *group) EC_POINT *P = EC_POINT_new(group); EC_POINT *Q = EC_POINT_new(group); BN_CTX *ctx = BN_CTX_new(); int i; n1 = BN_new(); n2 = BN_new(); order = BN_new(); fprintf(stdout, "verify group order ..."); Loading @@ -212,21 +213,55 @@ static void group_order_tests(EC_GROUP *group) if (!EC_POINT_mul(group, Q, order, NULL, NULL, ctx)) ABORT; if (!EC_POINT_is_at_infinity(group, Q)) ABORT; fprintf(stdout, " ok\n"); fprintf(stdout, "long/negative scalar tests ... "); fprintf(stdout, "long/negative scalar tests "); for (i = 1; i <= 2; i++) { const BIGNUM *scalars[6]; const EC_POINT *points[6]; fprintf(stdout, i == 1 ? "allowing precomputation ... " : "without precomputation ... "); if (!BN_set_word(n1, i)) ABORT; /* If i == 1, P will be the predefined generator for which * EC_GROUP_precompute_mult has set up precomputation. */ if (!EC_POINT_mul(group, P, n1, NULL, NULL, ctx)) ABORT; if (!BN_one(n1)) ABORT; /* n1 = 1 - order */ if (!BN_sub(n1, n1, order)) ABORT; if (!EC_POINT_mul(group, Q, NULL, P, n1, ctx)) ABORT; if (0 != EC_POINT_cmp(group, Q, P, ctx)) ABORT; /* n2 = 1 + order */ if (!BN_add(n2, order, BN_value_one())) ABORT; if (!EC_POINT_mul(group, Q, NULL, P, n2, ctx)) ABORT; if (0 != EC_POINT_cmp(group, Q, P, ctx)) ABORT; /* n2 = (1 - order) * (1 + order) */ /* n2 = (1 - order) * (1 + order) = 1 - order^2 */ if (!BN_mul(n2, n1, n2, ctx)) ABORT; if (!EC_POINT_mul(group, Q, NULL, P, n2, ctx)) ABORT; if (0 != EC_POINT_cmp(group, Q, P, ctx)) ABORT; /* n2 = order^2 - 1 */ BN_set_negative(n2, 0); if (!EC_POINT_mul(group, Q, NULL, P, n2, ctx)) ABORT; /* Add P to verify the result. */ if (!EC_POINT_add(group, Q, Q, P, ctx)) ABORT; if (!EC_POINT_is_at_infinity(group, Q)) ABORT; /* Exercise EC_POINTs_mul, including corner cases. */ scalars[0] = n1; points[0] = Q; /* => infinity */ scalars[1] = n2; points[1] = P; /* => -P */ scalars[2] = n1; points[2] = Q; /* => infinity */ scalars[3] = n2; points[3] = Q; /* => infinity */ scalars[4] = n1; points[4] = P; /* => P */ scalars[5] = n2; points[5] = Q; /* => infinity */ if (!EC_POINTs_mul(group, Q, NULL, 5, points, scalars, ctx)) ABORT; if (!EC_POINT_is_at_infinity(group, Q)) ABORT; } fprintf(stdout, "ok\n"); EC_POINT_free(P); EC_POINT_free(Q); BN_free(n1); Loading