diff --git a/commons-math-core/src/main/java/org/apache/commons/math4/core/jdkmath/AccurateMath.java b/commons-math-core/src/main/java/org/apache/commons/math4/core/jdkmath/AccurateMath.java index 96562866b5..7431270009 100644 --- a/commons-math-core/src/main/java/org/apache/commons/math4/core/jdkmath/AccurateMath.java +++ b/commons-math-core/src/main/java/org/apache/commons/math4/core/jdkmath/AccurateMath.java @@ -3077,164 +3077,242 @@ public static float ulp(float x) { } /** - * Multiply a double number by a power of 2. - * @param d number to multiply - * @param n power of 2 - * @return d × 2n + * Compute 2scaleFactor × d. + * + * @param d d + * @param scaleFactor scaleFactor + * @return 2scaleFactor × d */ - public static double scalb(final double d, final int n) { + public static double scalb(final double d, final int scaleFactor) { + + final int sizeBits = 64; + final int fractionBits = 52; + final int implicitFractionBits = fractionBits + 1; + final int maximumExponent = Double.MAX_EXPONENT; + final int minimumExponent = Double.MIN_EXPONENT; + final int minumumSubnormalExponent = Double.MIN_EXPONENT - fractionBits; + final int exponentBias = 1023; + + final long signMask = 0x8000000000000000L; + final long exponentMask = 0x7FF0000000000000L; + final long fractionMask = 0xFFFFFFFFFFFFFL; + + // bit cast d. It is OK (and faster) to use raw because we will trap special values below. + final long bits = Double.doubleToRawLongBits(d); - // first simple and fast handling when 2^n can be represented using normal numbers - if ((n > -1023) && (n < 1024)) { - return d * Double.longBitsToDouble(((long) (n + 1023)) << 52); - } + // extract the exponent field. + final long exponentField = bits & exponentMask; - // handle special cases - if (Double.isNaN(d) || Double.isInfinite(d) || (d == 0)) { + // return special values (NaN, +/-infinity) as-is. + if (exponentField == exponentMask) { return d; } - if (n < -2098) { - return (d > 0) ? 0.0 : -0.0; - } - if (n > 2097) { - return (d > 0) ? Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY; - } - // decompose d - final long bits = Double.doubleToRawLongBits(d); - final long sign = bits & 0x8000000000000000L; - int exponent = ((int) (bits >>> 52)) & 0x7ff; - long mantissa = bits & 0x000fffffffffffffL; - - // compute scaled exponent - int scaledExponent = exponent + n; - - if (n < 0) { - // we are really in the case n <= -1023 - if (scaledExponent > 0) { - // both the input and the result are normal numbers, we only adjust the exponent - return Double.longBitsToDouble(sign | (((long) scaledExponent) << 52) | mantissa); - } else if (scaledExponent > -53) { - // the input is a normal number and the result is a subnormal number - - // recover the hidden mantissa bit - mantissa |= 1L << 52; - - // scales down complete mantissa, hence losing least significant bits - final long mostSignificantLostBit = mantissa & (1L << (-scaledExponent)); - mantissa >>>= 1 - scaledExponent; - if (mostSignificantLostBit != 0) { - // we need to add 1 bit to round up the result - mantissa++; - } - return Double.longBitsToDouble(sign | mantissa); - } else { - // no need to compute the mantissa, the number scales down to 0 - return (sign == 0L) ? 0.0 : -0.0; - } + // get the sign and normalized fraction fields, along with the unbiased exponent value. + // fraction will contain the fractional field value of a normalized double, i.e. without the + // implicit bit, even if the input value was subnormal. Exponent will contain the unbiased + // exponent value, even if the input is subnormal, i.e. we could not store the (biased) + // value of said exponent "directly" in the exponent field position. + final long signField = bits & signMask; + long fractionField = bits & fractionMask; + final long unbiasedExponentValue; + + if (exponentField != 0) { + + // get the unbiased exponent. There's nothing to do with the fraction when the input is + // normal. + unbiasedExponentValue = ((exponentField >> fractionBits) - exponentBias); } else { - // we are really in the case n >= 1024 - if (exponent == 0) { - // the input number is subnormal, normalize it - while ((mantissa >>> 52) != 1) { - mantissa <<= 1; - --scaledExponent; - } - ++scaledExponent; - mantissa &= 0x000fffffffffffffL; + // trap 0, making sure to preserve signed zero. + if (fractionField == 0) { + return d; + } - if (scaledExponent < 2047) { - return Double.longBitsToDouble(sign | (((long) scaledExponent) << 52) | mantissa); - } else { - return (sign == 0L) ? Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY; - } - } else if (scaledExponent < 2047) { - return Double.longBitsToDouble(sign | (((long) scaledExponent) << 52) | mantissa); - } else { - return (sign == 0L) ? Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY; + // d is subnormal. determine how much we have to shift to get the leading 1 into + // implicit position. + final int shift = Long.numberOfLeadingZeros(fractionField) - (sizeBits - implicitFractionBits); + + // shift and then kill off the implicit bit. fractionField now contains a normalized + // value. + fractionField = (fractionField << shift) & fractionMask; + + // but remember to back this out of the exponent. + unbiasedExponentValue = minimumExponent - shift; + } + + // compute the resulting (unbiased) exponent given the input value and scale factor. + final long resultExponent = unbiasedExponentValue + scaleFactor; + + if ((minimumExponent <= resultExponent) && (resultExponent <= maximumExponent)) { + + // result is a normal number, so just load the exponent. We already have the normal + // fraction field (even if the input was subnormal). + return Double.longBitsToDouble(signField | ((resultExponent + exponentBias) << fractionBits) | fractionField); + } else if (resultExponent >= maximumExponent) { + + // overflow. Fraction of zero with the special exponent is an infinity. + return Double.longBitsToDouble(signField | exponentMask); + } else if (resultExponent < (minumumSubnormalExponent - 1)) { + + // the result is closer to 0 than Double.MIN_VALUE: We will be shifting more than 2 + // binary places past 2^-1074, so we will be less than 1/2 of distance between 0 and + // Double.MIN_VALUE. We need to return an appropriately signed zero. + return Double.longBitsToDouble(signField); + } else { + + // the result will be subnormal or possibly underflow to zero. + // We know that -1075 <= resultExponent <= -1023 inside this block. + + // add the implicit bit to the fraction. + fractionField |= 1L << fractionBits; + + // determine the number of bits that we are going to vaporize due to rounding. + long bitsLost = -resultExponent + minimumExponent; + + // capture a guard bit, meaning the last bit lost to rounding; and sticky bits, + // meaning everything else. (Unlike an FPU, we won't convert these to individual bits, + // but just leave them as longs.) Note that this still works when bitsLost = 1. The + // sticky mask will be zero - and thus the sticky bits will always be zero - but + // this is correct. + long guardMask = (1L << (bitsLost - 1)); + long stickyMask = guardMask - 1L; + long guard = fractionField & guardMask; + long sticky = fractionField & stickyMask; + + // actually perform the shift. + fractionField >>= bitsLost; + + if ((guard != 0L) && ((sticky != 0L) || (fractionField & 1L) == 1L)) { + // if the guard is non-zero, then we rounded away at least 1/2 an ulp. + // We need to round up when: + // 1. We rounded away more than 1/2 an ulp. In this case, the sticky bits will be + // non-zero. + // 2. The resulting fraction is odd, since we are in ties-to-even. + fractionField++; } + + return Double.longBitsToDouble(signField | fractionField); } } /** - * Multiply a float number by a power of 2. - * @param f number to multiply - * @param n power of 2 - * @return f × 2n + * Compute 2scaleFactor × d. + * + * @param d d + * @param scaleFactor scaleFactor + * @return 2scaleFactor × d */ - public static float scalb(final float f, final int n) { - - // first simple and fast handling when 2^n can be represented using normal numbers - if ((n > -127) && (n < 128)) { - return f * Float.intBitsToFloat((n + 127) << 23); + public static float scalb(final float d, final int scaleFactor) { + + final int sizeBits = 32; + final int fractionBits = 23; + final int implicitFractionBits = fractionBits + 1; + final int maximumExponent = Float.MAX_EXPONENT; + final int minimumExponent = Float.MIN_EXPONENT; + final int minumumSubnormalExponent = Float.MIN_EXPONENT - fractionBits; + final int exponentBias = 127; + + final int signMask = 0x80000000; + final int exponentMask = 0x7F800000; + final int fractionMask = 0x7FFFFF; + + // bit cast d. It is OK (and faster) to use raw because we will trap special values below. + final int bits = Float.floatToRawIntBits(d); + + // extract the exponent field. + final int exponentField = bits & exponentMask; + + // return special values (NaN, +/-infinity) as-is. + if (exponentField == exponentMask) { + return d; } - // handle special cases - if (Float.isNaN(f) || Float.isInfinite(f) || (f == 0f)) { - return f; - } - if (n < -277) { - return (f > 0) ? 0.0f : -0.0f; - } - if (n > 276) { - return (f > 0) ? Float.POSITIVE_INFINITY : Float.NEGATIVE_INFINITY; - } + // get the sign and normalized fraction fields, aint with the unbiased exponent value. + // fraction will contain the fractional field value of a normalized float, i.e. without the + // implicit bit, even if the input value was subnormal. Exponent will contain the unbiased + // exponent value, even if the input is subnormal, i.e. we could not store the (biased) + // value of said exponent "directly" in the exponent field position. + final int signField = bits & signMask; + int fractionField = bits & fractionMask; + final int unbiasedExponentValue; - // decompose f - final int bits = Float.floatToIntBits(f); - final int sign = bits & 0x80000000; - int exponent = (bits >>> 23) & 0xff; - int mantissa = bits & 0x007fffff; - - // compute scaled exponent - int scaledExponent = exponent + n; - - if (n < 0) { - // we are really in the case n <= -127 - if (scaledExponent > 0) { - // both the input and the result are normal numbers, we only adjust the exponent - return Float.intBitsToFloat(sign | (scaledExponent << 23) | mantissa); - } else if (scaledExponent > -24) { - // the input is a normal number and the result is a subnormal number - - // recover the hidden mantissa bit - mantissa |= 1 << 23; - - // scales down complete mantissa, hence losing least significant bits - final int mostSignificantLostBit = mantissa & (1 << (-scaledExponent)); - mantissa >>>= 1 - scaledExponent; - if (mostSignificantLostBit != 0) { - // we need to add 1 bit to round up the result - mantissa++; - } - return Float.intBitsToFloat(sign | mantissa); - } else { - // no need to compute the mantissa, the number scales down to 0 - return (sign == 0) ? 0.0f : -0.0f; - } + if (exponentField != 0) { + + // get the unbiased exponent. There's nothing to do with the fraction when the input is + // normal. + unbiasedExponentValue = ((exponentField >> fractionBits) - exponentBias); } else { - // we are really in the case n >= 128 - if (exponent == 0) { - // the input number is subnormal, normalize it - while ((mantissa >>> 23) != 1) { - mantissa <<= 1; - --scaledExponent; - } - ++scaledExponent; - mantissa &= 0x007fffff; + // trap 0, making sure to preserve signed zero. + if (fractionField == 0) { + return d; + } - if (scaledExponent < 255) { - return Float.intBitsToFloat(sign | (scaledExponent << 23) | mantissa); - } else { - return (sign == 0) ? Float.POSITIVE_INFINITY : Float.NEGATIVE_INFINITY; - } - } else if (scaledExponent < 255) { - return Float.intBitsToFloat(sign | (scaledExponent << 23) | mantissa); - } else { - return (sign == 0) ? Float.POSITIVE_INFINITY : Float.NEGATIVE_INFINITY; + // d is subnormal. determine how much we have to shift to get the leading 1 into + // implicit position. + final int shift = Integer.numberOfLeadingZeros(fractionField) - (sizeBits - implicitFractionBits); + + // shift and then kill off the implicit bit. fractionField now contains a normalized + // value. + fractionField = (fractionField << shift) & fractionMask; + + // but remember to back this out of the exponent. + unbiasedExponentValue = minimumExponent - shift; + } + + // compute the resulting (unbiased) exponent given the input value and scale factor. + final int resultExponent = unbiasedExponentValue + scaleFactor; + + if ((minimumExponent <= resultExponent) && (resultExponent <= maximumExponent)) { + + // result is a normal number, so just load the exponent. We already have the normal + // fraction field (even if the input was subnormal). + return Float.intBitsToFloat(signField | ((resultExponent + exponentBias) << fractionBits) | fractionField); + } else if (resultExponent >= maximumExponent) { + + // overflow. Fraction of zero with the special exponent is an infinity. + return Float.intBitsToFloat(signField | exponentMask); + } else if (resultExponent < (minumumSubnormalExponent - 1)) { + + // the result is closer to 0 than float.MIN_VALUE: We will be shifting more than 2 + // binary places past 2^-150, so we will be less than 1/2 of distance between 0 and + // float.MIN_VALUE. We need to return an appropriately signed zero. + return Float.intBitsToFloat(signField); + } else { + + // the result will be subnormal or possibly underflow to zero. + // We know that -151 <= resultExponent <= -127 inside this block. + + // add the implicit bit to the fraction. + fractionField |= 1 << fractionBits; + + // determine the number of bits that we are going to vaporize due to rounding. + int bitsLost = -resultExponent + minimumExponent; + + // capture a guard bit, meaning the last bit lost to rounding; and sticky bits, + // meaning everything else. (Unlike an FPU, we won't convert these to individual bits, + // but just leave them as ints.) Note that this still works when bitsLost = 1. The + // sticky mask will be zero - and thus the sticky bits will always be zero - but + // this is correct. + int guardMask = (1 << (bitsLost - 1)); + int stickyMask = guardMask - 1; + int guard = fractionField & guardMask; + int sticky = fractionField & stickyMask; + + // actually perform the shift. + fractionField >>= bitsLost; + + if ((guard != 0) && ((sticky != 0) || (fractionField & 1) == 1)) { + // if the guard is non-zero, then we rounded away at least 1/2 an ulp. + // We need to round up when: + // 1. We rounded away more than 1/2 an ulp. In this case, the sticky bits will be + // non-zero. + // 2. The resulting fraction is odd, since we are in ties-to-even. + fractionField++; } + + return Float.intBitsToFloat(signField | fractionField); } } diff --git a/commons-math-core/src/test/java/org/apache/commons/math4/core/jdkmath/ScalbTest.java b/commons-math-core/src/test/java/org/apache/commons/math4/core/jdkmath/ScalbTest.java new file mode 100644 index 0000000000..cf08aaae7d --- /dev/null +++ b/commons-math-core/src/test/java/org/apache/commons/math4/core/jdkmath/ScalbTest.java @@ -0,0 +1,220 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math4.core.jdkmath; + +import org.junit.Assert; +import org.junit.Test; + +/** + * Tests for scalb. + */ +public class ScalbTest { + + @Test + public void doubleSpecial() { + + double nonCanonicalNaN = Double.longBitsToDouble(0x7ff8000000000001L); + + for(int scaleFactor = Double.MIN_EXPONENT * 2; scaleFactor <= (Double.MAX_EXPONENT - Double.MIN_EXPONENT); scaleFactor++) { + assertBitPattern(StrictMath.scalb(Double.NaN, scaleFactor), AccurateMath.scalb(Double.NaN, scaleFactor)); + assertBitPattern(StrictMath.scalb(nonCanonicalNaN, scaleFactor), AccurateMath.scalb(nonCanonicalNaN, scaleFactor)); + assertBitPattern(StrictMath.scalb(Double.POSITIVE_INFINITY, scaleFactor), AccurateMath.scalb(Double.POSITIVE_INFINITY, scaleFactor)); + assertBitPattern(StrictMath.scalb(Double.NEGATIVE_INFINITY, scaleFactor), AccurateMath.scalb(Double.NEGATIVE_INFINITY, scaleFactor)); + assertBitPattern(StrictMath.scalb(Double.NEGATIVE_INFINITY, scaleFactor), AccurateMath.scalb(Double.NEGATIVE_INFINITY, scaleFactor)); + } + } + + @Test + public void doubleZero() { + + double zero = 0; + double negativeZero = Math.copySign(zero, -1d); + + for(int scaleFactor = Double.MIN_EXPONENT * 2; scaleFactor <= (Double.MAX_EXPONENT - Double.MIN_EXPONENT); scaleFactor++) { + assertBitPattern(StrictMath.scalb(zero, scaleFactor), AccurateMath.scalb(zero, scaleFactor)); + assertBitPattern(StrictMath.scalb(negativeZero, scaleFactor), AccurateMath.scalb(negativeZero, scaleFactor)); + } + } + + @Test + public void doubleNormalValues() { + + long m = 1; + for(int exp = Double.MIN_EXPONENT; exp <= Double.MAX_EXPONENT; exp++) { + + long exponentBits = (exp + 1023L) << 52; + double powerOfTwo = Double.longBitsToDouble(exponentBits); + + for(int scaleFactor = Double.MIN_EXPONENT * 2; scaleFactor <= (Double.MAX_EXPONENT - Double.MIN_EXPONENT); scaleFactor++) { + + // check the power of two case. + assertBitPattern(StrictMath.scalb(powerOfTwo, scaleFactor), AccurateMath.scalb(powerOfTwo, scaleFactor)); + + for(int count = 0; count < 100; count++) { + + // generate mantissa and construct the value. + long mantissaBits = m & 0x000FFFFFFFFFFFFFL; + double x = Double.longBitsToDouble(exponentBits | mantissaBits); + + assertBitPattern(StrictMath.scalb(x, scaleFactor), AccurateMath.scalb(x, scaleFactor)); + assertBitPattern(StrictMath.scalb(-x, scaleFactor), AccurateMath.scalb(-x, scaleFactor)); + + // Marsaglia XOR pseudo-random shift. + m ^= m << 13; + m ^= m >> 17; + m ^= m << 5; + } + } + } + } + + @Test + public void doubleSubnormal() { + + long m = 1; + for(int scaleFactor = Double.MIN_EXPONENT * 2; scaleFactor <= (Double.MAX_EXPONENT - Double.MIN_EXPONENT); scaleFactor++) { + for(int count = 0; count < 50000; count++) { + + long mantissaBits = m & 0x000FFFFFFFFFFFFFL; + double x = Double.longBitsToDouble(mantissaBits); + + Assert.assertTrue(x <= Double.MIN_NORMAL); + assertBitPattern(StrictMath.scalb(x, scaleFactor), AccurateMath.scalb(x, scaleFactor)); + assertBitPattern(StrictMath.scalb(-x, scaleFactor), AccurateMath.scalb(-x, scaleFactor)); + + m ^= m << 13; + m ^= m >> 17; + m ^= m << 5; + } + } + } + + @Test + public void floatSpecial() { + + for(int scaleFactor = Float.MIN_EXPONENT * 2; scaleFactor <= (Float.MAX_EXPONENT - Float.MIN_EXPONENT); scaleFactor++) { + assertBitPattern(StrictMath.scalb(Float.NaN, scaleFactor), AccurateMath.scalb(Float.NaN, scaleFactor)); + assertBitPattern(StrictMath.scalb(Float.POSITIVE_INFINITY, scaleFactor), AccurateMath.scalb(Float.POSITIVE_INFINITY, scaleFactor)); + assertBitPattern(StrictMath.scalb(Float.NEGATIVE_INFINITY, scaleFactor), AccurateMath.scalb(Float.NEGATIVE_INFINITY, scaleFactor)); + assertBitPattern(StrictMath.scalb(Float.NEGATIVE_INFINITY, scaleFactor), AccurateMath.scalb(Float.NEGATIVE_INFINITY, scaleFactor)); + } + } + + @Test + public void floatZero() { + + float zero = 0; + float negativeZero = Math.copySign(zero, -1f); + + for(int scaleFactor = Float.MIN_EXPONENT * 2; scaleFactor <= (Float.MAX_EXPONENT - Float.MIN_EXPONENT); scaleFactor++) { + assertBitPattern(StrictMath.scalb(zero, scaleFactor), AccurateMath.scalb(zero, scaleFactor)); + assertBitPattern(StrictMath.scalb(negativeZero, scaleFactor), AccurateMath.scalb(negativeZero, scaleFactor)); + } + } + + @Test + public void floatNormalValues() { + + int m = 1; + for(int exp = Float.MIN_EXPONENT; exp <= Float.MAX_EXPONENT; exp++) { + + int exponentBits = (exp + 127) << 23; + float powerOfTwo = Float.intBitsToFloat(exponentBits); + + for(int scaleFactor = Float.MIN_EXPONENT * 2; scaleFactor <= (Float.MAX_EXPONENT - Float.MIN_EXPONENT); scaleFactor++) { + + // check the power of two case. + assertBitPattern(StrictMath.scalb(powerOfTwo, scaleFactor), AccurateMath.scalb(powerOfTwo, scaleFactor)); + + for(int count = 0; count < 100; count++) { + + // generate mantissa and construct the value. + int mantissaBits = m & 0x7fffff; + float x = Float.intBitsToFloat(exponentBits | mantissaBits); + + assertBitPattern(StrictMath.scalb(x, scaleFactor), AccurateMath.scalb(x, scaleFactor)); + assertBitPattern(StrictMath.scalb(-x, scaleFactor), AccurateMath.scalb(-x, scaleFactor)); + + // Marsaglia XOR pseudo-random shift. + m ^= m << 13; + m ^= m >> 17; + m ^= m << 5; + } + } + } + } + + @Test + public void floatSubnormal() { + + int m = 1; + for(int scaleFactor = Float.MIN_EXPONENT * 2; scaleFactor <= (Float.MAX_EXPONENT - Float.MIN_EXPONENT); scaleFactor++) { + for(int count = 0; count < 50000; count++) { + + int mantissaBits = m & 0x7fffff; + float x = Float.intBitsToFloat(mantissaBits); + + Assert.assertTrue(x <= Float.MIN_NORMAL); + assertBitPattern(StrictMath.scalb(x, scaleFactor), AccurateMath.scalb(x, scaleFactor)); + assertBitPattern(StrictMath.scalb(-x, scaleFactor), AccurateMath.scalb(-x, scaleFactor)); + + m ^= m << 13; + m ^= m >> 17; + m ^= m << 5; + } + } + } + + /** + * Assert that the specified doubles have the same bit pattern, except for NaN. + *

+ * If expected is NaN, we only check that actual is also + * NaN, not that they have the same fraction value. + * + * @param expected Expected value. + * @param actual Actual value. + */ + private static void assertBitPattern(final double expected, final double actual) { + + if(Double.isNaN(expected)) { + Assert.assertTrue(Double.isNaN(actual)); + return; + } + + Assert.assertEquals(Double.doubleToRawLongBits(expected), Double.doubleToRawLongBits(actual)); + } + + /** + * Assert that the specified floats have the same bit pattern, except for NaN. + *

+ * If expected is NaN, we only check that actual is also + * NaN, not that they have the same fraction value. + * + * @param expected Expected value. + * @param actual Actual value. + */ + private static void assertBitPattern(final float expected, final float actual) { + + if(Float.isNaN(expected)) { + Assert.assertTrue(Float.isNaN(actual)); + return; + } + + Assert.assertEquals(Float.floatToIntBits(expected), Float.floatToIntBits(actual)); + } + +}