g++ gives wrong answers : float128 aka long double : cmath - Stack Overflow-软件玩家

admin管理员组
文章数量:1305239

g++ gives grossly wrong numerical answers under some conditions. All this is 100% reproducible chez moi.

It fails differently depending on optimization level. It fails on my Ubuntu 24.04.1 LTS "noble" x86_64 platform ... and it fails in exactly the same way on an older "jammy" platform. The clang++ compiler on x86 is even worse; OTOH clang++ on an arm64 platform gets the right answers.

Here is some code that exercises the bugs:

#include <iostream>
#include <iomanip>
#include <sstream>
#include <vector>
using namespace std;
// android: error: __float128 is not supported on this target
typedef long double LD;

#define Literally(x) #x
#define Quote(X) Literally(X)
#define join(x,y) x ## y
#define append_L(x) join(x,L)

#include <cmath>

#if defined(__x86_64)
  string arch("__x86_64");
#elif defined(__ARM_ARCH_ISA_A64)
  string arch("__ARM_ARCH_ISA_A64");
#else
  string arch("undetermined");
#endif

#if defined(__OPTIMIZE__)
  string optimizing("Optimizing");
#else
  string optimizing("notOptimizing");
#endif

LD pi(3.14159265358979323846264338327950288419716939937510L);

double smash(LD const arg){
  return double(arg);
}

string fmt(LD const _val);

string pretty(string const arg) {
  return ((arg[0] == '-') ? "" : " ") + arg;
}

// Let's make sure compiler knows about long doubles.
// If this fails, you may need a compiler option
// such as -mlong-double-128 or the like.
static_assert (1.L + 1e-34L > 1.L, "need 128-bit long doubles");

#define munge(fun,arg,shift,goodval)            \
  { cout << "  ***  munging: "                  \
        << Quote(fun(arg))                      \
        << " shift " Quote(shift) << endl;      \
    cout << "should be:  " << pretty(Quote(goodval)) << endl;         \
    LD qqq = append_L(goodval);                               \
    if(shift) cout << "shifted:    " << fmt(qqq+shift) << endl;   \
    auto foo = fun(LD(arg));                        \
    LD bar; bar = fun(LD(arg));                     \
    cout << "initialize: " << fmt(foo)                  \
        << "  Δ: " << smash(foo - qqq) << endl;        \
    cout << "assign:     " << fmt(bar)                  \
        << "  Δ: " << smash(bar - qqq) << endl;        \
    vector<LD> vec; vec.push_back(arg);                 \
    auto vvv = fun(LD(vec.front()));                        \
    cout << "vector:     " << fmt(vvv)                  \
        << "  Δ: " << smash(vvv - qqq) << endl;        \
  }

int main(int argc, char * const * argv) {
  cout << "architecture: " << arch
        << "  version: " << Quote(__VERSION__)
        << "  " << optimizing
        << endl;
  munge(atan,1.0L,0,7.85398163397448309615660845819875699e-01)
  munge(sin,1.0L,0,8.41470984807896506652502321630299051e-01)
  munge(sin,pi - 1e-12,0,9.99999999999999979886734180690249881e-13)
  munge(cos,pi - 1e-12,1,-9.99999999999999999999999499999999955e-01)
  munge(log,1.L + 1e-20,0,9.99999999999994678933579770361535981e-21)
}

/////////////////////////////////////////////

LD power(LD const base, LD const pow) {
  if (pow != floor(smash(pow))) {
    cout << ">>> " << smash(pow) << endl;
    cout << ">>> " << floor(smash(pow)) << endl;
    throw runtime_error("Sorry, can't take non-integer power");
  }
  long int powmag = pow >= 0 ? pow : -pow;
  LD rover = base;
  LD rslt = 1.L;
  for (int ii = 0; ii < 32; ii++) {
    if (powmag == 0) break;
    if (ii) {
      rover *= rover;
    }
    if ((powmag & 1L)) {
      rslt *= rover;
    }
    powmag >>= 1;
  }
  return (pow >= 0) ? rslt : 1.L / rslt;
}

string fmt(LD const _val) {
  LD val(_val >= 0 ? _val : -_val);
  if (isinf(smash(val))) return "inf";
  stringstream rslt;
  rslt << ((_val >= 0) ? " " : "-");

// fails if number is too small to represent as a double:
  int exx = 0;
  if (val < 0.L) throw runtime_error("wtf?");
  if (val == 0.L) {
// don't try to normalize 0
  } else {
    LD acc(val);
    while (acc < 1) {
      acc *= 1.e5L;
      exx -= 5;
    }
    exx += floor(log10(smash(acc)));
    if (exx < -4932) {
      val = 1e-10;
    } else {
      LD scale = power(10.L, -exx);
      if (scale == 0) {
        cout << "_val: " << smash(val) << endl;
        cout << "val: " << smash(_val) << " eq: " << (val == 0) << endl;
        cout << "exx: " << exx << endl;
        cout << "scale: " << smash(scale) << endl;
        throw runtime_error("Sorry, too big for me");
      }
      val *= scale;
    }
    if (val < 0.L) throw runtime_error("WTF? < 0");
    if (val > 10.L) {
      cout << "??? " << exx << endl;
      throw runtime_error("WTF? > 1.e6");
    }
  }

  for (int ii = 0; ii < 36; ii++) {
    int foo = val;
    if (foo < 0) throw runtime_error("WTF??");
    rslt << int(foo);
    if (ii == 0) rslt << ".";
    LD sub = LD(foo);
    val -= sub;
    val *= 10.L;
  }
  if (exx) {
    string sign = exx < 0 ? "-" : "";
    rslt << "e" << setfill('0') << sign << setw(2) << abs(exx);
  }

  string penult = rslt.str();
  int pad = 43 - penult.length();
  if (pad > 0) penult += string(pad, ' ');
  return penult;
}

By way of background: It works just fine on an arm64 platform using clang, so my code is not completely crazy. It can be compiled with: :; clang++ float128.c

Here is the output when things are working properly. You can see that all the Deltas are zero, as they should be:

architecture: __ARM_ARCH_ISA_A64  version: "Clang 19.1.7"  notOptimizing
  ***  munging: atan(1.0L) shift 0
should be:   7.85398163397448309615660845819875699e-01
initialize:  7.85398163397448309615660845819875699e-01   Δ: 0
assign:      7.85398163397448309615660845819875699e-01   Δ: 0
vector:      7.85398163397448309615660845819875699e-01   Δ: 0
  ***  munging: sin(1.0L) shift 0
should be:   8.41470984807896506652502321630299051e-01
initialize:  8.41470984807896506652502321630298992e-01   Δ: 0
assign:      8.41470984807896506652502321630298992e-01   Δ: 0
vector:      8.41470984807896506652502321630298992e-01   Δ: 0
  ***  munging: sin(pi - 1e-12) shift 0
should be:   9.99999999999999979886734180690249881e-13
initialize:  0.99999999999999997988673418069024986e-12   Δ: 0
assign:      0.99999999999999997988673418069024986e-12   Δ: 0
vector:      0.99999999999999997988673418069024986e-12   Δ: 0
  ***  munging: cos(pi - 1e-12) shift 1
should be:  -9.99999999999999999999999499999999955e-01
shifted:     5.00000000044794469975736991122850022e-25 
initialize: -0.99999999999999999999999949999999993       Δ: 0
assign:     -0.99999999999999999999999949999999993       Δ: 0
vector:     -0.99999999999999999999999949999999993       Δ: 0
  ***  munging: log(1.L + 1e-20) shift 0
should be:   9.99999999999994678933579770361535981e-21
initialize:  9.99999999999994678933579770361536055e-21   Δ: 0
assign:      9.99999999999994678933579770361536055e-21   Δ: 0
vector:      9.99999999999994678933579770361536055e-21   Δ: 0

Things are not so good on my Ubuntu 24.04.1 LTS "noble" x86_64 platform ... and it fails in exactly the same way on an older "jammy" platform. It's compiled with: :; g++ -O3 -g -Wall -Wstrict-overflow=2 -std=c++23 -mlong-double-128 float128.c

Here is the buggy output:

architecture: __x86_64  version: "11.4.0"  Optimizing
  ***  munging: atan(1.0L) shift 0
should be:   7.85398163397448309615660845819875699e-01
initialize:  7.85398163397448309615660845819875699e-01   Δ: 0
assign:      7.85398163397448309615660845819875699e-01   Δ: 0
vector:      1.00000000000000000000000000000000000       Δ: 0.214602
  ***  munging: sin(1.0L) shift 0
should be:   8.41470984807896506652502321630299051e-01
initialize:  8.41470984807896506652502321630298992e-01   Δ: -9.62965e-35
assign:      8.41470984807896506652502321630298992e-01   Δ: -9.62965e-35
vector:      1.00000000000000000000000000000000000       Δ: 0.158529
  ***  munging: sin(pi - 1e-12) shift 0
should be:   9.99999999999999979886734180690249881e-13
initialize:  3.14159265358879323846264338329961614       Δ: 3.14159
assign:      3.14159265358879323846264338329961614       Δ: 3.14159
vector:      3.14159265358879323846264338329961614       Δ: 3.14159
  ***  munging: cos(pi - 1e-12) shift 1
should be:  -9.99999999999999999999999499999999955e-01
shifted:     5.00000000044794469975736991122850022e-25 
initialize:  3.14159265358879323846264338329961614       Δ: 4.14159
assign:      3.14159265358879323846264338329961614       Δ: 4.14159
vector:      3.14159265358879323846264338329961614       Δ: 4.14159
  ***  munging: log(1.L + 1e-20) shift 0
should be:   9.99999999999994678933579770361535981e-21
initialize:  9.99999999999994678933579770361536055e-21   Δ: 0
assign:      9.99999999999994678933579770361536055e-21   Δ: 0
vector:      1.00000000000000000000999999999999994       Δ: 1

It fails differently depending on optimization level, compiled with: :; g++ -O0 -g -Wall -Wstrict-overflow=2 -std=c++23 -mlong-double-128 float128.c

Here is the differently-buggy output:

architecture: __x86_64  version: "11.4.0"  notOptimizing
  ***  munging: atan(1.0L) shift 0
should be:   7.85398163397448309615660845819875699e-01
initialize:  7.85398163397448309615660845819875699e-01   Δ: 0
assign:      1.00000000000000000000000000000000000       Δ: 0.214602
vector:      1.00000000000000000000000000000000000       Δ: 0.214602
  ***  munging: sin(1.0L) shift 0
should be:   8.41470984807896506652502321630299051e-01
initialize:  8.41470984807896506652502321630298992e-01   Δ: -9.62965e-35
assign:      1.00000000000000000000000000000000000       Δ: 0.158529
vector:      1.00000000000000000000000000000000000       Δ: 0.158529
  ***  munging: sin(pi - 1e-12) shift 0
should be:   9.99999999999999979886734180690249881e-13
initialize:  0.00000000010000000000000000364321973e-4947  Δ: -1e-12
assign:      0.00000000010000000000000000364321973e-4947  Δ: -1e-12
vector:      0.00000000010000000000000000364321973e-4947  Δ: -1e-12
  ***  munging: cos(pi - 1e-12) shift 1
should be:  -9.99999999999999999999999499999999955e-01
shifted:     5.00000000044794469975736991122850022e-25 
initialize:  0.00000000010000000000000000364321973e-4947  Δ: 1
assign:      0.00000000010000000000000000364321973e-4947  Δ: 1
vector:      0.00000000010000000000000000364321973e-4947  Δ: 1
  ***  munging: log(1.L + 1e-20) shift 0
should be:   9.99999999999994678933579770361535981e-21
initialize:  9.99999999999994678933579770361536055e-21   Δ: 0
assign:      1.00000000000000000000999999999999994       Δ: 1
vector:      1.00000000000000000000999999999999994       Δ: 1

=========================================

If you simplify the example very much, it fails differently or not at all.

So, questions: Is anybody else seeing this? Is there any way to obtain reliable results? Would it help to rewrite all my code to call the quadmath library functions, rather than the cmath functions?

本文标签： g gives wrong answersfloat128 aka long doublecmathStack Overflow

版权声明：本文标题：g++ gives wrong answers : float128 aka long double : cmath - Stack Overflow 内容由网友自发贡献，该文观点仅代表作者本人，转载请联系作者并注明出处：http://www.betaflare.com/web/1741794597a2397861.html，本站仅提供信息存储空间服务，不拥有所有权，不承担相关法律责任。如发现本站有涉嫌抄袭侵权/违法违规的内容，一经查实，本站将立刻删除。

编程频道|软件玩家 - 软件改变生活！

g++ gives wrong answers : float128 aka long double : cmath - Stack Overflow

更多相关文章