```1 /**
2 This module contains basic summation algorithms.
3
5
6 Authors: \$(WEB 9il.github.io, Ilya Yaroshenko)
7
8 Source: \$(PHOBOSSRC std/numeric/_summation.d)
9 */
10 module atmosphere.summation;
11
12 import std.traits;
13 import std.typecons;
14 import std.range;
15 import std.math;
16
17 private template SummationType(F)
18     if (isFloatingPoint!F || isComplex!F)
19 {
20     version(X86) //workaround for Issue 13474
21     {
22         static if (!is(Unqual!F == real) && (isComplex!F || !is(Unqual!(typeof(F.init.re)) == real)))
23             pragma(msg, "Warning: Summation algorithms on x86 use 80bit representation for single and double floating point numbers.");
24         static if (isComplex!F)
25         {
26             import std.complex : Complex;
27             alias SummationType = Complex!real;
28         }
29         else
30             alias SummationType = real;
31     }
32     else
33         alias SummationType = F;
34 }
35
36 /++
37 Computes accurate sum of binary logarithms of input range \$(D r).
38 +/
39 ElementType!Range sumOfLog2s(Range)(Range r)
40     if (isInputRange!Range && isFloatingPoint!(ElementType!Range))
41 {
42     long exp;
43     Unqual!(typeof(return)) x = 1;
44     foreach (e; r)
45     {
46         if (e < 0)
47             return typeof(return).nan;
48         int lexp = void;
49         x *= frexp(e, lexp);
50         exp += lexp;
51         if (x < 0.5)
52         {
53             x *= 2;
54             exp--;
55         }
56     }
57     return exp + log2(x);
58 }
59
60 ///
61 unittest
62 {
63     import std.math, std.numeric;
64     assert(sumOfLog2s(new double[0]) == 0);
65     assert(sumOfLog2s([0.0L]) == -real.infinity);
66     assert(sumOfLog2s([-0.0L]) == -real.infinity);
67     assert(sumOfLog2s([2.0L]) == 1);
68     assert(sumOfLog2s([-2.0L]).isNaN());
69     assert(sumOfLog2s([real.nan]).isNaN());
70     assert(sumOfLog2s([-real.nan]).isNaN());
71     assert(sumOfLog2s([real.infinity]) == real.infinity);
72     assert(sumOfLog2s([-real.infinity]).isNaN());
73     assert(sumOfLog2s([ 0.25, 0.25, 0.25, 0.125 ]) == -9);
74 }
75
76
77 /++
78 Computes geometric mean of input range \$(D r).
79 +/
80 ElementType!Range geometricMean(Range)(Range r)
81     if (isInputRange!Range && isFloatingPoint!(ElementType!Range))
82 {
83     static if (hasLength!Range)
84     {
85         immutable n = r.length;
86         immutable s = r.sumOfLog2s();
87     }
88     else
89     {
90         import std.range : tee;
91         size_t n;
92         immutable s = r.tee!((_){n++;}).sumOfLog2s();
93     }
94     return exp2(s / n);
95 }
96
97 ///
98 unittest {
99     assert(geometricMean([2.0, 8.0]) == 4.0);
100 }
101
102
103 /++
104 Summation algorithms.
105 +/
106 enum Summation
107 {
108     /++
109     Fast summation algorithm.
110     +/
111     Fast,
112
113     /++
114     Naive algorithm (one by one).
115     +/
116     Naive,
117
118     /++
119     \$(LUCKY Pairwise summation) algorithm. Range must be a finite sliceable range.
120     +/
121     Pairwise,
122
123     /++
124     \$(LUCKY Kahan summation) algorithm.
125     +/
126     Kahan,
127
128     /++
129     \$(LUCKY Kahan-Babuška-Neumaier summation algorithm). \$(D KBN) gives more accurate results then \$(D Kahan).
130     +/
131     KBN,
132
133     /++
134     \$(LUCKY Generalized Kahan-Babuška summation algorithm), order 2. \$(D KB2) gives more accurate results then \$(D Kahan) and \$(D KBN).
135     +/
136     KB2,
137
138     /++
139     Precise summation algorithm.
140     The value of the sum is rounded to the nearest representable
141     floating-point number using the \$(LUCKY round-half-to-even rule).
142     Result can be differ from the exact value on \$(D X86), \$(D nextDown(proir) <= result &&  result <= nextUp(proir)).
143     The current implementation re-establish special value semantics across iterations (i.e. handling -inf + inf).
144
146         "Adaptive Precision Floating-Point Arithmetic and Fast Robust Geometric Predicates", Jonathan Richard Shewchuk),
147         \$(LINK2 http://bugs.python.org/file10357/msum4.py, Mark Dickinson's post at bugs.python.org).
148     +/
149     /+
150     Precise summation function as msum() by Raymond Hettinger in
151     <http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/393090>,
152     enhanced with the exact partials sum and roundoff from Mark
153     Dickinson's post at <http://bugs.python.org/file10357/msum4.py>.
154     See those links for more details, proofs and other references.
155     IEEE 754R floating point semantics are assumed.
156     +/
157     Precise,
158
159     /++
160     Performs \$(D Pairwise) summation for random access ranges.
161     Otherwise performs \$(D KBN) summation of floating point and complex numbers and
162     \$(D Kahan) summation of user defined types.
163     +/
164     Appropriate,
165 }
166
167
168 /++
169 Computes sum of range.
170 +/
171 template fsum(F, Summation summation = Summation.Precise)
172     if (isFloatingPoint!F && isMutable!F)
173 {
174     F fsum(Range)(Range r)
175         if (isSummable!(Range, F))
176     {
177         alias sum = Algo!(Range, summation);
178         return sum!(Range, Unqual!F)(r);
179     }
180
181     F fsum(Range)(F seed, Range r)
182         if (isSummable!(Range, Unqual!F))
183     {
184         alias sum = Algo!(Range, summation);
185         return sum!(Range, Unqual!F)(r, seed);
186     }
187 }
188
189 ///ditto
190 template fsum(Summation summation = Summation.Precise)
191 {
192     ForeachType!Range fsum(Range)(Range r)
193         if (isSummable!(Range, Unqual!(ForeachType!Range)))
194     {
195         alias sum = Algo!(Range, summation);
196         return sum!(Range, Unqual!(typeof(return)))(r);
197     }
198
199     F fsum(F, Range)(F seed, Range r)
200         if (isSummable!(Range, Unqual!F))
201     {
202         alias sum = Algo!(Range, summation);
203         return sum!(Range, F)(r, seed);
204     }
205 }
206
207 ///
208 unittest {
209     import std.algorithm;
210     auto ar = [1, 1e100, 1, -1e100].map!(a => a*10000);
211     const r = 20000;
212     assert(r == ar.fsum!(Summation.KBN));
213     assert(r == ar.fsum!(Summation.KB2));
214     assert(r == ar.fsum); //Summation.Precise
215 }
216
217 // FIXME
218 // Fails for 32bit systems.
220 // and https://github.com/D-Programming-Language/phobos/pull/2513
221 version(X86)
222 {
223
224 }
225 else
226 {
227     unittest {
228         import std.algorithm;
229         auto ar = [1, 1e100, 1, -1e100].map!(a => a*10000);
230         const r = 20000;
231         assert(r != ar.fsum!(Summation.Naive));
232         assert(r != ar.fsum!(Summation.Kahan));
233         assert(r != ar.fsum!(Summation.Pairwise));
234     }
235 }
236
237 ///
238 unittest
239 {
240     import std.math, std.algorithm, std.range;
241     auto ar = 1000
242         .iota
243         .map!(n => 1.7L.pow(n+1) - 1.7L.pow(n))
244         .array
245         ;
246     //Summation.Precise is default
247     real d = 1.7L.pow(1000);
248     assert(fsum(ar.chain([-d])) == -1);
249     assert(fsum(-d, ar.retro) == -1);
250 }
251
252 /++
253 \$(D Naive), \$(D Pairwise) and \$(D Kahan) algorithms can be used for user defined types.
254 +/
255 unittest {
256     static struct Quaternion(F)
257         if (isFloatingPoint!F)
258     {
259         F[3] array;
260
262         Quaternion opBinary(string op)(auto ref Quaternion rhs) const
263             if (op == "+" || op == "-")
264         {
265             Quaternion ret = void;
266             foreach (i, ref e; ret.array)
267                 mixin("e = array[i] "~op~" rhs.array[i];");
268             return ret;
269         }
270
272         Quaternion opOpAssign(string op)(auto ref Quaternion rhs)
273             if (op == "+" || op == "-")
274         {
275             foreach (i, ref e; array)
276                 mixin("e "~op~"= rhs.array[i];");
277             return this;
278         }
279
280         ///constructor with single FP argument
281         this(F f)
282         {
283             array[] = f;
284         }
285
286         ///assigment with single FP argument
287         void opAssign(F f)
288         {
289             array[] = f;
290         }
291     }
292
293     Quaternion!double q, p, r;
294     q.array = [0, 1, 2];
295     p.array = [3, 4, 5];
296     r.array = [3, 5, 7];
297
298     assert(r == [p, q].fsum!(Summation.Naive));
299     assert(r == [p, q].fsum!(Summation.Pairwise));
300     assert(r == [p, q].fsum!(Summation.Kahan));
301 }
302
303 /++
304 All summation algorithms available for complex numbers.
305 +/
306 unittest
307 {
308     import std.complex;
309     Complex!double[] ar = [complex(1.0, 2), complex(2, 3), complex(3, 4), complex(4, 5)];
310     Complex!double r = complex(10, 14);
311     assert(r == ar.fsum!(Summation.Fast));
312     assert(r == ar.fsum!(Summation.Naive));
313     assert(r == ar.fsum!(Summation.Pairwise));
314     assert(r == ar.fsum!(Summation.Kahan));
315     assert(r == ar.fsum!(Summation.KBN));
316     assert(r == ar.fsum!(Summation.KB2));
317     assert(r == ar.fsum); //Summation.Precise
318 }
319
320
321 /++
322 Output range for summation.
323 \$(D Precise), \$(D KB2), \$(D KBN) and \$(D Kahan) algorithms are supported.
324 +/
325 struct Summator(T, Summation summation = Summation.Precise)
326     if (
327         isMutable!T && (
328             summation == Summation.Precise && isFloatingPoint!T
329              || summation == Summation.Kahan && isSummable!T
330              || (summation == Summation.KBN || summation == Summation.KB2)
331                    && (isFloatingPoint!T || isComplex!T)
332             )
333         )
334 {
335     static if (summation == Summation.Kahan)
336         alias F = T;
337     else
338         alias F = SummationType!T;
339
340     static if (summation == Summation.Precise)
341     {
342         import std.internal.scopebuffer;
343
344     private:
345         enum F M = (cast(F)(2)) ^^ (T.max_exp - 1);
346         F[32] scopeBufferArray = void;
347         ScopeBuffer!F partials;
348         //sum for NaN and infinity.
349         F s;
350         //Overflow Degree. Count of 2^^F.max_exp minus count of -(2^^F.max_exp)
351         sizediff_t o;
352
353
354         /++
355         Compute the sum of a list of nonoverlapping floats.
356         On input, partials is a list of nonzero, nonspecial,
357         nonoverlapping floats, strictly increasing in magnitude, but
358         possibly not all having the same sign.
359         On output, the sum of partials gives the error in the returned
360         result, which is correctly rounded (using the round-half-to-even
361         rule).
362         Two floating point values x and y are non-overlapping if the least significant nonzero
363         bit of x is more significant than the most significant nonzero bit of y, or vice-versa.
364         +/
365         static F partialsReduce(F s, in F[] partials)
366         in
367         {
368             debug(numeric) assert(!partials.length || s.isFinite);
369         }
370         body
371         {
372             bool _break = void;
373             foreach_reverse(i, y; partials)
374             {
375                 s = partialsReducePred(s, y, i ? partials[i-1] : 0, _break);
376                 if (_break)
377                     break;
378                 debug(numeric) assert(s.isFinite);
379             }
380             return s;
381         }
382
383         static F partialsReducePred(F s, F y, F z, out bool _break)
384         out(result)
385         {
386             debug(numeric) assert(result.isFinite);
387         }
388         body
389         {
390             F x = s;
391             s = x + y;
392             F d = s - x;
393             F l = y - d;
394             debug(numeric)
395             {
396                 assert(x.isFinite);
397                 assert(y.isFinite);
398                 assert(s.isFinite);
399                 assert(fabs(y) < fabs(x));
400             }
401             if (l)
402             {
403             //Make half-even rounding work across multiple partials.
404             //Needed so that sum([1e-16, 1, 1e16]) will round-up the last
405             //digit to two instead of down to zero (the 1e-16 makes the 1
406             //slightly closer to two). Can guarantee commutativity.
407                 if (z && !signbit(l * z))
408                 {
409                     l *= 2;
410                     x = s + l;
411                     F t = x - s;
412                     if (l == t)
413                         s = x;
414                 }
415                 _break = true;
416             }
417             return s;
418         }
419
420         //Returns corresponding infinity if is overflow and 0 otherwise.
421         F overflow()
422         {
423             if (o == 0)
424                 return 0;
425             if (partials.length && (o == -1 || o == 1)  && signbit(o * partials[\$-1]))
426             {
427                 // problem case: decide whether result is representable
428                 F x = o * M;
429                 F y = partials[\$-1] / 2;
430                 F h = x + y;
431                 F d = h - x;
432                 F l = (y - d) * 2;
433                 y = h * 2;
434                 d = h + l;
435                 F t = d - h;
436                 version(X86)
437                 {
438                     if (!.isInfinity(cast(T)y) || !.isInfinity(sum()))
439                         return 0;
440                 }
441                 else
442                 {
443                     if (!.isInfinity(cast(T)y) || partials.length > 1 && !signbit(l * partials[\$-2]) && t == l)
444                         return 0;
445                 }
446             }
447             return F.infinity * o;
448         }
449     }
450     else
451     static if (summation == Summation.KB2)
452     {
453         F s;
454         F cs;
455         F ccs;
456     }
457     else
458     static if (summation == Summation.KBN)
459     {
460         F s;
461         F c;
462     }
463     else
464     static if (summation == Summation.Kahan)
465     {
466         F s;
467         F c;
468         F y; // do not declare in the loop/put (algo can be used for matrixes and etc)
469         F t; // ditto
470     }
471
472
473 public:
474
475     ///
476     this(T n)
477     {
478         static if (summation == Summation.Precise)
479         {
480             partials = scopeBuffer(scopeBufferArray);
481             s = 0.0;
482             o = 0;
483             if (n) put(n);
484         }
485         else
486         static if (summation == Summation.KB2)
487         {
488             s = n;
489             cs = 0.0;
490             ccs = 0.0;
491
492         }
493         else
494         static if (summation == Summation.KBN)
495         {
496             s = n;
497             c = 0.0;
498         }
499         else
500         static if (summation == Summation.Kahan)
501         {
502             s = n;
503             c = 0.0;
504         }
505     }
506
507     ///
508     //@disable this();
509
510     // free ScopeBuffer
511     static if (summation == Summation.Precise)
512     ~this()
513     {
514         partials.free;
515     }
516
517     // copy ScopeBuffer if necessary
518     static if (summation == Summation.Precise)
519     this(this)
520     {
521         auto a = partials[];
522         if (scopeBufferArray.ptr !is a.ptr)
523         {
524             partials = scopeBuffer(scopeBufferArray);
525             partials.put(a);
526         }
527     }
528
529     ///Adds \$(D x) to the internal partial sums.
530     void put(T n)
531     {
532         F x = n;
533         static if (summation == Summation.Precise)
534         {
535             if (.isFinite(x))
536             {
537                 size_t i;
538                 foreach (y; partials[])
539                 {
540                     F h = x + y;
541                     if (.isInfinity(cast(T)h))
542                     {
543                         if (fabs(x) < fabs(y))
544                         {
545                             F t = x; x = y; y = t;
546                         }
547                         //h == -F.infinity
548                         if (signbit(h))
549                         {
550                             x += M;
551                             x += M;
552                             o--;
553                         }
554                         //h == +F.infinity
555                         else
556                         {
557                             x -= M;
558                             x -= M;
559                             o++;
560                         }
561                         debug(numeric) assert(x.isFinite);
562                         h = x + y;
563                     }
564                     debug(numeric) assert(h.isFinite);
565                     F l;
566                     if (fabs(x) < fabs(y))
567                     {
568                         F t = h - y;
569                         l = x - t;
570                     }
571                     else
572                     {
573                         F t = h - x;
574                         l = y - t;
575                     }
576                     debug(numeric) assert(l.isFinite);
577                     if (l)
578                     {
579                         partials[i++] = l;
580                     }
581                     x = h;
582                 }
583                 partials.length = i;
584                 if (x)
585                 {
586                     partials.put(x);
587                 }
588             }
589             else
590             {
591                 s += x;
592             }
593         }
594         else
595         static if (summation == Summation.KB2)
596         {
597             static if (isFloatingPoint!F)
598             {
599                 F t = s + x;
600                 F c = void;
601                 if (fabs(s) >= fabs(x))
602                 {
603                     F d = s - t;
604                     c = d + x;
605                 }
606                 else
607                 {
608                     F d = x - t;
609                     c = d + s;
610                 }
611                 s = t;
612                 t = cs + c;
613                 if (fabs(cs) >= fabs(c))
614                 {
615                     F d = cs - t;
616                     d += c;
617                     ccs += d;
618                 }
619                 else
620                 {
621                     F d = c - t;
622                     d += cs;
623                     ccs += d;
624                 }
625                 cs = t;
626             }
627             else
628             {
629                 F t = s + x;
630                 if (fabs(s.re) < fabs(x.re))
631                 {
632                     auto t_re = s.re;
633                     s.re = x.re;
634                     x.re = t_re;
635                 }
636                 if (fabs(s.im) < fabs(x.im))
637                 {
638                     auto t_im = s.im;
639                     s.im = x.im;
640                     x.im = t_im;
641                 }
642                 F c = (s-t)+x;
643                 s = t;
644                 if (fabs(cs.re) < fabs(c.re))
645                 {
646                     auto t_re = cs.re;
647                     cs.re = c.re;
648                     c.re = t_re;
649                 }
650                 if (fabs(cs.im) < fabs(c.im))
651                 {
652                     auto t_im = cs.im;
653                     cs.im = c.im;
654                     c.im = t_im;
655                 }
656                 F d = cs - t;
657                 d += c;
658                 ccs += d;
659                 cs = t;
660             }
661         }
662         else
663         static if (summation == Summation.KBN)
664         {
665             static if (isFloatingPoint!F)
666             {
667                 F t = s + x;
668                 if (fabs(s) >= fabs(x))
669                 {
670                     F d = s - t;
671                     d += x;
672                     c += d;
673                 }
674                 else
675                 {
676                     F d = x - t;
677                     d += s;
678                     c += d;
679                 }
680                 s = t;
681             }
682             else
683             {
684                 F t = s + x;
685                 if (fabs(s.re) < fabs(x.re))
686                 {
687                     auto t_re = s.re;
688                     s.re = x.re;
689                     x.re = t_re;
690                 }
691                 if (fabs(s.im) < fabs(x.im))
692                 {
693                     auto t_im = s.im;
694                     s.im = x.im;
695                     x.im = t_im;
696                 }
697                 F d = s - t;
698                 d += x;
699                 c += d;
700                 s = t;
701             }
702         }
703         else
704         static if (summation == Summation.Kahan)
705         {
706             y = x - c;
707             t = s + y;
708             c = t - s;
709             c -= y;
710             s = t;
711         }
712     }
713
714     /++
715     Adds \$(D x) to the internal partial sums.
716     This operation doesn't re-establish special
717     value semantics across iterations (i.e. handling -inf + inf).
718     Preconditions: \$(D isFinite(x)).
719     +/
720     static if (summation == Summation.Precise)
721     package void unsafePut(F x)
722     in {
723         assert(.isFinite(x));
724     }
725     body {
726         size_t i;
727         foreach (y; partials[])
728         {
729             F h = x + y;
730             debug(numeric) assert(.isFinite(h));
731             F l;
732             if (fabs(x) < fabs(y))
733             {
734                 F t = h - y;
735                 l = x - t;
736             }
737             else
738             {
739                 F t = h - x;
740                 l = y - t;
741             }
742             debug(numeric) assert(.isFinite(l));
743             if (l)
744             {
745                 partials[i++] = l;
746             }
747             x = h;
748         }
749         partials.length = i;
750         if (x)
751         {
752             partials.put(x);
753         }
754     }
755
756     ///
757     unittest {
758         import std.math, std.algorithm, std.range;
759         auto r = iota(1000).map!(n => 1.7L.pow(n+1) - 1.7L.pow(n));
760         Summator!real s = 0.0;
761         put(s, r);
762         s -= 1.7L.pow(1000);
763         assert(s.sum() == -1);
764     }
765
766     ///Returns the value of the sum.
767     T sum()
768     {
769         /++
770         Returns the value of the sum, rounded to the nearest representable
771         floating-point number using the round-half-to-even rule.
772         Result can be differ from the exact value on \$(D X86), \$(D nextDown(proir) <= result &&  result <= nextUp(proir)).
773         +/
774         static if (summation == Summation.Precise)
775         {
776             debug(numeric)
777             {
778                 foreach (y; partials[])
779                 {
780                     assert(y);
781                     assert(y.isFinite);
782                 }
783                 //TODO: Add Non-Overlapping check to std.math
784                 import std.algorithm : isSorted, map;
785                 assert(partials[].map!(a => fabs(a)).isSorted);
786             }
787
788             if (s)
789                 return s;
790             auto parts = partials[];
791             F y = 0.0;
792             //pick last
793             if (parts.length)
794             {
795                 y = parts[\$-1];
796                 parts = parts[0..\$-1];
797             }
798             if (o)
799             {
800                 immutable F of = o;
801                 if (y && (o == -1 || o == 1)  && signbit(of * y))
802                 {
803                     // problem case: decide whether result is representable
804                     y /= 2;
805                     F x = of * M;
806                     immutable F h = x + y;
807                     F t = h - x;
808                     F l = (y - t) * 2;
809                     y = h * 2;
810                     if (.isInfinity(cast(T)y))
811                     {
812                         // overflow, except in edge case...
813                         x = h + l;
814                         t = x - h;
815                         y = parts.length && t == l && !signbit(l*parts[\$-1]) ?
816                             x * 2 :
817                             F.infinity * of;
818                         parts = null;
819                     }
820                     else if (l)
821                     {
822                         bool _break;
823                         y = partialsReducePred(y, l, parts.length ? parts[\$-1] : 0, _break);
824                         if (_break)
825                             parts = null;
826                     }
827                 }
828                 else
829                 {
830                     y = F.infinity * of;
831                     parts = null;
832                 }
833             }
834             return partialsReduce(y, parts);
835         }
836         else
837         static if (summation == Summation.KB2)
838         {
839             return cast(T)(s+(cs+ccs));
840         }
841         else
842         static if (summation == Summation.KBN)
843         {
844             return cast(T)(s+c);
845         }
846         else
847         static if (summation == Summation.Kahan)
848         {
849             return cast(T)s;
850         }
851     }
852
853     version(none)
854     static if (summation == Summation.Precise)
855     F partialsSum()
856     {
857         debug(numeric) partialsDebug;
858         auto parts = partials[];
859         F y = 0.0;
860         //pick last
861         if (parts.length)
862         {
863             y = parts[\$-1];
864             parts = parts[0..\$-1];
865         }
866         return partialsReduce(y, parts);
867     }
868
869     ///Returns \$(D Summator) with extended internal partial sums.
870     C opCast(C : Summator!P, P)()
871         if (
872             isMutable!C &&
873             P.max_exp >= T.max_exp &&
874             P.mant_dig >= T.mant_dig
875         )
876     {
877         static if (is(P == T))
878                 return this;
879         else
880         static if (summation == Summation.Precise)
881         {
882             typeof(return) ret = void;
883             ret.s = s;
884             ret.o = o;
885             ret.partials = scopeBuffer(ret.scopeBufferArray);
886             foreach (p; partials[])
887             {
888                 ret.partials.put(p);
889             }
890             enum exp_diff = P.max_exp / T.max_exp;
891             static if (exp_diff)
892             {
893                 if (ret.o)
894                 {
895                     immutable f = ret.o / exp_diff;
896                     immutable t = cast(int)(ret.o % exp_diff);
897                     ret.o = f;
898                     ret.put((P(2) ^^ T.max_exp) * t);
899                 }
900             }
901             return ret;
902         }
903         else
904         static if (summation == Summation.KB2)
905         {
906             typeof(return) ret = void;
907             ret.s = s;
908             ret.cs = cs;
909             ret.ccs = ccs;
910             return ret;
911         }
912         else
913         static if (summation == Summation.KBN)
914         {
915             typeof(return) ret = void;
916             ret.s = s;
917             ret.c = c;
918             return ret;
919         }
920         else
921         static if (summation == Summation.Kahan)
922         {
923             typeof(return) ret = void;
924             ret.s = s;
925             ret.c = c;
926             return ret;
927         }
928     }
929
930     ///
931     unittest
932     {
933         import std.math;
934         float  M = 2.0f ^^ (float.max_exp-1);
935         double N = 2.0  ^^ (float.max_exp-1);
936         auto s = Summator!float(0); //float summator
937         s += M;
938         s += M;
939         assert(float.infinity == s.sum());
940         auto e = cast(Summator!double) s;
941         assert(isFinite(e.sum()));
942         assert(N+N == e.sum());
943     }
944
945     /++
948     +/
949     C opCast(C)() if (is(Unqual!C == T))
950     {
951         return cast(C)sum();
952     }
953
955     void opAssign(T rhs)
956     {
957         static if (summation == Summation.Precise)
958         {
959             partials.free;
960             partials = scopeBuffer(scopeBufferArray);
961             s = 0.0;
962             o = 0;
963             if (rhs) put(rhs);
964         }
965         else
966         static if (summation == Summation.KB2)
967         {
968             s = rhs;
969             cs = 0.0;
970             ccs = 0.0;
971         }
972         else
973         static if (summation == Summation.KBN)
974         {
975             s = rhs;
976             c = 0.0;
977         }
978         else
979         static if (summation == Summation.Kahan)
980         {
981             s = rhs;
982             c = 0.0;
983         }
984     }
985
986     ///ditto
987     void opOpAssign(string op : "+")(T rhs)
988     {
989         put(rhs);
990     }
991
992     ///ditto
993     void opOpAssign(string op : "+")(ref Summator rhs)
994     {
995         static if (summation == Summation.Precise)
996         {
997             s += rhs.s;
998             o += rhs.o;
999             foreach (f; rhs.partials[])
1000                 put(f);
1001         }
1002         else
1003         static if (summation == Summation.KB2)
1004         {
1005             put(rhs.ccs);
1006             put(rhs.cs);
1007             put(rhs.s);
1008         }
1009         else
1010         static if (summation == Summation.KBN)
1011         {
1012             put(rhs.c);
1013             put(rhs.s);
1014         }
1015         else
1016         static if (summation == Summation.Kahan)
1017         {
1018             put(rhs.s);
1019         }
1020     }
1021
1022     ///ditto
1023     void opOpAssign(string op : "-")(T rhs)
1024     {
1025         static if (summation == Summation.Kahan)
1026         {
1027             y = 0.0;
1028             y -= rhs;
1029             y -= c;
1030             t = s + y;
1031             c = t - s;
1032             c -= y;
1033             s = t;
1034         }
1035         else
1036         {
1037             put(-rhs);
1038         }
1039     }
1040
1041     ///ditto
1042     void opOpAssign(string op : "-")(ref Summator rhs)
1043     {
1044         static if (summation == Summation.Precise)
1045         {
1046             s -= rhs.s;
1047             o -= rhs.o;
1048             foreach (f; rhs.partials[])
1049                 put(-f);
1050         }
1051         else
1052         static if (summation == Summation.KB2)
1053         {
1054             put(-rhs.ccs);
1055             put(-rhs.cs);
1056             put(-rhs.s);
1057         }
1058         else
1059         static if (summation == Summation.KBN)
1060         {
1061             put(-rhs.c);
1062             put(-rhs.s);
1063         }
1064         else
1065         static if (summation == Summation.Kahan)
1066         {
1067             this -= rhs.s;
1068         }
1069     }
1070
1071     ///
1072     nothrow unittest {
1073         import std.math, std.algorithm, std.range;
1074         auto r1 = iota(500).map!(a => 1.7L.pow(a+1) - 1.7L.pow(a));
1075         auto r2 = iota(500, 1000).map!(a => 1.7L.pow(a+1) - 1.7L.pow(a));
1076         Summator!real s1 = 0, s2 = 0.0;
1077         foreach (e; r1) s1 += e;
1078         foreach (e; r2) s2 -= e;
1079         s1 -= s2;
1080         s1 -= 1.7L.pow(1000);
1081         assert(s1.sum() == -1);
1082     }
1083
1084     nothrow unittest {
1085         import std.typetuple;
1086         with(Summation)
1087         foreach (summation; TypeTuple!(Kahan, KBN, KB2, Precise))
1088         foreach (T; TypeTuple!(float, double, real))
1089         {
1090             Summator!(T, summation) sum = 1;
1091             sum += 3;
1092             assert(sum.sum == 4);
1093             sum -= 10;
1094             assert(sum.sum == -6);
1095             Summator!(T, summation) sum2 = 3;
1096             sum -= sum2;
1097             assert(sum.sum == -9);
1098             sum2 = 100;
1099             sum += 100;
1100             assert(sum.sum == 91);
1101             auto sum3 = cast(Summator!(real, summation))sum;
1102             assert(sum3.sum == 91);
1103             sum = sum2;
1104         }
1105     }
1106
1107     static if (summation == Summation.Precise)
1108     {
1109         ///Returns \$(D true) if current sum is a NaN.
1110         bool isNaN()
1111         {
1112             return .isNaN(s);
1113         }
1114
1115         ///Returns \$(D true) if current sum is finite (not infinite or NaN).
1116         bool isFinite()
1117         {
1118             if (s)
1119                 return false;
1120             return !overflow;
1121         }
1122
1123         ///Returns \$(D true) if current sum is ±∞.
1124         bool isInfinity()
1125         {
1126             return .isInfinity(s) || overflow();
1127         }
1128     }
1129     else static if(isFloatingPoint!F)
1130     {
1131         ///Returns \$(D true) if current sum is a NaN.
1132         bool isNaN()
1133         {
1134             return .isNaN(sum());
1135         }
1136
1137         ///Returns \$(D true) if current sum is finite (not infinite or NaN).
1138         bool isFinite()
1139         {
1140             return cast(bool).isFinite(sum());
1141         }
1142
1143         ///Returns \$(D true) if current sum is ±∞.
1144         bool isInfinity()
1145         {
1146             return .isInfinity(sum());
1147         }
1148     }
1149 }
1150
1151 ///
1152 unittest {
1153     import std.range;
1154     import std.algorithm: swap;
1155
1156     ///Moving mean
1157     class MovingAverage
1158     {
1159         Summator!double summator;
1160         double[] circularBuffer;
1161         size_t frontIndex;
1162
1163         double avg() @property
1164         {
1165             return summator.sum() / circularBuffer.length;
1166         }
1167
1168         this(double[] buffer)
1169         {
1170             assert(!buffer.empty);
1171             circularBuffer = buffer;
1172             summator = 0;
1173             .put(summator, buffer);
1174         }
1175
1176         ///operation without rounding
1177         void put(double x)
1178         {
1179             summator += x;
1180             swap(circularBuffer[frontIndex++], x);
1181             summator -= x;
1182             frontIndex %= circularBuffer.length;
1183         }
1184     }
1185
1186     /// ma always keeps pricese average of last 1000 elements
1187     auto ma = new MovingAverage(iota(0.0, 1000.0).array);
1188     assert(ma.avg == (1000*999/2) / 1000.0);
1189     /// move by 10 elements
1190     put(ma, iota(1000.0, 1010.0));
1191     assert(ma.avg == (1010*1009/2 - 10*9/2) / 1000.0);
1192 }
1193
1194 // check default constructor
1195 unittest
1196 {
1197     Summator!double d;
1198     assert(d.isNaN());
1199     assert(d.sum().isNaN());
1200     d += 100;
1201     assert(d.isNaN());
1202     assert(d.sum().isNaN());
1203     d = 1;
1204     d += 1000;
1205     assert(d.sum == 1001);
1206 }
1207
1208 nothrow unittest
1209 {
1210     import std.range;
1211     import std.algorithm;
1212     import std.math;
1213
1214     Summator!double summator = 0.0;
1215
1216     enum double M = (cast(double)2) ^^ (double.max_exp - 1);
1217     Tuple!(double[], double)[] tests = [
1218         tuple(new double[0], 0.0),
1219         tuple([0.0], 0.0),
1220         tuple([1e100, 1.0, -1e100, 1e-100, 1e50, -1, -1e50], 1e-100),
1221         tuple([1e308, 1e308, -1e308], 1e308),
1222         tuple([-1e308, 1e308, 1e308], 1e308),
1223         tuple([1e308, -1e308, 1e308], 1e308),
1224         tuple([M, M, -2.0^^1000], 1.7976930277114552e+308),
1225         tuple([M, M, M, M, -M, -M, -M], 8.9884656743115795e+307),
1226         tuple([2.0^^53, -0.5, -2.0^^-54], 2.0^^53-1.0),
1227         tuple([2.0^^53, 1.0, 2.0^^-100], 2.0^^53+2.0),
1228         tuple([2.0^^53+10.0, 1.0, 2.0^^-100], 2.0^^53+12.0),
1229         tuple([2.0^^53-4.0, 0.5, 2.0^^-54], 2.0^^53-3.0),
1230         tuple([M-2.0^^970, -1, M], 1.7976931348623157e+308),
1231         tuple([double.max, double.max*2.^^-54], double.max),
1232         tuple([double.max, double.max*2.^^-53], double.infinity),
1233         tuple(iota(1, 1001).map!(a => 1.0/a).array , 7.4854708605503451),
1234         tuple(iota(1, 1001).map!(a => (-1.0)^^a/a).array, -0.69264743055982025), //0.693147180559945309417232121458176568075500134360255254120680...
1235         tuple(iota(1, 1001).map!(a => 1.0/a).retro.array , 7.4854708605503451),
1236         tuple(iota(1, 1001).map!(a => (-1.0)^^a/a).retro.array, -0.69264743055982025),
1237         tuple([double.infinity, -double.infinity, double.nan], double.nan),
1238         tuple([double.nan, double.infinity, -double.infinity], double.nan),
1239         tuple([double.infinity, double.nan, double.infinity], double.nan),
1240         tuple([double.infinity, double.infinity], double.infinity),
1241         tuple([double.infinity, -double.infinity], double.nan),
1242         tuple([-double.infinity, 1e308, 1e308, -double.infinity], -double.infinity),
1243         tuple([M-2.0^^970, 0.0, M], double.infinity),
1244         tuple([M-2.0^^970, 1.0, M], double.infinity),
1245         tuple([M, M], double.infinity),
1246         tuple([M, M, -1], double.infinity),
1247         tuple([M, M, M, M, -M, -M], double.infinity),
1248         tuple([M, M, M, M, -M, M], double.infinity),
1249         tuple([-M, -M, -M, -M], -double.infinity),
1250         tuple([M, M, -2.^^971], double.max),
1251         tuple([M, M, -2.^^970], double.infinity),
1252         tuple([-2.^^970, M, M, -2.^^-1074], double.max),
1253         tuple([M, M, -2.^^970, 2.^^-1074], double.infinity),
1254         tuple([-M, 2.^^971, -M], -double.max),
1255         tuple([-M, -M, 2.^^970], -double.infinity),
1256         tuple([-M, -M, 2.^^970, 2.^^-1074], -double.max),
1257         tuple([-2.^^-1074, -M, -M, 2.^^970], -double.infinity),
1258         tuple([2.^^930, -2.^^980, M, M, M, -M], 1.7976931348622137e+308),
1259         tuple([M, M, -1e307], 1.6976931348623159e+308),
1260         tuple([1e16, 1., 1e-16], 10000000000000002.0),
1261     ];
1262     foreach (i, test; tests)
1263     {
1264         foreach (t; test[0]) summator.put(t);
1265         auto r = test[1];
1266         auto s = summator.sum;
1267         version(X86)
1268         {
1269             assert(summator.isNaN() == r.isNaN());
1270             assert(summator.isFinite() == r.isFinite() || r == -double.max && s == -double.infinity || r == double.max && s == double.infinity);
1271             assert(summator.isInfinity() == r.isInfinity() || r == -double.max && s == -double.infinity || r == double.max && s == double.infinity);
1272             assert(nextDown(s) <= r && r <= nextUp(s) || s.isNaN && r.isNaN);
1273         }
1274         else
1275         {
1276             assert(summator.isNaN() == r.isNaN());
1277             assert(summator.isFinite() == r.isFinite());
1278             assert(summator.isInfinity() == r.isInfinity());
1279             assert(s == r || s.isNaN && r.isNaN);
1280         }
1281         summator = 0.0;
1282     }
1283 }
1284
1285
1286 private:
1287
1288 template isComplex(C)
1289 {
1290     import std.complex : Complex;
1291     enum bool isComplex = is(C : Complex!F, F);
1292 }
1293
1294 version(X86)
1295 {
1296
1297 }
1298 else
1299 {
1300     // FIXME (perfomance issue): fabs in std.math available only for for real.
1301     F fabs(F)(F f) //+-0, +-NaN, +-inf doesn't matter
1302     {
1303         if (__ctfe)
1304         {
1305             return f < 0 ? -f : f;
1306         }
1307         else
1308         {
1309             version(LDC)
1310             {
1311                 import ldc.intrinsics : llvm_fabs;
1312                 return llvm_fabs(f);
1313             }
1314             else
1315             {
1316                 import core.stdc.tgmath : fabs;
1317                 return fabs(f);
1318             }
1319         }
1320     }
1321 }
1322
1323 template isSummable(F)
1324 {
1325     enum bool isSummable =
1326         __traits(compiles,
1327         {
1328             F a = 0.1, b, c;
1329             b = 2.3;
1330             c = a + b;
1331             c = a - b;
1332             a += b;
1333             a -= b;
1334         });
1335 }
1336
1337 template isSummable(Range, F)
1338 {
1339     enum bool isSummable =
1340         isInputRange!Range &&
1341         isImplicitlyConvertible!(Unqual!(ForeachType!Range), F) &&
1342         !isInfinite!Range &&
1343         isSummable!F;
1344 }
1345
1346 /++
1347 Naive summation algorithm.
1348 +/
1349 F sumNaive(Range, F = Unqual!(ForeachType!Range))(Range r, F s = 0)
1350 {
1351     foreach (x; r)
1352     {
1353         s += x;
1354     }
1355     return s;
1356 }
1357
1358 ///TODO
1359 alias sumFast = sumNaive;
1360
1361 /++
1362 \$(LUCKY Pairwise summation) algorithm. Range must be a finite sliceable range.
1363 +/
1364 F sumPairwise(Range, F = Unqual!(ForeachType!Range))(Range r)
1365     if (hasLength!Range && hasSlicing!Range)
1366 {
1367     import std.range : hasLength, hasSlicing;
1368     static assert(hasLength!Range && hasSlicing!Range);
1369     switch (r.length)
1370     {
1371         case 0: return F(0);
1372         case 1: return cast(F)r[0];
1373         case 2: return cast(F)(r[0] + cast(F)r[1]);
1374         default: auto n = r.length/2; return cast(F)(sumPairwise!(Range, F)(r[0..n]) + sumPairwise!(Range, F)(r[n..\$]));
1375     }
1376 }
1377
1378 F sumPairwise(Range, F = Unqual!(ForeachType!Range))(Range r, F seed)
1379 {
1380     F s = seed;
1381     s += sumPairwise!Range(r);
1382     return s;
1383 }
1384
1385
1386 template sumGenericKahan(Summation summation)
1387 {
1388     T sumGenericKahan(Range, T = Unqual!(ForeachType!Range))(Range r, T s = 0.0)
1389         if (summation == Summation.Kahan && isSummable!T
1390         || (summation == Summation.KBN || summation == Summation.KB2)
1391             && (isFloatingPoint!T || isComplex!T))
1392     {
1393         auto sum = Summator!(Unqual!T, summation)(s);
1394         foreach (e; r)
1395         {
1396             sum.put(e);
1397         }
1398         return sum.sum;
1399     }
1400 }
1401
1402 /++
1403 \$(LUCKY Kahan summation) algorithm.
1404 +/
1405 /++
1406 ---------------------
1407 s := x[1]
1408 c := 0
1409 FOR k := 2 TO n DO
1410 y := x[k] - c
1411 t := s + y
1412 c := (t - s) - y
1413 s := t
1414 END DO
1415 ---------------------
1416 +/
1417 alias sumKahan = sumGenericKahan!(Summation.Kahan);
1418
1419
1420 /++
1421 \$(LUCKY Kahan-Babuška-Neumaier summation algorithm).
1422 \$(D KBN) gives more accurate results then \$(D Kahan).
1423 +/
1424 /++
1425 ---------------------
1426 s := x[1]
1427 c := 0
1428 FOR i := 2 TO n DO
1429 t := s + x[i]
1430 IF ABS(s) >= ABS(x[i]) THEN
1431     c := c + ((s-t)+x[i])
1432 ELSE
1433     c := c + ((x[i]-t)+s)
1434 END IF
1435 s := t
1436 END DO
1437 s := s + c
1438 ---------------------
1439 +/
1440 alias sumKBN = sumGenericKahan!(Summation.KBN);
1441
1442
1443 /++
1444 \$(LUCKY Generalized Kahan-Babuška summation algorithm), order 2.
1445 \$(D KB2) gives more accurate results then \$(D Kahan) and \$(D KBN).
1446 +/
1447 /++
1448 ---------------------
1449 s := 0 ; cs := 0 ; ccs := 0
1450 FOR j := 1 TO n DO
1451     t := s + x[i]
1452     IF ABS(s) >= ABS(x[i]) THEN
1453         c := (s-t) + x[i]
1454     ELSE
1455         c := (x[i]-t) + s
1456     END IF
1457     s := t
1458     t := cs + c
1459     IF ABS(cs) >= ABS(c) THEN
1460         cc := (cs-t) + c
1461     ELSE
1462         cc := (c-t) + cs
1463     END IF
1464     cs := t
1465     ccs := ccs + cc
1466 END FOR
1467 RETURN s+cs+ccs
1468 ---------------------
1469 +/
1470 alias sumKB2 = sumGenericKahan!(Summation.KB2);
1471
1472 unittest
1473 {
1474     import std.typetuple;
1475     with(Summation)
1476     foreach (F; TypeTuple!(float, double, real))
1477     {
1478         F[] ar = [1, 2, 3, 4];
1479         F r = 10;
1480         assert(r == ar.fsum!Fast());
1481         assert(r == ar.fsum!Pairwise());
1482         assert(r == ar.fsum!Kahan());
1483         assert(r == ar.fsum!KBN());
1484         assert(r == ar.fsum!KB2());
1485         assert(r == ar.fsum!Appropriate());
1486     }
1487 }
1488
1489 //@@@BUG@@@: DMD 2.066 Segmentation fault (core dumped)
1490 version(none)
1491 unittest
1492 {
1493     import core.simd;
1494     static if (__traits(compiles, double2.init + double2.init))
1495     {
1496         double2[] ar = [double2([1.0, 2]), double2([2, 3]), double2([3, 4]), double2([4, 6])];
1497         assert(ar.sumFast().array == double2([10, 14]).array);
1498         assert(ar.sumPairwise().array == double2([10, 14]).array);
1499         assert(ar.sumKahan().array == double2([10, 14]).array);
1500     }
1501 }
1502
1503
1504 /++
1505 Precise summation.
1506 +/
1507 F sumPrecise(Range, F = Unqual!(ForeachType!Range))(Range r, F seed = 0)
1508     if (isFloatingPoint!F || isComplex!F)
1509 {
1510     static if (isFloatingPoint!F)
1511     {
1512         auto sum = Summator!F(seed);
1513         foreach (e; r)
1514         {
1515             sum.put(e);
1516         }
1517         return sum.sum;
1518     }
1519     else
1520     {
1521         alias T = typeof(F.init.re);
1522         static if (isForwardRange!Range)
1523         {
1524             auto s = r.save;
1525             auto sum = Summator!T(seed.re);
1526             foreach (e; r)
1527             {
1528                 sum.put(e.re);
1529             }
1530             T sumRe = sum.sum;
1531             sum = seed.im;
1532             foreach (e; s)
1533             {
1534                 sum.put(e.im);
1535             }
1536             return F(sumRe, sum.sum);
1537         }
1538         else
1539         {
1540             auto sumRe = Summator!T(seed.re);
1541             auto sumIm = Summator!T(seed.im);
1542             foreach (e; r)
1543             {
1544                 sumRe.put(e.re);
1545                 sumIm.put(e.im);
1546             }
1547             return F(sumRe.sum, sumIm.sum);
1548         }
1549     }
1550 }
1551
1552 template Algo(Range, Summation summation)
1553 {
1554     static if (summation == Summation.Fast)
1555         alias Algo = sumFast;
1556     else
1557     static if (summation == Summation.Naive)
1558         alias Algo = sumNaive;
1559     else
1560     static if (summation == Summation.Pairwise)
1561         alias Algo = sumPairwise;
1562     else
1563     static if (summation == Summation.Kahan)
1564         alias Algo = sumKahan;
1565     else
1566     static if (summation == Summation.KBN)
1567         alias Algo = sumKBN;
1568     else
1569     static if (summation == Summation.KB2)
1570         alias Algo = sumKB2;
1571     else
1572     static if (summation == Summation.Precise)
1573         alias Algo = sumPrecise;
1574     else
1575     static if (summation == Summation.Appropriate)
1576     {
1577         static if (isRandomAccessRange!Range)
1578             alias Algo = Algo!(Range, Summation.Pairwise);
1579         else
1580         static if (isFloatingPoint!(ForeachType!Range) || isComplex!(ForeachType!Range))
1581             alias Algo = Algo!(Range, Summation.KBN);
1582         else
1583             alias Algo = Algo!(Range, Summation.Kahan);
1584     }
1585     else
1586     static assert(0);
1587 }
1588
1589 unittest {
1590     static struct UDT
1591     {
1592         UDT opBinary(string op)(auto ref UDT rhs) const {
1593             UDT ret;
1594             return ret;
1595         }
1596         UDT opOpAssign(string op)(auto ref UDT rhs) {
1597             return this;
1598         }
1599         this(double f){}
1600         void opAssign(double f){}
1601     }
1602     import std.range;
1603     import std.complex : Complex;
1604     static assert(__traits(isSame, Algo!(double[], Summation.Appropriate), sumPairwise));
1605     static assert(__traits(isSame, Algo!(Complex!double[], Summation.Appropriate), sumPairwise));
1606     static assert(__traits(isSame, Algo!(UDT[], Summation.Appropriate), sumPairwise));
1607     static assert(__traits(isSame, Algo!(ForwardRange!double, Summation.Appropriate), sumKBN));
1608     static assert(__traits(isSame, Algo!(ForwardRange!(Complex!double), Summation.Appropriate), sumKBN));
1609     static assert(__traits(isSame, Algo!(ForwardRange!UDT, Summation.Appropriate), sumKahan));
1610 }```