Loading...
Searching...
No Matches
invgamma.h
Go to the documentation of this file.
1#ifndef _BBM_INVGAMMA_H_
2#define _BBM_INVGAMMA_H_
3
4#include "util/poly.h"
5#include "util/gamma.h"
6
7/************************************************************************/
8/*! \file invgamma.h
9
10 \brief Packet friendly computation of the the inverse of the incomplete
11 gamma functions:
12 + gamma_p_inv: inverse (x) of p = gamma_p(a, x)
13 + gamma_q_inv: inverse (x) of q = gamma_q(a, x)
14
15 Follows: "Computation of the Incomplete Gamma Function Ratios and their
16 Inverse", DiDonata and Morris [1986]: https://doi.org/10.1145/22721.23109
17
18*************************************************************************/
19
20namespace bbm {
21 namespace detail {
22 namespace inverse_gamma {
23
24 //! \brief Eq 21
25 //! b > 0.6 || (b > 0.45 && a >= 0.3)
26 template<typename T, typename Mask>
27 inline T eq21(const T& a, const T& p, const T& q, const T& gamma, const T& b, Mask todo)
28 {
29 // exit if nothing to do
30 if(bbm::none(todo)) return 0;
31
32 // compute 'u'
33 T u = 0;
34 Mask mask = todo && ((b*q > 1e-8) && (q > 1e-5));
35 todo &= !mask;
36 if(bbm::any(mask)) u = bbm::select(mask, bbm::pow(p * gamma*a, 1/a), u);
37 if(bbm::any(todo)) u = bbm::select(todo, bbm::exp((-q / a) - std::numbers::egamma), u);
38
39 // Eq21
40 return (u / (1 - (u / (a+1))));
41 }
42
43 //! \brief Eq 22
44 //! a < 0.3 && 0.35 <= b <= 0.6
45 template<typename T>
46 inline T eq22(const T& /*a*/, const T& b)
47 {
48 T t = bbm::exp(-std::numbers::egamma - b);
49 T u = t * bbm::exp(t);
50 return t*bbm::exp(u);
51 }
52
53 //! \brief Eq 23
54 //! (.15 <= b < 0.35) || (.15 <= b < 0.45 and a >= 0.3)
55 template<typename T>
56 inline T eq23(const T& a, const T& y)
57 {
58 T u = y - ((1-a) * bbm::log(y));
59 return y - ((1-a) * bbm::log(u)) - bbm::log(1 + ((1-a)/(1+u)));
60 }
61
62 //! \brief Eq 24
63 //! 0.01 < b < 0.15
64 template<typename T>
65 inline T eq24(const T& a, const T& y)
66 {
67 T u = y - ((1-a) * bbm::log(y));
68 return y - ((1-a) * bbm::log(u)) - bbm::log(bbm::poly(u, (2-a)*(3-a), 2*(3-a), 1) / bbm::poly(u, 2, (5-a), 1));
69 }
70
71 //! \brief Eq 25
72 template<typename T>
73 inline T eq25(const T& a, const T& y)
74 {
75 T a2 = a*a;
76 T a3 = a2*a;
77
78 T c1 = (a-1) * bbm::log(y);
79 T c2 = (a-1) * (1+c1);
80 T c3 = (a-1) * bbm::poly(c1, 0.5*(3*a-5), (a-2), -0.5);
81 T c4 = (a-1) * bbm::poly(c1, (11*a2 - 46*a + 47) / 6.0, (a2 - 6*a + 7), -0.5*(3*a-5), 1.0/3.0);
82 T c5 = (a-1) * bbm::poly(c1, (25*a3 - 195*a2 + 477*a - 379) / 12.0, (2*a3 - 25*a2 + 72*a - 61) * 0.5, (-3*a2 + 13*a - 13), (11*a - 17) / 6.0, -0.25);
83
84 // done.
85 return y + bbm::poly( bbm::rcp(y), c1, c2, c3, c4, c5);
86 }
87
88 //! \brief Eq 32: Q(a,x) = 1/2 erfc(s/sqrt(2)) => solve for s with minmax approximation
89 template<typename T>
90 inline T eq32(const T& p, const T& q)
91 {
92 T t = bbm::select(p < 0.5, bbm::sqrt(-2*bbm::log(p)), bbm::sqrt(-2*bbm::log(q)));
93 T s = t - bbm::poly(t, 3.31125922108741, 11.6616720288968, 4.28342155967104, 0.213623493715853) / bbm::poly(t, 1, 6.61053765625462, 6.40691597760039, 1.27364489782223, 0.3611708101884203e-1);
94 return bbm::select(p < 0.5, -s, s);
95 }
96
97 //! \brief Eq 31: Cornish-Fisher 6-term approximation for x
98 template<typename T>
99 inline T eq31(const T& a, const T& p, const T& q)
100 {
101 // Eq 31
102 T sqrta = bbm::sqrt(a);
103 T s = eq32(p,q);
104 T w = bbm::poly(s,
105 a - 1.0/3.0 + 16/(810*a), // s^0 term
106 sqrta - 7/(36*sqrta) - 433/(38880*a*sqrta), // s^1 term
107 1.0/3.0 - 7/(810*a), // s^2 term
108 1/(36*sqrta) + 256/(38880*a*sqrta), // s^3 term
109 -3/(810*a), // s^4 term
110 9/(38880*a*sqrta)); // s^5 term
111
112 // Done.
113 return w;
114 }
115
116 //! \brief Eq 33
117 //! a < 500 or |1-w/a| > 1e-6 and p > 0.5
118 template<typename T>
119 inline T eq33(const T& a, const T& y, const T& w)
120 {
121 T u = y + ((a-1) * bbm::log(w)) - bbm::log(1 + (1-a)/(1+w));
122 return y + ((a-1) * bbm::log(u)) - bbm::log(1 + (1-a)/(1+u));
123 }
124
125 //! \brief Eq 34: Sn
126 // S0 = 1
127 // Sn = 1 + sum_{i=1}^N x^i / (PI_{j=1}^i (a+j))
128 template<size_t N, typename T>
129 inline T Sn(const T& x, const T& a, const T& tolerance=0)
130 {
131 T sum = 1;
132 T partial = 1;
133 auto mask = (partial > tolerance);
134
135 // max N iterations or until tolerance is met
136 for(size_t i=1; i <= N && bbm::any(mask); ++i)
137 {
138 partial *= bbm::select(mask, x / (a + i), 0); // only update partial if tolerance not met.
139 sum += partial;
140
141 // converged?
142 mask = (partial > tolerance);
143 }
144
145 // Done.
146 return sum;
147 }
148
149 //! \brief Eq 34: Fn
150 // Fn(x) = exp((v+x-lnSn(x))/a)
151 template<size_t N, typename T>
152 inline T Fn(const T& x, const T& a, const T& v, const T& tolerance=0)
153 {
154 return bbm::exp((v + x - bbm::log(Sn<N>(x,a,tolerance)))/a);
155 }
156
157 //! \brief Eq 35
158 //! a > 500 && p < 0.5 && w < 0.15*(a+1)
159 template<typename T>
160 inline T eq35(const T& a, const T& p, const T& w)
161 {
162 T v = bbm::log(p) + bbm::lgamma(a+1);
163
164 T u1 = Fn<0>(w, a, v);
165 T u2 = Fn<1>(u1, a, v);
166 T u3 = Fn<2>(u2, a, v);
167 T z = Fn<3>(u3, a, v);
168
169 // Done.
170 return z;
171 }
172
173 //! \brief Eq 36
174 //! 0.01*(a+1) < z <= 0.7*(a+1)
175 template<typename T>
176 inline T eq36(const T& a, const T&p, const T& z)
177 {
178 T lnSn = bbm::log( Sn<100>(z, a, T(1e-4)) );
179 T v = bbm::log(p) + bbm::lgamma(a + 1);
180 T zbar = bbm::exp((v + z - lnSn) / a);
181 T result = zbar * (1 - (a * bbm::log(zbar) - z - v + lnSn) / (a - zbar));
182
183 // Done.
184 return result;
185 }
186
187
188 //! \brief Eq 3
189 //! R(a,x) for 20 > a > 0
190 template<typename T>
191 inline T eq3(const T& a, const T& x, const T& lg)
192 {
193 return bbm::exp(-x - lg + bbm::log(x)*a);
194 }
195
196 //! \brief Eq 4
197 //! R(a,x) for a >= 20
198 template<typename T>
199 inline T eq4(const T& a, const T& x, const T& lg)
200 {
201 T lambda = x/a;
202 T delta = lg - ((a - 0.5)*bbm::log(a)) + a - 0.5*bbm::log(2.0*std::numbers::pi);
203 T phi = lambda - 1 - bbm::log(lambda);
204 return bbm::sqrt(0.5*a/std::numbers::pi) * bbm::exp(-a*phi - delta);
205 }
206
207 //! \brief R(a,x)
208 template<typename T>
209 inline T R(const T& a, const T& x, const T& lg)
210 {
211 return bbm::select(a < 20, eq3(a,x,lg), eq4(a,x,lg));
212 }
213
214 /******************************************************************/
215
216 //! \brief handle case where a < 1
217 template<typename T, typename Mask>
218 inline T a_less_one(const T& a, const T& p, const T& q, Mask todo, Mask& /*converged*/)
219 {
220 // exit if nothing to do
221 if(bbm::none(todo)) return 0;
222
223 // Handle 5 different cases
224 T gamma = bbm::tgamma(a);
225 T b = q * gamma;
226
227 // Case 1: b > 0.6 || (b >= 0.45 && a >= 0.3)
228 Mask mask = todo && ((b > 0.6) || (b >= 0.45 && a >= 0.3));
229 todo &= !mask;
230 T result = bbm::select(mask, eq21(a,p,q,gamma,b,mask), 0);
231 if(bbm::none(todo)) return result;
232
233 // Case 2: a < 0.3 && b >= 0.35
234 mask = todo && ((a < 0.3) && (b >= 0.35));
235 todo &= !mask;
236 if(bbm::any(mask)) result = bbm::select(mask, eq22(a,b), result);
237 if(bbm::none(todo)) return result;
238
239 // Case 3: b >= 0.15 || a >= 0.3
240 T y = -bbm::log(b);
241 mask = todo && ((b >= 0.15) || (a >= 0.3));
242 todo &= !mask;
243 if(bbm::any(mask)) result = bbm::select(mask, eq23(a,y), result);
244 if(bbm::none(todo)) return result;
245
246 // Case 4: b > 0.1
247 mask = todo && (b > 0.1);
248 todo &= !mask;
249 if(bbm::any(mask)) result = bbm::select(mask, eq24(a,y), result);
250 if(bbm::none(todo)) return result;
251
252 // Case 5: otherwise (b <= 0.1)
253 result = bbm::select(todo, eq25(a,y), result);
254
255 // Done.
256 return result;
257 }
258
259 //! \brief handle case where a > 1 & p > 0.5
260 template<typename T, typename Mask>
261 inline T p_greater_half(const T& a, const T& q, const T& w, Mask todo, Mask& /*converged*/)
262 {
263 // exit if nothing to do
264 if(bbm::none(todo)) return 0;
265
266 // Case 1: w < 3*a
267 Mask mask = todo && (w < 3*a);
268 todo &= !mask;
269 T result = bbm::select(mask, w, 0);
270 if(bbm::none(todo)) return result;
271
272 // Case 2: lb <= -D*2.3
273 T D = bbm::max(a*(a-1), 2);
274 T lg = bbm::lgamma(a);
275 T lb = bbm::log(q) + lg;
276
277 mask = todo && (lb <= -D*2.3);
278 todo &= !mask;
279 if(bbm::any(mask)) result = bbm::select(mask, eq25(a,-lb), result);
280 if(bbm::none(todo)) return result;
281
282 // Case 3: otherwise
283 result = bbm::select(todo, eq33(a,-lb,w), result);
284
285 // Done.
286 return result;
287 }
288
289 //! \brief handle case where a > 1 & p < 0.5
290 template<typename T, typename Mask>
291 inline T p_less_half(const T& a, const T& p, const T& w, Mask todo, Mask& converged)
292 {
293 // exit if nothing to do
294 if(bbm::none(todo)) return 0;
295
296 // Compute z
297 Mask mask = todo && (w < 0.15*(a+1));
298 T z = w;
299 if(bbm::any(mask)) z = bbm::select(mask, eq35(a,p,w), z);
300
301 // Case 1: z < (0.01*(a+1) || z > 0.7*(a+1)
302 mask = todo && (z < 0.01*(a+1) || z > 0.7*(a+1));
303 todo &= !mask;
304 converged |= (z <= 0.002*(a+1));
305 T result = bbm::select(mask, z, 0); // converged if z <= 0.002*(a+1)
306 if(bbm::none(todo)) return result;
307
308 // Case 2: otherwise
309 result = bbm::select(todo, eq36(a,p,z), result);
310
311 // Done.
312 return result;
313 }
314
315
316 //! \brief handle case where a > 1
317 template<typename T, typename Mask>
318 inline T a_greater_one(const T& a, const T& p, const T& q, Mask todo, Mask& converged)
319 {
320 // exit if nothing to do
321 if(bbm::none(todo)) return 0;
322
323 // Eq 31
324 T w = eq31(a,p,q);
325
326 // Case 1: a >= 500 && |1-w/a| < 1e-6
327 Mask mask = todo && (a >= 500 && bbm::abs(1-w/a) < 1e-6);
328 todo &= !mask;
329 converged |= mask;
330 T result = bbm::select(mask, w, 0); // Converged
331 if(bbm::none(todo)) return result;
332
333 // Case 2: p > 0.5
334 mask = todo && (p > 0.5);
335 todo &= !mask;
336 result = bbm::select(mask, p_greater_half(a,q,w,mask,converged), result);
337 if(bbm::none(todo)) return result;
338
339 // Case 3: otherwise (p < 0.5)
340 result = bbm::select(todo, p_less_half(a,p,w,todo,converged), result);
341
342 // Done.
343 return result;
344 }
345
346 /******************************************************************/
347
348 /******************************************************************/
349 /*! \brief Provides an initial estimate of the inverse (x) of (p,q) given an 'a'.
350
351 \param a = 'a' parameter of the incomplete gamma functions
352 \param p = upper normalized incomplete gamma function
353 \param q = 1 - p (lower gamma)
354 \param mask = enable/disbale lanes
355 \param converged = will be set to true if the corresponding lanes in 'x' are converged to 10 digits.
356 \returns the inverse: 'x' (also changes 'converged').
357 *******************************************************************/
358 template<typename T, typename Mask>
359 inline T estimate(const T& a, const T& p, const T& q, Mask mask, Mask& converged)
360 {
361 T result = 0;
362
363 // Case 1: a == 1
364 auto mask1 = mask && (a == 1);
365 converged |= mask1;
366 if(bbm::any(mask1)) result = bbm::select(mask1, -bbm::log(q), result);
367 if(bbm::all(mask1)) return result;
368
369 // Case 2: a < 1
370 auto mask2 = mask && (a < 1);
371 result = bbm::select(mask2, a_less_one(a, p, q, mask2, converged), result);
372 if(bbm::all(mask1 || mask2)) return result;
373
374 // Case 3:
375 auto mask3 = mask && (a > 1);
376 result = bbm::select(mask3, a_greater_one(a, p, q, mask3, converged), result);
377
378 // Done.
379 return result;
380 }
381
382 /******************************************************************/
383 /*! \brief compute (x) the inverse of (p,q) given an 'a'
384
385 \tparam NUM_ITR = maximum number of iterations for Newtan-Raphson; 3 suffices
386 \param a = 'a' parameter of the incomplete gamma function
387 \param p = result of the upper normalized incomplete gamma function
388 \param q = 1-p
389 \param mask = enable/disable lanes
390 \returns the inverse: 'x'
391 *******************************************************************/
392 template<typename T, typename Mask, size_t NUM_ITR=3>
393 inline T inverse(const T& a, const T& p, const T& q, Mask mask)
394 {
395 // exit if nothing to do
396 if(bbm::none(mask)) return 0;
397
398 // get initial esitimate
399 Mask converged = false;
400 auto x = estimate(a, p, q, mask, converged);
401 T lg = bbm::lgamma(a);
402
403 // Newton-Raphson iterations
404 for(size_t itr=0; itr < NUM_ITR && !bbm::all(converged); ++itr)
405 {
406 T r = R(a,x,lg);
407 auto pq = bbm::gamma_pq(a,x);
408 T t = bbm::select(p <= 0.5, (bbm::get<"p">(pq) - p), (q - bbm::get<"q">(pq))) / r;
409 T w = 0.5 * (a - 1 - x);
410
411 // update x
412 auto m = (bbm::abs(t) <= 0.1) && (bbm::abs(w*t) <= 0.1);
413 x *= 1 - bbm::select(!converged, t + bbm::select(m, w*t*t, 0), 0);
414 }
415
416 // Done.
417 return x;
418 }
419
420 } // end inverse_gamme namespace
421 } // end detail namespace
422
423
424 /**********************************************************************/
425 /*! \brief the inverse of the normalized upper incomplete gamma function
426
427 \f$ x = P^{-1}(a, p)\f$
428
429 such that \f$ p = P(a, x) \f$.
430 ***********************************************************************/
431 template<typename TA, typename TP>
432 inline auto gamma_p_inv(const TA& a, const TP& p)
433 {
434 using result_t = decltype(a + p);
435 auto mask = (a > 0) && (p > 0);
436 return bbm::detail::inverse_gamma::inverse(result_t(a), result_t(p), result_t(1-p), mask);
437 }
438
439 /**********************************************************************/
440 /*! \brief the inverse of the normalized lower incomplete gamma function
441
442 \f$ x = Q^{-1}(a, q)\f$
443
444 such that \f$ q = Q(a, x) \f$.
445 ***********************************************************************/
446 template<typename TA, typename TQ>
447 inline auto gamma_q_inv(const TA& a, const TQ& q)
448 {
449 using result_t = decltype(a + q);
450 auto mask = (a > 0) && (q > 0);
451 return bbm::detail::inverse_gamma::inverse(result_t(a), result_t(1-q), result_t(q), mask);
452 }
453
454
455} // end bbm namespace
456
457
458#endif /* _BBM_INVGAMMA_H_ */
Packet-type friendly implementation of:
constexpr decltype(auto) b(bbm::color< T > &c)
Definition: color.h:24
constexpr decltype(auto) r(bbm::color< T > &c)
Definition: color.h:18
T & phi(vec2d< T > &v)
Definition: spherical.h:39
constexpr decltype(auto) u(bbm::vec2d< T > &v)
Definition: vec.h:37
constexpr decltype(auto) y(bbm::vec3d< T > &v)
Definition: vec.h:23
constexpr decltype(auto) z(bbm::vec3d< T > &v)
Definition: vec.h:26
constexpr decltype(auto) v(bbm::vec2d< T > &v)
Definition: vec.h:40
constexpr decltype(auto) x(bbm::vec3d< T > &v)
Definition: vec.h:20
Definition: aggregatebsdf.h:29
auto gamma_p_inv(const TA &a, const TP &p)
the inverse of the normalized upper incomplete gamma function
Definition: invgamma.h:432
constexpr auto select(MASK &&mask, const A &a, const A &b)
Definition: backbone.h:255
auto gamma_q_inv(const TA &a, const TQ &q)
the inverse of the normalized lower incomplete gamma function
Definition: invgamma.h:447
auto tgamma(const TA &a, const TX &x)
Unnormalized incomplete upper gamma function.
Definition: gamma.h:532
constexpr auto poly(T &&x, T0 &&c0, Ts &&... c)
Compute a polynomial .
Definition: poly.h:35
auto gamma_pq(const TA &a, const TX &x)
Normalized incomplete upper and lower gamma function.
Definition: gamma.h:593
Compile time polynomial using Horner's method.