From 29ff01293373e1faf28f7c3dc2e46c86076583bf Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Tue, 5 Jun 2018 11:14:19 -0500 Subject: [PATCH] Incremental updates through vec_mul. Signed-off-by: Bill Schmidt --- Intrinsics_Reference/ch_vec_reference.xml | 1073 ++++++++++++++++----- 1 file changed, 823 insertions(+), 250 deletions(-) diff --git a/Intrinsics_Reference/ch_vec_reference.xml b/Intrinsics_Reference/ch_vec_reference.xml index 5d9cf17..9b49bd2 100644 --- a/Intrinsics_Reference/ch_vec_reference.xml +++ b/Intrinsics_Reference/ch_vec_reference.xml @@ -7537,12 +7537,12 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - ARG1 + a - ARG2 + b @@ -8791,14 +8791,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> xscvdpspn t,a - xxextractuw u,t,4 + xxextractuw u,t,0 xxinsertw r/b,u,(3-c)*4 xscvdpspn t,a - xxextractuw u,t,4 + xxextractuw u,t,0 xxinsertw r/b,u,c*4 @@ -9140,12 +9140,12 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - + r - + a @@ -9178,15 +9178,22 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vec_madd - Vector ... Spelled Out Name TBD + Vector Multiply-Add - r = vec_madd (ARG1, ARG2, ARG3) + r = vec_madd (a, b, c) Purpose: - Returns a vector containing the results of performing a fused multiply-add operation for each corresponding set of elements of the given vectors. - - Result value: The value of each element of the result is the product of the values of the corresponding elements of ARG1 and ARG2, added to the value of the corresponding element of ARG3. + Returns a vector containing the results of performing a fused + multiply-add operation for each corresponding set of elements of the + source vectors. + + Result value: The value of each + element of r is the product of the + values of the corresponding elements of a and b, added + to the value of the corresponding element of c. Endian considerations: None. 
@@ -9201,24 +9208,24 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - + r - + - ARG1 + a - + - ARG2 + b - + - ARG3 + c @@ -9241,7 +9248,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed short - sample implementation TBD + + vmladduhm r,a,b,c + @@ -9258,7 +9267,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned short - sample implementation TBD + + vmladduhm r,a,b,c + @@ -9275,7 +9286,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed short - sample implementation TBD + + vmladduhm r,a,b,c + @@ -9292,7 +9305,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned short - sample implementation TBD + + vmladduhm r,a,b,c + @@ -9309,7 +9324,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector double - sample implementation TBD + + xvmaddmdp r/a,b,c + @@ -9326,7 +9343,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector float - sample implementation TBD + + xvmaddmsp r/a,b,c + @@ -9338,15 +9357,23 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vec_madds - Vector ... Spelled Out Name TBD + Vector Multiply-Add Saturated - r = vec_madds (ARG1, ARG2, ARG3) + r = vec_madds (a, b, c) Purpose: - Returns a vector containing the results of performing a saturated multiply-high-and-add operation for each corresponding set of elements of the given vectors. - - Result value: For each element of the result, the value is produced in the following way: The values of the corresponding elements of ARG1 and ARG2 are multiplied. The value of the 17 most-significant bits of this product is then added, using 16-bit-saturated addition, to the value of the corresponding element of ARG3. 
+ Returns a vector containing the results of performing a saturated + multiply-high-and-add operation for each corresponding set of elements + of the source vectors. + + Result value: The value of each + element of r is produced as follows: + The values of the corresponding elements of a and b are + multiplied. The value of the 17 most-significant bits of this product + is then added, using 16-bit-saturated addition, to the value of the + corresponding element of c. Endian considerations: None. @@ -9361,24 +9388,24 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - + r - + - ARG1 + a - + - ARG2 + b - + - ARG3 + c @@ -9401,7 +9428,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed short - sample implementation TBD + + vmhaddshs r,a,b,c + @@ -9413,15 +9442,20 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vec_max - Vector ... Spelled Out Name TBD + Vector Maximum - r = vec_max (ARG1, ARG2) + r = vec_max (a, b) Purpose: - + Returns a vector containing the maximum value from each set of + corresponding elements of the source vectors. + + Result value: + The value of each element of r is the + maximum of the values of the corresponding elements of a and b. - Endian considerations: None. 
@@ -9442,12 +9476,12 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - ARG1 + a - ARG2 + b @@ -9467,7 +9501,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed char - sample implementation TBD + + vmaxsb r,a,b + @@ -9481,7 +9517,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned char - sample implementation TBD + + vmaxub r,a,b + @@ -9495,7 +9533,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed int - sample implementation TBD + + vmaxsw r,a,b + @@ -9509,7 +9549,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned int - sample implementation TBD + + vmaxuw r,a,b + @@ -9523,7 +9565,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed long long - sample implementation TBD + + vmaxsd r,a,b + @@ -9537,7 +9581,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned long long - sample implementation TBD + + vmaxud r,a,b + @@ -9551,7 +9597,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed short - sample implementation TBD + + vmaxsh r,a,b + @@ -9565,7 +9613,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned short - sample implementation TBD + + vmaxuh r,a,b + @@ -9579,7 +9629,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector double - sample implementation TBD + + xvmaxdp r,a,b + @@ -9593,7 +9645,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector float - sample implementation TBD + + xvmaxsp r,a,b + @@ -9605,45 +9659,55 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vec_mergee - Vector ... 
Spelled Out Name TBD + Vector Merge Even - r = vec_mergee (ARG1, ARG2) + r = vec_mergee (a, b) Purpose: - Merges the even-numbered values from the two vectors. + Merges the even-numbered values from two vectors. - Result value: The even-numbered elements of ARG1 are stored into the even-numbered elements of the result. The even-numbered elements of ARG2 are stored in the odd-numbered elements of the result. + Result value: The even-numbered + elements of a are stored into the + even-numbered elements of r. The + even-numbered elements of b are stored + into the odd-numbered elements of r. Endian considerations: - None. + The element numbering within a register is left-to-right for big-endian + targets, and right-to-left for little-endian targets. Supported type signatures for vec_mergee - + + - + r - + - ARG1 + a - + - ARG2 + b - Example Implementation + Example LE Implementation + + + Example BE Implementation @@ -9659,7 +9723,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector bool int - sample implementation TBD + + vmrgow r,b,a + + + + + vmrgew r,a,b + @@ -9673,7 +9744,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed int - sample implementation TBD + + vmrgow r,b,a + + + + + vmrgew r,a,b + @@ -9687,7 +9765,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned int - sample implementation TBD + + vmrgow r,b,a + + + + + vmrgew r,a,b + @@ -9701,7 +9786,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector bool long long - sample implementation TBD + + xxpermdi r,b,a,3 + + + + + xxpermdi r,a,b,0 + @@ -9715,7 +9807,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed long long - sample implementation TBD + + xxpermdi r,b,a,3 + + + + + xxpermdi r,a,b,0 + @@ -9729,35 +9828,56 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector 
unsigned long long - sample implementation TBD + + xxpermdi r,b,a,3 + + + + + xxpermdi r,a,b,0 + - vector float + vector double - vector float + vector double - vector float + vector double - sample implementation TBD + + xxpermdi r,b,a,3 + + + + + xxpermdi r,a,b,0 + - vector double + vector float - vector double + vector float - vector double + vector float - sample implementation TBD + + vmrgow r,b,a + + + + + vmrgew r,a,b + @@ -9769,48 +9889,64 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vec_mergeh - Vector ... Spelled Out Name TBD + Vector Merge High - r = vec_mergeh (ARG1, ARG2) + r = vec_mergeh (a, b) Purpose: - Merges the most-significant halves of two vectors. - - Result value: Assume that the elements of each vector are numbered beginning with 0. The even-numbered elements of the result are taken, in order, from the elements in the most-significant 8 bytes of ARG1. The odd-numbered elements of the result are taken, in order, from the elements in the most-significant 8 bytes of ARG2. + Merges the first halves (in element order) of two vectors. + + Result value: The + nth element of r, + if n is an even number, is given the value of the + (n/2)th element of a. The (n+1)th element + of r, if n is an + even number, is given the value of the (n/2)th + element of b. Endian considerations: - None. + The element numbering within a register is left-to-right for big-endian + targets, and right-to-left for little-endian targets. + + Notes: + No Power compilers yet support the vector _Float16 type, so that + interface is currently deferred.
Supported type signatures for vec_mergeh - + + - + r - + - ARG1 + a - + - ARG2 + b - Example Implementation + Example LE Implementation + Example BE Implementation + + Restrictions @@ -9827,7 +9963,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector bool char - sample implementation TBD + + vmrglb r,b,a + + + + + vmrghb r,a,b + @@ -9844,7 +9987,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed char - sample implementation TBD + + vmrglb r,b,a + + + + + vmrghb r,a,b + @@ -9861,7 +10011,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned char - sample implementation TBD + + vmrglb r,b,a + + + + + vmrghb r,a,b + @@ -9878,7 +10035,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector bool int - sample implementation TBD + + vmrglw r,b,a + + + + + vmrghw r,a,b + @@ -9895,7 +10059,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed int - sample implementation TBD + + vmrglw r,b,a + + + + + vmrghw r,a,b + @@ -9912,7 +10083,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned int - sample implementation TBD + + vmrglw r,b,a + + + + + vmrghw r,a,b + @@ -9929,7 +10107,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector bool long long - sample implementation TBD + + xxpermdi r,b,a,3 + + + + + xxpermdi r,a,b,0 + @@ -9946,7 +10131,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed long long - sample implementation TBD + + xxpermdi r,b,a,3 + + + + + xxpermdi r,a,b,0 + @@ -9963,7 +10155,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned long long - sample implementation TBD + + xxpermdi r,b,a,3 + + + + + xxpermdi r,a,b,0 + @@ -9980,7 +10179,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" 
xml:id="section_vec_intrinsics"> vector pixel - sample implementation TBD + + vmrglh r,b,a + + + + + vmrghh r,a,b + @@ -9997,7 +10203,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector bool short - sample implementation TBD + + vmrglh r,b,a + + + + + vmrghh r,a,b + @@ -10014,7 +10227,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed short - sample implementation TBD + + vmrglh r,b,a + + + + + vmrghh r,a,b + @@ -10031,7 +10251,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned short - sample implementation TBD + + vmrglh r,b,a + + + + + vmrghh r,a,b + @@ -10048,7 +10275,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector double - sample implementation TBD + + xxpermdi r,b,a,3 + + + + + xxpermdi r,a,b,0 + @@ -10065,7 +10299,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector float - sample implementation TBD + + vmrglw r,b,a + + + + + vmrghw r,a,b + @@ -10079,13 +10320,20 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector _Float16 - vector _Float16 + vector _Float16 - sample implementation TBD + + vmrglh r,b,a + + + + + vmrghh r,a,b + - ISA 3.0 or later + Deferred @@ -10097,48 +10345,67 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vec_mergel - Vector ... Spelled Out Name TBD + Vector Merge Low - r = vec_mergel (ARG1, ARG2) + r = vec_mergel (a, b) Purpose: - Merges the least-significant halves of two vectors. - - Result value: Assume that the elements of each vector are numbered beginning with 0. The even-numbered elements of the result are taken, in order, from the elements in the least-significant 8 bytes of ARG1. The odd-numbered elements of the result are taken, in order, from the elements in the least-significant 8 bytes of ARG2. + Merges the last halves (in element order) of two vectors. 
+ + Result value: Let + m be the number of elements in r. The nth element of + r, if n is an even + number, is given the value of the m/2 + + (n/2)th element of a. The (n+1)th element + of r, if n is an + even number, is given the value of the m/2 + + (n/2)th element of b. Endian considerations: - None. + The element numbering within a register is left-to-right for big-endian + targets, and right-to-left for little-endian targets. + + Notes: + No Power compilers yet support the vector _Float16 type, so that + interface is currently deferred.
Supported type signatures for vec_mergel - + + - + r - + - ARG1 + a - + - ARG2 + b - Example Implementation + Example LE Implementation + Example BE Implementation + + Restrictions @@ -10155,7 +10422,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector bool char - sample implementation TBD + + vmrghb r,b,a + + + + + vmrglb r,a,b + @@ -10172,7 +10446,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed char - sample implementation TBD + + vmrghb r,b,a + + + + + vmrglb r,a,b + @@ -10189,7 +10470,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned char - sample implementation TBD + + vmrghb r,b,a + + + + + vmrglb r,a,b + @@ -10206,7 +10494,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector bool int - sample implementation TBD + + vmrghw r,b,a + + + + + vmrglw r,a,b + @@ -10223,7 +10518,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed int - sample implementation TBD + + vmrghw r,b,a + + + + + vmrglw r,a,b + @@ -10240,7 +10542,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned int - sample implementation TBD + + vmrghw r,b,a + + + + + vmrglw r,a,b + @@ -10257,7 +10566,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector bool long long - sample implementation TBD + + xxpermdi r,b,a,0 + + + + + xxpermdi r,a,b,3 + @@ -10274,7 +10590,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed long long - sample implementation TBD + + xxpermdi r,b,a,0 + + + + + xxpermdi r,a,b,3 + @@ -10291,7 +10614,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned long long - sample implementation TBD + + xxpermdi r,b,a,0 + + + + + xxpermdi r,a,b,3 + @@ -10308,7 +10638,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" 
xml:id="section_vec_intrinsics"> vector pixel - sample implementation TBD + + vmrghh r,b,a + + + + + vmrglh r,a,b + @@ -10325,7 +10662,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector bool short - sample implementation TBD + + vmrghh r,b,a + + + + + vmrglh r,a,b + @@ -10342,7 +10686,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed short - sample implementation TBD + + vmrghh r,b,a + + + + + vmrglh r,a,b + @@ -10359,7 +10710,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned short - sample implementation TBD + + vmrghh r,b,a + + + + + vmrglh r,a,b + @@ -10376,7 +10734,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector double - sample implementation TBD + + xxpermdi r,b,a,0 + + + + + xxpermdi r,a,b,3 + @@ -10393,7 +10758,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector float - sample implementation TBD + + vmrghw r,b,a + + + + + vmrglw r,a,b + @@ -10410,10 +10782,17 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector _Float16 - sample implementation TBD + + vmrghh r,b,a + + + + + vmrglh r,a,b + - ISA 3.0 or later + Deferred @@ -10425,47 +10804,55 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vec_mergeo - Vector ... Spelled Out Name TBD + Vector Merge Odd - r = vec_mergeo (ARG1, ARG2) + r = vec_mergeo (a, b) Purpose: - Merges the odd-numbered halves of two vectors. + Merges the odd-numbered values from two vectors. - Result value: The odd-numbered elements of ARG1 are stored in the even-numbered elements of the result. -The odd-numbered elements of ARG2 are stored in the - odd-numbered elements of the result. + Result value: The odd-numbered + elements of a are stored into the + even-numbered elements of r. The + odd-numbered elements of b are stored + into the odd-numbered elements of r. 
Endian considerations: - None. + The element numbering within a register is left-to-right for big-endian + targets, and right-to-left for little-endian targets.
Supported type signatures for vec_mergeo - + + - + r - + - ARG1 + a - + - ARG2 + b - Example Implementation + Example LE Implementation + + + Example BE Implementation @@ -10481,7 +10868,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector bool int - sample implementation TBD + + vmrgew r,b,a + + + + + vmrgow r,a,b + @@ -10495,7 +10889,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed int - sample implementation TBD + + vmrgew r,b,a + + + + + vmrgow r,a,b + @@ -10509,7 +10910,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned int - sample implementation TBD + + vmrgew r,b,a + + + + + vmrgow r,a,b + @@ -10523,7 +10931,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector bool long long - sample implementation TBD + + xxpermdi r,b,a,0 + + + + + xxpermdi r,a,b,3 + @@ -10537,7 +10952,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed long long - sample implementation TBD + + xxpermdi r,b,a,0 + + + + + xxpermdi r,a,b,3 + @@ -10551,7 +10973,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned long long - sample implementation TBD + + xxpermdi r,b,a,0 + + + + + xxpermdi r,a,b,3 + @@ -10565,7 +10994,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector double - sample implementation TBD + + xxpermdi r,b,a,0 + + + + + xxpermdi r,a,b,3 + @@ -10579,7 +11015,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector float - sample implementation TBD + + vmrgew r,b,a + + + + + vmrgow r,a,b + @@ -10591,15 +11034,19 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vec_min - Vector ... 
Spelled Out Name TBD + Vector Minimum - r = vec_min (ARG1, ARG2) + r = vec_min (a, b) Purpose: - Returns a vector containing the minimum value from each set of corresponding elements of the given vectors. + Returns a vector containing the minimum value from each set of + corresponding elements of the source vectors. - Result value: The value of each element of the result is the minimum of the values of the corresponding elements of ARG1 and ARG2. + Result value: The value of each + element of r is the minimum of the + values of the corresponding elements of a and b. Endian considerations: None. @@ -10620,12 +11067,12 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - ARG1 + a - ARG2 + b @@ -10645,7 +11092,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed char - sample implementation TBD + + vminsb r,a,b + @@ -10659,7 +11108,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned char - sample implementation TBD + + vminub r,a,b + @@ -10673,7 +11124,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed int - sample implementation TBD + + vminsw r,a,b + @@ -10687,7 +11140,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned int - sample implementation TBD + + vminuw r,a,b + @@ -10701,7 +11156,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed long long - sample implementation TBD + + vminsd r,a,b + @@ -10715,7 +11172,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned long long - sample implementation TBD + + vminud r,a,b + @@ -10729,7 +11188,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed short - sample implementation TBD + + vminsh r,a,b + @@ -10743,7 +11204,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" 
xml:id="section_vec_intrinsics"> vector unsigned short - sample implementation TBD + + vminuh r,a,b + @@ -10757,7 +11220,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector double - sample implementation TBD + + xvmindp r,a,b + @@ -10771,7 +11236,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector float - sample implementation TBD + + xvminsp r,a,b + @@ -10783,15 +11250,24 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vec_mradds - Vector ... Spelled Out Name TBD + Vector Multiply-High Round and Add Saturated - r = vec_mradds (ARG1, ARG2, ARG3) + r = vec_mradds (a, b, c) Purpose: - Returns a vector containing the results of performing a saturated multiply-high-round-and-add operation for each corresponding set of elements of the given vectors. - - Result value: For each element of the result, the value is produced in the following way: The values of the corresponding elements of ARG1 and ARG2 are multiplied and rounded such that the 15 least-significant bits are 0. The value of the 17 most-significant bits of this rounded product is then added, using 16-bit-saturated addition, to the value of the corresponding element of ARG3. + Returns a vector containing the results of performing a saturated + multiply-high-round-and-add operation for each corresponding set of + elements of the source vectors. + + Result value: The value of each + element of r is produced as follows. + The values of the corresponding elements of a and b are + multiplied and rounded such that the 15 least-significant bits are 0. + The value of the 17 most-significant bits of this rounded product is + then added, using 16-bit-saturated addition, to the value of the + corresponding element of c. Endian considerations: None. 
@@ -10806,24 +11282,24 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - + r - + - ARG1 + a - + - ARG2 + b - + - ARG3 + c @@ -10846,7 +11322,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed short - sample implementation TBD + + vmhraddshs r,a,b,c + @@ -10858,15 +11336,21 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vec_msub - Vector ... Spelled Out Name TBD + Vector Multiply-Subtract - r = vec_msub (ARG1, ARG2, ARG3) + r = vec_msub (a, b, c) Purpose: - Returns a vector containing the results of performing a multiply-subtract operation using the given vectors. + Returns a vector containing the results of performing a multiply-subtract + operation using the source vectors. - Result value: This function multiplies each element in ARG1 by the corresponding element in ARG2 and then subtracts the corresponding element in ARG3 from the result. + Result value: Each element of + r is produced by multiplying the + corresponding element of a by the + corresponding element of b and then + subtracting the corresponding element of c. Endian considerations: None. @@ -10881,24 +11365,24 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - + r - + - ARG1 + a - + - ARG2 + b - + - ARG3 + c @@ -10921,7 +11405,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector double - sample implementation TBD + + xvmsubmdp r/a,b,c + @@ -10938,7 +11424,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector float - sample implementation TBD + + xvmsubmsp r/a,b,c + @@ -10950,15 +11438,28 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vec_msum - Vector ... 
Spelled Out Name TBD + Vector Multiply-Sum - r = vec_msum (ARG1, ARG2, ARG3) + r = vec_msum (a, b, c) Purpose: - Returns a vector containing the results of performing a multiply-sum operation using the given vectors. - - Result value: Assume that the elements of each vector are numbered beginning with 0. If ARG1 is a vector signed char or a vector unsigned char vector, then let m be 4. Otherwise, let m be 2. For each element n of the result vector, the value is obtained in the following way: For p = mn to mn + m – 1, multiply element p of ARG1 by element p of ARG2. Add the sum of these products to element n of ARG3. All additions are performed using 32-bit modular arithmetic. + Returns a vector containing the results of performing a multiply-sum + operation using the source vectors. + + Result value: Assume that the + elements of each vector are numbered beginning with 0. If + a is a vector signed char or a vector + unsigned char, then let m be 4. Otherwise, + let m be 2. The value of each element + n of r is obtained + as follows. For p = mn to + mn + m – 1, multiply + element p of a + by element p of b. + Add the sum of these products to element n of + c. All additions are performed using + 32-bit modular arithmetic. Endian considerations: None. 
@@ -10973,24 +11474,24 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - + r - + - ARG1 + a - + - ARG2 + b - + - ARG3 + c @@ -11013,7 +11514,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed int - sample implementation TBD + + vmsummbm r,a,b,c + @@ -11030,7 +11533,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed int - sample implementation TBD + + vmsumshm r,a,b,c + @@ -11047,7 +11552,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned int - sample implementation TBD + + vmsumubm r,a,b,c + @@ -11064,7 +11571,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned int - sample implementation TBD + + vmsumuhm r,a,b,c + @@ -11076,15 +11585,25 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vec_msums - Vector ... Spelled Out Name TBD + Vector Multiply-Sum Saturated - r = vec_msums (ARG1, ARG2, ARG3) + r = vec_msums (a, b, c) Purpose: - Returns a vector containing the results of performing a saturated multiply-sum operation using the given vectors. - - Result value: Assume that the elements of each vector are numbered beginning with 0. For each element n of the result vector, the value is obtained in the following way: For p = 2n to 2n+1, multiply element p of ARG1 by element p of ARG2. Add the sum of these products to element n of ARG3. All additions are performed using 32-bit saturated arithmetic. + Returns a vector containing the results of performing a saturated + multiply-sum operation using the source vectors. + + Result value: Assume that the + elements of each vector are numbered beginning with 0. The value of each + element n of r + is obtained as follows. For p = + 2n to 2n+1, multiply element + p of a by element + p of b. Add the + sum of these products to element n of + c. 
All additions are performed using + 32-bit saturated arithmetic. Endian considerations: None. @@ -11099,24 +11618,24 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - + r - + - ARG1 + a - + - ARG2 + b - + - ARG3 + c @@ -11139,7 +11658,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed int - sample implementation TBD + + vmsumshs r,a,b,c + @@ -11156,7 +11677,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned int - sample implementation TBD + + vmsumuhs r,a,b,c + @@ -11168,18 +11691,40 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vec_mul - Vector ... Spelled Out Name TBD + Vector Multiply - r = vec_mul (ARG1, ARG2) + r = vec_mul (a, b) Purpose: - Returns a vector containing the results of performing a multiply operation using the given vectors. + Returns a vector containing the results of performing a multiply + operation using the source vectors. - Result value: This function multiplies corresponding elements in the given vectors and then assigns the result to corresponding elements in the result vector. + Result value: Each element of + r receives the product of + the corresponding elements of a and + b. Endian considerations: None. + Notes: + + + + The example implementation for vector char assumes that the + address of the permute control vector for the vperm instruction + is in a register identified by pcv. Its value is + {1,17,3,19,5,21,7,23,9,25,11,27,13,29,15,31}. + + + + + There are currently no vector instructions to support vector long + long multiplication, so the compiler must perform two scalar + multiplies on the vector elements for this case. + + +
Supported type signatures for vec_mul @@ -11197,12 +11742,12 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - ARG1 + a - ARG2 + b @@ -11222,7 +11767,12 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed char - sample implementation TBD + + vmulesb t,a,b + vmulosb u,a,b + lxvw4x v,0,pcv + vperm r,t,u,v + @@ -11236,7 +11786,12 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned char - sample implementation TBD + + vmulesb t,a,b + vmulosb u,a,b + lxvw4x v,0,pcv + vperm r,t,u,v + @@ -11250,7 +11805,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed int - sample implementation TBD + + vmuluwm r,a,b + @@ -11264,7 +11821,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned int - sample implementation TBD + + vmuluwm r,a,b + @@ -11278,7 +11837,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed long long - sample implementation TBD + + [scalarized] + @@ -11292,7 +11853,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned long long - sample implementation TBD + + [scalarized] + @@ -11306,7 +11869,10 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed short - sample implementation TBD + + xxspltib t,0 + vmladduhm r,a,b,t + @@ -11320,7 +11886,10 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned short - sample implementation TBD + + xxspltib t,0 + vmladduhm r,a,b,t + @@ -11334,7 +11903,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector double - sample implementation TBD + + xvmuldp r,a,b + @@ -11348,7 +11919,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector float - sample implementation TBD + + xvmulsp r,a,b +