11#include " lib/Transforms/LowerPolynomialEval/Patterns.h"
22
3- #include < algorithm>
4- #include < cmath>
53#include < cstdint>
6- #include < vector>
74
85#include " lib/Dialect/Polynomial/IR/PolynomialAttributes.h"
96#include " lib/Dialect/Polynomial/IR/PolynomialOps.h"
107#include " lib/Kernel/ArithmeticDag.h"
118#include " lib/Kernel/IRMaterializingVisitor.h"
12- #include " lib/Kernel/KernelImplementation.h"
13- #include " lib/Utils/MathUtils.h"
149#include " lib/Utils/Polynomial/ChebyshevPatersonStockmeyer.h"
10+ #include " lib/Utils/Polynomial/Horner.h"
11+ #include " lib/Utils/Polynomial/PatersonStockmeyer.h"
1512#include " lib/Utils/Polynomial/Polynomial.h"
1613#include " lib/Utils/Utils.h"
17- #include " llvm/include/llvm/ADT/SmallVectorExtras.h" // from @llvm-project
18- #include " llvm/include/llvm/ADT/TypeSwitch.h" // from @llvm-project
19- #include " llvm/include/llvm/Support/Casting.h" // from @llvm-project
20- #include " llvm/include/llvm/Support/Debug.h" // from @llvm-project
21- #include " mlir/include/mlir/Dialect/Arith/IR/Arith.h" // from @llvm-project
22- #include " mlir/include/mlir/IR/Attributes.h" // from @llvm-project
23- #include " mlir/include/mlir/IR/Builders.h" // from @llvm-project
24- #include " mlir/include/mlir/IR/BuiltinAttributeInterfaces.h" // from @llvm-project
14+ #include " llvm/include/llvm/ADT/SmallVectorExtras.h" // from @llvm-project
15+ #include " llvm/include/llvm/Support/Casting.h" // from @llvm-project
16+ #include " llvm/include/llvm/Support/Debug.h" // from @llvm-project
17+ #include " mlir/include/mlir/Dialect/Arith/IR/Arith.h" // from @llvm-project
18+ #include " mlir/include/mlir/IR/Attributes.h" // from @llvm-project
19+ #include " mlir/include/mlir/IR/Builders.h" // from @llvm-project
2520#include " mlir/include/mlir/IR/BuiltinAttributes.h" // from @llvm-project
2621#include " mlir/include/mlir/IR/BuiltinTypeInterfaces.h" // from @llvm-project
2722#include " mlir/include/mlir/IR/ImplicitLocOpBuilder.h" // from @llvm-project
3025#include " mlir/include/mlir/IR/Types.h" // from @llvm-project
3126#include " mlir/include/mlir/IR/Value.h" // from @llvm-project
3227#include " mlir/include/mlir/Support/LLVM.h" // from @llvm-project
33- #include " mlir/include/mlir/Transforms/Passes.h" // from @llvm-project
3428
3529#define DEBUG_TYPE " lower-polynomial-eval"
3630
@@ -46,12 +40,6 @@ using polynomial::TypedFloatPolynomialAttr;
4640
4741LogicalResult LowerViaHorner::matchAndRewrite (EvalOp op,
4842 PatternRewriter& rewriter) const {
49- Type evaluatedType = op.getValue ().getType ();
50- ImplicitLocOpBuilder b (op.getLoc (), rewriter);
51- b.setInsertionPoint (op);
52-
53- LLVM_DEBUG (llvm::dbgs () << " evaluatedType: " << evaluatedType << " \n " );
54-
5543 auto attr =
5644 dyn_cast<polynomial::TypedFloatPolynomialAttr>(op.getPolynomialAttr ());
5745 if (!attr) return failure ();
@@ -62,154 +50,61 @@ LogicalResult LowerViaHorner::matchAndRewrite(EvalOp op,
6250 const int degreeThreshold = 5 ;
6351 if (!shouldForce () && maxDegree > degreeThreshold) return failure ();
6452
53+ // Convert coefficient map to std::map<int64_t, double>
6554 auto monomialMap = attr.getValue ().getPolynomial ().getCoeffMap ();
66- DenseMap <int64_t , TypedAttr> attributeMap ;
55+ std::map <int64_t , double > coefficients ;
6756 for (auto & [key, monomial] : monomialMap) {
68- attributeMap. insert (
69- { key, getScalarOrDenseAttr (evaluatedType, monomial. getCoefficient ())}) ;
57+ double coeffValue = monomial. getCoefficient (). convertToDouble ();
58+ coefficients[ key] = coeffValue ;
7059 }
7160
72- // Start with the coefficient of the highest degree term
73- Value result =
74- arith::ConstantOp::create (b, evaluatedType, attributeMap[maxDegree]);
75-
76- // Apply Horner's method, accounting for possible missing terms
77- auto x = op.getOperand ();
78- for (int64_t i = maxDegree - 1 ; i >= 0 ; i--) {
79- // Multiply by x
80- result = arith::MulFOp::create (b, result, x);
61+ // Create ArithmeticDag nodes
62+ auto xNode =
63+ kernel::ArithmeticDagNode<kernel::SSAValue>::leaf (op.getOperand ());
64+ auto resultNode =
65+ polynomial::hornerMonomialPolynomialEvaluation (xNode, coefficients);
8166
82- // Add coefficient if this term exists, otherwise continue
83- if (attributeMap.find (i) != attributeMap.end ()) {
84- auto coeffConst =
85- arith::ConstantOp::create (b, evaluatedType, attributeMap.at (i));
86- result = arith::AddFOp::create (b, result, coeffConst);
87- }
88- }
67+ // Use IRMaterializingVisitor to convert to MLIR
68+ ImplicitLocOpBuilder b (op.getLoc (), rewriter);
69+ kernel::IRMaterializingVisitor visitor (b, op.getValue ().getType ());
70+ Value finalOutput = resultNode->visit (visitor);
8971
90- rewriter.replaceOp (op, result );
72+ rewriter.replaceOp (op, finalOutput );
9173 return success ();
9274}
9375
9476LogicalResult LowerViaPatersonStockmeyerMonomial::matchAndRewrite (
9577 EvalOp op, PatternRewriter& rewriter) const {
96- Type evaluatedType = op.getValue ().getType ();
97- ImplicitLocOpBuilder b (op.getLoc (), rewriter);
98- b.setInsertionPoint (op);
99-
10078 auto attr =
10179 dyn_cast<polynomial::TypedFloatPolynomialAttr>(op.getPolynomialAttr ());
10280 if (!attr) return failure ();
10381
10482 FloatPolynomial polynomial = attr.getValue ().getPolynomial ();
10583 auto terms = polynomial.getTerms ();
106-
10784 int64_t maxDegree = terms.back ().getExponent ().getSExtValue ();
10885 const int degreeThreshold = 5 ;
10986 if (!shouldForce () && maxDegree > degreeThreshold) return failure ();
11087
88+ // Convert coefficient map to std::map<int64_t, double>
11189 auto monomialMap = attr.getValue ().getPolynomial ().getCoeffMap ();
112- DenseMap <int64_t , TypedAttr> attributeMap ;
90+ std::map <int64_t , double > coefficients ;
11391 for (auto & [key, monomial] : monomialMap) {
114- attributeMap[key] =
115- getScalarOrDenseAttr (evaluatedType, monomial. getCoefficient ()) ;
92+ double coeffValue = monomial. getCoefficient (). convertToDouble ();
93+ coefficients[key] = coeffValue ;
11694 }
11795
118- // Choose k optimally - sqrt of maxDegree is typically a good choice
119- int64_t k = std::max (static_cast <int64_t >(std::ceil (std::sqrt (maxDegree))),
120- static_cast <int64_t >(1 ));
121-
122- // Precompute x^1, x^2, ..., x^k
123- Value x = op.getOperand ();
124- std::vector<Value> xPowers (k + 1 );
125- xPowers[0 ] =
126- arith::ConstantOp::create (b, evaluatedType, b.getOneAttr (evaluatedType));
127- xPowers[1 ] = x;
128- for (int64_t i = 2 ; i <= k; i++) {
129- if (i % 2 == 0 ) {
130- // x^{2k} = (x^{k})^2
131- xPowers[i] =
132- arith::MulFOp::create (b, xPowers[i / 2 ], xPowers[i / 2 ]).getResult ();
133- } else {
134- // x^{2k+1} = x^{k}x^{k+1}
135- xPowers[i] = arith::MulFOp::create (b, xPowers[i / 2 ], xPowers[i / 2 + 1 ])
136- .getResult ();
137- }
138- }
139-
140- // Number of chunks we'll need
141- int64_t m =
142- static_cast <int64_t >(std::ceil (static_cast <double >(maxDegree + 1 ) / k));
143- std::vector<Value> chunkValues (m, nullptr );
144-
145- for (int64_t i = 0 ; i < m; i++) {
146- // Start with coefficient of degree (i+1)*k-1, if present
147- int64_t highestDegreeInChunk = std::min ((i + 1 ) * k - 1 , maxDegree);
148- int64_t lowestDegreeInChunk = i * k;
96+ // Create ArithmeticDag nodes
97+ auto xNode =
98+ kernel::ArithmeticDagNode<kernel::SSAValue>::leaf (op.getOperand ());
99+ auto resultNode = polynomial::patersonStockmeyerMonomialPolynomialEvaluation (
100+ xNode, coefficients);
149101
150- Value chunkValue = nullptr ;
151- bool hasTerms = false ;
152-
153- for (int64_t j = lowestDegreeInChunk; j <= highestDegreeInChunk; j++) {
154- if (attributeMap.count (j)) {
155- // Get the power index relative to the chunk's starting point
156- int64_t powerIndex = j - lowestDegreeInChunk;
157-
158- Value coeff =
159- arith::ConstantOp::create (b, evaluatedType, attributeMap[j]);
160- Value term;
161-
162- if (powerIndex == 0 ) {
163- term = coeff; // x^0 = 1
164- } else {
165- term = arith::MulFOp::create (b, coeff, xPowers[powerIndex]);
166- }
167-
168- if (!hasTerms) {
169- chunkValue = term;
170- hasTerms = true ;
171- } else {
172- chunkValue = arith::AddFOp::create (b, chunkValue, term);
173- }
174- }
175- }
176-
177- if (hasTerms) {
178- chunkValues[i] = chunkValue;
179- } else {
180- chunkValues[i] = arith::ConstantOp::create (b, evaluatedType,
181- b.getZeroAttr (evaluatedType));
182- }
183- }
184-
185- // Combine chunks using Horner's method with x^k
186- Value result = nullptr ;
187- bool hasNonEmptyChunk = false ;
188-
189- for (int64_t i = m - 1 ; i >= 0 ; i--) {
190- if (chunkValues[i]) {
191- if (!hasNonEmptyChunk) {
192- // First non-empty chunk encountered
193- result = chunkValues[i];
194- hasNonEmptyChunk = true ;
195- } else {
196- // Multiply previous result by x^k and add this chunk
197- result = arith::MulFOp::create (b, result, xPowers[k]);
198- result = arith::AddFOp::create (b, result, chunkValues[i]);
199- }
200- } else if (hasNonEmptyChunk) {
201- // Empty chunk but we have previous chunks
202- result = arith::MulFOp::create (b, result, xPowers[k]);
203- }
204- }
205-
206- // Handle the case where no terms were found
207- if (!hasNonEmptyChunk) {
208- result = arith::ConstantOp::create (b, evaluatedType,
209- b.getZeroAttr (evaluatedType));
210- }
102+ // Use IRMaterializingVisitor to convert to MLIR
103+ ImplicitLocOpBuilder b (op.getLoc (), rewriter);
104+ kernel::IRMaterializingVisitor visitor (b, op.getValue ().getType ());
105+ Value finalOutput = resultNode->visit (visitor);
211106
212- rewriter.replaceOp (op, result );
107+ rewriter.replaceOp (op, finalOutput );
213108 return success ();
214109}
215110
@@ -261,7 +156,7 @@ LogicalResult LowerViaPatersonStockmeyerChebyshev::matchAndRewrite(
261156 SSAValue xNode (xInput);
262157
263158 auto resultNode = polynomial::patersonStockmeyerChebyshevPolynomialEvaluation (
264- xNode, chebCoeffs);
159+ xNode, chebCoeffs, getMinCoefficientThreshold () );
265160
266161 IRMaterializingVisitor visitor (b, op.getValue ().getType ());
267162 Value finalOutput = resultNode->visit (visitor);
0 commit comments