google
diff --git a/lib/Transforms/LowerPolynomialEval/BUILD b/lib/Transforms/LowerPolynomialEval/BUILD
Lines changed: 2 additions & 0 deletions
diff --git a/lib/Transforms/LowerPolynomialEval/LowerPolynomialEval.cpp b/lib/Transforms/LowerPolynomialEval/LowerPolynomialEval.cpp
Lines changed: 24 additions & 17 deletions
diff --git a/lib/Transforms/LowerPolynomialEval/LowerPolynomialEval.td b/lib/Transforms/LowerPolynomialEval/LowerPolynomialEval.td
Lines changed: 4 additions & 0 deletions
diff --git a/lib/Transforms/LowerPolynomialEval/Patterns.cpp b/lib/Transforms/LowerPolynomialEval/Patterns.cpp
Lines changed: 37 additions & 142 deletions
diff --git a/lib/Transforms/LowerPolynomialEval/Patterns.h b/lib/Transforms/LowerPolynomialEval/Patterns.h
Lines changed: 18 additions & 3 deletions
@@ -34,6 +34,8 @@ cc_library(
         "@heir//lib/Utils:MathUtils",
         "@heir//lib/Utils/Polynomial",
         "@heir//lib/Utils/Polynomial:ChebyshevPatersonStockmeyer",
+        "@heir//lib/Utils/Polynomial:Horner",
+        "@heir//lib/Utils/Polynomial:PatersonStockmeyer",
         "@llvm-project//llvm:Support",
         "@llvm-project//mlir:ArithDialect",
         "@llvm-project//mlir:IR",
 
@@ -25,23 +25,30 @@ struct LowerPolynomialEval
     MLIRContext* context = &getContext();
     RewritePatternSet patterns(context);
 
-    if (method == PolynomialApproximationMethod::Automatic) {
-      patterns.add<LowerViaHorner, LowerViaPatersonStockmeyerChebyshev,
-                   LowerViaPatersonStockmeyerMonomial>(context,
-                                                       /*force=*/false);
-    } else if (method == PolynomialApproximationMethod::Horner) {
-      patterns.add<LowerViaHorner>(context, /*force=*/true);
-    } else if (method == PolynomialApproximationMethod::PatersonStockmeyer) {
-      patterns.add<LowerViaPatersonStockmeyerMonomial>(context,
-                                                       /*force=*/true);
-    } else if (method ==
-               PolynomialApproximationMethod::PatersonStockmeyerChebyshev) {
-      patterns.add<LowerViaPatersonStockmeyerChebyshev>(context,
-                                                        /*force=*/true);
-    } else {
-      getOperation()->emitError() << "Unknown lowering method: " << method;
-      signalPassFailure();
-      return;
+    switch (method) {
+      case PolynomialApproximationMethod::Automatic:
+        patterns.add<LowerViaHorner, LowerViaPatersonStockmeyerMonomial>(
+            context, /*force=*/false);
+        patterns.add<LowerViaPatersonStockmeyerChebyshev>(
+            context,
+            /*force=*/false, minCoefficientThreshold);
+        break;
+      case PolynomialApproximationMethod::Horner:
+        patterns.add<LowerViaHorner>(context, /*force=*/true);
+        break;
+      case PolynomialApproximationMethod::PatersonStockmeyer:
+        patterns.add<LowerViaPatersonStockmeyerMonomial>(context,
+                                                         /*force=*/true);
+        break;
+      case PolynomialApproximationMethod::PatersonStockmeyerChebyshev:
+        patterns.add<LowerViaPatersonStockmeyerChebyshev>(
+            context,
+            /*force=*/true, minCoefficientThreshold);
+        break;
+      default:
+        getOperation()->emitError() << "Unknown lowering method: " << method;
+        signalPassFailure();
+        return;
     }
 
     walkAndApplyPatterns(getOperation(), std::move(patterns));
 
@@ -44,6 +44,10 @@ def LowerPolynomialEval : Pass<"lower-polynomial-eval"> {
                 clEnumValN(mlir::heir::PolynomialApproximationMethod::PatersonStockmeyerChebyshev,
                            "pscheb", "Paterson-Stockmeyer method (Chebyshev basis)")
           )}]>,
+    // Only forwarded to the Chebyshev Paterson-Stockmeyer pattern; the Horner
+    // and monomial Paterson-Stockmeyer patterns do not receive it.
+    Option<"minCoefficientThreshold", "min-coefficient-threshold", "double",
+          /*default=*/"1e-12",
+          "Minimum threshold for Chebyshev coefficients to be included in the "
+          "lowered polynomial (used by the Chebyshev Paterson-Stockmeyer "
+          "lowering only). Coefficients with absolute value below this "
+          "threshold will be dropped.">,
   ];
 }
 
 
@@ -1,27 +1,22 @@
 #include "lib/Transforms/LowerPolynomialEval/Patterns.h"
 
-#include <algorithm>
-#include <cmath>
 #include <cstdint>
-#include <vector>
+#include <map>
 
 #include "lib/Dialect/Polynomial/IR/PolynomialAttributes.h"
 #include "lib/Dialect/Polynomial/IR/PolynomialOps.h"
 #include "lib/Kernel/ArithmeticDag.h"
 #include "lib/Kernel/IRMaterializingVisitor.h"
-#include "lib/Kernel/KernelImplementation.h"
-#include "lib/Utils/MathUtils.h"
 #include "lib/Utils/Polynomial/ChebyshevPatersonStockmeyer.h"
+#include "lib/Utils/Polynomial/Horner.h"
+#include "lib/Utils/Polynomial/PatersonStockmeyer.h"
 #include "lib/Utils/Polynomial/Polynomial.h"
 #include "lib/Utils/Utils.h"
-#include "llvm/include/llvm/ADT/SmallVectorExtras.h"   // from @llvm-project
-#include "llvm/include/llvm/ADT/TypeSwitch.h"          // from @llvm-project
-#include "llvm/include/llvm/Support/Casting.h"         // from @llvm-project
-#include "llvm/include/llvm/Support/Debug.h"           // from @llvm-project
-#include "mlir/include/mlir/Dialect/Arith/IR/Arith.h"  // from @llvm-project
-#include "mlir/include/mlir/IR/Attributes.h"           // from @llvm-project
-#include "mlir/include/mlir/IR/Builders.h"             // from @llvm-project
-#include "mlir/include/mlir/IR/BuiltinAttributeInterfaces.h"  // from @llvm-project
+#include "llvm/include/llvm/ADT/SmallVectorExtras.h"     // from @llvm-project
+#include "llvm/include/llvm/Support/Casting.h"           // from @llvm-project
+#include "llvm/include/llvm/Support/Debug.h"             // from @llvm-project
+#include "mlir/include/mlir/Dialect/Arith/IR/Arith.h"    // from @llvm-project
+#include "mlir/include/mlir/IR/Attributes.h"             // from @llvm-project
+#include "mlir/include/mlir/IR/Builders.h"               // from @llvm-project
 #include "mlir/include/mlir/IR/BuiltinAttributes.h"      // from @llvm-project
 #include "mlir/include/mlir/IR/BuiltinTypeInterfaces.h"  // from @llvm-project
 #include "mlir/include/mlir/IR/ImplicitLocOpBuilder.h"   // from @llvm-project
@@ -30,7 +25,6 @@
 #include "mlir/include/mlir/IR/Types.h"                  // from @llvm-project
 #include "mlir/include/mlir/IR/Value.h"                  // from @llvm-project
 #include "mlir/include/mlir/Support/LLVM.h"              // from @llvm-project
-#include "mlir/include/mlir/Transforms/Passes.h"         // from @llvm-project
 
 #define DEBUG_TYPE "lower-polynomial-eval"
 
@@ -46,12 +40,6 @@ using polynomial::TypedFloatPolynomialAttr;
 
 LogicalResult LowerViaHorner::matchAndRewrite(EvalOp op,
                                               PatternRewriter& rewriter) const {
-  Type evaluatedType = op.getValue().getType();
-  ImplicitLocOpBuilder b(op.getLoc(), rewriter);
-  b.setInsertionPoint(op);
-
-  LLVM_DEBUG(llvm::dbgs() << "evaluatedType: " << evaluatedType << "\n");
-
   auto attr =
       dyn_cast<polynomial::TypedFloatPolynomialAttr>(op.getPolynomialAttr());
   if (!attr) return failure();
@@ -62,154 +50,61 @@ LogicalResult LowerViaHorner::matchAndRewrite(EvalOp op,
   const int degreeThreshold = 5;
   if (!shouldForce() && maxDegree > degreeThreshold) return failure();
 
+  // Convert coefficient map to std::map<int64_t, double>
   auto monomialMap = attr.getValue().getPolynomial().getCoeffMap();
-  DenseMap<int64_t, TypedAttr> attributeMap;
+  std::map<int64_t, double> coefficients;
   for (auto& [key, monomial] : monomialMap) {
-    attributeMap.insert(
-        {key, getScalarOrDenseAttr(evaluatedType, monomial.getCoefficient())});
+    double coeffValue = monomial.getCoefficient().convertToDouble();
+    coefficients[key] = coeffValue;
   }
 
-  // Start with the coefficient of the highest degree term
-  Value result =
-      arith::ConstantOp::create(b, evaluatedType, attributeMap[maxDegree]);
-
-  // Apply Horner's method, accounting for possible missing terms
-  auto x = op.getOperand();
-  for (int64_t i = maxDegree - 1; i >= 0; i--) {
-    // Multiply by x
-    result = arith::MulFOp::create(b, result, x);
+  // Create ArithmeticDag nodes
+  auto xNode =
+      kernel::ArithmeticDagNode<kernel::SSAValue>::leaf(op.getOperand());
+  auto resultNode =
+      polynomial::hornerMonomialPolynomialEvaluation(xNode, coefficients);
 
-    // Add coefficient if this term exists, otherwise continue
-    if (attributeMap.find(i) != attributeMap.end()) {
-      auto coeffConst =
-          arith::ConstantOp::create(b, evaluatedType, attributeMap.at(i));
-      result = arith::AddFOp::create(b, result, coeffConst);
-    }
-  }
+  // Use IRMaterializingVisitor to convert to MLIR
+  ImplicitLocOpBuilder b(op.getLoc(), rewriter);
+  kernel::IRMaterializingVisitor visitor(b, op.getValue().getType());
+  Value finalOutput = resultNode->visit(visitor);
 
-  rewriter.replaceOp(op, result);
+  rewriter.replaceOp(op, finalOutput);
   return success();
 }
 
 LogicalResult LowerViaPatersonStockmeyerMonomial::matchAndRewrite(
     EvalOp op, PatternRewriter& rewriter) const {
-  Type evaluatedType = op.getValue().getType();
-  ImplicitLocOpBuilder b(op.getLoc(), rewriter);
-  b.setInsertionPoint(op);
-
   auto attr =
       dyn_cast<polynomial::TypedFloatPolynomialAttr>(op.getPolynomialAttr());
   if (!attr) return failure();
 
   FloatPolynomial polynomial = attr.getValue().getPolynomial();
   auto terms = polynomial.getTerms();
-
   int64_t maxDegree = terms.back().getExponent().getSExtValue();
   const int degreeThreshold = 5;
   if (!shouldForce() && maxDegree > degreeThreshold) return failure();
 
+  // Convert each coefficient to double, keyed as in the polynomial's map.
   auto monomialMap = attr.getValue().getPolynomial().getCoeffMap();
-  DenseMap<int64_t, TypedAttr> attributeMap;
+  std::map<int64_t, double> coefficients;
   for (auto& [key, monomial] : monomialMap) {
-    attributeMap[key] =
-        getScalarOrDenseAttr(evaluatedType, monomial.getCoefficient());
+    double coeffValue = monomial.getCoefficient().convertToDouble();
+    coefficients[key] = coeffValue;
   }
 
-  // Choose k optimally - sqrt of maxDegree is typically a good choice
-  int64_t k = std::max(static_cast<int64_t>(std::ceil(std::sqrt(maxDegree))),
-                       static_cast<int64_t>(1));
-
-  // Precompute x^1, x^2, ..., x^k
-  Value x = op.getOperand();
-  std::vector<Value> xPowers(k + 1);
-  xPowers[0] =
-      arith::ConstantOp::create(b, evaluatedType, b.getOneAttr(evaluatedType));
-  xPowers[1] = x;
-  for (int64_t i = 2; i <= k; i++) {
-    if (i % 2 == 0) {
-      // x^{2k} = (x^{k})^2
-      xPowers[i] =
-          arith::MulFOp::create(b, xPowers[i / 2], xPowers[i / 2]).getResult();
-    } else {
-      // x^{2k+1} = x^{k}x^{k+1}
-      xPowers[i] = arith::MulFOp::create(b, xPowers[i / 2], xPowers[i / 2 + 1])
-                       .getResult();
-    }
-  }
-
-  // Number of chunks we'll need
-  int64_t m =
-      static_cast<int64_t>(std::ceil(static_cast<double>(maxDegree + 1) / k));
-  std::vector<Value> chunkValues(m, nullptr);
-
-  for (int64_t i = 0; i < m; i++) {
-    // Start with coefficient of degree (i+1)*k-1, if present
-    int64_t highestDegreeInChunk = std::min((i + 1) * k - 1, maxDegree);
-    int64_t lowestDegreeInChunk = i * k;
+  // Wrap the operand in an ArithmeticDag leaf and build the evaluation DAG.
+  auto xNode =
+      kernel::ArithmeticDagNode<kernel::SSAValue>::leaf(op.getOperand());
+  auto resultNode = polynomial::patersonStockmeyerMonomialPolynomialEvaluation(
+      xNode, coefficients);
 
-    Value chunkValue = nullptr;
-    bool hasTerms = false;
-
-    for (int64_t j = lowestDegreeInChunk; j <= highestDegreeInChunk; j++) {
-      if (attributeMap.count(j)) {
-        // Get the power index relative to the chunk's starting point
-        int64_t powerIndex = j - lowestDegreeInChunk;
-
-        Value coeff =
-            arith::ConstantOp::create(b, evaluatedType, attributeMap[j]);
-        Value term;
-
-        if (powerIndex == 0) {
-          term = coeff;  // x^0 = 1
-        } else {
-          term = arith::MulFOp::create(b, coeff, xPowers[powerIndex]);
-        }
-
-        if (!hasTerms) {
-          chunkValue = term;
-          hasTerms = true;
-        } else {
-          chunkValue = arith::AddFOp::create(b, chunkValue, term);
-        }
-      }
-    }
-
-    if (hasTerms) {
-      chunkValues[i] = chunkValue;
-    } else {
-      chunkValues[i] = arith::ConstantOp::create(b, evaluatedType,
-                                                 b.getZeroAttr(evaluatedType));
-    }
-  }
-
-  // Combine chunks using Horner's method with x^k
-  Value result = nullptr;
-  bool hasNonEmptyChunk = false;
-
-  for (int64_t i = m - 1; i >= 0; i--) {
-    if (chunkValues[i]) {
-      if (!hasNonEmptyChunk) {
-        // First non-empty chunk encountered
-        result = chunkValues[i];
-        hasNonEmptyChunk = true;
-      } else {
-        // Multiply previous result by x^k and add this chunk
-        result = arith::MulFOp::create(b, result, xPowers[k]);
-        result = arith::AddFOp::create(b, result, chunkValues[i]);
-      }
-    } else if (hasNonEmptyChunk) {
-      // Empty chunk but we have previous chunks
-      result = arith::MulFOp::create(b, result, xPowers[k]);
-    }
-  }
-
-  // Handle the case where no terms were found
-  if (!hasNonEmptyChunk) {
-    result = arith::ConstantOp::create(b, evaluatedType,
-                                       b.getZeroAttr(evaluatedType));
-  }
+  // Visit the DAG with IRMaterializingVisitor to obtain the MLIR Value.
+  ImplicitLocOpBuilder b(op.getLoc(), rewriter);
+  kernel::IRMaterializingVisitor visitor(b, op.getValue().getType());
+  Value finalOutput = resultNode->visit(visitor);
 
-  rewriter.replaceOp(op, result);
+  rewriter.replaceOp(op, finalOutput);
   return success();
 }
 
@@ -261,7 +156,7 @@ LogicalResult LowerViaPatersonStockmeyerChebyshev::matchAndRewrite(
   SSAValue xNode(xInput);
 
   auto resultNode = polynomial::patersonStockmeyerChebyshevPolynomialEvaluation(
-      xNode, chebCoeffs);
+      xNode, chebCoeffs, getMinCoefficientThreshold());
 
   IRMaterializingVisitor visitor(b, op.getValue().getType());
   Value finalOutput = resultNode->visit(visitor);
 
@@ -12,7 +12,7 @@ namespace mlir {
 namespace heir {
 
 struct LoweringBase : public OpRewritePattern<polynomial::EvalOp> {
-  LoweringBase(mlir::MLIRContext* context, bool force = false)
+  LoweringBase(MLIRContext* context, bool force = false)
       : mlir::OpRewritePattern<polynomial::EvalOp>(context), force(force) {}
 
   bool shouldForce() const { return force; }
@@ -23,6 +23,21 @@ struct LoweringBase : public OpRewritePattern<polynomial::EvalOp> {
   const bool force;
 };
 
+// Base class for Chebyshev-basis lowerings of polynomial.eval. Extends
+// LoweringBase with a coefficient-magnitude threshold that derived patterns
+// can read via getMinCoefficientThreshold().
+struct ChebyshevLoweringBase : public LoweringBase {
+  // LoweringBase's constructors are intentionally not inherited: an inherited
+  // constructor cannot initialize the const threshold member, and the defaults
+  // below already accept every argument list LoweringBase's constructor does.
+  ChebyshevLoweringBase(MLIRContext* context, bool force = false,
+                        double minCoefficientThreshold = 1e-12)
+      : LoweringBase(context, force),
+        minCoefficientThreshold(minCoefficientThreshold) {}
+
+  // Returns the threshold supplied at construction (1e-12 by default).
+  double getMinCoefficientThreshold() const { return minCoefficientThreshold; }
+
+ private:
+  // Minimum threshold for coefficients to be included in the lowered polynomial
+  const double minCoefficientThreshold;
+};
+
 // Lower polynomial.eval that uses a monomial float polynomial to a series of
 // adds and muls via Horner's method. Supports scalar and tensor operands of
 // floating point types.
@@ -46,8 +61,8 @@ struct LowerViaPatersonStockmeyerMonomial : public LoweringBase {
 // Lower polynomial.eval that uses a Chebyshev float polynomial to a series of
 // adds and muls via the Paterson-Stockmeyer method. Supports scalar and tensor
 // operands of floating point types.
-struct LowerViaPatersonStockmeyerChebyshev : public LoweringBase {
-  using LoweringBase::LoweringBase;
+struct LowerViaPatersonStockmeyerChebyshev : public ChebyshevLoweringBase {
+  using ChebyshevLoweringBase::ChebyshevLoweringBase;
 
   LogicalResult matchAndRewrite(polynomial::EvalOp op,
                                 PatternRewriter& rewriter) const override;