
Commit 1e52343

added Float64

Signed-off-by: Phuong Nguyen <phuonguyen@nvidia.com>
1 parent f8cb598 commit 1e52343

9 files changed (+33, -6 lines)

tests/cpp/test_common.cu

Lines changed: 1 addition & 0 deletions
@@ -58,6 +58,7 @@ const std::string &typeName(DType type) {
     {DType::kInt32, "int32"},
     {DType::kInt64, "int64"},
     {DType::kFloat32, "float32"},
+    {DType::kFloat64, "float64"},
     {DType::kFloat16, "float16"},
     {DType::kBFloat16, "bfloat16"},
     {DType::kFloat8E4M3, "float8e4m3"},

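As a quick illustration of what the new map entry buys the C++ tests, a minimal hedged sketch (namespace qualification of typeName() as used elsewhere in the test suite is assumed):

    #include "test_common.h"  // test helpers; declares the typeName() shown above
    // The lookup table now covers the 64-bit float enum value:
    const std::string &name = typeName(transformer_engine::DType::kFloat64);  // yields "float64"
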
tests/cpp/test_common.h

Lines changed: 2 additions & 2 deletions
@@ -84,9 +84,9 @@ struct BitsNumber {
 template <typename T>
 struct TypeInfo {
 #if FP4_TYPE_SUPPORTED
-  using types = std::tuple<byte, int16, int32, int64, fp32, fp16, bf16, fp8e4m3, fp8e5m2, fp8e8m0, fp4e2m1>;
+  using types = std::tuple<byte, int16, int32, int64, fp32, fp16, bf16, fp8e4m3, fp8e5m2, fp8e8m0, fp4e2m1, fp64>;
 #else
-  using types = std::tuple<byte, int16, int32, int64, fp32, fp16, bf16, fp8e4m3, fp8e5m2, fp8e8m0>;
+  using types = std::tuple<byte, int16, int32, int64, fp32, fp16, bf16, fp8e4m3, fp8e5m2, fp8e8m0, fp64>;
 #endif

 template <typename U, DType current>

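A hedged compile-time check that the appended element really lands in the typed-test list (standard library only; TypeInfo and fp64 come from the hunk above, namespace qualification omitted):

    #include <tuple>
    #include <type_traits>
    // fp64 was appended at the end of both branches of the #if, so it is the last tuple element.
    using test_types = TypeInfo<fp64>::types;
    static_assert(std::is_same<std::tuple_element_t<std::tuple_size<test_types>::value - 1, test_types>,
                               fp64>::value,
                  "fp64 is part of the typed-test list");
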
transformer_engine/common/common.cu

Lines changed: 2 additions & 0 deletions
@@ -33,6 +33,8 @@ cudaDataType_t get_cuda_dtype(const transformer_engine::DType t) {
       return CUDA_R_16F;
     case DType::kFloat32:
       return CUDA_R_32F;
+    case DType::kFloat64:
+      return CUDA_R_64F;
     case DType::kBFloat16:
       return CUDA_R_16BF;
     case DType::kFloat8E4M3:

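A hedged usage sketch: callers that query get_cuda_dtype(), for example when filling in cuBLAS or cuSOLVER descriptors, now get the 64-bit real CUDA type for double tensors (qualification of get_cuda_dtype itself is omitted here):

    #include <cassert>
    #include <library_types.h>  // cudaDataType_t, CUDA_R_64F (CUDA toolkit header)
    // After this change the helper covers FP64 as well:
    cudaDataType_t cu_type = get_cuda_dtype(transformer_engine::DType::kFloat64);
    assert(cu_type == CUDA_R_64F);
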
transformer_engine/common/common.h

Lines changed: 8 additions & 2 deletions
@@ -321,6 +321,7 @@ using int16 = int16_t;
 using int32 = int32_t;
 using int64 = int64_t;
 using fp32 = float;
+using fp64 = double;
 using fp16 = half;
 using bf16 = nv_bfloat16;
 using fp8e4m3 = __nv_fp8_e4m3;
@@ -349,6 +350,7 @@ TRANSFORMER_ENGINE_TYPE_NAME(int16_t)
 TRANSFORMER_ENGINE_TYPE_NAME(int32_t)
 TRANSFORMER_ENGINE_TYPE_NAME(int64_t)
 TRANSFORMER_ENGINE_TYPE_NAME(float)
+TRANSFORMER_ENGINE_TYPE_NAME(double)
 TRANSFORMER_ENGINE_TYPE_NAME(half)
 TRANSFORMER_ENGINE_TYPE_NAME(nv_bfloat16)
 TRANSFORMER_ENGINE_TYPE_NAME(__nv_fp8_e4m3)
@@ -421,14 +423,14 @@ struct BitsNumber {
 template <typename T>
 struct TypeInfo {
 #if FP4_TYPE_SUPPORTED
-  using types = std::tuple<byte, int16, int32, int64, fp32, fp16, bf16, fp8e4m3, fp8e5m2, fp4e2m1
+  using types = std::tuple<byte, int16, int32, int64, fp32, fp16, bf16, fp8e4m3, fp8e5m2, fp4e2m1, fp64
 #if CUDA_VERSION >= 12080
       ,
       fp8e8m0
 #endif
       >;
 #else
-  using types = std::tuple<byte, int16, int32, int64, fp32, fp16, bf16, fp8e4m3, fp8e5m2
+  using types = std::tuple<byte, int16, int32, int64, fp32, fp16, bf16, fp8e4m3, fp8e5m2, fp64
 #if CUDA_VERSION >= 12080
       ,
       fp8e8m0
@@ -497,6 +499,10 @@ struct TypeInfo {
       using type = float; \
       { __VA_ARGS__ } \
     } break; \
+    case DType::kFloat64: { \
+      using type = float; \
+      { __VA_ARGS__ } \
+    } break; \
     case DType::kFloat16: { \
       using type = fp16; \
       { __VA_ARGS__ } \

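The new alias is simply double under another name, keeping the naming consistent with fp32/fp16/bf16; a trivial hedged sketch:

    #include <type_traits>
    // using fp64 = double;  (the alias added above)
    static_assert(std::is_same<fp64, double>::value, "fp64 aliases double");
    static_assert(sizeof(fp64) == 8, "fp64 occupies 64 bits");
    fp64 accum = 0.0;  // usable wherever the other fpXX aliases are used
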
transformer_engine/common/fused_router/utils.h

Lines changed: 9 additions & 1 deletion
@@ -215,10 +215,14 @@ __device__ inline void naive_topk_and_mask(T *scores, int data_size, int topk, i
   }
 }

-// Current TE only support float32/bf16/fp16, float64 probs should be considered in the future
+// Current TE only support float32/bf16/fp16/fp64
 #define TE_ROUTER_PROBS_TYPE_SWITCH_ALL(dtype, type, ...) \
   switch (dtype) { \
     using namespace transformer_engine; \
+    case DType::kFloat64: { \
+      using type = double; \
+      { __VA_ARGS__ } \
+    } break; \
     case DType::kFloat32: { \
       using type = float; \
       { __VA_ARGS__ } \
@@ -254,6 +258,10 @@ __device__ inline void naive_topk_and_mask(T *scores, int data_size, int topk, i
       using type = float; \
       { __VA_ARGS__ } \
     } break; \
+    case DType::kFloat64: { \
+      using type = double; \
+      { __VA_ARGS__ } \
+    } break; \
     default: \
       NVTE_ERROR("Invalid type."); \
   }

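A hedged usage sketch of the extended dispatch macro: when the runtime dtype is kFloat64, the body's type alias is bound to double. Only the macro comes from the hunk above; the kernel name and launch parameters below are hypothetical.

    // probs_dtype is a runtime transformer_engine::DType value
    TE_ROUTER_PROBS_TYPE_SWITCH_ALL(probs_dtype, scalar_t, {
      // for DType::kFloat64 this block is instantiated with scalar_t == double
      hypothetical_router_kernel<scalar_t><<<grid, block, 0, stream>>>(probs_ptr, num_tokens);
    });
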
transformer_engine/common/include/transformer_engine/transformer_engine.h

Lines changed: 3 additions & 1 deletion
@@ -33,6 +33,7 @@ enum NVTEDType {
   kNVTEFloat8E5M2 = 8, /*!< 8-bit float (E5M2) */
   kNVTEFloat8E8M0 = 9, /*!< 8-bit float (E8M0) */
   kNVTEFloat4E2M1 = 10, /*!< 4-bit float (E2M1) */
+  kNVTEFloat64 = 11, /*!< 64-bit float */
   kNVTENumTypes /*!< Number of supported types */
 };

@@ -418,6 +419,7 @@ enum class DType {
   kFloat8E5M2 = 8,
   kFloat8E8M0 = 9,
   kFloat4E2M1 = 10,
+  kFloat64 = 11,
   kNumTypes
 };

@@ -443,7 +445,7 @@ inline bool is_fp4_dtype(const DType t) { return t == DType::kFloat4E2M1; }
  * \param[in] DType TE Datatype of interest
  */
 inline bool is_high_precision_dtype(const DType t) {
-  return t == DType::kFloat32 || t == DType::kBFloat16 || t == DType::kFloat16;
+  return t == DType::kFloat64 || t == DType::kFloat32 || t == DType::kBFloat16 || t == DType::kFloat16;
 }

 /*! \struct TensorWrapper

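With the enum extension, 64-bit floats are also classified as a high-precision dtype; a minimal hedged sketch:

    using transformer_engine::DType;
    static_assert(static_cast<int>(DType::kFloat64) == 11, "kFloat64 takes the next free enum slot");
    bool hp = transformer_engine::is_high_precision_dtype(DType::kFloat64);  // true after this change
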
transformer_engine/common/transformer_engine.cpp

Lines changed: 2 additions & 0 deletions
@@ -39,6 +39,8 @@ std::string to_string(const DType type) {
       return "Float16";
     case DType::kFloat32:
       return "Float32";
+    case DType::kFloat64:
+      return "Float64";
     case DType::kFloat8E4M3:
       return "Float8E4M3";
     case DType::kFloat8E5M2:

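The added branch follows the existing naming pattern, so the printable form is simply (hedged sketch; to_string() is the helper shown in the hunk above):

    std::string s = to_string(transformer_engine::DType::kFloat64);  // "Float64"
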
transformer_engine/common/util/pybind_helper.h

Lines changed: 1 addition & 0 deletions
@@ -18,6 +18,7 @@
   pybind11::enum_<transformer_engine::DType>(m, "DType", pybind11::module_local()) \
       .value("kByte", transformer_engine::DType::kByte) \
       .value("kInt32", transformer_engine::DType::kInt32) \
+      .value("kFloat64", transformer_engine::DType::kFloat64) \
       .value("kFloat32", transformer_engine::DType::kFloat32) \
       .value("kFloat16", transformer_engine::DType::kFloat16) \
       .value("kBFloat16", transformer_engine::DType::kBFloat16) \

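For context, a hedged, standalone pybind11 sketch of what an enum binding like the one above exposes to Python. The module and enum here are illustrative stand-ins, not TE's actual binding:

    #include <pybind11/pybind11.h>
    namespace py = pybind11;

    enum class DType { kInt32, kFloat32, kFloat64 };  // illustrative subset only

    PYBIND11_MODULE(dtype_demo, m) {  // hypothetical demo module, not TE's
      py::enum_<DType>(m, "DType", py::module_local())
          .value("kInt32", DType::kInt32)
          .value("kFloat64", DType::kFloat64)  // mirrors the line added above
          .value("kFloat32", DType::kFloat32);
      // The Python side can then reference dtype_demo.DType.kFloat64.
    }
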
transformer_engine/pytorch/csrc/common.h

Lines changed: 5 additions & 0 deletions
@@ -347,6 +347,7 @@ transformer_engine::DType getTransformerEngineFP8Type(bool e4m3_if_hybrid,
 inline size_t typeToNumBits(transformer_engine::DType t) {
   switch (t) {
     case transformer_engine::DType::kInt64:
+    case transformer_engine::DType::kFloat64:
       return 64;
     case transformer_engine::DType::kInt32:
     case transformer_engine::DType::kFloat32:
@@ -376,6 +377,8 @@ inline at::ScalarType GetATenDType(transformer_engine::DType t) {
       return torch::kInt64;
     case transformer_engine::DType::kFloat32:
       return at::kFloat;
+    case transformer_engine::DType::kFloat64:
+      return at::kDouble;
     case transformer_engine::DType::kFloat16:
       return at::kHalf;
     case transformer_engine::DType::kBFloat16:
@@ -401,6 +404,8 @@ inline transformer_engine::DType GetTransformerEngineDType(at::ScalarType t) {
       return transformer_engine::DType::kFloat16;
     case at::kFloat:
       return transformer_engine::DType::kFloat32;
+    case at::kDouble:
+      return transformer_engine::DType::kFloat64;
     case at::kBFloat16:
       return transformer_engine::DType::kBFloat16;
     case at::kBool:

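Taken together, these three hunks let double tensors round-trip between ATen and TE dtypes; a hedged sketch (namespace qualification of the helpers as in csrc/common.h is assumed):

    #include <ATen/ATen.h>
    // torch::kFloat64 tensors can now pass through code that consults these helpers:
    at::ScalarType aten_dtype = GetATenDType(transformer_engine::DType::kFloat64);  // at::kDouble
    transformer_engine::DType te_dtype = GetTransformerEngineDType(at::kDouble);    // DType::kFloat64
    size_t bits = typeToNumBits(transformer_engine::DType::kFloat64);               // 64
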