tilelang.intrinsics.mma_layout
==============================

.. py:module:: tilelang.intrinsics.mma_layout


Attributes
----------

.. autoapisummary::

   tilelang.intrinsics.mma_layout.shared_16x8_to_mma_32x4_layout_sr_a
   tilelang.intrinsics.mma_layout.shared_16x8_to_mma_32x4_layout_sr_b
   tilelang.intrinsics.mma_layout.shared_16x8_to_mma_32x4_layout_rs_a
   tilelang.intrinsics.mma_layout.shared_16x8_to_mma_32x4_layout_rs_b
   tilelang.intrinsics.mma_layout.shared_16x16_to_mma_32x8_layout_sr_a
   tilelang.intrinsics.mma_layout.shared_16x16_to_mma_32x8_layout_sr_b
   tilelang.intrinsics.mma_layout.shared_16x16_to_mma_32x8_layout_rs_a
   tilelang.intrinsics.mma_layout.shared_16x16_to_mma_32x8_layout_rs_b
   tilelang.intrinsics.mma_layout.shared_16x32_to_mma_32x16_layout_sr_a
   tilelang.intrinsics.mma_layout.shared_16x32_to_mma_32x16_layout_sr_b
   tilelang.intrinsics.mma_layout.shared_16x32_to_mma_32x16_layout_rs_a
   tilelang.intrinsics.mma_layout.shared_16x32_to_mma_32x16_layout_rs_b


Functions
---------

.. autoapisummary::

   tilelang.intrinsics.mma_layout.ldmatrix_32x4_to_shared_16x8_layout_a
   tilelang.intrinsics.mma_layout.ldmatrix_32x4_to_shared_16x8_layout_b
   tilelang.intrinsics.mma_layout.ldmatrix_32x8_to_shared_16x16_layout
   tilelang.intrinsics.mma_layout.ldmatrix_trans_32x8_to_shared_16x16_layout
   tilelang.intrinsics.mma_layout.ldmatrix_32x16_to_shared_16x32_layout_a
   tilelang.intrinsics.mma_layout.ldmatrix_32x16_to_shared_16x32_layout_b
   tilelang.intrinsics.mma_layout.mma_store_32x8_to_shared_16x16_layout
   tilelang.intrinsics.mma_layout.mma_store_32x2_to_shared_8x8_layout_fp64
   tilelang.intrinsics.mma_layout.shared_16x8_to_mma_a_32x4_layout
   tilelang.intrinsics.mma_layout.shared_16x8_to_mma_a_32x4_layout_trans
   tilelang.intrinsics.mma_layout.shared_16x8_to_mma_b_32x4_layout
   tilelang.intrinsics.mma_layout.shared_16x8_to_mma_b_32x4_layout_trans
   tilelang.intrinsics.mma_layout.shared_16x16_to_mma_a_32x8_layout
   tilelang.intrinsics.mma_layout.shared_16x16_to_mma_a_32x8_layout_trans
   tilelang.intrinsics.mma_layout.shared_16x16_to_mma_b_32x8_layout
   tilelang.intrinsics.mma_layout.shared_16x16_to_mma_b_32x8_layout_trans
   tilelang.intrinsics.mma_layout.shared_16x32_to_mma_a_32x16_layout
   tilelang.intrinsics.mma_layout.shared_32x16_to_mma_a_32x16_layout_trans
   tilelang.intrinsics.mma_layout.shared_16x32_to_mma_b_32x16_layout
   tilelang.intrinsics.mma_layout.shared_32x16_to_mma_b_32x16_layout_trans
   tilelang.intrinsics.mma_layout.mma_32x8_to_shared_16x16_layout
   tilelang.intrinsics.mma_layout.mma_load_a_32x4_to_shared_16x8_layout
   tilelang.intrinsics.mma_layout.mma_load_b_32x4_to_shared_16x8_layout
   tilelang.intrinsics.mma_layout.mma_load_a_32x16_to_shared_16x32_layout
   tilelang.intrinsics.mma_layout.mma_load_a_32x8_to_shared_16x16_layout
   tilelang.intrinsics.mma_layout.mma_load_b_32x16_to_shared_16x32_layout
   tilelang.intrinsics.mma_layout.mma_load_b_32x8_to_shared_16x16_layout
   tilelang.intrinsics.mma_layout.shared_16x16_to_mma_32x8_smoothlayout
   tilelang.intrinsics.mma_layout.shared_16x32_to_mma_32x16_smoothlayout
   tilelang.intrinsics.mma_layout.shared_32x16_to_mma_32x16_smoothlayout
   tilelang.intrinsics.mma_layout.get_swizzle_layout
   tilelang.intrinsics.mma_layout.make_mma_swizzle_layout


Module Contents
---------------

.. py:function:: ldmatrix_32x4_to_shared_16x8_layout_a(thread_id, local_id)

.. py:function:: ldmatrix_32x4_to_shared_16x8_layout_b(thread_id, local_id)

.. py:function:: ldmatrix_32x8_to_shared_16x16_layout(thread_id, local_id)

.. py:function:: ldmatrix_trans_32x8_to_shared_16x16_layout(thread_id, local_id)

.. py:function:: ldmatrix_32x16_to_shared_16x32_layout_a(thread_id, local_id)

.. py:function:: ldmatrix_32x16_to_shared_16x32_layout_b(thread_id, local_id)

.. py:function:: mma_store_32x8_to_shared_16x16_layout(thread_id, local_id)

.. py:function:: mma_store_32x2_to_shared_8x8_layout_fp64(thread_id, local_id)

.. py:function:: shared_16x8_to_mma_a_32x4_layout(i, j)

.. py:function:: shared_16x8_to_mma_a_32x4_layout_trans(i, j)

.. py:function:: shared_16x8_to_mma_b_32x4_layout(i, j)

.. py:function:: shared_16x8_to_mma_b_32x4_layout_trans(i, j)

.. py:data:: shared_16x8_to_mma_32x4_layout_sr_a

.. py:data:: shared_16x8_to_mma_32x4_layout_sr_b

.. py:data:: shared_16x8_to_mma_32x4_layout_rs_a

.. py:data:: shared_16x8_to_mma_32x4_layout_rs_b

.. py:function:: shared_16x16_to_mma_a_32x8_layout(i, j)

.. py:function:: shared_16x16_to_mma_a_32x8_layout_trans(i, j)

.. py:function:: shared_16x16_to_mma_b_32x8_layout(i, j)

.. py:function:: shared_16x16_to_mma_b_32x8_layout_trans(i, j)

.. py:data:: shared_16x16_to_mma_32x8_layout_sr_a

.. py:data:: shared_16x16_to_mma_32x8_layout_sr_b

.. py:data:: shared_16x16_to_mma_32x8_layout_rs_a

.. py:data:: shared_16x16_to_mma_32x8_layout_rs_b

.. py:function:: shared_16x32_to_mma_a_32x16_layout(i, j)

.. py:function:: shared_32x16_to_mma_a_32x16_layout_trans(i, j)

.. py:function:: shared_16x32_to_mma_b_32x16_layout(i, j)

.. py:function:: shared_32x16_to_mma_b_32x16_layout_trans(i, j)

.. py:data:: shared_16x32_to_mma_32x16_layout_sr_a

.. py:data:: shared_16x32_to_mma_32x16_layout_sr_b

.. py:data:: shared_16x32_to_mma_32x16_layout_rs_a

.. py:data:: shared_16x32_to_mma_32x16_layout_rs_b

.. py:function:: mma_32x8_to_shared_16x16_layout(thread_id, local_id)

.. py:function:: mma_load_a_32x4_to_shared_16x8_layout(thread_id, local_id)

.. py:function:: mma_load_b_32x4_to_shared_16x8_layout(thread_id, local_id)

.. py:function:: mma_load_a_32x16_to_shared_16x32_layout(thread_id, local_id)

.. py:function:: mma_load_a_32x8_to_shared_16x16_layout(thread_id, local_id)

   .. code-block:: text

      groupID           = %laneid >> 2
      threadID_in_group = %laneid % 4

      row = groupID                                    for ai where 0 <= i < 2 || 4 <= i < 6
            groupID + 8                                Otherwise

      col = (threadID_in_group * 2) + (i & 0x1)        for ai where i <  4
            (threadID_in_group * 2) + (i & 0x1) + 8    for ai where i >= 4

.. py:function:: mma_load_b_32x16_to_shared_16x32_layout(thread_id, local_id)

.. py:function:: mma_load_b_32x8_to_shared_16x16_layout(thread_id, local_id)

   .. code-block:: text

      groupID           = %laneid >> 2
      threadID_in_group = %laneid % 4

      row = (threadID_in_group * 2) + (i & 0x1)        for bi where i <  2
            (threadID_in_group * 2) + (i & 0x1) + 8    for bi where i >= 2

      col = groupID

.. py:function:: shared_16x16_to_mma_32x8_smoothlayout(i, j)

.. py:function:: shared_16x32_to_mma_32x16_smoothlayout(i, j)

.. py:function:: shared_32x16_to_mma_32x16_smoothlayout(i, j)

.. py:function:: get_swizzle_layout(row_idx, col_idx, row_size, dtype, swizzle_bytes=None)

.. py:function:: make_mma_swizzle_layout(shared_buf, is_smooth = False)