diff --git a/README.md b/README.md index 50aca26..9d1e4b0 100644 --- a/README.md +++ b/README.md @@ -34,39 +34,39 @@ See the [CONTRIBUTING.md](CONTRIBUTING.md) file for details. Below is a sample of the GPU specifications data included in the repository: -Attribute (Unit) | H100 | L40S ---- | --- | --- -FP64 (TFLOPS) | 25.6 | 1.4 -FP64 Tensor Core (TFLOPS) | 51 | ? -FP32 (TFLOPS) | 51.2 | 91.6 -TF32 Tensor Core (TFLOPS) | ? | 183 -TF32 Tensor Core with Sparsity (TFLOPS) | 756 | 366 -FP16 (TFLOPS) | 204.9 | 91.6 -FP16 Tensor Core (TFLOPS) | ? | 362 -FP16 Tensor Core with Sparsity (TFLOPS) | ? | 733 -BF16 (TFLOPS) | ? | ? -BF16 Tensor Core (TFLOPS) | ? | 362 -BF16 Tensor Core with Sparsity (TFLOPS) | 1513 | 733 -FP8 (TFLOPS) | N/A | N/A -FP8 Tensor Core (TFLOPS) | ? | 733 -FP8 Tensor Core with Sparsity (TFLOPS) | 3026 | 1466 -INT8 (TOPS) | ? | ? -INT8 Tensor Core (TOPS) | ? | 733 -INT8 Tensor Core with Sparsity (TOPS) | 3026 | 1466 -INT4 (TOPS) | N/A | N/A -INT4 Tensor Core (TOPS) | ? | 733 -INT4 Tensor Core with Sparsity (TOPS) | ? | 1466 -**Architecture Details** | | | -GPU Name | H100 | L40S -Manufacturer | NVIDIA | NVIDIA -Architecture | Hopper | Ada Lovelace -NVIDIA RT Cores | ? | 142 (3rd gen) -NVIDIA Tensor Cores | 456 (4th gen) | 568 (4th gen) -NVIDIA CUDA Cores | 14592 | 18176 -GPU Memory (GB) | 80 | 48 -Memory Bandwidth (GB/s) | 2048 | 864 -Interconnect Type | PCIe Gen5 | PCIe Gen4 -Encoders and Decoders | 0, 7 | 3, 3 -CUDA Compute Capability | 9 | 8.9 -Power Consumption (W) | 350 | 300 +Attribute (Unit) | H100 | L40S | A100 PCIe 80GB +--- | --- | --- | --- +FP64 (TFLOPS) | 25.6 | 1.4 | 9.7 +FP64 Tensor Core (TFLOPS) | 51 | ? | 19.5 +FP32 (TFLOPS) | 51.2 | 91.6 | 19.5 +TF32 Tensor Core (TFLOPS) | ? | 183 | 156 +TF32 Tensor Core with Sparsity (TFLOPS) | 756 | 366 | 312 +FP16 (TFLOPS) | 204.9 | 91.6 | 78 +FP16 Tensor Core (TFLOPS) | ? | 362 | 312 +FP16 Tensor Core with Sparsity (TFLOPS) | ? | 733 | 624 +BF16 (TFLOPS) | ? | ? | ? +BF16 Tensor Core (TFLOPS) | ? | 362 | 312 +BF16 Tensor Core with Sparsity (TFLOPS) | 1513 | 733 | 624 +FP8 (TFLOPS) | N/A | N/A | N/A +FP8 Tensor Core (TFLOPS) | ? | 733 | N/A +FP8 Tensor Core with Sparsity (TFLOPS) | 3026 | 1466 | N/A +INT8 (TOPS) | ? | ? | ? +INT8 Tensor Core (TOPS) | ? | 733 | 624 +INT8 Tensor Core with Sparsity (TOPS) | 3026 | 1466 | 1248 +INT4 (TOPS) | N/A | N/A | N/A +INT4 Tensor Core (TOPS) | ? | 733 | ? +INT4 Tensor Core with Sparsity (TOPS) | ? | 1466 | ? +**Architecture Details** | | | | +GPU Name | H100 | L40S | A100 PCIe 80GB +Manufacturer | NVIDIA | NVIDIA | NVIDIA +Architecture | Hopper | Ada Lovelace | Ampere +NVIDIA RT Cores | ? | 142 (3rd gen) | ? +NVIDIA Tensor Cores | 456 (4th gen) | 568 (4th gen) | 432 (3rd gen) +NVIDIA CUDA Cores | 14592 | 18176 | 6912 +GPU Memory (GB) | 80 | 48 | 80 +Memory Bandwidth (GB/s) | 2048 | 864 | 1935 +Interconnect Type | PCIe Gen5 | PCIe Gen4 | PCIe Gen4 +Encoders and Decoders | 0, 7 | 3, 3 | 0, 5 +CUDA Compute Capability | 9 | 8.9 | 8 +Power Consumption (W) | 350 | 300 | 300 diff --git a/data/specs.json b/data/specs.json index 77eaa3c..c1b0f84 100644 --- a/data/specs.json +++ b/data/specs.json @@ -440,13 +440,13 @@ }, "v100_pcie": { "name": "V100 PCIe", - "fp64": 7, + "fp64": 7.1, "fp64_tensor_core": 0, - "fp32": 14, + "fp32": 14.1, "tf32_tensor_core": 112, "tf32_tensor_core_sparsity": 0, - "fp16": 0, - "fp16_tensor_core": 0, + "fp16": 28.3, + "fp16_tensor_core": 112, "fp16_tensor_core_sparsity": null, "bf16": 0, "bf16_tensor_core": 0, @@ -474,7 +474,8 @@ "power_consumption": 250, "sources": [ "https://images.nvidia.com/content/technologies/volta/pdf/volta-v100-datasheet-update-us-1165301-r5.pdf", - "https://www.techpowerup.com/gpu-specs/tesla-v100-pcie-32-gb.c3184" + "https://www.techpowerup.com/gpu-specs/tesla-v100-pcie-32-gb.c3184", + "https://developer.nvidia.com/video-encode-and-decode-gpu-support-matrix-new" ] }, "v100_sxm2": { @@ -484,8 +485,8 @@ "fp32": 15.7, "tf32_tensor_core": 125, "tf32_tensor_core_sparsity": 0, - "fp16": 0, - "fp16_tensor_core": 0, + "fp16": 31.3, + "fp16_tensor_core": 125, "fp16_tensor_core_sparsity": null, "bf16": 0, "bf16_tensor_core": 0, @@ -513,6 +514,7 @@ "power_consumption": 300, "sources": [ "https://images.nvidia.com/content/technologies/volta/pdf/volta-v100-datasheet-update-us-1165301-r5.pdf", + "https://www.techpowerup.com/gpu-specs/tesla-v100-sxm2-16-gb.c3018", "https://www.techpowerup.com/gpu-specs/tesla-v100-sxm2-32-gb.c3183", "https://developer.nvidia.com/video-encode-and-decode-gpu-support-matrix-new" ] diff --git a/specs.md b/specs.md index f10015a..3ad04a7 100644 --- a/specs.md +++ b/specs.md @@ -2,13 +2,13 @@ Attribute (Unit) | H100 | L40S | L4 | A100 PCIe 40GB | A100 PCIe 80GB | A100 SXM4 40GB | A100 SXM4 80GB | A10 | T4 | Quadro RTX 5000 | V100 PCIe | V100 SXM2 | V100S PCIe --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- -FP64 (TFLOPS) | 25.6 | 1.4 | 0.5 | 9.7 | 9.7 | 9.7 | 9.7 | 1 | ? | 0.3 | 7 | 7.8 | 8.2 +FP64 (TFLOPS) | 25.6 | 1.4 | 0.5 | 9.7 | 9.7 | 9.7 | 9.7 | 1 | ? | 0.3 | 7.1 | 7.8 | 8.2 FP64 Tensor Core (TFLOPS) | 51 | ? | ? | 19.5 | 19.5 | 19.5 | 19.5 | ? | N/A | N/A | N/A | N/A | N/A -FP32 (TFLOPS) | 51.2 | 91.6 | 30.3 | 19.5 | 19.5 | 19.5 | 19.5 | 31.2 | 8.1 | 11.2 | 14 | 15.7 | 16.4 +FP32 (TFLOPS) | 51.2 | 91.6 | 30.3 | 19.5 | 19.5 | 19.5 | 19.5 | 31.2 | 8.1 | 11.2 | 14.1 | 15.7 | 16.4 TF32 Tensor Core (TFLOPS) | ? | 183 | 60 | 156 | 156 | 156 | 156 | 62.5 | N/A | N/A | 112 | 125 | N/A TF32 Tensor Core with Sparsity (TFLOPS) | 756 | 366 | 120 | 312 | 312 | 312 | 312 | 125 | N/A | N/A | N/A | N/A | N/A -FP16 (TFLOPS) | 204.9 | 91.6 | 30.3 | 78 | 78 | 78 | 78 | 31.2 | 65 | 22.3 | N/A | N/A | 32.8 -FP16 Tensor Core (TFLOPS) | ? | 362 | 121 | 312 | 312 | 312 | 312 | ? | ? | ? | N/A | N/A | 130 +FP16 (TFLOPS) | 204.9 | 91.6 | 30.3 | 78 | 78 | 78 | 78 | 31.2 | 65 | 22.3 | 28.3 | 31.3 | 32.8 +FP16 Tensor Core (TFLOPS) | ? | 362 | 121 | 312 | 312 | 312 | 312 | ? | ? | ? | 112 | 125 | 130 FP16 Tensor Core with Sparsity (TFLOPS) | ? | 733 | 242 | 624 | 624 | 624 | 624 | ? | ? | ? | ? | ? | ? BF16 (TFLOPS) | ? | ? | ? | ? | ? | ? | ? | ? | N/A | N/A | N/A | N/A | N/A BF16 Tensor Core (TFLOPS) | ? | 362 | 121 | 312 | 312 | 312 | 312 | 125 | N/A | N/A | N/A | N/A | N/A