Skip to content

Commit 19630d6

Browse files
committed
Cleanup version of the xelink support
1 parent 44562ac commit 19630d6

File tree

3 files changed

+116
-4
lines changed

3 files changed

+116
-4
lines changed
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
{
2+
"Info": "2x 4 tile 4 GiB PVC [Ponte Vecchio] GPUs",
3+
"DevCount": 2,
4+
"TilesPerDev": 4,
5+
"DevsPerNode": 1,
6+
"DevMemSize": 4294967296,
7+
"Capabilities": {
8+
"platform": "fake_PVC",
9+
"connections": "0.1-0.0_0.2-0.0_0.3-0.0_1.0-0.0_1.1-0.0_1.2-0.0_1.3-0.0_0.2-0.1_0.3-0.1_1.0-0.1_1.1-0.1_1.2-0.1_1.3-0.1_0.3-0.2_1.0-0.2_1.1-0.2_1.2-0.2_1.3-0.2_1.0-0.3_1.1-0.3_1.2-0.3_1.3-0.3_1.1-1.0_1.2-1.0_1.3-1.0_1.2-1.1_1.3-1.1_1.3-1.2",
10+
"connection-topology": "RAW"
11+
}
12+
}
13+
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
{
2+
"Info": "8x 4 GiB PVC [Ponte Vecchio] GPUs",
3+
"DevCount": 8,
4+
"TilesPerDev": 2,
5+
"DevsPerNode": 2,
6+
"DevMemSize": 4294967296,
7+
"Capabilities": {
8+
"platform": "fake_PVC",
9+
"connections": "",
10+
"connection-topology": "FULL"
11+
}
12+
}

cmd/gpu_fakedev/gpu_fakedev.go

Lines changed: 91 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2021-2022 Intel Corporation. All Rights Reserved.
1+
// Copyright 2021-2023 Intel Corporation. All Rights Reserved.
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.
@@ -44,6 +44,7 @@ import (
4444
"os"
4545
"path/filepath"
4646
"strconv"
47+
"strings"
4748

4849
"golang.org/x/sys/unix"
4950
)
@@ -58,9 +59,11 @@ const (
5859
devfsPath = "dev"
5960
mib = 1024.0 * 1024.0
6061
// null device major, minor on linux.
61-
devNullMajor = 1
62-
devNullMinor = 3
63-
devNullType = unix.S_IFCHR
62+
devNullMajor = 1
63+
devNullMinor = 3
64+
devNullType = unix.S_IFCHR
65+
maxK8sLabelSize = 63
66+
fullyConnected = "FULL"
6467
)
6568

6669
var verbose bool
@@ -270,6 +273,8 @@ func generateDriFiles(opts genOptions) {
270273
log.Printf("Generating fake DRI device(s) sysfs, debugfs and devfs content under '%s' & '%s'",
271274
sysfsPath, devfsPath)
272275

276+
makeXelinkSideCar(opts.Capabilities["connection-topology"], opts.DevCount, opts.TilesPerDev, opts.Capabilities["connections"])
277+
273278
opts.dirs, opts.files = 0, 0
274279
for i := 0; i < opts.DevCount; i++ {
275280
if err := addSysfsDriTree(sysfsPath, &opts, i); err != nil {
@@ -291,6 +296,88 @@ func generateDriFiles(opts genOptions) {
291296
log.Printf("Done, created %d dirs, %d devices and %d files.", opts.dirs, opts.devs, opts.files)
292297
}
293298

299+
func makeXelinkSideCar(topology string, gpus, tiles int, connections string) {
300+
if topology != fullyConnected {
301+
log.Printf("XELINK: generate xelink sidecar label file, using (GPUs: %d, Tiles: %d)", gpus, tiles)
302+
} else {
303+
log.Printf("XELINK: generate xelink sidecar label file, using (GPUs: %d, Tiles: %d, Topology: %s)", gpus, tiles, topology)
304+
}
305+
306+
if topology == fullyConnected {
307+
saveSideCarFile(buildConnectionList(gpus, tiles))
308+
} else {
309+
saveSideCarFile(connections)
310+
}
311+
}
312+
313+
// buildConnectionList returns the xelink connection list for a fully
// connected topology of gpus devices with tiles tiles each.
//
// Every tile is named "<gpu>.<tile>". Each unordered pair of distinct tiles
// contributes exactly one "<to>-<from>" entry, where <from> is the tile that
// comes first in (gpu, tile) order, and the entries are joined with "_".
// For 2 GPUs with 2 tiles each the result is
// "0.1-0.0_1.0-0.0_1.1-0.0_1.0-0.1_1.1-0.1_1.1-1.0".
//
// An empty string is returned when there are fewer than two tiles in total,
// including when gpus or tiles is zero or negative.
func buildConnectionList(gpus, tiles int) string {
	if gpus <= 0 || tiles <= 0 {
		return ""
	}

	nodes := make([]string, 0, gpus*tiles)

	for gpu := 0; gpu < gpus; gpu++ {
		for tile := 0; tile < tiles; tile++ {
			nodes = append(nodes, fmt.Sprintf("%d.%d", gpu, tile))
		}
	}

	// A full mesh over n nodes has n*(n-1)/2 links.
	links := make([]string, 0, len(nodes)*(len(nodes)-1)/2)

	for i, from := range nodes {
		// Pairing a node only with the nodes after it rules out self links
		// and mirrored duplicates without tracking already emitted links.
		// TODO: ignore in-gpu xelinks.
		for _, to := range nodes[i+1:] {
			links = append(links, to+"-"+from)
		}
	}

	return strings.Join(links, "_")
}
351+
352+
func saveSideCarFile(connections string) {
353+
f, err := os.Create("xpum-sidecar-labels.txt")
354+
if err != nil {
355+
panic(err)
356+
}
357+
defer f.Close()
358+
359+
// Write first line without Z prefix
360+
line := fmt.Sprintf("xpumanager.intel.com/xe-links=%s", connections[:min(len(connections), maxK8sLabelSize)])
361+
fmt.Println(line)
362+
363+
if _, err := f.WriteString(line + "\n"); err != nil {
364+
panic(err)
365+
}
366+
367+
index := 2
368+
369+
// Write next lines with Z prefix
370+
for i := maxK8sLabelSize; i < len(connections); i += (maxK8sLabelSize - 1) {
371+
line := fmt.Sprintf("xpumanager.intel.com/xe-links%d=Z%s", index, connections[i:min(len(connections), i+maxK8sLabelSize-1)])
372+
fmt.Println(line)
373+
374+
if _, err := f.WriteString(line + "\n"); err != nil {
375+
panic(err)
376+
}
377+
index++
378+
}
379+
}
380+
294381
// getOptions parses options from given JSON file, validates and returns them.
295382
func getOptions(name string) genOptions {
296383
if name == "" {

0 commit comments

Comments
 (0)