Initial commit of master's thesis
This is the version I submitted to RWTH Aachen University at November 9, 2018.
This commit is contained in:
1
listings/activate_cpu.sh
Normal file
1
listings/activate_cpu.sh
Normal file
@@ -0,0 +1 @@
|
||||
# echo 1 > /sys/devices/system/cpu/<cpuX>/online
|
48
listings/cm_switch.c
Normal file
48
listings/cm_switch.c
Normal file
@@ -0,0 +1,48 @@
|
||||
struct rdma_cm_event *event;
|
||||
|
||||
while (rdma_get_cm_event(event_channel, &event) == 0) {
|
||||
|
||||
switch (event->event) {
|
||||
case RDMA_CM_EVENT_ADDR_RESOLVED:
|
||||
// Create QP, receive CQ, and send CQ.
|
||||
// Call rdma_resolve_route()
|
||||
// State: STARTED
|
||||
case RDMA_CM_EVENT_ADDR_ERROR:
|
||||
// Try fallback and set mode rdma_cm_id to listening
|
||||
// State: STARTED
|
||||
case RDMA_CM_EVENT_ROUTE_RESOLVED:
|
||||
// Call rdma_connect()
|
||||
// State: PENDING_CONNECT
|
||||
case RDMA_CM_EVENT_ROUTE_ERROR:
|
||||
// Try fallback and set mode rdma_cm_id to listening
|
||||
// State: STARTED
|
||||
case RDMA_CM_EVENT_UNREACHABLE:
|
||||
// Try fallback and set mode rdma_cm_id to listening
|
||||
// State: STARTED
|
||||
case RDMA_CM_EVENT_CONNECT_REQUEST:
|
||||
// Create QP, receive CQ, and send CQ.
|
||||
// Call rdma_accept()
|
||||
// State: PENDING_CONNECT
|
||||
case RDMA_CM_EVENT_CONNECT_ERROR:
|
||||
// Try fallback and set mode rdma_cm_id to listening
|
||||
// State: STARTED
|
||||
case RDMA_CM_EVENT_REJECTED:
|
||||
// Try fallback and set mode rdma_cm_id to listening
|
||||
// State: STARTED
|
||||
case RDMA_CM_EVENT_ESTABLISHED:
|
||||
// In case of UD, save address handle from event struct
|
||||
// State: CONNECTED
|
||||
case RDMA_CM_EVENT_DISCONNECTED:
|
||||
// Release all buffers and destroy everything
|
||||
// State: STARTED
|
||||
case RDMA_CM_EVENT_TIMEWAIT_EXIT:
|
||||
break;
|
||||
default:
|
||||
// Error message: unkown event
|
||||
}
|
||||
|
||||
rdma_ack_cm_event(event);
|
||||
}
|
||||
|
||||
|
||||
|
17
listings/cq_time.c
Normal file
17
listings/cq_time.c
Normal file
@@ -0,0 +1,17 @@
|
||||
struct timespec tp;
|
||||
|
||||
while (1) {
|
||||
ibv_get_cq_event(); // Only necessary for event based polling
|
||||
|
||||
while (ibv_poll_cq()) {
|
||||
clock_gettime(CLOCK_MONOTONIC, &tp);
|
||||
|
||||
/**
|
||||
* Save tp and message identifier in an array and
|
||||
* return as soon as possible, so that as little
|
||||
* as possible time is lost before polling goes on.
|
||||
*/
|
||||
}
|
||||
|
||||
ibv_req_notify_cq(); // Only necessary for event based polling
|
||||
}
|
3
listings/cset_create.sh
Normal file
3
listings/cset_create.sh
Normal file
@@ -0,0 +1,3 @@
|
||||
# cset set -c 0-15 -s system --cpu_exclusive
|
||||
# cset set -c 16,18,20,22 -s real-time-0 --cpu_exclusive --mem=0
|
||||
# cset set -c 17,19,21,23 -s real-time-1 --cpu_exclusive --mem=1
|
2
listings/cset_exec.sh
Normal file
2
listings/cset_exec.sh
Normal file
@@ -0,0 +1,2 @@
|
||||
# cset proc --set=real-time-0 --exec ./<application> -- <args>
|
||||
# cset proc --set=real-time-1 --exec ./<application> -- <args>
|
1
listings/cset_move.sh
Normal file
1
listings/cset_move.sh
Normal file
@@ -0,0 +1 @@
|
||||
# cset proc --move -f root -t system --kthread --thread --force
|
1
listings/get_irq_affinity.sh
Normal file
1
listings/get_irq_affinity.sh
Normal file
@@ -0,0 +1 @@
|
||||
$ cat /proc/irq/<irqX>/smp_affinity
|
5
listings/ibv_comp_channel.h
Normal file
5
listings/ibv_comp_channel.h
Normal file
@@ -0,0 +1,5 @@
|
||||
/* Completion channel structure as shown in the libibverbs listing. */
struct ibv_comp_channel {
	struct ibv_context *context;	/* context this channel refers to */
	int fd;				/* presumably the file descriptor events are read from -- confirm against libibverbs */
	int refcnt;			/* reference counter */
};
|
6
listings/ibv_recv_wr.h
Normal file
6
listings/ibv_recv_wr.h
Normal file
@@ -0,0 +1,6 @@
|
||||
/* Receive work request; several requests can be chained through `next`. */
struct ibv_recv_wr {
	uint64_t wr_id;			/* caller-chosen identifier of this work request */
	struct ibv_recv_wr *next;	/* following work request in the chain, or NULL */
	struct ibv_sge *sg_list;	/* scatter/gather elements of this request */
	int num_sge;			/* presumably the number of entries in sg_list */
};
|
26
listings/ibv_send_wr.h
Normal file
26
listings/ibv_send_wr.h
Normal file
@@ -0,0 +1,26 @@
|
||||
struct ibv_send_wr {
|
||||
uint64_t wr_id;
|
||||
struct ibv_send_wr *next;
|
||||
struct ibv_sge *sg_list;
|
||||
int num_sge;
|
||||
enum ibv_wr_opcode opcode;
|
||||
int send_flags;
|
||||
uint32_t imm_data;
|
||||
union {
|
||||
struct {
|
||||
uint64_t remote_addr;
|
||||
uint32_t rkey;
|
||||
} rdma;
|
||||
struct {
|
||||
uint64_t remote_addr;
|
||||
uint64_t compare_add;
|
||||
uint64_t swap;
|
||||
uint32_t rkey;
|
||||
} atomic;
|
||||
struct {
|
||||
struct ibv_ah *ah;
|
||||
uint32_t remote_qpn;
|
||||
uint32_t remote_qkey;
|
||||
} ud;
|
||||
} wr;
|
||||
};
|
5
listings/ibv_sge.h
Normal file
5
listings/ibv_sge.h
Normal file
@@ -0,0 +1,5 @@
|
||||
/* Scatter/gather element: describes one memory buffer of a work request. */
struct ibv_sge {
	uint64_t addr;		/* address of the buffer */
	uint32_t length;	/* length of the buffer */
	uint32_t lkey;		/* key of the buffer; presumably the local key of its memory region */
};
|
41
listings/infiniband.conf
Normal file
41
listings/infiniband.conf
Normal file
@@ -0,0 +1,41 @@
|
||||
source_node = {
|
||||
type = "infiniband",
|
||||
rdma_transport_mode = "${IB_MODE}",
|
||||
|
||||
in = {
|
||||
address = "10.0.0.2:1337",
|
||||
|
||||
max_wrs = 4,
|
||||
cq_size = 4,
|
||||
buffer_subtraction = 2
|
||||
},
|
||||
out = {
|
||||
address = "10.0.0.1:1337",
|
||||
resolution_timeout = 1000,
|
||||
send_inline = true,
|
||||
max_inline_data = 128,
|
||||
use_fallback = true,
|
||||
|
||||
max_wrs = 4096,
|
||||
cq_size = 4096,
|
||||
periodic_signaling = 2048
|
||||
}
|
||||
},
|
||||
|
||||
target_node = {
|
||||
type = "infiniband",
|
||||
rdma_transport_mode = "${IB_MODE}",
|
||||
|
||||
in = {
|
||||
address = "10.0.0.1:1337",
|
||||
|
||||
max_wrs = 512,
|
||||
cq_size = 512,
|
||||
buffer_subtraction = 64,
|
||||
|
||||
signals = {
|
||||
count = ${NUM_VALUE},
|
||||
type = "float"
|
||||
}
|
||||
}
|
||||
}
|
5
listings/memory_alignment_a.h
Normal file
5
listings/memory_alignment_a.h
Normal file
@@ -0,0 +1,5 @@
|
||||
/*
 * Memory alignment example with default (natural) alignment: the compiler is
 * free to insert padding between and after the members, so sizeof(struct a)
 * is typically larger than the sum of the member sizes.
 */
struct a {
	char c;
	int i;
	short s;
};
|
5
listings/memory_alignment_b.h
Normal file
5
listings/memory_alignment_b.h
Normal file
@@ -0,0 +1,5 @@
|
||||
/*
 * Memory alignment example with the packed attribute: the members are laid
 * out back to back without padding, so sizeof(struct b) equals the sum of the
 * member sizes.
 */
struct __attribute__((__packed__)) b {
	char c;
	int i;
	short s;
};
|
33
listings/node_config.conf
Normal file
33
listings/node_config.conf
Normal file
@@ -0,0 +1,33 @@
|
||||
nodes = {
|
||||
node_1 = {
|
||||
type = "file",
|
||||
|
||||
// Global settings for node_1
|
||||
|
||||
in = {
|
||||
// Settings for node input, e.g., file to read from
|
||||
}
|
||||
},
|
||||
node_2 = {
|
||||
type = "infiniband",
|
||||
|
||||
// Global settings for node
|
||||
|
||||
in = {
|
||||
// Settings for node input, e.g., address of local
|
||||
// InfiniBand HCA to use
|
||||
},
|
||||
|
||||
out = {
|
||||
// Settings for node output, e.g., remote InfiniBand
|
||||
// node to write to
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
paths = (
|
||||
{
|
||||
in = "node_1",
|
||||
out = "node_2"
|
||||
}
|
||||
)
|
16
listings/rdtsc.h
Normal file
16
listings/rdtsc.h
Normal file
@@ -0,0 +1,16 @@
|
||||
/**
 * Read the x86-64 time stamp counter.
 *
 * An lfence is issued in front of rdtsc; rdtsc delivers the counter split
 * over edx:eax, which is merged into one 64-bit value in rax.
 *
 * @return Current value of the time stamp counter.
 */
static inline uint64_t rdtsc(void)
{
	uint64_t tsc;

	__asm__ __volatile__(
		"lfence;"
		"rdtsc;"
		"shl $32, %%rdx;"	/* move the upper half into bits 63..32 */
		"or %%rdx,%%rax"	/* combine both halves in rax */
		: "=a" (tsc)
		:
		/* rdx is overwritten by rdtsc; rcx is listed as well, which is a
		 * harmless over-approximation for plain rdtsc. */
		: "%rcx", "%rdx", "memory"
	);

	return tsc;
}
|
17
listings/rdtscp.h
Normal file
17
listings/rdtscp.h
Normal file
@@ -0,0 +1,17 @@
|
||||
/**
 * Read the x86-64 time stamp counter with the rdtscp instruction.
 *
 * rdtscp delivers the counter split over edx:eax, which is merged into one
 * 64-bit value in rax. In contrast to the rdtsc() listing no lfence is issued.
 *
 * @return Current value of the time stamp counter.
 */
static inline uint64_t rdtscp(void)
{
	uint64_t tsc;

	__asm__ __volatile__(
		"rdtscp;"
		"shl $32, %%rdx;"	/* move the upper half into bits 63..32 */
		"or %%rdx,%%rax"	/* combine both halves in rax */
		: "=a" (tsc)
		:
		/* rdtscp writes edx and additionally ecx, hence both clobbers */
		: "%rcx", "%rdx", "memory"
	);

	return tsc;
}
|
||||
|
14
listings/rdtscp_wait.c
Normal file
14
listings/rdtscp_wait.c
Normal file
@@ -0,0 +1,14 @@
|
||||
uint64_t task_wait(struct task *t)
|
||||
{
|
||||
int ret;
|
||||
uint64_t steps, now;
|
||||
|
||||
do {
|
||||
now = rdtscp();
|
||||
} while (now < t->next);
|
||||
|
||||
for (steps = 0; t->next < now; steps++)
|
||||
t->next += t->period;
|
||||
|
||||
return steps;
|
||||
}
|
2
listings/read_write_original.h
Normal file
2
listings/read_write_original.h
Normal file
@@ -0,0 +1,2 @@
|
||||
/*
 * Original read/write callbacks of a node-type: both receive the node, an
 * array of sample pointers and an unsigned count (presumably the number of
 * entries in smps[]).
 */
int (*read)(struct node *n, struct sample *smps[], unsigned cnt);
int (*write)(struct node *n, struct sample *smps[], unsigned cnt);
|
5
listings/read_write_proposal.h
Normal file
5
listings/read_write_proposal.h
Normal file
@@ -0,0 +1,5 @@
|
||||
/*
 * Proposed read/write callbacks of a node-type: both receive the node, an
 * array of sample pointers, an unsigned count and an additional pointer
 * `release`.
 * NOTE(review): `release` is presumably written by the callee to tell the
 * caller how many samples it may release -- confirm against the thesis text.
 */
int (*read)(struct node *n, struct sample *smps[], unsigned cnt,
		unsigned *release);

int (*write)(struct node *n, struct sample *smps[], unsigned cnt,
		unsigned *release);
|
13
listings/send_time.c
Normal file
13
listings/send_time.c
Normal file
@@ -0,0 +1,13 @@
|
||||
// `int messages' represents the number of messages to be sent
|
||||
struct timespec tp[messages];
|
||||
|
||||
for (int i = 0; i < messages; i++) {
|
||||
/**
|
||||
* Prepare WR with an sge that points to tv_nsec of tp[i].
|
||||
* By using an array of timespecs, it is guaranteed that
|
||||
* the timestamp will not be overwritten.
|
||||
*/
|
||||
|
||||
clock_gettime(CLOCK_MONOTONIC, &tp[i]);
|
||||
ibv_post_send();
|
||||
}
|
1
listings/set_irq_affinity.sh
Normal file
1
listings/set_irq_affinity.sh
Normal file
@@ -0,0 +1 @@
|
||||
# echo FFFF > /proc/irq/<irqX>/smp_affinity
|
24
listings/signal_generation.c
Normal file
24
listings/signal_generation.c
Normal file
@@ -0,0 +1,24 @@
|
||||
int signal_generator_read(struct node *n, struct sample *smps[],
|
||||
unsigned cnt, unsigned *release)
|
||||
{
|
||||
struct signal_generator *s = (struct signal_generator *) n->_vd;
|
||||
struct timespace ts;
|
||||
int steps;
|
||||
|
||||
/* Block until 1/p->rate seconds elapsed */
|
||||
steps = task_wait(&s->task);
|
||||
|
||||
if (steps > 1 && s->monitor_missed) {
|
||||
warn("Missed steps: %u", steps-1);
|
||||
|
||||
s->missed_steps += steps-1;
|
||||
}
|
||||
|
||||
ts = time_now();
|
||||
|
||||
/**
|
||||
* Generate sample(s) with signal and timestamp ts .
|
||||
* Return this sample via the *smps[] parameter of
|
||||
* signal_generator_read()
|
||||
*/
|
||||
}
|
8
listings/states.h
Normal file
8
listings/states.h
Normal file
@@ -0,0 +1,8 @@
|
||||
/*
 * Lifecycle states, used as `enum state state` by struct node and
 * struct node_type. The numeric values are assigned explicitly.
 */
enum state {
	STATE_DESTROYED = 0,
	STATE_INITIALIZED = 1,
	STATE_PARSED = 2,
	STATE_CHECKED = 3,
	STATE_STARTED = 4,
	STATE_STOPPED = 5
};
|
33
listings/struct_node.h
Normal file
33
listings/struct_node.h
Normal file
@@ -0,0 +1,33 @@
|
||||
struct node_direction {
|
||||
int enabled;
|
||||
int builtin;
|
||||
int vectorize;
|
||||
|
||||
struct list hooks;
|
||||
|
||||
json_t *cfg;
|
||||
};
|
||||
|
||||
struct node
|
||||
{
|
||||
char *name;
|
||||
char *_name;
|
||||
char *_name_long;
|
||||
|
||||
int affinity;
|
||||
|
||||
uint64_t sequence;
|
||||
|
||||
struct stats *stats;
|
||||
|
||||
struct node_direction in, out;
|
||||
|
||||
struct list signals;
|
||||
|
||||
enum state state;
|
||||
|
||||
struct node_type *_vt;
|
||||
void *_vd;
|
||||
|
||||
json_t *cfg;
|
||||
};
|
41
listings/struct_nodetype.h
Normal file
41
listings/struct_nodetype.h
Normal file
@@ -0,0 +1,41 @@
|
||||
struct node_type {
|
||||
int vectorize;
|
||||
int flags;
|
||||
|
||||
enum state state;
|
||||
|
||||
struct list instance;
|
||||
|
||||
size_t size;
|
||||
size_t pool_size;
|
||||
|
||||
struct {
|
||||
// Global, per node-type
|
||||
int (*start)(struct super_node *sn);
|
||||
int (*stop)();
|
||||
} type;
|
||||
|
||||
// Function pointers
|
||||
void * (*create)();
|
||||
int (*init)();
|
||||
int (*destroy)(struct node *n);
|
||||
int (*parse)(struct node *n, json_t *cfg);
|
||||
int (*check)(struct node *n);
|
||||
char * (*print)(struct node *n);
|
||||
int (*start)(struct node *n);
|
||||
int (*stop)(struct node *n);
|
||||
|
||||
int (*read)(struct node *n, struct sample *smps[],
|
||||
unsigned cnt, unsigned *release);
|
||||
|
||||
int (*write)(struct node *n, struct sample *smps[],
|
||||
unsigned cnt, unsigned *release);
|
||||
|
||||
int (*reverse)(struct node *n);
|
||||
|
||||
int (*fd)(struct node *n);
|
||||
|
||||
// Memory Type
|
||||
struct memory_type * (*memory_type)(struct node *n,
|
||||
struct memory_type *parent);
|
||||
};
|
19
listings/struct_sample.h
Normal file
19
listings/struct_sample.h
Normal file
@@ -0,0 +1,19 @@
|
||||
struct sample {
|
||||
uint64_t sequence;
|
||||
int length;
|
||||
int capacity;
|
||||
int flags;
|
||||
|
||||
struct list *signals;
|
||||
|
||||
atomic_int refcnt;
|
||||
ptrdiff_t pool_off;
|
||||
|
||||
struct {
|
||||
struct timespec origin;
|
||||
struct timespec received;
|
||||
} ts;
|
||||
|
||||
union signal_data data[];
|
||||
};
|
||||
|
26
listings/time_thread.c
Normal file
26
listings/time_thread.c
Normal file
@@ -0,0 +1,26 @@
|
||||
// Global variable
|
||||
struct timespec tp;
|
||||
|
||||
void * t_function(void * ctx)
|
||||
{
|
||||
while (1) {
|
||||
clock_gettime(CLOCK_MONOTONIC, &tp);
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
pthread_t t_thread;
|
||||
pthread_create(&t_thread, NULL, t_function, NULL);
|
||||
|
||||
// `int messages' represents the number of messages to be sent
|
||||
for (int i = 0; i < messages; i++) {
|
||||
/**
|
||||
* Prepare WR with sge that points to tp.tv_nsec. It will
|
||||
* continue to change since the thread continues to run in the
|
||||
* background.
|
||||
*/
|
||||
|
||||
// No need to invoke clock_gettime() here
|
||||
ibv_post_send(); // Post prepared WR
|
||||
}
|
12
listings/timerfd_wait.c
Normal file
12
listings/timerfd_wait.c
Normal file
@@ -0,0 +1,12 @@
|
||||
uint64_t task_wait(struct task *t)
|
||||
{
|
||||
int ret;
|
||||
uint64_t steps;
|
||||
|
||||
ret = read(t->fd, &steps, sizeof(steps));
|
||||
|
||||
if (ret < 0)
|
||||
return 0;
|
||||
|
||||
return steps;
|
||||
}
|
4
listings/timespec.c
Normal file
4
listings/timespec.c
Normal file
@@ -0,0 +1,4 @@
|
||||
struct timespec {
|
||||
time_t tv_sec; /* seconds */
|
||||
long tv_nsec; /* nanoseconds */
|
||||
};
|
16
listings/tuned_latency_performance.conf
Normal file
16
listings/tuned_latency_performance.conf
Normal file
@@ -0,0 +1,16 @@
|
||||
[main]
|
||||
summary=Optimize for deterministic performance at the cost of
|
||||
increased power consumption
|
||||
|
||||
[cpu]
|
||||
force_latency=1
|
||||
governor=performance
|
||||
energy_perf_bias=performance
|
||||
min_perf_pct=100
|
||||
|
||||
[sysctl]
|
||||
kernel.sched_min_granularity_ns=10000000
|
||||
vm.dirty_ratio=10
|
||||
vm.dirty_background_ratio=3
|
||||
vm.swappiness=10
|
||||
kernel.sched_migration_cost_ns=5000000
|
Reference in New Issue
Block a user