Initial commit of master's thesis

This is the version I submitted to RWTH Aachen University on November 9,
2018.
This commit is contained in:
2018-11-12 12:56:59 +01:00
parent ffbcce77f9
commit af25b4b828
1136 changed files with 127398252 additions and 2 deletions

View File

@@ -0,0 +1,47 @@
\begin{table}[ht!]
\footnotesize
\centering
\caption{Dell PowerEdge T630 test system for benchmarks.}
\vspace{3mm}
\label{tab:benchmark_testsystem}
\begin{tabular}{l l}
\hlineB{3}
\Tstrut
        \gls{cpu} & $2\times$~Intel\textregistered~Xeon\textregistered~E5-2643 v4, \SI{20}{\mebi\byte} cache, \SI{3.40}{\giga\hertz} base frequency
\Bstrut\\
\Tstrut
Chipset & Intel\textregistered~C610
\Bstrut\\
\Tstrut
\acrshort{ram} & \SI{32}{\giga\byte}, DDR-4 \SI{2400}{\mega\hertz}, ECC buffered
\Bstrut\\
\Tstrut
Motherboard & Dell PowerEdge T630 System Board NT78X
\Bstrut\\
\Tstrut
        Storage & Intel\textregistered~SSD DC P3700 Series, \gls{pcie} (Gen 2) $\times$8, \SI{400}{\giga\byte}
\Bstrut\\
\Tstrut
        \gls{hca} & $2\times$~Mellanox\textregistered{} ConnectX\textregistered-4 MT27700, \gls{pcie} (Gen 3) $\times$16, \SI[per-mode=symbol]{100}{\giga\bit\per\second}
\Bstrut\\
\Tstrut
Physical link & \SI{0.5}{\meter} Mellanox\textregistered{} MCP100-E00A Passive copper Cable, \SI[per-mode=symbol]{100}{\giga\bit\per\second}
\Bstrut\\
\Tstrut
\gls{os} & Fedora 27 @ Linux kernel 4.13.9-200
\Bstrut\\
\Tstrut
\gls{ofed} & MLNX OFED Linux 4.4-2.0.7.0
\Bstrut\\
\Tstrut
VILLASnode & Compiled version on commit 0819207c55ef06c7b98ddfe98637eb2b5e1e5d0b
\Tstrut\Bstrut\\
\hlineB{3}
\end{tabular}
\end{table}
%\footnotetext{\url{https://www.dell.com/en-us/work/shop/servers-storage-and-networking/sf/poweredge-tower-servers}}
%\footnotetext{\url{https://ark.intel.com/products/92989/Intel-Xeon-Processor-E5-2643-v4-20M-Cache-3_40-GHz}}
%\footnotetext{\url{https://www.intel.com/content/dam/www/public/us/en/documents/specification-updates/x99-chipset-pch-spec-update.pdf}}
%\footnotetext{\url{https://www.intel.com/content/www/us/en/products/memory-storage/solid-state-drives/data-center-ssds/dc-p3700-series.html}}
%\footnotetext{\url{http://www.mellanox.com/page/products_dyn?product_family=201&mtag=connectx_4_vpi_card}}

View File

@@ -0,0 +1,21 @@
\begin{table}[ht!]
\centering
\caption{Conditionally required Communication Management messages, used to acquire \acrfull{ud} addressing information.}
\label{tab:conditionally_required_cm_messages}
\vspace{3mm}
\begin{tabular}{p{2.6cm} | p{11.4cm}}
\hlineB{3}
\Tstrut
\textbf{CM message} & \textbf{Description}\\
\hhline{=|=}
\Tstrut
\acrshort{sidrreq} & The \textit{\acrlong{sidrreq}} is used to request \gls{ud} addressing information from a remote node for a certain service ID.
\Bstrut\\
\hline
\Tstrut
\acrshort{sidrrep} & The \textit{\acrlong{sidrrep}} is a response to the \acrshort{sidrreq} and contains all information to communicate with the entity that was specified as service ID in the request message.
\Bstrut\\
\hlineB{3}
\end{tabular}
\end{table}

View File

@@ -0,0 +1,70 @@
\begin{table}[ht!]
\centering
\caption{$\min t_{lat}$, $\tilde{t}_{lat}$, and $\max t_{lat}$ measured with \texttt{ib\_send\_lat}. All communication went over an \gls{rc} \gls{rdma} \gls{cm} \gls{qp} and was sent with the normal \textit{send} operation. Every test contained 1000 iterations and messages that were smaller than \SI{188}{\byte} were sent inline.}\label{tab:correlation_benchmarks}
\vspace{3mm}
\begin{tabular}{r | c c c c}
\hlineB{3}
\Tstrut
& \multicolumn{3}{c}{round-trip benchmark} & one-way benchmark
\Bstrut
\\
\Tstrut
message size [\SI{}{\byte}] & $\min{t_{lat}}$ [\SI{}{\micro\second}] & $\tilde{t}_{lat}$ [\SI{}{\micro\second}] & $\max{t_{lat}}$ [\SI{}{\micro\second}] & $\tilde{t}_{lat}$ [\SI{}{\micro\second}]
\Bstrut
\\
\hhline{=|====}
\Tstrut
\SI{8}{} & 0.80 & 0.83 & 2.37 & 0.94
\Bstrut
\\
\Tstrut
\SI{16}{} & 0.79 & 0.83 & 4.06 & 0.91
\Bstrut
\\
\Tstrut
\SI{32}{} & 0.79 & 0.82 & 4.29 & 0.91
\Bstrut
\\
\Tstrut
\SI{64}{} & 0.82 & 0.86 & 2.20 & 1.00
\Bstrut
\\
\Tstrut
\SI{128}{} & 0.86 & 0.90 & 1.73 & 1.01
\Bstrut
\\
\Tstrut
\SI{256}{} & 1.24 & 1.30 & 2.05 & 1.36
\Bstrut
\\
\Tstrut
\SI{512}{} & 1.31 & 1.35 & 2.56 & 1.42
\Bstrut
\\
\Tstrut
\SI{1024}{} & 1.45 & 1.49 & 2.78 & 1.63
\Bstrut
\\
\Tstrut
\SI{2048}{} & 1.71 & 1.75 & 2.87 & 2.29
\Bstrut
\\
\Tstrut
\SI{4096}{} & 2.22 & 2.27 & 2.87 & 2.93
\Bstrut
\\
\Tstrut
\SI{8192}{} & 2.55 & 2.61 & 3.97 & 3.62
\Bstrut
\\
\Tstrut
\SI{16384}{} & 3.14 & 3.21 & 4.54 & 4.58
\Bstrut
\\
\Tstrut
\SI{32768}{} & 4.48 & 4.59 & 5.78 & 5.96
\Bstrut
\\
\hlineB{3}
\end{tabular}
\end{table}

View File

@@ -0,0 +1,48 @@
\begin{table}[ht!]
\centering
\caption{\textit{InfiniBand} node-type components from \autoref{fig:villasnode_implementation} and the respective sections of the present work that elaborate upon these components.}
\label{tab:infiniband_node_components}
\vspace{3mm}
\begin{tabular}{ l | c c c}
\hlineB{3}
\Tstrut
\textbf{Component} & \textbf{\nameref{chap:basics}} & \textbf{\nameref{chap:architecture}} & \textbf{\nameref{chap:implementation}}\\
\hhline{=|===}
\Tstrut
\gls{hca} & \autoref{sec:iba} & &
\Bstrut\\
\Tstrut
Queue pair & \autoref{sec:qp} & &
\Bstrut\\
\Tstrut
Protection domain & \autoref{sec:memory} & &
\Bstrut\\
\Tstrut
Event channels & \autoref{sec:eventchannels} & &
\Bstrut\\
\Tstrut
Communication identifier & \autoref{sec:rdmacm} & &
\Bstrut\\
\Tstrut
Buffers & \autoref{sec:memory} & \autoref{sec:memorymanagement} &
\Bstrut\\
\Tstrut
VILLASnode & & \autoref{sec:villasbasics} &
\Bstrut\\
\Tstrut
Read-function & & \autoref{sec:readwrite_interfaces} & \autoref{sec:villas_read}
\Bstrut\\
\Tstrut
Write-function & & \autoref{sec:readwrite_interfaces} & \autoref{sec:villas_write}
\Bstrut\\
\Tstrut
Start-function & & & \autoref{sec:villas_start}
\Bstrut\\
\Tstrut
Management thread & & & \autoref{sec:comm_management}
\Bstrut\\
\hlineB{3}
\end{tabular}
\vspace{-1cm}
\end{table}

View File

@@ -0,0 +1,52 @@
\begin{table}[ht!]
\centering
\vspace{3.5cm}
\caption{The percentage of missed steps in the in and out files that were generated by the VILLASnode node-type benchmark for the \textit{nanomsg} and \textit{zeromq} node-type. Although a considerable number of samples never got transmitted, especially for high rates, no samples were dropped after the first sequence number appeared in the out files.}\label{tab:missed_steps_nanomsg_zeromq}
\vspace{3mm}
\begin{tabular}{r r | c c c c}
\hlineB{3}
\Tstrut
\textbf{rate [Hz]} & \textbf{file} & \multicolumn{4}{c}{\textbf{Missed steps [\%]}}
\Bstrut
\\
\Tstrut
& & \textit{nanomsg} & \textit{nanomsg (lo)} & \textit{zeromq} & \textit{zeromq (lo)} \\
\hhline{==|====}
\Tstrut
        \multirow{2}{*}{\SI{100}{}} & in & 8.03 & 8.03 & 8.03 & 8.03
\\
& out & 8.04 & 8.03 & 8.04 & 8.04\Bstrut
\\
\Tstrut
        \multirow{2}{*}{\SI{2500}{}} & in & 3.72 & 3.71 & 3.72 & 3.71
\\
& out & 3.80 & 3.71 & 3.78 & 3.78\Bstrut
\\
\Tstrut
        \multirow{2}{*}{\SI{5000}{}} & in & 0.03 & 0.03 & 0.03 & 0.04
\\
& out & 0.20 & 0.03 & 0.15 & 0.18\Bstrut
\\
\Tstrut
        \multirow{2}{*}{\SI{10000}{}} & in & 0.04 & 0.05 & 0.04 & 0.07
\\
& out & 0.36 & 0.05 & 0.27 & 0.34\Bstrut
\\
\Tstrut
        \multirow{2}{*}{\SI{25000}{}} & in & 0.08 & 0.08 & 0.11 & 0.11
\\
& out & 0.90 & 0.08 & 0.70 & 0.76\Bstrut
\\
\Tstrut
        \multirow{2}{*}{\SI{50000}{}} & in & 0.17 & 0.17 & 0.24 & 0.22
\\
& out & 1.75 & 0.17 & 1.42 & 1.58\Bstrut
\\
\Tstrut
        \multirow{2}{*}{\SI{100000}{}} & in & 0.54 & 0.99 & 0.45 & 0.61
\\
& out & 3.91 & 1.00 & 2.68 & 3.33\Bstrut
\\
\hlineB{3}
\end{tabular}
\end{table}

View File

@@ -0,0 +1,44 @@
\begin{table}[ht!]
\centering
\caption{$\tilde{t}_{lat}$ [\SI{}{\micro\second}] as reported by \texttt{ib\_send\_lat} for different service types and message sizes with a varying \gls{mtu}. All communication went over an \gls{rdma} \gls{cm} \gls{qp} and was sent with the normal \textit{send} operation. Every test contained 1000 iterations and messages that were smaller than \SI{188}{\byte} were sent inline.}\label{tab:mtu_performance}
\vspace{3mm}
\begin{tabular}{l r | c c c c c}
\hlineB{3}
\Tstrut
\textbf{service type} & \textbf{size} & \multicolumn{5}{c}{\textbf{\gls{mtu}}}
\Bstrut
\\
\Tstrut
& & \SI{256}{\byte} & \SI{512}{\byte} & \SI{1024}{\byte} & \SI{2048}{\byte} & \SI{4096}{\byte} \\
\hhline{==|=====}
\Tstrut
\multirow{4}{*}{\gls{rc}} & \SI{32}{\byte} & 0.83 & 0.81 & 0.82 & 0.82 & 0.83
\Bstrut
\\
\Tstrut
& \SI{1}{\kilo\byte} & 1.58 & 1.58 & 1.52 & 1.60 & 1.61
\Bstrut
\\
\Tstrut
& \SI{4}{\kilo\byte} & 2.26 & 2.25 & 2.25 & 2.27 & 2.28
\Bstrut
\\
\Tstrut
& \SI{32}{\kilo\byte} & 4.56 & 4.58 & 4.57 & 4.58 & 4.57
\Bstrut
\\ \hline
\Tstrut
\multirow{3}{*}{\gls{ud}} & \SI{32}{\byte} & 0.86 & 0.87 & 0.86 & 0.87 & 0.86
\Bstrut
\\
\Tstrut
& \SI{1}{\kilo\byte} & \NO & \NO & 1.44 & \redcell{1.54} & 1.45
\Bstrut
\\
\Tstrut
& \SI{4}{\kilo\byte} & \NO & \NO & \NO & \NO & 2.22
\Bstrut
\\
\hlineB{3}
\end{tabular}
\end{table}

View File

@@ -0,0 +1,37 @@
\begin{table}[ht!]
\centering
\caption{The benchmark's settings which were used to analyze the latency of messages sent whilst both the sending and receiving node were busy polling.}
\label{tab:oneway_settings_busy}
\vspace{3mm}
\footnotesize
\begin{tabular}{!{\hspace{-2pt}}c|!{\hspace{-8pt}}c!{\hspace{-4pt}}c!{\hspace{-4pt}}c!{\hspace{-4pt}}c!{\hspace{-6pt}}c!{\hspace{-2pt}}ccc!{\hspace{-3pt}}ccc!{\hspace{-5pt}}}
\multicolumn{1}{c}{} & \rot{service type} & \rot{polling (send)} & \rot{polling (recv)} & \rot{inline mode} & \rot{unsignaled}& \rot{operation} & \rot{burst size} & \rot{repetitions} & \rot{loop pauses} & \rot{timestamp} & \rotright{message size}\\
\hlineB{3}
\Tstrut
\autoref{fig:oneway_busy_a} & \graycellleftline{\acrshort{rc}} & busy & busy & \NO & \enspace\NO & send & 8000 & \graycell{20} & \graycell{\SI{0}{\nano\second}} & $t_{subm}$ & \SI{32}{\byte}
\Bstrut\\
\Tstrut
\autoref{fig:oneway_busy_b} & \graycellleftline{\acrshort{uc}} & busy & busy & \NO & \enspace\NO & send & 8000 & \graycell{20} & \graycell{\SI{0}{\nano\second}} & $t_{subm}$ & \SI{32}{\byte}
\Bstrut\\
\Tstrut
\autoref{fig:oneway_busy_c} & \graycellleftline{\acrshort{ud}} & busy & busy & \NO & \enspace\NO & send & 8000 & \graycell{20} & \graycell{\SI{0}{\nano\second}} & $t_{subm}$ & \SI{32}{\byte}
\Bstrut\\
\Tstrut
\autoref{fig:oneway_busy_d} & \graycellleftline{\acrshort{rc}} & busy & busy & \NO & \enspace\NO & send & 8000 & \graycell{1} & \graycell{\SI[scientific-notation=true]{1e9}{\nano\second}} & $t_{subm}$ & \SI{32}{\byte}
\Bstrut\\
\Tstrut
\autoref{fig:oneway_busy_e} & \graycellleftline{\acrshort{uc}} & busy & busy & \NO & \enspace\NO & send & 8000 & \graycell{1} & \graycell{\SI[scientific-notation=true]{1e9}{\nano\second}} & $t_{subm}$ & \SI{32}{\byte}
\Bstrut\\
\Tstrut
\autoref{fig:oneway_busy_f} & \graycellleftline{\acrshort{ud}} & busy & busy & \NO & \enspace\NO & send & 8000 & \graycell{1} & \graycell{\SI[scientific-notation=true]{1e9}{\nano\second}} & $t_{subm}$ & \SI{32}{\byte}
\Bstrut\\
\hlineB{3}
\end{tabular}
\newpage
\end{table}

View File

@@ -0,0 +1,37 @@
\begin{table}[ht!]
\centering
\caption{The benchmark's settings which were used to analyze the latency of messages sent whilst both the sending and receiving node were waiting for an event.}
\footnotesize
\label{tab:oneway_settings_event}
\vspace{3mm}
\begin{tabular}{!{\hspace{-2pt}}c|!{\hspace{-8pt}}c!{\hspace{-4pt}}c!{\hspace{-4pt}}c!{\hspace{-4pt}}c!{\hspace{-6pt}}c!{\hspace{-2pt}}ccc!{\hspace{-3pt}}ccc!{\hspace{-5pt}}}
\multicolumn{1}{c}{} & \rot{service type} & \rot{polling (send)} & \rot{polling (recv)} & \rot{inline mode} & \rot{unsignaled}& \rot{operation} & \rot{burst size} & \rot{repetitions} & \rot{loop pauses} & \rot{timestamp} & \rotright{message size}\\
\hlineB{3}
\Tstrut
\autoref{fig:oneway_event_a} & \graycellleftline{\acrshort{rc}} & event & event & \NO & \enspace\NO & send & 8000 & \graycell{20} & \graycell{\SI{0}{\nano\second}} & $t_{subm}$ & \SI{32}{\byte}
\Bstrut\\
\Tstrut
\autoref{fig:oneway_event_b} & \graycellleftline{\acrshort{uc}} & event & event & \NO & \enspace\NO & send & 8000 & \graycell{20} & \graycell{\SI{0}{\nano\second}} & $t_{subm}$ & \SI{32}{\byte}
\Bstrut\\
\Tstrut
\autoref{fig:oneway_event_c} & \graycellleftline{\acrshort{ud}} & event & event & \NO & \enspace\NO & send & 8000 & \graycell{20} & \graycell{\SI{0}{\nano\second}} & $t_{subm}$ & \SI{32}{\byte}
\Bstrut\\
\Tstrut
\autoref{fig:oneway_event_d} & \graycellleftline{\acrshort{rc}} & event & event & \NO & \enspace\NO & send & 8000 & \graycell{1} & \graycell{\SI[scientific-notation=true]{1e9}{\nano\second}} & $t_{subm}$ & \SI{32}{\byte}
\Bstrut\\
\Tstrut
\autoref{fig:oneway_event_e} & \graycellleftline{\acrshort{uc}} & event & event & \NO & \enspace\NO & send & 8000 & \graycell{1} & \graycell{\SI[scientific-notation=true]{1e9}{\nano\second}} & $t_{subm}$ & \SI{32}{\byte}
\Bstrut\\
\Tstrut
\autoref{fig:oneway_event_f} & \graycellleftline{\acrshort{ud}} & event & event & \NO & \enspace\NO & send & 8000 & \graycell{1} & \graycell{\SI[scientific-notation=true]{1e9}{\nano\second}} & $t_{subm}$ & \SI{32}{\byte}
\Bstrut\\
\hlineB{3}
\end{tabular}
\newpage
\end{table}

View File

@@ -0,0 +1,21 @@
\begin{table}[ht!]
\centering
\caption{The benchmark's settings which were used to analyze the influence of sending messages inline on the latency.}
\label{tab:oneway_settings_inline}
\vspace{3mm}
\footnotesize
\begin{tabular}{!{\hspace{-2pt}}c|!{\hspace{-2pt}}c!{\hspace{-4pt}}c!{\hspace{-4pt}}c!{\hspace{-4pt}}c!{\hspace{-6pt}}c!{\hspace{-2pt}}ccc!{\hspace{-3pt}}ccc!{\hspace{-5pt}}}
\multicolumn{1}{c}{} & \rot{service type} & \rot{polling (send)} & \rot{polling (recv)} & \rot{inline mode} & \rot{unsignaled}& \rot{operation} & \rot{burst size} & \rot{repetitions} & \rot{loop pauses} & \rot{timestamp} & \rotright{message size}\\
\hlineB{3}
\Tstrut
\autoref{fig:oneway_inline} & \acrshort{uc} & busy & busy & \graycell{\NO} & \enspace\NO & send & 8000 & 20 & \SI{0}{\nano\second} & $t_{subm}$ & \SI{32}{\byte}
\Bstrut\\
\Tstrut
\autoref{fig:oneway_inline} & \acrshort{uc} & busy & busy & \graycell{\OK} & \enspace\NO & send & 8000 & 20 & \SI{0}{\nano\second} & $t_{subm}$ & \SI{32}{\byte}
\Bstrut\\
\hlineB{3}
\end{tabular}
\newpage
\end{table}

View File

@@ -0,0 +1,59 @@
\begin{table}[ht!]
\centering
\caption{The benchmark's settings which were used to analyze the influence of message size on the latency, with $i\in[0,12]$, $j\in[0,7]$, and $k\in[0,9]$.}
\label{tab:oneway_settings_message_size}
\vspace{3mm}
\footnotesize
\begin{tabular}{!{\hspace{-2pt}}c|!{\hspace{-8pt}}c!{\hspace{-6pt}}c!{\hspace{-4pt}}c!{\hspace{-2pt}}c!{\hspace{-6pt}}c!{\hspace{-2pt}}ccc!{\hspace{-3pt}}ccc!{\hspace{-9pt}}}
\multicolumn{1}{c}{} & \rot{service type} & \rot{polling (send)} & \rot{polling (recv)} & \rot{inline mode} & \rot{unsignaled}& \rot{operation} & \rot{burst size} & \rot{repetitions} & \rot{loop pauses} & \rot{timestamp} & \rotright{message size}\\
\hlineB{3}
\Tstrut
\autoref{fig:oneway_message_size_a} \rlap{\hspace{0.195em}\blacktriangle}\textbf{---} & \graycellleftline{\acrshort{rc}} & busy & busy & \graycell{\NO} & \enspace\NO & \graycell{send} & \graycell{8000} & 10 &\SI{0}{\nano\second} & $t_{subm}$ & \graycellright{$\SI[parse-numbers=false]{8\cdot2^i}{\byte}$}
\Bstrut\\
\Tstrut
\autoref{fig:oneway_message_size_a} {\color{plot_blue}\rlap{\hspace{0.195em}\blacktriangle}\textbf{---}} & \graycellleftline{\acrshort{rc}} & busy & busy & \graycell{\OK} & \enspace\NO & \graycell{send} & \graycell{$M_{inl}$\footnotemark[1]} & 10 &\SI{0}{\nano\second} & $t_{subm}$ & \graycellright{$\SI[parse-numbers=false]{8\cdot2^j}{\byte}$}
\Bstrut\\
\Tstrut
\autoref{fig:oneway_message_size_a} {\color{plot_gray}\rlap{\hspace{0.195em}\blacktriangledown}\textbf{---}} & \graycellleftline{\acrshort{rc}} & busy & busy & \graycell{\NO} & \enspace\NO & \graycell{rdma} & \graycell{8000} & 10 &\SI{0}{\nano\second} & $t_{subm}$ & \graycellright{$\SI[parse-numbers=false]{8\cdot2^i}{\byte}$}
\Bstrut\\
\Tstrut
\autoref{fig:oneway_message_size_a} {\color{plot_light_blue}\rlap{\hspace{0.195em}\blacktriangledown}\textbf{---}} & \graycellleftline{\acrshort{rc}} & busy & busy & \graycell{\OK} & \enspace\NO & \graycell{rdma} & \graycell{$M_{inl}$\footnotemark[1]} & 10 &\SI{0}{\nano\second} & $t_{subm}$ & \graycellright{$\SI[parse-numbers=false]{8\cdot2^j}{\byte}$}
\Bstrut\\
\hline
\Tstrut
\autoref{fig:oneway_message_size_b} \rlap{\hspace{0.195em}\blacktriangle}\textbf{---} & \graycellleftline{\acrshort{uc}} & busy & busy & \graycell{\NO} & \enspace\NO & \graycell{send} & \graycell{8000} & 10 & \SI{0}{\nano\second} & $t_{subm}$ & \graycellright{$\SI[parse-numbers=false]{8\cdot2^i}{\byte}$}
\Bstrut\\
\Tstrut
\autoref{fig:oneway_message_size_b} {\color{plot_blue}\rlap{\hspace{0.195em}\blacktriangle}\textbf{---}} & \graycellleftline{\acrshort{uc}} & busy & busy & \graycell{\OK} & \enspace\NO & \graycell{send} & \graycell{$M_{inl}$\footnotemark[1]} & 10 & \SI{0}{\nano\second} & $t_{subm}$ & \graycellright{$\SI[parse-numbers=false]{8\cdot2^j}{\byte}$}
\Bstrut\\
\Tstrut
\autoref{fig:oneway_message_size_b} {\color{plot_gray}\rlap{\hspace{0.195em}\blacktriangledown}\textbf{---}} & \graycellleftline{\acrshort{uc}} & busy & busy & \graycell{\NO} & \enspace\NO & \graycell{rdma} & \graycell{8000} & 10 & \SI{0}{\nano\second} & $t_{subm}$ & \graycellright{$\SI[parse-numbers=false]{8\cdot2^i}{\byte}$}
\Bstrut\\
\Tstrut
\autoref{fig:oneway_message_size_b} {\color{plot_light_blue}\rlap{\hspace{0.195em}\blacktriangledown}\textbf{---}} & \graycellleftline{\acrshort{uc}} & busy & busy & \graycell{\OK} & \enspace\NO & \graycell{rdma} & \graycell{$M_{inl}$\footnotemark[1]} & 10 & \SI{0}{\nano\second} & $t_{subm}$ & \graycellright{$\SI[parse-numbers=false]{8\cdot2^j}{\byte}$}
\Bstrut\\
\hline
\Tstrut
\autoref{fig:oneway_message_size_c} \hspace{0.1em}\rlap{\hspace{0.195em}\blacktriangle}\textbf{---} & \graycellleftline{\acrshort{ud}} & busy & busy & \graycell{\NO} & \enspace\NO & \graycell{send} & \graycell{8000} & 10 & \SI{0}{\nano\second} & $t_{subm}$ & \graycellright{$\SI[parse-numbers=false]{8\cdot2^k}{\byte}$}
\Bstrut\\
\Tstrut
\autoref{fig:oneway_message_size_c} {\color{plot_blue}\hspace{0.1em}\rlap{\hspace{0.195em}\blacktriangle}\textbf{---}} & \graycellleftline{\acrshort{ud}} & busy & busy & \graycell{\OK} & \enspace\NO & \graycell{send} & \graycell{$M_{inl}$\footnotemark[1]} & 10 & \SI{0}{\nano\second} & $t_{subm}$ & \graycellright{$\SI[parse-numbers=false]{8\cdot2^j}{\byte}$}
\Bstrut\\
\hlineB{3}
\end{tabular}
\newpage
\end{table}
\footnotetext[1]{The maximum size $M_{inl}$ of a \gls{qp} for a given message size is dependent on the \gls{hca}. In case of the Mellanox ConnectX\textregistered-4, each queue of a \gls{qp} could hold 8000, 8000, 8000, 6552, 5461, 4096, and 2730 \glspl{wr} for a message size of \SI{8}{\byte}, \SI{16}{\byte}, \SI{32}{\byte}, \SI{64}{\byte}, \SI{128}{\byte}, \SI{256}{\byte}, and \SI{512}{\byte}, respectively.}

View File

@@ -0,0 +1,39 @@
\begin{table}[ht!]
\vspace{3.5cm}
\footnotesize
\centering
\caption{The benchmark's settings which were used to analyze whether the steep slope between \SI{128}{\byte} and \SI{256}{\byte} in \autoref{fig:oneway_message_size} was caused by the non-constant burst sizes, with $j\in[0,7]$.}
\label{tab:oneway_settings_message_size_inline}
\vspace{3mm}
\footnotesize
\begin{tabular}{!{\hspace{-2pt}}c|!{\hspace{-8pt}}c!{\hspace{-6pt}}c!{\hspace{-4pt}}c!{\hspace{-2pt}}c!{\hspace{-6pt}}c!{\hspace{-2pt}}ccc!{\hspace{-3pt}}ccc!{\hspace{-9pt}}}
\multicolumn{1}{c}{} & \rot{service type} & \rot{polling (send)} & \rot{polling (recv)} & \rot{inline mode} & \rot{unsignaled}& \rot{operation} & \rot{burst size} & \rot{repetitions} & \rot{loop pauses} & \rot{timestamp} & \rotright{message size}\\
\hlineB{3}
\Tstrut
\autoref{fig:oneway_message_size_inline_a} {\color{plot_blue}\rlap{\hspace{0.195em}\blacktriangle}\textbf{---}} & \graycellleftline{\acrshort{rc}} & busy & busy & \OK & \enspace\NO & \graycell{send} & 2730 & 10 &\SI{0}{\nano\second} & $t_{subm}$ & \graycellright{$\SI[parse-numbers=false]{8\cdot2^j}{\byte}$}
\Bstrut\\
\Tstrut
\autoref{fig:oneway_message_size_inline_a} {\color{plot_light_blue}\rlap{\hspace{0.195em}\blacktriangledown}\textbf{---}} & \graycellleftline{\acrshort{rc}} & busy & busy & \OK & \enspace\NO & \graycell{rdma} & 2730 & 10 &\SI{0}{\nano\second} & $t_{subm}$ & \graycellright{$\SI[parse-numbers=false]{8\cdot2^j}{\byte}$}
\Bstrut\\
\hline
\Tstrut
\autoref{fig:oneway_message_size_inline_b} {\color{plot_blue}\rlap{\hspace{0.195em}\blacktriangle}\textbf{---}} & \graycellleftline{\acrshort{uc}} & busy & busy & \OK & \enspace\NO & \graycell{send} & 2730 & 10 & \SI{0}{\nano\second} & $t_{subm}$ & \graycellright{$\SI[parse-numbers=false]{8\cdot2^j}{\byte}$}
\Bstrut\\
\Tstrut
\autoref{fig:oneway_message_size_inline_b} {\color{plot_light_blue}\rlap{\hspace{0.195em}\blacktriangledown}\textbf{---}} & \graycellleftline{\acrshort{uc}} & busy & busy & \OK & \enspace\NO & \graycell{rdma} & 2730 & 10 & \SI{0}{\nano\second} & $t_{subm}$ & \graycellright{$\SI[parse-numbers=false]{8\cdot2^j}{\byte}$}
\Bstrut\\
\hline
\Tstrut
\autoref{fig:oneway_message_size_inline_c} {\color{plot_blue}\hspace{0.1em}\rlap{\hspace{0.195em}\blacktriangle}\textbf{---}} & \graycellleftline{\acrshort{ud}} & busy & busy & \OK & \enspace\NO & \graycell{send} & 2730 & 10 & \SI{0}{\nano\second} & $t_{subm}$ & \graycellright{$\SI[parse-numbers=false]{8\cdot2^j}{\byte}$}
\Bstrut\\
\hlineB{3}
\end{tabular}
\newpage
\end{table}

View File

@@ -0,0 +1,37 @@
\begin{table}[ht!]
\centering
\caption{The benchmark's settings which were used to analyze whether the increasing latency in \autoref{fig:oneway_message_size} was caused by congestion control, with $i\in[0,12]$.}
\label{tab:oneway_settings_message_size_wait}
\vspace{3mm}
\footnotesize
\begin{tabular}{!{\hspace{-2pt}}c|!{\hspace{-8pt}}c!{\hspace{-6pt}}c!{\hspace{-4pt}}c!{\hspace{-2pt}}c!{\hspace{-6pt}}c!{\hspace{-2pt}}ccc!{\hspace{-3pt}}ccc!{\hspace{-9pt}}}
\multicolumn{1}{c}{} & \rot{service type} & \rot{polling (send)} & \rot{polling (recv)} & \rot{inline mode} & \rot{unsignaled}& \rot{operation} & \rot{burst size} & \rot{repetitions} & \rot{loop pauses} & \rot{timestamp} & \rotright{message size}\\
\hlineB{3}
\Tstrut
\autoref{fig:oneway_message_size_wait_a} {\color{black}\rlap{\hspace{0.195em}\blacktriangle}\textbf{---}} & \graycellleftline{\acrshort{rc}} & busy & busy & \NO & \enspace\NO & \graycell{send} & 8000 & 5 &\SI{5500}{\nano\second} & $t_{subm}$ & \graycellright{$\SI[parse-numbers=false]{8\cdot2^i}{\byte}$}
\Bstrut\\
\Tstrut
\autoref{fig:oneway_message_size_wait_a} {\color{plot_gray}\rlap{\hspace{0.195em}\blacktriangledown}\textbf{---}} & \graycellleftline{\acrshort{rc}} & busy & busy & \NO & \enspace\NO & \graycell{rdma} & 8000 & 5 &\SI{5500}{\nano\second} & $t_{subm}$ & \graycellright{$\SI[parse-numbers=false]{8\cdot2^i}{\byte}$}
\Bstrut\\
\hline
\Tstrut
\autoref{fig:oneway_message_size_wait_b} {\color{black}\rlap{\hspace{0.195em}\blacktriangle}\textbf{---}} & \graycellleftline{\acrshort{uc}} & busy & busy & \NO & \enspace\NO & \graycell{send} & 8000 & 5 & \SI{5500}{\nano\second} & $t_{subm}$ & \graycellright{$\SI[parse-numbers=false]{8\cdot2^i}{\byte}$}
\Bstrut\\
\Tstrut
\autoref{fig:oneway_message_size_wait_b} {\color{plot_gray}\rlap{\hspace{0.195em}\blacktriangledown}\textbf{---}} & \graycellleftline{\acrshort{uc}} & busy & busy & \NO & \enspace\NO & \graycell{rdma} & 8000 & 5 & \SI{5500}{\nano\second} & $t_{subm}$ & \graycellright{$\SI[parse-numbers=false]{8\cdot2^i}{\byte}$}
\Bstrut\\
\hline
\Tstrut
\autoref{fig:oneway_message_size_wait_c} {\color{black}\hspace{0.1em}\rlap{\hspace{0.195em}\blacktriangle}\textbf{---}} & \graycellleftline{\acrshort{ud}} & busy & busy & \NO & \enspace\NO & \graycell{send} & 8000 & 5 & \SI{5500}{\nano\second} & $t_{subm}$ & \graycellright{$\SI[parse-numbers=false]{8\cdot2^i}{\byte}$}
\Bstrut\\
\hlineB{3}
\end{tabular}
\newpage
\end{table}

View File

@@ -0,0 +1,29 @@
\begin{table}[ht!]
\centering
    \caption{The benchmark's settings which were used to analyze the effect on latency of sending messages through memory semantics instead of channel semantics.}
\label{tab:oneway_settings_rdma}
\vspace{3mm}
\footnotesize
\begin{tabular}{!{\hspace{-2pt}}c|!{\hspace{-8pt}}c!{\hspace{-4pt}}c!{\hspace{-4pt}}c!{\hspace{-4pt}}c!{\hspace{-6pt}}c!{\hspace{-2pt}}ccc!{\hspace{-3pt}}ccc!{\hspace{-5pt}}}
\multicolumn{1}{c}{} & \rot{service type} & \rot{polling (send)} & \rot{polling (recv)} & \rot{inline mode} & \rot{unsignaled}& \rot{operation} & \rot{burst size} & \rot{repetitions} & \rot{loop pauses} & \rot{timestamp} & \rotright{message size}\\
\hlineB{3}
\Tstrut
\autoref{fig:oneway_rdma_a} & \graycellleftline{\acrshort{rc}} & busy & busy & \OK & \enspace\NO & \graycell{send} & 8000 & 20 & \SI{0}{\nano\second} & $t_{subm}$ & \SI{32}{\byte}
\Bstrut\\
\Tstrut
\autoref{fig:oneway_rdma_a} & \graycellleftline{\acrshort{rc}} & busy & busy & \OK & \enspace\NO & \graycell{rdma} & 8000 & 20 & \SI{0}{\nano\second} & $t_{subm}$ & \SI{32}{\byte}
\Bstrut\\
\Tstrut
\autoref{fig:oneway_rdma_b} & \graycellleftline{\acrshort{uc}} & busy & busy & \OK & \enspace\NO & \graycell{send} & 8000 & 20 & \SI{0}{\nano\second} & $t_{subm}$ & \SI{32}{\byte}
\Bstrut\\
\Tstrut
\autoref{fig:oneway_rdma_b} & \graycellleftline{\acrshort{uc}} & busy & busy & \OK & \enspace\NO & \graycell{rdma} & 8000 & 20 & \SI{0}{\nano\second} & $t_{subm}$ & \SI{32}{\byte}
\Bstrut\\
\hlineB{3}
\end{tabular}
\newpage
\end{table}

View File

@@ -0,0 +1,21 @@
\begin{table}[ht!]
\centering
\caption{The benchmark's settings which were used to analyze the difference in time between the moment that a \acrfull{wr} is submitted to the \acrfull{sq} and the moment the corresponding message is actually sent.}
\label{tab:oneway_settings_submit_send_comparison}
\vspace{3mm}
\footnotesize
\begin{tabular}{!{\hspace{-2pt}}c|!{\hspace{-2pt}}c!{\hspace{-4pt}}c!{\hspace{-4pt}}c!{\hspace{-4pt}}c!{\hspace{-6pt}}c!{\hspace{-2pt}}ccc!{\hspace{-3pt}}ccc!{\hspace{-5pt}}}
\multicolumn{1}{c}{} & \rot{service type} & \rot{polling (send)} & \rot{polling (recv)} & \rot{inline mode} & \rot{unsignaled}& \rot{operation} & \rot{burst size} & \rot{repetitions} & \rot{loop pauses} & \rot{timestamp} & \rotright{message size}\\
\hlineB{3}
\Tstrut
\autoref{fig:oneway_submit_send_comparison} & \acrshort{uc} & busy & busy & \NO & \enspace\NO & send & 8000 & 20 & \SI{0}{\nano\second} & \graycell{$t_{subm}$} & \SI{32}{\byte}
\Bstrut\\
\Tstrut
\autoref{fig:oneway_submit_send_comparison} & \acrshort{uc} & busy & busy & \NO & \enspace\NO & send & 8000 & 20 & \SI{0}{\nano\second} & \graycell{$t_{send}$} & \SI{32}{\byte}
\Bstrut\\
\hlineB{3}
\end{tabular}
\newpage
\end{table}

View File

@@ -0,0 +1,21 @@
\begin{table}[ht!]
\centering
\caption{The benchmark's settings which were used to analyze the influence of \acrfull{cqe} creation on latency for \textit{send} operations.}
\label{tab:oneway_settings_unsignaled_inline}
\vspace{3mm}
\footnotesize
\begin{tabular}{!{\hspace{-2pt}}c|!{\hspace{-2pt}}c!{\hspace{-4pt}}c!{\hspace{-4pt}}c!{\hspace{-4pt}}c!{\hspace{6pt}}!{\hspace{-6pt}}c!{\hspace{-2pt}}ccc!{\hspace{-3pt}}ccc!{\hspace{-5pt}}}
\multicolumn{1}{c}{} & \rot{service type} & \rot{polling (send)} & \rot{polling (recv)} & \rot{inline mode} & \rot{unsignaled}& \rot{operation} & \rot{burst size} & \rot{repetitions} & \rot{loop pauses} & \rot{timestamp} & \rotright{message size}\\
\hlineB{3}
\Tstrut
\autoref{fig:oneway_unsignaled_inline} & \acrshort{uc} & busy & busy & \OK & \graycell{\NO} & send & 8000 & 20 & \SI{0}{\nano\second} & $t_{subm}$ & \SI{32}{\byte}
\Bstrut\\
\Tstrut
\autoref{fig:oneway_unsignaled_inline} & \acrshort{uc} & busy & busy & \OK & \graycell{\OK} & send & 8000 & 20 & \SI{0}{\nano\second} & $t_{subm}$ & \SI{32}{\byte}
\Bstrut\\
\hlineB{3}
\end{tabular}
\newpage
\end{table}

View File

@@ -0,0 +1,21 @@
\begin{table}[ht!]
\centering
\caption{The benchmark's settings which were used to analyze the influence of \acrfull{cqe} creation on latency for \textit{\gls{rdma} write} operations.}
\label{tab:oneway_settings_unsignaled_rdma}
\vspace{3mm}
\footnotesize
\begin{tabular}{!{\hspace{-2pt}}c|!{\hspace{-2pt}}c!{\hspace{-4pt}}c!{\hspace{-4pt}}c!{\hspace{-4pt}}c!{\hspace{6pt}}!{\hspace{-6pt}}c!{\hspace{-2pt}}ccc!{\hspace{-3pt}}ccc!{\hspace{-5pt}}}
\multicolumn{1}{c}{} & \rot{service type} & \rot{polling (send)} & \rot{polling (recv)} & \rot{inline mode} & \rot{unsignaled}& \rot{operation} & \rot{burst size} & \rot{repetitions} & \rot{loop pauses} & \rot{timestamp} & \rotright{message size}\\
\hlineB{3}
\Tstrut
\autoref{fig:oneway_unsignaled_rdma} & \acrshort{uc} & busy & busy & \OK & \graycell{\NO} & rdma & 8000 & 20 & \SI{0}{\nano\second} & $t_{subm}$ & \SI{32}{\byte}
\Bstrut\\
\Tstrut
\autoref{fig:oneway_unsignaled_rdma} & \acrshort{uc} & busy & busy & \OK & \graycell{\OK} & rdma & 8000 & 20 & \SI{0}{\nano\second} & $t_{subm}$ & \SI{32}{\byte}
\Bstrut\\
\hlineB{3}
\end{tabular}
\newpage
\end{table}

View File

@@ -0,0 +1,73 @@
\begin{longtable}{ p{3.0cm} | p{11cm} }
\caption{Explanation of abbreviations from \autoref{fig:iba_packet_format}. More details on the content of the different packets can be found in the \gls{iba} specification~\cite{infinibandvol1}.}\label{tab:packet_abbreviations}
\vspace{3mm}
\endfirsthead
\endhead
\hlineB{3}
\Tstrut
\textbf{Abbreviation} & \textbf{Description}\\
\hhline{=|=}
\Tstrut
\acrshort{lrh} & \textit{\acrlong{lrh}}: detailed information on this header is provided in a separate paragraph below.
\Bstrut\\
\hline
\Tstrut
\acrshort{grh} & \textit{\acrlong{grh}}: detailed information on this header is provided in a separate paragraph below.
\Bstrut\\
\hline
\Tstrut
\acrshort{bth} & \textit{\acrlong{bth}}: Every packet in the \gls{iba} contains this header. It contains fields for the \gls{iba} transport and holds, i.a., the packet type, the destination queue pair number, and the packet sequence number.
\Tstrut\Bstrut\\
\hline
\Tstrut
\acrshortpl{eth} & \textit{\acrlongpl{eth}}: These headers are optional and are used if applicable, based on the packet type in the \acrshort{bth}\@. All following headers which end with \textit{ETH} are extended transport headers.
\Bstrut\\
\hline
\Tstrut
\acrshort{rdeth} & \textit{\acrlong{rdeth}}: This header contains the end-to-end context, used with the reliable datagram service type.
\Bstrut\\
\hline
\Tstrut
\acrshort{deth} & \textit{\acrlong{deth}}: This header contains the queue key and the source queue pair number for datagram transfers.
\Bstrut\\
\hline
\Tstrut
\acrshort{reth} & \textit{\acrlong{reth}}: This header contains the virtual address, remote key, and DMA length for an \gls{rdma} operation.
\Bstrut\\
\hline
\Tstrut
\acrshort{atomiceth} & \textit{\acrlong{atomiceth}}: This header is used for atomic operations and is similar to the \acrshort{reth}\@. Instead of a length field, it contains a swap (or add) field and a compare data field.
\Bstrut\\
\hline
\Tstrut
\acrshort{aeth} & \textit{\acrlong{aeth}}: This header serves as acknowledge field in \gls{rdma} read response first, \gls{rdma} read response last, \gls{rdma} read response only, and acknowledge packets.
\Bstrut\\
\hline
\Tstrut
\acrshort{atomicacketh} & \textit{\acrlong{atomicacketh}}: This header is similar to the \acrshort{aeth}, but for atomic acknowledgments. It only contains the original remote data.
\Bstrut\\
\hline
\Tstrut
\acrshort{imm} & \textit{\acrlong{imm}}: This optional block can be used to add \SI{32}{\bit} of custom data to \textit{send} or \textit{\gls{rdma} write} packets. The containing \SI{32}{\bit} of data will be visible in the receive completion queue element.
\Bstrut\\
\hline
\Tstrut
\acrshort{ieth} & \textit{\acrlong{ieth}}: This header contains a remote key which will be used to invalidate a remote memory region.
\Tstrut\Bstrut\\
\hline
\Tstrut
Payload & \textit{Payload}: This is the actual payload to be sent. This field will be as big as the \gls{mtu} of the network.
\Bstrut\\
\hline
\Tstrut
\acrshort{icrc} & \textit{\acrlong{icrc}}: This is the redundancy check for blocks that do not change during their transmission from source to destination.
\newline
The \gls{crc} which is used is the same as in the Ethernet standard: the CRC-32 with the polynomial \texttt{0x04C11DB7}.
\Bstrut\\
\hline
\Tstrut
        \acrshort{vcrc} & \textit{\acrlong{vcrc}}: This is the redundancy check for blocks that do change during their transmission from source to destination.
\Tstrut\\
\hlineB{3}
\end{longtable}

32
tables/qp_performance.tex Normal file
View File

@@ -0,0 +1,32 @@
\begin{table}[ht!]
\centering
\caption{$\tilde{t}_{lat}$ [\SI{}{\micro\second}] as reported by \texttt{ib\_send\_lat} for different service types and queue pair types with a varying message size. All communication was sent with the normal \textit{send} operation. Every test contained 1000 iterations and messages that were smaller than \SI{188}{\byte} were sent inline.}\label{tab:qp_performance}
\vspace{3mm}
\begin{tabular}{l r | c c c c c c}
\hlineB{3}
\Tstrut
\textbf{service type} & \textbf{\gls{qp} type} & \multicolumn{6}{c}{\textbf{message size}}
\Bstrut
\\
\Tstrut
& & \SI{32}{\byte} & \SI{128}{\byte} & \SI{512}{\byte} & \SI{2}{\kilo\byte} & \SI{8}{\kilo\byte} & \SI{32}{\kilo\byte} \\
\hhline{==|======}
\Tstrut
\multirow{2}{*}{\gls{rc}} & regular & 0.77 & 0.86 & 1.31 & 1.68 & 2.60 & 4.56
\Bstrut
\\
\Tstrut
& \gls{rdma} \gls{cm} & 0.81 & 0.90 & 1.33 & 1.73 & 2.57 & 4.57
\Bstrut
\\ \hline
\Tstrut
\multirow{2}{*}{\gls{ud}} & regular & 0.80 & 0.84 & 1.26 & 1.74 & \NO & \NO
\Bstrut
\\
\Tstrut
& \gls{rdma} \gls{cm} & \redcell{0.86} & 0.90 & 1.31 & 1.71 & \NO & \NO
\Bstrut
\\
\hlineB{3}
\end{tabular}
\end{table}

View File

@@ -0,0 +1,41 @@
\begin{table}[ht!]
\centering
\caption{Required Communication Management messages, used for all service types except \acrfull{ud}.}
\label{tab:required_cm_messages}
\vspace{3mm}
\begin{tabular}{p{2.6cm} | p{11.4cm}}
\hlineB{3}
\Tstrut
\textbf{CM message} & \textbf{Description}\\
\hhline{=|=}
\Tstrut
\acrshort{req} & A \textit{\acrlong{req}} is used to initiate the communication establishment sequence. The node that sends this message provides the remote host with its queue pair number and its \gls{gid} and/or \gls{lid}.
\Bstrut\\
\hline
\Tstrut
    \acrshort{mra}      & A \textit{\acrlong{mra}} is used as a response to a \acrshort{req}, a LAP (Load Alternate Path, an optional communication message), or a \acrshort{rep}. It is used if the node which receives one of the aforementioned messages does not expect to be able to respond within the specified time-out. With this mechanism, unnecessary time-outs are prevented.
\Bstrut\\
\hline
\Tstrut
\acrshort{rej} & By replying with \textit{\acrlong{rej}} to a \acrshort{req}, a node indicates that it will not continue with the communication establishment sequence. The reason can be found, i.a., in the data field of the \acrshort{rej}.
\Bstrut\\
\hline
\Tstrut
    \acrshort{rep}      & A node sends a \textit{\acrlong{rep}} if it wants to accept a previously received \acrshort{req} and all its parameters.
\Bstrut\\
\hline
\Tstrut
\acrshort{rtu} & A node replies with \textit{\acrlong{rtu}} after it received a \acrshort{rep} to indicate that transmission can be started.
\Bstrut\\
\hline
\Tstrut
\acrshort{dreq} & A \textit{\acrlong{dreq}} is sent if a node wants to disconnect a queue pair.
\Bstrut\\
\hline
\Tstrut
    \acrshort{drep}     & A \textit{\acrlong{drep}} is used to acknowledge that a \acrshort{dreq} has been received.
\Bstrut\\
\hlineB{3}
\end{tabular}
\end{table}

40
tables/service_types.tex Normal file
View File

@@ -0,0 +1,40 @@
\begin{table}
\centering
\caption{InfiniBand Architecture's service types.}
\label{tab:service_types}
\vspace{3mm}
\begin{tabular}{ p{5.2cm} | p{9.1cm}}
\hlineB{3}
\Tstrut
\textbf{Service Type} & \textbf{Description}\\
\hhline{=|=}
\Tstrut
    \acrfull{rc}    & In this mode, one \acrshort{qp} on a local node is connected to one \acrshort{qp} on a remote node. This service type ensures message delivery to---thus not consumption by---the remote node. Messages are sent in order and a combination of hardware and channel adapter software resends them on communication failure.
\Bstrut\\
\hline
\Tstrut
\acrfull{uc} & Like \gls{rc}, this service type connects one local \acrshort{qp} with one remote \acrshort{qp}\@. It is unreliable and thus does not support acknowledgment of delivery and simply drops undelivered messages.
\Bstrut\\
\hline
\Tstrut
\acrfull{ud} & This service type allows a local \acrshort{qp} to communicate with any other unreliable datagram \acrshort{qp} without connecting to it. Like \gls{uc}, this mode is unreliable and thus simply drops packets if they get lost.\newline
The ability to send data to another \acrshort{qp} without connecting to it is beneficial for scalability.
\Bstrut\\
\hline
\Tstrut
\acrfull{rd} & Reliable datagram enables a local \acrshort{qp} to communicate with any other \gls{rd} \acrshort{qp} without connecting to it. Contrary to \gls{ud}, this service type is reliable and thus tries to resend messages when they get lost.\newline
                        Since reliable datagram is not implemented in the \acrshort{ofed} stack (\autoref{sec:iblibs}), and is hence not practically usable, it will not be further discussed in the present work.
\Bstrut\\
\hline
\Tstrut
Raw Datagram & This allows a \acrshort{qp} to send raw datagram messages, which means that \gls{iba} specific headers are stripped from the packets. This service type can be divided into IPv6 raw datagram and EtherType raw datagram.\newline
This service type will not be further discussed in the present work.
\Bstrut\\
\hlineB{3}
\end{tabular}
\newpage
\end{table}

View File

@@ -0,0 +1,26 @@
\begin{table}[ht!]
\centering
\caption{Comparison of the performance of timer functions. All tests were performed with a rate of \SI{100}{\kilo\hertz}, with 10 64-bit floating-point numbers per sample, \gls{rc} as service type, and with the InfiniBand node-type as node-type under test. Every test contains \SI{250000}{} samples.}
\vspace{3mm}
\label{tab:timer_comparison}
\begin{tabular}{c | l l}
\hlineB{3}
\Tstrut
\textbf{optimized environment} & \multicolumn{2}{c}{\textbf{missed samples}}
\Bstrut
\\
\Tstrut
& \multicolumn{1}{c}{\textbf{\texttt{timerfd}}} & \multicolumn{1}{c}{\textbf{\gls{tsc}}} \\
\hhline{=|==}
\Tstrut
\NO & $\frac{12085}{250000}\cdot\SI{100}{\percent} = \SI{4.83}{\percent}$ & $\frac{3035}{250000}\cdot\SI{100}{\percent} = \SI{1.21}{\percent}$
\Bstrutlarger
\\
\Tstrut
\OK & $\frac{1692}{250000}\cdot\SI{100}{\percent} = \SI{0.68}{\percent}$ & \greencell{$\frac{1244}{250000}\cdot\SI{100}{\percent} = \SI{0.50}{\percent}$}
\Bstrutlarger
\\
\hlineB{3}
\end{tabular}
\end{table}

View File

@@ -0,0 +1,41 @@
\begin{table}[ht!]
\centering
\caption{Supported operations with various service types. Although \acrfull{rd} theoretically supports all operations, it is not supported by the \gls{ofed} stack.}
\vspace{3mm}
\label{tab:transport_modes}
\begin{tabular}{r | c c c c}
\hlineB{3}
\Tstrut
\textbf{Operation} & \textbf{UD} & \textbf{UC} & \textbf{RC} & \textbf{RD} \\
\hhline{=|====}
\Tstrut
send & \OK & \OK & \OK &
\Bstrut
\\
\Tstrut
send with immediate & \OK & \OK & \OK &
\Bstrut
\\
\Tstrut
\gls{rdma} write & & \OK & \OK &
\Bstrut
\\
\Tstrut
\gls{rdma} write with immediate & & \OK & \OK &
\Bstrut
\\
\Tstrut
\gls{rdma} read & & & \OK &
\Bstrut
\\
\Tstrut
atomic compare \& swap & & & \OK &
\Bstrut
\\
\Tstrut
atomic fetch \& add & & & \OK &
\Bstrut\\
\hlineB{3}
\end{tabular}
\end{table}

View File

@@ -0,0 +1,93 @@
\begin{table}[ht!]
\centering
\caption[Interfaces supported by VILLASnode as of June 2018.]{Interfaces supported by VILLASnode as of June 2018\footnotemark.}
\label{tab:villasnode_nodes}
\vspace{3mm}
\begin{tabular}{l | l | p{8.5cm}}
\hlineB{3}
\Tstrut
\textbf{Section} & \textbf{Node Name} & \textbf{Description} \\
\hhline{=|=|=}
\Tstrut
\multirow{10}{*}{\makecell[l]{internal\\communication}} & \textit{file } & support for file log/replay
\Bstrut\\
\hhline{~|-|-}
\Tstrut
& \textit{shmem } & \acrshort{posix} shared memory interface with external processes
\Bstrut\\
\hhline{~|-|-}
\Tstrut
& \textit{loopback } & internal loopback using a queued \acrshort{fifo} buffer
\Bstrut\\
\hhline{~|-|-}
\Tstrut
& \textit{signal } & configurable signal generator for testing purposes
\Bstrut\\
\hhline{~|-|-}
\Tstrut
& \textit{stats } & send communication statistics to other nodes
\Bstrut\\
\hhline{~|-|-}
\Tstrut
& \textit{test\undershort rtt} & measurement of round-trip time, packet loss, and sending rates
\Bstrut\\
\hline
\Tstrut
\multirow{10}{*}{\makecell[l]{server-server\\communication}} & \textit{socket } & BSD network sockets for Packet, IP, or \acrshort{udp} layer
\Bstrut\\
\hhline{~|-|-}
\Tstrut
& \textit{zeromq } & ZeroMQ publish/subscribe messaging
\Bstrut\\
\hhline{~|-|-}
\Tstrut
& \textit{influxdb } & InfluxDB time-series database
\Bstrut\\
\hhline{~|-|-}
\Tstrut
& \textit{nanomsg } & nanomsg publish/subscribe messaging
\Bstrut\\
\hhline{~|-|-}
\Tstrut
& \textit{amqp } & Advanced Message Queuing Protocol
\Bstrut\\
\hhline{~|-|-}
\Tstrut
& \textit{mqtt } & Message Queuing Telemetry Transport
\Bstrut\\
\hhline{~|-|-}
\Tstrut
& \textit{ngsi } & OMA Next Generation Services Interface 10
\Bstrut\\
\hhline{~|-|-}
\Tstrut
        & \textit{websocket}    & send and receive samples over a WebSocket connection
\Bstrut\\
\hline
\Tstrut
\multirow{6}{*}{\makecell[l]{simulator-server\\communication}} & \textit{opal } & OPAL-RT asynchronous processes
\Bstrut\\
\hhline{~|-|-}
\Tstrut
& \textit{fpga } & VILLASfpga \acrshort{pcie} card
\Bstrut\\
\hhline{~|-|-}
\Tstrut
        & \textit{comedi   }    & interface to Comedi devices
\Bstrut\\
\hhline{~|-|-}
\Tstrut
& \textit{gtwif } & RTDS GTWIF workstation interface
\Bstrut\\
\hhline{~|-|-}
\Tstrut
        & \textit{iec61850-9-2} & IEC 61850-9-2 Sampled Values
\Bstrut\\
\hhline{~|-|-}
\Tstrut
& \textit{iec61850-8-1} & IEC 61850-8-1 GOOSE Telegrams
\Bstrut\\
\hlineB{3}
\end{tabular}
\end{table}