Initial commit of master's thesis
This is the version I submitted to RWTH Aachen University on November 9, 2018.
This commit is contained in:
47
tables/benchmark_testsystem.tex
Normal file
47
tables/benchmark_testsystem.tex
Normal file
@@ -0,0 +1,47 @@
|
||||
\begin{table}[ht!]
|
||||
\footnotesize
|
||||
\centering
|
||||
\caption{Dell PowerEdge T630 test system for benchmarks.}
|
||||
\vspace{3mm}
|
||||
|
||||
\label{tab:benchmark_testsystem}
|
||||
\begin{tabular}{l l}
|
||||
\hlineB{3}
|
||||
\Tstrut
|
||||
\gls{cpu} & 2\times~Intel\textregistered~Xeon\textregistered~E5-2643 v4, \SI{20}{\mebi\byte} cache, \SI{3.40}{\giga\hertz} base frequency
|
||||
\Bstrut\\
|
||||
\Tstrut
|
||||
Chipset & Intel\textregistered~C610
|
||||
\Bstrut\\
|
||||
\Tstrut
|
||||
\acrshort{ram} & \SI{32}{\giga\byte}, DDR-4 \SI{2400}{\mega\hertz}, ECC buffered
|
||||
\Bstrut\\
|
||||
\Tstrut
|
||||
Motherboard & Dell PowerEdge T630 System Board NT78X
|
||||
\Bstrut\\
|
||||
\Tstrut
|
||||
Storage & Intel\textregistered~SSD DC P3700 Series, \gls{pcie} (Gen 2) \times8, \SI{400}{\giga\byte}
|
||||
\Bstrut\\
|
||||
\Tstrut
|
||||
\gls{hca} & 2\times~Mellanox\textregistered{} ConnectX\textregistered-4 MT27700, \gls{pcie} (Gen 3) \times16, \SI[per-mode=symbol]{100}{\giga\bit\per\second}
|
||||
\Bstrut\\
|
||||
\Tstrut
|
||||
Physical link & \SI{0.5}{\meter} Mellanox\textregistered{} MCP100-E00A Passive copper Cable, \SI[per-mode=symbol]{100}{\giga\bit\per\second}
|
||||
\Bstrut\\
|
||||
\Tstrut
|
||||
\gls{os} & Fedora 27 @ Linux kernel 4.13.9-200
|
||||
\Bstrut\\
|
||||
\Tstrut
|
||||
\gls{ofed} & MLNX OFED Linux 4.4-2.0.7.0
|
||||
\Bstrut\\
|
||||
\Tstrut
|
||||
VILLASnode & Compiled version on commit 0819207c55ef06c7b98ddfe98637eb2b5e1e5d0b
|
||||
\Tstrut\Bstrut\\
|
||||
\hlineB{3}
|
||||
\end{tabular}
|
||||
\end{table}
|
||||
%\footnotetext{\url{https://www.dell.com/en-us/work/shop/servers-storage-and-networking/sf/poweredge-tower-servers}}
|
||||
%\footnotetext{\url{https://ark.intel.com/products/92989/Intel-Xeon-Processor-E5-2643-v4-20M-Cache-3_40-GHz}}
|
||||
%\footnotetext{\url{https://www.intel.com/content/dam/www/public/us/en/documents/specification-updates/x99-chipset-pch-spec-update.pdf}}
|
||||
%\footnotetext{\url{https://www.intel.com/content/www/us/en/products/memory-storage/solid-state-drives/data-center-ssds/dc-p3700-series.html}}
|
||||
%\footnotetext{\url{http://www.mellanox.com/page/products_dyn?product_family=201&mtag=connectx_4_vpi_card}}
|
21
tables/conditionally_required_cm_messages.tex
Normal file
21
tables/conditionally_required_cm_messages.tex
Normal file
@@ -0,0 +1,21 @@
|
||||
\begin{table}[ht!]
|
||||
\centering
|
||||
\caption{Conditionally required Communication Management messages, used to acquire \acrfull{ud} addressing information.}
|
||||
\label{tab:conditionally_required_cm_messages}
|
||||
\vspace{3mm}
|
||||
|
||||
\begin{tabular}{p{2.6cm} | p{11.4cm}}
|
||||
\hlineB{3}
|
||||
\Tstrut
|
||||
\textbf{CM message} & \textbf{Description}\\
|
||||
\hhline{=|=}
|
||||
\Tstrut
|
||||
\acrshort{sidrreq} & The \textit{\acrlong{sidrreq}} is used to request \gls{ud} addressing information from a remote node for a certain service ID.
|
||||
\Bstrut\\
|
||||
\hline
|
||||
\Tstrut
|
||||
\acrshort{sidrrep} & The \textit{\acrlong{sidrrep}} is a response to the \acrshort{sidrreq} and contains all information to communicate with the entity that was specified as service ID in the request message.
|
||||
\Bstrut\\
|
||||
\hlineB{3}
|
||||
\end{tabular}
|
||||
\end{table}
|
70
tables/correlation_benchmarks.tex
Normal file
70
tables/correlation_benchmarks.tex
Normal file
@@ -0,0 +1,70 @@
|
||||
\begin{table}[ht!]
|
||||
\centering
|
||||
\caption{$\min t_{lat}$, $\tilde{t}_{lat}$, and $\max t_{lat}$ measured with \texttt{ib\_send\_lat}. All communication went over an \gls{rc} \gls{rdma} \gls{cm} \gls{qp} and was sent with the normal \textit{send} operation. Every test contained 1000 iterations and messages that were smaller than \SI{188}{\byte} were sent inline.}\label{tab:correlation_benchmarks}
|
||||
\vspace{3mm}
|
||||
\begin{tabular}{r | c c c c}
|
||||
\hlineB{3}
|
||||
\Tstrut
|
||||
& \multicolumn{3}{c}{round-trip benchmark} & one-way benchmark
|
||||
\Bstrut
|
||||
\\
|
||||
\Tstrut
|
||||
message size [\SI{}{\byte}] & $\min{t_{lat}}$ [\SI{}{\micro\second}] & $\tilde{t}_{lat}$ [\SI{}{\micro\second}] & $\max{t_{lat}}$ [\SI{}{\micro\second}] & $\tilde{t}_{lat}$ [\SI{}{\micro\second}]
|
||||
\Bstrut
|
||||
\\
|
||||
\hhline{=|====}
|
||||
\Tstrut
|
||||
\SI{8}{} & 0.80 & 0.83 & 2.37 & 0.94
|
||||
\Bstrut
|
||||
\\
|
||||
\Tstrut
|
||||
\SI{16}{} & 0.79 & 0.83 & 4.06 & 0.91
|
||||
\Bstrut
|
||||
\\
|
||||
\Tstrut
|
||||
\SI{32}{} & 0.79 & 0.82 & 4.29 & 0.91
|
||||
\Bstrut
|
||||
\\
|
||||
\Tstrut
|
||||
\SI{64}{} & 0.82 & 0.86 & 2.20 & 1.00
|
||||
\Bstrut
|
||||
\\
|
||||
\Tstrut
|
||||
\SI{128}{} & 0.86 & 0.90 & 1.73 & 1.01
|
||||
\Bstrut
|
||||
\\
|
||||
\Tstrut
|
||||
\SI{256}{} & 1.24 & 1.30 & 2.05 & 1.36
|
||||
\Bstrut
|
||||
\\
|
||||
\Tstrut
|
||||
\SI{512}{} & 1.31 & 1.35 & 2.56 & 1.42
|
||||
\Bstrut
|
||||
\\
|
||||
\Tstrut
|
||||
\SI{1024}{} & 1.45 & 1.49 & 2.78 & 1.63
|
||||
\Bstrut
|
||||
\\
|
||||
\Tstrut
|
||||
\SI{2048}{} & 1.71 & 1.75 & 2.87 & 2.29
|
||||
\Bstrut
|
||||
\\
|
||||
\Tstrut
|
||||
\SI{4096}{} & 2.22 & 2.27 & 2.87 & 2.93
|
||||
\Bstrut
|
||||
\\
|
||||
\Tstrut
|
||||
\SI{8192}{} & 2.55 & 2.61 & 3.97 & 3.62
|
||||
\Bstrut
|
||||
\\
|
||||
\Tstrut
|
||||
\SI{16384}{} & 3.14 & 3.21 & 4.54 & 4.58
|
||||
\Bstrut
|
||||
\\
|
||||
\Tstrut
|
||||
\SI{32768}{} & 4.48 & 4.59 & 5.78 & 5.96
|
||||
\Bstrut
|
||||
\\
|
||||
\hlineB{3}
|
||||
\end{tabular}
|
||||
\end{table}
|
48
tables/infiniband_node_components.tex
Normal file
48
tables/infiniband_node_components.tex
Normal file
@@ -0,0 +1,48 @@
|
||||
\begin{table}[ht!]
|
||||
\centering
|
||||
\caption{\textit{InfiniBand} node-type components from \autoref{fig:villasnode_implementation} and the respective sections of the present work that elaborate upon these components.}
|
||||
\label{tab:infiniband_node_components}
|
||||
\vspace{3mm}
|
||||
|
||||
\begin{tabular}{ l | c c c}
|
||||
\hlineB{3}
|
||||
\Tstrut
|
||||
\textbf{Component} & \textbf{\nameref{chap:basics}} & \textbf{\nameref{chap:architecture}} & \textbf{\nameref{chap:implementation}}\\
|
||||
\hhline{=|===}
|
||||
\Tstrut
|
||||
\gls{hca} & \autoref{sec:iba} & &
|
||||
\Bstrut\\
|
||||
\Tstrut
|
||||
Queue pair & \autoref{sec:qp} & &
|
||||
\Bstrut\\
|
||||
\Tstrut
|
||||
Protection domain & \autoref{sec:memory} & &
|
||||
\Bstrut\\
|
||||
\Tstrut
|
||||
Event channels & \autoref{sec:eventchannels} & &
|
||||
\Bstrut\\
|
||||
\Tstrut
|
||||
Communication identifier & \autoref{sec:rdmacm} & &
|
||||
\Bstrut\\
|
||||
\Tstrut
|
||||
Buffers & \autoref{sec:memory} & \autoref{sec:memorymanagement} &
|
||||
\Bstrut\\
|
||||
\Tstrut
|
||||
VILLASnode & & \autoref{sec:villasbasics} &
|
||||
\Bstrut\\
|
||||
\Tstrut
|
||||
Read-function & & \autoref{sec:readwrite_interfaces} & \autoref{sec:villas_read}
|
||||
\Bstrut\\
|
||||
\Tstrut
|
||||
Write-function & & \autoref{sec:readwrite_interfaces} & \autoref{sec:villas_write}
|
||||
\Bstrut\\
|
||||
\Tstrut
|
||||
Start-function & & & \autoref{sec:villas_start}
|
||||
\Bstrut\\
|
||||
\Tstrut
|
||||
Management thread & & & \autoref{sec:comm_management}
|
||||
\Bstrut\\
|
||||
\hlineB{3}
|
||||
\end{tabular}
|
||||
\vspace{-1cm}
|
||||
\end{table}
|
52
tables/missed_steps_nanomsg_zeromq.tex
Normal file
52
tables/missed_steps_nanomsg_zeromq.tex
Normal file
@@ -0,0 +1,52 @@
|
||||
\begin{table}[ht!]
|
||||
\centering
|
||||
\vspace{3.5cm}
|
||||
\caption{The percentage of missed steps in the in and out files that were generated by the VILLASnode node-type benchmark for the \textit{nanomsg} and \textit{zeromq} node-types. Although a considerable number of samples never got transmitted, especially for high rates, no samples were dropped after the first sequence number appeared in the out files.}\label{tab:missed_steps_nanomsg_zeromq}
|
||||
\vspace{3mm}
|
||||
\begin{tabular}{r r | c c c c}
|
||||
\hlineB{3}
|
||||
\Tstrut
|
||||
\textbf{rate [Hz]} & \textbf{file} & \multicolumn{4}{c}{\textbf{Missed steps [\%]}}
|
||||
\Bstrut
|
||||
\\
|
||||
\Tstrut
|
||||
& & \textit{nanomsg} & \textit{nanomsg (lo)} & \textit{zeromq} & \textit{zeromq (lo)} \\
|
||||
\hhline{==|====}
|
||||
\Tstrut
|
||||
\multirow{2}{*}{\num{100}} & in & 8.03 & 8.03 & 8.03 & 8.03
|
||||
\\
|
||||
& out & 8.04 & 8.03 & 8.04 & 8.04\Bstrut
|
||||
\\
|
||||
\Tstrut
|
||||
\multirow{2}{*}{\num{2500}} & in & 3.72 & 3.71 & 3.72 & 3.71
|
||||
\\
|
||||
& out & 3.80 & 3.71 & 3.78 & 3.78\Bstrut
|
||||
\\
|
||||
\Tstrut
|
||||
\multirow{2}{*}{\num{5000}} & in & 0.03 & 0.03 & 0.03 & 0.04
|
||||
\\
|
||||
& out & 0.20 & 0.03 & 0.15 & 0.18\Bstrut
|
||||
\\
|
||||
\Tstrut
|
||||
\multirow{2}{*}{\num{10000}} & in & 0.04 & 0.05 & 0.04 & 0.07
|
||||
\\
|
||||
& out & 0.36 & 0.05 & 0.27 & 0.34\Bstrut
|
||||
\\
|
||||
\Tstrut
|
||||
\multirow{2}{*}{\num{25000}} & in & 0.08 & 0.08 & 0.11 & 0.11
|
||||
\\
|
||||
& out & 0.90 & 0.08 & 0.70 & 0.76\Bstrut
|
||||
\\
|
||||
\Tstrut
|
||||
\multirow{2}{*}{\num{50000}} & in & 0.17 & 0.17 & 0.24 & 0.22
|
||||
\\
|
||||
& out & 1.75 & 0.17 & 1.42 & 1.58\Bstrut
|
||||
\\
|
||||
\Tstrut
|
||||
\multirow{2}{*}{\num{100000}} & in & 0.54 & 0.99 & 0.45 & 0.61
|
||||
\\
|
||||
& out & 3.91 & 1.00 & 2.68 & 3.33\Bstrut
|
||||
\\
|
||||
\hlineB{3}
|
||||
\end{tabular}
|
||||
\end{table}
|
44
tables/mtu_performance.tex
Normal file
44
tables/mtu_performance.tex
Normal file
@@ -0,0 +1,44 @@
|
||||
\begin{table}[ht!]
|
||||
\centering
|
||||
\caption{$\tilde{t}_{lat}$ [\SI{}{\micro\second}] as reported by \texttt{ib\_send\_lat} for different service types and message sizes with a varying \gls{mtu}. All communication went over an \gls{rdma} \gls{cm} \gls{qp} and was sent with the normal \textit{send} operation. Every test contained 1000 iterations and messages that were smaller than \SI{188}{\byte} were sent inline.}\label{tab:mtu_performance}
|
||||
\vspace{3mm}
|
||||
\begin{tabular}{l r | c c c c c}
|
||||
\hlineB{3}
|
||||
\Tstrut
|
||||
\textbf{service type} & \textbf{size} & \multicolumn{5}{c}{\textbf{\gls{mtu}}}
|
||||
\Bstrut
|
||||
\\
|
||||
\Tstrut
|
||||
& & \SI{256}{\byte} & \SI{512}{\byte} & \SI{1024}{\byte} & \SI{2048}{\byte} & \SI{4096}{\byte} \\
|
||||
\hhline{==|=====}
|
||||
\Tstrut
|
||||
\multirow{4}{*}{\gls{rc}} & \SI{32}{\byte} & 0.83 & 0.81 & 0.82 & 0.82 & 0.83
|
||||
\Bstrut
|
||||
\\
|
||||
\Tstrut
|
||||
& \SI{1}{\kilo\byte} & 1.58 & 1.58 & 1.52 & 1.60 & 1.61
|
||||
\Bstrut
|
||||
\\
|
||||
\Tstrut
|
||||
& \SI{4}{\kilo\byte} & 2.26 & 2.25 & 2.25 & 2.27 & 2.28
|
||||
\Bstrut
|
||||
\\
|
||||
\Tstrut
|
||||
& \SI{32}{\kilo\byte} & 4.56 & 4.58 & 4.57 & 4.58 & 4.57
|
||||
\Bstrut
|
||||
\\ \hline
|
||||
\Tstrut
|
||||
\multirow{3}{*}{\gls{ud}} & \SI{32}{\byte} & 0.86 & 0.87 & 0.86 & 0.87 & 0.86
|
||||
\Bstrut
|
||||
\\
|
||||
\Tstrut
|
||||
& \SI{1}{\kilo\byte} & \NO & \NO & 1.44 & \redcell{1.54} & 1.45
|
||||
\Bstrut
|
||||
\\
|
||||
\Tstrut
|
||||
& \SI{4}{\kilo\byte} & \NO & \NO & \NO & \NO & 2.22
|
||||
\Bstrut
|
||||
\\
|
||||
\hlineB{3}
|
||||
\end{tabular}
|
||||
\end{table}
|
37
tables/oneway_settings_busy.tex
Normal file
37
tables/oneway_settings_busy.tex
Normal file
@@ -0,0 +1,37 @@
|
||||
\begin{table}[ht!]
|
||||
\centering
|
||||
\caption{The benchmark's settings which were used to analyze the latency of messages sent whilst both the sending and receiving node were busy polling.}
|
||||
\label{tab:oneway_settings_busy}
|
||||
\vspace{3mm}
|
||||
|
||||
\footnotesize
|
||||
\begin{tabular}{!{\hspace{-2pt}}c|!{\hspace{-8pt}}c!{\hspace{-4pt}}c!{\hspace{-4pt}}c!{\hspace{-4pt}}c!{\hspace{-6pt}}c!{\hspace{-2pt}}ccc!{\hspace{-3pt}}ccc!{\hspace{-5pt}}}
|
||||
\multicolumn{1}{c}{} & \rot{service type} & \rot{polling (send)} & \rot{polling (recv)} & \rot{inline mode} & \rot{unsignaled}& \rot{operation} & \rot{burst size} & \rot{repetitions} & \rot{loop pauses} & \rot{timestamp} & \rotright{message size}\\
|
||||
\hlineB{3}
|
||||
\Tstrut
|
||||
\autoref{fig:oneway_busy_a} & \graycellleftline{\acrshort{rc}} & busy & busy & \NO & \enspace\NO & send & 8000 & \graycell{20} & \graycell{\SI{0}{\nano\second}} & $t_{subm}$ & \SI{32}{\byte}
|
||||
\Bstrut\\
|
||||
|
||||
\Tstrut
|
||||
\autoref{fig:oneway_busy_b} & \graycellleftline{\acrshort{uc}} & busy & busy & \NO & \enspace\NO & send & 8000 & \graycell{20} & \graycell{\SI{0}{\nano\second}} & $t_{subm}$ & \SI{32}{\byte}
|
||||
\Bstrut\\
|
||||
|
||||
\Tstrut
|
||||
\autoref{fig:oneway_busy_c} & \graycellleftline{\acrshort{ud}} & busy & busy & \NO & \enspace\NO & send & 8000 & \graycell{20} & \graycell{\SI{0}{\nano\second}} & $t_{subm}$ & \SI{32}{\byte}
|
||||
\Bstrut\\
|
||||
|
||||
\Tstrut
|
||||
\autoref{fig:oneway_busy_d} & \graycellleftline{\acrshort{rc}} & busy & busy & \NO & \enspace\NO & send & 8000 & \graycell{1} & \graycell{\SI[scientific-notation=true]{1e9}{\nano\second}} & $t_{subm}$ & \SI{32}{\byte}
|
||||
\Bstrut\\
|
||||
|
||||
\Tstrut
|
||||
\autoref{fig:oneway_busy_e} & \graycellleftline{\acrshort{uc}} & busy & busy & \NO & \enspace\NO & send & 8000 & \graycell{1} & \graycell{\SI[scientific-notation=true]{1e9}{\nano\second}} & $t_{subm}$ & \SI{32}{\byte}
|
||||
\Bstrut\\
|
||||
|
||||
\Tstrut
|
||||
\autoref{fig:oneway_busy_f} & \graycellleftline{\acrshort{ud}} & busy & busy & \NO & \enspace\NO & send & 8000 & \graycell{1} & \graycell{\SI[scientific-notation=true]{1e9}{\nano\second}} & $t_{subm}$ & \SI{32}{\byte}
|
||||
\Bstrut\\
|
||||
\hlineB{3}
|
||||
\end{tabular}
|
||||
\newpage
|
||||
\end{table}
|
37
tables/oneway_settings_event.tex
Normal file
37
tables/oneway_settings_event.tex
Normal file
@@ -0,0 +1,37 @@
|
||||
\begin{table}[ht!]
|
||||
\centering
|
||||
\caption{The benchmark's settings which were used to analyze the latency of messages sent whilst both the sending and receiving node were waiting for an event.}
|
||||
\footnotesize
|
||||
\label{tab:oneway_settings_event}
|
||||
\vspace{3mm}
|
||||
|
||||
\begin{tabular}{!{\hspace{-2pt}}c|!{\hspace{-8pt}}c!{\hspace{-4pt}}c!{\hspace{-4pt}}c!{\hspace{-4pt}}c!{\hspace{-6pt}}c!{\hspace{-2pt}}ccc!{\hspace{-3pt}}ccc!{\hspace{-5pt}}}
|
||||
\multicolumn{1}{c}{} & \rot{service type} & \rot{polling (send)} & \rot{polling (recv)} & \rot{inline mode} & \rot{unsignaled}& \rot{operation} & \rot{burst size} & \rot{repetitions} & \rot{loop pauses} & \rot{timestamp} & \rotright{message size}\\
|
||||
\hlineB{3}
|
||||
\Tstrut
|
||||
\autoref{fig:oneway_event_a} & \graycellleftline{\acrshort{rc}} & event & event & \NO & \enspace\NO & send & 8000 & \graycell{20} & \graycell{\SI{0}{\nano\second}} & $t_{subm}$ & \SI{32}{\byte}
|
||||
\Bstrut\\
|
||||
|
||||
\Tstrut
|
||||
\autoref{fig:oneway_event_b} & \graycellleftline{\acrshort{uc}} & event & event & \NO & \enspace\NO & send & 8000 & \graycell{20} & \graycell{\SI{0}{\nano\second}} & $t_{subm}$ & \SI{32}{\byte}
|
||||
\Bstrut\\
|
||||
|
||||
\Tstrut
|
||||
\autoref{fig:oneway_event_c} & \graycellleftline{\acrshort{ud}} & event & event & \NO & \enspace\NO & send & 8000 & \graycell{20} & \graycell{\SI{0}{\nano\second}} & $t_{subm}$ & \SI{32}{\byte}
|
||||
\Bstrut\\
|
||||
|
||||
\Tstrut
|
||||
\autoref{fig:oneway_event_d} & \graycellleftline{\acrshort{rc}} & event & event & \NO & \enspace\NO & send & 8000 & \graycell{1} & \graycell{\SI[scientific-notation=true]{1e9}{\nano\second}} & $t_{subm}$ & \SI{32}{\byte}
|
||||
\Bstrut\\
|
||||
|
||||
\Tstrut
|
||||
\autoref{fig:oneway_event_e} & \graycellleftline{\acrshort{uc}} & event & event & \NO & \enspace\NO & send & 8000 & \graycell{1} & \graycell{\SI[scientific-notation=true]{1e9}{\nano\second}} & $t_{subm}$ & \SI{32}{\byte}
|
||||
\Bstrut\\
|
||||
|
||||
\Tstrut
|
||||
\autoref{fig:oneway_event_f} & \graycellleftline{\acrshort{ud}} & event & event & \NO & \enspace\NO & send & 8000 & \graycell{1} & \graycell{\SI[scientific-notation=true]{1e9}{\nano\second}} & $t_{subm}$ & \SI{32}{\byte}
|
||||
\Bstrut\\
|
||||
\hlineB{3}
|
||||
\end{tabular}
|
||||
\newpage
|
||||
\end{table}
|
21
tables/oneway_settings_inline.tex
Normal file
21
tables/oneway_settings_inline.tex
Normal file
@@ -0,0 +1,21 @@
|
||||
\begin{table}[ht!]
|
||||
\centering
|
||||
\caption{The benchmark's settings which were used to analyze the influence of sending messages inline on the latency.}
|
||||
\label{tab:oneway_settings_inline}
|
||||
\vspace{3mm}
|
||||
|
||||
\footnotesize
|
||||
\begin{tabular}{!{\hspace{-2pt}}c|!{\hspace{-2pt}}c!{\hspace{-4pt}}c!{\hspace{-4pt}}c!{\hspace{-4pt}}c!{\hspace{-6pt}}c!{\hspace{-2pt}}ccc!{\hspace{-3pt}}ccc!{\hspace{-5pt}}}
|
||||
\multicolumn{1}{c}{} & \rot{service type} & \rot{polling (send)} & \rot{polling (recv)} & \rot{inline mode} & \rot{unsignaled}& \rot{operation} & \rot{burst size} & \rot{repetitions} & \rot{loop pauses} & \rot{timestamp} & \rotright{message size}\\
|
||||
\hlineB{3}
|
||||
\Tstrut
|
||||
\autoref{fig:oneway_inline} & \acrshort{uc} & busy & busy & \graycell{\NO} & \enspace\NO & send & 8000 & 20 & \SI{0}{\nano\second} & $t_{subm}$ & \SI{32}{\byte}
|
||||
\Bstrut\\
|
||||
|
||||
\Tstrut
|
||||
\autoref{fig:oneway_inline} & \acrshort{uc} & busy & busy & \graycell{\OK} & \enspace\NO & send & 8000 & 20 & \SI{0}{\nano\second} & $t_{subm}$ & \SI{32}{\byte}
|
||||
\Bstrut\\
|
||||
\hlineB{3}
|
||||
\end{tabular}
|
||||
\newpage
|
||||
\end{table}
|
59
tables/oneway_settings_message_size.tex
Normal file
59
tables/oneway_settings_message_size.tex
Normal file
@@ -0,0 +1,59 @@
|
||||
\begin{table}[ht!]
|
||||
\centering
|
||||
\caption{The benchmark's settings which were used to analyze the influence of message size on the latency, with $i\in[0,12]$, $j\in[0,7]$, and $k\in[0,9]$.}
|
||||
\label{tab:oneway_settings_message_size}
|
||||
\vspace{3mm}
|
||||
|
||||
\footnotesize
|
||||
\begin{tabular}{!{\hspace{-2pt}}c|!{\hspace{-8pt}}c!{\hspace{-6pt}}c!{\hspace{-4pt}}c!{\hspace{-2pt}}c!{\hspace{-6pt}}c!{\hspace{-2pt}}ccc!{\hspace{-3pt}}ccc!{\hspace{-9pt}}}
|
||||
\multicolumn{1}{c}{} & \rot{service type} & \rot{polling (send)} & \rot{polling (recv)} & \rot{inline mode} & \rot{unsignaled}& \rot{operation} & \rot{burst size} & \rot{repetitions} & \rot{loop pauses} & \rot{timestamp} & \rotright{message size}\\
|
||||
|
||||
\hlineB{3}
|
||||
\Tstrut
|
||||
\autoref{fig:oneway_message_size_a} \rlap{\hspace{0.195em}\blacktriangle}\textbf{---} & \graycellleftline{\acrshort{rc}} & busy & busy & \graycell{\NO} & \enspace\NO & \graycell{send} & \graycell{8000} & 10 &\SI{0}{\nano\second} & $t_{subm}$ & \graycellright{$\SI[parse-numbers=false]{8\cdot2^i}{\byte}$}
|
||||
\Bstrut\\
|
||||
|
||||
\Tstrut
|
||||
\autoref{fig:oneway_message_size_a} {\color{plot_blue}\rlap{\hspace{0.195em}\blacktriangle}\textbf{---}} & \graycellleftline{\acrshort{rc}} & busy & busy & \graycell{\OK} & \enspace\NO & \graycell{send} & \graycell{$M_{inl}$\footnotemark[1]} & 10 &\SI{0}{\nano\second} & $t_{subm}$ & \graycellright{$\SI[parse-numbers=false]{8\cdot2^j}{\byte}$}
|
||||
\Bstrut\\
|
||||
|
||||
\Tstrut
|
||||
\autoref{fig:oneway_message_size_a} {\color{plot_gray}\rlap{\hspace{0.195em}\blacktriangledown}\textbf{---}} & \graycellleftline{\acrshort{rc}} & busy & busy & \graycell{\NO} & \enspace\NO & \graycell{rdma} & \graycell{8000} & 10 &\SI{0}{\nano\second} & $t_{subm}$ & \graycellright{$\SI[parse-numbers=false]{8\cdot2^i}{\byte}$}
|
||||
\Bstrut\\
|
||||
|
||||
\Tstrut
|
||||
\autoref{fig:oneway_message_size_a} {\color{plot_light_blue}\rlap{\hspace{0.195em}\blacktriangledown}\textbf{---}} & \graycellleftline{\acrshort{rc}} & busy & busy & \graycell{\OK} & \enspace\NO & \graycell{rdma} & \graycell{$M_{inl}$\footnotemark[1]} & 10 &\SI{0}{\nano\second} & $t_{subm}$ & \graycellright{$\SI[parse-numbers=false]{8\cdot2^j}{\byte}$}
|
||||
\Bstrut\\
|
||||
|
||||
\hline
|
||||
|
||||
\Tstrut
|
||||
\autoref{fig:oneway_message_size_b} \rlap{\hspace{0.195em}\blacktriangle}\textbf{---} & \graycellleftline{\acrshort{uc}} & busy & busy & \graycell{\NO} & \enspace\NO & \graycell{send} & \graycell{8000} & 10 & \SI{0}{\nano\second} & $t_{subm}$ & \graycellright{$\SI[parse-numbers=false]{8\cdot2^i}{\byte}$}
|
||||
\Bstrut\\
|
||||
|
||||
\Tstrut
|
||||
\autoref{fig:oneway_message_size_b} {\color{plot_blue}\rlap{\hspace{0.195em}\blacktriangle}\textbf{---}} & \graycellleftline{\acrshort{uc}} & busy & busy & \graycell{\OK} & \enspace\NO & \graycell{send} & \graycell{$M_{inl}$\footnotemark[1]} & 10 & \SI{0}{\nano\second} & $t_{subm}$ & \graycellright{$\SI[parse-numbers=false]{8\cdot2^j}{\byte}$}
|
||||
\Bstrut\\
|
||||
|
||||
\Tstrut
|
||||
\autoref{fig:oneway_message_size_b} {\color{plot_gray}\rlap{\hspace{0.195em}\blacktriangledown}\textbf{---}} & \graycellleftline{\acrshort{uc}} & busy & busy & \graycell{\NO} & \enspace\NO & \graycell{rdma} & \graycell{8000} & 10 & \SI{0}{\nano\second} & $t_{subm}$ & \graycellright{$\SI[parse-numbers=false]{8\cdot2^i}{\byte}$}
|
||||
\Bstrut\\
|
||||
|
||||
\Tstrut
|
||||
\autoref{fig:oneway_message_size_b} {\color{plot_light_blue}\rlap{\hspace{0.195em}\blacktriangledown}\textbf{---}} & \graycellleftline{\acrshort{uc}} & busy & busy & \graycell{\OK} & \enspace\NO & \graycell{rdma} & \graycell{$M_{inl}$\footnotemark[1]} & 10 & \SI{0}{\nano\second} & $t_{subm}$ & \graycellright{$\SI[parse-numbers=false]{8\cdot2^j}{\byte}$}
|
||||
\Bstrut\\
|
||||
|
||||
\hline
|
||||
|
||||
\Tstrut
|
||||
\autoref{fig:oneway_message_size_c} \hspace{0.1em}\rlap{\hspace{0.195em}\blacktriangle}\textbf{---} & \graycellleftline{\acrshort{ud}} & busy & busy & \graycell{\NO} & \enspace\NO & \graycell{send} & \graycell{8000} & 10 & \SI{0}{\nano\second} & $t_{subm}$ & \graycellright{$\SI[parse-numbers=false]{8\cdot2^k}{\byte}$}
|
||||
\Bstrut\\
|
||||
|
||||
\Tstrut
|
||||
\autoref{fig:oneway_message_size_c} {\color{plot_blue}\hspace{0.1em}\rlap{\hspace{0.195em}\blacktriangle}\textbf{---}} & \graycellleftline{\acrshort{ud}} & busy & busy & \graycell{\OK} & \enspace\NO & \graycell{send} & \graycell{$M_{inl}$\footnotemark[1]} & 10 & \SI{0}{\nano\second} & $t_{subm}$ & \graycellright{$\SI[parse-numbers=false]{8\cdot2^j}{\byte}$}
|
||||
\Bstrut\\
|
||||
\hlineB{3}
|
||||
\end{tabular}
|
||||
\newpage
|
||||
\end{table}
|
||||
\footnotetext[1]{The maximum size $M_{inl}$ of a \gls{qp} for a given message size depends on the \gls{hca}. In the case of the Mellanox ConnectX\textregistered-4, each queue of a \gls{qp} could hold 8000, 8000, 8000, 6552, 5461, 4096, and 2730 \glspl{wr} for a message size of \SI{8}{\byte}, \SI{16}{\byte}, \SI{32}{\byte}, \SI{64}{\byte}, \SI{128}{\byte}, \SI{256}{\byte}, and \SI{512}{\byte}, respectively.}
|
39
tables/oneway_settings_message_size_inline.tex
Normal file
39
tables/oneway_settings_message_size_inline.tex
Normal file
@@ -0,0 +1,39 @@
|
||||
\begin{table}[ht!]
|
||||
\vspace{3.5cm}
|
||||
\footnotesize
|
||||
\centering
|
||||
\caption{The benchmark's settings which were used to analyze whether the steep slope between \SI{128}{\byte} and \SI{256}{\byte} in \autoref{fig:oneway_message_size} was caused by the non-constant burst sizes, with $j\in[0,7]$.}
|
||||
\label{tab:oneway_settings_message_size_inline}
|
||||
\vspace{3mm}
|
||||
|
||||
\footnotesize
|
||||
\begin{tabular}{!{\hspace{-2pt}}c|!{\hspace{-8pt}}c!{\hspace{-6pt}}c!{\hspace{-4pt}}c!{\hspace{-2pt}}c!{\hspace{-6pt}}c!{\hspace{-2pt}}ccc!{\hspace{-3pt}}ccc!{\hspace{-9pt}}}
|
||||
\multicolumn{1}{c}{} & \rot{service type} & \rot{polling (send)} & \rot{polling (recv)} & \rot{inline mode} & \rot{unsignaled}& \rot{operation} & \rot{burst size} & \rot{repetitions} & \rot{loop pauses} & \rot{timestamp} & \rotright{message size}\\
|
||||
\hlineB{3}
|
||||
\Tstrut
|
||||
\autoref{fig:oneway_message_size_inline_a} {\color{plot_blue}\rlap{\hspace{0.195em}\blacktriangle}\textbf{---}} & \graycellleftline{\acrshort{rc}} & busy & busy & \OK & \enspace\NO & \graycell{send} & 2730 & 10 &\SI{0}{\nano\second} & $t_{subm}$ & \graycellright{$\SI[parse-numbers=false]{8\cdot2^j}{\byte}$}
|
||||
\Bstrut\\
|
||||
|
||||
\Tstrut
|
||||
\autoref{fig:oneway_message_size_inline_a} {\color{plot_light_blue}\rlap{\hspace{0.195em}\blacktriangledown}\textbf{---}} & \graycellleftline{\acrshort{rc}} & busy & busy & \OK & \enspace\NO & \graycell{rdma} & 2730 & 10 &\SI{0}{\nano\second} & $t_{subm}$ & \graycellright{$\SI[parse-numbers=false]{8\cdot2^j}{\byte}$}
|
||||
\Bstrut\\
|
||||
|
||||
\hline
|
||||
|
||||
\Tstrut
|
||||
\autoref{fig:oneway_message_size_inline_b} {\color{plot_blue}\rlap{\hspace{0.195em}\blacktriangle}\textbf{---}} & \graycellleftline{\acrshort{uc}} & busy & busy & \OK & \enspace\NO & \graycell{send} & 2730 & 10 & \SI{0}{\nano\second} & $t_{subm}$ & \graycellright{$\SI[parse-numbers=false]{8\cdot2^j}{\byte}$}
|
||||
\Bstrut\\
|
||||
|
||||
\Tstrut
|
||||
\autoref{fig:oneway_message_size_inline_b} {\color{plot_light_blue}\rlap{\hspace{0.195em}\blacktriangledown}\textbf{---}} & \graycellleftline{\acrshort{uc}} & busy & busy & \OK & \enspace\NO & \graycell{rdma} & 2730 & 10 & \SI{0}{\nano\second} & $t_{subm}$ & \graycellright{$\SI[parse-numbers=false]{8\cdot2^j}{\byte}$}
|
||||
\Bstrut\\
|
||||
|
||||
\hline
|
||||
|
||||
\Tstrut
|
||||
\autoref{fig:oneway_message_size_inline_c} {\color{plot_blue}\hspace{0.1em}\rlap{\hspace{0.195em}\blacktriangle}\textbf{---}} & \graycellleftline{\acrshort{ud}} & busy & busy & \OK & \enspace\NO & \graycell{send} & 2730 & 10 & \SI{0}{\nano\second} & $t_{subm}$ & \graycellright{$\SI[parse-numbers=false]{8\cdot2^j}{\byte}$}
|
||||
\Bstrut\\
|
||||
\hlineB{3}
|
||||
\end{tabular}
|
||||
\newpage
|
||||
\end{table}
|
37
tables/oneway_settings_message_size_wait.tex
Normal file
37
tables/oneway_settings_message_size_wait.tex
Normal file
@@ -0,0 +1,37 @@
|
||||
\begin{table}[ht!]
|
||||
\centering
|
||||
\caption{The benchmark's settings which were used to analyze whether the increasing latency in \autoref{fig:oneway_message_size} was caused by congestion control, with $i\in[0,12]$.}
|
||||
\label{tab:oneway_settings_message_size_wait}
|
||||
\vspace{3mm}
|
||||
|
||||
\footnotesize
|
||||
\begin{tabular}{!{\hspace{-2pt}}c|!{\hspace{-8pt}}c!{\hspace{-6pt}}c!{\hspace{-4pt}}c!{\hspace{-2pt}}c!{\hspace{-6pt}}c!{\hspace{-2pt}}ccc!{\hspace{-3pt}}ccc!{\hspace{-9pt}}}
|
||||
\multicolumn{1}{c}{} & \rot{service type} & \rot{polling (send)} & \rot{polling (recv)} & \rot{inline mode} & \rot{unsignaled}& \rot{operation} & \rot{burst size} & \rot{repetitions} & \rot{loop pauses} & \rot{timestamp} & \rotright{message size}\\
|
||||
\hlineB{3}
|
||||
\Tstrut
|
||||
\autoref{fig:oneway_message_size_wait_a} {\color{black}\rlap{\hspace{0.195em}\blacktriangle}\textbf{---}} & \graycellleftline{\acrshort{rc}} & busy & busy & \NO & \enspace\NO & \graycell{send} & 8000 & 5 &\SI{5500}{\nano\second} & $t_{subm}$ & \graycellright{$\SI[parse-numbers=false]{8\cdot2^i}{\byte}$}
|
||||
\Bstrut\\
|
||||
|
||||
\Tstrut
|
||||
\autoref{fig:oneway_message_size_wait_a} {\color{plot_gray}\rlap{\hspace{0.195em}\blacktriangledown}\textbf{---}} & \graycellleftline{\acrshort{rc}} & busy & busy & \NO & \enspace\NO & \graycell{rdma} & 8000 & 5 &\SI{5500}{\nano\second} & $t_{subm}$ & \graycellright{$\SI[parse-numbers=false]{8\cdot2^i}{\byte}$}
|
||||
\Bstrut\\
|
||||
|
||||
\hline
|
||||
|
||||
\Tstrut
|
||||
\autoref{fig:oneway_message_size_wait_b} {\color{black}\rlap{\hspace{0.195em}\blacktriangle}\textbf{---}} & \graycellleftline{\acrshort{uc}} & busy & busy & \NO & \enspace\NO & \graycell{send} & 8000 & 5 & \SI{5500}{\nano\second} & $t_{subm}$ & \graycellright{$\SI[parse-numbers=false]{8\cdot2^i}{\byte}$}
|
||||
\Bstrut\\
|
||||
|
||||
\Tstrut
|
||||
\autoref{fig:oneway_message_size_wait_b} {\color{plot_gray}\rlap{\hspace{0.195em}\blacktriangledown}\textbf{---}} & \graycellleftline{\acrshort{uc}} & busy & busy & \NO & \enspace\NO & \graycell{rdma} & 8000 & 5 & \SI{5500}{\nano\second} & $t_{subm}$ & \graycellright{$\SI[parse-numbers=false]{8\cdot2^i}{\byte}$}
|
||||
\Bstrut\\
|
||||
|
||||
\hline
|
||||
|
||||
\Tstrut
|
||||
\autoref{fig:oneway_message_size_wait_c} {\color{black}\hspace{0.1em}\rlap{\hspace{0.195em}\blacktriangle}\textbf{---}} & \graycellleftline{\acrshort{ud}} & busy & busy & \NO & \enspace\NO & \graycell{send} & 8000 & 5 & \SI{5500}{\nano\second} & $t_{subm}$ & \graycellright{$\SI[parse-numbers=false]{8\cdot2^i}{\byte}$}
|
||||
\Bstrut\\
|
||||
\hlineB{3}
|
||||
\end{tabular}
|
||||
\newpage
|
||||
\end{table}
|
29
tables/oneway_settings_rdma.tex
Normal file
29
tables/oneway_settings_rdma.tex
Normal file
@@ -0,0 +1,29 @@
|
||||
\begin{table}[ht!]
|
||||
\centering
|
||||
\caption{The benchmark's settings which were used to analyze the effect on latency of sending messages through memory semantics instead of channel semantics.}
|
||||
\label{tab:oneway_settings_rdma}
|
||||
\vspace{3mm}
|
||||
|
||||
\footnotesize
|
||||
\begin{tabular}{!{\hspace{-2pt}}c|!{\hspace{-8pt}}c!{\hspace{-4pt}}c!{\hspace{-4pt}}c!{\hspace{-4pt}}c!{\hspace{-6pt}}c!{\hspace{-2pt}}ccc!{\hspace{-3pt}}ccc!{\hspace{-5pt}}}
|
||||
\multicolumn{1}{c}{} & \rot{service type} & \rot{polling (send)} & \rot{polling (recv)} & \rot{inline mode} & \rot{unsignaled}& \rot{operation} & \rot{burst size} & \rot{repetitions} & \rot{loop pauses} & \rot{timestamp} & \rotright{message size}\\
|
||||
\hlineB{3}
|
||||
\Tstrut
|
||||
\autoref{fig:oneway_rdma_a} & \graycellleftline{\acrshort{rc}} & busy & busy & \OK & \enspace\NO & \graycell{send} & 8000 & 20 & \SI{0}{\nano\second} & $t_{subm}$ & \SI{32}{\byte}
|
||||
\Bstrut\\
|
||||
|
||||
\Tstrut
|
||||
\autoref{fig:oneway_rdma_a} & \graycellleftline{\acrshort{rc}} & busy & busy & \OK & \enspace\NO & \graycell{rdma} & 8000 & 20 & \SI{0}{\nano\second} & $t_{subm}$ & \SI{32}{\byte}
|
||||
\Bstrut\\
|
||||
|
||||
\Tstrut
|
||||
\autoref{fig:oneway_rdma_b} & \graycellleftline{\acrshort{uc}} & busy & busy & \OK & \enspace\NO & \graycell{send} & 8000 & 20 & \SI{0}{\nano\second} & $t_{subm}$ & \SI{32}{\byte}
|
||||
\Bstrut\\
|
||||
|
||||
\Tstrut
|
||||
\autoref{fig:oneway_rdma_b} & \graycellleftline{\acrshort{uc}} & busy & busy & \OK & \enspace\NO & \graycell{rdma} & 8000 & 20 & \SI{0}{\nano\second} & $t_{subm}$ & \SI{32}{\byte}
|
||||
\Bstrut\\
|
||||
\hlineB{3}
|
||||
\end{tabular}
|
||||
\newpage
|
||||
\end{table}
|
21
tables/oneway_settings_submit_send_comparison.tex
Normal file
21
tables/oneway_settings_submit_send_comparison.tex
Normal file
@@ -0,0 +1,21 @@
|
||||
\begin{table}[ht!]
|
||||
\centering
|
||||
\caption{The benchmark's settings which were used to analyze the difference in time between the moment that a \acrfull{wr} is submitted to the \acrfull{sq} and the moment the corresponding message is actually sent.}
|
||||
\label{tab:oneway_settings_submit_send_comparison}
|
||||
\vspace{3mm}
|
||||
|
||||
\footnotesize
|
||||
\begin{tabular}{!{\hspace{-2pt}}c|!{\hspace{-2pt}}c!{\hspace{-4pt}}c!{\hspace{-4pt}}c!{\hspace{-4pt}}c!{\hspace{-6pt}}c!{\hspace{-2pt}}ccc!{\hspace{-3pt}}ccc!{\hspace{-5pt}}}
|
||||
\multicolumn{1}{c}{} & \rot{service type} & \rot{polling (send)} & \rot{polling (recv)} & \rot{inline mode} & \rot{unsignaled}& \rot{operation} & \rot{burst size} & \rot{repetitions} & \rot{loop pauses} & \rot{timestamp} & \rotright{message size}\\
|
||||
\hlineB{3}
|
||||
\Tstrut
|
||||
\autoref{fig:oneway_submit_send_comparison} & \acrshort{uc} & busy & busy & \NO & \enspace\NO & send & 8000 & 20 & \SI{0}{\nano\second} & \graycell{$t_{subm}$} & \SI{32}{\byte}
|
||||
\Bstrut\\
|
||||
|
||||
\Tstrut
|
||||
\autoref{fig:oneway_submit_send_comparison} & \acrshort{uc} & busy & busy & \NO & \enspace\NO & send & 8000 & 20 & \SI{0}{\nano\second} & \graycell{$t_{send}$} & \SI{32}{\byte}
|
||||
\Bstrut\\
|
||||
\hlineB{3}
|
||||
\end{tabular}
|
||||
\newpage
|
||||
\end{table}
|
21
tables/oneway_settings_unsignaled_inline.tex
Normal file
21
tables/oneway_settings_unsignaled_inline.tex
Normal file
@@ -0,0 +1,21 @@
|
||||
\begin{table}[ht!]
|
||||
\centering
|
||||
\caption{The benchmark's settings which were used to analyze the influence of \acrfull{cqe} creation on latency for \textit{send} operations.}
|
||||
\label{tab:oneway_settings_unsignaled_inline}
|
||||
\vspace{3mm}
|
||||
|
||||
\footnotesize
|
||||
\begin{tabular}{!{\hspace{-2pt}}c|!{\hspace{-2pt}}c!{\hspace{-4pt}}c!{\hspace{-4pt}}c!{\hspace{-4pt}}c!{\hspace{6pt}}!{\hspace{-6pt}}c!{\hspace{-2pt}}ccc!{\hspace{-3pt}}ccc!{\hspace{-5pt}}}
|
||||
\multicolumn{1}{c}{} & \rot{service type} & \rot{polling (send)} & \rot{polling (recv)} & \rot{inline mode} & \rot{unsignaled}& \rot{operation} & \rot{burst size} & \rot{repetitions} & \rot{loop pauses} & \rot{timestamp} & \rotright{message size}\\
|
||||
\hlineB{3}
|
||||
\Tstrut
|
||||
\autoref{fig:oneway_unsignaled_inline} & \acrshort{uc} & busy & busy & \OK & \graycell{\NO} & send & 8000 & 20 & \SI{0}{\nano\second} & $t_{subm}$ & \SI{32}{\byte}
|
||||
\Bstrut\\
|
||||
|
||||
\Tstrut
|
||||
\autoref{fig:oneway_unsignaled_inline} & \acrshort{uc} & busy & busy & \OK & \graycell{\OK} & send & 8000 & 20 & \SI{0}{\nano\second} & $t_{subm}$ & \SI{32}{\byte}
|
||||
\Bstrut\\
|
||||
\hlineB{3}
|
||||
\end{tabular}
|
||||
\newpage
|
||||
\end{table}
|
21
tables/oneway_settings_unsignaled_rdma.tex
Normal file
21
tables/oneway_settings_unsignaled_rdma.tex
Normal file
@@ -0,0 +1,21 @@
|
||||
\begin{table}[ht!]
|
||||
\centering
|
||||
\caption{The benchmark's settings which were used to analyze the influence of \acrfull{cqe} creation on latency for \textit{\gls{rdma} write} operations.}
|
||||
\label{tab:oneway_settings_unsignaled_rdma}
|
||||
\vspace{3mm}
|
||||
|
||||
\footnotesize
|
||||
\begin{tabular}{!{\hspace{-2pt}}c|!{\hspace{-2pt}}c!{\hspace{-4pt}}c!{\hspace{-4pt}}c!{\hspace{-4pt}}c!{\hspace{6pt}}!{\hspace{-6pt}}c!{\hspace{-2pt}}ccc!{\hspace{-3pt}}ccc!{\hspace{-5pt}}}
|
||||
\multicolumn{1}{c}{} & \rot{service type} & \rot{polling (send)} & \rot{polling (recv)} & \rot{inline mode} & \rot{unsignaled}& \rot{operation} & \rot{burst size} & \rot{repetitions} & \rot{loop pauses} & \rot{timestamp} & \rotright{message size}\\
|
||||
\hlineB{3}
|
||||
\Tstrut
|
||||
\autoref{fig:oneway_unsignaled_rdma} & \acrshort{uc} & busy & busy & \OK & \graycell{\NO} & rdma & 8000 & 20 & \SI{0}{\nano\second} & $t_{subm}$ & \SI{32}{\byte}
|
||||
\Bstrut\\
|
||||
|
||||
\Tstrut
|
||||
\autoref{fig:oneway_unsignaled_rdma} & \acrshort{uc} & busy & busy & \OK & \graycell{\OK} & rdma & 8000 & 20 & \SI{0}{\nano\second} & $t_{subm}$ & \SI{32}{\byte}
|
||||
\Bstrut\\
|
||||
\hlineB{3}
|
||||
\end{tabular}
|
||||
\newpage
|
||||
\end{table}
|
73
tables/packet_abbreviations.tex
Normal file
73
tables/packet_abbreviations.tex
Normal file
@@ -0,0 +1,73 @@
|
||||
\begin{longtable}{ p{3.0cm} | p{11cm} }
|
||||
\caption{Explanation of abbreviations from \autoref{fig:iba_packet_format}. More details on the content of the different packets can be found in the \gls{iba} specification~\cite{infinibandvol1}.}\label{tab:packet_abbreviations}
|
||||
\vspace{3mm}
|
||||
\endfirsthead
|
||||
\endhead
|
||||
\hlineB{3}
|
||||
\Tstrut
|
||||
\textbf{Abbreviation} & \textbf{Description}\\
|
||||
\hhline{=|=}
|
||||
\Tstrut
|
||||
\acrshort{lrh} & \textit{\acrlong{lrh}}: detailed information on this header is provided in a separate paragraph below.
|
||||
\Bstrut\\
|
||||
\hline
|
||||
\Tstrut
|
||||
\acrshort{grh} & \textit{\acrlong{grh}}: detailed information on this header is provided in a separate paragraph below.
|
||||
\Bstrut\\
|
||||
\hline
|
||||
\Tstrut
|
||||
\acrshort{bth} & \textit{\acrlong{bth}}: Every packet in the \gls{iba} contains this header. It contains fields for the \gls{iba} transport and holds, i.a., the packet type, the destination queue pair number, and the packet sequence number.
|
||||
\Tstrut\Bstrut\\
|
||||
\hline
|
||||
\Tstrut
|
||||
\acrshortpl{eth} & \textit{\acrlongpl{eth}}: These headers are optional and are used if applicable, based on the packet type in the \acrshort{bth}\@. All following headers which end with \textit{ETH} are extended transport headers.
|
||||
\Bstrut\\
|
||||
\hline
|
||||
\Tstrut
|
||||
\acrshort{rdeth} & \textit{\acrlong{rdeth}}: This header contains the end-to-end context, used with the reliable datagram service type.
|
||||
\Bstrut\\
|
||||
\hline
|
||||
\Tstrut
|
||||
\acrshort{deth} & \textit{\acrlong{deth}}: This header contains the queue key and the source queue pair number for datagram transfers.
|
||||
\Bstrut\\
|
||||
\hline
|
||||
\Tstrut
|
||||
\acrshort{reth} & \textit{\acrlong{reth}}: This header contains the virtual address, remote key, and DMA length for an \gls{rdma} operation.
|
||||
\Bstrut\\
|
||||
\hline
|
||||
\Tstrut
|
||||
\acrshort{atomiceth} & \textit{\acrlong{atomiceth}}: This header is used for atomic operations and is similar to the \acrshort{reth}\@. Instead of a length field, it contains a swap (or add) field and a compare data field.
|
||||
\Bstrut\\
|
||||
\hline
|
||||
\Tstrut
|
||||
\acrshort{aeth} & \textit{\acrlong{aeth}}: This header serves as acknowledge field in \gls{rdma} read response first, \gls{rdma} read response last, \gls{rdma} read response only, and acknowledge packets.
|
||||
\Bstrut\\
|
||||
\hline
|
||||
\Tstrut
|
||||
\acrshort{atomicacketh} & \textit{\acrlong{atomicacketh}}: This header is similar to the \acrshort{aeth}, but for atomic acknowledgments. It only contains the original remote data.
|
||||
\Bstrut\\
|
||||
\hline
|
||||
\Tstrut
|
||||
\acrshort{imm} & \textit{\acrlong{imm}}: This optional block can be used to add \SI{32}{\bit} of custom data to \textit{send} or \textit{\gls{rdma} write} packets. These \SI{32}{\bit} of data will be visible in the receive completion queue element.
|
||||
\Bstrut\\
|
||||
\hline
|
||||
\Tstrut
|
||||
\acrshort{ieth} & \textit{\acrlong{ieth}}: This header contains a remote key which will be used to invalidate a remote memory region.
|
||||
\Tstrut\Bstrut\\
|
||||
\hline
|
||||
\Tstrut
|
||||
Payload & \textit{Payload}: This is the actual payload to be sent. This field can be at most as big as the \gls{mtu} of the network.
|
||||
\Bstrut\\
|
||||
\hline
|
||||
\Tstrut
|
||||
\acrshort{icrc} & \textit{\acrlong{icrc}}: This is the redundancy check for blocks that do not change during their transmission from source to destination.
|
||||
\newline
|
||||
|
||||
The \gls{crc} which is used is the same as in the Ethernet standard: the CRC-32 with the polynomial \texttt{0x04C11DB7}.
|
||||
\Bstrut\\
|
||||
\hline
|
||||
\Tstrut
|
||||
\acrshort{vcrc} & \textit{\acrlong{vcrc}}: This is the redundancy check for blocks that do change during their transmission from source to destination.
|
||||
\Tstrut\\
|
||||
\hlineB{3}
|
||||
\end{longtable}
|
32
tables/qp_performance.tex
Normal file
32
tables/qp_performance.tex
Normal file
@@ -0,0 +1,32 @@
|
||||
\begin{table}[ht!]
|
||||
\centering
|
||||
\caption{$\tilde{t}_{lat}$ [\SI{}{\micro\second}] as reported by \texttt{ib\_send\_lat} for different service types and queue pair types with a varying message size. All communication was sent with the normal \textit{send} operation. Every test contained 1000 iterations and messages that were smaller than \SI{188}{\byte} were sent inline.}\label{tab:qp_performance}
|
||||
\vspace{3mm}
|
||||
\begin{tabular}{l r | c c c c c c}
|
||||
\hlineB{3}
|
||||
\Tstrut
|
||||
\textbf{service type} & \textbf{\gls{qp} type} & \multicolumn{6}{c}{\textbf{message size}}
|
||||
\Bstrut
|
||||
\\
|
||||
\Tstrut
|
||||
& & \SI{32}{\byte} & \SI{128}{\byte} & \SI{512}{\byte} & \SI{2}{\kilo\byte} & \SI{8}{\kilo\byte} & \SI{32}{\kilo\byte} \\
|
||||
\hhline{==|======}
|
||||
\Tstrut
|
||||
\multirow{2}{*}{\gls{rc}} & regular & 0.77 & 0.86 & 1.31 & 1.68 & 2.60 & 4.56
|
||||
\Bstrut
|
||||
\\
|
||||
\Tstrut
|
||||
& \gls{rdma} \gls{cm} & 0.81 & 0.90 & 1.33 & 1.73 & 2.57 & 4.57
|
||||
\Bstrut
|
||||
\\ \hline
|
||||
\Tstrut
|
||||
\multirow{2}{*}{\gls{ud}} & regular & 0.80 & 0.84 & 1.26 & 1.74 & \NO & \NO
|
||||
\Bstrut
|
||||
\\
|
||||
\Tstrut
|
||||
& \gls{rdma} \gls{cm} & \redcell{0.86} & 0.90 & 1.31 & 1.71 & \NO & \NO
|
||||
\Bstrut
|
||||
\\
|
||||
\hlineB{3}
|
||||
\end{tabular}
|
||||
\end{table}
|
41
tables/required_cm_messages.tex
Normal file
41
tables/required_cm_messages.tex
Normal file
@@ -0,0 +1,41 @@
|
||||
\begin{table}[ht!]
|
||||
\centering
|
||||
\caption{Required Communication Management messages, used for all service types except \acrfull{ud}.}
|
||||
\label{tab:required_cm_messages}
|
||||
\vspace{3mm}
|
||||
|
||||
\begin{tabular}{p{2.6cm} | p{11.4cm}}
|
||||
\hlineB{3}
|
||||
\Tstrut
|
||||
\textbf{CM message} & \textbf{Description}\\
|
||||
\hhline{=|=}
|
||||
\Tstrut
|
||||
\acrshort{req} & A \textit{\acrlong{req}} is used to initiate the communication establishment sequence. The node that sends this message provides the remote host with its queue pair number and its \gls{gid} and/or \gls{lid}.
|
||||
\Bstrut\\
|
||||
\hline
|
||||
\Tstrut
|
||||
\acrshort{mra} & A \textit{\acrlong{mra}} is used as a response to a \acrshort{req}, a LAP (Load Alternate Path, an optional communication message), or a \acrshort{rep}. It is used if the node which receives one of the aforementioned messages does not expect to be able to respond within the specified time-out. With this mechanism, unnecessary time-outs are prevented.
|
||||
\Bstrut\\
|
||||
\hline
|
||||
\Tstrut
|
||||
\acrshort{rej} & By replying with \textit{\acrlong{rej}} to a \acrshort{req}, a node indicates that it will not continue with the communication establishment sequence. The reason can be found, i.a., in the data field of the \acrshort{rej}.
|
||||
\Bstrut\\
|
||||
\hline
|
||||
\Tstrut
|
||||
\acrshort{rep} & A node sends a \textit{\acrlong{rep}} if it wants to accept a previously received \acrshort{req} and all its parameters.
|
||||
\Bstrut\\
|
||||
\hline
|
||||
\Tstrut
|
||||
\acrshort{rtu} & A node replies with \textit{\acrlong{rtu}} after it has received a \acrshort{rep} to indicate that transmission can be started.
|
||||
\Bstrut\\
|
||||
\hline
|
||||
\Tstrut
|
||||
\acrshort{dreq} & A \textit{\acrlong{dreq}} is sent if a node wants to disconnect a queue pair.
|
||||
\Bstrut\\
|
||||
\hline
|
||||
\Tstrut
|
||||
\acrshort{drep} & A \textit{\acrlong{drep}} is used to acknowledge that a \acrshort{dreq} has been received.
|
||||
\Bstrut\\
|
||||
\hlineB{3}
|
||||
\end{tabular}
|
||||
\end{table}
|
40
tables/service_types.tex
Normal file
40
tables/service_types.tex
Normal file
@@ -0,0 +1,40 @@
|
||||
\begin{table}
|
||||
\centering
|
||||
\caption{InfiniBand Architecture's service types.}
|
||||
\label{tab:service_types}
|
||||
\vspace{3mm}
|
||||
|
||||
\begin{tabular}{ p{5.2cm} | p{9.1cm}}
|
||||
\hlineB{3}
|
||||
\Tstrut
|
||||
\textbf{Service Type} & \textbf{Description}\\
|
||||
\hhline{=|=}
|
||||
\Tstrut
|
||||
\acrfull{rc} & In this mode, one \acrshort{qp} on a local node is connected to one \acrshort{qp} on a remote node. This service type ensures message delivery to---thus not consumption by---the remote node. Messages are sent in order and a combination of hardware and channel adapter software resends them upon communication failure.
|
||||
\Bstrut\\
|
||||
\hline
|
||||
\Tstrut
|
||||
\acrfull{uc} & Like \gls{rc}, this service type connects one local \acrshort{qp} with one remote \acrshort{qp}\@. It is unreliable and thus does not support acknowledgment of delivery and simply drops undelivered messages.
|
||||
\Bstrut\\
|
||||
\hline
|
||||
\Tstrut
|
||||
\acrfull{ud} & This service type allows a local \acrshort{qp} to communicate with any other unreliable datagram \acrshort{qp} without connecting to it. Like \gls{uc}, this mode is unreliable and thus simply drops packets if they get lost.\newline
|
||||
|
||||
The ability to send data to another \acrshort{qp} without connecting to it is beneficial for scalability.
|
||||
\Bstrut\\
|
||||
\hline
|
||||
\Tstrut
|
||||
\acrfull{rd} & Reliable datagram enables a local \acrshort{qp} to communicate with any other \gls{rd} \acrshort{qp} without connecting to it. Contrary to \gls{ud}, this service type is reliable and thus tries to resend messages when they get lost.\newline
|
||||
|
||||
Since reliable datagram is not implemented in the \acrshort{ofed} stack (\autoref{sec:iblibs}) and is hence not practically usable, it will not be further discussed in the present work.
|
||||
\Bstrut\\
|
||||
\hline
|
||||
\Tstrut
|
||||
Raw Datagram & This allows a \acrshort{qp} to send raw datagram messages, which means that \gls{iba} specific headers are stripped from the packets. This service type can be divided into IPv6 raw datagram and EtherType raw datagram.\newline
|
||||
|
||||
This service type will not be further discussed in the present work.
|
||||
\Bstrut\\
|
||||
\hlineB{3}
|
||||
\end{tabular}
|
||||
\newpage
|
||||
\end{table}
|
26
tables/timer_comparison.tex
Normal file
26
tables/timer_comparison.tex
Normal file
@@ -0,0 +1,26 @@
|
||||
\begin{table}[ht!]
|
||||
\centering
|
||||
\caption{Comparison of the performance of timer functions. All tests were performed with a rate of \SI{100}{\kilo\hertz}, with 10 64-bit floating-point numbers per sample, \gls{rc} as service type, and with the InfiniBand node-type as node-type under test. Every test contains \SI{250000}{} samples.}
|
||||
\vspace{3mm}
|
||||
|
||||
\label{tab:timer_comparison}
|
||||
\begin{tabular}{c | l l}
|
||||
\hlineB{3}
|
||||
\Tstrut
|
||||
\textbf{optimized environment} & \multicolumn{2}{c}{\textbf{missed samples}}
|
||||
\Bstrut
|
||||
\\
|
||||
\Tstrut
|
||||
& \multicolumn{1}{c}{\textbf{\texttt{timerfd}}} & \multicolumn{1}{c}{\textbf{\gls{tsc}}} \\
|
||||
\hhline{=|==}
|
||||
\Tstrut
|
||||
\NO & $\frac{12085}{250000}\cdot\SI{100}{\percent} = \SI{4.83}{\percent}$ & $\frac{3035}{250000}\cdot\SI{100}{\percent} = \SI{1.21}{\percent}$
|
||||
\Bstrutlarger
|
||||
\\
|
||||
\Tstrut
|
||||
\OK & $\frac{1692}{250000}\cdot\SI{100}{\percent} = \SI{0.68}{\percent}$ & \greencell{$\frac{1244}{250000}\cdot\SI{100}{\percent} = \SI{0.50}{\percent}$}
|
||||
\Bstrutlarger
|
||||
\\
|
||||
\hlineB{3}
|
||||
\end{tabular}
|
||||
\end{table}
|
41
tables/transport_modes.tex
Normal file
41
tables/transport_modes.tex
Normal file
@@ -0,0 +1,41 @@
|
||||
\begin{table}[ht!]
|
||||
\centering
|
||||
\caption{Supported operations with various service types. Although \acrfull{rd} theoretically supports all operations, it is not supported by the \gls{ofed} stack.}
|
||||
\vspace{3mm}
|
||||
|
||||
\label{tab:transport_modes}
|
||||
\begin{tabular}{r | c c c c}
|
||||
\hlineB{3}
|
||||
\Tstrut
|
||||
\textbf{Operation} & \textbf{UD} & \textbf{UC} & \textbf{RC} & \textbf{RD} \\
|
||||
\hhline{=|====}
|
||||
\Tstrut
|
||||
send & \OK & \OK & \OK &
|
||||
\Bstrut
|
||||
\\
|
||||
\Tstrut
|
||||
send with immediate & \OK & \OK & \OK &
|
||||
\Bstrut
|
||||
\\
|
||||
\Tstrut
|
||||
\gls{rdma} write & & \OK & \OK &
|
||||
\Bstrut
|
||||
\\
|
||||
\Tstrut
|
||||
\gls{rdma} write with immediate & & \OK & \OK &
|
||||
\Bstrut
|
||||
\\
|
||||
\Tstrut
|
||||
\gls{rdma} read & & & \OK &
|
||||
\Bstrut
|
||||
\\
|
||||
\Tstrut
|
||||
atomic compare \& swap & & & \OK &
|
||||
\Bstrut
|
||||
\\
|
||||
\Tstrut
|
||||
atomic fetch \& add & & & \OK &
|
||||
\Bstrut\\
|
||||
\hlineB{3}
|
||||
\end{tabular}
|
||||
\end{table}
|
93
tables/villasnode_nodes.tex
Normal file
93
tables/villasnode_nodes.tex
Normal file
@@ -0,0 +1,93 @@
|
||||
\begin{table}[ht!]
|
||||
\centering
|
||||
\caption[Interfaces supported by VILLASnode as of June 2018.]{Interfaces supported by VILLASnode as of June 2018\footnotemark.}
|
||||
\label{tab:villasnode_nodes}
|
||||
\vspace{3mm}
|
||||
|
||||
\begin{tabular}{l | l | p{8.5cm}}
|
||||
\hlineB{3}
|
||||
\Tstrut
|
||||
\textbf{Section} & \textbf{Node Name} & \textbf{Description} \\
|
||||
\hhline{=|=|=}
|
||||
\Tstrut
|
||||
\multirow{10}{*}{\makecell[l]{internal\\communication}} & \textit{file } & support for file log/replay
|
||||
\Bstrut\\
|
||||
\hhline{~|-|-}
|
||||
\Tstrut
|
||||
& \textit{shmem } & \acrshort{posix} shared memory interface with external processes
|
||||
\Bstrut\\
|
||||
\hhline{~|-|-}
|
||||
\Tstrut
|
||||
& \textit{loopback } & internal loopback using a queued \acrshort{fifo} buffer
|
||||
\Bstrut\\
|
||||
\hhline{~|-|-}
|
||||
\Tstrut
|
||||
& \textit{signal } & configurable signal generator for testing purposes
|
||||
\Bstrut\\
|
||||
\hhline{~|-|-}
|
||||
\Tstrut
|
||||
& \textit{stats } & send communication statistics to other nodes
|
||||
\Bstrut\\
|
||||
\hhline{~|-|-}
|
||||
\Tstrut
|
||||
& \textit{test\undershort rtt} & measurement of round-trip time, packet loss, and sending rates
|
||||
\Bstrut\\
|
||||
\hline
|
||||
\Tstrut
|
||||
\multirow{10}{*}{\makecell[l]{server-server\\communication}} & \textit{socket } & BSD network sockets for Packet, IP, or \acrshort{udp} layer
|
||||
\Bstrut\\
|
||||
\hhline{~|-|-}
|
||||
\Tstrut
|
||||
& \textit{zeromq } & ZeroMQ publish/subscribe messaging
|
||||
\Bstrut\\
|
||||
\hhline{~|-|-}
|
||||
\Tstrut
|
||||
& \textit{influxdb } & InfluxDB time-series database
|
||||
\Bstrut\\
|
||||
\hhline{~|-|-}
|
||||
\Tstrut
|
||||
& \textit{nanomsg } & nanomsg publish/subscribe messaging
|
||||
\Bstrut\\
|
||||
\hhline{~|-|-}
|
||||
\Tstrut
|
||||
& \textit{amqp } & Advanced Message Queuing Protocol
|
||||
\Bstrut\\
|
||||
\hhline{~|-|-}
|
||||
\Tstrut
|
||||
& \textit{mqtt } & Message Queuing Telemetry Transport
|
||||
\Bstrut\\
|
||||
\hhline{~|-|-}
|
||||
\Tstrut
|
||||
& \textit{ngsi } & OMA Next Generation Services Interface 10
|
||||
\Bstrut\\
|
||||
\hhline{~|-|-}
|
||||
\Tstrut
|
||||
& \textit{websocket} & send and receive samples of a WebSocket connection
|
||||
\Bstrut\\
|
||||
\hline
|
||||
\Tstrut
|
||||
\multirow{6}{*}{\makecell[l]{simulator-server\\communication}} & \textit{opal } & OPAL-RT asynchronous processes
|
||||
\Bstrut\\
|
||||
\hhline{~|-|-}
|
||||
\Tstrut
|
||||
& \textit{fpga } & VILLASfpga \acrshort{pcie} card
|
||||
\Bstrut\\
|
||||
\hhline{~|-|-}
|
||||
\Tstrut
|
||||
& \textit{comedi } & interface to Comedi devices
|
||||
\Bstrut\\
|
||||
\hhline{~|-|-}
|
||||
\Tstrut
|
||||
& \textit{gtwif } & RTDS GTWIF workstation interface
|
||||
\Bstrut\\
|
||||
\hhline{~|-|-}
|
||||
\Tstrut
|
||||
& \textit{iec61850-9-2} & IEC 61850-9-2 Sampled Values
|
||||
\Bstrut\\
|
||||
\hhline{~|-|-}
|
||||
\Tstrut
|
||||
& \textit{iec61850-8-1} & IEC 61850-8-1 GOOSE Telegrams
|
||||
\Bstrut\\
|
||||
\hlineB{3}
|
||||
\end{tabular}
|
||||
\end{table}
|
Reference in New Issue
Block a user