1821 lines
83 KiB
HTML
1821 lines
83 KiB
HTML
<!DOCTYPE HTML>
|
|
<html lang="en" class="sidebar-visible no-js">
|
|
<head>
|
|
<!-- Book generated using mdBook -->
|
|
<meta charset="UTF-8">
|
|
<title>Futures Explained in 200 Lines of Rust</title>
|
|
|
|
<meta name="robots" content="noindex" />
|
|
|
|
|
|
<meta content="text/html; charset=utf-8" http-equiv="Content-Type">
|
|
<meta name="description" content="">
|
|
<meta name="viewport" content="width=device-width, initial-scale=1">
|
|
<meta name="theme-color" content="#ffffff" />
|
|
|
|
<link rel="shortcut icon" href="favicon.png">
|
|
<link rel="stylesheet" href="css/variables.css">
|
|
<link rel="stylesheet" href="css/general.css">
|
|
<link rel="stylesheet" href="css/chrome.css">
|
|
<link rel="stylesheet" href="css/print.css" media="print">
|
|
|
|
<!-- Fonts -->
|
|
<link rel="stylesheet" href="FontAwesome/css/font-awesome.css">
|
|
<link href="https://fonts.googleapis.com/css?family=Open+Sans:300italic,400italic,600italic,700italic,800italic,400,300,600,700,800" rel="stylesheet" type="text/css">
|
|
<link href="https://fonts.googleapis.com/css?family=Source+Code+Pro:500" rel="stylesheet" type="text/css">
|
|
|
|
<!-- Highlight.js Stylesheets -->
|
|
<link rel="stylesheet" href="highlight.css">
|
|
<link rel="stylesheet" href="tomorrow-night.css">
|
|
<link rel="stylesheet" href="ayu-highlight.css">
|
|
|
|
<!-- Custom theme stylesheets -->
|
|
|
|
|
|
|
|
</head>
|
|
<body class="light">
|
|
<!-- Provide site root to javascript -->
|
|
<script type="text/javascript">
    // Globals consumed by mdBook's other inline scripts and by book.js:
    // - path_to_root: relative prefix from this page to the book root
    //   (empty string means this page lives at the root).
    // - default_theme: theme applied when localStorage holds no choice
    //   (read by the "set the theme before any content is loaded" script).
    var path_to_root = "";
    var default_theme = "light";
</script>
|
|
|
|
<!-- Work around some values being stored in localStorage wrapped in quotes -->
|
|
<script type="text/javascript">
    // Older mdBook versions stored these values JSON-encoded, i.e. wrapped
    // in double quotes. Strip the quotes so later reads get the raw value.
    // localStorage access can throw (privacy mode, disabled storage), so
    // everything stays inside try/catch.
    try {
        var theme = localStorage.getItem('mdbook-theme');
        var sidebar = localStorage.getItem('mdbook-sidebar');

        // Guard against null: getItem returns null when the key is absent,
        // and calling startsWith on null would throw — previously that
        // aborted the block before the sidebar value could be fixed up.
        if (theme && theme.startsWith('"') && theme.endsWith('"')) {
            localStorage.setItem('mdbook-theme', theme.slice(1, theme.length - 1));
        }

        if (sidebar && sidebar.startsWith('"') && sidebar.endsWith('"')) {
            localStorage.setItem('mdbook-sidebar', sidebar.slice(1, sidebar.length - 1));
        }
    } catch (e) { }
</script>
|
|
|
|
<!-- Set the theme before any content is loaded, prevents flash -->
|
|
<script type="text/javascript">
    // Apply the remembered theme before any content renders so the page
    // doesn't flash the default colors first.
    var theme;
    try { theme = localStorage.getItem('mdbook-theme'); } catch (e) { }
    // == null matches both null (key absent) and undefined (getItem threw).
    if (theme == null) { theme = default_theme; }
    document.body.className = theme;
    document.documentElement.className = theme + ' js';
</script>
|
|
|
|
<!-- Hide / unhide sidebar before it is displayed -->
|
|
<script type="text/javascript">
    // Decide sidebar visibility before first paint: always hidden on
    // narrow viewports, otherwise the stored preference (default: visible).
    // `html` and `sidebar` stay global — a later script reads `sidebar`
    // to set the matching ARIA attributes.
    var html = document.documentElement;
    var sidebar = 'hidden';
    if (document.body.clientWidth >= 1080) {
        var stored = null;
        try { stored = localStorage.getItem('mdbook-sidebar'); } catch (e) { }
        sidebar = stored || 'visible';
    }
    html.classList.remove('sidebar-visible');
    html.classList.add('sidebar-' + sidebar);
</script>
|
|
|
|
<nav id="sidebar" class="sidebar" aria-label="Table of contents">
|
|
<div class="sidebar-scrollbox">
|
|
<ol class="chapter"><li><a href="0_introduction.html"><strong aria-hidden="true">1.</strong> Introduction</a></li><li><a href="1_background_information.html"><strong aria-hidden="true">2.</strong> Some background information</a></li><li><a href="2_trait_objects.html"><strong aria-hidden="true">3.</strong> Trait objects and fat pointers</a></li><li><a href="3_generators_pin.html"><strong aria-hidden="true">4.</strong> Generators and Pin</a></li><li><a href="4_pin.html"><strong aria-hidden="true">5.</strong> Pin</a></li><li><a href="5_reactor_executor.html"><strong aria-hidden="true">6.</strong> Reactor/Executor Pattern</a></li><li><a href="6_future_example.html"><strong aria-hidden="true">7.</strong> The main example</a></li><li><a href="7_conclusion.html"><strong aria-hidden="true">8.</strong> Conclusion and exercises</a></li><li><a href="8_concurrent_futures.html"><strong aria-hidden="true">9.</strong> Bonus 1: concurrent futures</a></li></ol>
|
|
</div>
|
|
<div id="sidebar-resize-handle" class="sidebar-resize-handle"></div>
|
|
</nav>
|
|
|
|
<div id="page-wrapper" class="page-wrapper">
|
|
|
|
<div class="page">
|
|
|
|
<div id="menu-bar" class="menu-bar">
|
|
<div id="menu-bar-sticky-container">
|
|
<div class="left-buttons">
|
|
<button id="sidebar-toggle" class="icon-button" type="button" title="Toggle Table of Contents" aria-label="Toggle Table of Contents" aria-controls="sidebar">
|
|
<i class="fa fa-bars"></i>
|
|
</button>
|
|
<button id="theme-toggle" class="icon-button" type="button" title="Change theme" aria-label="Change theme" aria-haspopup="true" aria-expanded="false" aria-controls="theme-list">
|
|
<i class="fa fa-paint-brush"></i>
|
|
</button>
|
|
<ul id="theme-list" class="theme-popup" aria-label="Themes" role="menu">
|
|
<li role="none"><button role="menuitem" class="theme" id="light">Light (default)</button></li>
|
|
<li role="none"><button role="menuitem" class="theme" id="rust">Rust</button></li>
|
|
<li role="none"><button role="menuitem" class="theme" id="coal">Coal</button></li>
|
|
<li role="none"><button role="menuitem" class="theme" id="navy">Navy</button></li>
|
|
<li role="none"><button role="menuitem" class="theme" id="ayu">Ayu</button></li>
|
|
</ul>
|
|
|
|
<button id="search-toggle" class="icon-button" type="button" title="Search. (Shortkey: s)" aria-label="Toggle Searchbar" aria-expanded="false" aria-keyshortcuts="S" aria-controls="searchbar">
|
|
<i class="fa fa-search"></i>
|
|
</button>
|
|
|
|
</div>
|
|
|
|
<h1 class="menu-title">Futures Explained in 200 Lines of Rust</h1>
|
|
|
|
<div class="right-buttons">
|
|
<a href="print.html" title="Print this book" aria-label="Print this book">
|
|
<i id="print-button" class="fa fa-print"></i>
|
|
</a>
|
|
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
<div id="search-wrapper" class="hidden">
|
|
<form id="searchbar-outer" class="searchbar-outer">
|
|
<input type="search" name="search" id="searchbar" placeholder="Search this book ..." aria-controls="searchresults-outer" aria-describedby="searchresults-header">
|
|
</form>
|
|
<div id="searchresults-outer" class="searchresults-outer hidden">
|
|
<div id="searchresults-header" class="searchresults-header"></div>
|
|
<ul id="searchresults">
|
|
</ul>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
<!-- Apply ARIA attributes after the sidebar and the sidebar toggle button are added to the DOM -->
|
|
<script type="text/javascript">
    // Mirror the sidebar state chosen earlier (global `sidebar`) into the
    // ARIA attributes, and take hidden sidebar links out of the tab order.
    var sidebarShown = sidebar === 'visible';
    document.getElementById('sidebar-toggle').setAttribute('aria-expanded', sidebarShown);
    document.getElementById('sidebar').setAttribute('aria-hidden', !sidebarShown);
    Array.from(document.querySelectorAll('#sidebar a')).forEach(function (link) {
        link.setAttribute('tabIndex', sidebarShown ? 0 : -1);
    });
</script>
|
|
|
|
<div id="content" class="content">
|
|
<main>
|
|
<h1><a class="header" href="#futures-explained-in-200-lines-of-rust" id="futures-explained-in-200-lines-of-rust">Futures Explained in 200 Lines of Rust</a></h1>
|
|
<p>This book aims to explain <code>Futures</code> in Rust using an example driven approach.</p>
|
|
<p>The goal is to get a better understanding of <code>Futures</code> by implementing a toy
|
|
<code>Reactor</code>, a very simple <code>Executor</code> and our own <code>Futures</code>. </p>
|
|
<p>We'll start off a bit differently than most other explanations. Instead of
|
|
deferring some of the details about what's special about futures in Rust we
|
|
try to tackle that head on first. We'll be as brief as possible, but as thorough
|
|
as needed. This way, most questions will be answered and explored up front. </p>
|
|
<p>We'll end up with futures that can run on any executor like <code>tokio</code> and <code>async_std</code>.</p>
|
|
<p>In the end I've made some reader exercises you can do if you want to fix some
|
|
of the most glaring omissions and shortcuts we took and create a slightly better
|
|
example yourself.</p>
|
|
<h2><a class="header" href="#what-does-this-book-give-you-that-isnt-covered-elsewhere" id="what-does-this-book-give-you-that-isnt-covered-elsewhere">What does this book give you that isn't covered elsewhere?</a></h2>
|
|
<p>That's a valid question. There are many good resources and examples already. First
|
|
of all, this book will focus on <code>Futures</code> and <code>async/await</code> specifically and
|
|
not in the context of any specific runtime.</p>
|
|
<p>Secondly, I've always found small runnable examples very exciting to learn from.
|
|
Thanks to Mdbook the examples can even be edited and explored further. It's
|
|
all code that you can download, play with and learn from.</p>
|
|
<p>We'll end up with an understandable example including a <code>Future</code>
|
|
implementation, an <code>Executor</code> and a <code>Reactor</code> in less than 200 lines of code.
|
|
We don't rely on any dependencies or real I/O which means it's very easy to
|
|
explore further and try your own ideas.</p>
|
|
<h2><a class="header" href="#credits-and-thanks" id="credits-and-thanks">Credits and thanks</a></h2>
|
|
<p>I'd like to take the chance to thank the people behind <code>mio</code>, <code>tokio</code>,
|
|
<code>async_std</code>, <code>Futures</code>, <code>libc</code>, <code>crossbeam</code> and many other libraries which so
|
|
much is built upon. Reading and exploring some of this code is nothing less than
|
|
impressive. Even the RFCs that much of the design is built upon is written in a
|
|
way that mortal people can understand, and that requires a lot of work. So thanks!</p>
|
|
<h1><a class="header" href="#some-background-information" id="some-background-information">Some background information</a></h1>
|
|
<blockquote>
|
|
<p><strong>Relevant for:</strong></p>
|
|
<ul>
|
|
<li>High level introduction to concurrency in Rust</li>
|
|
<li>Knowing what Rust provides and not when working with async</li>
|
|
<li>Understanding why we need runtimes </li>
|
|
<li>Knowing that Rust has <code>Futures 1.0</code> and <code>Futures 3.0</code>, and how to deal with them</li>
|
|
<li>Getting pointers to further reading on concurrency in general</li>
|
|
</ul>
|
|
</blockquote>
|
|
<p>Before we start implementing our <code>Futures</code> , we'll go through some background
|
|
information that will help demystify some of the concepts we encounter.</p>
|
|
<p>Actually, after going through these concepts, implementing futures will seem
|
|
pretty simple. I promise.</p>
|
|
<h2><a class="header" href="#async-in-rust" id="async-in-rust">Async in Rust</a></h2>
|
|
<p>Let's get some of the common roadblocks out of the way first.</p>
|
|
<p>Async in Rust is different from most other languages in the sense that Rust
|
|
has an extremely lightweight runtime.</p>
|
|
<p>In languages like C#, JavaScript, Java and Go, the runtime is already there. So
|
|
if you come from one of those languages this will seem a bit strange to you.</p>
|
|
<h3><a class="header" href="#what-rusts-standard-library-takes-care-of" id="what-rusts-standard-library-takes-care-of">What Rust's standard library takes care of</a></h3>
|
|
<ol>
|
|
<li>The definition of an interruptible task</li>
|
|
<li>An extremely efficient technique to start, suspend, resume and store tasks
|
|
which are executed concurrently. </li>
|
|
<li>A defined way to wake up a suspended task</li>
|
|
</ol>
|
|
<p>That's really what Rust's standard library does. As you see there is no definition
|
|
of non-blocking I/O, how these tasks are created or how they're run.</p>
|
|
<h3><a class="header" href="#what-you-need-to-find-elsewhere" id="what-you-need-to-find-elsewhere">What you need to find elsewhere</a></h3>
|
|
<p>A runtime. Well, in Rust we normally divide the runtime into two parts:</p>
|
|
<ul>
|
|
<li>The Reactor</li>
|
|
<li>The Executor</li>
|
|
</ul>
|
|
<p>Reactors create leaf <code>Futures</code>, and provides things like non-blocking sockets,
|
|
an event queue and so on.</p>
|
|
<p>Executors, accepts one or more asynchronous tasks called <code>Futures</code> and takes
|
|
care of actually running the code we write, suspend the tasks when they're
|
|
waiting for I/O and resumes them.</p>
|
|
<p>In theory, we could choose one <code>Reactor</code> and one <code>Executor</code> that have nothing
|
|
to do with each other besides one creates leaf <code>Futures</code> and one runs them, but
|
|
in reality today you'll most often get both in a <code>Runtime</code>.</p>
|
|
<p>There are mainly two such runtimes today <a href="https://github.com/async-rs/async-std">async_std</a> and <a href="https://github.com/tokio-rs/tokio">tokio</a>.</p>
|
|
<p>Quite a bit of complexity attributed to <code>Futures</code> are actually complexity rooted
|
|
in runtimes. Creating an efficient runtime is hard. Learning how to use one
|
|
correctly can be hard as well, but both are excellent and it's just like
|
|
learning any new library.</p>
|
|
<p>The difference between Rust and other languages is that you have to make an
|
|
active choice when it comes to picking a runtime. Most often you'll just use
|
|
the one provided for you.</p>
|
|
<h2><a class="header" href="#futures-10-and-futures-30" id="futures-10-and-futures-30">Futures 1.0 and Futures 3.0</a></h2>
|
|
<p>I'll not spend too much time on this, but it feels wrong to not mention that
|
|
there have been several iterations on how async should work in Rust.</p>
|
|
<p><code>Futures 3.0</code> works with the relatively new <code>async/await</code> syntax in Rust and
|
|
it's what we'll learn.</p>
|
|
<p>Now, since this is rather recent, you can encounter crates that use <code>Futures 1.0</code>
|
|
still. This will get resolved in time, but unfortunately it's not always easy
|
|
to know in advance.</p>
|
|
<p>A good sign is that if you're required to use combinators like <code>and_then</code> then
|
|
you're using <code>Futures 1.0</code>.</p>
|
|
<p>While not directly compatible, there is a tool that lets you relatively easily
|
|
convert a <code>Future 1.0</code> to a <code>Future 3.0</code> and vice versa. You can find all you
|
|
need in the <a href="https://github.com/rust-lang/futures-rs"><code>futures-rs</code></a> crate and all <a href="https://rust-lang.github.io/futures-rs/blog/2019/04/18/compatibility-layer.html">information you need here</a>.</p>
|
|
<h2><a class="header" href="#first-things-first" id="first-things-first">First things first</a></h2>
|
|
<p>If you find the concepts of concurrency and async programming confusing in
|
|
general, I know where you're coming from and I have written some resources to
|
|
try to give a high level overview that will make it easier to learn Rust's
|
|
<code>Futures</code> afterwards:</p>
|
|
<ul>
|
|
<li><a href="https://cfsamson.github.io/book-exploring-async-basics/1_concurrent_vs_parallel.html">Async Basics - The difference between concurrency and parallelism</a></li>
|
|
<li><a href="https://cfsamson.github.io/book-exploring-async-basics/2_async_history.html">Async Basics - Async history</a></li>
|
|
<li><a href="https://cfsamson.github.io/book-exploring-async-basics/5_strategies_for_handling_io.html">Async Basics - Strategies for handling I/O</a></li>
|
|
<li><a href="https://cfsamson.github.io/book-exploring-async-basics/6_epoll_kqueue_iocp.html">Async Basics - Epoll, Kqueue and IOCP</a></li>
|
|
</ul>
|
|
<p>Now learning these concepts by studying futures is making it much harder than
|
|
it needs to be, so go on and read these chapters. I'll be right here when
|
|
you're back. </p>
|
|
<p>However, if you feel that you have the basics covered, then go right on. </p>
|
|
<p>Let's get moving!</p>
|
|
<h1><a class="header" href="#trait-objects-and-fat-pointers" id="trait-objects-and-fat-pointers">Trait objects and fat pointers</a></h1>
|
|
<blockquote>
|
|
<p><strong>Relevant for:</strong></p>
|
|
<ul>
|
|
<li>Understanding how the Waker object is constructed</li>
|
|
<li>Getting a basic feel for "type erased" objects and what they are</li>
|
|
<li>Learning the basics of dynamic dispatch</li>
|
|
</ul>
|
|
</blockquote>
|
|
<h2><a class="header" href="#trait-objects-and-dynamic-dispatch" id="trait-objects-and-dynamic-dispatch">Trait objects and dynamic dispatch</a></h2>
|
|
<p>One of the most confusing topics we encounter when implementing our own <code>Futures</code>
|
|
is how we implement a <code>Waker</code> . Creating a <code>Waker</code> involves creating a <code>vtable</code>
|
|
which allows us to use dynamic dispatch to call methods on a <em>type erased</em> trait
|
|
object we construct ourselves.</p>
|
|
<blockquote>
|
|
<p>If you want to know more about dynamic dispatch in Rust I can recommend an article written by Adam Schwalm called <a href="https://alschwalm.com/blog/static/2017/03/07/exploring-dynamic-dispatch-in-rust/">Exploring Dynamic Dispatch in Rust</a>.</p>
|
|
</blockquote>
|
|
<p>Let's explain this a bit more in detail.</p>
|
|
<h2><a class="header" href="#fat-pointers-in-rust" id="fat-pointers-in-rust">Fat pointers in Rust</a></h2>
|
|
<p>Let's take a look at the size of some different pointer types in Rust. If we
|
|
run the following code. <em>(You'll have to press "play" to see the output)</em>:</p>
|
|
<pre><pre class="playpen"><code class="language-rust"># use std::mem::size_of;
|
|
trait SomeTrait { }
|
|
|
|
fn main() {
|
|
println!("======== The size of different pointers in Rust: ========");
|
|
println!("&dyn Trait:-----{}", size_of::<&dyn SomeTrait>());
|
|
println!("&[&dyn Trait]:--{}", size_of::<&[&dyn SomeTrait]>());
|
|
println!("Box<Trait>:-----{}", size_of::<Box<SomeTrait>>());
|
|
println!("&i32:-----------{}", size_of::<&i32>());
|
|
println!("&[i32]:---------{}", size_of::<&[i32]>());
|
|
println!("Box<i32>:-------{}", size_of::<Box<i32>>());
|
|
println!("&Box<i32>:------{}", size_of::<&Box<i32>>());
|
|
println!("[&dyn Trait;4]:-{}", size_of::<[&dyn SomeTrait; 4]>());
|
|
println!("[i32;4]:--------{}", size_of::<[i32; 4]>());
|
|
}
|
|
</code></pre></pre>
|
|
<p>As you see from the output after running this, the sizes of the references varies.
|
|
Many are 8 bytes (which is a pointer size on 64 bit systems), but some are 16
|
|
bytes.</p>
|
|
<p>The 16 byte sized pointers are called "fat pointers" since they carry more extra
|
|
information.</p>
|
|
<p><strong>Example <code>&[i32]</code> :</strong> </p>
|
|
<ul>
|
|
<li>The first 8 bytes is the actual pointer to the first element in the array (or part of an array the slice refers to)</li>
|
|
<li>The second 8 bytes is the length of the slice.</li>
|
|
</ul>
|
|
<p><strong>Example <code>&dyn SomeTrait</code>:</strong></p>
|
|
<p>This is the type of fat pointer we'll concern ourselves about going forward.
|
|
<code>&dyn SomeTrait</code> is a reference to a trait, or what Rust calls <em>trait objects</em>.</p>
|
|
<p>The layout for a pointer to a <em>trait object</em> looks like this: </p>
|
|
<ul>
|
|
<li>The first 8 bytes points to the <code>data</code> for the trait object</li>
|
|
<li>The second 8 bytes points to the <code>vtable</code> for the trait object</li>
|
|
</ul>
|
|
<p>The reason for this is to allow us to refer to an object we know nothing about
|
|
except that it implements the methods defined by our trait. To accomplish this we use <em>dynamic dispatch</em>.</p>
|
|
<p>Let's explain this in code instead of words by implementing our own trait
|
|
object from these parts:</p>
|
|
<blockquote>
|
|
<p>This is an example of <em>editable</em> code. You can change everything in the example
|
|
and try to run it. If you want to go back, press the undo symbol. Keep an eye
|
|
out for these as we go forward. Many examples will be editable.</p>
|
|
</blockquote>
|
|
<pre><pre class="playpen"><code class="language-rust editable">// A reference to a trait object is a fat pointer: (data_ptr, vtable_ptr)
|
|
trait Test {
|
|
fn add(&self) -> i32;
|
|
fn sub(&self) -> i32;
|
|
fn mul(&self) -> i32;
|
|
}
|
|
|
|
// This will represent our home-brewed fat pointer to a trait object
|
|
#[repr(C)]
|
|
struct FatPointer<'a> {
|
|
/// A reference is a pointer to an instantiated `Data` instance
|
|
data: &'a mut Data,
|
|
/// Since we need to pass in literal values like length and alignment it's
|
|
/// easiest for us to convert pointers to usize-integers instead of the other way around.
|
|
vtable: *const usize,
|
|
}
|
|
|
|
// This is the data in our trait object. It's just two numbers we want to operate on.
|
|
struct Data {
|
|
a: i32,
|
|
b: i32,
|
|
}
|
|
|
|
// ====== function definitions ======
|
|
fn add(s: &Data) -> i32 {
|
|
s.a + s.b
|
|
}
|
|
fn sub(s: &Data) -> i32 {
|
|
s.a - s.b
|
|
}
|
|
fn mul(s: &Data) -> i32 {
|
|
s.a * s.b
|
|
}
|
|
|
|
fn main() {
|
|
let mut data = Data {a: 3, b: 2};
|
|
// vtable is like special purpose array of pointer-length types with a fixed
|
|
// format where the three first values has a special meaning like the
|
|
// length of the array is encoded in the array itself as the second value.
|
|
let vtable = vec![
|
|
0, // pointer to `Drop` (which we're not implementing here)
|
|
    6, // length of vtable
|
|
8, // alignment
|
|
|
|
// we need to make sure we add these in the same order as defined in the Trait.
|
|
add as usize, // function pointer - try changing the order of `add`
|
|
sub as usize, // function pointer - and `sub` to see what happens
|
|
mul as usize, // function pointer
|
|
];
|
|
|
|
let fat_pointer = FatPointer { data: &mut data, vtable: vtable.as_ptr()};
|
|
let test = unsafe { std::mem::transmute::<FatPointer, &dyn Test>(fat_pointer) };
|
|
|
|
    // And voilà, it's now a trait object we can call methods on
|
|
println!("Add: 3 + 2 = {}", test.add());
|
|
println!("Sub: 3 - 2 = {}", test.sub());
|
|
println!("Mul: 3 * 2 = {}", test.mul());
|
|
}
|
|
|
|
</code></pre></pre>
|
|
<p>The reason we go through this will be clear later on when we implement our own
|
|
<code>Waker</code> we'll actually set up a <code>vtable</code> like we do here too, and knowing what
|
|
it is will make this much less mysterious.</p>
|
|
<h1><a class="header" href="#generators" id="generators">Generators</a></h1>
|
|
<blockquote>
|
|
<p><strong>Relevant for:</strong></p>
|
|
<ul>
|
|
<li>Understanding how the async/await syntax works since it's how <code>await</code> is implemented</li>
|
|
<li>Why we need <code>Pin</code></li>
|
|
<li>Why Rust's async model is extremely efficient</li>
|
|
</ul>
|
|
<p>The motivation for <code>Generators</code> can be found in <a href="https://github.com/rust-lang/rfcs/blob/master/text/2033-experimental-coroutines.md">RFC#2033</a>. It's very
|
|
well written and I can recommend reading through it (it talks as much about
|
|
async/await as it does about generators).</p>
|
|
</blockquote>
|
|
<p>The second difficult part that there seems to be a lot of questions about
|
|
is Generators and the <code>Pin</code> type. Since they're related we'll start off by
|
|
exploring generators first. By doing that we'll soon get to see why
|
|
we need to be able to "pin" some data to a fixed location in memory and
|
|
get an introduction to <code>Pin</code> as well.</p>
|
|
<p>Basically, there were three main options that were discussed when Rust was
|
|
designing how the language would handle concurrency:</p>
|
|
<ol>
|
|
<li>Stackful coroutines, better known as green threads.</li>
|
|
<li>Using combinators.</li>
|
|
<li>Stackless coroutines, better known as generators.</li>
|
|
</ol>
|
|
<h3><a class="header" href="#stackful-coroutinesgreen-threads" id="stackful-coroutinesgreen-threads">Stackful coroutines/green threads</a></h3>
|
|
<p>I've written about green threads before. Go check out
|
|
<a href="https://cfsamson.gitbook.io/green-threads-explained-in-200-lines-of-rust/">Green Threads Explained in 200 lines of Rust</a> if you're interested.</p>
|
|
<p>Green threads uses the same mechanisms as an OS does by creating a thread for
|
|
each task, setting up a stack, save the CPU's state and jump
|
|
from one task(thread) to another by doing a "context switch". We yield control to the scheduler which then
|
|
continues running a different task.</p>
|
|
<p>Rust had green threads once, but they were removed before it hit 1.0. The state
|
|
of execution is stored in each stack so in such a solution there would be no need
|
|
for <code>async</code>, <code>await</code>, <code>Futures</code> or <code>Pin</code>. All this would be implementation
|
|
details for the library.</p>
|
|
<h3><a class="header" href="#combinators" id="combinators">Combinators</a></h3>
|
|
<p><code>Futures 1.0</code> used combinators. If you've worked with <code>Promises</code> in JavaScript,
|
|
you already know combinators. In Rust they look like this:</p>
|
|
<pre><code class="language-rust noplaypen ignore">let future = Connection::connect(conn_str).and_then(|conn| {
|
|
conn.query("somerequest").map(|row|{
|
|
SomeStruct::from(row)
|
|
}).collect::<Vec<SomeStruct>>()
|
|
});
|
|
|
|
let rows: Result<Vec<SomeStruct>, SomeLibraryError> = block_on(future).unwrap();
|
|
|
|
</code></pre>
|
|
<p>While an effective solution there are mainly three downsides I'll focus on:</p>
|
|
<ol>
|
|
<li>The error messages produced could be extremely long and arcane</li>
|
|
<li>Not optimal memory usage</li>
|
|
<li>Did not allow to borrow across combinator steps.</li>
|
|
</ol>
|
|
<p>Point #3, is actually a major drawback with <code>Futures 1.0</code>.</p>
|
|
<p>Not allowing borrows across suspension points ends up being very
|
|
un-ergonomic and often requiring extra allocations or copying to accomplish
|
|
some tasks which is inefficient.</p>
|
|
<p>The reason for the higher than optimal memory usage is that this is basically
|
|
a callback-based approach, where each closure stores all the data it needs
|
|
for computation. This means that as we chain these, the memory required to store
|
|
the needed state increases with each added step.</p>
|
|
<h3><a class="header" href="#stackless-coroutinesgenerators" id="stackless-coroutinesgenerators">Stackless coroutines/generators</a></h3>
|
|
<p>This is the model used in Rust today. It has a few notable advantages:</p>
|
|
<ol>
|
|
<li>It's easy to convert normal Rust code to a stackless coroutine using
|
|
async/await as keywords (it can even be done using a macro).</li>
|
|
<li>No need for context switching and saving/restoring CPU state</li>
|
|
<li>No need to handle dynamic stack allocation</li>
|
|
<li>Very memory efficient</li>
|
|
<li>Allowed for borrows across suspension points</li>
|
|
</ol>
|
|
<p>The last point is in contrast to <code>Futures 1.0</code>. With async/await we can do this:</p>
|
|
<pre><code class="language-rust ignore">async fn myfn() {
|
|
let text = String::from("Hello world");
|
|
let borrowed = &text[0..5];
|
|
somefuture.await;
|
|
println!("{}", borrowed);
|
|
}
|
|
</code></pre>
|
|
<p>Generators are implemented as state machines. The memory footprint of a chain
|
|
of computations is only defined by the largest footprint any single step
|
|
requires. That means that adding steps to a chain of computations might not
|
|
require any added memory at all.</p>
|
|
<h2><a class="header" href="#how-generators-work" id="how-generators-work">How generators work</a></h2>
|
|
<p>In Nightly Rust today you can use the <code>yield</code> keyword. Basically using this
|
|
keyword in a closure, converts it to a generator. A closure looking like this
|
|
(I'm going to use the terminology that's currently in Rust):</p>
|
|
<pre><code class="language-rust noplaypen ignore">let a = 4;
|
|
let b = move || {
|
|
println!("Hello");
|
|
yield a * 2;
|
|
println!("world!");
|
|
};
|
|
|
|
if let GeneratorState::Yielded(n) = gen.resume() {
|
|
println!("Got value {}", n);
|
|
}
|
|
|
|
if let GeneratorState::Complete(()) = gen.resume() {
|
|
()
|
|
};
|
|
</code></pre>
|
|
<p>Early on, before there was a consensus about the design of <code>Pin</code>, this
|
|
compiled to something looking similar to this:</p>
|
|
<pre><pre class="playpen"><code class="language-rust">fn main() {
|
|
let mut gen = GeneratorA::start(4);
|
|
|
|
if let GeneratorState::Yielded(n) = gen.resume() {
|
|
println!("Got value {}", n);
|
|
}
|
|
|
|
if let GeneratorState::Complete(()) = gen.resume() {
|
|
()
|
|
};
|
|
}
|
|
|
|
// If you've ever wondered why the parameters are called Y and R the naming from
|
|
// the original rfc most likely holds the answer
|
|
enum GeneratorState<Y, R> {
|
|
// originally called `CoResult`
|
|
Yielded(Y), // originally called `Yield(Y)`
|
|
Complete(R), // originally called `Return(R)`
|
|
}
|
|
|
|
trait Generator {
|
|
type Yield;
|
|
type Return;
|
|
fn resume(&mut self) -> GeneratorState<Self::Yield, Self::Return>;
|
|
}
|
|
|
|
enum GeneratorA {
|
|
Enter(i32),
|
|
Yield1(i32),
|
|
Exit,
|
|
}
|
|
|
|
impl GeneratorA {
|
|
fn start(a1: i32) -> Self {
|
|
GeneratorA::Enter(a1)
|
|
}
|
|
}
|
|
|
|
impl Generator for GeneratorA {
|
|
type Yield = i32;
|
|
type Return = ();
|
|
fn resume(&mut self) -> GeneratorState<Self::Yield, Self::Return> {
|
|
// lets us get ownership over current state
|
|
match std::mem::replace(&mut *self, GeneratorA::Exit) {
|
|
GeneratorA::Enter(a1) => {
|
|
|
|
/*|---code before yield---|*/
|
|
/*|*/ println!("Hello"); /*|*/
|
|
/*|*/ let a = a1 * 2; /*|*/
|
|
/*|------------------------|*/
|
|
|
|
*self = GeneratorA::Yield1(a);
|
|
GeneratorState::Yielded(a)
|
|
}
|
|
GeneratorA::Yield1(_) => {
|
|
|
|
/*|----code after yield----|*/
|
|
/*|*/ println!("world!"); /*|*/
|
|
/*|-------------------------|*/
|
|
|
|
*self = GeneratorA::Exit;
|
|
GeneratorState::Complete(())
|
|
}
|
|
GeneratorA::Exit => panic!("Can't advance an exited generator!"),
|
|
}
|
|
}
|
|
}
|
|
|
|
</code></pre></pre>
|
|
<blockquote>
|
|
<p>The <code>yield</code> keyword was discussed first in <a href="https://github.com/rust-lang/rfcs/pull/1823">RFC#1823</a> and in <a href="https://github.com/rust-lang/rfcs/pull/1832">RFC#1832</a>.</p>
|
|
</blockquote>
|
|
<p>Now that you know that the <code>yield</code> keyword in reality rewrites your code to become a state machine,
|
|
you'll also know the basics of how <code>await</code> works. It's very similar.</p>
|
|
<p>Now, there are some limitations in our naive state machine above. What happens when you have a
|
|
<code>borrow</code> across a <code>yield</code> point?</p>
|
|
<p>We could forbid that, but <strong>one of the major design goals for the async/await syntax has been
|
|
to allow this</strong>. These kinds of borrows were not possible using <code>Futures 1.0</code> so we can't let this
|
|
limitation just slip and call it a day yet.</p>
|
|
<p>Instead of discussing it in theory, let's look at some code. </p>
|
|
<blockquote>
|
|
<p>We'll use the optimized version of the state machines which is used in Rust today. For a more
|
|
in-depth explanation see <a href="https://tmandry.gitlab.io/blog/posts/optimizing-await-1/">Tyler Mandry's excellent article: How Rust optimizes async/await</a></p>
|
|
</blockquote>
|
|
<pre><code class="language-rust noplaypen ignore">let a = 4;
|
|
let b = move || {
|
|
    let to_borrow = String::from("Hello");
|
|
let borrowed = &to_borrow;
|
|
println!("{}", borrowed);
|
|
yield a * 2;
|
|
println!("{} world!", borrowed);
|
|
};
|
|
</code></pre>
|
|
<p>Now what does our rewritten state machine look like with this example?</p>
|
|
<pre><pre class="playpen"><code class="language-rust compile_fail">
|
|
# #![allow(unused_variables)]
|
|
#fn main() {
|
|
# // If you've ever wondered why the parameters are called Y and R the naming from
|
|
# // the original rfc most likely holds the answer
|
|
# enum GeneratorState<Y, R> {
|
|
# // originally called `CoResult`
|
|
# Yielded(Y), // originally called `Yield(Y)`
|
|
# Complete(R), // originally called `Return(R)`
|
|
# }
|
|
#
|
|
# trait Generator {
|
|
# type Yield;
|
|
# type Return;
|
|
# fn resume(&mut self) -> GeneratorState<Self::Yield, Self::Return>;
|
|
# }
|
|
|
|
enum GeneratorA {
|
|
Enter,
|
|
Yield1 {
|
|
to_borrow: String,
|
|
borrowed: &String, // uh, what lifetime should this have?
|
|
},
|
|
Exit,
|
|
}
|
|
|
|
# impl GeneratorA {
|
|
# fn start() -> Self {
|
|
# GeneratorA::Enter
|
|
# }
|
|
# }
|
|
|
|
impl Generator for GeneratorA {
|
|
type Yield = usize;
|
|
type Return = ();
|
|
fn resume(&mut self) -> GeneratorState<Self::Yield, Self::Return> {
|
|
// lets us get ownership over current state
|
|
match std::mem::replace(&mut *self, GeneratorA::Exit) {
|
|
GeneratorA::Enter => {
|
|
let to_borrow = String::from("Hello");
|
|
let borrowed = &to_borrow;
|
|
*self = GeneratorA::Yield1 {to_borrow, borrowed};
|
|
GeneratorState::Yielded(borrowed.len())
|
|
}
|
|
|
|
GeneratorA::Yield1 {to_borrow, borrowed} => {
|
|
println!("Hello {}", borrowed);
|
|
*self = GeneratorA::Exit;
|
|
GeneratorState::Complete(())
|
|
}
|
|
GeneratorA::Exit => panic!("Can't advance an exited generator!"),
|
|
}
|
|
}
|
|
}
|
|
#}</code></pre></pre>
|
|
<p>If you try to compile this you'll get an error (just try it yourself by pressing play).</p>
|
|
<p>What is the lifetime of <code>&String</code>. It's not the same as the lifetime of <code>Self</code>. It's not <code>static</code>.
|
|
Turns out that it's not possible for us in Rust's syntax to describe this lifetime, which means that
|
|
to make this work, we'll have to let the compiler know that <em>we</em> keep this correct.</p>
|
|
<p>That means turning to unsafe.</p>
|
|
<p>Let's try to write an implementation that will compile using <code>unsafe</code>. As you'll
|
|
see we end up in a <em>self referential struct</em>. A struct which holds references
|
|
into itself.</p>
|
|
<p>As you'll notice, this compiles just fine!</p>
|
|
<pre><pre class="playpen"><code class="language-rust editable">pub fn main() {
|
|
let mut gen = GeneratorA::start();
|
|
let mut gen2 = GeneratorA::start();
|
|
|
|
if let GeneratorState::Yielded(n) = gen.resume() {
|
|
println!("Got value {}", n);
|
|
}
|
|
|
|
// If you uncomment this, very bad things can happen. This is why we need `Pin`
|
|
// std::mem::swap(&mut gen, &mut gen2);
|
|
|
|
if let GeneratorState::Yielded(n) = gen2.resume() {
|
|
println!("Got value {}", n);
|
|
}
|
|
|
|
// if you uncomment `mem::swap`.. this should now start gen2.
|
|
if let GeneratorState::Complete(()) = gen.resume() {
|
|
()
|
|
};
|
|
}
|
|
|
|
enum GeneratorState<Y, R> {
|
|
Yielded(Y), // originally called `Yield(Y)`
|
|
Complete(R), // originally called `Return(R)`
|
|
}
|
|
|
|
trait Generator {
|
|
type Yield;
|
|
type Return;
|
|
fn resume(&mut self) -> GeneratorState<Self::Yield, Self::Return>;
|
|
}
|
|
|
|
enum GeneratorA {
|
|
Enter,
|
|
Yield1 {
|
|
to_borrow: String,
|
|
borrowed: *const String, // Normally you'll see `std::ptr::NonNull` used instead of *ptr
|
|
},
|
|
Exit,
|
|
}
|
|
|
|
impl GeneratorA {
|
|
fn start() -> Self {
|
|
GeneratorA::Enter
|
|
}
|
|
}
|
|
impl Generator for GeneratorA {
|
|
type Yield = usize;
|
|
type Return = ();
|
|
fn resume(&mut self) -> GeneratorState<Self::Yield, Self::Return> {
|
|
// lets us get ownership over current state
|
|
match self {
|
|
GeneratorA::Enter => {
|
|
let to_borrow = String::from("Hello");
|
|
let borrowed = &to_borrow;
|
|
let res = borrowed.len();
|
|
|
|
// Tricks to actually get a self reference
|
|
*self = GeneratorA::Yield1 {to_borrow, borrowed: std::ptr::null()};
|
|
match self {
|
|
GeneratorA::Yield1{to_borrow, borrowed} => *borrowed = to_borrow,
|
|
_ => ()
|
|
};
|
|
|
|
GeneratorState::Yielded(res)
|
|
}
|
|
|
|
GeneratorA::Yield1 {borrowed, ..} => {
|
|
let borrowed: &String = unsafe {&**borrowed};
|
|
println!("{} world", borrowed);
|
|
*self = GeneratorA::Exit;
|
|
GeneratorState::Complete(())
|
|
}
|
|
GeneratorA::Exit => panic!("Can't advance an exited generator!"),
|
|
}
|
|
}
|
|
}
|
|
</code></pre></pre>
|
|
<blockquote>
|
|
<p>Try to uncomment the line with <code>mem::swap</code> and see the result of running this code.</p>
|
|
</blockquote>
|
|
<p>While the example above compiles just fine, we expose users of this code to
|
|
both possible undefined behavior and other memory errors while using just safe
|
|
Rust. This is a big problem!</p>
|
|
<p>But now, let's prevent the segfault from happening using <code>Pin</code>. We'll discuss
|
|
<code>Pin</code> more below, but you'll get an introduction here by just reading the
|
|
comments.</p>
|
|
<pre><pre class="playpen"><code class="language-rust editable">#![feature(optin_builtin_traits)]
|
|
use std::pin::Pin;
|
|
|
|
pub fn main() {
|
|
let gen1 = GeneratorA::start();
|
|
let gen2 = GeneratorA::start();
|
|
// Before we pin the pointers, this is safe to do
|
|
// std::mem::swap(&mut gen, &mut gen2);
|
|
|
|
// constructing a `Pin::new()` on a type which does not implement `Unpin` is unsafe.
|
|
// However, as I mentioned in the start of the next chapter about `Pin` a
|
|
// boxed type automatically implements `Unpin` so to stay in safe Rust we can use
|
|
// that to avoid unsafe. You can also use crates like `pin_utils` to do this safely,
|
|
// just remember that they use unsafe under the hood so it's like using an already-reviewed
|
|
// unsafe implementation.
|
|
|
|
let mut pinned1 = Box::pin(gen1);
|
|
let mut pinned2 = Box::pin(gen2);
|
|
// Uncomment these if you think it's safe to pin the values to the stack instead
|
|
// (it is in this case). Remember to comment out the two previous lines first.
|
|
//let mut pinned1 = unsafe { Pin::new_unchecked(&mut gen1) };
|
|
//let mut pinned2 = unsafe { Pin::new_unchecked(&mut gen2) };
|
|
|
|
if let GeneratorState::Yielded(n) = pinned1.as_mut().resume() {
|
|
println!("Got value {}", n);
|
|
}
|
|
|
|
if let GeneratorState::Yielded(n) = pinned2.as_mut().resume() {
|
|
println!("Gen2 got value {}", n);
|
|
};
|
|
|
|
// This won't work
|
|
// std::mem::swap(&mut gen, &mut gen2);
|
|
// This will work but will just swap the pointers. Nothing inherently bad happens here.
|
|
// std::mem::swap(&mut pinned1, &mut pinned2);
|
|
|
|
let _ = pinned1.as_mut().resume();
|
|
let _ = pinned2.as_mut().resume();
|
|
}
|
|
|
|
enum GeneratorState<Y, R> {
|
|
// originally called `CoResult`
|
|
Yielded(Y), // originally called `Yield(Y)`
|
|
Complete(R), // originally called `Return(R)`
|
|
}
|
|
|
|
trait Generator {
|
|
type Yield;
|
|
type Return;
|
|
fn resume(self: Pin<&mut Self>) -> GeneratorState<Self::Yield, Self::Return>;
|
|
}
|
|
|
|
enum GeneratorA {
|
|
Enter,
|
|
Yield1 {
|
|
to_borrow: String,
|
|
borrowed: *const String, // Normally you'll see `std::ptr::NonNull` used instead of *ptr
|
|
},
|
|
Exit,
|
|
}
|
|
|
|
impl GeneratorA {
|
|
fn start() -> Self {
|
|
GeneratorA::Enter
|
|
}
|
|
}
|
|
|
|
// This tells us that the underlying pointer is not safe to move after pinning. In this case,
|
|
// only we as implementors "feel" this, however, if someone is relying on our Pinned pointer
|
|
// this will prevent them from moving it. You need to enable the feature flag
|
|
// `#![feature(optin_builtin_traits)]` and use the nightly compiler to implement `!Unpin`.
|
|
// Normally, you would use `std::marker::PhantomPinned` to indicate that the
|
|
// struct is `!Unpin`.
|
|
impl !Unpin for GeneratorA { }
|
|
|
|
impl Generator for GeneratorA {
|
|
type Yield = usize;
|
|
type Return = ();
|
|
fn resume(self: Pin<&mut Self>) -> GeneratorState<Self::Yield, Self::Return> {
|
|
// lets us get ownership over current state
|
|
let this = unsafe { self.get_unchecked_mut() };
|
|
match this {
|
|
GeneratorA::Enter => {
|
|
let to_borrow = String::from("Hello");
|
|
let borrowed = &to_borrow;
|
|
let res = borrowed.len();
|
|
|
|
// Trick to actually get a self reference. We can't reference
|
|
// the `String` earlier since these references will point to the
|
|
// location in this stack frame which will not be valid anymore
|
|
// when this function returns.
|
|
*this = GeneratorA::Yield1 {to_borrow, borrowed: std::ptr::null()};
|
|
match this {
|
|
GeneratorA::Yield1{to_borrow, borrowed} => *borrowed = to_borrow,
|
|
_ => ()
|
|
};
|
|
|
|
GeneratorState::Yielded(res)
|
|
}
|
|
|
|
GeneratorA::Yield1 {borrowed, ..} => {
|
|
let borrowed: &String = unsafe {&**borrowed};
|
|
println!("{} world", borrowed);
|
|
*this = GeneratorA::Exit;
|
|
GeneratorState::Complete(())
|
|
}
|
|
GeneratorA::Exit => panic!("Can't advance an exited generator!"),
|
|
}
|
|
}
|
|
}
|
|
</code></pre></pre>
|
|
<p>Now, as you see, the user of this code must either:</p>
|
|
<ol>
|
|
<li>Box the value and thereby allocating it on the heap</li>
|
|
<li>Use <code>unsafe</code> and pin the value to the stack. The user knows that if they move
|
|
the value afterwards it will violate the guarantee they promise to uphold when
|
|
they did their unsafe implementation.</li>
|
|
</ol>
|
|
<p>Now, the code which is created and the need for <code>Pin</code> to allow for borrowing
|
|
across <code>yield</code> points should be pretty clear. </p>
|
|
<h1><a class="header" href="#pin" id="pin">Pin</a></h1>
|
|
<blockquote>
|
|
<p><strong>Relevant for</strong></p>
|
|
<ol>
|
|
<li>To understand <code>Generators</code> and <code>Futures</code></li>
|
|
<li>Knowing how to use <code>Pin</code> is required when implementing your own <code>Future</code></li>
|
|
<li>To understand self-referential types in Rust</li>
|
|
<li>This is the way borrowing across <code>await</code> points is accomplished</li>
|
|
</ol>
|
|
<p><code>Pin</code> was suggested in <a href="https://github.com/rust-lang/rfcs/blob/master/text/2349-pin.md">RFC#2349</a></p>
|
|
</blockquote>
|
|
<p>We already got a brief introduction of <code>Pin</code> in the previous chapters, so we'll
|
|
start off here with some definitions and a set of rules to remember.</p>
|
|
<h2><a class="header" href="#definitions" id="definitions">Definitions</a></h2>
|
|
<p>Pin consists of the <code>Pin</code> type and the <code>Unpin</code> marker. Pin's purpose in life is
|
|
to govern the rules that need to apply for types which implement <code>!Unpin</code>.</p>
|
|
<p>Pin is only relevant for pointers. A reference to an object is a pointer.</p>
|
|
<p>Yep, that's double negation for you, as in "does-not-implement-unpin". For this
|
|
chapter and only this chapter we'll rename these markers to:</p>
|
|
<blockquote>
|
|
<p><code>!Unpin</code> = <code>MustStay</code> and <code>Unpin</code> = <code>CanMove</code></p>
|
|
</blockquote>
|
|
<p>It just makes it so much easier to understand them.</p>
|
|
<h2><a class="header" href="#rules-to-remember" id="rules-to-remember">Rules to remember</a></h2>
|
|
<ol>
|
|
<li>
|
|
<p>If <code>T: CanMove</code> (which is the default), then <code>Pin<'a, T></code> is entirely equivalent to <code>&'a mut T</code>. in other words: <code>CanMove</code> means it's OK for this type to be moved even when pinned, so <code>Pin</code> will have no effect on such a type.</p>
|
|
</li>
|
|
<li>
|
|
<p>Getting a <code>&mut T</code> to a pinned pointer requires unsafe if <code>T: MustStay</code>. In other words: requiring a pinned pointer to a type which is <code>MustStay</code> prevents the <em>user</em> of that API from moving that value unless it choses to write <code>unsafe</code> code.</p>
|
|
</li>
|
|
<li>
|
|
<p>Pinning does nothing special with that memory like putting it into some "read only" memory or anything fancy. It only tells the compiler that some operations on this value should be forbidden. </p>
|
|
</li>
|
|
<li>
|
|
<p>Most standard library types implement <code>CanMove</code>. The same goes for most
|
|
"normal" types you encounter in Rust. <code>Futures</code> and <code>Generators</code> are two
|
|
exceptions.</p>
|
|
</li>
|
|
<li>
|
|
<p>The main use case for <code>Pin</code> is to allow self referential types, the whole
|
|
justification for stabilizing them was to allow that. There are still corner
|
|
cases in the API which are being explored.</p>
|
|
</li>
|
|
<li>
|
|
<p>The implementation behind objects that are <code>MustStay</code> is most likely unsafe.
|
|
Moving such a type can cause the universe to crash. As of the time of writing
|
|
this book, creating and reading the fields of a self referential struct still requires <code>unsafe</code>.</p>
|
|
</li>
|
|
<li>
|
|
<p>You're not really meant to be implementing <code>MustStay</code>, but you can on nightly with a feature flag, or by adding <code>std::marker::PhantomPinned</code> to your type.</p>
|
|
</li>
|
|
<li>
|
|
<p>When pinning, you can pin a value to memory either on the stack or
|
|
on the heap.</p>
|
|
</li>
|
|
<li>
|
|
<p>Pinning a <code>MustStay</code> pointer to the stack requires <code>unsafe</code></p>
|
|
</li>
|
|
<li>
|
|
<p>Pinning a <code>MustStay</code> pointer to the heap does not require <code>unsafe</code>. There is a shortcut for doing this using <code>Box::pin</code>.</p>
|
|
</li>
|
|
</ol>
|
|
<blockquote>
|
|
<p>Unsafe code does not mean it's literally "unsafe", it only relieves the
|
|
guarantees you normally get from the compiler. An <code>unsafe</code> implementation can
|
|
be perfectly safe to do, but you have no safety net.</p>
|
|
</blockquote>
|
|
<p>Let's take a look at an example:</p>
|
|
<pre><pre class="playpen"><code class="language-rust editable">use std::pin::Pin;
|
|
|
|
fn main() {
|
|
let mut test1 = Test::new("test1");
|
|
test1.init();
|
|
let mut test2 = Test::new("test2");
|
|
test2.init();
|
|
|
|
println!("a: {}, b: {}", test1.a(), test1.b());
|
|
std::mem::swap(&mut test1, &mut test2); // try commenting out this line
|
|
println!("a: {}, b: {}", test2.a(), test2.b());
|
|
|
|
}
|
|
|
|
#[derive(Debug)]
|
|
struct Test {
|
|
a: String,
|
|
b: *const String,
|
|
}
|
|
|
|
impl Test {
|
|
fn new(txt: &str) -> Self {
|
|
let a = String::from(txt);
|
|
Test {
|
|
a,
|
|
b: std::ptr::null(),
|
|
}
|
|
}
|
|
|
|
fn init(&mut self) {
|
|
let self_ref: *const String = &self.a;
|
|
self.b = self_ref;
|
|
}
|
|
|
|
fn a(&self) -> &str {
|
|
&self.a
|
|
}
|
|
|
|
fn b(&self) -> &String {
|
|
unsafe {&*(self.b)}
|
|
}
|
|
}
|
|
</code></pre></pre>
|
|
<p>Let's walk through this example since we'll be using it the rest of this chapter.</p>
|
|
<p>We have a self-referential struct <code>Test</code>. <code>Test</code> needs an <code>init</code> method to be
|
|
created which is strange but we'll need that to keep this example as short as
|
|
possible.</p>
|
|
<p><code>Test</code> provides two methods to get a reference to the value of the fields
|
|
<code>a</code> and <code>b</code>. Since <code>b</code> is a reference to <code>a</code> we store it as a pointer since
|
|
the borrowing rules of Rust doesn't allow us to define this lifetime.</p>
|
|
<p>In our main method we first instantiate two instances of <code>Test</code> and print out
|
|
the value of the fields on <code>test1</code>. We get:</p>
|
|
<pre><code class="language-rust ignore">a: test1, b: test1
|
|
</code></pre>
|
|
<p>Next we swap the data stored at the memory location which <code>test1</code> is pointing to
|
|
with the data stored at the memory location <code>test2</code> is pointing to and vice versa.</p>
|
|
<p>We should expect that printing the fields of <code>test2</code> should display the same as
|
|
<code>test1</code> (since the object we printed before the swap has moved there now).</p>
|
|
<pre><code class="language-rust ignore">a: test1, b: test2
|
|
</code></pre>
|
|
<p>The pointer to <code>b</code> still points to the old location. That location is now
|
|
occupied with the string "test2". This can be a bit hard to visualize so I made
|
|
a figure that I hope can help.</p>
|
|
<p><strong>Fig 1: Before and after swap</strong>
|
|
<img src="./assets/swap_problem.jpg" alt="swap_problem" /></p>
|
|
<p>As you can see this results in unwanted behavior. It's easy to get this to
|
|
segfault, show UB and fail in other spectacular ways as well.</p>
|
|
<p>If we change the example to using <code>Pin</code> instead:</p>
|
|
<pre><pre class="playpen"><code class="language-rust editable">use std::pin::Pin;
|
|
use std::marker::PhantomPinned;
|
|
|
|
#[derive(Debug)]
|
|
struct Test {
|
|
a: String,
|
|
b: *const String,
|
|
_marker: PhantomPinned,
|
|
}
|
|
|
|
|
|
impl Test {
|
|
fn new(txt: &str) -> Self {
|
|
let a = String::from(txt);
|
|
Test {
|
|
a,
|
|
b: std::ptr::null(),
|
|
// This makes our type `!Unpin`
|
|
_marker: PhantomPinned,
|
|
}
|
|
}
|
|
fn init(&mut self) {
|
|
let self_ptr: *const String = &self.a;
|
|
self.b = self_ptr;
|
|
}
|
|
|
|
fn a<'a>(self: Pin<&'a Self>) -> &'a str {
|
|
&self.get_ref().a
|
|
}
|
|
|
|
fn b<'a>(self: Pin<&'a Self>) -> &'a String {
|
|
unsafe { &*(self.b) }
|
|
}
|
|
}
|
|
|
|
pub fn main() {
|
|
let mut test1 = Test::new("test1");
|
|
test1.init();
|
|
let mut test1_pin = unsafe { Pin::new_unchecked(&mut test1) };
|
|
let mut test2 = Test::new("test2");
|
|
test2.init();
|
|
let mut test2_pin = unsafe { Pin::new_unchecked(&mut test2) };
|
|
|
|
println!(
|
|
"a: {}, b: {}",
|
|
Test::a(test1_pin.as_ref()),
|
|
Test::b(test1_pin.as_ref())
|
|
);
|
|
|
|
// Try to uncomment this and see what happens
|
|
// std::mem::swap(test1_pin.as_mut(), test2_pin.as_mut());
|
|
println!(
|
|
"a: {}, b: {}",
|
|
Test::a(test2_pin.as_ref()),
|
|
Test::b(test2_pin.as_ref())
|
|
);
|
|
}
|
|
|
|
</code></pre></pre>
|
|
<p>Now, what we've done here is pinning a stack address. That will always be
|
|
<code>unsafe</code> if our type implements <code>!Unpin</code> (aka <code>MustStay</code>). </p>
|
|
<p>We use some tricks here, including requiring an <code>init</code>. If we want to fix that
|
|
and let users avoid <code>unsafe</code> we need to pin our data on the heap instead.</p>
|
|
<blockquote>
|
|
<p>Stack pinning will always depend on the current stack frame we're in, so we
|
|
can't create a self referential object in one stack frame and return it since
|
|
any pointers we take to "self" is invalidated.</p>
|
|
</blockquote>
|
|
<p>The next example solves some of our friction at the cost of a heap allocation.</p>
|
|
<pre><pre class="playpen"><code class="language-rust editbable">use std::pin::Pin;
|
|
use std::marker::PhantomPinned;
|
|
|
|
#[derive(Debug)]
|
|
struct Test {
|
|
a: String,
|
|
b: *const String,
|
|
_marker: PhantomPinned,
|
|
}
|
|
|
|
impl Test {
|
|
fn new(txt: &str) -> Pin<Box<Self>> {
|
|
let a = String::from(txt);
|
|
let t = Test {
|
|
a,
|
|
b: std::ptr::null(),
|
|
_marker: PhantomPinned,
|
|
};
|
|
let mut boxed = Box::pin(t);
|
|
let self_ptr: *const String = &boxed.as_ref().a;
|
|
unsafe { boxed.as_mut().get_unchecked_mut().b = self_ptr };
|
|
|
|
boxed
|
|
}
|
|
|
|
fn a<'a>(self: Pin<&'a Self>) -> &'a str {
|
|
&self.get_ref().a
|
|
}
|
|
|
|
fn b<'a>(self: Pin<&'a Self>) -> &'a String {
|
|
unsafe { &*(self.b) }
|
|
}
|
|
}
|
|
|
|
pub fn main() {
|
|
let mut test1 = Test::new("test1");
|
|
let mut test2 = Test::new("test2");
|
|
|
|
println!("a: {}, b: {}",test1.as_ref().a(), test1.as_ref().b());
|
|
|
|
// Try to uncomment this and see what happens
|
|
// std::mem::swap(&mut test1, &mut test2);
|
|
println!("a: {}, b: {}",test2.as_ref().a(), test2.as_ref().b());
|
|
}
|
|
</code></pre></pre>
|
|
<p>The fact that boxing (heap allocating) a value that implements <code>!Unpin</code> is safe
|
|
makes sense. Once the data is allocated on the heap it will have a stable address.</p>
|
|
<p>There is no need for us as users of the API to take special care and ensure
|
|
that the self-referential pointer stays valid.</p>
|
|
<p>There are ways to safely give some guarantees on stack pinning as well, but right
|
|
now you need to use a crate like <a href="https://docs.rs/pin-utils">pin_utils</a> to do that.</p>
|
|
<h3><a class="header" href="#projectionstructural-pinning" id="projectionstructural-pinning">Projection/structural pinning</a></h3>
|
|
<p>In short, projection is using a field on your type. <code>mystruct.field1</code> is a
|
|
projection. Structural pinning is using <code>Pin</code> on struct fields. This has several
|
|
caveats and is not something you'll normally see so I refer to the documentation
|
|
for that.</p>
|
|
<h3><a class="header" href="#pin-and-drop" id="pin-and-drop">Pin and Drop</a></h3>
|
|
<p>The <code>Pin</code> guarantee exists from the moment the value is pinned until it's dropped.
|
|
In the <code>Drop</code> implementation you take a mutable reference to <code>self</code>, which means
|
|
extra care must be taken when implementing <code>Drop</code> for pinned types.</p>
|
|
<h2><a class="header" href="#putting-it-all-together" id="putting-it-all-together">Putting it all together</a></h2>
|
|
<p>This is exactly what we'll do when we implement our own <code>Futures</code> stay tuned,
|
|
we're soon finished.</p>
|
|
<h1><a class="header" href="#reactorexecutor-pattern" id="reactorexecutor-pattern">Reactor/Executor Pattern</a></h1>
|
|
<blockquote>
|
|
<p><strong>Relevant for:</strong></p>
|
|
<ul>
|
|
<li>Getting a high level overview of a common runtime model in Rust</li>
|
|
<li>Introducing these terms so we're on the same page when referring to them</li>
|
|
<li>Getting pointers on where to get more information about this pattern</li>
|
|
</ul>
|
|
</blockquote>
|
|
<p>If you don't know what this is, you should take a few minutes and read about
|
|
it. You will encounter the term <code>Reactor</code> and <code>Executor</code> a lot when working
|
|
with async code in Rust.</p>
|
|
<p>I have written a quick introduction explaining this pattern before which you
|
|
can take a look at here:</p>
|
|
<p><a href="https://cfsamsonbooks.gitbook.io/epoll-kqueue-iocp-explained/appendix-1/reactor-executor-pattern"><img src="./assets/reactorexecutor.png" alt="homepage" /></a></p>
|
|
<div style="text-align:center">
|
|
<a href="https://cfsamsonbooks.gitbook.io/epoll-kqueue-iocp-explained/appendix-1/reactor-executor-pattern">Epoll, Kqueue and IOCP Explained - The Reactor-Executor Pattern</a>
|
|
</div>
|
|
<p>I'll re-iterate the most important parts here.</p>
|
|
<p><strong>This pattern consists of at least 2 parts:</strong></p>
|
|
<ol>
|
|
<li><strong>A reactor</strong>
|
|
<ul>
|
|
<li>handles some kind of event queue</li>
|
|
<li>has the responsibility of responding to events</li>
|
|
</ul>
|
|
</li>
|
|
<li><strong>An executor</strong>
|
|
<ul>
|
|
<li>Often has a scheduler</li>
|
|
<li>Holds a set of suspended tasks, and has the responsibility of resuming
|
|
them when an event has occurred</li>
|
|
</ul>
|
|
</li>
|
|
<li><strong>The concept of a task</strong>
|
|
<ul>
|
|
<li>A set of operations that can be stopped half way and resumed later on</li>
|
|
</ul>
|
|
</li>
|
|
</ol>
|
|
<p>This kind of pattern is common outside of Rust as well, but it's especially popular in Rust due to how well it aligns with the API provided by Rust's standard library. This model separates concerns between handling and scheduling tasks, and queuing and responding to I/O events.</p>
|
|
<h2><a class="header" href="#the-reactor" id="the-reactor">The Reactor</a></h2>
|
|
<p>Since concurrency mostly makes sense when interacting with the outside world (or
|
|
at least some peripheral), we need something to actually abstract over this
|
|
interaction in an asynchronous way. </p>
|
|
<p>This is the <code>Reactors</code> job. Most often you'll
|
|
see reactors in rust use a library called <a href="https://github.com/tokio-rs/mio">Mio</a>, which provides non
|
|
blocking APIs and event notification for several platforms.</p>
|
|
<p>The reactor will typically give you something like a <code>TcpStream</code> (or any other resource) which you'll use to create an I/O request. What you get in return
|
|
is a <code>Future</code>. </p>
|
|
<p>We can call this kind of <code>Future</code> a "leaf future", since it's some operation
|
|
we'll actually wait on and that we can chain operations on which are performed
|
|
once the leaf future is ready. </p>
|
|
<h2><a class="header" href="#the-task" id="the-task">The Task</a></h2>
|
|
<p>In Rust we call an interruptible task a <code>Future</code>. Futures have a well-defined interface, which means they can be used across the entire ecosystem. We can chain
|
|
these <code>Futures</code> so that once a "leaf future" is ready we'll perform a set of
|
|
operations. </p>
|
|
<p>These operations can spawn new leaf futures themselves.</p>
|
|
<h2><a class="header" href="#the-executor" id="the-executor">The executor</a></h2>
|
|
<p>The executor's task is to take one or more futures and run them to completion.</p>
|
|
<p>The first thing an <code>executor</code> does when it gets a <code>Future</code> is polling it.</p>
|
|
<p><strong>When polled one of three things can happen:</strong></p>
|
|
<ul>
|
|
<li>The future returns <code>Ready</code> and we schedule whatever chained operations to run</li>
|
|
<li>The future hasn't been polled before so we pass it a <code>Waker</code> and suspend it</li>
|
|
<li>The future has been polled before but is not ready and returns <code>Pending</code></li>
|
|
</ul>
|
|
<p>Rust provides a way for the Reactor and Executor to communicate through the <code>Waker</code>. The reactor stores this <code>Waker</code> and calls <code>Waker::wake()</code> on it once
|
|
a <code>Future</code> has resolved and should be polled again.</p>
|
|
<p>We'll get to know these concepts better in the following chapters.</p>
|
|
<p>Providing these pieces lets Rust take care of a lot of the ergonomic "friction"
|
|
programmers meet when faced with async code, and still not dictate any
|
|
preferred runtime to actually do the scheduling and I/O queues.</p>
|
|
<p>With that out of the way, let's move on to actually implement all this in our
|
|
example.</p>
|
|
<h1><a class="header" href="#futures-in-rust" id="futures-in-rust">Futures in Rust</a></h1>
|
|
<p>We'll create our own <code>Futures</code> together with a fake reactor and a simple
|
|
executor which allows you to edit, run and play around with the code right here
|
|
in your browser.</p>
|
|
<p>I'll walk you through the example, but if you want to check it out closer, you
|
|
can always clone the repository and play around with the code yourself. There
|
|
are two branches. The <code>basic_example</code> is this code, and the <code>basic_example_commented</code>
|
|
is this example with extensive comments.</p>
|
|
<h2><a class="header" href="#implementing-our-own-futures" id="implementing-our-own-futures">Implementing our own Futures</a></h2>
|
|
<p>Let's start with why we wrote this book, by implementing our own <code>Futures</code>.</p>
|
|
<pre><pre class="playpen"><code class="language-rust edition2018">use std::{
|
|
future::Future, pin::Pin, sync::{mpsc::{channel, Sender}, Arc, Mutex},
|
|
task::{Context, Poll, RawWaker, RawWakerVTable, Waker},
|
|
thread::{self, JoinHandle}, time::{Duration, Instant}
|
|
};
|
|
|
|
fn main() {
|
|
// This is just to make it easier for us to see when our Future was resolved
|
|
let start = Instant::now();
|
|
|
|
    // Many runtimes create a global `reactor`; we pass it as an argument
|
|
let reactor = Reactor::new();
|
|
// Since we'll share this between threads we wrap it in a
|
|
    // atomically-refcounted mutex.
|
|
let reactor = Arc::new(Mutex::new(reactor));
|
|
|
|
// We create two tasks:
|
|
// - first parameter is the `reactor`
|
|
// - the second is a timeout in seconds
|
|
// - the third is an `id` to identify the task
|
|
let future1 = Task::new(reactor.clone(), 2, 1);
|
|
let future2 = Task::new(reactor.clone(), 1, 2);
|
|
|
|
// an `async` block works the same way as an `async fn` in that it compiles
|
|
// our code into a state machine, `yielding` at every `await` point.
|
|
let fut1 = async {
|
|
let val = future1.await;
|
|
let dur = (Instant::now() - start).as_secs_f32();
|
|
println!("Future got {} at time: {:.2}.", val, dur);
|
|
};
|
|
|
|
let fut2 = async {
|
|
let val = future2.await;
|
|
let dur = (Instant::now() - start).as_secs_f32();
|
|
println!("Future got {} at time: {:.2}.", val, dur);
|
|
};
|
|
|
|
    // Our executor can only run one future at a time, this is pretty normal
|
|
// though. You have a set of operations containing many futures that
|
|
// ends up as a single future that drives them all to completion.
|
|
let mainfut = async {
|
|
fut1.await;
|
|
fut2.await;
|
|
};
|
|
|
|
// This executor will block the main thread until the futures is resolved
|
|
block_on(mainfut);
|
|
// When we're done, we want to shut down our reactor thread so our program
|
|
// ends nicely.
|
|
reactor.lock().map(|mut r| r.close()).unwrap();
|
|
}
|
|
|
|
//// ============================ EXECUTOR ====================================
|
|
|
|
// Our executor takes any object which implements the `Future` trait
|
|
fn block_on<F: Future>(mut future: F) -> F::Output {
|
|
// the first thing we do is to construct a `Waker` which we'll pass on to
|
|
// the `reactor` so it can wake us up when an event is ready.
|
|
let mywaker = Arc::new(MyWaker{ thread: thread::current() });
|
|
let waker = waker_into_waker(Arc::into_raw(mywaker));
|
|
// The context struct is just a wrapper for a `Waker` object. Maybe in the
|
|
// future this will do more, but right now it's just a wrapper.
|
|
let mut cx = Context::from_waker(&waker);
|
|
|
|
// We poll in a loop, but it's not a busy loop. It will only run when
|
|
// an event occurs, or a thread has a "spurious wakeup" (an unexpected wakeup
|
|
// that can happen for no good reason).
|
|
let val = loop {
|
|
// So, since we run this on one thread and run one future to completion
|
|
// we can pin the `Future` to the stack. This is unsafe, but saves an
|
|
// allocation. We could `Box::pin` it too if we wanted. This is however
|
|
// safe since we don't move the `Future` here.
|
|
let pinned = unsafe { Pin::new_unchecked(&mut future) };
|
|
match Future::poll(pinned, &mut cx) {
|
|
// when the Future is ready we're finished
|
|
Poll::Ready(val) => break val,
|
|
// If we get a `pending` future we just go to sleep...
|
|
Poll::Pending => thread::park(),
|
|
};
|
|
};
|
|
val
|
|
}
|
|
|
|
// ====================== FUTURE IMPLEMENTATION ==============================
|
|
|
|
// This is the definition of our `Waker`. We use a regular thread-handle here.
|
|
// It works but it's not a good solution. If one of our `Futures` holds a handle
|
|
// to our thread and takes it with it to a different thread the following could
|
|
// happen:
|
|
// 1. Our future calls `unpark` from a different thread
|
|
// 2. Our `executor` thinks that data is ready and wakes up and polls the future
|
|
// 3. The future is not ready yet but one nanosecond later the `Reactor` gets
|
|
// an event and calls `wake()` which also unparks our thread.
|
|
// 4. This could all happen before we go to sleep again since these processes
|
|
// run in parallel.
|
|
// 5. Our reactor has called `wake` but our thread is still sleeping since it was
|
|
// already awake at that point.
|
|
// 6. We're deadlocked and our program stops working
|
|
// There are many better solutions, here are some:
|
|
// - Use `std::sync::CondVar`
|
|
// - Use [crossbeam::sync::Parker](https://docs.rs/crossbeam/0.7.3/crossbeam/sync/struct.Parker.html)
|
|
#[derive(Clone)]
|
|
struct MyWaker {
|
|
thread: thread::Thread,
|
|
}
|
|
|
|
// This is the definition of our `Future`. It keeps all the information we
|
|
// need. This one holds a reference to our `reactor`, that's just to make
|
|
// this example as easy as possible. It doesn't need to hold a reference to
|
|
// the whole reactor, but it needs to be able to register itself with the
|
|
// reactor.
|
|
#[derive(Clone)]
|
|
pub struct Task {
|
|
id: usize,
|
|
reactor: Arc<Mutex<Reactor>>,
|
|
data: u64,
|
|
is_registered: bool,
|
|
}
|
|
|
|
// These are function definitions we'll use for our waker. Remember the
|
|
// "Trait Objects" chapter from the book.
|
|
fn mywaker_wake(s: &MyWaker) {
|
|
let waker_ptr: *const MyWaker = s;
|
|
let waker_arc = unsafe {Arc::from_raw(waker_ptr)};
|
|
waker_arc.thread.unpark();
|
|
}
|
|
|
|
// Since we use an `Arc` cloning is just increasing the refcount on the smart
|
|
// pointer.
|
|
fn mywaker_clone(s: &MyWaker) -> RawWaker {
|
|
let arc = unsafe { Arc::from_raw(s).clone() };
|
|
std::mem::forget(arc.clone()); // increase ref count
|
|
RawWaker::new(Arc::into_raw(arc) as *const (), &VTABLE)
|
|
}
|
|
|
|
// This is actually a "helper function" to create a `Waker` vtable. In contrast
|
|
// to when we created a `Trait Object` from scratch we don't need to concern
|
|
// ourselves with the actual layout of the `vtable` and only provide a fixed
|
|
// set of functions
|
|
const VTABLE: RawWakerVTable = unsafe {
|
|
RawWakerVTable::new(
|
|
|s| mywaker_clone(&*(s as *const MyWaker)), // clone
|
|
|s| mywaker_wake(&*(s as *const MyWaker)), // wake
|
|
|s| mywaker_wake(*(s as *const &MyWaker)), // wake by ref
|
|
|s| drop(Arc::from_raw(s as *const MyWaker)), // decrease refcount
|
|
)
|
|
};
|
|
|
|
// Instead of implementing this on the `MyWaker` object in `impl MyWaker...` we
|
|
// just use this pattern instead since it saves us some lines of code.
|
|
fn waker_into_waker(s: *const MyWaker) -> Waker {
|
|
let raw_waker = RawWaker::new(s as *const (), &VTABLE);
|
|
unsafe { Waker::from_raw(raw_waker) }
|
|
}
|
|
|
|
impl Task {
|
|
fn new(reactor: Arc<Mutex<Reactor>>, data: u64, id: usize) -> Self {
|
|
Task {
|
|
id,
|
|
reactor,
|
|
data,
|
|
is_registered: false,
|
|
}
|
|
}
|
|
}
|
|
|
|
// This is our `Future` implementation
|
|
impl Future for Task {
|
|
// The output for this kind of `leaf future` is just a `usize`. For other
|
|
// futures this could be something more interesting like a byte stream.
|
|
type Output = usize;
|
|
fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
|
|
let mut r = self.reactor.lock().unwrap();
|
|
// we check with the `Reactor` if this future is in its "readylist"
|
|
if r.is_ready(self.id) {
|
|
// if it is, we return the data. In this case it's just the ID of
|
|
// the task.
|
|
Poll::Ready(self.id)
|
|
} else if self.is_registered {
|
|
// If the future is registered already, we just return `Pending`
|
|
Poll::Pending
|
|
} else {
|
|
// If we get here, it must be the first time this `Future` is polled
|
|
// so we register a task with our `reactor`
|
|
r.register(self.data, cx.waker().clone(), self.id);
|
|
// oh, we have to drop the lock on our `Mutex` here because we can't
|
|
// have a shared and exclusive borrow at the same time
|
|
drop(r);
|
|
self.is_registered = true;
|
|
Poll::Pending
|
|
}
|
|
}
|
|
}
|
|
|
|
// =============================== REACTOR ===================================
|
|
|
|
// This is a "fake" reactor. It does no real I/O, but that also makes our
|
|
// code possible to run in the book and in the playground
|
|
struct Reactor {
|
|
// we need some way of registering a Task with the reactor. Normally this
|
|
// would be an "interest" in an I/O event
|
|
dispatcher: Sender<Event>,
|
|
handle: Option<JoinHandle<()>>,
|
|
// This is a list of tasks that are ready, which means they should be polled
|
|
// for data.
|
|
readylist: Arc<Mutex<Vec<usize>>>,
|
|
}
|
|
|
|
// We just have two kinds of events: a "timeout" event called
|
|
// `Simple` and a `Close` event to close down our reactor.
|
|
#[derive(Debug)]
|
|
enum Event {
|
|
Close,
|
|
Simple(Waker, u64, usize),
|
|
}
|
|
|
|
impl Reactor {
|
|
fn new() -> Self {
|
|
// The way we register new events with our reactor is using a regular
|
|
// channel
|
|
let (tx, rx) = channel::<Event>();
|
|
let readylist = Arc::new(Mutex::new(vec![]));
|
|
let rl_clone = readylist.clone();
|
|
|
|
// This `Vec` will hold handles to all threads we spawn so we can
|
|
// join them later on and finish our program in a good manner
|
|
let mut handles = vec![];
|
|
// This will be the "Reactor thread"
|
|
let handle = thread::spawn(move || {
|
|
// This simulates some I/O resource
|
|
for event in rx {
|
|
let rl_clone = rl_clone.clone();
|
|
match event {
|
|
// If we get a close event we break out of the loop we're in
|
|
Event::Close => break,
|
|
Event::Simple(waker, duration, id) => {
|
|
|
|
// When we get an event we simply spawn a new thread...
|
|
let event_handle = thread::spawn(move || {
|
|
//... which will just sleep for the number of seconds
|
|
// we provided when creating the `Task`.
|
|
thread::sleep(Duration::from_secs(duration));
|
|
// When it's done sleeping we put the ID of this task
|
|
// on the "readylist"
|
|
rl_clone.lock().map(|mut rl| rl.push(id)).unwrap();
|
|
// Then we call `wake` which will wake up our
|
|
// executor and start polling the futures
|
|
waker.wake();
|
|
});
|
|
|
|
handles.push(event_handle);
|
|
}
|
|
}
|
|
}
|
|
|
|
// When we exit the Reactor we first join all the handles on
|
|
// the child threads we've spawned so we catch any panics and
|
|
// release all resources.
|
|
for handle in handles {
|
|
handle.join().unwrap();
|
|
}
|
|
});
|
|
|
|
Reactor {
|
|
readylist,
|
|
dispatcher: tx,
|
|
handle: Some(handle),
|
|
}
|
|
}
|
|
|
|
fn register(&mut self, duration: u64, waker: Waker, data: usize) {
|
|
// registering an event is as simple as sending an `Event` through
|
|
// the channel.
|
|
self.dispatcher
|
|
.send(Event::Simple(waker, duration, data))
|
|
.unwrap();
|
|
}
|
|
|
|
fn close(&mut self) {
|
|
self.dispatcher.send(Event::Close).unwrap();
|
|
}
|
|
|
|
// We need a way to check if any event's are ready. This will simply
|
|
// look through the "readylist" for an event matching the ID we want to
|
|
// check for.
|
|
fn is_ready(&self, id_to_check: usize) -> bool {
|
|
self.readylist
|
|
.lock()
|
|
.map(|rl| rl.iter().any(|id| *id == id_to_check))
|
|
.unwrap()
|
|
}
|
|
}
|
|
|
|
// When our `Reactor` is dropped we join the reactor thread with the thread
|
|
// owning our `Reactor` so we catch any panics and release all resources.
|
|
// It's not needed for this to work, but it really is a best practice to join
|
|
// all threads you spawn.
|
|
impl Drop for Reactor {
|
|
fn drop(&mut self) {
|
|
self.handle.take().map(|h| h.join().unwrap()).unwrap();
|
|
}
|
|
}
|
|
</code></pre></pre>
|
|
<h2><a class="header" href="#our-finished-code" id="our-finished-code">Our finished code</a></h2>
|
|
<p>Here is the whole example. You can edit it right here in your browser and
|
|
run it yourself. Have fun!</p>
|
|
<pre><pre class="playpen"><code class="language-rust edition2018 editable">
|
|
use std::{
|
|
future::Future, pin::Pin, sync::{mpsc::{channel, Sender}, Arc, Mutex},
|
|
task::{Context, Poll, RawWaker, RawWakerVTable, Waker},
|
|
thread::{self, JoinHandle}, time::{Duration, Instant}
|
|
};
|
|
|
|
fn main() {
|
|
let start = Instant::now();
|
|
|
|
// Many runtimes create a global `reactor`; we pass it as an argument
|
|
let reactor = Reactor::new();
|
|
let reactor = Arc::new(Mutex::new(reactor));
|
|
|
|
let future1 = Task::new(reactor.clone(), 2, 1);
|
|
let future2 = Task::new(reactor.clone(), 1, 2);
|
|
|
|
let fut1 = async {
|
|
let val = future1.await;
|
|
let dur = (Instant::now() - start).as_secs_f32();
|
|
println!("Future got {} at time: {:.2}.", val, dur);
|
|
};
|
|
|
|
let fut2 = async {
|
|
let val = future2.await;
|
|
let dur = (Instant::now() - start).as_secs_f32();
|
|
println!("Future got {} at time: {:.2}.", val, dur);
|
|
};
|
|
|
|
let mainfut = async {
|
|
fut1.await;
|
|
fut2.await;
|
|
};
|
|
|
|
block_on(mainfut);
|
|
reactor.lock().map(|mut r| r.close()).unwrap();
|
|
}
|
|
|
|
//// ============================ EXECUTOR ====================================
|
|
fn block_on<F: Future>(mut future: F) -> F::Output {
|
|
let mywaker = Arc::new(MyWaker{ thread: thread::current() });
|
|
let waker = waker_into_waker(Arc::into_raw(mywaker));
|
|
let mut cx = Context::from_waker(&waker);
|
|
let val = loop {
|
|
let pinned = unsafe { Pin::new_unchecked(&mut future) };
|
|
match Future::poll(pinned, &mut cx) {
|
|
Poll::Ready(val) => break val,
|
|
Poll::Pending => thread::park(),
|
|
};
|
|
};
|
|
val
|
|
}
|
|
|
|
// ====================== FUTURE IMPLEMENTATION ==============================
|
|
#[derive(Clone)]
|
|
struct MyWaker {
|
|
thread: thread::Thread,
|
|
}
|
|
|
|
#[derive(Clone)]
|
|
pub struct Task {
|
|
id: usize,
|
|
reactor: Arc<Mutex<Reactor>>,
|
|
data: u64,
|
|
is_registered: bool,
|
|
}
|
|
|
|
fn mywaker_wake(s: &MyWaker) {
|
|
let waker_ptr: *const MyWaker = s;
|
|
let waker_arc = unsafe {Arc::from_raw(waker_ptr)};
|
|
waker_arc.thread.unpark();
|
|
}
|
|
|
|
fn mywaker_clone(s: &MyWaker) -> RawWaker {
|
|
let arc = unsafe { Arc::from_raw(s).clone() };
|
|
std::mem::forget(arc.clone()); // increase ref count
|
|
RawWaker::new(Arc::into_raw(arc) as *const (), &VTABLE)
|
|
}
|
|
|
|
const VTABLE: RawWakerVTable = unsafe {
|
|
RawWakerVTable::new(
|
|
|s| mywaker_clone(&*(s as *const MyWaker)), // clone
|
|
|s| mywaker_wake(&*(s as *const MyWaker)), // wake
|
|
|s| mywaker_wake(*(s as *const &MyWaker)), // wake by ref
|
|
|s| drop(Arc::from_raw(s as *const MyWaker)), // decrease refcount
|
|
)
|
|
};
|
|
|
|
fn waker_into_waker(s: *const MyWaker) -> Waker {
|
|
let raw_waker = RawWaker::new(s as *const (), &VTABLE);
|
|
unsafe { Waker::from_raw(raw_waker) }
|
|
}
|
|
|
|
impl Task {
|
|
fn new(reactor: Arc<Mutex<Reactor>>, data: u64, id: usize) -> Self {
|
|
Task {
|
|
id,
|
|
reactor,
|
|
data,
|
|
is_registered: false,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl Future for Task {
|
|
type Output = usize;
|
|
fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
|
|
let mut r = self.reactor.lock().unwrap();
|
|
if r.is_ready(self.id) {
|
|
Poll::Ready(self.id)
|
|
} else if self.is_registered {
|
|
Poll::Pending
|
|
} else {
|
|
r.register(self.data, cx.waker().clone(), self.id);
|
|
drop(r);
|
|
self.is_registered = true;
|
|
Poll::Pending
|
|
}
|
|
}
|
|
}
|
|
|
|
// =============================== REACTOR ===================================
|
|
struct Reactor {
|
|
dispatcher: Sender<Event>,
|
|
handle: Option<JoinHandle<()>>,
|
|
readylist: Arc<Mutex<Vec<usize>>>,
|
|
}
|
|
#[derive(Debug)]
|
|
enum Event {
|
|
Close,
|
|
Simple(Waker, u64, usize),
|
|
}
|
|
|
|
impl Reactor {
|
|
fn new() -> Self {
|
|
let (tx, rx) = channel::<Event>();
|
|
let readylist = Arc::new(Mutex::new(vec![]));
|
|
let rl_clone = readylist.clone();
|
|
let mut handles = vec![];
|
|
let handle = thread::spawn(move || {
|
|
// This simulates some I/O resource
|
|
for event in rx {
|
|
let rl_clone = rl_clone.clone();
|
|
match event {
|
|
Event::Close => break,
|
|
Event::Simple(waker, duration, id) => {
|
|
let event_handle = thread::spawn(move || {
|
|
thread::sleep(Duration::from_secs(duration));
|
|
rl_clone.lock().map(|mut rl| rl.push(id)).unwrap();
|
|
waker.wake();
|
|
});
|
|
|
|
handles.push(event_handle);
|
|
}
|
|
}
|
|
}
|
|
|
|
for handle in handles {
|
|
handle.join().unwrap();
|
|
}
|
|
});
|
|
|
|
Reactor {
|
|
readylist,
|
|
dispatcher: tx,
|
|
handle: Some(handle),
|
|
}
|
|
}
|
|
|
|
fn register(&mut self, duration: u64, waker: Waker, data: usize) {
|
|
self.dispatcher
|
|
.send(Event::Simple(waker, duration, data))
|
|
.unwrap();
|
|
}
|
|
|
|
fn close(&mut self) {
|
|
self.dispatcher.send(Event::Close).unwrap();
|
|
}
|
|
|
|
fn is_ready(&self, id_to_check: usize) -> bool {
|
|
self.readylist
|
|
.lock()
|
|
.map(|rl| rl.iter().any(|id| *id == id_to_check))
|
|
.unwrap()
|
|
}
|
|
}
|
|
|
|
impl Drop for Reactor {
|
|
fn drop(&mut self) {
|
|
self.handle.take().map(|h| h.join().unwrap()).unwrap();
|
|
}
|
|
}
|
|
</code></pre></pre>
|
|
<h1><a class="header" href="#conclusion-and-exercises" id="conclusion-and-exercises">Conclusion and exercises</a></h1>
|
|
<h1><a class="header" href="#reader-excercises" id="reader-excercises">Reader exercises</a></h1>
|
|
<p>So our implementation has taken some obvious shortcuts and could use some improvement. Actually digging into the code and trying things yourself is a good way to learn. Here are some relatively simple and good exercises:</p>
|
|
<h2><a class="header" href="#avoid-threadpark" id="avoid-threadpark">Avoid <code>thread::park</code></a></h2>
|
|
<p>The big problem using <code>Thread::park</code> and <code>Thread::unpark</code> is that the user can access these same methods from their own code. Try to use another method of telling the OS to suspend our thread and wake it up again on our command. Some hints:</p>
|
|
<ul>
|
|
<li>Check out <code>CondVars</code>, here are two sources Wikipedia and the docs for <code>CondVar</code></li>
|
|
<li>Take a look at crates that help you with this exact problem like <a href="https://github.com/crossbeam-rs/crossbeam">Crossbeam </a>(specifically the <a href="https://docs.rs/crossbeam/0.7.3/crossbeam/sync/struct.Parker.html"><code>Parker</code></a>)</li>
|
|
</ul>
|
|
<h2><a class="header" href="#avoid-wrapping-the-whole-reactor-in-a-mutex-and-pass-it-around" id="avoid-wrapping-the-whole-reactor-in-a-mutex-and-pass-it-around">Avoid wrapping the whole <code>Reactor</code> in a mutex and pass it around</a></h2>
|
|
<p>First of all, protecting the whole <code>Reactor</code> and passing it around is overkill. We're only interested in synchronizing some parts of the information it contains. Try to refactor that out and only synchronize access to what's really needed.</p>
|
|
<ul>
|
|
<li>Do you want to pass around a reference to this information using an <code>Arc</code>?</li>
|
|
<li>Do you want to make this information global so it can be accessed from anywhere?</li>
|
|
</ul>
|
|
<p>Next, using a <code>Mutex</code> as a synchronization mechanism might be overkill since many methods only read data.</p>
|
|
<ul>
|
|
<li>Could an <a href="https://doc.rust-lang.org/stable/std/sync/struct.RwLock.html"><code>RwLock</code></a> be more efficient some places?</li>
|
|
<li>Could you use any of the synchronization mechanisms in <a href="https://github.com/crossbeam-rs/crossbeam">Crossbeam</a>?</li>
|
|
<li>Do you want to dig into <a href="https://cfsamsonbooks.gitbook.io/epoll-kqueue-iocp-explained/appendix-1/atomics-in-rust">atomics in Rust and implement a synchronization mechanism</a> of your own?</li>
|
|
</ul>
|
|
<h2><a class="header" href="#avoid-creating-a-new-waker-for-every-event" id="avoid-creating-a-new-waker-for-every-event">Avoid creating a new Waker for every event</a></h2>
|
|
<p>Right now we create a new instance of a Waker for every event we create. Is this really needed? </p>
|
|
<ul>
|
|
<li>Could we create one instance and then cache it (see <a href="https://stjepang.github.io/2020/01/25/build-your-own-block-on.html">this article from <code>u/sjepang</code></a>)?
|
|
<ul>
|
|
<li>Should we cache it in <code>thread_local!</code> storage?</li>
|
|
<li>Or should we cache it using a global constant?</li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
<h2><a class="header" href="#could-we-implement-more-methods-on-our-executor" id="could-we-implement-more-methods-on-our-executor">Could we implement more methods on our executor?</a></h2>
|
|
<p>What about CPU intensive tasks? Right now they'll prevent our executor thread from progressing and handling events. Could you create a thread pool and create a method to send such tasks to the thread pool instead together with a Waker which will wake up the executor thread once the CPU intensive task is done?</p>
|
|
<p>In both <code>async_std</code> and <code>tokio</code> this method is called <code>spawn_blocking</code>; a good place to start is to read the documentation and the code they use to implement that.</p>
|
|
<h2><a class="header" href="#further-reading" id="further-reading">Further reading</a></h2>
|
|
<p>There are many great resources for further study. Here are some of my suggestions:</p>
|
|
<p>The Async book:</p>
|
|
<h1><a class="header" href="#bonus-1-concurrent-futures" id="bonus-1-concurrent-futures">Bonus 1: concurrent futures</a></h1>
|
|
|
|
</main>
|
|
|
|
<nav class="nav-wrapper" aria-label="Page navigation">
|
|
<!-- Mobile navigation buttons -->
|
|
|
|
|
|
|
|
|
|
<div style="clear: both"></div>
|
|
</nav>
|
|
</div>
|
|
</div>
|
|
|
|
<nav class="nav-wide-wrapper" aria-label="Page navigation">
|
|
|
|
|
|
|
|
</nav>
|
|
|
|
</div>
|
|
|
|
|
|
<!-- Livereload script (if served using the cli tool) -->
|
|
<script type="text/javascript">
|
|
// Livereload client: the mdBook CLI's dev server pushes a "reload"
// message over this websocket whenever the book is rebuilt.
var socket = new WebSocket("ws://localhost:3001");
|
|
socket.onmessage = function (event) {
|
|
if (event.data === "reload") {
|
|
socket.close();
|
|
location.reload(true); // force reload from server (not from cache)
|
|
}
|
|
};
|
|
|
|
// Close the socket cleanly before the page unloads so the server
// doesn't keep a dead connection around.
window.onbeforeunload = function() {
|
|
socket.close();
|
|
}
|
|
</script>
|
|
|
|
|
|
|
|
|
|
|
|
<script src="ace.js" type="text/javascript" charset="utf-8"></script>
|
|
<script src="editor.js" type="text/javascript" charset="utf-8"></script>
|
|
<script src="mode-rust.js" type="text/javascript" charset="utf-8"></script>
|
|
<script src="theme-dawn.js" type="text/javascript" charset="utf-8"></script>
|
|
<script src="theme-tomorrow_night.js" type="text/javascript" charset="utf-8"></script>
|
|
|
|
|
|
|
|
<script src="elasticlunr.min.js" type="text/javascript" charset="utf-8"></script>
|
|
<script src="mark.min.js" type="text/javascript" charset="utf-8"></script>
|
|
<script src="searcher.js" type="text/javascript" charset="utf-8"></script>
|
|
|
|
|
|
<script src="clipboard.min.js" type="text/javascript" charset="utf-8"></script>
|
|
<script src="highlight.js" type="text/javascript" charset="utf-8"></script>
|
|
<script src="book.js" type="text/javascript" charset="utf-8"></script>
|
|
|
|
<!-- Custom JS scripts -->
|
|
|
|
|
|
|
|
|
|
<script type="text/javascript">
|
|
// Print-page helper: open the browser's print dialog shortly after load;
// the 100 ms delay gives fonts and syntax highlighting time to render.
window.addEventListener('load', function() {
|
|
window.setTimeout(window.print, 100);
|
|
});
|
|
</script>
|
|
|
|
|
|
|
|
</body>
|
|
</html>
|