| 1 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" |
| 2 | "http://www.w3.org/TR/html4/loose.dtd"> |
| 3 | <html> |
| 4 | <head><title>LWN.net Weekly Edition for August 30, 2018 [LWN.net]</title> |
| 5 | <link rel="next" href="/Articles/763254/"/> |
| 6 | <meta name="twitter:card" content="summary" /> |
| 7 | <meta name="twitter:site" content="@lwnnet" /> |
| 8 | <meta name="twitter:title" content="LWN.net Weekly Edition for August 30, 2018" /> |
| 9 | <meta name="twitter:description" content="Julia; C considered dangerous; 4.19 Merge window; I/O controller throughput; KDE onboarding; Dat." /> |
| 10 | <meta name="viewport" content="width=device-width, initial-scale=1"> |
| 11 | <meta HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8"> |
| 12 | <link rel="icon" href="/images/favicon.png" type="image/png"> |
| 13 | <link rel="alternate" type="application/rss+xml" title="LWN.net headlines" href="https://lwn.net/headlines/newrss"> |
| 14 | <link rel="stylesheet" href="/CSS/lwn"> |
| 15 | <link rel="stylesheet" href="/CSS/nosub"> |
| 16 | <link rel="stylesheet" href="/CSS/pure-min"> |
| 17 | <!--[if lte IE 8]> |
| 18 | <link rel="stylesheet" href="/CSS/grids-responsive-old-ie-min"> |
| 19 | <![endif]--> |
| 20 | <!--[if gt IE 8]><!--> |
| 21 | <link rel="stylesheet" href="/CSS/grids-responsive-min"> |
| 22 | <!--<![endif]--> |
| 23 | <link rel="stylesheet" href="/CSS/pure-lwn"> |
| 24 | |
| 25 | |
| 26 | <script type="text/javascript">var p="http",d="static";if(document.location.protocol=="https:"){p+="s";d="engine";}var z=document.createElement("script");z.type="text/javascript";z.async=true;z.src=p+"://"+d+".adzerk.net/ados.js";var s=document.getElementsByTagName("script")[0];s.parentNode.insertBefore(z,s);</script> |
| 27 | <script type="text/javascript"> |
| 28 | var ados_keywords = ados_keywords || []; |
| 29 | if( location.protocol=='https:' ) { |
| 30 | ados_keywords.push('T:SSL'); |
| 31 | } else { |
| 32 | ados_keywords.push('T:HTTP'); |
| 33 | } |
| 34 | |
| 35 | var ados = ados || {}; |
| 36 | ados.run = ados.run || []; |
| 37 | ados.run.push(function() { |
| 38 | |
| 39 | ados_add_placement(4669, 20979, "azk13321_leaderboard", 4).setZone(16026); |
| 40 | |
| 41 | ados_add_placement(4669, 20979, "azk93271_right_zone", [5,10,6]).setZone(16027); |
| 42 | |
| 43 | ados_add_placement(4669, 20979, "azk31017_tracking", 20).setZone(20995); |
| 44 | |
| 45 | |
| 46 | |
| 47 | ados_setKeywords(ados_keywords.join(', ')); |
| 48 | ados_load(); |
| 49 | });</script> |
| 50 | |
| 51 | </head> |
| 52 | <body bgcolor="#ffffff" link="Blue" VLINK="Green" alink="Green"> |
| 53 | <a name="t"></a> |
| 54 | <div id="menu"><a href="/"><img src="https://static.lwn.net/images/logo/barepenguin-70.png" class="logo" |
| 55 | border="0" alt="LWN.net Logo"> |
| 56 | <font class="logo">LWN<br>.net</font> |
| 57 | <font class="logobl">News from the source</font></a> |
| 58 | <a href="/"><img src="https://static.lwn.net/images/lcorner-ss.png" class="sslogo" |
| 59 | border="0" alt="LWN"></a><div class="navmenu-container"> |
| 60 | <ul class="navmenu"> |
| 61 | <li><a class="navmenu" href="#t"><b>Content</b></a><ul><li><a href="/current/">Weekly Edition</a></li><li><a href="/Archives/">Archives</a></li><li><a href="/Search/">Search</a></li><li><a href="/Kernel/">Kernel</a></li><li><a href="/Security/">Security</a></li><li><a href="/Distributions/">Distributions</a></li><li><a href="/Calendar/">Events calendar</a></li><li><a href="/Comments/unread">Unread comments</a></li><li><hr></li><li><a href="/op/FAQ.lwn">LWN FAQ</a></li><li><a href="/op/AuthorGuide.lwn">Write for us</a></li></ul></li> |
| 62 | <li><a class="navmenu" href="#t"><b>Edition</b></a><ul><li><a href="/Articles/763252/">⇒Front page</a></li><li><a href="/Articles/763254/">Brief items</a></li><li><a href="/Articles/763255/">Announcements</a></li><li><a href="/Articles/763252/bigpage">One big page</a></li><li><a href="/Articles/762816/">Previous week</a></li><li><a href="/Articles/763789/">Following week</a></li></ul></li> |
| 63 | </ul></div> |
| 64 | </div> <!-- menu --> |
| 65 | <div class="pure-g not-handset" style="margin-left: 10.5em"> |
| 66 | <div class="not-print"> |
| 67 | <div id="azk13321_leaderboard"></div> |
| 68 | </div> |
| 69 | </div> |
| 70 | <div class="topnav-container"> |
| 71 | <div class="not-handset"><form action="https://lwn.net/Login/" method="post" name="loginform" |
| 72 | class="loginform"> |
| 73 | <b>User:</b> <input type="text" name="Username" value="" size="8" /> <b>Password:</b> <input type="password" name="Password" size="8" /> <input type="hidden" name="target" value="/Articles/763252/" /> <input type="submit" name="submit" value="Log in" /></form> | |
| 74 | <form action="https://lwn.net/subscribe/" method="post" class="loginform"> |
| 75 | <input type="submit" name="submit" value="Subscribe" /> |
| 76 | </form> | |
| 77 | <form action="https://lwn.net/Login/newaccount" method="post" class="loginform"> |
| 78 | <input type="submit" name="submit" value="Register" /> |
| 79 | </form> |
| 80 | </div> |
| 81 | <div class="handset-only"> |
| 82 | <a href="/subscribe/"><b>Subscribe</b></a> / |
| 83 | <a href="/Login/"><b>Log in</b></a> / |
| 84 | <a href="/Login/newaccount"><b>New account</b></a> |
| 85 | </div> |
| 86 | </div><div class="pure-grid maincolumn"> |
| 87 | <div class="lwn-u-1 pure-u-md-19-24"> |
| 88 | <div class="PageHeadline"> |
| 89 | <h1>LWN.net Weekly Edition for August 30, 2018</h1> |
| 90 | </div> |
| 91 | <div class="ArticleText"> |
| 92 | <a name="763743"></a><h2 class="SummaryHL"><a href="/Articles/763743/">Welcome to the LWN.net Weekly Edition for August 30, 2018</a></h2> |
| 93 | |
| 94 | This edition contains the following feature content: |
| 95 | <p> |
| 96 | <ul class="spacylist"> |
| 97 | |
| 98 | <li> <a href="/Articles/763626/">An introduction to the Julia language, |
| 99 | part 1</a>: Julia is a language designed for intensive numerical |
| 100 | calculations; this article gives an overview of its core features. |
| 101 | |
| 102 | <li> <a href="/Articles/763641/">C considered dangerous</a>: a Linux |
| 103 | Security Summit talk on what is being done to make the use of C in the |
| 104 | kernel safer. |
| 105 | |
| 106 | <li> <a href="/Articles/763106/">The second half of the 4.19 merge |
| 107 | window</a>: the final features merged (or not merged) before the merge |
| 108 | window closed for this cycle. |
| 109 | |
| 110 | |
| 111 | <li> <a href="/Articles/763603/">Measuring (and fixing) I/O-controller |
| 112 | throughput loss</a>: the kernel's I/O controllers can provide useful |
| 113 | bandwidth guarantees, but at a significant cost in throughput. |
| 114 | |
| 115 | <li> <a href="/Articles/763175/">KDE's onboarding initiative, one year |
| 116 | later</a>: what has gone right in KDE's effort to make it easier for |
| 117 | contributors to join the project, and what remains to be done. |
| 118 | |
| 119 | <li> <a href="/Articles/763492/">Sharing and archiving data sets with |
| 120 | Dat</a>: an innovative approach to addressing and sharing data on the |
| 121 | net. |
| 122 | |
| 123 | </ul> |
| 124 | |
| 125 | <p> |
| 126 | This week's edition also includes these inner pages: |
| 127 | <p> |
| 128 | <ul class="spacylist"> |
| 129 | |
| 130 | <li> <a href="/Articles/763254/">Brief items</a>: Brief news items from throughout the community. |
| 131 | |
| 132 | <li> <a href="/Articles/763255/">Announcements</a>: Newsletters, conferences, security updates, patches, and more. |
| 133 | |
| 134 | </ul> |
| 135 | <p> |
| 136 | Please enjoy this week's edition, and, as always, thank you for |
| 137 | supporting LWN.net. |
| 138 | <p><a href="/Articles/763743/#Comments">Comments (none posted)</a> |
| 139 | <p> |
| 140 | <a name="763626"></a><h2 class="SummaryHL"><a href="/Articles/763626/">An introduction to the Julia language, part 1</a></h2> |
| 141 | |
| 142 | <div class="GAByline"> |
| 143 | <p>August 28, 2018</p> |
| 144 | <p>This article was contributed by Lee Phillips</p> |
| 145 | </div> |
| 146 | <p><a href="http://julialang.org/">Julia</a> is a young computer language |
| 147 | aimed at serving the needs of scientists, engineers, and other |
| 148 | practitioners of numerically intensive programming. It was first publicly |
| 149 | released in 2012. After an intense period of language development, version |
| 150 | 1.0 was <a |
| 151 | href="https://julialang.org/blog/2018/08/one-point-zero">released</a> on |
| 152 | August 8. The 1.0 release promises years of language |
| 153 | stability; users can be confident that developments in the 1.x series will |
| 154 | not break their code. |
| 155 | This is the first part of a two-part article introducing the world of Julia. |
| 156 | This part will introduce enough of the language syntax and constructs to |
| 157 | allow you to begin to write simple programs. The following installment will |
| 158 | acquaint you with the additional pieces needed to create real projects, and to |
| 159 | make use of Julia's ecosystem. |
| 160 | |
| 161 | <h4>Goals and history</h4> |
| 162 | |
| 163 | <p>The Julia project has ambitious goals. It wants the language to perform |
| 164 | about as well as Fortran or C when running numerical algorithms, while |
| 165 | remaining as pleasant to program in as Python. I believe the project has |
| 166 | met these goals and is poised to see increasing adoption by numerical |
| 167 | researchers, especially now that an official, stable release is |
| 168 | available.</p> |
| 169 | |
| 170 | <p>The Julia project maintains a <a |
| 171 | href="https://julialang.org/benchmarks/">micro-benchmark page</a> that compares its |
| 172 | numerical performance against both statically compiled languages (C, |
| 173 | Fortran) and dynamically typed languages (R, Python). While it's certainly |
| 174 | possible to argue about the relevance and fairness of particular |
| 175 | benchmarks, the data overall supports the Julia team's contention that Julia |
| 176 | has generally achieved parity with Fortran and C; the benchmark |
| 177 | source code is available.</p> |
| 178 | |
| 179 | <p>Julia began as research in computer science at MIT; its creators are |
| 180 | Alan Edelman, Stefan Karpinski, Jeff Bezanson, and Viral Shah. These four |
| 181 | remain active developers of the language. They, along with Keno Fischer, |
| 182 | co-founder and CTO of <a href="https://juliacomputing.com/">Julia |
| 183 | Computing</a>, were kind enough to share their thoughts with us about |
| 184 | the language. I'll be drawing |
| 185 | on their comments later on; for now, let's get a taste of |
| 186 | what Julia code looks like.</p> |
| 187 | |
| 188 | <h4>Getting started</h4> |
| 189 | |
| 190 | <p>To explore Julia initially, start up its standard <a |
| 191 | href="https://en.wikipedia.org/wiki/Read%E2%80%93eval%E2%80%93print_loop">read-eval-print |
| 192 | loop</a> (REPL) |
| 193 | by typing <code>julia</code> at the terminal, assuming that you have installed |
| 194 | it. You will then be |
| 195 | able to interact with what will seem to be an interpreted language — but, |
| 196 | behind the scenes, those commands are being compiled by a |
| 197 | just-in-time (JIT) compiler that uses the <a href="http://llvm.org/">LLVM |
| 198 | compiler framework</a>. This allows Julia to be interactive, while turning |
| 199 | the code into fast, native machine instructions. However, the JIT compiler |
| 200 | passes sometimes introduce noticeable delays at the REPL, especially when |
| 201 | using a function for the first time.</p> |
| 202 | |
| 203 | <p>To run a Julia program non-interactively, execute a command like: |
| 204 | <pre> |
| 205 | $ julia script.jl <args> |
| 206 | </pre> |
| 207 | |
| 208 | <p>Julia has all the usual data structures: numbers of various types |
| 209 | (including complex and rational numbers), multidimensional arrays, |
| 210 | dictionaries, strings, and characters. Functions are first-class: they can |
| 211 | be passed as arguments to other functions, can be members of arrays, |
| 212 | and so on.</p> |
| 213 | |
| 214 | <p>Julia embraces Unicode. Strings, which are enclosed in double quotes, |
| 215 | are arrays of Unicode characters, which are enclosed in single quotes. The |
| 216 | "<tt>*</tt>" operator is used for string and character concatenation. Thus |
| 217 | 'a' and 'β' are characters, and 'aβ' is a syntax error. "a" and |
| 218 | "β" are strings, as are "aβ", 'a' * 'β', and |
| 219 | "a" * "β" — all evaluate to the same string. |
| 220 | |
| 221 | <p>Variable and function names can contain non-ASCII characters. This, along |
| 222 | with Julia's clever syntax that understands numbers prepended to variables |
| 223 | to mean multiplication, goes a long way to allowing the numerical scientist |
| 224 | to write code that more closely resembles the compact mathematical notation |
| 225 | of the equations that usually lie behind it.</p> |
| 226 | |
| 227 | <pre> |
| 228 | julia> ε₁ = 0.01 |
| 229 | 0.01 |
| 230 | |
| 231 | julia> ε₂ = 0.02 |
| 232 | 0.02 |
| 233 | |
| 234 | julia> 2ε₁ + 3ε₂ |
| 235 | 0.08 |
| 236 | </pre> |
| 237 | |
| 238 | <p>And where does Julia come down on the age-old debate of what do about |
| 239 | <tt>1/2</tt>? In Fortran and Python 2, this will get you 0, since 1 and 2 are |
| 240 | integers, and the result is rounded down to the integer 0. This was deemed |
| 241 | inconsistent, and confusing to some, so it was changed in Python 3 to |
| 242 | return 0.5 — which is what you |
| 243 | get in Julia, too.</p> |
| 244 | |
| 245 | <p>While we're on the subject of fractions, Julia can handle rational |
| 246 | numbers, with a special syntax: <tt>3//5 + 2//3</tt> returns |
| 247 | <tt>19//15</tt>, while <tt>3/5 + 2/3</tt> |
| 248 | gets you the floating-point answer 1.2666666666666666. Internally, Julia |
| 249 | thinks of a rational number in its reduced form, so the expression |
| 250 | <tt>6//8 == 3//4</tt> returns <code>true</code>, and <code>numerator(6//8)</code> returns |
| 251 | <code>3</code>.</p> |
| 252 | |
| 253 | <h4>Arrays</h4> |
| 254 | |
| 255 | <p>Arrays are enclosed in square brackets and indexed with an iterator that |
| 256 | can contain a step value:</p> |
| 257 | |
| 258 | <pre> |
| 259 | julia> a = [1, 2, 3, 4, 5, 6] |
| 260 | 6-element Array{Int64,1}: |
| 261 | 1 |
| 262 | 2 |
| 263 | 3 |
| 264 | 4 |
| 265 | 5 |
| 266 | 6 |
| 267 | |
| 268 | julia> a[1:2:end] |
| 269 | 3-element Array{Int64,1}: |
| 270 | 1 |
| 271 | 3 |
| 272 | 5 |
| 273 | </pre> |
| 274 | |
| 275 | <p>As you can see, indexing starts at one, and the useful <code>end</code> |
| 276 | index means the obvious thing. When you define a variable in the REPL, |
| 277 | Julia replies with the type and value of the assigned data; you can suppress this output by ending your input line with a semicolon.</p> |
| 278 | |
| 279 | <p>Since arrays are such a vital part of numerical computation, and Julia |
| 280 | makes them easy to work with, we'll spend a bit more time with them than the other data structures.</p> |
| 281 | |
| 282 | <p>To illustrate the syntax, we can start with a couple of 2D arrays, defined at the REPL:</p> |
| 283 | |
| 284 | <pre> |
| 285 | julia> a = [1 2 3; 4 5 6] |
| 286 | 2×3 Array{Int64,2}: |
| 287 | 1 2 3 |
| 288 | 4 5 6 |
| 289 | |
| 290 | julia> z = [-1 -2 -3; -4 -5 -6]; |
| 291 | </pre> |
| 292 | |
| 293 | <p>Indexing is as expected:</p> |
| 294 | |
| 295 | <pre> |
| 296 | julia> a[1, 2] |
| 297 | 2 |
| 298 | </pre> |
| 299 | |
| 300 | <p>You can glue arrays together horizontally:</p> |
| 301 | |
| 302 | <pre> |
| 303 | julia> [a z] |
| 304 | 2×6 Array{Int64,2}: |
| 305 | 1 2 3 -1 -2 -3 |
| 306 | 4 5 6 -4 -5 -6 |
| 307 | </pre> |
| 308 | |
| 309 | <p>And vertically:</p> |
| 310 | |
| 311 | <pre> |
| 312 | julia> [a; z] |
| 313 | 4×3 Array{Int64,2}: |
| 314 | 1 2 3 |
| 315 | 4 5 6 |
| 316 | -1 -2 -3 |
| 317 | -4 -5 -6 |
| 318 | </pre> |
| 319 | |
| 320 | <p>Julia has all the usual operators for handling arrays, and <a |
| 321 | href="http://www.3blue1brown.com/essence-of-linear-algebra-page/">linear |
| 322 | algebra</a> functions that work with matrices (2D arrays). The linear |
| 323 | algebra functions are part of Julia's standard library, but need to be |
| 324 | imported with a command like "<code>using LinearAlgebra</code>", which is a detail |
| 325 | omitted from the current documentation. The functions include such things as |
| 326 | determinants, matrix inverses, eigenvalues and eigenvectors, many kinds of |
| 327 | matrix factorizations, etc. Julia has not reinvented the wheel here, but |
| 328 | wisely uses the <a href="http://www.netlib.org/lapack/">LAPACK</a> Fortran |
| 329 | library of battle-tested linear algebra routines.</p> |
| 330 | |
| 331 | <p>The extension of arithmetic operators to arrays is usually intuitive:</p> |
| 332 | |
| 333 | <pre> |
| 334 | julia> a + z |
| 335 | 2×3 Array{Int64,2}: |
| 336 | 0 0 0 |
| 337 | 0 0 0 |
| 338 | </pre> |
| 339 | |
| 340 | <p>And the numerical prepending syntax works with arrays, too:</p> |
| 341 | |
| 342 | <pre> |
| 343 | julia> 3a + 4z |
| 344 | 2×3 Array{Int64,2}: |
| 345 | -1 -2 -3 |
| 346 | -4 -5 -6 |
| 347 | </pre> |
| 348 | |
| 349 | <p>Putting a multiplication operator between two matrices gets you matrix |
| 350 | multiplication:</p> |
| 351 | |
| 352 | <pre> |
| 353 | julia> a * transpose(a) |
| 354 | 2×2 Array{Int64,2}: |
| 355 | 14 32 |
| 356 | 32 77 |
| 357 | </pre> |
| 358 | |
| 359 | <p>You can "broadcast" numbers to cover all the elements in an |
| 360 | array by prepending the usual arithmetic operators with a dot:</p> |
| 361 | |
| 362 | <pre> |
| 363 | julia> 1 .+ a |
| 364 | 2×3 Array{Int64,2}: |
| 365 | 2 3 4 |
| 366 | 5 6 7 |
| 367 | </pre> |
| 368 | |
| 369 | <p>Note that the language only actually requires the dot for some |
| 370 | operators, but not for others, such as "*" and "/". The |
| 371 | reasons for this are arcane, and it probably makes sense to be consistent |
| 372 | and use the dot whenever you intend broadcasting. Note also that the |
| 373 | current version of the official documentation is incorrect in claiming that |
| 374 | you may omit the dot from "+" and "-"; in fact, this |
| 375 | now gives an error.</p> |
| 376 | |
| 377 | <p>You can use the dot notation to turn any function into one that operates |
| 378 | on each element of an array:</p> |
| 379 | |
| 380 | <pre> |
| 381 | julia> round.(sin.([0, π/2, π, 3π/2, 2π])) |
| 382 | 5-element Array{Float64,1}: |
| 383 | 0.0 |
| 384 | 1.0 |
| 385 | 0.0 |
| 386 | -1.0 |
| 387 | -0.0 |
| 388 | </pre> |
| 389 | |
| 390 | <p>The example above illustrates chaining two dotted functions |
| 391 | together. The Julia compiler turns expressions like this into |
| 392 | "fused" operations: instead of applying each function in turn to |
| 393 | create a new array that is passed to the next function, the compiler |
| 394 | combines the functions into a single compound function that is applied once |
| 395 | over the array, creating a significant optimization.</p> |
| 396 | |
| 397 | <p>You can use this dot notation with any function, including your own, to |
| 398 | turn it into a version that operates element-wise over arrays.</p> |
| 399 | |
| 400 | <p>Dictionaries (associative arrays) can be defined with several |
| 401 | syntaxes. Here's one:</p> |
| 402 | |
| 403 | <pre> |
| 404 | julia> d1 = Dict("A"=>1, "B"=>2) |
| 405 | Dict{String,Int64} with 2 entries: |
| 406 | "B" => 2 |
| 407 | "A" => 1 |
| 408 | </pre> |
| 409 | |
| 410 | <p>You may have noticed that the code snippets so far have not included any |
| 411 | type declarations. Every value in Julia has a type, but the compiler will |
| 412 | infer types if they are not specified. It is generally not necessary to |
| 413 | declare types for performance, but type declarations sometimes serve other |
| 414 | purposes, that we'll return to later. Julia has a deep and sophisticated |
| 415 | type system, including user-defined types and C-like structs. Types can |
| 416 | have behaviors associated with them, and can inherit behaviors from other |
| 417 | types. The best thing about Julia's type system is that you can ignore it |
| 418 | entirely, use just a few pieces of it, or spend weeks studying its |
| 419 | design.</p> |
| 420 | |
| 421 | <h4>Control flow</h4> |
| 422 | |
| 423 | <p>Julia code is organized in blocks, which can indicate control flow, |
| 424 | function definitions, and other code units. Blocks are terminated with the |
| 425 | <code>end</code> keyword, and indentation is not significant. Statements |
| 426 | are separated either with newlines or semicolons.</p> |
| 427 | |
| 428 | <p>Julia has the typical control flow constructs; here is a |
| 429 | <code>while</code> block:</p> |
| 430 | |
| 431 | <pre> |
| 432 | julia> i = 1; |
| 433 | |
| 434 | julia> while i < 5 |
| 435 | print(i) |
| 436 | global i = i + 1 |
| 437 | end |
| 438 | 1234 |
| 439 | </pre> |
| 440 | |
| 441 | <p>Notice the <code>global</code> keyword. Most blocks in Julia introduce a |
| 442 | local scope for variables; without this keyword here, we would get an error |
| 443 | about an undefined variable.</p> |
| 444 | |
| 445 | <p>Julia has the usual <code>if</code> statements and <code>for</code> |
| 446 | loops that use the same iterators that we introduced above for array |
| 447 | indexing. We can also iterate over collections:</p> |
| 448 | |
| 449 | <pre> |
| 450 | julia> for i ∈ ['a', 'b', 'c'] |
| 451 | println(i) |
| 452 | end |
| 453 | a |
| 454 | b |
| 455 | c |
| 456 | </pre> |
| 457 | |
| 458 | <p>In place of the fancy math symbol in this <code>for</code> loop, we can |
| 459 | use "<tt>=</tt>" or "<tt>in</tt>". If you want to use |
| 460 | the math symbol but |
| 461 | have no convenient way to type it, the REPL will help you: type |
| 462 | "<tt>\in</tt>" and the TAB key, and the symbol appears; you can type many |
| 463 | <a href="/Articles/657157/">LaTeX</a> expressions into the |
| 464 | REPL in this way.</p> |
| 465 | |
| 466 | <h4>Development of Julia</h4> |
| 467 | |
| 468 | <p>The language is developed on GitHub, with over 700 contributors. The |
| 469 | Julia team mentioned in their email to us that the decision to use GitHub |
| 470 | has been particularly good for Julia, as it streamlined the process for |
| 471 | many of their contributors, who are scientists or domain experts in various |
| 472 | fields, rather than professional software developers.</p> |
| 473 | |
| 474 | <p>The creators of Julia have <a |
| 475 | href="https://julialang.org/publications/julia-fresh-approach-BEKS.pdf">published |
| 476 | [PDF]</a> |
| 477 | a detailed “mission statement” for the language, describing their aims and |
| 478 | motivations. A key issue that they wanted their language to solve is what |
| 479 | they called the "two-language problem." This situation is |
| 480 | familiar to anyone who has used Python or another dynamic language on a |
| 481 | demanding numerical problem. To get good performance, you will wind up |
| 482 | rewriting the numerically intensive parts of the program in C or Fortran, |
| 483 | dealing with the interface between the two languages, and may still be |
| 484 | disappointed in the overhead presented by calling the foreign routines from |
| 485 | your original code. |
| 486 | |
| 487 | <p> |
| 488 | For Python, <a |
| 489 | href="/Articles/738915/">NumPy and SciPy</a> wrap many |
| 490 | numerical routines, written in Fortran or C, for efficient use from that |
| 491 | language, but you can only take advantage of this if your calculation fits |
| 492 | the pattern of an available routine; in more general cases, where you will |
| 493 | have to write a loop over your data, you are stuck with Python's native |
| 494 | performance, which is orders of magnitude slower. If you switch to an |
| 495 | alternative, faster implementation of Python, such as <a |
| 496 | href="https://pypy.org/">PyPy</a>, the numerical libraries may not be |
| 497 | compatible; NumPy became available for PyPy only within about the past |
| 498 | year.</p> |
| 499 | |
| 500 | <p>Julia solves the two-language problem by being as expressive and simple |
| 501 | to program in as a dynamic scripting language, while having the native |
| 502 | performance of a static, compiled language. There is no need to write |
| 503 | numerical libraries in a second language, but C or Fortran library routines |
| 504 | can be called using a facility that Julia has built-in. Other languages, |
| 505 | such as <a href="https://github.com/JuliaPy/PyCall.jl">Python</a> or <a |
| 506 | href="https://github.com/JuliaInterop/RCall.jl">R</a>, can also interoperate |
| 507 | easily with Julia using external packages.</p> |
| 508 | |
| 509 | <h4>Documentation</h4> |
| 510 | |
| 511 | <p>There are many resources to turn to to learn the language. There is an |
| 512 | extensive and detailed <a |
| 513 | href="https://docs.julialang.org/en/stable/">manual</a> at Julia |
| 514 | headquarters, and this may be a good place to start. However, although the |
| 515 | first few chapters provide a gentle introduction, the material soon becomes |
| 516 | dense and, at times, hard to follow, with references to concepts that are |
| 517 | not explained until later chapters. Fortunately, there is a <a |
| 518 | href="https://julialang.org/learning/">"learning" link</a> at the |
| 519 | top of the Julia home page, which takes you to a long list of videos, |
| 520 | tutorials, books, articles, and classes both about Julia and that use Julia |
| 521 | in teaching subjects such a numerical analysis. There is also a fairly good |
| 522 | <a |
| 523 | href="http://bogumilkaminski.pl/files/julia_express.pdf">cheat-sheet [PDF]</a>, which was |
| 524 | just updated for v. 1.0.</p> |
| 525 | |
| 526 | <p>If you're coming from Python, <a |
| 527 | href="https://docs.julialang.org/en/stable/manual/noteworthy-differences/#Noteworthy-differences-from-Python-1">this |
| 528 | list</a> of noteworthy differences between Python and Julia syntax will |
| 529 | probably be useful.</p> |
| 530 | |
| 531 | <p>Some of the linked tutorials are in the form of <a |
| 532 | href="https://lwn.net/Articles/746386/">Jupyter notebooks</a> — indeed, |
| 533 | the name "Jupyter" is formed from "Julia", |
| 534 | "Python", and "R", which are the three original languages supported by |
| 535 | the interface. The <a href="https://github.com/JuliaLang/IJulia.jl">Julia |
| 536 | kernel for Jupyter</a> was recently upgraded to support v. 1.0. Judicious |
| 537 | sampling of a variety of documentation sources, combined with liberal |
| 538 | experimentation, may be the best way of learning the language. Jupyter |
| 539 | makes this experimentation more inviting for those who enjoy the web-based |
| 540 | interface, but the REPL that comes with Julia helps a great deal in this |
| 541 | regard by providing, for instance, TAB completion and an extensive help |
| 542 | system invoked by simply pressing the "?" key.</p> |
| 543 | |
| 544 | <h4>Stay tuned</h4> |
| 545 | |
| 546 | <p> |
| 547 | The <a href="/Articles/764001/">next installment</a> in this two-part series will explain how Julia is |
| 548 | organized around the concept of "multiple dispatch". You will learn how to |
| 549 | create functions and make elementary use of Julia's type system. We'll see how |
| 550 | to install packages and use modules, and how to make graphs. Finally, Part 2 |
| 551 | will briefly survey the important topics of macros and distributed computing. |
| 552 | <p><a href="/Articles/763626/#Comments">Comments (80 posted)</a> |
| 553 | <p> |
| 554 | <a name="763641"></a><h2 class="SummaryHL"><a href="/Articles/763641/">C considered dangerous</a></h2> |
| 555 | |
| 556 | <div class="FeatureByline"> |
| 557 | By <b>Jake Edge</b><br>August 29, 2018 |
| 558 | <hr> |
| 559 | <a href="/Archives/ConferenceByYear/#2018-Linux_Security_Summit_NA">LSS NA</a> |
| 560 | </div> |
| 561 | <p> |
| 562 | At the North America edition of the <a |
| 563 | href="https://events.linuxfoundation.org/events/linux-security-summit-north-america-2018/">2018 |
| 564 | Linux Security Summit</a> (LSS NA), which was held in late August in Vancouver, |
| 565 | Canada, Kees Cook gave a presentation on some of the dangers that come with |
| 566 | programs written in C. In particular, of course, the Linux kernel is |
| 567 | mostly written in C, which means that the security of our systems rests on |
| 568 | a somewhat dangerous foundation. But there are things that can be done to |
| 569 | help firm things up by "<span>Making C Less Dangerous</span>" as the title |
| 570 | of his talk suggested. |
| 571 | </p> |
| 572 | |
| 573 | <p> |
| 574 | He began with a brief summary of the work that he and others are doing as |
| 575 | part of the <a |
| 576 | href="https://kernsec.org/wiki/index.php/Kernel_Self_Protection_Project">Kernel |
| 577 | Self Protection Project</a> (KSPP). The goal of the project is to get |
| 578 | kernel protections merged into the mainline. These protections are not |
| 579 | targeted at protecting user-space processes from other (possibly rogue) |
| 580 | processes, but are, instead, focused on protecting the kernel from |
| 581 | user-space code. There are around 12 organizations and ten individuals |
| 582 | working on roughly 20 different technologies as part of the KSPP, he said. The |
| 583 | progress has been "slow and steady", he said, which is how he thinks it |
| 584 | should go. |
| 585 | </p> |
| 586 | |
| 587 | <a href="/Articles/763644/"> |
| 588 | <img src="https://static.lwn.net/images/2018/lssna-cook-sm.jpg" border=0 hspace=5 align="right" |
| 589 | alt="[Kees Cook]" title="Kees Cook" width=214 height=300> |
| 590 | </a> |
| 591 | |
| 592 | <p> |
| 593 | One of the main problems is that C is treated mostly like a fancy assembler. |
| 594 | The kernel developers do this because they want the kernel to be as fast |
| 595 | and as small as possible. There are other reasons, too, such as the need to do |
| 596 | architecture-specific tasks that lack a C API (e.g. setting up page tables, |
| 597 | switching to 64-bit mode). |
| 598 | </p> |
| 599 | |
| 600 | <p> |
| 601 | But there is lots of undefined behavior in C. This "operational baggage" |
| 602 | can lead to various problems. In addition, C has a weak standard library |
| 603 | with multiple utility functions that have various pitfalls. In C, the content |
| 604 | of uninitialized automatic variables is undefined, but in the machine code that it |
| 605 | gets translated to, the value is whatever happened to be in that memory |
| 606 | location before. In C, a function pointer can be called even if the type |
| 607 | of the pointer does not match the type of the function being |
| 608 | called—assembly doesn't care, it just jumps to a location, he said. |
| 609 | </p> |
| 610 | |
| 611 | <p> |
| 612 | The APIs in the standard library are also bad in many cases. He asked: why |
| 613 | is there no argument to <tt>memcpy()</tt> to specify the maximum |
| 614 | destination length? He noted a recent <a |
| 615 | href="https://raphlinus.github.io/programming/rust/2018/08/17/undefined-behavior.html">blog |
| 616 | post</a> from Raph Levien entitled "With Undefined Behavior, Anything is |
| 617 | Possible". That obviously resonated with Cook, as he pointed out his |
| 618 | T-shirt—with the title and artwork from the post. |
| 619 | </p> |
| 620 | |
| 621 | <h4>Less danger</h4> |
| 622 | |
| 623 | <p> |
| 624 | He then moved on to some things that kernel developers can do (and are |
| 625 | doing) to get away from some of the dangers of C. He began with |
| 626 | variable-length arrays (VLAs), which can be used to overflow the stack to |
| 627 | access |
| 628 | data outside of its region. Even if the stack has a guard page, VLAs can |
| 629 | be used to jump past it to write into other memory, which can then be used |
| 630 | by some other kind of attack. The C language is "perfectly fine with |
| 631 | this". It is easy to find uses of VLAs with the <tt>-Wvla</tt> flag, however. |
| 632 | </p> |
| 633 | |
| 634 | <p> |
| 635 | But it turns out that VLAs are <a href="/Articles/749064/">not just bad |
| 636 | from a security perspective</a>, |
| 637 | they are also slow. In a micro-benchmark associated with a <a |
| 638 | href="https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=02361bc77888">patch |
| 639 | removing a VLA</a>, a 13% performance boost came from using a fixed-size |
| 640 | array. He dug in a bit further and found that much more code is being |
| 641 | generated to handle a VLA, which explains the speed increase. Since Linus |
| 642 | Torvalds has <a |
| 643 | href="https://lore.kernel.org/lkml/CA+55aFzCG-zNmZwX4A2FQpadafLfEzK6CC=qPXydAacU1RqZWA@mail.gmail.com/T/#u">declared |
| 644 | that VLAs should be removed</a> from the kernel because they cause security |
| 645 | problems and also slow the kernel down; Cook said "don't use VLAs". |
| 646 | </p> |
| 647 | |
| 648 | <p> |
| 649 | Another problem area is <tt>switch</tt> statements, in particular where |
| 650 | there is no <tt>break</tt> for a <tt>case</tt>. That could mean that the |
| 651 | programmer expects and wants to fall through to the next case or it could |
| 652 | be that the <tt>break</tt> was simply forgotten. There is a way to get a |
| 653 | warning from the compiler for fall-throughs, but there needs to be a way to |
| 654 | mark those that are truly meant to be that way. A special fall-through |
| 655 | "statement" in the form of a comment is what has been agreed on within the |
| 656 | static-analysis community. He and others have been going through each of |
| 657 | the places where there is no <tt>break</tt> to add these comments (or a |
| 658 | <tt>break</tt>); they |
| 659 | have "found a lot of bugs this way", he said. |
| 660 | </p> |
| 661 | |
| 662 | <p> |
| 663 | Uninitialized local variables will generate a warning, but not if the |
| 664 | variable is passed in by reference. There are some GCC plugins that will |
| 665 | automatically initialize these variables, but there are also patches for |
| 666 | both GCC and Clang to provide a compiler option to do so. Neither of those |
| 667 | is upstream yet, but Torvalds has praised the effort so the kernel would |
| 668 | likely use the option. An interesting side |
| 669 | effect that came about while investigating this was a warning he got about |
| 670 | unreachable code when he |
| 671 | enabled the auto-initialization. There were two variables declared just |
| 672 | after a <tt>switch</tt> (and outside of any <tt>case</tt>), where they |
| 673 | would never be reached. |
| 674 | </p> |
| 675 | |
| 676 | <p> |
| 677 | Arithmetic overflow is another undefined behavior in C that can cause various |
| 678 | problems. GCC can check for signed overflow, which performs well |
| 679 | (the overhead is in the noise, he said), but adding warning messages for it does grow |
| 680 | the kernel by 6%; making the overflow abort, instead, only adds 0.1%. |
| 681 | Clang can check for both signed and unsigned overflow; signed overflow is |
| 682 | undefined, while unsigned overflow is defined, but often unexpected. |
| 683 | Marking places where unsigned overflow is expected is needed; it |
| 684 | would be nice to get those annotations put into the kernel, Cook said. |
| 685 | </p> |
| 686 | |
| 687 | <p> |
| 688 | Explicit bounds checking is expensive. Doing it for |
| 689 | <tt>copy_{to,from}_user()</tt> is a less than 1% performance hit, but |
| 690 | adding it to the <tt>strcpy()</tt> and <tt>memcpy()</tt> families are |
| 691 | around a 2% hit. Pre-Meltdown that would have been a totally impossible |
| 692 | performance regression for security, he said; post-Meltdown, since it is |
| 693 | less than 5%, maybe there is a chance to add this checking. |
| 694 | </p> |
| 695 | |
| 696 | <p> |
| 697 | Better APIs would help as well. He pointed to the evolution of |
| 698 | <tt>strcpy()</tt>, through <tt>str<b>n</b>cpy()</tt> and |
| 699 | <tt>str<b>l</b>cpy()</tt> (each with their own bounds flaws) to |
| 700 | <tt>str<b>s</b>cpy()</tt>, which seems to be "OK so far". He also mentioned |
| 701 | <tt>memcpy()</tt> again as a poor API with respect to bounds checking. |
| 702 | </p> |
| 703 | |
| 704 | <p> |
| 705 | Hardware support for bounds checking is available in the application |
| 706 | data integrity (ADI) feature for SPARC and is coming for Arm; it may also be |
| 707 | available for Intel processors at some point. These all use a form of |
| 708 | "memory tagging", where allocations get a tag that is stored in the |
| 709 | high-order byte of the address. An offset from the address can be checked |
| 710 | by the hardware to see if it still falls within the allocated region based |
| 711 | on the tag. |
| 712 | </p> |
| 713 | |
| 714 | <p> |
| 715 | Control-flow integrity (CFI) has become more of an issue lately because |
| 716 | much of what attackers had used in the past has been marked as "no execute" |
| 717 | so they are turning to using existing code "gadgets" already present in the |
| 718 | kernel by hijacking existing indirect function calls. In C, you can just call |
| 719 | pointers without regard to the type as it just treats them as an |
| 720 | address to jump to. Clang has a CFI-sanitize feature that enforces the |
| 721 | function prototype to restrict the calls that can be made. It is done at |
| 722 | runtime and is not perfect, in part because there are lots of functions in |
| 723 | the kernel that take one unsigned long parameter and return an unsigned long. |
| 724 | </p> |
| 725 | |
| 726 | <p> |
| 727 | Attacks on CFI have both a "forward edge", which is what CFI sanitize |
| 728 | tries to handle, and a "backward edge" that comes from manipulating the stack |
| 729 | values, the return address in particular. Clang has two methods available |
| 730 | to prevent the stack manipulation. The first is the "safe stack", which |
| 731 | puts various important items (e.g. "safe" variables, register spills, and |
| 732 | the return address) on a separate stack. Alternatively, the "shadow stack" |
| 733 | feature creates a separate stack just for return addresses. |
| 734 | </p> |
| 735 | |
| 736 | <p> |
| 737 | One problem with these other stacks is that they are still writable, so if |
| 738 | an attacker can find them in memory, they can still perform their attacks. |
| 739 | Hardware-based protections, like Intel's Control-Flow Enforcement |
| 740 | Technology (CET), <a href="/Articles/758245/">provides a read-only shadow |
| 741 | call stack</a> for return addresses. Another hardware protection is <a |
| 742 | href="/Articles/718888/">pointer authentication</a> for Arm, which adds a |
| 743 | kind of encrypted tag to the return address that can be verified before it |
| 744 | is used. |
| 745 | </p> |
| 746 | |
| 747 | <h4>Status and challenges</h4> |
| 748 | |
| 749 | <p> |
| 750 | Cook then went through the current status of handling these different |
| 751 | problems in the kernel. VLAs are almost completely gone, he said, just a |
| 752 | few remain in the crypto subsystem; he hopes those VLAs will be gone by 4.20 (or |
| 753 | whatever the number of the next kernel release turns out to be). Once that |
| 754 | happens, he plans to turn on <tt>-Wvla</tt> for the kernel build so that |
| 755 | none creep back in. |
| 756 | </p> |
| 757 | |
| 758 | <p> |
| 759 | There has been steady progress made on marking fall-through cases in |
| 760 | <tt>switch</tt> statements. Only 745 remain to be handled of the 2311 that |
| 761 | existed when this work started; each one requires scrutiny to determine |
| 762 | what the author's intent is. Auto-initialized local variables can be done |
| 763 | using compiler plugins, but that is "not quite what we want", he said. |
| 764 | More compiler support would be helpful there. For arithmetic overflow, it |
| 765 | would be nice to see GCC get support for the unsigned case, but memory |
| 766 | allocations are now doing explicit overflow checking at this point. |
| 767 | </p> |
| 768 | |
| 769 | <p> |
| 770 | Bounds checking has seen some "crying about performance hits", so we are |
| 771 | waiting impatiently for hardware support, he said. CFI forward-edge |
| 772 | protection needs <a href="/Articles/744507/">link-time optimization</a> |
| 773 | (LTO) support for Clang in the kernel, but it is currently working on |
| 774 | Android. For backward-edge mitigation, the Clang shadow call stack is |
| 775 | working on Android, but we are impatiently waiting for hardware support for |
| 776 | that too. |
| 777 | </p> |
| 778 | |
| 779 | <p> |
| 780 | There are a number of challenges in doing security development for the |
| 781 | kernel, Cook said. There are cultural boundaries due to conservatism |
| 782 | within the kernel community; that requires patiently working and reworking |
| 783 | features in order to get them upstream. There are, of course, technical |
| 784 | challenges because of the complexity of security changes; those kinds of |
| 785 | problems can be solved. There are also resource limitations in terms of |
| 786 | developers, testers, reviewers, and so on. KSPP and the other kernel |
| 787 | security developers are still making that "slow but steady" progress. |
| 788 | </p> |
| 789 | |
| 790 | <p> |
| 791 | Cook's <a href="https://outflux.net/slides/2018/lss/danger.pdf">slides |
| 792 | [PDF]</a> are available for interested readers; before long, there should |
| 793 | be a video available of the talk as well. |
| 794 | |
| 795 | <p> |
| 796 | [I would like to thank LWN's travel sponsor, the Linux Foundation, for |
| 797 | travel assistance to attend the Linux Security Summit in Vancouver.] |
| 798 | <p><a href="/Articles/763641/#Comments">Comments (70 posted)</a> |
| 799 | <p> |
| 800 | <a name="763106"></a><h2 class="SummaryHL"><a href="/Articles/763106/">The second half of the 4.19 merge window</a></h2> |
| 801 | |
| 802 | <div class="FeatureByline"> |
| 803 | By <b>Jonathan Corbet</b><br>August 26, 2018 |
| 804 | </div> |
| 805 | By the time Linus Torvalds <a href="/Articles/763497/">released |
| 806 | 4.19-rc1</a> and closed |
| 807 | the merge window for this development cycle, 12,317 non-merge |
| 808 | changesets had found their way into the mainline; about 4,800 of those |
| 809 | landed after <a href="/Articles/762566/">last week's summary</a> was |
| 810 | written. As tends to be the case |
| 811 | late in the merge window, many of those changes were fixes for the bigger |
| 812 | patches that went in early, but there were also a number of new features |
| 813 | added. Some of the more significant changes include: |
| 814 | <br clear="all"> |
| 815 | <p> |
| 816 | |
| 817 | <h4>Core kernel</h4> |
| 818 | <p> |
| 819 | <ul class="spacylist"> |
| 820 | |
| 821 | <li> The full set of patches adding <a |
| 822 | href="/Articles/761118/">control-group awareness to the out-of-memory |
| 823 | killer</a> has <i>not</i> been merged due to ongoing disagreements, |
| 824 | but one piece of it has: there is a new <tt>memory.oom.group</tt> |
| 825 | control knob that will cause all processes within a control group to |
| 826 | be killed in an out-of-memory situation. |
| 827 | <li> A new set of protections has been added to prevent an attacker from |
| 828 | fooling a program into writing to an existing file or FIFO. An open |
| 829 | with the <tt>O_CREAT</tt> flag to a file or FIFO in a world-writable, |
| 830 | sticky |
| 831 | directory (e.g. <tt>/tmp</tt>) will fail if the owner of the opening |
| 832 | process is not the owner of either the target file or the containing |
| 833 | directory. This behavior, disabled by default, is controlled by the |
| 834 | new <tt>protected_regular</tt> and <tt>protected_fifos</tt> sysctl |
| 835 | knobs. |
| 836 | |
| 837 | </ul> |
| 838 | |
| 839 | <h4>Filesystems and block layer</h4> |
| 840 | <p> |
| 841 | <ul class="spacylist"> |
| 842 | |
| 843 | <li> The dm-integrity device-mapper target can now use a separate device |
| 844 | for metadata storage. |
| 845 | <li> EROFS, the "enhanced read-only filesystem", has been added to the |
| 846 | staging tree. It is "<span>a lightweight read-only file system with |
| 847 | modern designs (eg. page-sized blocks, inline xattrs/data, etc.) for |
| 848 | scenarios which need high-performance read-only requirements, |
| 849 | eg. firmwares in mobile phone or LIVECDs</span>" |
| 850 | <li> The new "metadata copy-up" feature in overlayfs will avoid copying a |
| 851 | file's contents to the upper layer on a metadata-only change. See <a |
| 852 | href="https://git.kernel.org/linus/d5791044d2e5749ef4de84161cec5532e2111540">this |
| 853 | commit</a> for details. |
| 854 | |
| 855 | </ul> |
| 856 | <p> |
| 857 | |
| 858 | <h4>Hardware support</h4> |
| 859 | <p> |
| 860 | <ul class="spacylist"> |
| 861 | |
| 862 | <li> <b>Graphics</b>: |
| 863 | Qualcomm Adreno A6xx GPUs. |
| 864 | |
| 865 | <li> <b>Industrial I/O</b>: |
| 866 | Spreadtrum SC27xx series PMIC analog-to-digital converters, |
| 867 | Analog Devices AD5758 digital-to-analog converters, |
| 868 | Intersil ISL29501 time-of-flight sensors, |
| 869 | Silicon Labs SI1133 UV index/ambient light sensor chips, and |
| 870 | Bosch Sensortec BME680 sensors. |
| 871 | |
| 872 | |
| 873 | <li> <b>Miscellaneous</b>: |
| 874 | Generic ADC-based resistive touchscreens, |
| 875 | Generic ASIC devices via the Google <a |
| 876 | href="/ml/linux-kernel/20180630000253.70103-1-sque@chromium.org/">Gasket |
| 877 | framework</a>, |
| 878 | Analog Devices ADGS1408/ADGS1409 multiplexers, |
| 879 | Actions Semi Owl SoCs DMA controllers, |
| 880 | MEN 16Z069 watchdog timers, |
| 881 | Rohm BU21029 touchscreen controllers, |
| 882 | Cirrus Logic CS47L35, CS47L85, CS47L90, and CS47L91 codecs, |
| 883 | Cougar 500k gaming keyboards, |
| 884 | Qualcomm GENI-based I2C controllers, |
| 885 | Actions Semiconductor Owl I2C controllers, |
| 886 | ChromeOS EC-based USBPD chargers, and |
| 887 | Analog Devices ADP5061 battery chargers. |
| 888 | |
| 889 | <li> <b>USB</b>: |
| 890 | Nuvoton NPCM7XX on-chip EHCI USB controllers, |
| 891 | Broadcom Stingray PCIe PHYs, and |
| 892 | Renesas R-Car generation 3 PCIe PHYs. |
| 893 | |
| 894 | <li> There is also a new subsystem for the abstraction of GNSS (global |
| 895 | navigation satellite systems — GPS, for example) receivers in the |
| 896 | kernel. To date, such devices have been handled with an abundance of |
| 897 | user-space drivers; the hope is to bring some order in this area. |
| 898 | Support for u-blox and SiRFstar receivers has been added as well. |
| 899 | |
| 900 | </ul> |
| 901 | |
| 902 | |
| 903 | <p> |
| 904 | <h4>Kernel internal</h4> |
| 905 | <p> |
| 906 | <ul class="spacylist"> |
| 907 | |
| 908 | <li> The <tt>__deprecated</tt> marker, used to mark interfaces that should |
| 909 | no longer be used, has been deprecated and removed from the kernel |
| 910 | entirely. <a |
| 911 | href="https://git.kernel.org/linus/771c035372a036f83353eef46dbb829780330234">Torvalds |
| 912 | said</a>: "<span>They are not useful. They annoy |
| 913 | everybody, and nobody ever does anything about them, because it's |
| 914 | always 'somebody elses problem'. And when people start thinking that |
| 915 | warnings are normal, they stop looking at them, and the real warnings |
| 916 | that mean something go unnoticed.</span>" |
| 917 | <li> The minimum version of GCC required by the kernel has been moved up to |
| 918 | 4.6. |
| 919 | |
| 920 | </ul> |
| 921 | <p> |
| 922 | |
| 923 | There are a couple of significant changes that failed to get in this time |
| 924 | around, including the <a |
| 925 | href="/Articles/745073/">XArray</a> data structure. The patches are |
| 926 | thought to be ready, but they had the bad luck to be based on a tree that |
| 927 | failed to be merged for other reasons, so Torvalds <a |
| 928 | href="/ml/linux-kernel/CA+55aFxFjAmrFpwQmEHCthHOzgidCKnod+cNDEE+3Spu9o1s3w@mail.gmail.com/">didn't |
| 929 | even look at them</a>. That, in turn, blocks another set of patches intended to |
| 930 | enable migration of slab-allocated objects. |
| 931 | <p> |
| 932 | The other big deferral is the <a href="/Articles/759499/">new system-call |
| 933 | API for filesystem mounting</a>. Despite ongoing <a |
| 934 | href="/Articles/762355/">concerns</a> about what happens when the same |
| 935 | low-level device is mounted multiple times with conflicting options, Al |
| 936 | Viro sent <a |
| 937 | href="/ml/linux-fsdevel/20180823223145.GK6515@ZenIV.linux.org.uk/">a pull |
| 938 | request</a> to send this work upstream. The ensuing discussion made it |
| 939 | clear that there is still not a consensus in this area, though, so it seems |
| 940 | that this work has to wait for another cycle. |
| 941 | <p> |
| 942 | Assuming all goes well, the kernel will stabilize over the coming weeks and |
| 943 | the final 4.19 release will happen in mid-October. |
| 944 | <p><a href="/Articles/763106/#Comments">Comments (1 posted)</a> |
| 945 | <p> |
| 946 | <a name="763603"></a><h2 class="SummaryHL"><a href="/Articles/763603/">Measuring (and fixing) I/O-controller throughput loss</a></h2> |
| 947 | |
| 948 | <div class="GAByline"> |
| 949 | <p>August 29, 2018</p> |
| 950 | <p>This article was contributed by Paolo Valente</p> |
| 951 | </div> |
| 952 | <p>Many services, from web hosting and video streaming to cloud storage, |
| 953 | need to move data to and from storage. They also often require that each per-client |
| 954 | I/O flow be guaranteed a non-zero amount of bandwidth and a bounded latency. An |
| 955 | expensive way to provide these guarantees is to over-provision |
| 956 | storage resources, keeping each resource underutilized, and thus |
| 957 | have plenty of bandwidth available for the few I/O flows dispatched to |
| 958 | each medium. Alternatively one can use an I/O controller. Linux provides |
| 959 | two mechanisms designed to throttle some I/O streams to allow others to |
| 960 | meet their bandwidth and latency requirements. These mechanisms work, but |
| 961 | they come at a cost: a loss of as much as 80% of total available I/O |
| 962 | bandwidth. I have run some tests to demonstrate this problem; some |
| 963 | upcoming improvements to the <a href="/Articles/601799/">bfq I/O |
| 964 | scheduler</a> promise to improve the situation considerably. |
| 965 | <p> |
| 966 | |
| 967 | <p>Throttling does guarantee control, even on drives that happen to be |
| 968 | highly utilized but, as will be seen, it has a hard time |
| 969 | actually ensuring that drives are highly utilized. Even with greedy I/O |
| 970 | flows, throttling |
| 971 | easily ends up utilizing as little as 20% of the available speed of a |
| 972 | flash-based drive. |
| 973 | |
| 974 | Such a speed loss may be particularly problematic with lower-end |
| 975 | storage. On the opposite end, it is also disappointing with |
| 976 | high-end hardware, as the Linux block I/O stack itself has been |
| 977 | <a href="/Articles/552904">redesigned from the ground up</a> to fully utilize the |
| 978 | high speed of modern, fast storage. In |
| 979 | addition, throttling fails to guarantee the expected bandwidths if I/O |
| 980 | contains both reads and writes, or is sporadic in nature. |
| 981 | |
| 982 | <p>On the bright side, there now seems to be an effective alternative for |
| 983 | controlling I/O: the proportional-share policy provided by the bfq I/O |
| 984 | scheduler. It enables nearly 100% storage bandwidth utilization, |
| 985 | at least with some of the workloads that are problematic for |
| 986 | throttling. An upcoming version of bfq may be able to |
| 987 | achieve this result with almost all workloads. Finally, bfq |
| 988 | guarantees bandwidths with all workloads. The current limitation of |
| 989 | bfq is that its execution overhead becomes significant at speeds above |
| 990 | 400,000 I/O operations per second on commodity CPUs. |
| 991 | |
| 992 | <p>Using the bfq I/O scheduler, Linux can now guarantee |
| 993 | low latency to lightweight flows containing sporadic, short I/O. No |
| 994 | throughput issues arise, and no configuration is required. This |
| 995 | capability benefits important, time-sensitive tasks, such as |
| 996 | video or audio streaming, as well as executing commands or starting |
| 997 | applications. |
| 998 | |
| 999 | Although benchmarks are not available yet, these guarantees might also be |
| 1000 | provided by the newly proposed <a href="/Articles/758963/">I/O latency |
| 1001 | controller</a>. It allows administrators to set target latencies for I/O |
| 1002 | requests originating from each group of processes, and favors the |
| 1003 | groups with the lowest target latency. |
| 1004 | |
| 1005 | <h4>The testbed</h4> |
| 1006 | |
| 1007 | <p>I ran the tests with an ext4 filesystem mounted on a PLEXTOR |
| 1008 | PX-256M5S SSD, which features a peak rate of ~160MB/s with random I/O, |
| 1009 | and of ~500MB/s with sequential I/O. I used blk-mq, in Linux |
| 1010 | 4.18. The system was equipped with a 2.4GHz Intel Core i7-2760QM |
| 1011 | CPU and 1.3GHz DDR3 DRAM. In such a system, a single thread doing |
| 1012 | synchronous reads reaches a throughput of 23MB/s. |
| 1013 | |
| 1014 | <p> |
| 1015 | For the purposes of these tests, each process is considered to be in one of |
| 1016 | two groups, termed "target" and "interferers". |
| 1017 | A target is a single-process, I/O-bound group whose I/O is focused on. In |
| 1018 | particular, I measure the I/O throughput enjoyed by this group to get |
| 1019 | the minimum bandwidth delivered to the group. |
| 1020 | An interferer is single-process group whose role is to generate |
| 1021 | additional I/O that interferes with the I/O of the target. |
| 1022 | The tested workloads contain one target and multiple interferers. |
| 1023 | |
| 1024 | <p>The single process in each group either reads or writes, through |
| 1025 | asynchronous (buffered) operations, to one file — different from the file read |
| 1026 | or written by any other process — after invalidating the buffer cache |
| 1027 | for the file. I define a reader or writer process as either "random" or |
| 1028 | "sequential", depending on whether it reads or writes its file at random |
| 1029 | positions or sequentially. |
| 1030 | Finally, an interferer is defined as being either "active" or "inactive" |
| 1031 | depending on whether it performs I/O during the test. When an |
| 1032 | interferer is mentioned, it is assumed that the interferer is active. |
| 1033 | |
| 1034 | <p>Workloads are defined so as to try to cover the combinations that, I |
| 1035 | believe, most influence the performance of the storage device and of |
| 1036 | the I/O policies. For brevity, in this article I show results for only |
| 1037 | two groups of workloads: |
| 1038 | <p> |
| 1039 | <ul class="spacylist"> |
| 1040 | |
| 1041 | <li> <b>Static sequential</b>: four synchronous sequential readers or four |
| 1042 | asynchronous sequential writers, plus five inactive interferers. |
| 1043 | |
| 1044 | <li> <b>Static random</b>: four synchronous random readers, all with a block |
| 1045 | size equal to 4k, plus five inactive interferers. |
| 1046 | </ul> |
| 1047 | |
| 1048 | <p>To create each workload, I considered, for each mix of |
| 1049 | interferers in the group, two possibilities for the target: it could be |
| 1050 | either a random or a sequential synchronous reader. |
| 1051 | |
| 1052 | In <a |
| 1053 | href="http://algogroup.unimore.it/people/paolo/pub-docs/extended-lat-bw-throughput.pdf">a |
| 1054 | longer version of this article [PDF]</a>, you will also find results |
| 1055 | for workloads with varying degrees of I/O randomness, and for |
| 1056 | dynamic workloads (containing sporadic I/O sources). These extra results |
| 1057 | confirm the losses of throughput and I/O control for throttling that |
| 1058 | are shown here. |
| 1059 | |
| 1060 | <h4>I/O policies</h4> |
| 1061 | |
| 1062 | <p>Linux provides two I/O-control mechanisms for guaranteeing (a minimum) |
| 1063 | bandwidth, or at least fairness, to long-lived flows: the throttling |
| 1064 | and proportional-share I/O policies. |
| 1065 | With throttling, one can set a maximum bandwidth limit — "max limit" for |
| 1066 | brevity — for the I/O of each group. Max limits can be used, |
| 1067 | in an indirect way, to provide the service guarantee at the focus of this |
| 1068 | article. For example, to guarantee minimum bandwidths to I/O flows, a group can |
| 1069 | be guaranteed a minimum bandwidth by limiting the maximum bandwidth of |
| 1070 | all the other groups. |
| 1071 | |
| 1072 | <p>Unfortunately, max limits have two drawbacks in terms of |
| 1073 | throughput. First, if some groups do not use their allocated bandwidth, |
| 1074 | that bandwidth cannot be reclaimed by other active groups. Second, |
| 1075 | limits must comply with the worst-case speed of the device, namely, |
| 1076 | its random-I/O peak rate. Such limits will clearly leave a lot of |
| 1077 | throughput unused with workloads that otherwise would drive the |
| 1078 | device to higher throughput levels. |
| 1079 | |
| 1080 | Maximizing throughput is simply not a goal of max limits. So, for |
| 1081 | brevity, test results with max limits are not shown here. You can |
| 1082 | find these results, plus a more detailed description of the above |
| 1083 | drawbacks, in the long version of this article. |
| 1084 | |
| 1085 | <p>Because of these drawbacks, a new, still experimental, low limit |
| 1086 | has been added to the throttling policy. If a group is |
| 1087 | assigned a low limit, then the throttling policy automatically |
| 1088 | limits the I/O of the other groups in such a way to |
| 1089 | guarantee to the group a minimum bandwidth equal to its assigned low |
| 1090 | limit. This new throttling mechanism throttles no group as long as |
| 1091 | every group is getting at least its assigned minimum bandwidth. I tested |
| 1092 | this mechanism, but did not consider the interesting problem |
| 1093 | of guaranteeing minimum bandwidths while, at the same time, enforcing |
| 1094 | maximum bandwidths. |
| 1095 | |
| 1096 | <p>The other I/O policy available in Linux, proportional share, |
| 1097 | provides weighted fairness. Each group is assigned a weight, and should |
| 1098 | receive a portion of the total throughput proportional to its weight. |
| 1099 | This scheme guarantees minimum bandwidths in the same way that low limits do |
| 1100 | in throttling. In particular, it guarantees to each group a minimum |
| 1101 | bandwidth equal to the ratio between the weight of the group, and the |
| 1102 | sum of the weights of all the groups that may be active at the same |
| 1103 | time. |
| 1104 | |
| 1105 | <p>The actual implementation of the proportional-share policy, on a given |
| 1106 | drive, depends on what flavor of the block layer is in use for that |
| 1107 | drive. If the drive is using the legacy block interface, the policy is |
| 1108 | implemented by |
| 1109 | the cfq I/O scheduler. Unfortunately, cfq fails to control |
| 1110 | bandwidths with flash-based storage, especially on drives featuring |
| 1111 | command queueing. This case is not considered in these tests. With |
| 1112 | drives using the multiqueue interface, |
| 1113 | proportional share is implemented by bfq. This is the |
| 1114 | combination considered in the tests. |
| 1115 | |
| 1116 | <p>To benchmark both throttling (low limits) and proportional share, I |
| 1117 | tested, for each workload, the combinations of I/O policies and I/O |
| 1118 | schedulers reported in the table below. In the end, there are three test |
| 1119 | cases for each workload. In addition, for some workloads, I considered two |
| 1120 | versions of bfq for the proportional-share policy. |
| 1121 | |
| 1122 | <blockquote> |
| 1123 | <table class="OddEven"> |
| 1124 | <tr> |
| 1125 | <th align="left" width="14%" valign="top">Name </th> |
| 1126 | <th align="left" width="14%" valign="top">I/O policy </th> |
| 1127 | <th align="left" width="14%" valign="top">Scheduler </th> |
| 1128 | <th align="left" width="14%" valign="top">Parameter for target </th> |
| 1129 | <th align="left" width="14%" valign="top">Parameter for each |
| 1130 | of the four active interferers </th> |
| 1131 | <th align="left" width="14%" valign="top">Parameter for each of the five inactive |
| 1132 | interferers </th> |
| 1133 | <th align="left" width="14%" valign="top">Sum of parameters</th> |
| 1134 | </tr> |
| 1135 | <tr> |
| 1136 | <td align="left" width="14%" valign="top">low-none</td> |
| 1137 | <td align="left" width="14%" valign="top">Throttling with low limits</td> |
| 1138 | <td align="left" width="14%" valign="top">none</td> |
| 1139 | <td align="left" width="14%" valign="top">10MB/s</td> |
| 1140 | <td align="left" width="14%" valign="top">10MB/s |
| 1141 | (tot: 40)</td> |
| 1142 | <td align="left" width="14%" valign="top">20MB/s (tot: 100)</td> |
| 1143 | <td align="left" width="14%" valign="top">150MB/s</td> |
| 1144 | </tr> |
| 1145 | <tr> |
| 1146 | <td align="left" width="14%" valign="top">prop-bfq</td> |
| 1147 | <td align="left" width="14%" valign="top">Proportional share</td> |
| 1148 | <td align="left" width="14%" valign="top">bfq</td> |
| 1149 | <td align="left" width="14%" valign="top">300</td> |
| 1150 | <td align="left" width="14%" valign="top">100 (tot: 400)</td> |
| 1151 | <td align="left" width="14%" valign="top">200 |
| 1152 | (tot: 1000)</td> |
| 1153 | <td align="left" width="14%" valign="top">1700</td> |
| 1154 | </tr> |
| 1155 | </table> |
| 1156 | </blockquote> |
| 1157 | |
| 1158 | |
| 1159 | |
| 1160 | |
| 1161 | <p>For low limits, I report results with only none as the I/O scheduler, |
| 1162 | because the results are the same with kyber and mq-deadline. |
| 1163 | |
| 1164 | <p>The capabilities of the storage medium and of low limits drove the policy |
| 1165 | configurations. In particular: |
| 1166 | |
| 1167 | <ul class="spacylist"> |
| 1168 | |
| 1169 | <li> The configuration of the target and of the active interferers for |
| 1170 | low-none is the one for which low-none provides |
| 1171 | its best possible minimum-bandwidth guarantee to the target: 10MB/s, |
| 1172 | guaranteed if all interferers are readers. |
| 1173 | Results remain the same regardless of the values used for target |
| 1174 | latency and idle time; I set them to 100µs and |
| 1175 | 1000µs, respectively, for every group.</li> |
| 1176 | |
| 1177 | <li> Low limits for inactive interferers are set to twice the limits for |
| 1178 | active interferers, to pose greater difficulties to the |
| 1179 | policy.</li> |
| 1180 | |
| 1181 | <li> I chose weights for prop-bfq so as to guarantee about the same |
| 1182 | minimum bandwidth as low-none to the target, in the same |
| 1183 | only-reader worst case as for low-none and to preserve, between |
| 1184 | the weights of active and inactive interferers, the same ratio as |
| 1185 | between the low limits of active and inactive interferers.</li> |
| 1186 | </ul> |
| 1187 | <p>Full details on configurations can be found in the long version of this |
| 1188 | article. |
| 1189 | |
| 1190 | <p>Each workload was run ten times for each policy, plus ten times without |
| 1191 | any I/O control, i.e., with none as I/O scheduler and no I/O policy in |
| 1192 | use. For each run, I measured the I/O throughput of the target (which |
| 1193 | reveals the bandwidth provided to the target), the cumulative I/O |
| 1194 | throughput of the interferers, and the total I/O throughput. These |
| 1195 | quantities fluctuated very little during each run, as well as across |
| 1196 | different runs. Thus in the graphs I report only averages over per-run |
| 1197 | average throughputs. In particular, for the case of no I/O control, I |
| 1198 | report only the total I/O throughput, to give an idea of the throughput |
| 1199 | that can be reached without imposing any control. |
| 1200 | |
| 1201 | <h4>Results</h4> |
| 1202 | |
| 1203 | <p> |
| 1204 | This plot shows throughput results for the simplest group of |
| 1205 | workloads: the static-sequential set. |
| 1206 | |
| 1207 | <blockquote> |
| 1208 | <img src="https://static.lwn.net/images/2018/iocontrol/fig1.png" alt="[Figure 1]" class="photo"> |
| 1209 | </blockquote> |
| 1210 | <p> |
| 1211 | |
| 1212 | With a random reader as |
| 1213 | the target against sequential readers as interferers, low-none does |
| 1214 | guarantee the configured low limit to the target. Yet it reaches only a |
| 1215 | low total throughput. The throughput of |
| 1216 | the random reader evidently oscillates around 10MB/s during the test. |
| 1217 | This implies that it is at least slightly below 10MB/s for a significant |
| 1218 | percentage of the time. But when this happens, the low-limit mechanism |
| 1219 | limits the maximum bandwidth of every active group to the low limit set |
| 1220 | for the group, i.e., to just 10MB/s. |
| 1221 | The end result is a total throughput lower than 10% of the throughput |
| 1222 | reached without I/O control. |
| 1223 | <p> |
| 1224 | That said, the high throughput achieved without I/O control is |
| 1225 | obtained by choking the random I/O of the target in favor of |
| 1226 | the sequential I/O of the interferers. Thus, it |
| 1227 | is probably more interesting to compare low-none throughput with the |
| 1228 | throughput reachable while actually guaranteeing 10MB/s to the target. |
| 1229 | The target is a single, synchronous, random reader, which reaches 23MB/s while |
| 1230 | active. So, to guarantee 10MB/s to the target, it is enough to |
| 1231 | serve it for about half of the time, and the interferers for the other |
| 1232 | half. Since the device reaches ~500MB/s with the sequential I/O of the |
| 1233 | interferers, the resulting throughput with this service scheme would be |
| 1234 | (500+23)/2, or about 260MB/s. low-none thus reaches less than 20% |
| 1235 | of the |
| 1236 | total throughput that could be reached while still preserving the target |
| 1237 | bandwidth. |
| 1238 | |
| 1239 | <p>prop-bfq provides the target with a slightly higher throughput than |
| 1240 | low-none. This makes it harder for prop-bfq to reach a high total |
| 1241 | throughput, because prop-bfq serves more random I/O (from the target) |
| 1242 | than low-none. Nevertheless, prop-bfq gets a much higher total |
| 1243 | throughput than low-none. According to the above estimate, this |
| 1244 | throughput is about 90% of the maximum throughput that could be reached, |
| 1245 | for this workload, without violating service guarantees. The reason for |
| 1246 | this good result is that bfq provides an effective implementation of |
| 1247 | the proportional-share service policy. At any time, each active group is |
| 1248 | granted a fraction of the current total throughput, and the sum of these |
| 1249 | fractions is equal to one; so group bandwidths naturally saturate the |
| 1250 | available total throughput at all times. |
| 1251 | |
| 1252 | <p>Things change with the second workload: a random reader against |
| 1253 | sequential writers. Now low-none reaches a much higher total |
| 1254 | throughput than prop-bfq. low-none serves |
| 1255 | much more sequential (write) I/O than prop-bfq because writes somehow |
| 1256 | break the low-limit mechanisms and prevail over the reads of the target. |
| 1257 | Conceivably, this happens because writes tend to both starve reads in |
| 1258 | the OS (mainly by eating all available I/O tags) and to cheat on their |
| 1259 | completion time in the drive. In contrast, bfq is intentionally |
| 1260 | configured to privilege reads, to counter these issues. |
| 1261 | |
| 1262 | <p>In particular, low-none gets an even higher throughput than no |
| 1263 | I/O control at all because it penalizes the random I/O of the target even more |
| 1264 | than the no-controller configuration. |
| 1265 | |
| 1266 | <p>Finally, with the last two workloads, prop-bfq reaches even |
| 1267 | higher total throughput than with the first two. It happens |
| 1268 | because the target also does sequential I/O, and serving sequential |
| 1269 | I/O is much more beneficial for throughput than serving random I/O. With |
| 1270 | these two workloads, the total throughput is, respectively, close to or |
| 1271 | much higher than that reached without I/O control. For the last |
| 1272 | workload, the total throughput is much higher because, differently from |
| 1273 | none, bfq privileges reads over asynchronous writes, and reads yield |
| 1274 | a higher throughput than writes. In contrast, low-none still gets |
| 1275 | lower or much lower throughput than prop-bfq, because of the same |
| 1276 | issues that hinder low-none throughput with the first two workloads. |
| 1277 | |
| 1278 | <p>As for bandwidth guarantees, with readers as interferers (third |
| 1279 | workload), prop-bfq, as expected, gives the target a fraction of the |
| 1280 | total throughput proportional to its weight. bfq approximates |
| 1281 | perfect proportional-share bandwidth distribution among groups doing I/O |
| 1282 | of the same type (reads or writes) and with the same locality |
| 1283 | (sequential or random). With the last workload, prop-bfq gives much |
| 1284 | more throughput to the reader than to all the interferers, because |
| 1285 | interferers are asynchronous writers, and bfq privileges reads. |
| 1286 | |
| 1287 | <p>The second group of workloads (static random), is the one, among all |
| 1288 | the workloads considered, for which prop-bfq performs worst. |
| 1289 | Results are shown below: |
| 1290 | <p> |
| 1291 | <blockquote> |
| 1292 | <img src="https://static.lwn.net/images/2018/iocontrol/fig2.png" alt="[Figure 2]" class="photo"> |
| 1293 | </blockquote> |
| 1294 | <p> |
| 1295 | |
| 1296 | This chart |
| 1297 | reports results not only for mainline bfq, but also for an |
| 1298 | improved version of |
| 1299 | bfq which is currently under public testing. |
| 1300 | As can be seen, with only random readers, prop-bfq reaches a |
| 1301 | much lower total throughput than low-none. This happens because of |
| 1302 | the Achilles heel of the bfq I/O scheduler. If the process in service |
| 1303 | does synchronous I/O and has a higher weight than some other process, then, to |
| 1304 | give strong bandwidth guarantees to that process, bfq plugs I/O |
| 1305 | dispatching every time the process temporarily stops issuing |
| 1306 | I/O requests. In this respect, processes actually have differentiated |
| 1307 | weights and do synchronous I/O in the workloads tested. So bfq |
| 1308 | systematically performs I/O plugging for them. Unfortunately, this |
| 1309 | plugging empties the internal queues of the drive, which kills |
| 1310 | throughput with random I/O. And the I/O of all processes in these |
| 1311 | workloads is also random. |
| 1312 | |
| 1313 | <p>The situation reverses with a sequential reader as target. Yet, the most |
| 1314 | interesting results come from the new version of bfq, containing |
| 1315 | small changes to counter exactly the above weakness. This |
| 1316 | version recovers most of the throughput loss with the workload made of |
| 1317 | only random I/O and more; with the second workload, where the target is |
| 1318 | a sequential reader, it reaches about 3.7 times the total throughput of |
| 1319 | low-none. |
| 1320 | <p> |
| 1321 | |
| 1322 | When the main concern is the latency of flows containing short I/O, |
| 1323 | Linux seems now rather high performing, thanks to the bfq I/O |
| 1324 | scheduler and the I/O latency controller. But if the |
| 1325 | requirement is to provide explicit bandwidth guarantees (or just fairness) to |
| 1326 | I/O flows, then one must be ready to give up much or most of the speed of |
| 1327 | the storage media. bfq helps with some workloads, but loses most of |
| 1328 | the throughput with workloads consisting of mostly random |
| 1329 | I/O. Fortunately, there is apparently hope for much better |
| 1330 | performance since an improvement, still under development, seems to |
| 1331 | enable bfq to reach a high throughput with all workloads tested so |
| 1332 | far. |
| 1333 | |
| 1334 | |
| 1335 | |
| 1336 | <p> |
| 1337 | [ I wish to thank Vivek Goyal for enabling me to make this article |
| 1338 | much more fair and sound.]<div class="MakeALink"> |
| 1339 | <table align="right"><tr><td> |
| 1340 | <form action="/SubscriberLink/MakeLink" method="post"> |
| 1341 | <input type="hidden" name="articleid" value="763603"> |
| 1342 | <input type="submit" value="Send a free link"></form> |
| 1343 | </td></tr></table> |
| 1344 | </div> |
| 1345 | <br clear="all"> |
| 1346 | |
| 1347 | <p><a href="/Articles/763603/#Comments">Comments (4 posted)</a> |
| 1348 | <p> |
| 1349 | <a name="763175"></a><h2 class="SummaryHL"><a href="/Articles/763175/">KDE's onboarding initiative, one year later</a></h2> |
| 1350 | |
| 1351 | <div class="GAByline"> |
| 1352 | <p>August 24, 2018</p> |
| 1353 | <p>This article was contributed by Marta Rybczyńska</p> |
| 1354 | <hr> |
| 1355 | <a href="/Archives/ConferenceByYear/#2018-Akademy">Akademy</a> |
| 1356 | </div> |
| 1357 | <p>In 2017, the KDE community decided on <a |
| 1358 | href="https://dot.kde.org/2017/11/30/kdes-goals-2018-and-beyond">three |
| 1359 | goals</a> |
| 1360 | to concentrate on for the next few years. One of them was <a |
| 1361 | href="https://phabricator.kde.org/T7116">streamlining the onboarding of new |
| 1362 | contributors</a> (the others were <a |
| 1363 | href="https://phabricator.kde.org/T6831">improving |
| 1364 | usability</a> and <a href="https://phabricator.kde.org/T7050">privacy</a>). |
| 1365 | During <a href="https://akademy.kde.org/">Akademy</a>, the yearly KDE |
| 1366 | conference |
| 1367 | that was held in Vienna in August, Neofytos Kolokotronis shared the status |
| 1368 | of the |
| 1369 | onboarding goal, the work done during the last year, and further plans. |
| 1370 | While it is a complicated process in a project as big and diverse as KDE, |
| 1371 | numerous improvements have been already made.</p> |
| 1372 | |
| 1373 | <p>Two of the three KDE community goals were proposed by relative |
| 1374 | newcomers. Kolokotronis was one of those, having joined the <a |
| 1375 | href="https://community.kde.org/Promo">KDE Promo team</a> |
| 1376 | not long before proposing |
| 1377 | the focus on onboarding. He had previously been involved with <a |
| 1378 | href="https://www.chakralinux.org/">Chakra |
| 1379 | Linux</a>, a distribution based on KDE software. The fact that new |
| 1380 | members of the community proposed strategic goals was also noted in the <a |
| 1381 | href="https://conf.kde.org/en/Akademy2018/public/events/79">Sunday keynote |
| 1382 | by Claudia Garad</a>.</p> |
| 1383 | |
| 1384 | <p>Proper onboarding adds excitement to the contribution process and |
| 1385 | increases retention, he explained. When we look at <a |
| 1386 | href="https://en.wikipedia.org/wiki/Onboarding">the definition of |
| 1387 | onboarding</a>, |
| 1388 | it is a process in which the new contributors acquire knowledge, skills, and |
| 1389 | behaviors so that they can contribute effectively. Kolokotronis proposed |
| 1390 | to see it also as socialization: integration into the project's relationships, |
| 1391 | culture, structure, and procedures.</p> |
| 1392 | |
| 1393 | <p>The gains from proper onboarding are many. The project can grow by |
| 1394 | attracting new blood with new perspectives and solutions. The community |
| 1395 | maintains its health and stays vibrant. Another important advantage of |
| 1396 | efficient onboarding is that replacing current contributors becomes easier |
| 1397 | when they change interests, jobs, or leave the project for whatever reason. |
| 1398 | Finally, successful onboarding adds new advocates to the project.</p> |
| 1399 | |
| 1400 | <h4>Achievements so far and future plans</h4> |
| 1401 | |
| 1402 | <p>The team started with ideas for a centralized onboarding process for the |
| 1403 | whole of KDE. They found out quickly that this would not work because KDE |
| 1404 | is "very decentralized", so it is hard to provide tools and |
| 1405 | procedures that are going to work for the whole project. According to |
| 1406 | Kolokotronis, other characteristics of KDE that impact onboarding are high |
| 1407 | diversity, remote and online teams, and hundreds of contributors in dozens of |
| 1408 | projects and teams. In addition, new contributors already know in which |
| 1409 | area they want to take part and they prefer specific information that will |
| 1410 | be directly useful for them.</p> |
| 1411 | |
| 1412 | <p>So the team changed its approach; several changes have since been proposed |
| 1413 | and implemented. The <a href="https://community.kde.org/Get_Involved">Get |
| 1414 | Involved</a> page, which is expected to be one of the resources new |
| 1415 | contributors read first, has been rewritten. For the <a |
| 1416 | href="https://community.kde.org/KDE/Junior_Jobs">Junior Jobs page</a>, the |
| 1417 | team is |
| 1418 | |
| 1419 | <a href="/Articles/763189/"><img |
| 1420 | src="https://static.lwn.net/images/conf/2018/akademy/NeofytosKolokotronis-sm.jpg" alt="[Neofytos |
| 1421 | Kolokotronis]" title="Neofytos Kolokotronis" class="rthumb"></a> |
| 1422 | |
| 1423 | |
| 1424 | <a |
| 1425 | href="https://phabricator.kde.org/T8686">discussing</a> what the |
| 1426 | generic content for KDE as a whole should be. The team simplified <a |
| 1427 | href="https://phabricator.kde.org/T7646">Phabricator registration</a>, |
| 1428 | which |
| 1429 | resulted in documenting the process better. Another part of the work |
| 1430 | includes the <a href="https://bugs.kde.org/">KDE Bugzilla</a>; it includes, |
| 1431 | for example initiatives to limit the number of |
| 1432 | states of a ticket or remove obsolete products.</p> |
| 1433 | |
| 1434 | <p>The <a href="https://www.plasma-mobile.org/index.html">Plasma Mobile</a> |
| 1435 | team is heavily involved in the onboarding goal. The Plasma Mobile |
| 1436 | developers have simplified their |
| 1437 | development environment setup and created an <a |
| 1438 | href="https://www.plasma-mobile.org/findyourway">interactive "Get |
| 1439 | Involved"</a> page. In addition, the Plasma team changed the way task |
| 1440 | descriptions are written; they now contain more detail, so that it is |
| 1441 | easier to get |
| 1442 | involved. The basic description should be short and clear, and it should include |
| 1443 | details of the problem and possible solutions. The developers try to |
| 1444 | share the list of skills necessary to fulfill the tasks and include clear |
| 1445 | links to the technical resources needed.</p> |
| 1446 | |
| 1447 | <p>Kolokotronis and team also identified a new potential source of |
| 1448 | contributors for KDE: distributions using |
| 1449 | KDE. They have the advantage of already knowing and using the software. |
| 1450 | |
| 1451 | The next idea the team is working on is to make sure that setting up a |
| 1452 | development environment is easy. The team plans to work on this during a |
| 1453 | dedicated sprint this autumn.</p> |
| 1454 | |
| 1455 | <h4>Searching for new contributors</h4> |
| 1456 | |
| 1457 | <p>Kolokotronis plans to search for new contributors at the periphery of the |
| 1458 | project, among the "skilled enthusiasts": loyal users who actually care |
| 1459 | about the project. They "can make wonders", he said. Those |
| 1460 | individuals may be also less confident or shy, have troubles making the |
| 1461 | first step, and need guidance. The project leaders should take that into |
| 1462 | account.</p> |
| 1463 | |
| 1464 | <p>In addition, newcomers are all different. Kolokotronis |
| 1465 | provided a long list of how contributors differ, |
| 1466 | including skills and knowledge, motives and |
| 1467 | interests, and time and dedication. His advice is to "try to find their |
| 1468 | superpower", the skills they have that are missing in the team. Those |
| 1469 | "superpowers" can then be used for the benefit of the project.</p> |
| 1470 | |
| 1471 | <p>If a project does nothing else, he said, it can start with its documentation. |
| 1472 | However, this does not only mean code documentation. Writing down the |
| 1473 | procedures or information about the internal work of the project, like who |
| 1474 | is working on what, is an important part of a project's documentation and helps |
| 1475 | newcomers. There should be also guidelines on how to start, especially |
| 1476 | setting up the development environment.</p> |
| 1477 | |
| 1478 | <p>The first thing the project leaders should do, according to |
| 1479 | Kolokotronis, is to spend time on introducing newcomers to the project. |
| 1480 | Ideally every new contributor should be assigned mentors — more |
| 1481 | experienced members who can help them when needed. The mentors and project |
| 1482 | leaders should find tasks that are interesting for each person. Answering |
| 1483 | an audience question on suggestions for shy new |
| 1484 | contributors, he recommended even more mentoring. It is also very helpful |
| 1485 | to make sure that newcomers have enough to read, but "avoid RTFM", he highlighted. It |
| 1486 | is also easy for a new contributor "to fly away", he said. The solution is |
| 1487 | to keep requesting things and be proactive.</p> |
| 1488 | |
| 1489 | <h4>What the project can do?</h4> |
| 1490 | |
| 1491 | <p>Kolokotronis suggested a number of actions for a project when it wants to |
| 1492 | improve its onboarding. The first step is preparation: the project |
| 1493 | leaders should know the team's and the project's needs. Long-term |
| 1494 | planning is important, too. It is not enough to wait for contributors to |
| 1495 | come — the project should be proactive, which means reaching out to |
| 1496 | candidates, suggesting appropriate tasks and, finally, making people |
| 1497 | available for the newcomers if they need help.</p> |
| 1498 | |
| 1499 | <p>This leads to next step: to be a mentor. Kolokotronis suggests being a |
| 1500 | "great host", but also trying to phase out the dependency on the mentor |
| 1501 | rapidly. "We have |
| 1502 | been all newcomers", he said. It can be intimidating to join an existing |
| 1503 | group. Onboarding creates a sense of belonging which, in turn, increases |
| 1504 | retention.</p> |
| 1505 | |
| 1506 | <p>The last step proposed was to be strategic. This includes thinking about |
| 1507 | the emotions you want newcomers to feel. Kolokotronis explained the |
| 1508 | strategic part with an example. The overall goal is (surprise!) improve |
| 1509 | onboarding |
| 1510 | of new contributors. An intermediate objective might be to keep the |
| 1511 | newcomers after they have made their first commit. If your strategy is to keep them |
| 1512 | confident and proud, you can use different tactics like praise and |
| 1513 | acknowledgment of the work in public. Another useful tactic may be assigning |
| 1514 | simple tasks, according to the skill of the contributor.</p> |
| 1515 | |
| 1516 | <p>To summarize, the most important thing, according to Kolokotronis, is to |
| 1517 | respond quickly and spend time with new contributors. This time should be |
| 1518 | used to explain procedures, and to introduce the people and culture. It is also |
| 1519 | essential to guide first contributions and praise contributor's skill and |
| 1520 | effort. |
| 1521 | Increase the difficulty of tasks over time to keep contributors motivated and |
| 1522 | challenged. And finally, he said, |
| 1523 | "turn them into mentors".</p> |
| 1524 | |
| 1525 | <p>Kolokotronis acknowledges that onboarding "takes time" and "everyone |
| 1526 | complains" about it. However, he is convinced that it is beneficial in the |
| 1527 | long term |
| 1528 | and that it decreases developer turnover.</p> |
| 1529 | |
| 1530 | <h4>Advice to newcomers</h4> |
| 1531 | |
| 1532 | <p>Kolokotronis concluded with some suggestions for newcomers to a |
| 1533 | project. They should try |
| 1534 | to be persistent and to not get discouraged when something goes wrong. |
| 1535 | Building connections from the very beginning is helpful. He suggests |
| 1536 | asking questions as if you were already a member "and things will be fine". |
| 1537 | However, accept criticism if it happens.</p> |
| 1538 | |
| 1539 | <p>One of the next actions of the onboarding team will be to collect |
| 1540 | feedback from newcomers and experienced contributors to see if they agree |
| 1541 | on the ideas and processes introduced so far.</p> |
| 1542 | <p><a href="/Articles/763175/#Comments">Comments (none posted)</a> |
| 1543 | <p> |
| 1544 | <a name="763492"></a><h2 class="SummaryHL"><a href="/Articles/763492/">Sharing and archiving data sets with Dat</a></h2> |
| 1545 | |
| 1546 | <div class="GAByline"> |
| 1547 | <p>August 27, 2018</p> |
| 1548 | <p>This article was contributed by Antoine Beaupré</p> |
| 1549 | </div> |
| 1550 | <p><a href="https://datproject.org">Dat</a> is a new peer-to-peer protocol |
| 1551 | that uses some of the concepts of |
| 1552 | <a href="https://www.bittorrent.com/">BitTorrent</a> and Git. Dat primarily |
| 1553 | targets researchers and |
| 1554 | open-data activists as it is a great tool for sharing, archiving, and |
| 1555 | cataloging large data sets. But it can also be used to implement |
| 1556 | decentralized web applications in a novel way.</p> |
| 1557 | |
| 1558 | <h4>Dat quick primer</h4> |
| 1559 | |
| 1560 | <p>Dat is written in JavaScript, so it can be installed with <code>npm</code>, but |
| 1561 | there are <a href="https://github.com/datproject/dat/releases">standalone |
| 1562 | binary builds</a> and |
| 1563 | a <a href="https://docs.datproject.org/install">desktop application</a> (as an AppImage). An <a href="https://datbase.org/">online viewer</a> can |
| 1564 | be used to inspect data for those who do not want to install |
| 1565 | arbitrary binaries on their computers.</p> |
| 1566 | |
| 1567 | <p>The command-line application allows basic operations like downloading |
| 1568 | existing data sets and sharing your own. |
| 1569 | Dat uses a 32-byte hex string that is an <a |
| 1570 | href="https://ed25519.cr.yp.to/">ed25519 public key</a>, which is |
| 1571 | is used to discover and find content on the net. |
| 1572 | For example, this will |
| 1573 | download some sample data:</p> |
| 1574 | |
| 1575 | <pre> |
| 1576 | $ dat clone \ |
| 1577 | dat://778f8d955175c92e4ced5e4f5563f69bfec0c86cc6f670352c457943666fe639 \ |
| 1578 | ~/Downloads/dat-demo |
| 1579 | </pre> |
| 1580 | |
| 1581 | <p>Similarly, the <code>share</code> command is used to share content. It indexes |
| 1582 | the files in a given directory and creates a new unique address like |
| 1583 | the one above. The <code>share</code> |
| 1584 | command starts a server that uses multiple discovery mechanisms (currently, the <a href="https://en.wikipedia.org/wiki/Mainline_DHT">Mainline Distributed |
| 1585 | Hash Table</a> (DHT), a <a href="https://github.com/mafintosh/dns-discovery">custom DNS server</a>, and |
| 1586 | multicast DNS) to announce the content to its peers. This is how |
| 1587 | another user, armed with that public key, can download that content |
| 1588 | with <code>dat clone</code> or mirror the files continuously with |
| 1589 | <code>dat sync</code>.</p> |
| 1590 | |
| 1591 | <p>So far, this looks a lot like BitTorrent <a href="https://en.wikipedia.org/wiki/Magnet_URI_scheme">magnet links</a> updated |
| 1592 | with 21st century cryptography. But Dat adds revisions on top of that, |
| 1593 | so modifications are automatically shared through the swarm. That is |
| 1594 | important for public data sets as those |
| 1595 | are often dynamic in nature. Revisions also make it possible to use |
| 1596 | <a href="https://blog.datproject.org/2017/10/13/using-dat-for-automatic-file-backups/">Dat as a backup system</a> by saving the data incrementally using an |
| 1597 | <a href="https://github.com/mafintosh/hypercore-archiver">archiver</a>.</p> |
| 1598 | |
| 1599 | <p>While Dat is designed to work on larger data sets, processing them |
| 1600 | for sharing may take a while. For example, sharing the Linux |
| 1601 | kernel source code required about five minutes as Dat worked on |
| 1602 | indexing all of the files. This is comparable to the performance offered by |
| 1603 | <a href="https://ipfs.io/">IPFS</a> and BitTorrent. Data sets with |
| 1604 | more or larger files may take quite a bit more time. |
| 1605 | |
| 1606 | <p> |
| 1607 | One advantage that Dat has over IPFS is that it |
| 1608 | doesn't duplicate the data. When IPFS imports new data, it duplicates |
| 1609 | the files into <code>~/.ipfs</code>. For collections of small files like the |
| 1610 | kernel, this is not a huge problem, but for larger files like videos or |
| 1611 | music, it's a significant limitation. IPFS eventually implemented a |
| 1612 | solution to this <a href="https://github.com/ipfs/go-ipfs/issues/875">problem</a> in the form of the experimental |
| 1613 | <a href="https://github.com/ipfs/go-ipfs/blob/master/docs/experimental-features.md#ipfs-filestore">filestore feature</a>, but it's not enabled by default. Even with |
| 1614 | that feature enabled, though, changes to data sets are not automatically |
| 1615 | tracked. In comparison, Dat operation on dynamic data feels much |
| 1616 | lighter. The downside is that each set needs its own <code>dat share</code> |
| 1617 | process.</p> |
| 1618 | |
| 1619 | <p>Like any peer-to-peer system, Dat needs at least one peer to stay online to |
| 1620 | offer the content, which is impractical for mobile devices. Hosting |
| 1621 | providers like <a href="https://hashbase.io/">Hashbase</a> (which is a <a href="https://github.com/datprotocol/DEPs/blob/master/proposals/0003-http-pinning-service-api.md">pinning service</a> in Dat |
| 1622 | jargon) can help users keep content online without running their own |
| 1623 | <a href="https://docs.datproject.org/server">server</a>. The closest parallel in the traditional web ecosystem |
| 1624 | would probably be content distribution networks (CDN) although pinning |
| 1625 | services are not necessarily geographically distributed and a CDN does |
| 1626 | not necessarily retain a complete copy of a website.</p> |
| 1627 | |
| 1628 | <a href="/Articles/763544/"> |
| 1629 | <img src="https://static.lwn.net/images/2018/dat-photoapp-sm.png" border=0 hspace=5 align="right" |
| 1630 | width=300 height=392 alt="[Photo app]" title="Photo app"> |
| 1631 | </a> |
| 1632 | |
| 1633 | <p>A web browser called <a href="https://beakerbrowser.com/">Beaker</a>, based on the <a href="https://electronjs.org/">Electron</a> framework, |
| 1634 | can access Dat content natively without going through a pinning |
| 1635 | service. Furthermore, Beaker is essential to get any of the <a |
| 1636 | href="https://github.com/beakerbrowser/explore">Dat |
| 1637 | applications</a> working, as they fundamentally rely on <code>dat://</code> URLs |
| 1638 | to do their magic. This means that Dat applications won't work for |
| 1639 | most users unless they install that special web browser. There is a |
| 1640 | <a href="https://addons.mozilla.org/en-US/firefox/addon/dat-p2p-protocol/">Firefox extension</a> called "<a href="https://github.com/sammacbeth/dat-fox">dat-fox</a>" for people who don't want |
| 1641 | to install yet another browser, but it requires installing a |
| 1642 | <a href="https://github.com/sammacbeth/dat-fox-helper">helper program</a>. The extension will be able to load <code>dat://</code> URLs |
| 1643 | but many applications will still not work. For example, the <a |
| 1644 | href="https://github.com/beakerbrowser/dat-photos-app">photo gallery |
| 1645 | application</a> completely fails with dat-fox.</p> |
| 1646 | |
| 1647 | <p>Dat-based applications look promising from a privacy point of view. |
| 1648 | Because of its peer-to-peer nature, users regain control over where |
| 1649 | their data is stored: either on their own computer, an online server, or |
| 1650 | by a trusted third party. But considering the protocol is not well |
| 1651 | established in current web browsers, I foresee difficulties in |
| 1652 | adoption of that aspect of the Dat ecosystem. Beyond that, it is rather |
| 1653 | disappointing that Dat applications cannot run natively in a web |
| 1654 | browser given that JavaScript is designed exactly for that.</p> |
| 1655 | |
| 1656 | <h4>Dat privacy</h4> |
| 1657 | |
| 1658 | <p>An advantage Dat has over other peer-to-peer protocols like BitTorrent |
| 1659 | is end-to-end encryption. I was originally concerned by the encryption |
| 1660 | design when reading the <a |
| 1661 | href="https://github.com/datproject/docs/raw/master/papers/dat-paper.pdf">academic |
| 1662 | paper [PDF]</a>:</p> |
| 1663 | |
| 1664 | <div class="BigQuote"> |
| 1665 | <p>It is up to client programs to make design decisions around which |
| 1666 | discovery networks they trust. For example if a Dat client decides |
| 1667 | to use the BitTorrent DHT to discover peers, and |
| 1668 | they are searching |
| 1669 | for a publicly shared Dat key (e.g. a key cited publicly in a |
| 1670 | published scientific paper) with known contents, then because of the |
| 1671 | privacy design of the BitTorrent DHT it becomes public knowledge |
| 1672 | what key that client is searching for.</p> |
| 1673 | </div> |
| 1674 | |
| 1675 | <p>So in other words, to share a secret file with another user, the |
| 1676 | public key is transmitted over a secure side-channel, only to then |
| 1677 | leak during the discovery process. Fortunately, the public Dat key |
| 1678 | is not directly used during discovery as it is <a |
| 1679 | href="https://github.com/datprotocol/DEPs/blob/653e0cf40233b5d474cddc04235577d9d55b2934/proposals/0000-peer-discovery.md#discovery-keys">hashed |
| 1680 | with BLAKE2B</a>. Still, the security model of Dat assumes the public |
| 1681 | key is private, which is a rather counterintuitive concept that might upset |
| 1682 | cryptographers and confuse users who are frequently encouraged to type |
| 1683 | such strings in address bars and search engines as part of the Dat |
| 1684 | experience. There is a <a |
| 1685 | href="https://docs.datproject.org/security">security & privacy FAQ</a> |
| 1686 | in the Dat |
| 1687 | documentation warning about this problem:</p> |
| 1688 | |
| 1689 | <div class="BigQuote"> |
| 1690 | <p>One of the key elements of Dat privacy is that the public key is |
| 1691 | never used in any discovery network. The public key is hashed, |
| 1692 | creating the discovery key. Whenever peers attempt to connect to |
| 1693 | each other, they use the discovery key.</p> |
| 1694 | |
| 1695 | <p>Data is encrypted using the public key, so it is important that this |
| 1696 | key stays secure.</p> |
| 1697 | </div> |
| 1698 | |
| 1699 | <p>There are other privacy issues outlined in the |
| 1700 | document; it states that "<span>Dat faces similar privacy risks as |
| 1701 | BitTorrent</span>":</p> |
| 1702 | |
| 1703 | <div class="BigQuote"> |
| 1704 | <p>When you download a dataset, your IP address is exposed to the users |
| 1705 | sharing that dataset. This may lead to honeypot servers collecting |
| 1706 | IP addresses, as we've seen in Bittorrent. However, with dataset |
| 1707 | sharing we can create a web of trust model where specific |
| 1708 | institutions are trusted as primary sources for datasets, |
| 1709 | diminishing the sharing of IP addresses.</p> |
| 1710 | </div> |
| 1711 | |
| 1712 | <p>A Dat blog post refers to this issue as <a href="https://blog.datproject.org/2016/12/12/reader-privacy-on-the-p2p-web/">reader privacy</a> and it |
| 1713 | is, indeed, a sensitive issue in peer-to-peer networks. It is how |
| 1714 | BitTorrent users are discovered and served scary verbiage from lawyers, |
| 1715 | after all. But Dat makes this a little better because, to join a swarm, |
| 1716 | you must know what you are looking for already, which means peers who |
| 1717 | can look at swarm activity only include users who know the secret |
| 1718 | public key. This works well for secret content, but for larger, public |
| 1719 | data sets, it is a real problem; it is why the Dat project has <a |
| 1720 | href="https://blog.datproject.org/2017/12/10/dont-ship/">avoided |
| 1721 | creating a Wikipedia mirror</a> so far.</p> |
| 1722 | |
| 1723 | <p>I found another privacy issue that is not documented in the security FAQ |
| 1724 | during my review of the protocol. As mentioned earlier, |
| 1725 | the <a href="https://github.com/datprotocol/DEPs/pull/7">Dat discovery |
| 1726 | protocol</a> routinely |
| 1727 | phones home to DNS servers operated by the Dat project. |
| 1728 | This implies that the default discovery servers (and an |
| 1729 | attacker watching over their traffic) know who is publishing or seeking |
| 1730 | content, in essence discovering the "social network" behind Dat. This |
| 1731 | discovery mechanism can be disabled in clients, but a similar privacy |
| 1732 | issue applies to the DHT as well, although that is distributed so it |
| 1733 | doesn't require trust of the Dat project itself.</p> |
| 1734 | |
| 1735 | <p>Considering those aspects of the protocol, privacy-conscious users |
| 1736 | will probably want to use Tor or other anonymization techniques to |
| 1737 | work around those concerns.</p> |
| 1738 | |
| 1739 | <h4>The future of Dat</h4> |
| 1740 | |
| 1741 | <p><a href="https://blog.datproject.org/2017/06/01/dat-sleep-release/">Dat 2.0 was released in June 2017</a> with performance improvements and |
| 1742 | protocol changes. <a href="https://github.com/datprotocol/DEPs">Dat |
| 1743 | Enhancement Proposals</a> (DEPs) guide the project's |
| 1744 | future development; most work is currently geared toward |
| 1745 | implementing the draft "<a href="https://github.com/datprotocol/DEPs/blob/master/proposals/0008-multiwriter.md">multi-writer proposal</a>" in |
| 1746 | <a href="https://github.com/mafintosh/hyperdb">HyperDB</a>. Without |
| 1747 | multi-writer support, only the |
| 1748 | original publisher of a Dat can modify it. According to Joe Hand, |
| 1749 | co-executive-director of <a href="https://codeforscience.org/">Code for Science & Society</a> (CSS) and |
| 1750 | Dat core developer, in an IRC chat, "supporting multiwriter is a big requirement for lots |
| 1751 | of folks". For example, while Dat might allow Alice to share her |
| 1752 | research results with Bob, he cannot modify or contribute back to those |
| 1753 | results. The multi-writer extension allows for Alice to assign trust |
| 1754 | to Bob so he can have write access to the data. |
| 1755 | |
| 1756 | <p> |
| 1757 | Unfortunately, the |
| 1758 | current proposal doesn't solve the "<span>hard problems</span>" of |
| 1759 | "<span>conflict merges |
| 1760 | and secure key distribution</span>". The former will be worked out through |
| 1761 | user interface tweaks, but the latter is a classic problem that security |
| 1762 | projects have typically trouble finding |
| 1763 | solutions for—Dat is no exception. How will Alice securely trust |
| 1764 | Bob? The OpenPGP web of trust? Hexadecimal fingerprints read over the |
| 1765 | phone? Dat doesn't provide a magic solution to this problem.</p> |
| 1766 | |
| 1767 | <p>Another thing limiting adoption is that Dat is not packaged in any |
| 1768 | distribution that I could find (although I <a href="https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=890565">requested it in |
| 1769 | Debian</a>) and, considering the speed of change of the JavaScript |
| 1770 | ecosystem, this is unlikely to change any time soon. A <a |
| 1771 | href="https://github.com/datrs">Rust |
| 1772 | implementation</a> of the Dat protocol has started, however, which |
| 1773 | might be easier to package than the multitude of <a href="https://nodejs.org/en/">Node.js</a> |
| 1774 | modules. In terms of mobile device support, there is an experimental |
| 1775 | Android web browser with Dat support called <a href="https://bunsenbrowser.github.io/#!index.md">Bunsen</a>, which somehow |
| 1776 | doesn't run on my phone. Some adventurous users have successfully run Dat |
| 1777 | in <a href="https://termux.com/">Termux</a>. I haven't found an app |
| 1778 | running on iOS at this |
| 1779 | point.</p> |
| 1780 | |
| 1781 | <p>Even beyond platform support, distributed protocols like Dat have a |
| 1782 | tough slope to climb against the virtual monopoly of more centralized |
| 1783 | protocols, so it remains to be seen how popular those tools will |
| 1784 | be. Hand says Dat is supported by multiple non-profit |
| 1785 | organizations. Beyond CSS, <a href="https://bluelinklabs.com/">Blue Link Labs</a> is working on the |
| 1786 | Beaker Browser as a self-funded startup and a grass-roots |
| 1787 | organization, <a href="https://www.digital-democracy.org/">Digital Democracy</a>, has contributed to the |
| 1788 | project. The <a href="https://archive.org">Internet Archive</a> has <a href="https://blog.archive.org/2018/06/05/internet-archive-code-for-science-and-society-and-california-digital-library-to-partner-on-a-data-sharing-and-preservation-pilot-project/">announced a collaboration</a> |
| 1789 | between itself, CSS, and the California Digital Library to launch a pilot |
| 1790 | project to see "<span>how members of a cooperative, decentralized |
| 1791 | network can leverage shared services to ensure data preservation while |
| 1792 | reducing storage costs and increasing replication counts</span>". |
| 1793 | |
| 1794 | <p> |
| 1795 | Hand said |
| 1796 | adoption in academia has been "slow but steady" and that the <a href="https://github.com/codeforscience/Dat-in-the-Lab">Dat in |
| 1797 | the Lab project</a> has helped identify areas that could help |
| 1798 | researchers adopt the project. Unfortunately, as is the case with many |
| 1799 | free-software projects, he said that "our team is definitely a bit |
| 1800 | limited on bandwidth to push for bigger adoption". Hand |
| 1801 | said that the project received a grant from <a |
| 1802 | href="https://www.mozilla.org/en-US/moss/">Mozilla Open Source |
| 1803 | Support</a> to improve its documentation, which will be a big help.</p> |
| 1804 | |
| 1805 | <p>Ultimately, Dat suffers from a problem common to all peer-to-peer |
| 1806 | applications, which is naming. Dat addresses are not exactly |
| 1807 | intuitive: humans do not remember strings of 64 hexadecimal characters |
| 1808 | well. For this, Dat took a <a href="https://github.com/datprotocol/DEPs/blob/master/proposals/0005-dns.md">similar approach</a> to IPFS by using |
| 1809 | DNS <tt>TXT</tt> records and <code>/.well-known</code> URL paths to bridge existing, |
| 1810 | human-readable names with Dat hashes. So this sacrifices a part of the |
| 1811 | decentralized nature of the project in favor of usability.</p> |
| 1812 | |
| 1813 | <p>I have tested a lot of distributed protocols like Dat in the past and |
| 1814 | I am not sure Dat is a clear winner. It certainly has advantages over |
| 1815 | IPFS in terms of usability and resource usage, but the lack of |
| 1816 | packages on most platforms is a big limit to adoption for most |
| 1817 | people. This means it will be difficult to share content with my |
| 1818 | friends and family with Dat anytime soon, which would probably be my |
| 1819 | primary use case for the project. Until the protocol reaches the wider |
| 1820 | adoption that BitTorrent has seen in terms of platform support, I will |
| 1821 | probably wait before switching everything over to this |
| 1822 | promising project.</p> |
| 1823 | <p><a href="/Articles/763492/#Comments">Comments (11 posted)</a> |
| 1824 | <p> |
| 1825 | <p> |
| 1826 | <b>Page editor</b>: Jonathan Corbet<br> |
| 1827 | <h2>Inside this week's LWN.net Weekly Edition</h2> |
| 1828 | <ul> |
| 1829 | <li> <a href="/Articles/763254/">Briefs</a>: OpenSSH 7.8; 4.19-rc1; Which stable?; Netdev 0x12; Bison 3.1; Quotes; ... |
| 1830 | <li> <a href="/Articles/763255/">Announcements</a>: Newsletters; events; security updates; kernel patches; ... |
| 1831 | </ul> |
| 1832 | <b>Next page</b>: |
| 1833 | <a href="/Articles/763254/">Brief items>></a><br> |
| 1834 | |
| 1835 | </div> <!-- ArticleText --> |
| 1836 | </div> |
| 1837 | <div class="lwn-u-1 pure-u-md-1-6 not-print"> |
| 1838 | <div id="azk93271_right_zone"></div> |
| 1839 | </div> |
| 1840 | </div> <!-- pure-grid --> |
| 1841 | |
| 1842 | <br clear="all"> |
| 1843 | <center> |
| 1844 | <P> |
| 1845 | <font size="-2"> |
| 1846 | Copyright © 2018, Eklektix, Inc.<BR> |
| 1847 | |
| 1848 | Comments and public postings are copyrighted by their creators.<br> |
| 1849 | Linux is a registered trademark of Linus Torvalds<br> |
| 1850 | </font> |
| 1851 | </center> |
| 1852 | |
| 1853 | <script type="text/javascript"> |
| 1854 | var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www."); |
| 1855 | document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E")); |
| 1856 | </script> |
| 1857 | <script type="text/javascript"> |
| 1858 | try { |
| 1859 | var pageTracker = _gat._getTracker("UA-2039382-1"); |
| 1860 | pageTracker._trackPageview(); |
| 1861 | } catch(err) {}</script> |
| 1862 | |
| 1863 | </body></html> |
| 1864 | |