Commit | Line | Data |
---|---|---|
1aaa6ba3 NR |
1 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" |
2 | "http://www.w3.org/TR/html4/loose.dtd"> | |
3 | <html> | |
4 | <head><title>LWN.net Weekly Edition for August 30, 2018 [LWN.net]</title> | |
5 | <link rel="next" href="/Articles/763254/"/> | |
6 | <meta name="twitter:card" content="summary" /> | |
7 | <meta name="twitter:site" content="@lwnnet" /> | |
8 | <meta name="twitter:title" content="LWN.net Weekly Edition for August 30, 2018" /> | |
9 | <meta name="twitter:description" content="Julia; C considered dangerous; 4.19 Merge window; I/O controller throughput; KDE onboarding; Dat." /> | |
10 | <meta name="viewport" content="width=device-width, initial-scale=1"> | |
11 | <meta HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8"> | |
12 | <link rel="icon" href="/images/favicon.png" type="image/png"> | |
13 | <link rel="alternate" type="application/rss+xml" title="LWN.net headlines" href="https://lwn.net/headlines/newrss"> | |
14 | <link rel="stylesheet" href="/CSS/lwn"> | |
15 | <link rel="stylesheet" href="/CSS/nosub"> | |
16 | <link rel="stylesheet" href="/CSS/pure-min"> | |
17 | <!--[if lte IE 8]> | |
18 | <link rel="stylesheet" href="/CSS/grids-responsive-old-ie-min"> | |
19 | <![endif]--> | |
20 | <!--[if gt IE 8]><!--> | |
21 | <link rel="stylesheet" href="/CSS/grids-responsive-min"> | |
22 | <!--<![endif]--> | |
23 | <link rel="stylesheet" href="/CSS/pure-lwn"> | |
24 | ||
25 | ||
26 | <script type="text/javascript">var p="http",d="static";if(document.location.protocol=="https:"){p+="s";d="engine";}var z=document.createElement("script");z.type="text/javascript";z.async=true;z.src=p+"://"+d+".adzerk.net/ados.js";var s=document.getElementsByTagName("script")[0];s.parentNode.insertBefore(z,s);</script> | |
27 | <script type="text/javascript"> | |
28 | var ados_keywords = ados_keywords || []; | |
29 | if( location.protocol=='https:' ) { | |
30 | ados_keywords.push('T:SSL'); | |
31 | } else { | |
32 | ados_keywords.push('T:HTTP'); | |
33 | } | |
34 | ||
35 | var ados = ados || {}; | |
36 | ados.run = ados.run || []; | |
37 | ados.run.push(function() { | |
38 | ||
39 | ados_add_placement(4669, 20979, "azk13321_leaderboard", 4).setZone(16026); | |
40 | ||
41 | ados_add_placement(4669, 20979, "azk93271_right_zone", [5,10,6]).setZone(16027); | |
42 | ||
43 | ados_add_placement(4669, 20979, "azk31017_tracking", 20).setZone(20995); | |
44 | ||
45 | ||
46 | ||
47 | ados_setKeywords(ados_keywords.join(', ')); | |
48 | ados_load(); | |
49 | });</script> | |
50 | ||
51 | </head> | |
52 | <body bgcolor="#ffffff" link="Blue" VLINK="Green" alink="Green"> | |
53 | <a name="t"></a> | |
54 | <div id="menu"><a href="/"><img src="https://static.lwn.net/images/logo/barepenguin-70.png" class="logo" | |
55 | border="0" alt="LWN.net Logo"> | |
56 | <font class="logo">LWN<br>.net</font> | |
57 | <font class="logobl">News from the source</font></a> | |
58 | <a href="/"><img src="https://static.lwn.net/images/lcorner-ss.png" class="sslogo" | |
59 | border="0" alt="LWN"></a><div class="navmenu-container"> | |
60 | <ul class="navmenu"> | |
61 | <li><a class="navmenu" href="#t"><b>Content</b></a><ul><li><a href="/current/">Weekly Edition</a></li><li><a href="/Archives/">Archives</a></li><li><a href="/Search/">Search</a></li><li><a href="/Kernel/">Kernel</a></li><li><a href="/Security/">Security</a></li><li><a href="/Distributions/">Distributions</a></li><li><a href="/Calendar/">Events calendar</a></li><li><a href="/Comments/unread">Unread comments</a></li><li><hr></li><li><a href="/op/FAQ.lwn">LWN FAQ</a></li><li><a href="/op/AuthorGuide.lwn">Write for us</a></li></ul></li> | |
62 | <li><a class="navmenu" href="#t"><b>Edition</b></a><ul><li><a href="/Articles/763252/">⇒Front page</a></li><li><a href="/Articles/763254/">Brief items</a></li><li><a href="/Articles/763255/">Announcements</a></li><li><a href="/Articles/763252/bigpage">One big page</a></li><li><a href="/Articles/762816/">Previous week</a></li><li><a href="/Articles/763789/">Following week</a></li></ul></li> | |
63 | </ul></div> | |
64 | </div> <!-- menu --> | |
65 | <div class="pure-g not-handset" style="margin-left: 10.5em"> | |
66 | <div class="not-print"> | |
67 | <div id="azk13321_leaderboard"></div> | |
68 | </div> | |
69 | </div> | |
70 | <div class="topnav-container"> | |
71 | <div class="not-handset"><form action="https://lwn.net/Login/" method="post" name="loginform" | |
72 | class="loginform"> | |
73 | <b>User:</b> <input type="text" name="Username" value="" size="8" /> <b>Password:</b> <input type="password" name="Password" size="8" /> <input type="hidden" name="target" value="/Articles/763252/" /> <input type="submit" name="submit" value="Log in" /></form> | | |
74 | <form action="https://lwn.net/subscribe/" method="post" class="loginform"> | |
75 | <input type="submit" name="submit" value="Subscribe" /> | |
76 | </form> | | |
77 | <form action="https://lwn.net/Login/newaccount" method="post" class="loginform"> | |
78 | <input type="submit" name="submit" value="Register" /> | |
79 | </form> | |
80 | </div> | |
81 | <div class="handset-only"> | |
82 | <a href="/subscribe/"><b>Subscribe</b></a> / | |
83 | <a href="/Login/"><b>Log in</b></a> / | |
84 | <a href="/Login/newaccount"><b>New account</b></a> | |
85 | </div> | |
86 | </div><div class="pure-grid maincolumn"> | |
87 | <div class="lwn-u-1 pure-u-md-19-24"> | |
88 | <div class="PageHeadline"> | |
89 | <h1>LWN.net Weekly Edition for August 30, 2018</h1> | |
90 | </div> | |
91 | <div class="ArticleText"> | |
92 | <a name="763743"></a><h2 class="SummaryHL"><a href="/Articles/763743/">Welcome to the LWN.net Weekly Edition for August 30, 2018</a></h2> | |
93 | ||
94 | This edition contains the following feature content: | |
95 | <p> | |
96 | <ul class="spacylist"> | |
97 | ||
98 | <li> <a href="/Articles/763626/">An introduction to the Julia language, | |
99 | part 1</a>: Julia is a language designed for intensive numerical | |
100 | calculations; this article gives an overview of its core features. | |
101 | ||
102 | <li> <a href="/Articles/763641/">C considered dangerous</a>: a Linux | |
103 | Security Summit talk on what is being done to make the use of C in the | |
104 | kernel safer. | |
105 | ||
106 | <li> <a href="/Articles/763106/">The second half of the 4.19 merge | |
107 | window</a>: the final features merged (or not merged) before the merge | |
108 | window closed for this cycle. | |
109 | ||
110 | ||
111 | <li> <a href="/Articles/763603/">Measuring (and fixing) I/O-controller | |
112 | throughput loss</a>: the kernel's I/O controllers can provide useful | |
113 | bandwidth guarantees, but at a significant cost in throughput. | |
114 | ||
115 | <li> <a href="/Articles/763175/">KDE's onboarding initiative, one year | |
116 | later</a>: what has gone right in KDE's effort to make it easier for | |
117 | contributors to join the project, and what remains to be done. | |
118 | ||
119 | <li> <a href="/Articles/763492/">Sharing and archiving data sets with | |
120 | Dat</a>: an innovative approach to addressing and sharing data on the | |
121 | net. | |
122 | ||
123 | </ul> | |
124 | ||
125 | <p> | |
126 | This week's edition also includes these inner pages: | |
127 | <p> | |
128 | <ul class="spacylist"> | |
129 | ||
130 | <li> <a href="/Articles/763254/">Brief items</a>: Brief news items from throughout the community. | |
131 | ||
132 | <li> <a href="/Articles/763255/">Announcements</a>: Newsletters, conferences, security updates, patches, and more. | |
133 | ||
134 | </ul> | |
135 | <p> | |
136 | Please enjoy this week's edition, and, as always, thank you for | |
137 | supporting LWN.net. | |
138 | <p><a href="/Articles/763743/#Comments">Comments (none posted)</a> | |
139 | <p> | |
140 | <a name="763626"></a><h2 class="SummaryHL"><a href="/Articles/763626/">An introduction to the Julia language, part 1</a></h2> | |
141 | ||
142 | <div class="GAByline"> | |
143 | <p>August 28, 2018</p> | |
144 | <p>This article was contributed by Lee Phillips</p> | |
145 | </div> | |
146 | <p><a href="http://julialang.org/">Julia</a> is a young computer language | |
147 | aimed at serving the needs of scientists, engineers, and other | |
148 | practitioners of numerically intensive programming. It was first publicly | |
149 | released in 2012. After an intense period of language development, version | |
150 | 1.0 was <a | |
151 | href="https://julialang.org/blog/2018/08/one-point-zero">released</a> on | |
152 | August 8. The 1.0 release promises years of language | |
153 | stability; users can be confident that developments in the 1.x series will | |
154 | not break their code. | |
155 | This is the first part of a two-part article introducing the world of Julia. | |
156 | This part will introduce enough of the language syntax and constructs to | |
157 | allow you to begin to write simple programs. The following installment will | |
158 | acquaint you with the additional pieces needed to create real projects, and to | |
159 | make use of Julia's ecosystem. | |
160 | ||
161 | <h4>Goals and history</h4> | |
162 | ||
163 | <p>The Julia project has ambitious goals. It wants the language to perform | |
164 | about as well as Fortran or C when running numerical algorithms, while | |
165 | remaining as pleasant to program in as Python. I believe the project has | |
166 | met these goals and is poised to see increasing adoption by numerical | |
167 | researchers, especially now that an official, stable release is | |
168 | available.</p> | |
169 | ||
170 | <p>The Julia project maintains a <a | |
171 | href="https://julialang.org/benchmarks/">micro-benchmark page</a> that compares its | |
172 | numerical performance against both statically compiled languages (C, | |
173 | Fortran) and dynamically typed languages (R, Python). While it's certainly | |
174 | possible to argue about the relevance and fairness of particular | |
175 | benchmarks, the data overall supports the Julia team's contention that Julia | |
176 | has generally achieved parity with Fortran and C; the benchmark | |
177 | source code is available.</p> | |
178 | ||
179 | <p>Julia began as research in computer science at MIT; its creators are | |
180 | Alan Edelman, Stefan Karpinski, Jeff Bezanson, and Viral Shah. These four | |
181 | remain active developers of the language. They, along with Keno Fischer, | |
182 | co-founder and CTO of <a href="https://juliacomputing.com/">Julia | |
183 | Computing</a>, were kind enough to share their thoughts with us about | |
184 | the language. I'll be drawing | |
185 | on their comments later on; for now, let's get a taste of | |
186 | what Julia code looks like.</p> | |
187 | ||
188 | <h4>Getting started</h4> | |
189 | ||
190 | <p>To explore Julia initially, start up its standard <a | |
191 | href="https://en.wikipedia.org/wiki/Read%E2%80%93eval%E2%80%93print_loop">read-eval-print | |
192 | loop</a> (REPL) | |
193 | by typing <code>julia</code> at the terminal, assuming that you have installed | |
194 | it. You will then be | |
195 | able to interact with what will seem to be an interpreted language — but, | |
196 | behind the scenes, those commands are being compiled by a | |
197 | just-in-time (JIT) compiler that uses the <a href="http://llvm.org/">LLVM | |
198 | compiler framework</a>. This allows Julia to be interactive, while turning | |
199 | the code into fast, native machine instructions. However, the JIT compiler | |
200 | passes sometimes introduce noticeable delays at the REPL, especially when | |
201 | using a function for the first time.</p> | |
202 | ||
203 | <p>To run a Julia program non-interactively, execute a command like: | |
204 | <pre> | |
205 | $ julia script.jl &lt;args&gt; | |
206 | </pre> | |
207 | ||
208 | <p>Julia has all the usual data structures: numbers of various types | |
209 | (including complex and rational numbers), multidimensional arrays, | |
210 | dictionaries, strings, and characters. Functions are first-class: they can | |
211 | be passed as arguments to other functions, can be members of arrays, | |
212 | and so on.</p> | |
213 | ||
214 | <p>Julia embraces Unicode. Strings, which are enclosed in double quotes, | |
215 | are arrays of Unicode characters, which are enclosed in single quotes. The | |
216 | "<tt>*</tt>" operator is used for string and character concatenation. Thus | |
217 | 'a' and 'β' are characters, and 'aβ' is a syntax error. "a" and | |
218 | "β" are strings, as are "aβ", 'a' * 'β', and | |
219 | "a" * "β" — all evaluate to the same string. | |
220 | ||
221 | <p>Variable and function names can contain non-ASCII characters. This, along | |
222 | with Julia's clever syntax that understands numbers prepended to variables | |
223 | to mean multiplication, goes a long way to allowing the numerical scientist | |
224 | to write code that more closely resembles the compact mathematical notation | |
225 | of the equations that usually lie behind it.</p> | |
226 | ||
227 | <pre> | |
228 | julia> ε₁ = 0.01 | |
229 | 0.01 | |
230 | ||
231 | julia> ε₂ = 0.02 | |
232 | 0.02 | |
233 | ||
234 | julia> 2ε₁ + 3ε₂ | |
235 | 0.08 | |
236 | </pre> | |
237 | ||
238 | <p>And where does Julia come down on the age-old debate of what to do about | |
239 | <tt>1/2</tt>? In Fortran and Python 2, this will get you 0, since 1 and 2 are | |
240 | integers, and the result is rounded down to the integer 0. This was deemed | |
241 | inconsistent, and confusing to some, so it was changed in Python 3 to | |
242 | return 0.5 — which is what you | |
243 | get in Julia, too.</p> | |
244 | ||
245 | <p>While we're on the subject of fractions, Julia can handle rational | |
246 | numbers, with a special syntax: <tt>3//5 + 2//3</tt> returns | |
247 | <tt>19//15</tt>, while <tt>3/5 + 2/3</tt> | |
248 | gets you the floating-point answer 1.2666666666666666. Internally, Julia | |
249 | thinks of a rational number in its reduced form, so the expression | |
250 | <tt>6//8 == 3//4</tt> returns <code>true</code>, and <code>numerator(6//8)</code> returns | |
251 | <code>3</code>.</p> | |
252 | ||
253 | <h4>Arrays</h4> | |
254 | ||
255 | <p>Arrays are enclosed in square brackets and indexed with an iterator that | |
256 | can contain a step value:</p> | |
257 | ||
258 | <pre> | |
259 | julia> a = [1, 2, 3, 4, 5, 6] | |
260 | 6-element Array{Int64,1}: | |
261 | 1 | |
262 | 2 | |
263 | 3 | |
264 | 4 | |
265 | 5 | |
266 | 6 | |
267 | ||
268 | julia> a[1:2:end] | |
269 | 3-element Array{Int64,1}: | |
270 | 1 | |
271 | 3 | |
272 | 5 | |
273 | </pre> | |
274 | ||
275 | <p>As you can see, indexing starts at one, and the useful <code>end</code> | |
276 | index means the obvious thing. When you define a variable in the REPL, | |
277 | Julia replies with the type and value of the assigned data; you can suppress this output by ending your input line with a semicolon.</p> | |
278 | ||
279 | <p>Since arrays are such a vital part of numerical computation, and Julia | |
280 | makes them easy to work with, we'll spend a bit more time with them than the other data structures.</p> | |
281 | ||
282 | <p>To illustrate the syntax, we can start with a couple of 2D arrays, defined at the REPL:</p> | |
283 | ||
284 | <pre> | |
285 | julia> a = [1 2 3; 4 5 6] | |
286 | 2×3 Array{Int64,2}: | |
287 | 1 2 3 | |
288 | 4 5 6 | |
289 | ||
290 | julia> z = [-1 -2 -3; -4 -5 -6]; | |
291 | </pre> | |
292 | ||
293 | <p>Indexing is as expected:</p> | |
294 | ||
295 | <pre> | |
296 | julia> a[1, 2] | |
297 | 2 | |
298 | </pre> | |
299 | ||
300 | <p>You can glue arrays together horizontally:</p> | |
301 | ||
302 | <pre> | |
303 | julia> [a z] | |
304 | 2×6 Array{Int64,2}: | |
305 | 1 2 3 -1 -2 -3 | |
306 | 4 5 6 -4 -5 -6 | |
307 | </pre> | |
308 | ||
309 | <p>And vertically:</p> | |
310 | ||
311 | <pre> | |
312 | julia> [a; z] | |
313 | 4×3 Array{Int64,2}: | |
314 | 1 2 3 | |
315 | 4 5 6 | |
316 | -1 -2 -3 | |
317 | -4 -5 -6 | |
318 | </pre> | |
319 | ||
320 | <p>Julia has all the usual operators for handling arrays, and <a | |
321 | href="http://www.3blue1brown.com/essence-of-linear-algebra-page/">linear | |
322 | algebra</a> functions that work with matrices (2D arrays). The linear | |
323 | algebra functions are part of Julia's standard library, but need to be | |
324 | imported with a command like "<code>using LinearAlgebra</code>", which is a detail | |
325 | omitted from the current documentation. The functions include such things as | |
326 | determinants, matrix inverses, eigenvalues and eigenvectors, many kinds of | |
327 | matrix factorizations, etc. Julia has not reinvented the wheel here, but | |
328 | wisely uses the <a href="http://www.netlib.org/lapack/">LAPACK</a> Fortran | |
329 | library of battle-tested linear algebra routines.</p> | |
330 | ||
331 | <p>The extension of arithmetic operators to arrays is usually intuitive:</p> | |
332 | ||
333 | <pre> | |
334 | julia> a + z | |
335 | 2×3 Array{Int64,2}: | |
336 | 0 0 0 | |
337 | 0 0 0 | |
338 | </pre> | |
339 | ||
340 | <p>And the numerical prepending syntax works with arrays, too:</p> | |
341 | ||
342 | <pre> | |
343 | julia> 3a + 4z | |
344 | 2×3 Array{Int64,2}: | |
345 | -1 -2 -3 | |
346 | -4 -5 -6 | |
347 | </pre> | |
348 | ||
349 | <p>Putting a multiplication operator between two matrices gets you matrix | |
350 | multiplication:</p> | |
351 | ||
352 | <pre> | |
353 | julia> a * transpose(a) | |
354 | 2×2 Array{Int64,2}: | |
355 | 14 32 | |
356 | 32 77 | |
357 | </pre> | |
358 | ||
359 | <p>You can "broadcast" numbers to cover all the elements in an | |
360 | array by prepending the usual arithmetic operators with a dot:</p> | |
361 | ||
362 | <pre> | |
363 | julia> 1 .+ a | |
364 | 2×3 Array{Int64,2}: | |
365 | 2 3 4 | |
366 | 5 6 7 | |
367 | </pre> | |
368 | ||
369 | <p>Note that the language only actually requires the dot for some | |
370 | operators, but not for others, such as "*" and "/". The | |
371 | reasons for this are arcane, and it probably makes sense to be consistent | |
372 | and use the dot whenever you intend broadcasting. Note also that the | |
373 | current version of the official documentation is incorrect in claiming that | |
374 | you may omit the dot from "+" and "-"; in fact, this | |
375 | now gives an error.</p> | |
376 | ||
377 | <p>You can use the dot notation to turn any function into one that operates | |
378 | on each element of an array:</p> | |
379 | ||
380 | <pre> | |
381 | julia> round.(sin.([0, π/2, π, 3π/2, 2π])) | |
382 | 5-element Array{Float64,1}: | |
383 | 0.0 | |
384 | 1.0 | |
385 | 0.0 | |
386 | -1.0 | |
387 | -0.0 | |
388 | </pre> | |
389 | ||
390 | <p>The example above illustrates chaining two dotted functions | |
391 | together. The Julia compiler turns expressions like this into | |
392 | "fused" operations: instead of applying each function in turn to | |
393 | create a new array that is passed to the next function, the compiler | |
394 | combines the functions into a single compound function that is applied once | |
395 | over the array, creating a significant optimization.</p> | |
396 | ||
397 | <p>You can use this dot notation with any function, including your own, to | |
398 | turn it into a version that operates element-wise over arrays.</p> | |
399 | ||
400 | <p>Dictionaries (associative arrays) can be defined with several | |
401 | syntaxes. Here's one:</p> | |
402 | ||
403 | <pre> | |
404 | julia> d1 = Dict("A"=>1, "B"=>2) | |
405 | Dict{String,Int64} with 2 entries: | |
406 | "B" => 2 | |
407 | "A" => 1 | |
408 | </pre> | |
409 | ||
410 | <p>You may have noticed that the code snippets so far have not included any | |
411 | type declarations. Every value in Julia has a type, but the compiler will | |
412 | infer types if they are not specified. It is generally not necessary to | |
413 | declare types for performance, but type declarations sometimes serve other | |
414 | purposes that we'll return to later. Julia has a deep and sophisticated | |
415 | type system, including user-defined types and C-like structs. Types can | |
416 | have behaviors associated with them, and can inherit behaviors from other | |
417 | types. The best thing about Julia's type system is that you can ignore it | |
418 | entirely, use just a few pieces of it, or spend weeks studying its | |
419 | design.</p> | |
420 | ||
421 | <h4>Control flow</h4> | |
422 | ||
423 | <p>Julia code is organized in blocks, which can indicate control flow, | |
424 | function definitions, and other code units. Blocks are terminated with the | |
425 | <code>end</code> keyword, and indentation is not significant. Statements | |
426 | are separated either with newlines or semicolons.</p> | |
427 | ||
428 | <p>Julia has the typical control flow constructs; here is a | |
429 | <code>while</code> block:</p> | |
430 | ||
431 | <pre> | |
432 | julia> i = 1; | |
433 | ||
434 | julia> while i < 5 | |
435 | print(i) | |
436 | global i = i + 1 | |
437 | end | |
438 | 1234 | |
439 | </pre> | |
440 | ||
441 | <p>Notice the <code>global</code> keyword. Most blocks in Julia introduce a | |
442 | local scope for variables; without this keyword here, we would get an error | |
443 | about an undefined variable.</p> | |
444 | ||
445 | <p>Julia has the usual <code>if</code> statements and <code>for</code> | |
446 | loops that use the same iterators that we introduced above for array | |
447 | indexing. We can also iterate over collections:</p> | |
448 | ||
449 | <pre> | |
450 | julia> for i ∈ ['a', 'b', 'c'] | |
451 | println(i) | |
452 | end | |
453 | a | |
454 | b | |
455 | c | |
456 | </pre> | |
457 | ||
458 | <p>In place of the fancy math symbol in this <code>for</code> loop, we can | |
459 | use "<tt>=</tt>" or "<tt>in</tt>". If you want to use | |
460 | the math symbol but | |
461 | have no convenient way to type it, the REPL will help you: type | |
462 | "<tt>\in</tt>" and the TAB key, and the symbol appears; you can type many | |
463 | <a href="/Articles/657157/">LaTeX</a> expressions into the | |
464 | REPL in this way.</p> | |
465 | ||
466 | <h4>Development of Julia</h4> | |
467 | ||
468 | <p>The language is developed on GitHub, with over 700 contributors. The | |
469 | Julia team mentioned in their email to us that the decision to use GitHub | |
470 | has been particularly good for Julia, as it streamlined the process for | |
471 | many of their contributors, who are scientists or domain experts in various | |
472 | fields, rather than professional software developers.</p> | |
473 | ||
474 | <p>The creators of Julia have <a | |
475 | href="https://julialang.org/publications/julia-fresh-approach-BEKS.pdf">published | |
476 | [PDF]</a> | |
477 | a detailed “mission statement” for the language, describing their aims and | |
478 | motivations. A key issue that they wanted their language to solve is what | |
479 | they called the "two-language problem." This situation is | |
480 | familiar to anyone who has used Python or another dynamic language on a | |
481 | demanding numerical problem. To get good performance, you will wind up | |
482 | rewriting the numerically intensive parts of the program in C or Fortran, | |
483 | dealing with the interface between the two languages, and may still be | |
484 | disappointed in the overhead presented by calling the foreign routines from | |
485 | your original code. | |
486 | ||
487 | <p> | |
488 | For Python, <a | |
489 | href="/Articles/738915/">NumPy and SciPy</a> wrap many | |
490 | numerical routines, written in Fortran or C, for efficient use from that | |
491 | language, but you can only take advantage of this if your calculation fits | |
492 | the pattern of an available routine; in more general cases, where you will | |
493 | have to write a loop over your data, you are stuck with Python's native | |
494 | performance, which is orders of magnitude slower. If you switch to an | |
495 | alternative, faster implementation of Python, such as <a | |
496 | href="https://pypy.org/">PyPy</a>, the numerical libraries may not be | |
497 | compatible; NumPy became available for PyPy only within about the past | |
498 | year.</p> | |
499 | ||
500 | <p>Julia solves the two-language problem by being as expressive and simple | |
501 | to program in as a dynamic scripting language, while having the native | |
502 | performance of a static, compiled language. There is no need to write | |
503 | numerical libraries in a second language, but C or Fortran library routines | |
504 | can be called using a facility that Julia has built-in. Other languages, | |
505 | such as <a href="https://github.com/JuliaPy/PyCall.jl">Python</a> or <a | |
506 | href="https://github.com/JuliaInterop/RCall.jl">R</a>, can also interoperate | |
507 | easily with Julia using external packages.</p> | |
508 | ||
509 | <h4>Documentation</h4> | |
510 | ||
511 | <p>There are many resources to turn to to learn the language. There is an | |
512 | extensive and detailed <a | |
513 | href="https://docs.julialang.org/en/stable/">manual</a> at Julia | |
514 | headquarters, and this may be a good place to start. However, although the | |
515 | first few chapters provide a gentle introduction, the material soon becomes | |
516 | dense and, at times, hard to follow, with references to concepts that are | |
517 | not explained until later chapters. Fortunately, there is a <a | |
518 | href="https://julialang.org/learning/">"learning" link</a> at the | |
519 | top of the Julia home page, which takes you to a long list of videos, | |
520 | tutorials, books, articles, and classes both about Julia and that use Julia | |
521 | in teaching subjects such as numerical analysis. There is also a fairly good | |
522 | <a | |
523 | href="http://bogumilkaminski.pl/files/julia_express.pdf">cheat-sheet [PDF]</a>, which was | |
524 | just updated for v. 1.0.</p> | |
525 | ||
526 | <p>If you're coming from Python, <a | |
527 | href="https://docs.julialang.org/en/stable/manual/noteworthy-differences/#Noteworthy-differences-from-Python-1">this | |
528 | list</a> of noteworthy differences between Python and Julia syntax will | |
529 | probably be useful.</p> | |
530 | ||
531 | <p>Some of the linked tutorials are in the form of <a | |
532 | href="https://lwn.net/Articles/746386/">Jupyter notebooks</a> — indeed, | |
533 | the name "Jupyter" is formed from "Julia", | |
534 | "Python", and "R", which are the three original languages supported by | |
535 | the interface. The <a href="https://github.com/JuliaLang/IJulia.jl">Julia | |
536 | kernel for Jupyter</a> was recently upgraded to support v. 1.0. Judicious | |
537 | sampling of a variety of documentation sources, combined with liberal | |
538 | experimentation, may be the best way of learning the language. Jupyter | |
539 | makes this experimentation more inviting for those who enjoy the web-based | |
540 | interface, but the REPL that comes with Julia helps a great deal in this | |
541 | regard by providing, for instance, TAB completion and an extensive help | |
542 | system invoked by simply pressing the "?" key.</p> | |
543 | ||
544 | <h4>Stay tuned</h4> | |
545 | ||
546 | <p> | |
547 | The <a href="/Articles/764001/">next installment</a> in this two-part series will explain how Julia is | |
548 | organized around the concept of "multiple dispatch". You will learn how to | |
549 | create functions and make elementary use of Julia's type system. We'll see how | |
550 | to install packages and use modules, and how to make graphs. Finally, Part 2 | |
551 | will briefly survey the important topics of macros and distributed computing. | |
552 | <p><a href="/Articles/763626/#Comments">Comments (80 posted)</a> | |
553 | <p> | |
554 | <a name="763641"></a><h2 class="SummaryHL"><a href="/Articles/763641/">C considered dangerous</a></h2> | |
555 | ||
556 | <div class="FeatureByline"> | |
557 | By <b>Jake Edge</b><br>August 29, 2018 | |
558 | <hr> | |
559 | <a href="/Archives/ConferenceByYear/#2018-Linux_Security_Summit_NA">LSS NA</a> | |
560 | </div> | |
561 | <p> | |
562 | At the North America edition of the <a | |
563 | href="https://events.linuxfoundation.org/events/linux-security-summit-north-america-2018/">2018 | |
564 | Linux Security Summit</a> (LSS NA), which was held in late August in Vancouver, | |
565 | Canada, Kees Cook gave a presentation on some of the dangers that come with | |
566 | programs written in C. In particular, of course, the Linux kernel is | |
567 | mostly written in C, which means that the security of our systems rests on | |
568 | a somewhat dangerous foundation. But there are things that can be done to | |
569 | help firm things up by "<span>Making C Less Dangerous</span>" as the title | |
570 | of his talk suggested. | |
571 | </p> | |
572 | ||
573 | <p> | |
574 | He began with a brief summary of the work that he and others are doing as | |
575 | part of the <a | |
576 | href="https://kernsec.org/wiki/index.php/Kernel_Self_Protection_Project">Kernel | |
577 | Self Protection Project</a> (KSPP). The goal of the project is to get | |
578 | kernel protections merged into the mainline. These protections are not | |
579 | targeted at protecting user-space processes from other (possibly rogue) | |
580 | processes, but are, instead, focused on protecting the kernel from | |
581 | user-space code. There are around 12 organizations and ten individuals | |
582 | working on roughly 20 different technologies as part of the KSPP, he said. The | |
583 | progress has been "slow and steady", he said, which is how he thinks it | |
584 | should go. | |
585 | </p> | |
586 | ||
587 | <a href="/Articles/763644/"> | |
588 | <img src="https://static.lwn.net/images/2018/lssna-cook-sm.jpg" border=0 hspace=5 align="right" | |
589 | alt="[Kees Cook]" title="Kees Cook" width=214 height=300> | |
590 | </a> | |
591 | ||
592 | <p> | |
593 | One of the main problems is that C is treated mostly like a fancy assembler. | |
594 | The kernel developers do this because they want the kernel to be as fast | |
595 | and as small as possible. There are other reasons, too, such as the need to do | |
596 | architecture-specific tasks that lack a C API (e.g. setting up page tables, | |
597 | switching to 64-bit mode). | |
598 | </p> | |
599 | ||
600 | <p> | |
601 | But there is lots of undefined behavior in C. This "operational baggage" | |
602 | can lead to various problems. In addition, C has a weak standard library | |
603 | with multiple utility functions that have various pitfalls. In C, the content | |
604 | of uninitialized automatic variables is undefined, but in the machine code that it | |
605 | gets translated to, the value is whatever happened to be in that memory | |
606 | location before. In C, a function pointer can be called even if the type | |
607 | of the pointer does not match the type of the function being | |
608 | called—assembly doesn't care, it just jumps to a location, he said. | |
609 | </p> | |
610 | ||
611 | <p> | |
612 | The APIs in the standard library are also bad in many cases. He asked: why | |
613 | is there no argument to <tt>memcpy()</tt> to specify the maximum | |
614 | destination length? He noted a recent <a | |
615 | href="https://raphlinus.github.io/programming/rust/2018/08/17/undefined-behavior.html">blog | |
616 | post</a> from Raph Levien entitled "With Undefined Behavior, Anything is | |
617 | Possible". That obviously resonated with Cook, as he pointed out his | |
618 | T-shirt—with the title and artwork from the post. | |
619 | </p> | |
620 | ||
621 | <h4>Less danger</h4> | |
622 | ||
623 | <p> | |
624 | He then moved on to some things that kernel developers can do (and are | |
625 | doing) to get away from some of the dangers of C. He began with | |
626 | variable-length arrays (VLAs), which can be used to overflow the stack to | |
627 | access | |
628 | data outside of its region. Even if the stack has a guard page, VLAs can | |
629 | be used to jump past it to write into other memory, which can then be used | |
630 | by some other kind of attack. The C language is "perfectly fine with | |
631 | this". It is easy to find uses of VLAs with the <tt>-Wvla</tt> flag, however. | |
632 | </p> | |
633 | ||
634 | <p> | |
635 | But it turns out that VLAs are <a href="/Articles/749064/">not just bad | |
636 | from a security perspective</a>, | |
637 | they are also slow. In a micro-benchmark associated with a <a | |
638 | href="https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=02361bc77888">patch | |
639 | removing a VLA</a>, a 13% performance boost came from using a fixed-size | |
640 | array. He dug in a bit further and found that much more code is being | |
641 | generated to handle a VLA, which explains the speed increase. Since Linus | |
642 | Torvalds has <a | |
643 | href="https://lore.kernel.org/lkml/CA+55aFzCG-zNmZwX4A2FQpadafLfEzK6CC=qPXydAacU1RqZWA@mail.gmail.com/T/#u">declared | |
644 | that VLAs should be removed</a> from the kernel because they cause security | |
645 | problems and also slow the kernel down, Cook said "don't use VLAs". | |
646 | </p> | |
647 | ||
648 | <p> | |
649 | Another problem area is <tt>switch</tt> statements, in particular where | |
650 | there is no <tt>break</tt> for a <tt>case</tt>. That could mean that the | |
651 | programmer expects and wants to fall through to the next case or it could | |
652 | be that the <tt>break</tt> was simply forgotten. There is a way to get a | |
653 | warning from the compiler for fall-throughs, but there needs to be a way to | |
654 | mark those that are truly meant to be that way. A special fall-through | |
655 | "statement" in the form of a comment is what has been agreed on within the | |
656 | static-analysis community. He and others have been going through each of | |
657 | the places where there is no <tt>break</tt> to add these comments (or a | |
658 | <tt>break</tt>); they | |
659 | have "found a lot of bugs this way", he said. | |
660 | </p> | |
661 | ||
662 | <p> | |
663 | Uninitialized local variables will generate a warning, but not if the | |
664 | variable is passed in by reference. There are some GCC plugins that will | |
665 | automatically initialize these variables, but there are also patches for | |
666 | both GCC and Clang to provide a compiler option to do so. Neither of those | |
667 | is upstream yet, but Torvalds has praised the effort so the kernel would | |
668 | likely use the option. An interesting side | |
669 | effect that came about while investigating this was a warning he got about | |
670 | unreachable code when he | |
671 | enabled the auto-initialization. There were two variables declared just | |
672 | after a <tt>switch</tt> (and outside of any <tt>case</tt>), where they | |
673 | would never be reached. | |
674 | </p> | |
675 | ||
676 | <p> | |
677 | Arithmetic overflow is another undefined behavior in C that can cause various | |
678 | problems. GCC can check for signed overflow, which performs well | |
679 | (the overhead is in the noise, he said), but adding warning messages for it does grow | |
680 | the kernel by 6%; making the overflow abort, instead, only adds 0.1%. | |
681 | Clang can check for both signed and unsigned overflow; signed overflow is | |
682 | undefined, while unsigned overflow is defined, but often unexpected. | |
683 | Marking places where unsigned overflow is expected is needed; it | |
684 | would be nice to get those annotations put into the kernel, Cook said. | |
685 | </p> | |
686 | ||
687 | <p> | |
688 | Explicit bounds checking is expensive. Doing it for | |
689 | <tt>copy_{to,from}_user()</tt> is a less than 1% performance hit, but | |
690 | adding it to the <tt>strcpy()</tt> and <tt>memcpy()</tt> families is | |
691 | around a 2% hit. Pre-Meltdown that would have been a totally impossible | |
692 | performance regression for security, he said; post-Meltdown, since it is | |
693 | less than 5%, maybe there is a chance to add this checking. | |
694 | </p> | |
695 | ||
696 | <p> | |
697 | Better APIs would help as well. He pointed to the evolution of | |
698 | <tt>strcpy()</tt>, through <tt>str<b>n</b>cpy()</tt> and | |
699 | <tt>str<b>l</b>cpy()</tt> (each with their own bounds flaws) to | |
700 | <tt>str<b>s</b>cpy()</tt>, which seems to be "OK so far". He also mentioned | |
701 | <tt>memcpy()</tt> again as a poor API with respect to bounds checking. | |
702 | </p> | |
703 | ||
704 | <p> | |
705 | Hardware support for bounds checking is available in the application | |
706 | data integrity (ADI) feature for SPARC and is coming for Arm; it may also be | |
707 | available for Intel processors at some point. These all use a form of | |
708 | "memory tagging", where allocations get a tag that is stored in the | |
709 | high-order byte of the address. An offset from the address can be checked | |
710 | by the hardware to see if it still falls within the allocated region based | |
711 | on the tag. | |
712 | </p> | |
713 | ||
714 | <p> | |
715 | Control-flow integrity (CFI) has become more of an issue lately because | |
716 | much of what attackers had used in the past has been marked as "no execute" | |
717 | so they are turning to using existing code "gadgets" already present in the | |
718 | kernel by hijacking existing indirect function calls. In C, you can just call | |
719 | pointers without regard to the type as it just treats them as an | |
720 | address to jump to. Clang has a CFI-sanitize feature that enforces the | |
721 | function prototype to restrict the calls that can be made. It is done at | |
722 | runtime and is not perfect, in part because there are lots of functions in | |
723 | the kernel that take one unsigned long parameter and return an unsigned long. | |
724 | </p> | |
725 | ||
726 | <p> | |
727 | Attacks on CFI have both a "forward edge", which is what CFI sanitize | |
728 | tries to handle, and a "backward edge" that comes from manipulating the stack | |
729 | values, the return address in particular. Clang has two methods available | |
730 | to prevent the stack manipulation. The first is the "safe stack", which | |
731 | puts various important items (e.g. "safe" variables, register spills, and | |
732 | the return address) on a separate stack. Alternatively, the "shadow stack" | |
733 | feature creates a separate stack just for return addresses. | |
734 | </p> | |
735 | ||
736 | <p> | |
737 | One problem with these other stacks is that they are still writable, so if | |
738 | an attacker can find them in memory, they can still perform their attacks. | |
739 | Hardware-based protections, like Intel's Control-Flow Enforcement | |
740 | Technology (CET), <a href="/Articles/758245/">provide a read-only shadow | |
741 | call stack</a> for return addresses. Another hardware protection is <a | |
742 | href="/Articles/718888/">pointer authentication</a> for Arm, which adds a | |
743 | kind of encrypted tag to the return address that can be verified before it | |
744 | is used. | |
745 | </p> | |
746 | ||
747 | <h4>Status and challenges</h4> | |
748 | ||
749 | <p> | |
750 | Cook then went through the current status of handling these different | |
751 | problems in the kernel. VLAs are almost completely gone, he said, just a | |
752 | few remain in the crypto subsystem; he hopes those VLAs will be gone by 4.20 (or | |
753 | whatever the number of the next kernel release turns out to be). Once that | |
754 | happens, he plans to turn on <tt>-Wvla</tt> for the kernel build so that | |
755 | none creep back in. | |
756 | </p> | |
757 | ||
758 | <p> | |
759 | There has been steady progress made on marking fall-through cases in | |
760 | <tt>switch</tt> statements. Only 745 remain to be handled of the 2311 that | |
761 | existed when this work started; each one requires scrutiny to determine | |
762 | what the author's intent is. Auto-initialized local variables can be done | |
763 | using compiler plugins, but that is "not quite what we want", he said. | |
764 | More compiler support would be helpful there. For arithmetic overflow, it | |
765 | would be nice to see GCC get support for the unsigned case, but memory | |
766 | allocations are now doing explicit overflow checking at this point. | |
767 | </p> | |
768 | ||
769 | <p> | |
770 | Bounds checking has seen some "crying about performance hits", so we are | |
771 | waiting impatiently for hardware support, he said. CFI forward-edge | |
772 | protection needs <a href="/Articles/744507/">link-time optimization</a> | |
773 | (LTO) support for Clang in the kernel, but it is currently working on | |
774 | Android. For backward-edge mitigation, the Clang shadow call stack is | |
775 | working on Android, but we are impatiently waiting for hardware support for | |
776 | that too. | |
777 | </p> | |
778 | ||
779 | <p> | |
780 | There are a number of challenges in doing security development for the | |
781 | kernel, Cook said. There are cultural boundaries due to conservatism | |
782 | within the kernel community; that requires patiently working and reworking | |
783 | features in order to get them upstream. There are, of course, technical | |
784 | challenges because of the complexity of security changes; those kinds of | |
785 | problems can be solved. There are also resource limitations in terms of | |
786 | developers, testers, reviewers, and so on. KSPP and the other kernel | |
787 | security developers are still making that "slow but steady" progress. | |
788 | </p> | |
789 | ||
790 | <p> | |
791 | Cook's <a href="https://outflux.net/slides/2018/lss/danger.pdf">slides | |
792 | [PDF]</a> are available for interested readers; before long, there should | |
793 | be a video available of the talk as well. | |
794 | ||
795 | <p> | |
796 | [I would like to thank LWN's travel sponsor, the Linux Foundation, for | |
797 | travel assistance to attend the Linux Security Summit in Vancouver.] | |
798 | <p><a href="/Articles/763641/#Comments">Comments (70 posted)</a> | |
799 | <p> | |
800 | <a name="763106"></a><h2 class="SummaryHL"><a href="/Articles/763106/">The second half of the 4.19 merge window</a></h2> | |
801 | ||
802 | <div class="FeatureByline"> | |
803 | By <b>Jonathan Corbet</b><br>August 26, 2018 | |
804 | </div> | |
805 | By the time Linus Torvalds <a href="/Articles/763497/">released | |
806 | 4.19-rc1</a> and closed | |
807 | the merge window for this development cycle, 12,317 non-merge | |
808 | changesets had found their way into the mainline; about 4,800 of those | |
809 | landed after <a href="/Articles/762566/">last week's summary</a> was | |
810 | written. As tends to be the case | |
811 | late in the merge window, many of those changes were fixes for the bigger | |
812 | patches that went in early, but there were also a number of new features | |
813 | added. Some of the more significant changes include: | |
814 | <br clear="all"> | |
815 | <p> | |
816 | ||
817 | <h4>Core kernel</h4> | |
818 | <p> | |
819 | <ul class="spacylist"> | |
820 | ||
821 | <li> The full set of patches adding <a | |
822 | href="/Articles/761118/">control-group awareness to the out-of-memory | |
823 | killer</a> has <i>not</i> been merged due to ongoing disagreements, | |
824 | but one piece of it has: there is a new <tt>memory.oom.group</tt> | |
825 | control knob that will cause all processes within a control group to | |
826 | be killed in an out-of-memory situation. | |
827 | <li> A new set of protections has been added to prevent an attacker from | |
828 | fooling a program into writing to an existing file or FIFO. An open | |
829 | with the <tt>O_CREAT</tt> flag to a file or FIFO in a world-writable, | |
830 | sticky | |
831 | directory (e.g. <tt>/tmp</tt>) will fail if the owner of the opening | |
832 | process is not the owner of either the target file or the containing | |
833 | directory. This behavior, disabled by default, is controlled by the | |
834 | new <tt>protected_regular</tt> and <tt>protected_fifos</tt> sysctl | |
835 | knobs. | |
836 | ||
837 | </ul> | |
838 | ||
839 | <h4>Filesystems and block layer</h4> | |
840 | <p> | |
841 | <ul class="spacylist"> | |
842 | ||
843 | <li> The dm-integrity device-mapper target can now use a separate device | |
844 | for metadata storage. | |
845 | <li> EROFS, the "enhanced read-only filesystem", has been added to the | |
846 | staging tree. It is "<span>a lightweight read-only file system with | |
847 | modern designs (eg. page-sized blocks, inline xattrs/data, etc.) for | |
848 | scenarios which need high-performance read-only requirements, | |
849 | eg. firmwares in mobile phone or LIVECDs</span>" | |
850 | <li> The new "metadata copy-up" feature in overlayfs will avoid copying a | |
851 | file's contents to the upper layer on a metadata-only change. See <a | |
852 | href="https://git.kernel.org/linus/d5791044d2e5749ef4de84161cec5532e2111540">this | |
853 | commit</a> for details. | |
854 | ||
855 | </ul> | |
856 | <p> | |
857 | ||
858 | <h4>Hardware support</h4> | |
859 | <p> | |
860 | <ul class="spacylist"> | |
861 | ||
862 | <li> <b>Graphics</b>: | |
863 | Qualcomm Adreno A6xx GPUs. | |
864 | ||
865 | <li> <b>Industrial I/O</b>: | |
866 | Spreadtrum SC27xx series PMIC analog-to-digital converters, | |
867 | Analog Devices AD5758 digital-to-analog converters, | |
868 | Intersil ISL29501 time-of-flight sensors, | |
869 | Silicon Labs SI1133 UV index/ambient light sensor chips, and | |
870 | Bosch Sensortec BME680 sensors. | |
871 | ||
872 | ||
873 | <li> <b>Miscellaneous</b>: | |
874 | Generic ADC-based resistive touchscreens, | |
875 | Generic ASIC devices via the Google <a | |
876 | href="/ml/linux-kernel/20180630000253.70103-1-sque@chromium.org/">Gasket | |
877 | framework</a>, | |
878 | Analog Devices ADGS1408/ADGS1409 multiplexers, | |
879 | Actions Semi Owl SoCs DMA controllers, | |
880 | MEN 16Z069 watchdog timers, | |
881 | Rohm BU21029 touchscreen controllers, | |
882 | Cirrus Logic CS47L35, CS47L85, CS47L90, and CS47L91 codecs, | |
883 | Cougar 500k gaming keyboards, | |
884 | Qualcomm GENI-based I2C controllers, | |
885 | Actions Semiconductor Owl I2C controllers, | |
886 | ChromeOS EC-based USBPD chargers, and | |
887 | Analog Devices ADP5061 battery chargers. | |
888 | ||
889 | <li> <b>USB</b>: | |
890 | Nuvoton NPCM7XX on-chip EHCI USB controllers, | |
891 | Broadcom Stingray PCIe PHYs, and | |
892 | Renesas R-Car generation 3 PCIe PHYs. | |
893 | ||
894 | <li> There is also a new subsystem for the abstraction of GNSS (global | |
895 | navigation satellite systems — GPS, for example) receivers in the | |
896 | kernel. To date, such devices have been handled with an abundance of | |
897 | user-space drivers; the hope is to bring some order in this area. | |
898 | Support for u-blox and SiRFstar receivers has been added as well. | |
899 | ||
900 | </ul> | |
901 | ||
902 | ||
903 | <p> | |
904 | <h4>Kernel internal</h4> | |
905 | <p> | |
906 | <ul class="spacylist"> | |
907 | ||
908 | <li> The <tt>__deprecated</tt> marker, used to mark interfaces that should | |
909 | no longer be used, has been deprecated and removed from the kernel | |
910 | entirely. <a | |
911 | href="https://git.kernel.org/linus/771c035372a036f83353eef46dbb829780330234">Torvalds | |
912 | said</a>: "<span>They are not useful. They annoy | |
913 | everybody, and nobody ever does anything about them, because it's | |
914 | always 'somebody elses problem'. And when people start thinking that | |
915 | warnings are normal, they stop looking at them, and the real warnings | |
916 | that mean something go unnoticed.</span>" | |
917 | <li> The minimum version of GCC required by the kernel has been moved up to | |
918 | 4.6. | |
919 | ||
920 | </ul> | |
921 | <p> | |
922 | ||
923 | There are a couple of significant changes that failed to get in this time | |
924 | around, including the <a | |
925 | href="/Articles/745073/">XArray</a> data structure. The patches are | |
926 | thought to be ready, but they had the bad luck to be based on a tree that | |
927 | failed to be merged for other reasons, so Torvalds <a | |
928 | href="/ml/linux-kernel/CA+55aFxFjAmrFpwQmEHCthHOzgidCKnod+cNDEE+3Spu9o1s3w@mail.gmail.com/">didn't | |
929 | even look at them</a>. That, in turn, blocks another set of patches intended to | |
930 | enable migration of slab-allocated objects. | |
931 | <p> | |
932 | The other big deferral is the <a href="/Articles/759499/">new system-call | |
933 | API for filesystem mounting</a>. Despite ongoing <a | |
934 | href="/Articles/762355/">concerns</a> about what happens when the same | |
935 | low-level device is mounted multiple times with conflicting options, Al | |
936 | Viro sent <a | |
937 | href="/ml/linux-fsdevel/20180823223145.GK6515@ZenIV.linux.org.uk/">a pull | |
938 | request</a> to send this work upstream. The ensuing discussion made it | |
939 | clear that there is still not a consensus in this area, though, so it seems | |
940 | that this work has to wait for another cycle. | |
941 | <p> | |
942 | Assuming all goes well, the kernel will stabilize over the coming weeks and | |
943 | the final 4.19 release will happen in mid-October. | |
944 | <p><a href="/Articles/763106/#Comments">Comments (1 posted)</a> | |
945 | <p> | |
946 | <a name="763603"></a><h2 class="SummaryHL"><a href="/Articles/763603/">Measuring (and fixing) I/O-controller throughput loss</a></h2> | |
947 | ||
948 | <div class="GAByline"> | |
949 | <p>August 29, 2018</p> | |
950 | <p>This article was contributed by Paolo Valente</p> | |
951 | </div> | |
952 | <p>Many services, from web hosting and video streaming to cloud storage, | |
953 | need to move data to and from storage. They also often require that each per-client | |
954 | I/O flow be guaranteed a non-zero amount of bandwidth and a bounded latency. An | |
955 | expensive way to provide these guarantees is to over-provision | |
956 | storage resources, keeping each resource underutilized, and thus | |
957 | have plenty of bandwidth available for the few I/O flows dispatched to | |
958 | each medium. Alternatively one can use an I/O controller. Linux provides | |
959 | two mechanisms designed to throttle some I/O streams to allow others to | |
960 | meet their bandwidth and latency requirements. These mechanisms work, but | |
961 | they come at a cost: a loss of as much as 80% of total available I/O | |
962 | bandwidth. I have run some tests to demonstrate this problem; some | |
963 | upcoming improvements to the <a href="/Articles/601799/">bfq I/O | |
964 | scheduler</a> promise to improve the situation considerably. | |
965 | <p> | |
966 | ||
967 | <p>Throttling does guarantee control, even on drives that happen to be | |
968 | highly utilized but, as will be seen, it has a hard time | |
969 | actually ensuring that drives are highly utilized. Even with greedy I/O | |
970 | flows, throttling | |
971 | easily ends up utilizing as little as 20% of the available speed of a | |
972 | flash-based drive. | |
973 | ||
974 | Such a speed loss may be particularly problematic with lower-end | |
975 | storage. On the opposite end, it is also disappointing with | |
976 | high-end hardware, as the Linux block I/O stack itself has been | |
977 | <a href="/Articles/552904">redesigned from the ground up</a> to fully utilize the | |
978 | high speed of modern, fast storage. In | |
979 | addition, throttling fails to guarantee the expected bandwidths if I/O | |
980 | contains both reads and writes, or is sporadic in nature. | |
981 | ||
982 | <p>On the bright side, there now seems to be an effective alternative for | |
983 | controlling I/O: the proportional-share policy provided by the bfq I/O | |
984 | scheduler. It enables nearly 100% storage bandwidth utilization, | |
985 | at least with some of the workloads that are problematic for | |
986 | throttling. An upcoming version of bfq may be able to | |
987 | achieve this result with almost all workloads. Finally, bfq | |
988 | guarantees bandwidths with all workloads. The current limitation of | |
989 | bfq is that its execution overhead becomes significant at speeds above | |
990 | 400,000 I/O operations per second on commodity CPUs. | |
991 | ||
992 | <p>Using the bfq I/O scheduler, Linux can now guarantee | |
993 | low latency to lightweight flows containing sporadic, short I/O. No | |
994 | throughput issues arise, and no configuration is required. This | |
995 | capability benefits important, time-sensitive tasks, such as | |
996 | video or audio streaming, as well as executing commands or starting | |
997 | applications. | |
998 | ||
999 | Although benchmarks are not available yet, these guarantees might also be | |
1000 | provided by the newly proposed <a href="/Articles/758963/">I/O latency | |
1001 | controller</a>. It allows administrators to set target latencies for I/O | |
1002 | requests originating from each group of processes, and favors the | |
1003 | groups with the lowest target latency. | |
1004 | ||
1005 | <h4>The testbed</h4> | |
1006 | ||
1007 | <p>I ran the tests with an ext4 filesystem mounted on a PLEXTOR | |
1008 | PX-256M5S SSD, which features a peak rate of ~160MB/s with random I/O, | |
1009 | and of ~500MB/s with sequential I/O. I used blk-mq, in Linux | |
1010 | 4.18. The system was equipped with a 2.4GHz Intel Core i7-2760QM | |
1011 | CPU and 1.3GHz DDR3 DRAM. In such a system, a single thread doing | |
1012 | synchronous reads reaches a throughput of 23MB/s. | |
1013 | ||
1014 | <p> | |
1015 | For the purposes of these tests, each process is considered to be in one of | |
1016 | two groups, termed "target" and "interferers". | |
1017 | A target is a single-process, I/O-bound group whose I/O is focused on. In | |
1018 | particular, I measure the I/O throughput enjoyed by this group to get | |
1019 | the minimum bandwidth delivered to the group. | |
1020 | An interferer is a single-process group whose role is to generate | |
1021 | additional I/O that interferes with the I/O of the target. | |
1022 | The tested workloads contain one target and multiple interferers. | |
1023 | ||
1024 | <p>The single process in each group either reads or writes, through | |
1025 | asynchronous (buffered) operations, to one file — different from the file read | |
1026 | or written by any other process — after invalidating the buffer cache | |
1027 | for the file. I define a reader or writer process as either "random" or | |
1028 | "sequential", depending on whether it reads or writes its file at random | |
1029 | positions or sequentially. | |
1030 | Finally, an interferer is defined as being either "active" or "inactive" | |
1031 | depending on whether it performs I/O during the test. When an | |
1032 | interferer is mentioned, it is assumed that the interferer is active. | |
1033 | ||
1034 | <p>Workloads are defined so as to try to cover the combinations that, I | |
1035 | believe, most influence the performance of the storage device and of | |
1036 | the I/O policies. For brevity, in this article I show results for only | |
1037 | two groups of workloads: | |
1038 | <p> | |
1039 | <ul class="spacylist"> | |
1040 | ||
1041 | <li> <b>Static sequential</b>: four synchronous sequential readers or four | |
1042 | asynchronous sequential writers, plus five inactive interferers. | |
1043 | ||
1044 | <li> <b>Static random</b>: four synchronous random readers, all with a block | |
1045 | size equal to 4k, plus five inactive interferers. | |
1046 | </ul> | |
1047 | ||
1048 | <p>To create each workload, I considered, for each mix of | |
1049 | interferers in the group, two possibilities for the target: it could be | |
1050 | either a random or a sequential synchronous reader. | |
1051 | ||
1052 | In <a | |
1053 | href="http://algogroup.unimore.it/people/paolo/pub-docs/extended-lat-bw-throughput.pdf">a | |
1054 | longer version of this article [PDF]</a>, you will also find results | |
1055 | for workloads with varying degrees of I/O randomness, and for | |
1056 | dynamic workloads (containing sporadic I/O sources). These extra results | |
1057 | confirm the losses of throughput and I/O control for throttling that | |
1058 | are shown here. | |
1059 | ||
1060 | <h4>I/O policies</h4> | |
1061 | ||
1062 | <p>Linux provides two I/O-control mechanisms for guaranteeing (a minimum) | |
1063 | bandwidth, or at least fairness, to long-lived flows: the throttling | |
1064 | and proportional-share I/O policies. | |
1065 | With throttling, one can set a maximum bandwidth limit — "max limit" for | |
1066 | brevity — for the I/O of each group. Max limits can be used, | |
1067 | in an indirect way, to provide the service guarantee at the focus of this | |
1068 | article. For example, to guarantee minimum bandwidths to I/O flows, a group can | |
1069 | be guaranteed a minimum bandwidth by limiting the maximum bandwidth of | |
1070 | all the other groups. | |
1071 | ||
1072 | <p>Unfortunately, max limits have two drawbacks in terms of | |
1073 | throughput. First, if some groups do not use their allocated bandwidth, | |
1074 | that bandwidth cannot be reclaimed by other active groups. Second, | |
1075 | limits must comply with the worst-case speed of the device, namely, | |
1076 | its random-I/O peak rate. Such limits will clearly leave a lot of | |
1077 | throughput unused with workloads that otherwise would drive the | |
1078 | device to higher throughput levels. | |
1079 | ||
1080 | Maximizing throughput is simply not a goal of max limits. So, for | |
1081 | brevity, test results with max limits are not shown here. You can | |
1082 | find these results, plus a more detailed description of the above | |
1083 | drawbacks, in the long version of this article. | |
1084 | ||
1085 | <p>Because of these drawbacks, a new, still experimental, low limit | |
1086 | has been added to the throttling policy. If a group is | |
1087 | assigned a low limit, then the throttling policy automatically | |
1088 | limits the I/O of the other groups in such a way to | |
1089 | guarantee to the group a minimum bandwidth equal to its assigned low | |
1090 | limit. This new throttling mechanism throttles no group as long as | |
1091 | every group is getting at least its assigned minimum bandwidth. I tested | |
1092 | this mechanism, but did not consider the interesting problem | |
1093 | of guaranteeing minimum bandwidths while, at the same time, enforcing | |
1094 | maximum bandwidths. | |
1095 | ||
1096 | <p>The other I/O policy available in Linux, proportional share, | |
1097 | provides weighted fairness. Each group is assigned a weight, and should | |
1098 | receive a portion of the total throughput proportional to its weight. | |
1099 | This scheme guarantees minimum bandwidths in the same way that low limits do | |
1100 | in throttling. In particular, it guarantees to each group a minimum | |
1101 | bandwidth equal to the ratio between the weight of the group, and the | |
1102 | sum of the weights of all the groups that may be active at the same | |
1103 | time. | |
1104 | ||
1105 | <p>The actual implementation of the proportional-share policy, on a given | |
1106 | drive, depends on what flavor of the block layer is in use for that | |
1107 | drive. If the drive is using the legacy block interface, the policy is | |
1108 | implemented by | |
1109 | the cfq I/O scheduler. Unfortunately, cfq fails to control | |
1110 | bandwidths with flash-based storage, especially on drives featuring | |
1111 | command queueing. This case is not considered in these tests. With | |
1112 | drives using the multiqueue interface, | |
1113 | proportional share is implemented by bfq. This is the | |
1114 | combination considered in the tests. | |
1115 | ||
1116 | <p>To benchmark both throttling (low limits) and proportional share, I | |
1117 | tested, for each workload, the combinations of I/O policies and I/O | |
1118 | schedulers reported in the table below. In the end, there are three test | |
1119 | cases for each workload. In addition, for some workloads, I considered two | |
1120 | versions of bfq for the proportional-share policy. | |
1121 | ||
1122 | <blockquote> | |
1123 | <table class="OddEven"> | |
1124 | <tr> | |
1125 | <th align="left" width="14%" valign="top">Name </th> | |
1126 | <th align="left" width="14%" valign="top">I/O policy </th> | |
1127 | <th align="left" width="14%" valign="top">Scheduler </th> | |
1128 | <th align="left" width="14%" valign="top">Parameter for target </th> | |
1129 | <th align="left" width="14%" valign="top">Parameter for each | |
1130 | of the four active interferers </th> | |
1131 | <th align="left" width="14%" valign="top">Parameter for each of the five inactive | |
1132 | interferers </th> | |
1133 | <th align="left" width="14%" valign="top">Sum of parameters</th> | |
1134 | </tr> | |
1135 | <tr> | |
1136 | <td align="left" width="14%" valign="top">low-none</td> | |
1137 | <td align="left" width="14%" valign="top">Throttling with low limits</td> | |
1138 | <td align="left" width="14%" valign="top">none</td> | |
1139 | <td align="left" width="14%" valign="top">10MB/s</td> | |
1140 | <td align="left" width="14%" valign="top">10MB/s | |
1141 | (tot: 40)</td> | |
1142 | <td align="left" width="14%" valign="top">20MB/s (tot: 100)</td> | |
1143 | <td align="left" width="14%" valign="top">150MB/s</td> | |
1144 | </tr> | |
1145 | <tr> | |
1146 | <td align="left" width="14%" valign="top">prop-bfq</td> | |
1147 | <td align="left" width="14%" valign="top">Proportional share</td> | |
1148 | <td align="left" width="14%" valign="top">bfq</td> | |
1149 | <td align="left" width="14%" valign="top">300</td> | |
1150 | <td align="left" width="14%" valign="top">100 (tot: 400)</td> | |
1151 | <td align="left" width="14%" valign="top">200 | |
1152 | (tot: 1000)</td> | |
1153 | <td align="left" width="14%" valign="top">1700</td> | |
1154 | </tr> | |
1155 | </table> | |
1156 | </blockquote> | |
1157 | ||
1158 | ||
1159 | ||
1160 | ||
1161 | <p>For low limits, I report results with only none as the I/O scheduler, | |
1162 | because the results are the same with kyber and mq-deadline. | |
1163 | ||
1164 | <p>The capabilities of the storage medium and of low limits drove the policy | |
1165 | configurations. In particular: | |
1166 | ||
1167 | <ul class="spacylist"> | |
1168 | ||
1169 | <li> The configuration of the target and of the active interferers for | |
1170 | low-none is the one for which low-none provides | |
1171 | its best possible minimum-bandwidth guarantee to the target: 10MB/s, | |
1172 | guaranteed if all interferers are readers. | |
1173 | Results remain the same regardless of the values used for target | |
1174 | latency and idle time; I set them to 100µs and | |
1175 | 1000µs, respectively, for every group.</li> | |
1176 | ||
1177 | <li> Low limits for inactive interferers are set to twice the limits for | |
1178 | active interferers, to pose greater difficulties to the | |
1179 | policy.</li> | |
1180 | ||
1181 | <li> I chose weights for prop-bfq so as to guarantee about the same | |
1182 | minimum bandwidth as low-none to the target, in the same | |
1183 | only-reader worst case as for low-none and to preserve, between | |
1184 | the weights of active and inactive interferers, the same ratio as | |
1185 | between the low limits of active and inactive interferers.</li> | |
1186 | </ul> | |
1187 | <p>Full details on configurations can be found in the long version of this | |
1188 | article. | |
1189 | ||
1190 | <p>Each workload was run ten times for each policy, plus ten times without | |
1191 | any I/O control, i.e., with none as I/O scheduler and no I/O policy in | |
1192 | use. For each run, I measured the I/O throughput of the target (which | |
1193 | reveals the bandwidth provided to the target), the cumulative I/O | |
1194 | throughput of the interferers, and the total I/O throughput. These | |
1195 | quantities fluctuated very little during each run, as well as across | |
1196 | different runs. Thus in the graphs I report only averages over per-run | |
1197 | average throughputs. In particular, for the case of no I/O control, I | |
1198 | report only the total I/O throughput, to give an idea of the throughput | |
1199 | that can be reached without imposing any control. | |
1200 | ||
1201 | <h4>Results</h4> | |
1202 | ||
1203 | <p> | |
1204 | This plot shows throughput results for the simplest group of | |
1205 | workloads: the static-sequential set. | |
1206 | ||
1207 | <blockquote> | |
1208 | <img src="https://static.lwn.net/images/2018/iocontrol/fig1.png" alt="[Figure 1]" class="photo"> | |
1209 | </blockquote> | |
1210 | <p> | |
1211 | ||
1212 | With a random reader as | |
1213 | the target against sequential readers as interferers, low-none does | |
1214 | guarantee the configured low limit to the target. Yet it reaches only a | |
1215 | low total throughput. The throughput of | |
1216 | the random reader evidently oscillates around 10MB/s during the test. | |
1217 | This implies that it is at least slightly below 10MB/s for a significant | |
1218 | percentage of the time. But when this happens, the low-limit mechanism | |
1219 | limits the maximum bandwidth of every active group to the low limit set | |
1220 | for the group, i.e., to just 10MB/s. | |
1221 | The end result is a total throughput lower than 10% of the throughput | |
1222 | reached without I/O control. | |
1223 | <p> | |
1224 | That said, the high throughput achieved without I/O control is | |
1225 | obtained by choking the random I/O of the target in favor of | |
1226 | the sequential I/O of the interferers. Thus, it | |
1227 | is probably more interesting to compare low-none throughput with the | |
1228 | throughput reachable while actually guaranteeing 10MB/s to the target. | |
1229 | The target is a single, synchronous, random reader, which reaches 23MB/s while | |
1230 | active. So, to guarantee 10MB/s to the target, it is enough to | |
1231 | serve it for about half of the time, and the interferers for the other | |
1232 | half. Since the device reaches ~500MB/s with the sequential I/O of the | |
1233 | interferers, the resulting throughput with this service scheme would be | |
1234 | (500+23)/2, or about 260MB/s. low-none thus reaches less than 20% | |
1235 | of the | |
1236 | total throughput that could be reached while still preserving the target | |
1237 | bandwidth. | |
1238 | ||
1239 | <p>prop-bfq provides the target with a slightly higher throughput than | |
1240 | low-none. This makes it harder for prop-bfq to reach a high total | |
1241 | throughput, because prop-bfq serves more random I/O (from the target) | |
1242 | than low-none. Nevertheless, prop-bfq gets a much higher total | |
1243 | throughput than low-none. According to the above estimate, this | |
1244 | throughput is about 90% of the maximum throughput that could be reached, | |
1245 | for this workload, without violating service guarantees. The reason for | |
1246 | this good result is that bfq provides an effective implementation of | |
1247 | the proportional-share service policy. At any time, each active group is | |
1248 | granted a fraction of the current total throughput, and the sum of these | |
1249 | fractions is equal to one; so group bandwidths naturally saturate the | |
1250 | available total throughput at all times. | |
1251 | ||
1252 | <p>Things change with the second workload: a random reader against | |
1253 | sequential writers. Now low-none reaches a much higher total | |
1254 | throughput than prop-bfq. low-none serves | |
1255 | much more sequential (write) I/O than prop-bfq because writes somehow | |
1256 | break the low-limit mechanisms and prevail over the reads of the target. | |
1257 | Conceivably, this happens because writes tend to both starve reads in | |
1258 | the OS (mainly by eating all available I/O tags) and to cheat on their | |
1259 | completion time in the drive. In contrast, bfq is intentionally | |
1260 | configured to privilege reads, to counter these issues. | |
1261 | ||
1262 | <p>In particular, low-none gets an even higher throughput than no | |
1263 | I/O control at all because it penalizes the random I/O of the target even more | |
1264 | than the no-controller configuration. | |
1265 | ||
1266 | <p>Finally, with the last two workloads, prop-bfq reaches even | |
1267 | higher total throughput than with the first two. It happens | |
1268 | because the target also does sequential I/O, and serving sequential | |
1269 | I/O is much more beneficial for throughput than serving random I/O. With | |
1270 | these two workloads, the total throughput is, respectively, close to or | |
1271 | much higher than that reached without I/O control. For the last | |
1272 | workload, the total throughput is much higher because, differently from | |
1273 | none, bfq privileges reads over asynchronous writes, and reads yield | |
1274 | a higher throughput than writes. In contrast, low-none still gets | |
1275 | lower or much lower throughput than prop-bfq, because of the same | |
1276 | issues that hinder low-none throughput with the first two workloads. | |
1277 | ||
1278 | <p>As for bandwidth guarantees, with readers as interferers (third | |
1279 | workload), prop-bfq, as expected, gives the target a fraction of the | |
1280 | total throughput proportional to its weight. bfq approximates | |
1281 | perfect proportional-share bandwidth distribution among groups doing I/O | |
1282 | of the same type (reads or writes) and with the same locality | |
1283 | (sequential or random). With the last workload, prop-bfq gives much | |
1284 | more throughput to the reader than to all the interferers, because | |
1285 | interferers are asynchronous writers, and bfq privileges reads. | |
1286 | ||
1287 | <p>The second group of workloads (static random), is the one, among all | |
1288 | the workloads considered, for which prop-bfq performs worst. | |
1289 | Results are shown below: | |
1290 | <p> | |
1291 | <blockquote> | |
1292 | <img src="https://static.lwn.net/images/2018/iocontrol/fig2.png" alt="[Figure 2]" class="photo"> | |
1293 | </blockquote> | |
1294 | <p> | |
1295 | ||
1296 | This chart | |
1297 | reports results not only for mainline bfq, but also for an | |
1298 | improved version of | |
1299 | bfq which is currently under public testing. | |
1300 | As can be seen, with only random readers, prop-bfq reaches a | |
1301 | much lower total throughput than low-none. This happens because of | |
1302 | the Achilles heel of the bfq I/O scheduler. If the process in service | |
1303 | does synchronous I/O and has a higher weight than some other process, then, to | |
1304 | give strong bandwidth guarantees to that process, bfq plugs I/O | |
1305 | dispatching every time the process temporarily stops issuing | |
1306 | I/O requests. In this respect, processes actually have differentiated | |
1307 | weights and do synchronous I/O in the workloads tested. So bfq | |
1308 | systematically performs I/O plugging for them. Unfortunately, this | |
1309 | plugging empties the internal queues of the drive, which kills | |
1310 | throughput with random I/O. And the I/O of all processes in these | |
1311 | workloads is also random. | |
1312 | ||
1313 | <p>The situation reverses with a sequential reader as target. Yet, the most | |
1314 | interesting results come from the new version of bfq, containing | |
1315 | small changes to counter exactly the above weakness. This | |
1316 | version recovers most of the throughput loss with the workload made of | |
1317 | only random I/O and more; with the second workload, where the target is | |
1318 | a sequential reader, it reaches about 3.7 times the total throughput of | |
1319 | low-none. | |
1320 | <p> | |
1321 | ||
1322 | When the main concern is the latency of flows containing short I/O, | |
1323 | Linux seems now rather high performing, thanks to the bfq I/O | |
1324 | scheduler and the I/O latency controller. But if the | |
1325 | requirement is to provide explicit bandwidth guarantees (or just fairness) to | |
1326 | I/O flows, then one must be ready to give up much or most of the speed of | |
1327 | the storage media. bfq helps with some workloads, but loses most of | |
1328 | the throughput with workloads consisting of mostly random | |
1329 | I/O. Fortunately, there is apparently hope for much better | |
1330 | performance since an improvement, still under development, seems to | |
1331 | enable bfq to reach a high throughput with all workloads tested so | |
1332 | far. | |
1333 | ||
1334 | ||
1335 | ||
1336 | <p> | |
1337 | [ I wish to thank Vivek Goyal for enabling me to make this article | |
1338 | much more fair and sound.]<div class="MakeALink"> | |
1339 | <table align="right"><tr><td> | |
1340 | <form action="/SubscriberLink/MakeLink" method="post"> | |
1341 | <input type="hidden" name="articleid" value="763603"> | |
1342 | <input type="submit" value="Send a free link"></form> | |
1343 | </td></tr></table> | |
1344 | </div> | |
1345 | <br clear="all"> | |
1346 | ||
1347 | <p><a href="/Articles/763603/#Comments">Comments (4 posted)</a> | |
1348 | <p> | |
1349 | <a name="763175"></a><h2 class="SummaryHL"><a href="/Articles/763175/">KDE's onboarding initiative, one year later</a></h2> | |
1350 | ||
1351 | <div class="GAByline"> | |
1352 | <p>August 24, 2018</p> | |
1353 | <p>This article was contributed by Marta Rybczyńska</p> | |
1354 | <hr> | |
1355 | <a href="/Archives/ConferenceByYear/#2018-Akademy">Akademy</a> | |
1356 | </div> | |
1357 | <p>In 2017, the KDE community decided on <a | |
1358 | href="https://dot.kde.org/2017/11/30/kdes-goals-2018-and-beyond">three | |
1359 | goals</a> | |
1360 | to concentrate on for the next few years. One of them was <a | |
1361 | href="https://phabricator.kde.org/T7116">streamlining the onboarding of new | |
1362 | contributors</a> (the others were <a | |
1363 | href="https://phabricator.kde.org/T6831">improving | |
1364 | usability</a> and <a href="https://phabricator.kde.org/T7050">privacy</a>). | |
1365 | During <a href="https://akademy.kde.org/">Akademy</a>, the yearly KDE | |
1366 | conference | |
1367 | that was held in Vienna in August, Neofytos Kolokotronis shared the status | |
1368 | of the | |
1369 | onboarding goal, the work done during the last year, and further plans. | |
1370 | While it is a complicated process in a project as big and diverse as KDE, | |
1371 | numerous improvements have already been made.</p> | |
1372 | ||
1373 | <p>Two of the three KDE community goals were proposed by relative | |
1374 | newcomers. Kolokotronis was one of those, having joined the <a | |
1375 | href="https://community.kde.org/Promo">KDE Promo team</a> | |
1376 | not long before proposing | |
1377 | the focus on onboarding. He had previously been involved with <a | |
1378 | href="https://www.chakralinux.org/">Chakra | |
1379 | Linux</a>, a distribution based on KDE software. The fact that new | |
1380 | members of the community proposed strategic goals was also noted in the <a | |
1381 | href="https://conf.kde.org/en/Akademy2018/public/events/79">Sunday keynote | |
1382 | by Claudia Garad</a>.</p> | |
1383 | ||
1384 | <p>Proper onboarding adds excitement to the contribution process and | |
1385 | increases retention, he explained. When we look at <a | |
1386 | href="https://en.wikipedia.org/wiki/Onboarding">the definition of | |
1387 | onboarding</a>, | |
1388 | it is a process in which the new contributors acquire knowledge, skills, and | |
1389 | behaviors so that they can contribute effectively. Kolokotronis proposed | |
1390 | to see it also as socialization: integration into the project's relationships, | |
1391 | culture, structure, and procedures.</p> | |
1392 | ||
1393 | <p>The gains from proper onboarding are many. The project can grow by | |
1394 | attracting new blood with new perspectives and solutions. The community | |
1395 | maintains its health and stays vibrant. Another important advantage of | |
1396 | efficient onboarding is that replacing current contributors becomes easier | |
1397 | when they change interests, jobs, or leave the project for whatever reason. | |
1398 | Finally, successful onboarding adds new advocates to the project.</p> | |
1399 | ||
1400 | <h4>Achievements so far and future plans</h4> | |
1401 | ||
1402 | <p>The team started with ideas for a centralized onboarding process for the | |
1403 | whole of KDE. They found out quickly that this would not work because KDE | |
1404 | is "very decentralized", so it is hard to provide tools and | |
1405 | procedures that are going to work for the whole project. According to | |
1406 | Kolokotronis, other characteristics of KDE that impact onboarding are high | |
1407 | diversity, remote and online teams, and hundreds of contributors in dozens of | |
1408 | projects and teams. In addition, new contributors already know in which | |
1409 | area they want to take part and they prefer specific information that will | |
1410 | be directly useful for them.</p> | |
1411 | ||
1412 | <p>So the team changed its approach; several changes have since been proposed | |
1413 | and implemented. The <a href="https://community.kde.org/Get_Involved">Get | |
1414 | Involved</a> page, which is expected to be one of the resources new | |
1415 | contributors read first, has been rewritten. For the <a | |
1416 | href="https://community.kde.org/KDE/Junior_Jobs">Junior Jobs page</a>, the | |
1417 | team is | |
1418 | ||
1419 | <a href="/Articles/763189/"><img | |
1420 | src="https://static.lwn.net/images/conf/2018/akademy/NeofytosKolokotronis-sm.jpg" alt="[Neofytos | |
1421 | Kolokotronis]" title="Neofytos Kolokotronis" class="rthumb"></a> | |
1422 | ||
1423 | ||
1424 | <a | |
1425 | href="https://phabricator.kde.org/T8686">discussing</a> what the | |
1426 | generic content for KDE as a whole should be. The team simplified <a | |
1427 | href="https://phabricator.kde.org/T7646">Phabricator registration</a>, | |
1428 | which | |
1429 | resulted in documenting the process better. Another part of the work | |
1430 | includes the <a href="https://bugs.kde.org/">KDE Bugzilla</a>; it includes, | |
1431 | for example, initiatives to limit the number of | |
1432 | states of a ticket or remove obsolete products.</p> | |
1433 | ||
1434 | <p>The <a href="https://www.plasma-mobile.org/index.html">Plasma Mobile</a> | |
1435 | team is heavily involved in the onboarding goal. The Plasma Mobile | |
1436 | developers have simplified their | |
1437 | development environment setup and created an <a | |
1438 | href="https://www.plasma-mobile.org/findyourway">interactive "Get | |
1439 | Involved"</a> page. In addition, the Plasma team changed the way task | |
1440 | descriptions are written; they now contain more detail, so that it is | |
1441 | easier to get | |
1442 | involved. The basic description should be short and clear, and it should include | |
1443 | details of the problem and possible solutions. The developers try to | |
1444 | share the list of skills necessary to fulfill the tasks and include clear | |
1445 | links to the technical resources needed.</p> | |
1446 | ||
1447 | <p>Kolokotronis and team also identified a new potential source of | |
1448 | contributors for KDE: distributions using | |
1449 | KDE. They have the advantage of already knowing and using the software. | |
1450 | ||
1451 | The next idea the team is working on is to make sure that setting up a | |
1452 | development environment is easy. The team plans to work on this during a | |
1453 | dedicated sprint this autumn.</p> | |
1454 | ||
1455 | <h4>Searching for new contributors</h4> | |
1456 | ||
1457 | <p>Kolokotronis plans to search for new contributors at the periphery of the | |
1458 | project, among the "skilled enthusiasts": loyal users who actually care | |
1459 | about the project. They "can make wonders", he said. Those | |
1460 | individuals may also be less confident or shy, have trouble taking the | |
1461 | first step, and need guidance. The project leaders should take that into | |
1462 | account.</p> | |
1463 | ||
1464 | <p>In addition, newcomers are all different. Kolokotronis | |
1465 | provided a long list of how contributors differ, | |
1466 | including skills and knowledge, motives and | |
1467 | interests, and time and dedication. His advice is to "try to find their | |
1468 | superpower", the skills they have that are missing in the team. Those | |
1469 | "superpowers" can then be used for the benefit of the project.</p> | |
1470 | ||
1471 | <p>If a project does nothing else, he said, it can start with its documentation. | |
1472 | However, this does not only mean code documentation. Writing down the | |
1473 | procedures or information about the internal work of the project, like who | |
1474 | is working on what, is an important part of a project's documentation and helps | |
1475 | newcomers. There should be also guidelines on how to start, especially | |
1476 | setting up the development environment.</p> | |
1477 | ||
1478 | <p>The first thing the project leaders should do, according to | |
1479 | Kolokotronis, is to spend time on introducing newcomers to the project. | |
1480 | Ideally every new contributor should be assigned mentors — more | |
1481 | experienced members who can help them when needed. The mentors and project | |
1482 | leaders should find tasks that are interesting for each person. Answering | |
1483 | an audience question on suggestions for shy new | |
1484 | contributors, he recommended even more mentoring. It is also very helpful | |
1485 | to make sure that newcomers have enough to read, but "avoid RTFM", he highlighted. It | |
1486 | is also easy for a new contributor "to fly away", he said. The solution is | |
1487 | to keep requesting things and be proactive.</p> | |
1488 | ||
1489 | <h4>What can the project do?</h4> | |
1490 | ||
1491 | <p>Kolokotronis suggested a number of actions for a project when it wants to | |
1492 | improve its onboarding. The first step is preparation: the project | |
1493 | leaders should know the team's and the project's needs. Long-term | |
1494 | planning is important, too. It is not enough to wait for contributors to | |
1495 | come — the project should be proactive, which means reaching out to | |
1496 | candidates, suggesting appropriate tasks and, finally, making people | |
1497 | available for the newcomers if they need help.</p> | |
1498 | ||
1499 | <p>This leads to the next step: to be a mentor. Kolokotronis suggests being a | |
1500 | "great host", but also trying to phase out the dependency on the mentor | |
1501 | rapidly. "We have | |
1502 | been all newcomers", he said. It can be intimidating to join an existing | |
1503 | group. Onboarding creates a sense of belonging which, in turn, increases | |
1504 | retention.</p> | |
1505 | ||
1506 | <p>The last step proposed was to be strategic. This includes thinking about | |
1507 | the emotions you want newcomers to feel. Kolokotronis explained the | |
1508 | strategic part with an example. The overall goal is (surprise!) to improve | |
1509 | onboarding | |
1510 | of new contributors. An intermediate objective might be to keep the | |
1511 | newcomers after they have made their first commit. If your strategy is to keep them | |
1512 | confident and proud, you can use different tactics like praise and | |
1513 | acknowledgment of the work in public. Another useful tactic may be assigning | |
1514 | simple tasks, according to the skill of the contributor.</p> | |
1515 | ||
1516 | <p>To summarize, the most important thing, according to Kolokotronis, is to | |
1517 | respond quickly and spend time with new contributors. This time should be | |
1518 | used to explain procedures, and to introduce the people and culture. It is also | |
1519 | essential to guide first contributions and praise contributors' skill and | |
1520 | effort. | |
1521 | Increase the difficulty of tasks over time to keep contributors motivated and | |
1522 | challenged. And finally, he said, | |
1523 | "turn them into mentors".</p> | |
1524 | ||
1525 | <p>Kolokotronis acknowledges that onboarding "takes time" and "everyone | |
1526 | complains" about it. However, he is convinced that it is beneficial in the | |
1527 | long term | |
1528 | and that it decreases developer turnover.</p> | |
1529 | ||
1530 | <h4>Advice to newcomers</h4> | |
1531 | ||
1532 | <p>Kolokotronis concluded with some suggestions for newcomers to a | |
1533 | project. They should try | |
1534 | to be persistent and to not get discouraged when something goes wrong. | |
1535 | Building connections from the very beginning is helpful. He suggests | |
1536 | asking questions as if you were already a member "and things will be fine". | |
1537 | However, accept criticism if it happens.</p> | |
1538 | ||
1539 | <p>One of the next actions of the onboarding team will be to collect | |
1540 | feedback from newcomers and experienced contributors to see if they agree | |
1541 | on the ideas and processes introduced so far.</p> | |
1542 | <p><a href="/Articles/763175/#Comments">Comments (none posted)</a> | |
1543 | <p> | |
1544 | <a name="763492"></a><h2 class="SummaryHL"><a href="/Articles/763492/">Sharing and archiving data sets with Dat</a></h2> | |
1545 | ||
1546 | <div class="GAByline"> | |
1547 | <p>August 27, 2018</p> | |
1548 | <p>This article was contributed by Antoine Beaupré</p> | |
1549 | </div> | |
1550 | <p><a href="https://datproject.org">Dat</a> is a new peer-to-peer protocol | |
1551 | that uses some of the concepts of | |
1552 | <a href="https://www.bittorrent.com/">BitTorrent</a> and Git. Dat primarily | |
1553 | targets researchers and | |
1554 | open-data activists as it is a great tool for sharing, archiving, and | |
1555 | cataloging large data sets. But it can also be used to implement | |
1556 | decentralized web applications in a novel way.</p> | |
1557 | ||
1558 | <h4>Dat quick primer</h4> | |
1559 | ||
1560 | <p>Dat is written in JavaScript, so it can be installed with <code>npm</code>, but | |
1561 | there are <a href="https://github.com/datproject/dat/releases">standalone | |
1562 | binary builds</a> and | |
1563 | a <a href="https://docs.datproject.org/install">desktop application</a> (as an AppImage). An <a href="https://datbase.org/">online viewer</a> can | |
1564 | be used to inspect data for those who do not want to install | |
1565 | arbitrary binaries on their computers.</p> | |
1566 | ||
1567 | <p>The command-line application allows basic operations like downloading | |
1568 | existing data sets and sharing your own. | |
1569 | Dat uses a 32-byte hex string that is an <a | |
1570 | href="https://ed25519.cr.yp.to/">ed25519 public key</a>, which | |
1571 | is used to discover and find content on the net. | |
1572 | For example, this will | |
1573 | download some sample data:</p> | |
1574 | ||
1575 | <pre> | |
1576 | $ dat clone \ | |
1577 | dat://778f8d955175c92e4ced5e4f5563f69bfec0c86cc6f670352c457943666fe639 \ | |
1578 | ~/Downloads/dat-demo | |
1579 | </pre> | |
1580 | ||
1581 | <p>Similarly, the <code>share</code> command is used to share content. It indexes | |
1582 | the files in a given directory and creates a new unique address like | |
1583 | the one above. The <code>share</code> | |
1584 | command starts a server that uses multiple discovery mechanisms (currently, the <a href="https://en.wikipedia.org/wiki/Mainline_DHT">Mainline Distributed | |
1585 | Hash Table</a> (DHT), a <a href="https://github.com/mafintosh/dns-discovery">custom DNS server</a>, and | |
1586 | multicast DNS) to announce the content to its peers. This is how | |
1587 | another user, armed with that public key, can download that content | |
1588 | with <code>dat clone</code> or mirror the files continuously with | |
1589 | <code>dat sync</code>.</p> | |
1590 | ||
1591 | <p>So far, this looks a lot like BitTorrent <a href="https://en.wikipedia.org/wiki/Magnet_URI_scheme">magnet links</a> updated | |
1592 | with 21st century cryptography. But Dat adds revisions on top of that, | |
1593 | so modifications are automatically shared through the swarm. That is | |
1594 | important for public data sets as those | |
1595 | are often dynamic in nature. Revisions also make it possible to use | |
1596 | <a href="https://blog.datproject.org/2017/10/13/using-dat-for-automatic-file-backups/">Dat as a backup system</a> by saving the data incrementally using an | |
1597 | <a href="https://github.com/mafintosh/hypercore-archiver">archiver</a>.</p> | |
1598 | ||
1599 | <p>While Dat is designed to work on larger data sets, processing them | |
1600 | for sharing may take a while. For example, sharing the Linux | |
1601 | kernel source code required about five minutes as Dat worked on | |
1602 | indexing all of the files. This is comparable to the performance offered by | |
1603 | <a href="https://ipfs.io/">IPFS</a> and BitTorrent. Data sets with | |
1604 | more or larger files may take quite a bit more time. | |
1605 | ||
1606 | <p> | |
1607 | One advantage that Dat has over IPFS is that it | |
1608 | doesn't duplicate the data. When IPFS imports new data, it duplicates | |
1609 | the files into <code>~/.ipfs</code>. For collections of small files like the | |
1610 | kernel, this is not a huge problem, but for larger files like videos or | |
1611 | music, it's a significant limitation. IPFS eventually implemented a | |
1612 | solution to this <a href="https://github.com/ipfs/go-ipfs/issues/875">problem</a> in the form of the experimental | |
1613 | <a href="https://github.com/ipfs/go-ipfs/blob/master/docs/experimental-features.md#ipfs-filestore">filestore feature</a>, but it's not enabled by default. Even with | |
1614 | that feature enabled, though, changes to data sets are not automatically | |
1615 | tracked. In comparison, Dat operation on dynamic data feels much | |
1616 | lighter. The downside is that each set needs its own <code>dat share</code> | |
1617 | process.</p> | |
1618 | ||
1619 | <p>Like any peer-to-peer system, Dat needs at least one peer to stay online to | |
1620 | offer the content, which is impractical for mobile devices. Hosting | |
1621 | providers like <a href="https://hashbase.io/">Hashbase</a> (which is a <a href="https://github.com/datprotocol/DEPs/blob/master/proposals/0003-http-pinning-service-api.md">pinning service</a> in Dat | |
1622 | jargon) can help users keep content online without running their own | |
1623 | <a href="https://docs.datproject.org/server">server</a>. The closest parallel in the traditional web ecosystem | |
1624 | would probably be content distribution networks (CDN) although pinning | |
1625 | services are not necessarily geographically distributed and a CDN does | |
1626 | not necessarily retain a complete copy of a website.</p> | |
1627 | ||
1628 | <a href="/Articles/763544/"> | |
1629 | <img src="https://static.lwn.net/images/2018/dat-photoapp-sm.png" border=0 hspace=5 align="right" | |
1630 | width=300 height=392 alt="[Photo app]" title="Photo app"> | |
1631 | </a> | |
1632 | ||
1633 | <p>A web browser called <a href="https://beakerbrowser.com/">Beaker</a>, based on the <a href="https://electronjs.org/">Electron</a> framework, | |
1634 | can access Dat content natively without going through a pinning | |
1635 | service. Furthermore, Beaker is essential to get any of the <a | |
1636 | href="https://github.com/beakerbrowser/explore">Dat | |
1637 | applications</a> working, as they fundamentally rely on <code>dat://</code> URLs | |
1638 | to do their magic. This means that Dat applications won't work for | |
1639 | most users unless they install that special web browser. There is a | |
1640 | <a href="https://addons.mozilla.org/en-US/firefox/addon/dat-p2p-protocol/">Firefox extension</a> called "<a href="https://github.com/sammacbeth/dat-fox">dat-fox</a>" for people who don't want | |
1641 | to install yet another browser, but it requires installing a | |
1642 | <a href="https://github.com/sammacbeth/dat-fox-helper">helper program</a>. The extension will be able to load <code>dat://</code> URLs | |
1643 | but many applications will still not work. For example, the <a | |
1644 | href="https://github.com/beakerbrowser/dat-photos-app">photo gallery | |
1645 | application</a> completely fails with dat-fox.</p> | |
1646 | ||
1647 | <p>Dat-based applications look promising from a privacy point of view. | |
1648 | Because of its peer-to-peer nature, users regain control over where | |
1649 | their data is stored: either on their own computer, an online server, or | |
1650 | by a trusted third party. But considering the protocol is not well | |
1651 | established in current web browsers, I foresee difficulties in | |
1652 | adoption of that aspect of the Dat ecosystem. Beyond that, it is rather | |
1653 | disappointing that Dat applications cannot run natively in a web | |
1654 | browser given that JavaScript is designed exactly for that.</p> | |
1655 | ||
1656 | <h4>Dat privacy</h4> | |
1657 | ||
1658 | <p>An advantage Dat has over other peer-to-peer protocols like BitTorrent | |
1659 | is end-to-end encryption. I was originally concerned by the encryption | |
1660 | design when reading the <a | |
1661 | href="https://github.com/datproject/docs/raw/master/papers/dat-paper.pdf">academic | |
1662 | paper [PDF]</a>:</p> | |
1663 | ||
1664 | <div class="BigQuote"> | |
1665 | <p>It is up to client programs to make design decisions around which | |
1666 | discovery networks they trust. For example if a Dat client decides | |
1667 | to use the BitTorrent DHT to discover peers, and | |
1668 | they are searching | |
1669 | for a publicly shared Dat key (e.g. a key cited publicly in a | |
1670 | published scientific paper) with known contents, then because of the | |
1671 | privacy design of the BitTorrent DHT it becomes public knowledge | |
1672 | what key that client is searching for.</p> | |
1673 | </div> | |
1674 | ||
1675 | <p>So in other words, to share a secret file with another user, the | |
1676 | public key is transmitted over a secure side-channel, only to then | |
1677 | leak during the discovery process. Fortunately, the public Dat key | |
1678 | is not directly used during discovery as it is <a | |
1679 | href="https://github.com/datprotocol/DEPs/blob/653e0cf40233b5d474cddc04235577d9d55b2934/proposals/0000-peer-discovery.md#discovery-keys">hashed | |
1680 | with BLAKE2B</a>. Still, the security model of Dat assumes the public | |
1681 | key is private, which is a rather counterintuitive concept that might upset | |
1682 | cryptographers and confuse users who are frequently encouraged to type | |
1683 | such strings in address bars and search engines as part of the Dat | |
1684 | experience. There is a <a | |
1685 | href="https://docs.datproject.org/security">security & privacy FAQ</a> | |
1686 | in the Dat | |
1687 | documentation warning about this problem:</p> | |
1688 | ||
1689 | <div class="BigQuote"> | |
1690 | <p>One of the key elements of Dat privacy is that the public key is | |
1691 | never used in any discovery network. The public key is hashed, | |
1692 | creating the discovery key. Whenever peers attempt to connect to | |
1693 | each other, they use the discovery key.</p> | |
1694 | ||
1695 | <p>Data is encrypted using the public key, so it is important that this | |
1696 | key stays secure.</p> | |
1697 | </div> | |
1698 | ||
1699 | <p>There are other privacy issues outlined in the | |
1700 | document; it states that "<span>Dat faces similar privacy risks as | |
1701 | BitTorrent</span>":</p> | |
1702 | ||
1703 | <div class="BigQuote"> | |
1704 | <p>When you download a dataset, your IP address is exposed to the users | |
1705 | sharing that dataset. This may lead to honeypot servers collecting | |
1706 | IP addresses, as we've seen in Bittorrent. However, with dataset | |
1707 | sharing we can create a web of trust model where specific | |
1708 | institutions are trusted as primary sources for datasets, | |
1709 | diminishing the sharing of IP addresses.</p> | |
1710 | </div> | |
1711 | ||
1712 | <p>A Dat blog post refers to this issue as <a href="https://blog.datproject.org/2016/12/12/reader-privacy-on-the-p2p-web/">reader privacy</a> and it | |
1713 | is, indeed, a sensitive issue in peer-to-peer networks. It is how | |
1714 | BitTorrent users are discovered and served scary verbiage from lawyers, | |
1715 | after all. But Dat makes this a little better because, to join a swarm, | |
1716 | you must know what you are looking for already, which means peers who | |
1717 | can look at swarm activity only include users who know the secret | |
1718 | public key. This works well for secret content, but for larger, public | |
1719 | data sets, it is a real problem; it is why the Dat project has <a | |
1720 | href="https://blog.datproject.org/2017/12/10/dont-ship/">avoided | |
1721 | creating a Wikipedia mirror</a> so far.</p> | |
1722 | ||
1723 | <p>I found another privacy issue that is not documented in the security FAQ | |
1724 | during my review of the protocol. As mentioned earlier, | |
1725 | the <a href="https://github.com/datprotocol/DEPs/pull/7">Dat discovery | |
1726 | protocol</a> routinely | |
1727 | phones home to DNS servers operated by the Dat project. | |
1728 | This implies that the default discovery servers (and an | |
1729 | attacker watching over their traffic) know who is publishing or seeking | |
1730 | content, in essence discovering the "social network" behind Dat. This | |
1731 | discovery mechanism can be disabled in clients, but a similar privacy | |
1732 | issue applies to the DHT as well, although that is distributed so it | |
1733 | doesn't require trust of the Dat project itself.</p> | |
1734 | ||
1735 | <p>Considering those aspects of the protocol, privacy-conscious users | |
1736 | will probably want to use Tor or other anonymization techniques to | |
1737 | work around those concerns.</p> | |
1738 | ||
1739 | <h4>The future of Dat</h4> | |
1740 | ||
1741 | <p><a href="https://blog.datproject.org/2017/06/01/dat-sleep-release/">Dat 2.0 was released in June 2017</a> with performance improvements and | |
1742 | protocol changes. <a href="https://github.com/datprotocol/DEPs">Dat | |
1743 | Enhancement Proposals</a> (DEPs) guide the project's | |
1744 | future development; most work is currently geared toward | |
1745 | implementing the draft "<a href="https://github.com/datprotocol/DEPs/blob/master/proposals/0008-multiwriter.md">multi-writer proposal</a>" in | |
1746 | <a href="https://github.com/mafintosh/hyperdb">HyperDB</a>. Without | |
1747 | multi-writer support, only the | |
1748 | original publisher of a Dat can modify it. According to Joe Hand, | |
1749 | co-executive-director of <a href="https://codeforscience.org/">Code for Science &amp; Society</a> (CSS) and | |
1750 | Dat core developer, in an IRC chat, "supporting multiwriter is a big requirement for lots | |
1751 | of folks". For example, while Dat might allow Alice to share her | |
1752 | research results with Bob, he cannot modify or contribute back to those | |
1753 | results. The multi-writer extension allows for Alice to assign trust | |
1754 | to Bob so he can have write access to the data.</p> | |
1755 | ||
1756 | <p> | |
1757 | Unfortunately, the | |
1758 | current proposal doesn't solve the "<span>hard problems</span>" of | |
1759 | "<span>conflict merges | |
1760 | and secure key distribution</span>". The former will be worked out through | |
1761 | user interface tweaks, but the latter is a classic problem that security | |
1762 | projects typically have trouble finding | |
1763 | solutions for—Dat is no exception. How will Alice securely trust | |
1764 | Bob? The OpenPGP web of trust? Hexadecimal fingerprints read over the | |
1765 | phone? Dat doesn't provide a magic solution to this problem.</p> | |
1766 | ||
1767 | <p>Another thing limiting adoption is that Dat is not packaged in any | |
1768 | distribution that I could find (although I <a href="https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=890565">requested it in | |
1769 | Debian</a>) and, considering the speed of change of the JavaScript | |
1770 | ecosystem, this is unlikely to change any time soon. A <a | |
1771 | href="https://github.com/datrs">Rust | |
1772 | implementation</a> of the Dat protocol has started, however, which | |
1773 | might be easier to package than the multitude of <a href="https://nodejs.org/en/">Node.js</a> | |
1774 | modules. In terms of mobile device support, there is an experimental | |
1775 | Android web browser with Dat support called <a href="https://bunsenbrowser.github.io/#!index.md">Bunsen</a>, which somehow | |
1776 | doesn't run on my phone. Some adventurous users have successfully run Dat | |
1777 | in <a href="https://termux.com/">Termux</a>. I haven't found an app | |
1778 | running on iOS at this | |
1779 | point.</p> | |
1780 | ||
1781 | <p>Even beyond platform support, distributed protocols like Dat have a | |
1782 | tough slope to climb against the virtual monopoly of more centralized | |
1783 | protocols, so it remains to be seen how popular those tools will | |
1784 | be. Hand says Dat is supported by multiple non-profit | |
1785 | organizations. Beyond CSS, <a href="https://bluelinklabs.com/">Blue Link Labs</a> is working on the | |
1786 | Beaker Browser as a self-funded startup and a grass-roots | |
1787 | organization, <a href="https://www.digital-democracy.org/">Digital Democracy</a>, has contributed to the | |
1788 | project. The <a href="https://archive.org">Internet Archive</a> has <a href="https://blog.archive.org/2018/06/05/internet-archive-code-for-science-and-society-and-california-digital-library-to-partner-on-a-data-sharing-and-preservation-pilot-project/">announced a collaboration</a> | |
1789 | between itself, CSS, and the California Digital Library to launch a pilot | |
1790 | project to see "<span>how members of a cooperative, decentralized | |
1791 | network can leverage shared services to ensure data preservation while | |
1792 | reducing storage costs and increasing replication counts</span>".</p> | |
1793 | ||
1794 | <p> | |
1795 | Hand said | |
1796 | adoption in academia has been "slow but steady" and that the <a href="https://github.com/codeforscience/Dat-in-the-Lab">Dat in | |
1797 | the Lab project</a> has helped identify areas that could help | |
1798 | researchers adopt the project. Unfortunately, as is the case with many | |
1799 | free-software projects, he said that "our team is definitely a bit | |
1800 | limited on bandwidth to push for bigger adoption". Hand | |
1801 | said that the project received a grant from <a | |
1802 | href="https://www.mozilla.org/en-US/moss/">Mozilla Open Source | |
1803 | Support</a> to improve its documentation, which will be a big help.</p> | |
1804 | ||
1805 | <p>Ultimately, Dat suffers from a problem common to all peer-to-peer | |
1806 | applications, which is naming. Dat addresses are not exactly | |
1807 | intuitive: humans do not remember strings of 64 hexadecimal characters | |
1808 | well. For this, Dat took a <a href="https://github.com/datprotocol/DEPs/blob/master/proposals/0005-dns.md">similar approach</a> to IPFS by using | |
1809 | DNS <code>TXT</code> records and <code>/.well-known</code> URL paths to bridge existing, | |
1810 | human-readable names with Dat hashes. So this sacrifices a part of the | |
1811 | decentralized nature of the project in favor of usability.</p> | |
1812 | ||
1813 | <p>I have tested a lot of distributed protocols like Dat in the past and | |
1814 | I am not sure Dat is a clear winner. It certainly has advantages over | |
1815 | IPFS in terms of usability and resource usage, but the lack of | |
1816 | packages on most platforms is a big limit to adoption for most | |
1817 | people. This means it will be difficult to share content with my | |
1818 | friends and family with Dat anytime soon, which would probably be my | |
1819 | primary use case for the project. Until the protocol reaches the wider | |
1820 | adoption that BitTorrent has seen in terms of platform support, I will | |
1821 | probably wait before switching everything over to this | |
1822 | promising project.</p> | |
1823 | <p><a href="/Articles/763492/#Comments">Comments (11 posted)</a> | |
1824 | <p> | |
1825 | <p> | |
1826 | <b>Page editor</b>: Jonathan Corbet<br> | |
1827 | <h2>Inside this week's LWN.net Weekly Edition</h2> | |
1828 | <ul> | |
1829 | <li> <a href="/Articles/763254/">Briefs</a>: OpenSSH 7.8; 4.19-rc1; Which stable?; Netdev 0x12; Bison 3.1; Quotes; ... | |
1830 | <li> <a href="/Articles/763255/">Announcements</a>: Newsletters; events; security updates; kernel patches; ... | |
1831 | </ul> | |
1832 | <b>Next page</b>: | |
1833 | <a href="/Articles/763254/">Brief items>></a><br> | |
1834 | ||
1835 | </div> <!-- ArticleText --> | |
1836 | </div> | |
1837 | <div class="lwn-u-1 pure-u-md-1-6 not-print"> | |
1838 | <div id="azk93271_right_zone"></div> | |
1839 | </div> | |
1840 | </div> <!-- pure-grid --> | |
1841 | ||
1842 | <br clear="all"> | |
1843 | <center> | |
1844 | <P> | |
1845 | <font size="-2"> | |
1846 | Copyright © 2018, Eklektix, Inc.<BR> | |
1847 | ||
1848 | Comments and public postings are copyrighted by their creators.<br> | |
1849 | Linux is a registered trademark of Linus Torvalds<br> | |
1850 | </font> | |
1851 | </center> | |
1852 | ||
1853 | <script type="text/javascript"> | |
1854 | var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www."); | | // pick the GA host that matches the page scheme, avoiding mixed-content blocking
1855 | document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E")); | | // legacy synchronous ga.js loader: %3C/%3E decode to <>, so this document.writes a <script> tag at parse time
1856 | </script> | |
1857 | <script type="text/javascript"> | |
1858 | try { | | // tolerate ga.js being absent (e.g. blocked by an ad blocker) — _gat would be undefined
1859 | var pageTracker = _gat._getTracker("UA-2039382-1"); | | // _gat is defined by the ga.js script loaded above; the string is the site's GA property ID
1860 | pageTracker._trackPageview(); | | // record one pageview for the current URL
1861 | } catch(err) {}</script> | |
1862 | ||
1863 | </body></html> | |
1864 |