Fix new tests and make TestLWN work

author Niki Roo <niki@nikiroo.be>

Sun, 23 Sep 2018 04:04:04 +0000 (06:04 +0200)

committer Niki Roo <niki@nikiroo.be>

Sun, 23 Sep 2018 04:04:04 +0000 (06:04 +0200)
author Niki Roo <niki@nikiroo.be>
Sun, 23 Sep 2018 04:04:04 +0000 (06:04 +0200)
committer Niki Roo <niki@nikiroo.be>
Sun, 23 Sep 2018 04:04:04 +0000 (06:04 +0200)
diff --git a/changelog.md b/changelog.md

index 730c6fd44721ccdf0c88a6ea047ea75f697bbbc9..38e5285471881adc79dfd650475daa9169100acf 100644 (file)
--- a/changelog.md
+++ b/changelog.md
@@ -7,6 +7,7 @@
  - Fix html output for LWN (do not allow raw text!)
  - Allow <pre> in all supports
  - Lot of other small fixes
  - Fix html output for LWN (do not allow raw text!)
  - Allow <pre> in all supports
  - Lot of other small fixes
+- New tests based upon static "example" content (not dynamic)
  
  ## Version 1.2.0
  
  
  ## Version 1.2.0
  
diff --git a/libs/nikiroo-utils-4.4.2-sources.jar b/libs/nikiroo-utils-4.4.3-sources.jar

similarity index 98%

rename from libs/nikiroo-utils-4.4.2-sources.jar

rename to libs/nikiroo-utils-4.4.3-sources.jar

index a2869e605140a1bc1f7da7108456ecec03a024fc..3385280388d78ebc6af7382814a586f341adc37d 100644 (file)

Binary files a/libs/nikiroo-utils-4.4.2-sources.jar and b/libs/nikiroo-utils-4.4.3-sources.jar differ
diff --git a/src/be/nikiroo/gofetch/test/TestBase.java b/src/be/nikiroo/gofetch/test/TestBase.java

index e8bfde089785b1ce73408430aa5c4a850320f169..1da6b48a6a2f7451bb13425bb4a9d4125665f1bc 100644 (file)
--- a/src/be/nikiroo/gofetch/test/TestBase.java
+++ b/src/be/nikiroo/gofetch/test/TestBase.java
@@ -19,6 +19,13 @@ import be.nikiroo.utils.test.TestLauncher;
  
  /**
   * Base class for {@link BasicSupport}s testing.
  
  /**
   * Base class for {@link BasicSupport}s testing.
+ * <p>
+ * It will use the following paths (XXX being the type of the tested support):
+ * <ul>
+ * <li><tt>test/source/XXX</tt>: the html source files</li>
+ * <li><tt>test/expected/XXX</tt>: the expected output</li>
+ * <li><tt>test/result/XXX</tt>: the actual output of the last test</li>
+ * </ul>
   * 
   * @author niki
   */
   * 
   * @author niki
   */
@@ -28,15 +35,16 @@ abstract class TestBase extends TestLauncher {
                 addTest(support);
         }
  
                 addTest(support);
         }
  
-       static protected InputStream doOpen(Map<URL, File> map, URL url)
-                       throws IOException {
+       static protected InputStream doOpen(BasicSupport support,
+                       Map<URL, File> map, URL url) throws IOException {
                 File file = map.get(url);
                 if (file == null) {
                         throw new FileNotFoundException("Test file not found for URL: "
                                         + url);
                 }
  
                 File file = map.get(url);
                 if (file == null) {
                         throw new FileNotFoundException("Test file not found for URL: "
                                         + url);
                 }
  
-               return new FileInputStream(file);
+               return new FileInputStream("test/source/" + support.getType() + "/"
+                               + file);
  
         }
  
  
         }
  
@@ -47,10 +55,15 @@ abstract class TestBase extends TestLauncher {
                                 File expected = new File("test/expected/" + support.getType());
                                 File actual = new File("test/result/" + support.getType());
  
                                 File expected = new File("test/expected/" + support.getType());
                                 File actual = new File("test/result/" + support.getType());
  
+                               IOUtils.deltree(actual);
+                               expected.mkdirs();
+                               actual.mkdirs();
+
                                 Output gopher = new Gopher(support.getType(), "", "", 70);
                                 Output html = new Html(support.getType(), "", "", 80);
  
                                 for (Story story : support.list()) {
                                 Output gopher = new Gopher(support.getType(), "", "", 70);
                                 Output html = new Html(support.getType(), "", "", 80);
  
                                 for (Story story : support.list()) {
+                                       support.fetch(story);
                                         IOUtils.writeSmallFile(new File(actual, story.getId()
                                                         + ".header"), gopher.exportHeader(story));
                                         IOUtils.writeSmallFile(
                                         IOUtils.writeSmallFile(new File(actual, story.getId()
                                                         + ".header"), gopher.exportHeader(story));
                                         IOUtils.writeSmallFile(
diff --git a/src/be/nikiroo/gofetch/test/TestLWN.java b/src/be/nikiroo/gofetch/test/TestLWN.java

index 597e761a328161cfdda66becfbb247f5fe96f71e..1025813c36aed7725e189ae6de2b92adfe0a6f77 100644 (file)
--- a/src/be/nikiroo/gofetch/test/TestLWN.java
+++ b/src/be/nikiroo/gofetch/test/TestLWN.java
@@ -15,7 +15,32 @@ public class TestLWN extends TestBase {
  
         static private Map<URL, File> getMap() throws MalformedURLException {
                 Map<URL, File> map = new HashMap<URL, File>();
  
         static private Map<URL, File> getMap() throws MalformedURLException {
                 Map<URL, File> map = new HashMap<URL, File>();
-               map.put(new URL("http://fanfan.be/"), new File("/tmp/none"));
+
+               map.put(new URL("https://lwn.net/"), new File("index.html"));
+
+               map.put(new URL("https://lwn.net/Articles/763252/"), new File(
+                               "Articles/763252.html"));
+               map.put(new URL("https://lwn.net/Articles/763987/"), new File(
+                               "Articles/763987.html"));
+               map.put(new URL("https://lwn.net/Articles/764046"), new File(
+                               "Articles/764046.html"));
+               map.put(new URL("https://lwn.net/Articles/764055"), new File(
+                               "Articles/764055.html"));
+               map.put(new URL("https://lwn.net/Articles/764130"), new File(
+                               "Articles/764130.html"));
+               map.put(new URL("https://lwn.net/Articles/764182"), new File(
+                               "Articles/764182.html"));
+               map.put(new URL("https://lwn.net/Articles/764184/"), new File(
+                               "Articles/764184.html"));
+               map.put(new URL("https://lwn.net/Articles/764202/"), new File(
+                               "Articles/764202.html"));
+               map.put(new URL("https://lwn.net/Articles/764219"), new File(
+                               "Articles/764219.html"));
+               map.put(new URL("https://lwn.net/Articles/764300"), new File(
+                               "Articles/764300.html"));
+               map.put(new URL("https://lwn.net/Articles/764321/"), new File(
+                               "Articles/764321.html"));
+
                 return map;
         }
  
                 return map;
         }
  
@@ -23,7 +48,7 @@ public class TestLWN extends TestBase {
                 super(new LWN() {
                         @Override
                         protected InputStream open(URL url) throws IOException {
                 super(new LWN() {
                         @Override
                         protected InputStream open(URL url) throws IOException {
-                               return doOpen(getMap(), url);
+                               return doOpen(this, getMap(), url);
                         }
  
                         @Override
                         }
  
                         @Override
diff --git a/test/expected/LWN/0000763252 b/test/expected/LWN/0000763252

new file mode 100644 (file)

index 0000000..f5a2048
--- /dev/null
+++ b/test/expected/LWN/0000763252
@@ -0,0 +1,1957 @@
+             LWN.NET WEEKLY EDITION FOR AUGUST 30, 2018            \r
+\r
+  \r
+\r
+  o News link: https://lwn.net/Articles/763252/\r
+  o Source link: \r
+\r
+\r
+    [1]Welcome  to  the  LWN.net Weekly Edition for August 30, 2018\r
+    This edition contains the following feature content:\r
+    \r
+    [2]An  introduction  to the Julia language, part 1 : Julia is a\r
+    language  designed  for  intensive numerical calculations; this\r
+    article gives an overview of its core features.\r
+    \r
+    [3]C  considered  dangerous  :  a Linux Security Summit talk on\r
+    what is being done to make the use of C in the kernel safer.\r
+    \r
+    [4]The  second  half  of  the  4.19  merge  window  : the final\r
+    features  merged (or not merged) before the merge window closed\r
+    for this cycle.\r
+    \r
+    [5]Measuring  (and fixing) I/O-controller throughput loss : the\r
+    kernel's   I/O   controllers   can   provide  useful  bandwidth\r
+    guarantees, but at a significant cost in throughput.\r
+    \r
+    [6]KDE's  onboarding initiative, one year later : what has gone\r
+    right  in  KDE's  effort  to make it easier for contributors to\r
+    join the project, and what remains to be done.\r
+    \r
+    [7]Sharing  and  archiving  data  sets with Dat : an innovative\r
+    approach to addressing and sharing data on the net.\r
+    \r
+    This week's edition also includes these inner pages:\r
+    \r
+    [8]Brief   items   :  Brief  news  items  from  throughout  the\r
+    community.\r
+    \r
+    [9]Announcements  : Newsletters, conferences, security updates,\r
+    patches, and more.\r
+    \r
+    Please  enjoy  this  week's  edition, and, as always, thank you\r
+    for supporting LWN.net.\r
+    \r
+    [10]Comments (none posted)\r
+    \r
+    [11]An introduction to the Julia language, part 1\r
+    \r
+    August 28, 2018\r
+    \r
+    This article was contributed by Lee Phillips\r
+    \r
+    [12]Julia  is  a  young  computer language aimed at serving the\r
+    needs  of  scientists,  engineers,  and  other practitioners of\r
+    numerically   intensive  programming.  It  was  first  publicly\r
+    released   in   2012.  After  an  intense  period  of  language\r
+    development,  version 1.0 was [13]released on August 8. The 1.0\r
+    release  promises  years  of  language  stability; users can be\r
+    confident  that  developments  in the 1.x series will not break\r
+    their  code.  This  is  the  first  part  of a two-part article\r
+    introducing  the  world  of  Julia.  This  part  will introduce\r
+    enough  of  the  language syntax and constructs to allow you to\r
+    begin  to write simple programs. The following installment will\r
+    acquaint  you  with the additional pieces needed to create real\r
+    projects, and to make use of Julia's ecosystem.\r
+    \r
+    Goals and history\r
+    \r
+    The  Julia  project  has ambitious goals. It wants the language\r
+    to  perform  about  as  well  as  Fortran  or  C  when  running\r
+    numerical  algorithms,  while  remaining as pleasant to program\r
+    in  as Python. I believe the project has met these goals and is\r
+    poised  to  see  increasing  adoption by numerical researchers,\r
+    especially now that an official, stable release is available.\r
+    \r
+    The  Julia  project  maintains  a [14]micro-benchmark page that\r
+    compares  its  numerical  performance  against  both statically\r
+    compiled   languages   (C,   Fortran)   and  dynamically  typed\r
+    languages  (R,  Python). While it's certainly possible to argue\r
+    about  the relevance and fairness of particular benchmarks, the\r
+    data  overall  supports  the Julia team's contention that Julia\r
+    has   generally   achieved  parity  with  Fortran  and  C;  the\r
+    benchmark source code is available.\r
+    \r
+    Julia  began  as  research  in  computer  science  at  MIT; its\r
+    creators  are  Alan  Edelman,  Stefan Karpinski, Jeff Bezanson,\r
+    and  Viral  Shah.  These  four  remain active developers of the\r
+    language.  They, along with Keno Fischer, co-founder and CTO of\r
+    [15]Julia  Computing , were kind enough to share their thoughts\r
+    with  us  about the language. I'll be drawing on their comments\r
+    later  on;  for now, let's get a taste of what Julia code looks\r
+    like.\r
+    \r
+    Getting started\r
+    \r
+    To   explore   Julia   initially,   start   up   its   standard\r
+    [16]read-eval-print   loop   (REPL)  by  typing  julia  at  the\r
+    terminal,  assuming  that  you have installed it. You will then\r
+    be  able  to  interact with what will seem to be an interpreted\r
+    language  —  but,  behind  the scenes, those commands are being\r
+    compiled  by  a  just-in-time  (JIT)  compiler  that  uses  the\r
+    [17]LLVM   compiler   framework  .  This  allows  Julia  to  be\r
+    interactive,  while  turning the code into fast, native machine\r
+    instructions.   However,  the  JIT  compiler  passes  sometimes\r
+    introduce  noticeable delays at the REPL, especially when using\r
+    a function for the first time.\r
+    \r
+    To  run  a  Julia  program non-interactively, execute a command\r
+    like: $ julia script.jl\r
+    \r
+    Julia  has  all  the  usual data structures: numbers of various\r
+    types     (including    complex    and    rational    numbers),\r
+    multidimensional    arrays,    dictionaries,    strings,    and\r
+    characters.  Functions  are  first-class: they can be passed as\r
+    arguments  to other functions, can be members of arrays, and so\r
+    on.\r
+    \r
+    Julia  embraces  Unicode. Strings, which are enclosed in double\r
+    quotes,  are  arrays  of Unicode characters, which are enclosed\r
+    in  single  quotes.  The  " * " operator is used for string and\r
+    character  concatenation.  Thus 'a' and 'β' are characters, and\r
+    'aβ'  is  a syntax error. "a" and "β" are strings, as are "aβ",\r
+    'a' * 'β', and "a" * "β" — all evaluate to the same string.\r
+    \r
+    Variable  and  function names can contain non-ASCII characters.\r
+    This,   along  with  Julia's  clever  syntax  that  understands\r
+    numbers  prepended  to variables to mean multiplication, goes a\r
+    long  way  to  allowing  the  numerical scientist to write code\r
+    that  more  closely resembles the compact mathematical notation\r
+    of the equations that usually lie behind it.  julia ε₁ = 0.01\r
+    \r
+    0.01\r
+    \r
+    julia ε₂ = 0.02\r
+    \r
+    0.02\r
+    \r
+    julia 2ε₁ + 3ε₂\r
+    \r
+    0.08\r
+    \r
+    And  where  does  Julia come down on the age-old debate of what\r
+    do  about  1/2  ? In Fortran and Python 2, this will get you 0,\r
+    since  1  and 2 are integers, and the result is rounded down to\r
+    the  integer  0. This was deemed inconsistent, and confusing to\r
+    some,  so  it  was changed in Python 3 to return 0.5 — which is\r
+    what you get in Julia, too.\r
+    \r
+    While  we're  on  the  subject  of  fractions, Julia can handle\r
+    rational  numbers,  with  a special syntax: 3//5 + 2//3 returns\r
+    19//15  ,  while  3/5  + 2/3 gets you the floating-point answer\r
+    1.2666666666666666.  Internally,  Julia  thinks  of  a rational\r
+    number  in  its  reduced  form,  so the expression 6//8 == 3//4\r
+    returns true , and numerator(6//8) returns 3 .\r
+    \r
+    Arrays\r
+    \r
+    Arrays  are  enclosed  in  square  brackets and indexed with an\r
+    iterator  that  can  contain a step value:  julia a = [1, 2, 3,\r
+    4, 5, 6]\r
+    \r
+    6-element Array{Int64,1}:\r
+    \r
+    1\r
+    \r
+    2\r
+    \r
+    3\r
+    \r
+    4\r
+    \r
+    5\r
+    \r
+    6\r
+    \r
+    julia a[1:2:end]\r
+    \r
+    3-element Array{Int64,1}:\r
+    \r
+    1\r
+    \r
+    3\r
+    \r
+    5\r
+    \r
+    As  you  can  see,  indexing  starts at one, and the useful end\r
+    index  means  the  obvious thing. When you define a variable in\r
+    the  REPL,  Julia  replies  with  the  type  and  value  of the\r
+    assigned  data;  you  can  suppress  this output by ending your\r
+    input line with a semicolon.\r
+    \r
+    Since  arrays  are  such a vital part of numerical computation,\r
+    and  Julia makes them easy to work with, we'll spend a bit more\r
+    time with them than the other data structures.\r
+    \r
+    To  illustrate  the  syntax,  we  can start with a couple of 2D\r
+    arrays, defined at the REPL:  julia a = [1 2 3; 4 5 6]\r
+    \r
+    2×3 Array{Int64,2}:\r
+    \r
+    1 2 3\r
+    \r
+    4 5 6\r
+    \r
+    julia z = [-1 -2 -3; -4 -5 -6];\r
+    \r
+    Indexing is as expected:  julia a[1, 2]\r
+    \r
+    2\r
+    \r
+    You can glue arrays together horizontally:  julia [a z]\r
+    \r
+    2×6 Array{Int64,2}:\r
+    \r
+    1 2 3 -1 -2 -3\r
+    \r
+    4 5 6 -4 -5 -6\r
+    \r
+    And vertically:  julia [a; z]\r
+    \r
+    4×3 Array{Int64,2}:\r
+    \r
+    1  2  3\r
+    \r
+    4  5  6\r
+    \r
+    -1 -2 -3\r
+    \r
+    -4 -5 -6\r
+    \r
+    Julia  has  all  the  usual  operators for handling arrays, and\r
+    [18]linear  algebra  functions  that  work  with  matrices  (2D\r
+    arrays).  The  linear  algebra  functions  are  part of Julia's\r
+    standard  library,  but need to be imported with a command like\r
+    "  using  LinearAlgebra  ",  which is a detail omitted from the\r
+    current  documentation.  The  functions  include such things as\r
+    determinants,  matrix  inverses,  eigenvalues and eigenvectors,\r
+    many  kinds  of  matrix  factorizations,  etc.  Julia  has  not\r
+    reinvented  the  wheel  here,  but  wisely  uses the [19]LAPACK\r
+    Fortran library of battle-tested linear algebra routines.\r
+    \r
+    The  extension  of  arithmetic  operators  to arrays is usually\r
+    intuitive:  julia a + z\r
+    \r
+    2×3 Array{Int64,2}:\r
+    \r
+    0 0 0\r
+    \r
+    0 0 0\r
+    \r
+    And  the  numerical  prepending  syntax works with arrays, too:\r
+    julia 3a + 4z\r
+    \r
+    2×3 Array{Int64,2}:\r
+    \r
+    -1 -2 -3\r
+    \r
+    -4 -5 -6\r
+    \r
+    Putting  a  multiplication  operator  between two matrices gets\r
+    you matrix multiplication:  julia a * transpose(a)\r
+    \r
+    2×2 Array{Int64,2}:\r
+    \r
+    14 32\r
+    \r
+    32 77\r
+    \r
+    You  can  "broadcast"  numbers  to cover all the elements in an\r
+    array  by prepending the usual arithmetic operators with a dot:\r
+    julia 1 .+ a\r
+    \r
+    2×3 Array{Int64,2}:\r
+    \r
+    2 3 4\r
+    \r
+    5 6 7\r
+    \r
+    Note  that the language only actually requires the dot for some\r
+    operators,  but  not  for  others,  such  as  "*"  and "/". The\r
+    reasons  for this are arcane, and it probably makes sense to be\r
+    consistent  and  use  the dot whenever you intend broadcasting.\r
+    Note   also   that   the   current   version  of  the  official\r
+    documentation  is  incorrect  in claiming that you may omit the\r
+    dot from "+" and "-"; in fact, this now gives an error.\r
+    \r
+    You  can  use  the  dot  notation to turn any function into one\r
+    that   operates   on   each   element   of  an  array:    julia\r
+    round.(sin.([0, π/2, π, 3π/2, 2π]))\r
+    \r
+    5-element Array{Float64,1}:\r
+    \r
+    0.0\r
+    \r
+    1.0\r
+    \r
+    0.0\r
+    \r
+    -1.0\r
+    \r
+    -0.0\r
+    \r
+    The  example  above  illustrates  chaining two dotted functions\r
+    together.  The  Julia compiler turns expressions like this into\r
+    "fused"  operations:  instead of applying each function in turn\r
+    to  create a new array that is passed to the next function, the\r
+    compiler   combines   the  functions  into  a  single  compound\r
+    function  that  is  applied  once  over  the  array, creating a\r
+    significant optimization.\r
+    \r
+    You  can  use  this  dot  notation with any function, including\r
+    your  own, to turn it into a version that operates element-wise\r
+    over arrays.\r
+    \r
+    Dictionaries  (associative  arrays) can be defined with several\r
+    syntaxes. Here's one:  julia d1 = Dict("A"=1, "B"=2)\r
+    \r
+    Dict{String,Int64} with 2 entries:\r
+    \r
+    "B" = 2\r
+    \r
+    "A" = 1\r
+    \r
+    You  may  have  noticed  that the code snippets so far have not\r
+    included  any  type  declarations.  Every  value in Julia has a\r
+    type,  but  the  compiler  will  infer  types  if  they are not\r
+    specified.  It  is generally not necessary to declare types for\r
+    performance,   but  type  declarations  sometimes  serve  other\r
+    purposes,  that  we'll  return  to  later. Julia has a deep and\r
+    sophisticated  type  system,  including  user-defined types and\r
+    C-like  structs. Types can have behaviors associated with them,\r
+    and  can  inherit  behaviors  from  other types. The best thing\r
+    about  Julia's  type system is that you can ignore it entirely,\r
+    use  just  a  few  pieces  of  it,  or spend weeks studying its\r
+    design.\r
+    \r
+    Control flow\r
+    \r
+    Julia  code  is organized in blocks, which can indicate control\r
+    flow,  function  definitions,  and other code units. Blocks are\r
+    terminated  with  the  end  keyword,  and  indentation  is  not\r
+    significant.  Statements  are separated either with newlines or\r
+    semicolons.\r
+    \r
+    Julia  has the typical control flow constructs; here is a while\r
+    block:  julia i = 1;\r
+    \r
+    julia while i 5\r
+    \r
+    print(i)\r
+    \r
+    global i = i + 1\r
+    \r
+    end\r
+    \r
+    1234\r
+    \r
+    Notice  the  global  keyword.  Most blocks in Julia introduce a\r
+    local  scope for variables; without this keyword here, we would\r
+    get an error about an undefined variable.\r
+    \r
+    Julia  has  the  usual if statements and for loops that use the\r
+    same  iterators that we introduced above for array indexing. We\r
+    can  also  iterate  over collections:  julia for i ∈ ['a', 'b',\r
+    'c']\r
+    \r
+    println(i)\r
+    \r
+    end\r
+    \r
+    a\r
+    \r
+    b\r
+    \r
+    c\r
+    \r
+    In  place of the fancy math symbol in this for loop, we can use\r
+    "  =  "  or " in ". If you want to use the math symbol but have\r
+    no  convenient  way  to type it, the REPL will help you: type "\r
+    \in  "  and  the  TAB key, and the symbol appears; you can type\r
+    many [20]LaTeX expressions into the REPL in this way.\r
+    \r
+    Development of Julia\r
+    \r
+    The   language   is   developed   on   GitHub,  with  over  700\r
+    contributors.  The  Julia  team  mentioned in their email to us\r
+    that  the decision to use GitHub has been particularly good for\r
+    Julia,  as  it  streamlined  the  process  for  many  of  their\r
+    contributors,  who  are scientists or domain experts in various\r
+    fields, rather than professional software developers.\r
+    \r
+    The  creators  of  Julia  have  [21]published  [PDF] a detailed\r
+    “mission  statement”  for  the  language, describing their aims\r
+    and  motivations.  A  key issue that they wanted their language\r
+    to  solve  is what they called the "two-language problem." This\r
+    situation  is familiar to anyone who has used Python or another\r
+    dynamic  language on a demanding numerical problem. To get good\r
+    performance,   you  will  wind  up  rewriting  the  numerically\r
+    intensive  parts  of  the program in C or Fortran, dealing with\r
+    the  interface  between  the  two  languages,  and may still be\r
+    disappointed  in  the overhead presented by calling the foreign\r
+    routines from your original code.\r
+    \r
+    For  Python,  [22]NumPy and SciPy wrap many numerical routines,\r
+    written  in Fortran or C, for efficient use from that language,\r
+    but  you  can  only  take advantage of this if your calculation\r
+    fits  the  pattern  of  an  available  routine; in more general\r
+    cases,  where you will have to write a loop over your data, you\r
+    are  stuck with Python's native performance, which is orders of\r
+    magnitude  slower.  If  you  switch  to  an alternative, faster\r
+    implementation  of  Python,  such  as  [23]PyPy , the numerical\r
+    libraries  may  not  be  compatible; NumPy became available for\r
+    PyPy only within about the past year.\r
+    \r
+    Julia  solves  the  two-language problem by being as expressive\r
+    and  simple  to  program  in  as  a dynamic scripting language,\r
+    while  having  the  native  performance  of  a static, compiled\r
+    language.  There  is  no need to write numerical libraries in a\r
+    second  language,  but  C  or  Fortran  library routines can be\r
+    called   using  a  facility  that  Julia  has  built-in.  Other\r
+    languages,  such as [24]Python or [25]R , can also interoperate\r
+    easily with Julia using external packages.\r
+    \r
+    Documentation\r
+    \r
+    There  are  many  resources  to  turn to to learn the language.\r
+    There   is  an  extensive  and  detailed  [26]manual  at  Julia\r
+    headquarters,  and  this may be a good place to start. However,\r
+    although  the first few chapters provide a gentle introduction,\r
+    the  material soon becomes dense and, at times, hard to follow,\r
+    with  references to concepts that are not explained until later\r
+    chapters.  Fortunately,  there  is a [27]"learning" link at the\r
+    top  of  the Julia home page, which takes you to a long list of\r
+    videos,  tutorials,  books,  articles,  and  classes both about\r
+    Julia  and that use Julia in teaching subjects such a numerical\r
+    analysis.  There  is also a fairly good [28]cheat-sheet [PDF] ,\r
+    which was just updated for v. 1.0.\r
+    \r
+    If  you're  coming  from  Python,  [29]this  list of noteworthy\r
+    differences  between  Python  and Julia syntax will probably be\r
+    useful.\r
+    \r
+    Some  of  the  linked  tutorials are in the form of [30]Jupyter\r
+    notebooks  — indeed, the name "Jupyter" is formed from "Julia",\r
+    "Python",  and  "R",  which  are  the  three original languages\r
+    supported  by  the  interface. The [31]Julia kernel for Jupyter\r
+    was  recently upgraded to support v. 1.0. Judicious sampling of\r
+    a  variety  of  documentation  sources,  combined  with liberal\r
+    experimentation,  may be the best way of learning the language.\r
+    Jupyter  makes this experimentation more inviting for those who\r
+    enjoy  the  web-based  interface,  but the REPL that comes with\r
+    Julia  helps  a  great  deal  in  this regard by providing, for\r
+    instance,  TAB  completion and an extensive help system invoked\r
+    by simply pressing the "?" key.\r
+    \r
+    Stay tuned\r
+    \r
+    The  [32]next  installment in this two-part series will explain\r
+    how   Julia  is  organized  around  the  concept  of  "multiple\r
+    dispatch".  You  will  learn  how  to create functions and make\r
+    elementary  use  of  Julia's  type  system.  We'll  see  how to\r
+    install  packages  and  use  modules,  and  how to make graphs.\r
+    Finally,  Part  2  will  briefly survey the important topics of\r
+    macros and distributed computing.\r
+    \r
+    [33]Comments (80 posted)\r
+    \r
+    [34]C considered dangerous\r
+    \r
+    By Jake Edge\r
+    \r
+    August 29, 2018\r
+    \r
+    [35]LSS NA\r
+    \r
+    At  the  North  America  edition of the [36]2018 Linux Security\r
+    Summit  (LSS  NA),  which was held in late August in Vancouver,\r
+    Canada,  Kees  Cook  gave a presentation on some of the dangers\r
+    that  come  with  programs  written  in  C.  In  particular, of\r
+    course,  the  Linux  kernel is mostly written in C, which means\r
+    that  the security of our systems rests on a somewhat dangerous\r
+    foundation.  But there are things that can be done to help firm\r
+    things  up  by  " Making C Less Dangerous " as the title of his\r
+    talk suggested.\r
+    \r
+    He  began  with  a brief summary of the work that he and others\r
+    are  doing  as  part  of the [37]Kernel Self Protection Project\r
+    (KSPP).  The  goal  of the project is to get kernel protections\r
+    merged  into  the  mainline. These protections are not targeted\r
+    at  protecting user-space processes from other (possibly rogue)\r
+    processes,  but  are, instead, focused on protecting the kernel\r
+    from  user-space  code.  There  are around 12 organizations and\r
+    ten  individuals  working  on roughly 20 different technologies\r
+    as  part  of the KSPP, he said. The progress has been "slow and\r
+    steady", he said, which is how he thinks it should go.  [38]\r
+    \r
+    One  of  the  main  problems is that C is treated mostly like a\r
+    fancy  assembler.  The  kernel  developers do this because they\r
+    want  the  kernel to be as fast and as small as possible. There\r
+    are   other   reasons,   too,   such   as   the   need   to  do\r
+    architecture-specific  tasks that lack a C API (e.g. setting up\r
+    page tables, switching to 64-bit mode).\r
+    \r
+    But   there   is   lots   of  undefined  behavior  in  C.  This\r
+    "operational   baggage"   can  lead  to  various  problems.  In\r
+    addition,  C  has a weak standard library with multiple utility\r
+    functions  that  have  various  pitfalls.  In C, the content of\r
+    uninitialized  automatic  variables  is  undefined,  but in the\r
+    machine  code that it gets translated to, the value is whatever\r
+    happened  to  be  in  that  memory  location  before.  In  C, a\r
+    function  pointer can be called even if the type of the pointer\r
+    does  not  match the type of the function being called—assembly\r
+    doesn't care, it just jumps to a location, he said.\r
+    \r
+    The  APIs  in  the standard library are also bad in many cases.\r
+    He  asked:  why is there no argument to memcpy() to specify the\r
+    maximum  destination  length?  He  noted a recent [39]blog post\r
+    from  Raph  Levien  entitled "With Undefined Behavior, Anything\r
+    is  Possible".  That  obviously  resonated  with  Cook,  as  he\r
+    pointed  out  his  T-shirt—with  the title and artwork from the\r
+    post.\r
+    \r
+    Less danger\r
+    \r
+    He  then  moved on to some things that kernel developers can do\r
+    (and  are  doing) to get away from some of the dangers of C. He\r
+    began  with variable-length arrays (VLAs), which can be used to\r
+    overflow  the  stack to access data outside of its region. Even\r
+    if  the  stack  has a guard page, VLAs can be used to jump past\r
+    it  to  write into other memory, which can then be used by some\r
+    other  kind  of  attack. The C language is "perfectly fine with\r
+    this".  It  is  easy  to find uses of VLAs with the -Wvla flag,\r
+    however.\r
+    \r
+    But  it  turns  out  that  VLAs  are  [40]not  just  bad from a\r
+    security   perspective   ,   they   are   also   slow.   In   a\r
+    micro-benchmark  associated with a [41]patch removing a VLA , a\r
+    13%  performance  boost  came from using a fixed-size array. He\r
+    dug  in  a  bit  further and found that much more code is being\r
+    generated  to  handle a VLA, which explains the speed increase.\r
+    Since  Linus  Torvalds  has  [42]declared  that  VLAs should be\r
+    removed  from  the  kernel because they cause security problems\r
+    and also slow the kernel down; Cook said "don't use VLAs".\r
+    \r
+    Another  problem area is switch statements, in particular where\r
+    there  is  no  break  for  a  case  .  That could mean that the\r
+    programmer  expects  and wants to fall through to the next case\r
+    or  it could be that the break was simply forgotten. There is a\r
+    way  to  get a warning from the compiler for fall-throughs, but\r
+    there  needs  to be a way to mark those that are truly meant to\r
+    be  that way. A special fall-through "statement" in the form of\r
+    a   comment   is   what   has   been   agreed   on  within  the\r
+    static-analysis  community.  He  and  others  have  been  going\r
+    through  each  of  the  places  where  there is no break to add\r
+    these  comments  (or  a break ); they have "found a lot of bugs\r
+    this way", he said.\r
+    \r
+    Uninitialized  local variables will generate a warning, but not\r
+    if  the  variable is passed in by reference. There are some GCC\r
+    plugins  that  will  automatically  initialize these variables,\r
+    but  there are also patches for both GCC and Clang to provide a\r
+    compiler  option  to  do  so. Neither of those is upstream yet,\r
+    but  Torvalds has praised the effort so the kernel would likely\r
+    use  the  option.  An  interesting  side effect that came about\r
+    while   investigating   this   was   a  warning  he  got  about\r
+    unreachable  code  when  he  enabled  the  auto-initialization.\r
+    There  were  two  variables  declared  just after a switch (and\r
+    outside of any case ), where they would never be reached.\r
+    \r
+    Arithmetic  overflow  is  another  undefined behavior in C that\r
+    can  cause various problems. GCC can check for signed overflow,\r
+    which  performs  well  (the overhead is in the noise, he said),\r
+    but  adding warning messages for it does grow the kernel by 6%;\r
+    making  the  overflow abort, instead, only adds 0.1%. Clang can\r
+    check  for  both  signed and unsigned overflow; signed overflow\r
+    is  undefined,  while  unsigned  overflow is defined, but often\r
+    unexpected.  Marking places where unsigned overflow is expected\r
+    is  needed;  it would be nice to get those annotations put into\r
+    the kernel, Cook said.\r
+    \r
+    Explicit   bounds   checking   is   expensive.   Doing  it  for\r
+    copy_{to,from}_user()  is  a  less than 1% performance hit, but\r
+    adding  it  to  the  strcpy() and memcpy() families is around a\r
+    2%  hit. Pre-Meltdown that would have been a totally impossible\r
+    performance  regression  for  security, he said; post-Meltdown,\r
+    since  it  is less than 5%, maybe there is a chance to add this\r
+    checking.\r
+    \r
+    Better  APIs would help as well. He pointed to the evolution of\r
+    strcpy(),   through   strncpy()   and   strlcpy()   (each  with\r
+    their  own  bounds  flaws)  to strscpy(), which seems to be "OK\r
+    so  far".  He  also mentioned memcpy() again as a poor API with\r
+    respect to bounds checking.\r
+    \r
+    Hardware  support  for  bounds  checking  is  available  in the\r
+    application  data  integrity  (ADI)  feature  for  SPARC and is\r
+    coming  for  Arm; it may also be available for Intel processors\r
+    at  some point. These all use a form of "memory tagging", where\r
+    allocations  get a tag that is stored in the high-order byte of\r
+    the  address.  An offset from the address can be checked by the\r
+    hardware  to  see if it still falls within the allocated region\r
+    based on the tag.\r
+    \r
+    Control-flow  integrity  (CFI)  has  become  more  of  an issue\r
+    lately  because much of what attackers had used in the past has\r
+    been  marked  as  "no  execute"  so  they  are turning to using\r
+    existing  code  "gadgets"  already  present  in  the  kernel by\r
+    hijacking  existing indirect function calls. In C, you can just\r
+    call  pointers  without  regard  to  the type as it just treats\r
+    them  as  an  address  to  jump  to.  Clang  has a CFI-sanitize\r
+    feature  that  enforces  the function prototype to restrict the\r
+    calls  that  can  be  made.  It  is  done at runtime and is not\r
+    perfect,  in  part  because  there are lots of functions in the\r
+    kernel  that  take  one  unsigned  long parameter and return an\r
+    unsigned long.\r
+    \r
+    Attacks  on  CFI  have both a "forward edge", which is what CFI\r
+    sanitize  tries  to  handle,  and  a "backward edge" that comes\r
+    from  manipulating  the  stack  values,  the  return address in\r
+    particular.  Clang  has  two  methods  available to prevent the\r
+    stack  manipulation.  The first is the "safe stack", which puts\r
+    various   important  items  (e.g.  "safe"  variables,  register\r
+    spills,   and   the   return  address)  on  a  separate  stack.\r
+    Alternatively,  the  "shadow  stack" feature creates a separate\r
+    stack just for return addresses.\r
+    \r
+    One  problem  with  these  other  stacks is that they are still\r
+    writable,  so  if an attacker can find them in memory, they can\r
+    still  perform  their attacks. Hardware-based protections, like\r
+    Intel's     Control-Flow    Enforcement    Technology    (CET),\r
+    [43]provide   a   read-only   shadow   call  stack  for  return\r
+    addresses.   Another   hardware   protection   is   [44]pointer\r
+    authentication  for  Arm, which adds a kind of encrypted tag to\r
+    the return address that can be verified before it is used.\r
+    \r
+    Status and challenges\r
+    \r
+    Cook  then  went  through  the current status of handling these\r
+    different  problems  in  the kernel. VLAs are almost completely\r
+    gone,  he  said,  just a few remain in the crypto subsystem; he\r
+    hopes  those  VLAs will be gone by 4.20 (or whatever the number\r
+    of  the  next  kernel  release  turns  out  to  be).  Once that\r
+    happens,  he  plans  to  turn  on -Wvla for the kernel build so\r
+    that none creep back in.\r
+    \r
+    There  has  been  steady  progress made on marking fall-through\r
+    cases  in  switch  statements. Only 745 remain to be handled of\r
+    the  2311  that  existed  when  this  work  started;  each  one\r
+    requires  scrutiny  to  determine  what the author's intent is.\r
+    Auto-initialized  local  variables  can  be done using compiler\r
+    plugins,  but  that  is "not quite what we want", he said. More\r
+    compiler   support  would  be  helpful  there.  For  arithmetic\r
+    overflow,  it  would  be  nice  to  see GCC get support for the\r
+    unsigned  case,  but  memory allocations are now doing explicit\r
+    overflow checking at this point.\r
+    \r
+    Bounds  checking has seen some "crying about performance hits",\r
+    so  we  are  waiting impatiently for hardware support, he said.\r
+    CFI  forward-edge  protection  needs [45]link-time optimization\r
+    (LTO)  support  for  Clang  in  the kernel, but it is currently\r
+    working  on  Android.  For  backward-edge mitigation, the Clang\r
+    shadow   call   stack   is  working  on  Android,  but  we  are\r
+    impatiently waiting for hardware support for that too.\r
+    \r
+    There  are a number of challenges in doing security development\r
+    for  the  kernel,  Cook said. There are cultural boundaries due\r
+    to  conservatism  within  the  kernel  community; that requires\r
+    patiently  working  and reworking features in order to get them\r
+    upstream.  There  are,  of course, technical challenges because\r
+    of  the complexity of security changes; those kinds of problems\r
+    can  be solved. There are also resource limitations in terms of\r
+    developers,  testers,  reviewers, and so on. KSPP and the other\r
+    kernel  security  developers  are  still  making that "slow but\r
+    steady" progress.\r
+    \r
+    Cook's  [46]slides  [PDF] are available for interested readers;\r
+    before  long,  there should be a video available of the talk as\r
+    well.\r
+    \r
+    [I  would  like  to  thank  LWN's  travel  sponsor,  the  Linux\r
+    Foundation,  for travel assistance to attend the Linux Security\r
+    Summit in Vancouver.]\r
+    \r
+    [47]Comments (70 posted)\r
+    \r
+    [48]The second half of the 4.19 merge window\r
+    \r
+    By Jonathan Corbet\r
+    \r
+    August  26,  2018    By  the  time  Linus Torvalds [49]released\r
+    4.19-rc1  and  closed  the  merge  window  for this development\r
+    cycle,  12,317  non-merge  changesets  had found their way into\r
+    the  mainline;  about  4,800  of  those  landed  after [50]last\r
+    week's  summary  was  written.  As tends to be the case late in\r
+    the  merge  window,  many  of  those changes were fixes for the\r
+    bigger  patches  that  went  in  early,  but  there were also a\r
+    number  of  new  features  added.  Some of the more significant\r
+    changes include:\r
+    \r
+    Core kernel\r
+    \r
+    The  full  set of patches adding [51]control-group awareness to\r
+    the  out-of-memory  killer  has  not been merged due to ongoing\r
+    disagreements,  but  one  piece  of  it  has:  there  is  a new\r
+    memory.oom.group  control  knob  that  will cause all processes\r
+    within  a  control  group  to  be  killed  in  an out-of-memory\r
+    situation.\r
+    \r
+    A  new set of protections has been added to prevent an attacker\r
+    from  fooling  a  program  into  writing to an existing file or\r
+    FIFO.  An  open  with  the  O_CREAT flag to a file or FIFO in a\r
+    world-writable,  sticky directory (e.g. /tmp ) will fail if the\r
+    owner  of  the  opening  process is not the owner of either the\r
+    target   file  or  the  containing  directory.  This  behavior,\r
+    disabled    by    default,    is    controlled   by   the   new\r
+    protected_regular and protected_fifos sysctl knobs.\r
+    \r
+    Filesystems and block layer\r
+    \r
+    The  dm-integrity  device-mapper  target can now use a separate\r
+    device for metadata storage.\r
+    \r
+    EROFS,  the  "enhanced read-only filesystem", has been added to\r
+    the  staging  tree. It is " a lightweight read-only file system\r
+    with    modern   designs   (eg.   page-sized   blocks,   inline\r
+    xattrs/data,  etc.)  for  scenarios which need high-performance\r
+    read-only  requirements,  eg.  firmwares  in  mobile  phone  or\r
+    LIVECDs "\r
+    \r
+    The  new  "metadata  copy-up"  feature  in overlayfs will avoid\r
+    copying   a   file's   contents   to   the  upper  layer  on  a\r
+    metadata-only change. See [52]this commit for details.\r
+    \r
+    Hardware support\r
+    \r
+    Graphics : Qualcomm Adreno A6xx GPUs.\r
+    \r
+    Industrial    I/O    :    Spreadtrum    SC27xx    series   PMIC\r
+    analog-to-digital    converters,    Analog    Devices    AD5758\r
+    digital-to-analog  converters, Intersil ISL29501 time-of-flight\r
+    sensors,  Silicon  Labs  SI1133  UV  index/ambient light sensor\r
+    chips, and Bosch Sensortec BME680 sensors.\r
+    \r
+    Miscellaneous   :  Generic  ADC-based  resistive  touchscreens,\r
+    Generic  ASIC  devices  via  the  Google [53]Gasket framework ,\r
+    Analog  Devices  ADGS1408/ADGS1409  multiplexers,  Actions Semi\r
+    Owl  SoCs  DMA  controllers,  MEN  16Z069 watchdog timers, Rohm\r
+    BU21029   touchscreen   controllers,   Cirrus   Logic  CS47L35,\r
+    CS47L85,  CS47L90,  and  CS47L91  codecs,  Cougar  500k  gaming\r
+    keyboards,   Qualcomm   GENI-based   I2C  controllers,  Actions\r
+    Semiconductor  Owl  I2C  controllers,  ChromeOS  EC-based USBPD\r
+    chargers, and Analog Devices ADP5061 battery chargers.\r
+    \r
+    USB  :  Nuvoton  NPCM7XX on-chip EHCI USB controllers, Broadcom\r
+    Stingray PCIe PHYs, and Renesas R-Car generation 3 PCIe PHYs.\r
+    \r
+    There  is  also  a  new  subsystem  for the abstraction of GNSS\r
+    (global  navigation  satellite  systems  —  GPS,  for  example)\r
+    receivers  in  the  kernel.  To  date,  such  devices have been\r
+    handled  with  an  abundance of user-space drivers; the hope is\r
+    to  bring  some  order  in  this  area.  Support for u-blox and\r
+    SiRFstar receivers has been added as well.\r
+    \r
+    Kernel internal\r
+    \r
+    The  __deprecated  marker,  used to mark interfaces that should\r
+    no  longer  be  used,  has been deprecated and removed from the\r
+    kernel  entirely.  [54]Torvalds  said  : " They are not useful.\r
+    They  annoy  everybody,  and  nobody  ever  does anything about\r
+    them,  because  it's  always 'somebody elses problem'. And when\r
+    people  start  thinking  that  warnings  are  normal, they stop\r
+    looking  at  them, and the real warnings that mean something go\r
+    unnoticed. "\r
+    \r
+    The  minimum  version  of  GCC  required by the kernel has been\r
+    moved up to 4.6.\r
+    \r
+    There  are  a  couple of significant changes that failed to get\r
+    in  this  time around, including the [55]XArray data structure.\r
+    The  patches are thought to be ready, but they had the bad luck\r
+    to  be  based  on  a  tree  that  failed to be merged for other\r
+    reasons,  so  Torvalds  [56]didn't even look at them . That, in\r
+    turn,   blocks  another  set  of  patches  intended  to  enable\r
+    migration of slab-allocated objects.\r
+    \r
+    The  other  big  deferral  is  the  [57]new system-call API for\r
+    filesystem  mounting  . Despite ongoing [58]concerns about what\r
+    happens  when  the  same  low-level  device is mounted multiple\r
+    times  with  conflicting  options,  Al  Viro  sent  [59]a  pull\r
+    request  to  send  this  work  upstream. The ensuing discussion\r
+    made  it  clear  that  there  is  still not a consensus in this\r
+    area,  though,  so  it  seems  that  this  work has to wait for\r
+    another cycle.\r
+    \r
+    Assuming  all  goes  well,  the  kernel will stabilize over the\r
+    coming  weeks  and  the  final  4.19  release  will  happen  in\r
+    mid-October.\r
+    \r
+    [60]Comments (1 posted)\r
+    \r
+    [61]Measuring (and fixing) I/O-controller throughput loss\r
+    \r
+    August 29, 2018\r
+    \r
+    This article was contributed by Paolo Valente\r
+    \r
+    Many  services,  from  web hosting and video streaming to cloud\r
+    storage,  need  to  move  data  to  and from storage. They also\r
+    often  require  that  each  per-client I/O flow be guaranteed a\r
+    non-zero   amount  of  bandwidth  and  a  bounded  latency.  An\r
+    expensive  way to provide these guarantees is to over-provision\r
+    storage  resources,  keeping  each  resource underutilized, and\r
+    thus  have  plenty of bandwidth available for the few I/O flows\r
+    dispatched  to  each  medium.  Alternatively one can use an I/O\r
+    controller.  Linux provides two mechanisms designed to throttle\r
+    some  I/O  streams  to allow others to meet their bandwidth and\r
+    latency  requirements.  These mechanisms work, but they come at\r
+    a  cost:  a  loss  of  as  much  as  80% of total available I/O\r
+    bandwidth.  I  have run some tests to demonstrate this problem;\r
+    some   upcoming  improvements  to  the  [62]bfq  I/O  scheduler\r
+    promise to improve the situation considerably.\r
+    \r
+    Throttling  does  guarantee control, even on drives that happen\r
+    to  be highly utilized but, as will be seen, it has a hard time\r
+    actually  ensuring  that  drives are highly utilized. Even with\r
+    greedy  I/O  flows,  throttling  easily  ends  up  utilizing as\r
+    little  as  20%  of the available speed of a flash-based drive.\r
+    Such   a  speed  loss  may  be  particularly  problematic  with\r
+    lower-end   storage.   On   the   opposite   end,  it  is  also\r
+    disappointing  with  high-end  hardware, as the Linux block I/O\r
+    stack  itself  has  been  [63]redesigned  from the ground up to\r
+    fully  utilize  the  high  speed  of  modern,  fast storage. In\r
+    addition,   throttling   fails   to   guarantee   the  expected\r
+    bandwidths  if  I/O  contains  both  reads  and  writes,  or is\r
+    sporadic in nature.\r
+    \r
+    On  the  bright  side,  there  now  seems  to  be  an effective\r
+    alternative  for controlling I/O: the proportional-share policy\r
+    provided  by  the  bfq  I/O  scheduler.  It enables nearly 100%\r
+    storage  bandwidth  utilization,  at  least  with  some  of the\r
+    workloads  that  are  problematic  for  throttling. An upcoming\r
+    version  of  bfq may be able to achieve this result with almost\r
+    all  workloads.  Finally,  bfq  guarantees  bandwidths with all\r
+    workloads.  The current limitation of bfq is that its execution\r
+    overhead  becomes  significant  at  speeds  above  400,000  I/O\r
+    operations per second on commodity CPUs.\r
+    \r
+    Using  the  bfq  I/O  scheduler,  Linux  can  now guarantee low\r
+    latency  to  lightweight  flows containing sporadic, short I/O.\r
+    No  throughput  issues arise, and no configuration is required.\r
+    This  capability benefits important, time-sensitive tasks, such\r
+    as  video  or audio streaming, as well as executing commands or\r
+    starting  applications.  Although  benchmarks are not available\r
+    yet,  these  guarantees  might  also  be  provided by the newly\r
+    proposed  [64]I/O latency controller . It allows administrators\r
+    to  set target latencies for I/O requests originating from each\r
+    group  of  processes,  and  favors  the  groups with the lowest\r
+    target latency.\r
+    \r
+    The testbed\r
+    \r
+    I  ran  the  tests with an ext4 filesystem mounted on a PLEXTOR\r
+    PX-256M5S  SSD,  which  features  a  peak rate of ~160MB/s with\r
+    random  I/O,  and  of  ~500MB/s  with  sequential  I/O.  I used\r
+    blk-mq,  in  Linux  4.18. The system was equipped with a 2.4GHz\r
+    Intel  Core  i7-2760QM  CPU  and  1.3GHz  DDR3  DRAM. In such a\r
+    system,  a  single  thread  doing  synchronous  reads reaches a\r
+    throughput of 23MB/s.\r
+    \r
+    For  the purposes of these tests, each process is considered to\r
+    be  in  one of two groups, termed "target" and "interferers". A\r
+    target  is  a  single-process,  I/O-bound  group  whose  I/O is\r
+    focused  on.  In  particular,  I  measure  the  I/O  throughput\r
+    enjoyed  by  this  group to get the minimum bandwidth delivered\r
+    to the group. An interferer is a single-process group whose role\r
+    is  to  generate additional I/O that interferes with the I/O of\r
+    the  target.  The  tested  workloads  contain  one  target  and\r
+    multiple interferers.\r
+    \r
+    The  single  process  in  each  group  either  reads or writes,\r
+    through  asynchronous  (buffered)  operations,  to  one  file —\r
+    different  from the file read or written by any other process —\r
+    after  invalidating  the  buffer cache for the file. I define a\r
+    reader  or  writer  process as either "random" or "sequential",\r
+    depending  on  whether  it  reads  or writes its file at random\r
+    positions  or  sequentially.  Finally, an interferer is defined\r
+    as  being either "active" or "inactive" depending on whether it\r
+    performs  I/O during the test. When an interferer is mentioned,\r
+    it is assumed that the interferer is active.\r
+    \r
+    Workloads  are  defined  so as to try to cover the combinations\r
+    that,  I believe, most influence the performance of the storage\r
+    device  and of the I/O policies. For brevity, in this article I\r
+    show results for only two groups of workloads:\r
+    \r
+    Static  sequential  :  four  synchronous  sequential readers or\r
+    four   asynchronous  sequential  writers,  plus  five  inactive\r
+    interferers.\r
+    \r
+    Static  random  :  four  synchronous random readers, all with a\r
+    block size equal to 4k, plus five inactive interferers.\r
+    \r
+    To  create  each  workload,  I  considered,  for  each  mix  of\r
+    interferers  in the group, two possibilities for the target: it\r
+    could  be  either  a random or a sequential synchronous reader.\r
+    In  [65]a  longer version of this article [PDF] , you will also\r
+    find   results  for  workloads  with  varying  degrees  of  I/O\r
+    randomness,  and for dynamic workloads (containing sporadic I/O\r
+    sources).  These extra results confirm the losses of throughput\r
+    and I/O control for throttling that are shown here.\r
+    \r
+    I/O policies\r
+    \r
+    Linux  provides  two I/O-control mechanisms for guaranteeing (a\r
+    minimum)  bandwidth, or at least fairness, to long-lived flows:\r
+    the   throttling  and  proportional-share  I/O  policies.  With\r
+    throttling,  one  can  set  a  maximum  bandwidth  limit — "max\r
+    limit"  for brevity — for the I/O of each group. Max limits can\r
+    be  used,  in an indirect way, to provide the service guarantee\r
+    at  the  focus  of  this  article.  For  example,  to guarantee\r
+    minimum  bandwidths  to  I/O flows, a group can be guaranteed a\r
+    minimum  bandwidth by limiting the maximum bandwidth of all the\r
+    other groups.\r
+    \r
+    Unfortunately,  max  limits  have  two  drawbacks  in  terms of\r
+    throughput.  First,  if  some groups do not use their allocated\r
+    bandwidth,  that  bandwidth cannot be reclaimed by other active\r
+    groups.  Second,  limits  must comply with the worst-case speed\r
+    of  the  device,  namely, its random-I/O peak rate. Such limits\r
+    will  clearly  leave  a lot of throughput unused with workloads\r
+    that  otherwise  would  drive  the  device to higher throughput\r
+    levels.  Maximizing  throughput  is  simply  not  a goal of max\r
+    limits.  So,  for brevity, test results with max limits are not\r
+    shown  here.  You  can find these results, plus a more detailed\r
+    description  of  the  above  drawbacks,  in the long version of\r
+    this article.\r
+    \r
+    Because  of  these  drawbacks,  a  new, still experimental, low\r
+    limit  has  been  added to the throttling policy. If a group is\r
+    assigned  a low limit, then the throttling policy automatically\r
+    limits  the  I/O of the other groups in such a way to guarantee\r
+    to  the  group  a  minimum  bandwidth equal to its assigned low\r
+    limit.  This  new  throttling  mechanism  throttles no group as\r
+    long  as  every  group is getting at least its assigned minimum\r
+    bandwidth.  I  tested  this mechanism, but did not consider the\r
+    interesting  problem  of guaranteeing minimum bandwidths while,\r
+    at the same time, enforcing maximum bandwidths.\r
+    \r
+    The  other  I/O  policy available in Linux, proportional share,\r
+    provides  weighted  fairness.  Each group is assigned a weight,\r
+    and   should   receive   a  portion  of  the  total  throughput\r
+    proportional  to  its  weight.  This  scheme guarantees minimum\r
+    bandwidths  in  the  same way that low limits do in throttling.\r
+    In  particular, it guarantees to each group a minimum bandwidth\r
+    equal  to  the  ratio  between the weight of the group, and the\r
+    sum  of the weights of all the groups that may be active at the\r
+    same time.\r
+    \r
+    The  actual implementation of the proportional-share policy, on\r
+    a  given drive, depends on what flavor of the block layer is in\r
+    use  for  that  drive.  If  the drive is using the legacy block\r
+    interface,  the policy is implemented by the cfq I/O scheduler.\r
+    Unfortunately,   cfq   fails   to   control   bandwidths   with\r
+    flash-based  storage,  especially  on  drives featuring command\r
+    queueing.  This  case  is  not  considered in these tests. With\r
+    drives  using  the  multiqueue interface, proportional share is\r
+    implemented  by  bfq. This is the combination considered in the\r
+    tests.\r
+    \r
+    To  benchmark  both  throttling  (low  limits) and proportional\r
+    share,  I  tested,  for  each workload, the combinations of I/O\r
+    policies  and  I/O  schedulers  reported in the table below. In\r
+    the  end,  there  are  three  test  cases for each workload. In\r
+    addition,  for some workloads, I considered two versions of bfq\r
+    for the proportional-share policy.\r
+    \r
+    Name\r
+    \r
+    I/O policy\r
+    \r
+    Scheduler\r
+    \r
+    Parameter for target\r
+    \r
+    Parameter for each of the four active interferers\r
+    \r
+    Parameter for each of the five inactive interferers\r
+    \r
+    Sum of parameters\r
+    \r
+    low-none\r
+    \r
+    Throttling with low limits\r
+    \r
+    none\r
+    \r
+    10MB/s\r
+    \r
+    10MB/s (tot: 40)\r
+    \r
+    20MB/s (tot: 100)\r
+    \r
+    150MB/s\r
+    \r
+    prop-bfq\r
+    \r
+    Proportional share\r
+    \r
+    bfq\r
+    \r
+    300\r
+    \r
+    100 (tot: 400)\r
+    \r
+    200 (tot: 1000)\r
+    \r
+    1700\r
+    \r
+    For  low  limits,  I  report  results with only none as the I/O\r
+    scheduler,  because  the  results  are  the same with kyber and\r
+    mq-deadline.\r
+    \r
+    The  capabilities of the storage medium and of low limits drove\r
+    the policy configurations. In particular:\r
+    \r
+    The  configuration  of the target and of the active interferers\r
+    for  low-none  is  the one for which low-none provides its best\r
+    possible  minimum-bandwidth  guarantee  to  the target: 10MB/s,\r
+    guaranteed  if  all interferers are readers. Results remain the\r
+    same  regardless of the values used for target latency and idle\r
+    time;  I  set them to 100µs and 1000µs, respectively, for every\r
+    group.\r
+    \r
+    Low  limits  for  inactive  interferers  are  set  to twice the\r
+    limits  for active interferers, to pose greater difficulties to\r
+    the policy.\r
+    \r
+    I  chose weights for prop-bfq so as to guarantee about the same\r
+    minimum  bandwidth  as  low-none  to  the  target,  in the same\r
+    only-reader  worst  case  as  for  low-none  and  to  preserve,\r
+    between  the  weights  of  active and inactive interferers, the\r
+    same  ratio  as  between  the low limits of active and inactive\r
+    interferers.\r
+    \r
+    Full  details  on  configurations  can  be  found  in  the long\r
+    version of this article.\r
+    \r
+    Each  workload  was  run  ten  times  for each policy, plus ten\r
+    times   without  any  I/O  control,  i.e.,  with  none  as  I/O\r
+    scheduler  and  no  I/O policy in use. For each run, I measured\r
+    the  I/O  throughput of the target (which reveals the bandwidth\r
+    provided  to  the target), the cumulative I/O throughput of the\r
+    interferers,  and  the  total  I/O throughput. These quantities\r
+    fluctuated  very  little  during  each  run,  as well as across\r
+    different  runs. Thus in the graphs I report only averages over\r
+    per-run  average throughputs. In particular, for the case of no\r
+    I/O  control,  I  report only the total I/O throughput, to give\r
+    an  idea of the throughput that can be reached without imposing\r
+    any control.\r
+    \r
+    Results\r
+    \r
+    This  plot  shows  throughput results for the simplest group of\r
+    workloads: the static-sequential set.\r
+    \r
+    With  a  random reader as the target against sequential readers\r
+    as  interferers,  low-none  does  guarantee  the configured low\r
+    limit   to  the  target.  Yet  it  reaches  only  a  low  total\r
+    throughput.  The  throughput  of  the  random  reader evidently\r
+    oscillates  around 10MB/s during the test. This implies that it\r
+    is  at least slightly below 10MB/s for a significant percentage\r
+    of  the  time.  But  when this happens, the low-limit mechanism\r
+    limits  the  maximum bandwidth of every active group to the low\r
+    limit  set  for the group, i.e., to just 10MB/s. The end result\r
+    is  a total throughput lower than 10% of the throughput reached\r
+    without I/O control.\r
+    \r
+    That  said, the high throughput achieved without I/O control is\r
+    obtained  by  choking  the random I/O of the target in favor of\r
+    the  sequential  I/O  of  the interferers. Thus, it is probably\r
+    more  interesting  to  compare  low-none  throughput  with  the\r
+    throughput  reachable while actually guaranteeing 10MB/s to the\r
+    target.  The  target  is  a single, synchronous, random reader,\r
+    which  reaches  23MB/s while active. So, to guarantee 10MB/s to\r
+    the  target,  it  is  enough  to serve it for about half of the\r
+    time,  and the interferers for the other half. Since the device\r
+    reaches  ~500MB/s  with  the sequential I/O of the interferers,\r
+    the  resulting  throughput  with  this  service scheme would be\r
+    (500+23)/2,  or  about 260MB/s. low-none thus reaches less than\r
+    20%  of  the total throughput that could be reached while still\r
+    preserving the target bandwidth.\r
+    \r
+    prop-bfq  provides the target with a slightly higher throughput\r
+    than  low-none.  This  makes  it harder for prop-bfq to reach a\r
+    high  total throughput, because prop-bfq serves more random I/O\r
+    (from  the target) than low-none. Nevertheless, prop-bfq gets a\r
+    much  higher  total  throughput than low-none. According to the\r
+    above  estimate,  this  throughput  is about 90% of the maximum\r
+    throughput  that  could  be reached, for this workload, without\r
+    violating  service  guarantees. The reason for this good result\r
+    is  that  bfq  provides  an  effective  implementation  of  the\r
+    proportional-share  service  policy.  At  any time, each active\r
+    group  is  granted  a fraction of the current total throughput,\r
+    and  the  sum  of  these  fractions  is  equal to one; so group\r
+    bandwidths  naturally  saturate  the available total throughput\r
+    at all times.\r
+    \r
+    Things  change  with  the  second  workload:  a  random  reader\r
+    against  sequential writers. Now low-none reaches a much higher\r
+    total  throughput  than  prop-bfq.  low-none  serves  much more\r
+    sequential  (write)  I/O  than  prop-bfq because writes somehow\r
+    break  the  low-limit  mechanisms and prevail over the reads of\r
+    the  target.  Conceivably,  this happens because writes tend to\r
+    both  starve  reads  in  the OS (mainly by eating all available\r
+    I/O  tags)  and to cheat on their completion time in the drive.\r
+    In  contrast,  bfq  is  intentionally  configured  to privilege\r
+    reads, to counter these issues.\r
+    \r
+    In  particular, low-none gets an even higher throughput than no\r
+    I/O  control  at all because it penalizes the random I/O of the\r
+    target even more than the no-controller configuration.\r
+    \r
+    Finally,  with  the  last  two workloads, prop-bfq reaches even\r
+    higher  total  throughput  than  with the first two. It happens\r
+    because  the  target  also  does  sequential  I/O,  and serving\r
+    sequential  I/O  is  much  more  beneficial for throughput than\r
+    serving  random  I/O.  With  these  two  workloads,  the  total\r
+    throughput  is, respectively, close to or much higher than that\r
+    reached  without  I/O control. For the last workload, the total\r
+    throughput  is  much higher because, differently from none, bfq\r
+    privileges  reads  over  asynchronous writes, and reads yield a\r
+    higher  throughput  than  writes.  In  contrast, low-none still\r
+    gets  lower  or much lower throughput than prop-bfq, because of\r
+    the  same issues that hinder low-none throughput with the first\r
+    two workloads.\r
+    \r
+    As  for  bandwidth  guarantees,  with  readers  as  interferers\r
+    (third  workload),  prop-bfq,  as  expected, gives the target a\r
+    fraction  of  the  total throughput proportional to its weight.\r
+    bfq    approximates    perfect   proportional-share   bandwidth\r
+    distribution  among groups doing I/O of the same type (reads or\r
+    writes)  and  with  the  same  locality (sequential or random).\r
+    With  the last workload, prop-bfq gives much more throughput to\r
+    the  reader  than  to  all the interferers, because interferers\r
+    are asynchronous writers, and bfq privileges reads.\r
+    \r
+    The  second  group  of  workloads  (static random), is the one,\r
+    among   all   the  workloads  considered,  for  which  prop-bfq\r
+    performs worst. Results are shown below:\r
+    \r
+    This  chart reports results not only for mainline bfq, but also\r
+    for  an improved version of bfq which is currently under public\r
+    testing.  As  can  be  seen, with only random readers, prop-bfq\r
+    reaches  a  much  lower  total  throughput  than low-none. This\r
+    happens  because of the Achilles heel of the bfq I/O scheduler.\r
+    If  the  process  in  service  does  synchronous  I/O and has a\r
+    higher  weight  than  some  other process, then, to give strong\r
+    bandwidth   guarantees   to   that   process,   bfq  plugs  I/O\r
+    dispatching  every  time  the process temporarily stops issuing\r
+    I/O   requests.   In  this  respect,  processes  actually  have\r
+    differentiated  weights and do synchronous I/O in the workloads\r
+    tested.  So  bfq systematically performs I/O plugging for them.\r
+    Unfortunately,  this  plugging  empties  the internal queues of\r
+    the  drive, which kills throughput with random I/O. And the I/O\r
+    of all processes in these workloads is also random.\r
+    \r
+    The  situation  reverses  with  a  sequential reader as target.\r
+    Yet,  the most interesting results come from the new version of\r
+    bfq,  containing  small  changes  to  counter exactly the above\r
+    weakness.  This  version  recovers  most of the throughput loss\r
+    with  the  workload  made of only random I/O and more; with the\r
+    second  workload,  where  the target is a sequential reader, it\r
+    reaches about 3.7 times the total throughput of low-none.\r
+    \r
+    When  the main concern is the latency of flows containing short\r
+    I/O,  Linux seems now rather high performing, thanks to the bfq\r
+    I/O  scheduler  and  the  I/O  latency  controller.  But if the\r
+    requirement  is  to  provide  explicit bandwidth guarantees (or\r
+    just  fairness) to I/O flows, then one must be ready to give up\r
+    much  or most of the speed of the storage media. bfq helps with\r
+    some   workloads,   but  loses  most  of  the  throughput  with\r
+    workloads  consisting  of mostly random I/O. Fortunately, there\r
+    is  apparently  hope  for  much  better  performance  since  an\r
+    improvement,  still  under  development, seems to enable bfq to\r
+    reach a high throughput with all workloads tested so far.\r
+    \r
+    [  I  wish  to  thank  Vivek Goyal for enabling me to make this\r
+    article much more fair and sound.]\r
+    \r
+    [66]Comments (4 posted)\r
+    \r
+    [67]KDE's onboarding initiative, one year later\r
+    \r
+    August 24, 2018\r
+    \r
+    This article was contributed by Marta Rybczyńska\r
+    \r
+    [68]Akademy\r
+    \r
+    In  2017,  the  KDE  community  decided  on  [69]three goals to\r
+    concentrate  on  for  the  next  few  years.  One  of  them was\r
+    [70]streamlining   the  onboarding  of  new  contributors  (the\r
+    others  were  [71]improving usability and [72]privacy ). During\r
+    [73]Akademy  ,  the  yearly  KDE  conference  that  was held in\r
+    Vienna  in  August,  Neofytos Kolokotronis shared the status of\r
+    the  onboarding  goal,  the work done during the last year, and\r
+    further  plans.  While it is a complicated process in a project\r
+    as  big  and  diverse  as  KDE, numerous improvements have been\r
+    already made.\r
+    \r
+    Two  of the three KDE community goals were proposed by relative\r
+    newcomers.  Kolokotronis  was  one  of those, having joined the\r
+    [74]KDE  Promo  team  not  long  before  proposing the focus on\r
+    onboarding.  He  had  previously  been involved with [75]Chakra\r
+    Linux  ,  a  distribution  based on KDE software. The fact that\r
+    new  members of the community proposed strategic goals was also\r
+    noted in the [76]Sunday keynote by Claudia Garad .\r
+    \r
+    Proper  onboarding  adds excitement to the contribution process\r
+    and  increases retention, he explained. When we look at [77]the\r
+    definition  of  onboarding  ,  it is a process in which the new\r
+    contributors  acquire  knowledge, skills, and behaviors so that\r
+    they  can  contribute effectively. Kolokotronis proposed to see\r
+    it  also  as  socialization:  integration  into  the  project's\r
+    relationships, culture, structure, and procedures.\r
+    \r
+    The  gains  from  proper  onboarding  are many. The project can\r
+    grow   by  attracting  new  blood  with  new  perspectives  and\r
+    solutions.   The  community  maintains  its  health  and  stays\r
+    vibrant.  Another  important  advantage of efficient onboarding\r
+    is  that  replacing  current  contributors  becomes easier when\r
+    they  change interests, jobs, or leave the project for whatever\r
+    reason.  Finally,  successful  onboarding adds new advocates to\r
+    the project.\r
+    \r
+    Achievements so far and future plans\r
+    \r
+    The  team  started  with  ideas  for  a  centralized onboarding\r
+    process  for the whole of KDE. They found out quickly that this\r
+    would  not  work  because KDE is "very decentralized", so it is\r
+    hard  to  provide  tools  and procedures that are going to work\r
+    for   the  whole  project.  According  to  Kolokotronis,  other\r
+    characteristics   of   KDE  that  impact  onboarding  are  high\r
+    diversity,   remote   and   online   teams,   and  hundreds  of\r
+    contributors  in dozens of projects and teams. In addition, new\r
+    contributors  already know in which area they want to take part\r
+    and  they  prefer  specific  information  that will be directly\r
+    useful for them.\r
+    \r
+    So  the  team  changed its approach; several changes have since\r
+    been  proposed  and  implemented.  The  [78]Get  Involved page,\r
+    which  is  expected to be one of the resources new contributors\r
+    read  first, has been rewritten. For the [79]Junior Jobs page ,\r
+    the  team  is  [80] [81]discussing what the generic content for\r
+    KDE  as  a whole should be. The team simplified [82]Phabricator\r
+    registration  ,  which  resulted  in  documenting  the  process\r
+    better.  Another part of the work includes the [83]KDE Bugzilla\r
+    ;  it  includes, for example initiatives to limit the number of\r
+    states of a ticket or remove obsolete products.\r
+    \r
+    The   [84]Plasma   Mobile  team  is  heavily  involved  in  the\r
+    onboarding  goal.  The Plasma Mobile developers have simplified\r
+    their    development   environment   setup   and   created   an\r
+    [85]interactive  "Get  Involved"  page. In addition, the Plasma\r
+    team  changed  the  way task descriptions are written; they now\r
+    contain  more detail, so that it is easier to get involved. The\r
+    basic  description  should  be  short  and clear, and it should\r
+    include  details  of  the  problem  and possible solutions. The\r
+    developers  try  to  share  the  list  of  skills  necessary to\r
+    fulfill  the  tasks  and  include  clear links to the technical\r
+    resources needed.\r
+    \r
+    Kolokotronis  and  team  also identified a new potential source\r
+    of  contributors  for  KDE:  distributions using KDE. They have\r
+    the  advantage  of  already knowing and using the software. The\r
+    next  idea  the team is working on is to make sure that setting\r
+    up  a  development  environment is easy. The team plans to work\r
+    on this during a dedicated sprint this autumn.\r
+    \r
+    Searching for new contributors\r
+    \r
+    Kolokotronis  plans  to  search  for  new  contributors  at the\r
+    periphery  of  the  project,  among  the "skilled enthusiasts":\r
+    loyal  users  who  actually  care  about the project. They "can\r
+    make  wonders",  he  said.  Those  individuals may be also less\r
+    confident  or  shy,  have  troubles  making the first step, and\r
+    need  guidance.  The  project  leaders  should  take  that into\r
+    account.\r
+    \r
+    In   addition,   newcomers   are  all  different.  Kolokotronis\r
+    provided  a  long  list  of  how contributors differ, including\r
+    skills  and  knowledge,  motives  and  interests,  and time and\r
+    dedication.  His  advice  is to "try to find their superpower",\r
+    the  skills  they  have  that  are  missing  in the team. Those\r
+    "superpowers" can then be used for the benefit of the project.\r
+    \r
+    If  a project does nothing else, he said, it can start with its\r
+    documentation.   However,   this   does   not  only  mean  code\r
+    documentation.  Writing  down  the  procedures  or  information\r
+    about  the internal work of the project, like who is working on\r
+    what,  is  an  important  part of a project's documentation and\r
+    helps  newcomers.  There  should  also  be guidelines on how to\r
+    start, especially setting up the development environment.\r
+    \r
+    The  first  thing  the  project leaders should do, according to\r
+    Kolokotronis,  is to spend time on introducing newcomers to the\r
+    project.  Ideally  every  new  contributor  should  be assigned\r
+    mentors  —  more  experienced  members  who  can help them when\r
+    needed.  The mentors and project leaders should find tasks that\r
+    are   interesting   for  each  person.  Answering  an  audience\r
+    question   on   suggestions   for   shy  new  contributors,  he\r
+    recommended  even  more  mentoring.  It is also very helpful to\r
+    make  sure  that  newcomers  have  enough  to  read, but "avoid\r
+    RTFM",  he  highlighted.  It is also easy for a new contributor\r
+    "to  fly  away",  he  said.  The solution is to keep requesting\r
+    things and be proactive.\r
+    \r
+    What can the project do?\r
+    \r
+    Kolokotronis  suggested  a number of actions for a project when\r
+    it   wants  to  improve  its  onboarding.  The  first  step  is\r
+    preparation:  the  project  leaders  should know the team's and\r
+    the  project's  needs. Long-term planning is important, too. It\r
+    is  not  enough  to wait for contributors to come — the project\r
+    should  be  proactive,  which means reaching out to candidates,\r
+    suggesting   appropriate  tasks  and,  finally,  making  people\r
+    available for the newcomers if they need help.\r
+    \r
+    This  leads  to  the  next  step:  to be a mentor. Kolokotronis\r
+    suggests being a "great host", but also trying to phase out the\r
+    dependency on the mentor rapidly. "We have been all newcomers",\r
+    he  said.  It  can  be  intimidating to join an existing group.\r
+    Onboarding  creates  a  sense  of  belonging  which,  in  turn,\r
+    increases retention.\r
+    \r
+    The  last  step  proposed  was  to  be strategic. This includes\r
+    thinking  about  the  emotions  you  want  newcomers  to  feel.\r
+    Kolokotronis  explained the strategic part with an example. The\r
+    overall  goal  is  (surprise!)  to  improve  onboarding  of new\r
+    contributors.  An  intermediate  objective might be to keep the\r
+    newcomers  after  they  have  made  their first commit. If your\r
+    strategy  is  to  keep  them  confident  and proud, you can use\r
+    different  tactics  like  praise and acknowledgment of the work\r
+    in  public.  Another  useful  tactic  may  be  assigning simple\r
+    tasks, according to the skill of the contributor.\r
+    \r
+    To   summarize,   the   most   important  thing,  according  to\r
+    Kolokotronis,  is  to  respond  quickly and spend time with new\r
+    contributors.  This  time should be used to explain procedures,\r
+    and  to  introduce the people and culture. It is also essential\r
+    to  guide  first  contributions  and praise contributors' skill\r
+    and  effort. Increase the difficulty of tasks over time to keep\r
+    contributors  motivated  and  challenged. And finally, he said,\r
+    "turn them into mentors".\r
+    \r
+    Kolokotronis  acknowledges  that  onboarding  "takes  time" and\r
+    "everyone  complains"  about  it. However, he is convinced that\r
+    it  is  beneficial  in  the  long  term  and  that it decreases\r
+    developer turnover.\r
+    \r
+    Advice to newcomers\r
+    \r
+    Kolokotronis  concluded  with some suggestions for newcomers to\r
+    a  project.  They  should  try  to be persistent and to not get\r
+    discouraged  when  something  goes  wrong. Building connections\r
+    from   the  very  beginning  is  helpful.  He  suggests  asking\r
+    questions  as  if you were already a member "and things will be\r
+    fine". However, accept criticism if it happens.\r
+    \r
+    One  of  the  next  actions  of  the onboarding team will be to\r
+    collect  feedback  from  newcomers and experienced contributors\r
+    to  see  if they agree on the ideas and processes introduced so\r
+    far.\r
+    \r
+    [86]Comments (none posted)\r
+    \r
+    [87]Sharing and archiving data sets with Dat\r
+    \r
+    August 27, 2018\r
+    \r
+    This article was contributed by Antoine Beaupré\r
+    \r
+    [88]Dat  is  a  new peer-to-peer protocol that uses some of the\r
+    concepts  of  [89]BitTorrent  and  Git.  Dat  primarily targets\r
+    researchers  and  open-data activists as it is a great tool for\r
+    sharing,  archiving, and cataloging large data sets. But it can\r
+    also  be  used to implement decentralized web applications in a\r
+    novel way.\r
+    \r
+    Dat quick primer\r
+    \r
+    Dat  is  written in JavaScript, so it can be installed with npm\r
+    ,  but there are [90]standalone binary builds and a [91]desktop\r
+    application  (as an AppImage). An [92]online viewer can be used\r
+    to  inspect data for those who do not want to install arbitrary\r
+    binaries on their computers.\r
+    \r
+    The  command-line  application  allows  basic  operations  like\r
+    downloading  existing  data sets and sharing your own. Dat uses\r
+    a  32-byte hex string that is an [93]ed25519 public key , which\r
+    is   used  to  discover  and  find  content  on  the  net.  For\r
+    example, this will download some sample data:  $ dat clone \\r
+    \r
+    dat://778f8d955175c92e4ced5e4f5563f69bfec0c86cc6f670352c457943-\r
+    666fe639 \\r
+    \r
+    ~/Downloads/dat-demo\r
+    \r
+    Similarly,  the  share  command  is  used  to share content. It\r
+    indexes  the  files  in  a  given  directory  and creates a new\r
+    unique  address  like the one above. The share command starts a\r
+    server  that uses multiple discovery mechanisms (currently, the\r
+    [94]Mainline  Distributed  Hash  Table  (DHT), a [95]custom DNS\r
+    server  ,  and  multicast  DNS)  to announce the content to its\r
+    peers.  This  is  how another user, armed with that public key,\r
+    can  download  that  content with dat clone or mirror the files\r
+    continuously with dat sync .\r
+    \r
+    So  far,  this  looks  a  lot  like BitTorrent [96]magnet links\r
+    updated  with 21st century cryptography. But Dat adds revisions\r
+    on  top  of  that,  so  modifications  are automatically shared\r
+    through  the  swarm.  That is important for public data sets as\r
+    those  are  often  dynamic  in  nature.  Revisions also make it\r
+    possible  to  use [97]Dat as a backup system by saving the data\r
+    incrementally using an [98]archiver .\r
+    \r
+    While  Dat  is designed to work on larger data sets, processing\r
+    them  for  sharing  may  take a while. For example, sharing the\r
+    Linux  kernel  source  code  required about five minutes as Dat\r
+    worked  on indexing all of the files. This is comparable to the\r
+    performance  offered by [99]IPFS and BitTorrent. Data sets with\r
+    more or larger files may take quite a bit more time.\r
+    \r
+    One  advantage  that  Dat  has  over  IPFS  is  that it doesn't\r
+    duplicate  the  data. When IPFS imports new data, it duplicates\r
+    the  files  into  ~/.ipfs . For collections of small files like\r
+    the  kernel,  this  is not a huge problem, but for larger files\r
+    like  videos  or  music,  it's  a  significant limitation. IPFS\r
+    eventually  implemented  a solution to this [100]problem in the\r
+    form  of the experimental [101]filestore feature , but it's not\r
+    enabled  by  default.  Even  with that feature enabled, though,\r
+    changes   to  data  sets  are  not  automatically  tracked.  In\r
+    comparison,  Dat  operation on dynamic data feels much lighter.\r
+    The downside is that each set needs its own dat share process.\r
+    \r
+    Like  any  peer-to-peer  system, Dat needs at least one peer to\r
+    stay  online  to  offer  the  content, which is impractical for\r
+    mobile  devices. Hosting providers like [102]Hashbase (which is\r
+    a  [103]pinning  service  in  Dat  jargon)  can help users keep\r
+    content  online  without  running  their  own [104]server . The\r
+    closest   parallel  in  the  traditional  web  ecosystem  would\r
+    probably   be  content  distribution  networks  (CDN)  although\r
+    pinning    services    are   not   necessarily   geographically\r
+    distributed  and  a  CDN does not necessarily retain a complete\r
+    copy of a website.  [105]\r
+    \r
+    A  web  browser called [106]Beaker , based on the [107]Electron\r
+    framework,  can  access  Dat  content  natively  without  going\r
+    through  a pinning service. Furthermore, Beaker is essential to\r
+    get   any   of  the  [108]Dat  applications  working,  as  they\r
+    fundamentally  rely  on  dat://  URLs  to  do their magic. This\r
+    means  that  Dat  applications won't work for most users unless\r
+    they  install that special web browser. There is a [109]Firefox\r
+    extension  called " [110]dat-fox " for people who don't want to\r
+    install  yet  another  browser,  but  it  requires installing a\r
+    [111]helper  program  .  The  extension  will  be  able to load\r
+    dat://  URLs  but  many  applications  will still not work. For\r
+    example,  the  [112]photo  gallery application completely fails\r
+    with dat-fox.\r
+    \r
+    Dat-based  applications  look promising from a privacy point of\r
+    view.  Because of its peer-to-peer nature, users regain control\r
+    over  where their data is stored: either on their own computer,\r
+    an  online server, or by a trusted third party. But considering\r
+    the  protocol  is not well established in current web browsers,\r
+    I  foresee  difficulties  in adoption of that aspect of the Dat\r
+    ecosystem.  Beyond  that,  it  is rather disappointing that Dat\r
+    applications  cannot  run  natively in a web browser given that\r
+    JavaScript is designed exactly for that.\r
+    \r
+    Dat privacy\r
+    \r
+    An  advantage  Dat  has  over other peer-to-peer protocols like\r
+    BitTorrent   is   end-to-end   encryption.   I  was  originally\r
+    concerned   by   the   encryption   design   when  reading  the\r
+    [113]academic paper [PDF] :\r
+    \r
+    It  is  up  to  client programs to make design decisions around\r
+    which  discovery  networks  they  trust.  For  example if a Dat\r
+    client  decides  to  use  the BitTorrent DHT to discover peers,\r
+    and  they  are  searching for a publicly shared Dat key (e.g. a\r
+    key  cited publicly in a published scientific paper) with known\r
+    contents,  then because of the privacy design of the BitTorrent\r
+    DHT  it  becomes  public  knowledge  what  key  that  client is\r
+    searching for.\r
+    \r
+    So  in  other  words, to share a secret file with another user,\r
+    the  public key is transmitted over a secure side-channel, only\r
+    to  then  leak  during  the discovery process. Fortunately, the\r
+    public  Dat  key is not directly used during discovery as it is\r
+    [114]hashed  with  BLAKE2B  .  Still, the security model of Dat\r
+    assumes   the   public  key  is  private,  which  is  a  rather\r
+    counterintuitive  concept  that  might upset cryptographers and\r
+    confuse  users  who  are  frequently  encouraged  to  type such\r
+    strings  in  address bars and search engines as part of the Dat\r
+    experience.  There  is a [115]security & privacy FAQ in the Dat\r
+    documentation warning about this problem:\r
+    \r
+    One  of  the key elements of Dat privacy is that the public key\r
+    is  never  used  in  any  discovery  network. The public key is\r
+    hashed,  creating  the discovery key. Whenever peers attempt to\r
+    connect to each other, they use the discovery key.\r
+    \r
+    Data  is  encrypted  using  the  public key, so it is important\r
+    that this key stays secure.\r
+    \r
+    There  are  other  privacy  issues outlined in the document; it\r
+    states that " Dat faces similar privacy risks as BitTorrent ":\r
+    \r
+    When  you download a dataset, your IP address is exposed to the\r
+    users  sharing  that dataset. This may lead to honeypot servers\r
+    collecting  IP addresses, as we've seen in Bittorrent. However,\r
+    with  dataset  sharing we can create a web of trust model where\r
+    specific  institutions  are  trusted  as  primary  sources  for\r
+    datasets, diminishing the sharing of IP addresses.\r
+    \r
+    A  Dat  blog  post  refers to this issue as [116]reader privacy\r
+    and  it is, indeed, a sensitive issue in peer-to-peer networks.\r
+    It  is  how  BitTorrent  users  are discovered and served scary\r
+    verbiage  from  lawyers, after all. But Dat makes this a little\r
+    better  because,  to  join  a swarm, you must know what you are\r
+    looking  for  already,  which means peers who can look at swarm\r
+    activity  only  include  users  who know the secret public key.\r
+    This  works  well  for  secret  content, but for larger, public\r
+    data  sets, it is a real problem; it is why the Dat project has\r
+    [117]avoided creating a Wikipedia mirror so far.\r
+    \r
+    I  found  another  privacy  issue that is not documented in the\r
+    security  FAQ  during  my  review of the protocol. As mentioned\r
+    earlier,  the [118]Dat discovery protocol routinely phones home\r
+    to  DNS  servers operated by the Dat project. This implies that\r
+    the  default  discovery  servers (and an attacker watching over\r
+    their  traffic)  know  who is publishing or seeking content, in\r
+    essence  discovering  the  "social  network"  behind  Dat. This\r
+    discovery  mechanism  can be disabled in clients, but a similar\r
+    privacy  issue  applies  to  the  DHT as well, although that is\r
+    distributed  so  it  doesn't  require  trust of the Dat project\r
+    itself.\r
+    \r
+    Considering  those  aspects  of the protocol, privacy-conscious\r
+    users  will  probably  want  to  use Tor or other anonymization\r
+    techniques to work around those concerns.\r
+    \r
+    The future of Dat\r
+    \r
+    [119]Dat  2.0  was  released  in  June  2017  with  performance\r
+    improvements   and   protocol   changes.  [120]Dat  Enhancement\r
+    Proposals  (DEPs)  guide the project's future development; most\r
+    work  is  currently  geared  toward  implementing  the  draft "\r
+    [121]multi-writer   proposal   "   in  [122]HyperDB  .  Without\r
+    multi-writer  support, only the original publisher of a Dat can\r
+    modify  it.  According  to  Joe  Hand, co-executive-director of\r
+    [123]Code  for  Science & Society (CSS) and Dat core developer,\r
+    in  an  IRC  chat, "supporting multiwriter is a big requirement\r
+    for  lots  of  folks". For example, while Dat might allow Alice\r
+    to  share  her  research  results with Bob, he cannot modify or\r
+    contribute  back  to  those results. The multi-writer extension\r
+    allows  for  Alice  to assign trust to Bob so he can have write\r
+    access to the data.\r
+    \r
+    Unfortunately,  the  current  proposal doesn't solve the " hard\r
+    problems  " of " conflict merges and secure key distribution ".\r
+    The  former  will  be worked out through user interface tweaks,\r
+    but  the  latter  is  a  classic problem that security projects\r
+    typically   have   trouble  finding  solutions  for—Dat  is  no\r
+    exception.  How  will Alice securely trust Bob? The OpenPGP web\r
+    of  trust?  Hexadecimal  fingerprints  read over the phone? Dat\r
+    doesn't provide a magic solution to this problem.\r
+    \r
+    Another  thing limiting adoption is that Dat is not packaged in\r
+    any  distribution  that I could find (although I [124]requested\r
+    it  in  Debian  )  and,  considering the speed of change of the\r
+    JavaScript  ecosystem,  this  is  unlikely  to  change any time\r
+    soon.  A  [125]Rust  implementation  of  the  Dat  protocol has\r
+    started,  however,  which  might  be easier to package than the\r
+    multitude  of  [126]Node.js  modules. In terms of mobile device\r
+    support,  there is an experimental Android web browser with Dat\r
+    support  called  [127]Bunsen  , which somehow doesn't run on my\r
+    phone.  Some  adventurous  users  have  successfully run Dat in\r
+    [128]Termux  .  I  haven't  found an app running on iOS at this\r
+    point.\r
+    \r
+    Even  beyond  platform  support, distributed protocols like Dat\r
+    have  a  tough  slope  to climb against the virtual monopoly of\r
+    more  centralized  protocols,  so  it  remains  to  be seen how\r
+    popular  those  tools  will  be.  Hand says Dat is supported by\r
+    multiple  non-profit  organizations. Beyond CSS, [129]Blue Link\r
+    Labs  is working on the Beaker Browser as a self-funded startup\r
+    and  a  grass-roots  organization, [130]Digital Democracy , has\r
+    contributed  to  the  project.  The  [131]Internet  Archive has\r
+    [132]announced  a  collaboration  between  itself, CSS, and the\r
+    California  Digital  Library to launch a pilot project to see "\r
+    how   members  of  a  cooperative,  decentralized  network  can\r
+    leverage  shared  services  to  ensure  data preservation while\r
+    reducing storage costs and increasing replication counts ".\r
+    \r
+    Hand  said  adoption in academia has been "slow but steady" and\r
+    that  the [133]Dat in the Lab project has helped identify areas\r
+    that  could  help researchers adopt the project. Unfortunately,\r
+    as  is  the case with many free-software projects, he said that\r
+    "our  team is definitely a bit limited on bandwidth to push for\r
+    bigger  adoption".  Hand said that the project received a grant\r
+    from   [134]Mozilla   Open   Source   Support  to  improve  its\r
+    documentation, which will be a big help.\r
+    \r
+    Ultimately,   Dat   suffers   from  a  problem  common  to  all\r
+    peer-to-peer  applications,  which is naming. Dat addresses are\r
+    not  exactly  intuitive:  humans  do not remember strings of 64\r
+    hexadecimal  characters well. For this, Dat took a [135]similar\r
+    approach  to IPFS by using DNS TXT records and /.well-known URL\r
+    paths   to  bridge  existing,  human-readable  names  with  Dat\r
+    hashes.  So  this sacrifices a part of the decentralized nature\r
+    of the project in favor of usability.\r
+    \r
+    I  have  tested  a lot of distributed protocols like Dat in the\r
+    past  and I am not sure Dat is a clear winner. It certainly has\r
+    advantages  over IPFS in terms of usability and resource usage,\r
+    but  the  lack  of packages on most platforms is a big limit to\r
+    adoption  for  most  people. This means it will be difficult to\r
+    share  content  with  my  friends  and  family with Dat anytime\r
+    soon,  which  would  probably  be  my  primary use case for the\r
+    project.  Until  the  protocol  reaches the wider adoption that\r
+    BitTorrent  has  seen  in  terms  of  platform  support, I will\r
+    probably   wait   before  switching  everything  over  to  this\r
+    promising project.\r
+    \r
+    [136]Comments (11 posted)\r
+    \r
+    Page editor : Jonathan Corbet\r
+    \r
+    Inside this week's LWN.net Weekly Edition\r
+    \r
+    [137]Briefs  :  OpenSSH  7.8;  4.19-rc1;  Which stable?; Netdev\r
+    0x12; Bison 3.1; Quotes; ...\r
+    \r
+    [138]Announcements  :  Newsletters;  events;  security updates;\r
+    kernel patches; ...  Next page : [139]Brief items>>\r
+    \r
+    \r
+    \r
+    [1] https://lwn.net/Articles/763743/\r
+    \r
+    [2] https://lwn.net/Articles/763626/\r
+    \r
+    [3] https://lwn.net/Articles/763641/\r
+    \r
+    [4] https://lwn.net/Articles/763106/\r
+    \r
+    [5] https://lwn.net/Articles/763603/\r
+    \r
+    [6] https://lwn.net/Articles/763175/\r
+    \r
+    [7] https://lwn.net/Articles/763492/\r
+    \r
+    [8] https://lwn.net/Articles/763254/\r
+    \r
+    [9] https://lwn.net/Articles/763255/\r
+    \r
+    [10] https://lwn.net/Articles/763743/#Comments\r
+    \r
+    [11] https://lwn.net/Articles/763626/\r
+    \r
+    [12] http://julialang.org/\r
+    \r
+    [13] https://julialang.org/blog/2018/08/one-point-zero\r
+    \r
+    [14] https://julialang.org/benchmarks/\r
+    \r
+    [15] https://juliacomputing.com/\r
+    \r
+    [16] https://en.wikipedia.org/wiki/Read%E2%80%93eval%E2%80%93p-\r
+    rint_loop\r
+    \r
+    [17] http://llvm.org/\r
+    \r
+    [18] http://www.3blue1brown.com/essence-of-linear-algebra-page/\r
+    \r
+    [19] http://www.netlib.org/lapack/\r
+    \r
+    [20] https://lwn.net/Articles/657157/\r
+    \r
+    [21] https://julialang.org/publications/julia-fresh-approach-B-\r
+    EKS.pdf\r
+    \r
+    [22] https://lwn.net/Articles/738915/\r
+    \r
+    [23] https://pypy.org/\r
+    \r
+    [24] https://github.com/JuliaPy/PyCall.jl\r
+    \r
+    [25] https://github.com/JuliaInterop/RCall.jl\r
+    \r
+    [26] https://docs.julialang.org/en/stable/\r
+    \r
+    [27] https://julialang.org/learning/\r
+    \r
+    [28] http://bogumilkaminski.pl/files/julia_express.pdf\r
+    \r
+    [29] https://docs.julialang.org/en/stable/manual/noteworthy-di-\r
+    fferences/#Noteworthy-differences-from-Python-1\r
+    \r
+    [30] https://lwn.net/Articles/746386/\r
+    \r
+    [31] https://github.com/JuliaLang/IJulia.jl\r
+    \r
+    [32] https://lwn.net/Articles/764001/\r
+    \r
+    [33] https://lwn.net/Articles/763626/#Comments\r
+    \r
+    [34] https://lwn.net/Articles/763641/\r
+    \r
+    [35] https://lwn.net/Archives/ConferenceByYear/#2018-Linux_Sec-\r
+    urity_Summit_NA\r
+    \r
+    [36]  https://events.linuxfoundation.org/events/linux-security-\r
+    summit-north-america-2018/\r
+    \r
+    [37] https://kernsec.org/wiki/index.php/Kernel_Self_Protection-\r
+    _Project\r
+    \r
+    [38] https://lwn.net/Articles/763644/\r
+    \r
+    [39] https://raphlinus.github.io/programming/rust/2018/08/17/u-\r
+    ndefined-behavior.html\r
+    \r
+    [40] https://lwn.net/Articles/749064/\r
+    \r
+    [41] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/-\r
+    linux.git/commit/?id=02361bc77888\r
+    \r
+    [42] https://lore.kernel.org/lkml/CA+55aFzCG-zNmZwX4A2FQpadafL-\r
+    fEzK6CC=qPXydAacU1RqZWA@mail.gmail.com/T/#u\r
+    \r
+    [43] https://lwn.net/Articles/758245/\r
+    \r
+    [44] https://lwn.net/Articles/718888/\r
+    \r
+    [45] https://lwn.net/Articles/744507/\r
+    \r
+    [46] https://outflux.net/slides/2018/lss/danger.pdf\r
+    \r
+    [47] https://lwn.net/Articles/763641/#Comments\r
+    \r
+    [48] https://lwn.net/Articles/763106/\r
+    \r
+    [49] https://lwn.net/Articles/763497/\r
+    \r
+    [50] https://lwn.net/Articles/762566/\r
+    \r
+    [51] https://lwn.net/Articles/761118/\r
+    \r
+    [52] https://git.kernel.org/linus/d5791044d2e5749ef4de84161cec-\r
+    5532e2111540\r
+    \r
+    [53] https://lwn.net/ml/linux-kernel/20180630000253.70103-1-sq-\r
+    ue@chromium.org/\r
+    \r
+    [54] https://git.kernel.org/linus/771c035372a036f83353eef46dbb-\r
+    829780330234\r
+    \r
+    [55] https://lwn.net/Articles/745073/\r
+    \r
+    [56] https://lwn.net/ml/linux-kernel/CA+55aFxFjAmrFpwQmEHCthHO-\r
+    zgidCKnod+cNDEE+3Spu9o1s3w@mail.gmail.com/\r
+    \r
+    [57] https://lwn.net/Articles/759499/\r
+    \r
+    [58] https://lwn.net/Articles/762355/\r
+    \r
+    [59] https://lwn.net/ml/linux-fsdevel/20180823223145.GK6515@Ze-\r
+    nIV.linux.org.uk/\r
+    \r
+    [60] https://lwn.net/Articles/763106/#Comments\r
+    \r
+    [61] https://lwn.net/Articles/763603/\r
+    \r
+    [62] https://lwn.net/Articles/601799/\r
+    \r
+    [63] https://lwn.net/Articles/552904\r
+    \r
+    [64] https://lwn.net/Articles/758963/\r
+    \r
+    [65] http://algogroup.unimore.it/people/paolo/pub-docs/extende-\r
+    d-lat-bw-throughput.pdf\r
+    \r
+    [66] https://lwn.net/Articles/763603/#Comments\r
+    \r
+    [67] https://lwn.net/Articles/763175/\r
+    \r
+    [68] https://lwn.net/Archives/ConferenceByYear/#2018-Akademy\r
+    \r
+    [69] https://dot.kde.org/2017/11/30/kdes-goals-2018-and-beyond\r
+    \r
+    [70] https://phabricator.kde.org/T7116\r
+    \r
+    [71] https://phabricator.kde.org/T6831\r
+    \r
+    [72] https://phabricator.kde.org/T7050\r
+    \r
+    [73] https://akademy.kde.org/\r
+    \r
+    [74] https://community.kde.org/Promo\r
+    \r
+    [75] https://www.chakralinux.org/\r
+    \r
+    [76] https://conf.kde.org/en/Akademy2018/public/events/79\r
+    \r
+    [77] https://en.wikipedia.org/wiki/Onboarding\r
+    \r
+    [78] https://community.kde.org/Get_Involved\r
+    \r
+    [79] https://community.kde.org/KDE/Junior_Jobs\r
+    \r
+    [80] https://lwn.net/Articles/763189/\r
+    \r
+    [81] https://phabricator.kde.org/T8686\r
+    \r
+    [82] https://phabricator.kde.org/T7646\r
+    \r
+    [83] https://bugs.kde.org/\r
+    \r
+    [84] https://www.plasma-mobile.org/index.html\r
+    \r
+    [85] https://www.plasma-mobile.org/findyourway\r
+    \r
+    [86] https://lwn.net/Articles/763175/#Comments\r
+    \r
+    [87] https://lwn.net/Articles/763492/\r
+    \r
+    [88] https://datproject.org\r
+    \r
+    [89] https://www.bittorrent.com/\r
+    \r
+    [90] https://github.com/datproject/dat/releases\r
+    \r
+    [91] https://docs.datproject.org/install\r
+    \r
+    [92] https://datbase.org/\r
+    \r
+    [93] https://ed25519.cr.yp.to/\r
+    \r
+    [94] https://en.wikipedia.org/wiki/Mainline_DHT\r
+    \r
+    [95] https://github.com/mafintosh/dns-discovery\r
+    \r
+    [96] https://en.wikipedia.org/wiki/Magnet_URI_scheme\r
+    \r
+    [97] https://blog.datproject.org/2017/10/13/using-dat-for-auto-\r
+    matic-file-backups/\r
+    \r
+    [98] https://github.com/mafintosh/hypercore-archiver\r
+    \r
+    [99] https://ipfs.io/\r
+    \r
+    [100] https://github.com/ipfs/go-ipfs/issues/875\r
+    \r
+    [101] https://github.com/ipfs/go-ipfs/blob/master/docs/experim-\r
+    ental-features.md#ipfs-filestore\r
+    \r
+    [102] https://hashbase.io/\r
+    \r
+    [103] https://github.com/datprotocol/DEPs/blob/master/proposal-\r
+    s/0003-http-pinning-service-api.md\r
+    \r
+    [104] https://docs.datproject.org/server\r
+    \r
+    [105] https://lwn.net/Articles/763544/\r
+    \r
+    [106] https://beakerbrowser.com/\r
+    \r
+    [107] https://electronjs.org/\r
+    \r
+    [108] https://github.com/beakerbrowser/explore\r
+    \r
+    [109] https://addons.mozilla.org/en-US/firefox/addon/dat-p2p-p-\r
+    rotocol/\r
+    \r
+    [110] https://github.com/sammacbeth/dat-fox\r
+    \r
+    [111] https://github.com/sammacbeth/dat-fox-helper\r
+    \r
+    [112] https://github.com/beakerbrowser/dat-photos-app\r
+    \r
+    [113] https://github.com/datproject/docs/raw/master/papers/dat-\r
+    paper.pdf\r
+    \r
+    [114] https://github.com/datprotocol/DEPs/blob/653e0cf40233b5d-\r
+    474cddc04235577d9d55b2934/proposals/0000-peer-discovery.md#dis-\r
+    covery-keys\r
+    \r
+    [115] https://docs.datproject.org/security\r
+    \r
+    [116] https://blog.datproject.org/2016/12/12/reader-privacy-on-\r
+    the-p2p-web/\r
+    \r
+    [117] https://blog.datproject.org/2017/12/10/dont-ship/\r
+    \r
+    [118] https://github.com/datprotocol/DEPs/pull/7\r
+    \r
+    [119] https://blog.datproject.org/2017/06/01/dat-sleep-release/\r
+    \r
+    [120] https://github.com/datprotocol/DEPs\r
+    \r
+    [121] https://github.com/datprotocol/DEPs/blob/master/proposal-\r
+    s/0008-multiwriter.md\r
+    \r
+    [122] https://github.com/mafintosh/hyperdb\r
+    \r
+    [123] https://codeforscience.org/\r
+    \r
+    [124] https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=890565\r
+    \r
+    [125] https://github.com/datrs\r
+    \r
+    [126] https://nodejs.org/en/\r
+    \r
+    [127] https://bunsenbrowser.github.io/#!index.md\r
+    \r
+    [128] https://termux.com/\r
+    \r
+    [129] https://bluelinklabs.com/\r
+    \r
+    [130] https://www.digital-democracy.org/\r
+    \r
+    [131] https://archive.org\r
+    \r
+    [132] https://blog.archive.org/2018/06/05/internet-archive-cod-\r
+    e-for-science-and-society-and-california-digital-library-to-pa-\r
+    rtner-on-a-data-sharing-and-preservation-pilot-project/\r
+    \r
+    [133] https://github.com/codeforscience/Dat-in-the-Lab\r
+    \r
+    [134] https://www.mozilla.org/en-US/moss/\r
+    \r
+    [135] https://github.com/datprotocol/DEPs/blob/master/proposal-\r
+    s/0005-dns.md\r
+    \r
+    [136] https://lwn.net/Articles/763492/#Comments\r
+    \r
+    [137] https://lwn.net/Articles/763254/\r
+    \r
+    [138] https://lwn.net/Articles/763255/\r
+    \r
+    [139] https://lwn.net/Articles/763254/\r
+\r
+\r
+\r
diff --git a/test/expected/LWN/0000763252.header b/test/expected/LWN/0000763252.header

new file mode 100644 (file)

index 0000000..8675073
--- /dev/null
+++ b/test/expected/LWN/0000763252.header
@@ -0,0 +1,3 @@
+0LWN.net Weekly Edition for August 30, 2018    null/LWN/0000763252             70\r
+i  \r
+i\r
diff --git a/test/expected/LWN/0000763252.header.html b/test/expected/LWN/0000763252.header.html

new file mode 100644 (file)

index 0000000..ddbdc83
--- /dev/null
+++ b/test/expected/LWN/0000763252.header.html
@@ -0,0 +1,20 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta http-equiv='content-type' content='text/html; charset=utf-8'>
+  <meta name='viewport' content='width=device-width, initial-scale=1.0'>
+  <style type='text/css'>
+    body { margin: 1em 15%; }
+  </style>
+</head>
+<body>
+<div class='story-header'>
+       <h1><a href='0000763252.html'>LWN.net Weekly Edition for August 30, 2018</a></h1>
+       <div class='details'></div>
+       <br/>
+       <div class='content' style='text-align: justify'>
+               
+       </div>
+<hr/>
+</div>
+</body>
diff --git a/test/expected/LWN/0000763252.html b/test/expected/LWN/0000763252.html

new file mode 100644 (file)

index 0000000..183fc89
--- /dev/null
+++ b/test/expected/LWN/0000763252.html
@@ -0,0 +1,25 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta http-equiv='content-type' content='text/html; charset=utf-8'>
+  <meta name='viewport' content='width=device-width, initial-scale=1.0'>
+  <style type='text/css'>
+    body { margin: 1em 15%; }
+  </style>
+</head>
+<body>
+<div class='story'>
+       <h1>LWN.net Weekly Edition for August 30, 2018</h1>
+       <div class='details'></div>
+       <br/>
+    <ul>
+        <li>News link: <a href='https://lwn.net/Articles/763252/'>https://lwn.net/Articles/763252/</a></li>
+        <li>Source link: <a href=''></a></li>
+    </ul>
+       <br/>
+       <div class='content' style='text-align: justify'>
+               [1]Welcome to the LWN.net Weekly Edition for August 30, 2018 This edition contains the following feature content:<br/><br/>[2]An introduction to the Julia language, part 1 : Julia is a language designed for intensive numerical calculations; this article gives an overview of its core features.<br/><br/>[3]C considered dangerous : a Linux Security Summit talk on what is being done to make the use of C in the kernel safer.<br/><br/>[4]The second half of the 4.19 merge window : the final features merged (or not merged) before the merge window closed for this cycle.<br/><br/>[5]Measuring (and fixing) I/O-controller throughput loss : the kernel&#x27;s I/O controllers can provide useful bandwidth guarantees, but at a significant cost in throughput.<br/><br/>[6]KDE&#x27;s onboarding initiative, one year later : what has gone right in KDE&#x27;s effort to make it easier for contributors to join the project, and what remains to be done.<br/><br/>[7]Sharing and archiving data sets with Dat : an innovative approach to addressing and sharing data on the net.<br/><br/>This week&#x27;s edition also includes these inner pages:<br/><br/>[8]Brief items : Brief news items from throughout the community.<br/><br/>[9]Announcements : Newsletters, conferences, security updates, patches, and more.<br/><br/>Please enjoy this week&#x27;s edition, and, as always, thank you for supporting LWN.net.<br/><br/>[10]Comments (none posted)<br/><br/>[11]An introduction to the Julia language, part 1<br/><br/>August 28, 2018<br/><br/>This article was contributed by Lee Phillips<br/><br/>[12]Julia is a young computer language aimed at serving the needs of scientists, engineers, and other practitioners of numerically intensive programming. It was first publicly released in 2012. After an intense period of language development, version 1.0 was [13]released on August 8. 
The 1.0 release promises years of language stability; users can be confident that developments in the 1.x series will not break their code. This is the first part of a two-part article introducing the world of Julia. This part will introduce enough of the language syntax and constructs to allow you to begin to write simple programs. The following installment will acquaint you with the additional pieces needed to create real projects, and to make use of Julia&#x27;s ecosystem.<br/><br/>Goals and history<br/><br/>The Julia project has ambitious goals. It wants the language to perform about as well as Fortran or C when running numerical algorithms, while remaining as pleasant to program in as Python. I believe the project has met these goals and is poised to see increasing adoption by numerical researchers, especially now that an official, stable release is available.<br/><br/>The Julia project maintains a [14]micro-benchmark page that compares its numerical performance against both statically compiled languages (C, Fortran) and dynamically typed languages (R, Python). While it&#x27;s certainly possible to argue about the relevance and fairness of particular benchmarks, the data overall supports the Julia team&#x27;s contention that Julia has generally achieved parity with Fortran and C; the benchmark source code is available.<br/><br/>Julia began as research in computer science at MIT; its creators are Alan Edelman, Stefan Karpinski, Jeff Bezanson, and Viral Shah. These four remain active developers of the language. They, along with Keno Fischer, co-founder and CTO of [15]Julia Computing , were kind enough to share their thoughts with us about the language. I&#x27;ll be drawing on their comments later on; for now, let&#x27;s get a taste of what Julia code looks like.<br/><br/>Getting started<br/><br/>To explore Julia initially, start up its standard [16]read-eval-print loop (REPL) by typing julia at the terminal, assuming that you have installed it. 
You will then be able to interact with what will seem to be an interpreted language — but, behind the scenes, those commands are being compiled by a just-in-time (JIT) compiler that uses the [17]LLVM compiler framework . This allows Julia to be interactive, while turning the code into fast, native machine instructions. However, the JIT compiler passes sometimes introduce noticeable delays at the REPL, especially when using a function for the first time.<br/><br/>To run a Julia program non-interactively, execute a command like: $ julia script.jl<br/><br/>Julia has all the usual data structures: numbers of various types (including complex and rational numbers), multidimensional arrays, dictionaries, strings, and characters. Functions are first-class: they can be passed as arguments to other functions, can be members of arrays, and so on.<br/><br/>Julia embraces Unicode. Strings, which are enclosed in double quotes, are arrays of Unicode characters, which are enclosed in single quotes. The &quot; * &quot; operator is used for string and character concatenation. Thus &#x27;a&#x27; and &#x27;β&#x27; are characters, and &#x27;aβ&#x27; is a syntax error. &quot;a&quot; and &quot;β&quot; are strings, as are &quot;aβ&quot;, &#x27;a&#x27; * &#x27;β&#x27;, and &quot;a&quot; * &quot;β&quot; — all evaluate to the same string.<br/><br/>Variable and function names can contain non-ASCII characters. This, along with Julia&#x27;s clever syntax that understands numbers prepended to variables to mean multiplication, goes a long way to allowing the numerical scientist to write code that more closely resembles the compact mathematical notation of the equations that usually lie behind it.  julia ε₁ = 0.01<br/><br/>0.01<br/><br/>julia ε₂ = 0.02<br/><br/>0.02<br/><br/>julia 2ε₁ + 3ε₂<br/><br/>0.08<br/><br/>And where does Julia come down on the age-old debate of what do about 1/2 ? 
In Fortran and Python 2, this will get you 0, since 1 and 2 are integers, and the result is rounded down to the integer 0. This was deemed inconsistent, and confusing to some, so it was changed in Python 3 to return 0.5 — which is what you get in Julia, too.<br/><br/>While we&#x27;re on the subject of fractions, Julia can handle rational numbers, with a special syntax: 3//5 + 2//3 returns 19//15 , while 3/5 + 2/3 gets you the floating-point answer 1.2666666666666666. Internally, Julia thinks of a rational number in its reduced form, so the expression 6//8 == 3//4 returns true , and numerator(6//8) returns 3 .<br/><br/>Arrays<br/><br/>Arrays are enclosed in square brackets and indexed with an iterator that can contain a step value:  julia a = [1, 2, 3, 4, 5, 6]<br/><br/>6-element Array{Int64,1}:<br/><br/>1<br/><br/>2<br/><br/>3<br/><br/>4<br/><br/>5<br/><br/>6<br/><br/>julia a[1:2:end]<br/><br/>3-element Array{Int64,1}:<br/><br/>1<br/><br/>3<br/><br/>5<br/><br/>As you can see, indexing starts at one, and the useful end index means the obvious thing. 
When you define a variable in the REPL, Julia replies with the type and value of the assigned data; you can suppress this output by ending your input line with a semicolon.<br/><br/>Since arrays are such a vital part of numerical computation, and Julia makes them easy to work with, we&#x27;ll spend a bit more time with them than the other data structures.<br/><br/>To illustrate the syntax, we can start with a couple of 2D arrays, defined at the REPL:  julia a = [1 2 3; 4 5 6]<br/><br/>2×3 Array{Int64,2}:<br/><br/>1 2 3<br/><br/>4 5 6<br/><br/>julia z = [-1 -2 -3; -4 -5 -6];<br/><br/>Indexing is as expected:  julia a[1, 2]<br/><br/>2<br/><br/>You can glue arrays together horizontally:  julia [a z]<br/><br/>2×6 Array{Int64,2}:<br/><br/>1 2 3 -1 -2 -3<br/><br/>4 5 6 -4 -5 -6<br/><br/>And vertically:  julia [a; z]<br/><br/>4×3 Array{Int64,2}:<br/><br/>1  2  3<br/><br/>4  5  6<br/><br/>-1 -2 -3<br/><br/>-4 -5 -6<br/><br/>Julia has all the usual operators for handling arrays, and [18]linear algebra functions that work with matrices (2D arrays). The linear algebra functions are part of Julia&#x27;s standard library, but need to be imported with a command like &quot; using LinearAlgebra &quot;, which is a detail omitted from the current documentation. The functions include such things as determinants, matrix inverses, eigenvalues and eigenvectors, many kinds of matrix factorizations, etc. 
Julia has not reinvented the wheel here, but wisely uses the [19]LAPACK Fortran library of battle-tested linear algebra routines.<br/><br/>The extension of arithmetic operators to arrays is usually intuitive:  julia a + z<br/><br/>2×3 Array{Int64,2}:<br/><br/>0 0 0<br/><br/>0 0 0<br/><br/>And the numerical prepending syntax works with arrays, too:  julia 3a + 4z<br/><br/>2×3 Array{Int64,2}:<br/><br/>-1 -2 -3<br/><br/>-4 -5 -6<br/><br/>Putting a multiplication operator between two matrices gets you matrix multiplication:  julia a * transpose(a)<br/><br/>2×2 Array{Int64,2}:<br/><br/>14 32<br/><br/>32 77<br/><br/>You can &quot;broadcast&quot; numbers to cover all the elements in an array by prepending the usual arithmetic operators with a dot:  julia 1 .+ a<br/><br/>2×3 Array{Int64,2}:<br/><br/>2 3 4<br/><br/>5 6 7<br/><br/>Note that the language only actually requires the dot for some operators, but not for others, such as &quot;*&quot; and &quot;/&quot;. The reasons for this are arcane, and it probably makes sense to be consistent and use the dot whenever you intend broadcasting. Note also that the current version of the official documentation is incorrect in claiming that you may omit the dot from &quot;+&quot; and &quot;-&quot;; in fact, this now gives an error.<br/><br/>You can use the dot notation to turn any function into one that operates on each element of an array:  julia round.(sin.([0, π/2, π, 3π/2, 2π]))<br/><br/>5-element Array{Float64,1}:<br/><br/>0.0<br/><br/>1.0<br/><br/>0.0<br/><br/>-1.0<br/><br/>-0.0<br/><br/>The example above illustrates chaining two dotted functions together. 
The Julia compiler turns expressions like this into &quot;fused&quot; operations: instead of applying each function in turn to create a new array that is passed to the next function, the compiler combines the functions into a single compound function that is applied once over the array, creating a significant optimization.<br/><br/>You can use this dot notation with any function, including your own, to turn it into a version that operates element-wise over arrays.<br/><br/>Dictionaries (associative arrays) can be defined with several syntaxes. Here&#x27;s one:  julia d1 = Dict(&quot;A&quot;=1, &quot;B&quot;=2)<br/><br/>Dict{String,Int64} with 2 entries:<br/><br/>&quot;B&quot; = 2<br/><br/>&quot;A&quot; = 1<br/><br/>You may have noticed that the code snippets so far have not included any type declarations. Every value in Julia has a type, but the compiler will infer types if they are not specified. It is generally not necessary to declare types for performance, but type declarations sometimes serve other purposes, that we&#x27;ll return to later. Julia has a deep and sophisticated type system, including user-defined types and C-like structs. Types can have behaviors associated with them, and can inherit behaviors from other types. The best thing about Julia&#x27;s type system is that you can ignore it entirely, use just a few pieces of it, or spend weeks studying its design.<br/><br/>Control flow<br/><br/>Julia code is organized in blocks, which can indicate control flow, function definitions, and other code units. Blocks are terminated with the end keyword, and indentation is not significant. Statements are separated either with newlines or semicolons.<br/><br/>Julia has the typical control flow constructs; here is a while block:  julia i = 1;<br/><br/>julia while i 5<br/><br/>print(i)<br/><br/>global i = i + 1<br/><br/>end<br/><br/>1234<br/><br/>Notice the global keyword. 
Most blocks in Julia introduce a local scope for variables; without this keyword here, we would get an error about an undefined variable.<br/><br/>Julia has the usual if statements and for loops that use the same iterators that we introduced above for array indexing. We can also iterate over collections:  julia for i ∈ [&#x27;a&#x27;, &#x27;b&#x27;, &#x27;c&#x27;]<br/><br/>println(i)<br/><br/>end<br/><br/>a<br/><br/>b<br/><br/>c<br/><br/>In place of the fancy math symbol in this for loop, we can use &quot; = &quot; or &quot; in &quot;. If you want to use the math symbol but have no convenient way to type it, the REPL will help you: type &quot; \in &quot; and the TAB key, and the symbol appears; you can type many [20]LaTeX expressions into the REPL in this way.<br/><br/>Development of Julia<br/><br/>The language is developed on GitHub, with over 700 contributors. The Julia team mentioned in their email to us that the decision to use GitHub has been particularly good for Julia, as it streamlined the process for many of their contributors, who are scientists or domain experts in various fields, rather than professional software developers.<br/><br/>The creators of Julia have [21]published [PDF] a detailed “mission statement” for the language, describing their aims and motivations. A key issue that they wanted their language to solve is what they called the &quot;two-language problem.&quot; This situation is familiar to anyone who has used Python or another dynamic language on a demanding numerical problem. 
To get good performance, you will wind up rewriting the numerically intensive parts of the program in C or Fortran, dealing with the interface between the two languages, and may still be disappointed in the overhead presented by calling the foreign routines from your original code.<br/><br/>For Python, [22]NumPy and SciPy wrap many numerical routines, written in Fortran or C, for efficient use from that language, but you can only take advantage of this if your calculation fits the pattern of an available routine; in more general cases, where you will have to write a loop over your data, you are stuck with Python&#x27;s native performance, which is orders of magnitude slower. If you switch to an alternative, faster implementation of Python, such as [23]PyPy , the numerical libraries may not be compatible; NumPy became available for PyPy only within about the past year.<br/><br/>Julia solves the two-language problem by being as expressive and simple to program in as a dynamic scripting language, while having the native performance of a static, compiled language. There is no need to write numerical libraries in a second language, but C or Fortran library routines can be called using a facility that Julia has built-in. Other languages, such as [24]Python or [25]R , can also interoperate easily with Julia using external packages.<br/><br/>Documentation<br/><br/>There are many resources to turn to to learn the language. There is an extensive and detailed [26]manual at Julia headquarters, and this may be a good place to start. However, although the first few chapters provide a gentle introduction, the material soon becomes dense and, at times, hard to follow, with references to concepts that are not explained until later chapters. 
Fortunately, there is a [27]&quot;learning&quot; link at the top of the Julia home page, which takes you to a long list of videos, tutorials, books, articles, and classes both about Julia and that use Julia in teaching subjects such a numerical analysis. There is also a fairly good [28]cheat-sheet [PDF] , which was just updated for v. 1.0.<br/><br/>If you&#x27;re coming from Python, [29]this list of noteworthy differences between Python and Julia syntax will probably be useful.<br/><br/>Some of the linked tutorials are in the form of [30]Jupyter notebooks — indeed, the name &quot;Jupyter&quot; is formed from &quot;Julia&quot;, &quot;Python&quot;, and &quot;R&quot;, which are the three original languages supported by the interface. The [31]Julia kernel for Jupyter was recently upgraded to support v. 1.0. Judicious sampling of a variety of documentation sources, combined with liberal experimentation, may be the best way of learning the language. Jupyter makes this experimentation more inviting for those who enjoy the web-based interface, but the REPL that comes with Julia helps a great deal in this regard by providing, for instance, TAB completion and an extensive help system invoked by simply pressing the &quot;?&quot; key.<br/><br/>Stay tuned<br/><br/>The [32]next installment in this two-part series will explain how Julia is organized around the concept of &quot;multiple dispatch&quot;. You will learn how to create functions and make elementary use of Julia&#x27;s type system. We&#x27;ll see how to install packages and use modules, and how to make graphs. 
Finally, Part 2 will briefly survey the important topics of macros and distributed computing.<br/><br/>[33]Comments (80 posted)<br/><br/>[34]C considered dangerous<br/><br/>By Jake Edge<br/><br/>August 29, 2018<br/><br/>[35]LSS NA<br/><br/>At the North America edition of the [36]2018 Linux Security Summit (LSS NA), which was held in late August in Vancouver, Canada, Kees Cook gave a presentation on some of the dangers that come with programs written in C. In particular, of course, the Linux kernel is mostly written in C, which means that the security of our systems rests on a somewhat dangerous foundation. But there are things that can be done to help firm things up by &quot; Making C Less Dangerous &quot; as the title of his talk suggested.<br/><br/>He began with a brief summary of the work that he and others are doing as part of the [37]Kernel Self Protection Project (KSPP). The goal of the project is to get kernel protections merged into the mainline. These protections are not targeted at protecting user-space processes from other (possibly rogue) processes, but are, instead, focused on protecting the kernel from user-space code. There are around 12 organizations and ten individuals working on roughly 20 different technologies as part of the KSPP, he said. The progress has been &quot;slow and steady&quot;, he said, which is how he thinks it should go.  [38]<br/><br/>One of the main problems is that C is treated mostly like a fancy assembler. The kernel developers do this because they want the kernel to be as fast and as small as possible. There are other reasons, too, such as the need to do architecture-specific tasks that lack a C API (e.g. setting up page tables, switching to 64-bit mode).<br/><br/>But there is lots of undefined behavior in C. This &quot;operational baggage&quot; can lead to various problems. In addition, C has a weak standard library with multiple utility functions that have various pitfalls. 
In C, the content of uninitialized automatic variables is undefined, but in the machine code that it gets translated to, the value is whatever happened to be in that memory location before. In C, a function pointer can be called even if the type of the pointer does not match the type of the function being called—assembly doesn&#x27;t care, it just jumps to a location, he said.<br/><br/>The APIs in the standard library are also bad in many cases. He asked: why is there no argument to memcpy() to specify the maximum destination length? He noted a recent [39]blog post from Raph Levien entitled &quot;With Undefined Behavior, Anything is Possible&quot;. That obviously resonated with Cook, as he pointed out his T-shirt—with the title and artwork from the post.<br/><br/>Less danger<br/><br/>He then moved on to some things that kernel developers can do (and are doing) to get away from some of the dangers of C. He began with variable-length arrays (VLAs), which can be used to overflow the stack to access data outside of its region. Even if the stack has a guard page, VLAs can be used to jump past it to write into other memory, which can then be used by some other kind of attack. The C language is &quot;perfectly fine with this&quot;. It is easy to find uses of VLAs with the -Wvla flag, however.<br/><br/>But it turns out that VLAs are [40]not just bad from a security perspective , they are also slow. In a micro-benchmark associated with a [41]patch removing a VLA , a 13% performance boost came from using a fixed-size array. He dug in a bit further and found that much more code is being generated to handle a VLA, which explains the speed increase. Since Linus Torvalds has [42]declared that VLAs should be removed from the kernel because they cause security problems and also slow the kernel down; Cook said &quot;don&#x27;t use VLAs&quot;.<br/><br/>Another problem area is switch statements, in particular where there is no break for a case . 
That could mean that the programmer expects and wants to fall through to the next case or it could be that the break was simply forgotten. There is a way to get a warning from the compiler for fall-throughs, but there needs to be a way to mark those that are truly meant to be that way. A special fall-through &quot;statement&quot; in the form of a comment is what has been agreed on within the static-analysis community. He and others have been going through each of the places where there is no break to add these comments (or a break ); they have &quot;found a lot of bugs this way&quot;, he said.<br/><br/>Uninitialized local variables will generate a warning, but not if the variable is passed in by reference. There are some GCC plugins that will automatically initialize these variables, but there are also patches for both GCC and Clang to provide a compiler option to do so. Neither of those is upstream yet, but Torvalds has praised the effort so the kernel would likely use the option. An interesting side effect that came about while investigating this was a warning he got about unreachable code when he enabled the auto-initialization. There were two variables declared just after a switch (and outside of any case ), where they would never be reached.<br/><br/>Arithmetic overflow is another undefined behavior in C that can cause various problems. GCC can check for signed overflow, which performs well (the overhead is in the noise, he said), but adding warning messages for it does grow the kernel by 6%; making the overflow abort, instead, only adds 0.1%. Clang can check for both signed and unsigned overflow; signed overflow is undefined, while unsigned overflow is defined, but often unexpected. Marking places where unsigned overflow is expected is needed; it would be nice to get those annotations put into the kernel, Cook said.<br/><br/>Explicit bounds checking is expensive. 
Doing it for copy_{to,from}_user() is a less than 1% performance hit, but adding it to the strcpy() and memcpy() families are around a 2% hit. Pre-Meltdown that would have been a totally impossible performance regression for security, he said; post-Meltdown, since it is less than 5%, maybe there is a chance to add this checking.<br/><br/>Better APIs would help as well. He pointed to the evolution of strcpy() , through str n cpy() and str l cpy() (each with their own bounds flaws) to str s cpy() , which seems to be &quot;OK so far&quot;. He also mentioned memcpy() again as a poor API with respect to bounds checking.<br/><br/>Hardware support for bounds checking is available in the application data integrity (ADI) feature for SPARC and is coming for Arm; it may also be available for Intel processors at some point. These all use a form of &quot;memory tagging&quot;, where allocations get a tag that is stored in the high-order byte of the address. An offset from the address can be checked by the hardware to see if it still falls within the allocated region based on the tag.<br/><br/>Control-flow integrity (CFI) has become more of an issue lately because much of what attackers had used in the past has been marked as &quot;no execute&quot; so they are turning to using existing code &quot;gadgets&quot; already present in the kernel by hijacking existing indirect function calls. In C, you can just call pointers without regard to the type as it just treats them as an address to jump to. Clang has a CFI-sanitize feature that enforces the function prototype to restrict the calls that can be made. It is done at runtime and is not perfect, in part because there are lots of functions in the kernel that take one unsigned long parameter and return an unsigned long.<br/><br/>Attacks on CFI have both a &quot;forward edge&quot;, which is what CFI sanitize tries to handle, and a &quot;backward edge&quot; that comes from manipulating the stack values, the return address in particular. 
Clang has two methods available to prevent the stack manipulation. The first is the &quot;safe stack&quot;, which puts various important items (e.g. &quot;safe&quot; variables, register spills, and the return address) on a separate stack. Alternatively, the &quot;shadow stack&quot; feature creates a separate stack just for return addresses.<br/><br/>One problem with these other stacks is that they are still writable, so if an attacker can find them in memory, they can still perform their attacks. Hardware-based protections, like Intel&#x27;s Control-Flow Enforcement Technology (CET), [43]provides a read-only shadow call stack for return addresses. Another hardware protection is [44]pointer authentication for Arm, which adds a kind of encrypted tag to the return address that can be verified before it is used.<br/><br/>Status and challenges<br/><br/>Cook then went through the current status of handling these different problems in the kernel. VLAs are almost completely gone, he said, just a few remain in the crypto subsystem; he hopes those VLAs will be gone by 4.20 (or whatever the number of the next kernel release turns out to be). Once that happens, he plans to turn on -Wvla for the kernel build so that none creep back in.<br/><br/>There has been steady progress made on marking fall-through cases in switch statements. Only 745 remain to be handled of the 2311 that existed when this work started; each one requires scrutiny to determine what the author&#x27;s intent is. Auto-initialized local variables can be done using compiler plugins, but that is &quot;not quite what we want&quot;, he said. More compiler support would be helpful there. For arithmetic overflow, it would be nice to see GCC get support for the unsigned case, but memory allocations are now doing explicit overflow checking at this point.<br/><br/>Bounds checking has seen some &quot;crying about performance hits&quot;, so we are waiting impatiently for hardware support, he said. 
CFI forward-edge protection needs [45]link-time optimization (LTO) support for Clang in the kernel, but it is currently working on Android. For backward-edge mitigation, the Clang shadow call stack is working on Android, but we are impatiently waiting for hardware support for that too.<br/><br/>There are a number of challenges in doing security development for the kernel, Cook said. There are cultural boundaries due to conservatism within the kernel community; that requires patiently working and reworking features in order to get them upstream. There are, of course, technical challenges because of the complexity of security changes; those kinds of problems can be solved. There are also resource limitations in terms of developers, testers, reviewers, and so on. KSPP and the other kernel security developers are still making that &quot;slow but steady&quot; progress.<br/><br/>Cook&#x27;s [46]slides [PDF] are available for interested readers; before long, there should be a video available of the talk as well.<br/><br/>[I would like to thank LWN&#x27;s travel sponsor, the Linux Foundation, for travel assistance to attend the Linux Security Summit in Vancouver.]<br/><br/>[47]Comments (70 posted)<br/><br/>[48]The second half of the 4.19 merge window<br/><br/>By Jonathan Corbet<br/><br/>August 26, 2018  By the time Linus Torvalds [49]released 4.19-rc1 and closed the merge window for this development cycle, 12,317 non-merge changesets had found their way into the mainline; about 4,800 of those landed after [50]last week&#x27;s summary was written. As tends to be the case late in the merge window, many of those changes were fixes for the bigger patches that went in early, but there were also a number of new features added. 
Some of the more significant changes include:<br/><br/>Core kernel<br/><br/>The full set of patches adding [51]control-group awareness to the out-of-memory killer has not been merged due to ongoing disagreements, but one piece of it has: there is a new memory.oom.group control knob that will cause all processes within a control group to be killed in an out-of-memory situation.<br/><br/>A new set of protections has been added to prevent an attacker from fooling a program into writing to an existing file or FIFO. An open with the O_CREAT flag to a file or FIFO in a world-writable, sticky directory (e.g. /tmp ) will fail if the owner of the opening process is not the owner of either the target file or the containing directory. This behavior, disabled by default, is controlled by the new protected_regular and protected_fifos sysctl knobs.<br/><br/>Filesystems and block layer<br/><br/>The dm-integrity device-mapper target can now use a separate device for metadata storage.<br/><br/>EROFS, the &quot;enhanced read-only filesystem&quot;, has been added to the staging tree. It is &quot; a lightweight read-only file system with modern designs (eg. page-sized blocks, inline xattrs/data, etc.) for scenarios which need high-performance read-only requirements, eg. firmwares in mobile phone or LIVECDs &quot;<br/><br/>The new &quot;metadata copy-up&quot; feature in overlayfs will avoid copying a file&#x27;s contents to the upper layer on a metadata-only change. 
See [52]this commit for details.<br/><br/>Hardware support<br/><br/>Graphics : Qualcomm Adreno A6xx GPUs.<br/><br/>Industrial I/O : Spreadtrum SC27xx series PMIC analog-to-digital converters, Analog Devices AD5758 digital-to-analog converters, Intersil ISL29501 time-of-flight sensors, Silicon Labs SI1133 UV index/ambient light sensor chips, and Bosch Sensortec BME680 sensors.<br/><br/>Miscellaneous : Generic ADC-based resistive touchscreens, Generic ASIC devices via the Google [53]Gasket framework , Analog Devices ADGS1408/ADGS1409 multiplexers, Actions Semi Owl SoCs DMA controllers, MEN 16Z069 watchdog timers, Rohm BU21029 touchscreen controllers, Cirrus Logic CS47L35, CS47L85, CS47L90, and CS47L91 codecs, Cougar 500k gaming keyboards, Qualcomm GENI-based I2C controllers, Actions Semiconductor Owl I2C controllers, ChromeOS EC-based USBPD chargers, and Analog Devices ADP5061 battery chargers.<br/><br/>USB : Nuvoton NPCM7XX on-chip EHCI USB controllers, Broadcom Stingray PCIe PHYs, and Renesas R-Car generation 3 PCIe PHYs.<br/><br/>There is also a new subsystem for the abstraction of GNSS (global navigation satellite systems — GPS, for example) receivers in the kernel. To date, such devices have been handled with an abundance of user-space drivers; the hope is to bring some order in this area. Support for u-blox and SiRFstar receivers has been added as well.<br/><br/>Kernel internal<br/><br/>The __deprecated marker, used to mark interfaces that should no longer be used, has been deprecated and removed from the kernel entirely. [54]Torvalds said : &quot; They are not useful. They annoy everybody, and nobody ever does anything about them, because it&#x27;s always &#x27;somebody elses problem&#x27;. And when people start thinking that warnings are normal, they stop looking at them, and the real warnings that mean something go unnoticed. 
&quot;<br/><br/>The minimum version of GCC required by the kernel has been moved up to 4.6.<br/><br/>There are a couple of significant changes that failed to get in this time around, including the [55]XArray data structure. The patches are thought to be ready, but they had the bad luck to be based on a tree that failed to be merged for other reasons, so Torvalds [56]didn&#x27;t even look at them . That, in turn, blocks another set of patches intended to enable migration of slab-allocated objects.<br/><br/>The other big deferral is the [57]new system-call API for filesystem mounting . Despite ongoing [58]concerns about what happens when the same low-level device is mounted multiple times with conflicting options, Al Viro sent [59]a pull request to send this work upstream. The ensuing discussion made it clear that there is still not a consensus in this area, though, so it seems that this work has to wait for another cycle.<br/><br/>Assuming all goes well, the kernel will stabilize over the coming weeks and the final 4.19 release will happen in mid-October.<br/><br/>[60]Comments (1 posted)<br/><br/>[61]Measuring (and fixing) I/O-controller throughput loss<br/><br/>August 29, 2018<br/><br/>This article was contributed by Paolo Valente<br/><br/>Many services, from web hosting and video streaming to cloud storage, need to move data to and from storage. They also often require that each per-client I/O flow be guaranteed a non-zero amount of bandwidth and a bounded latency. An expensive way to provide these guarantees is to over-provision storage resources, keeping each resource underutilized, and thus have plenty of bandwidth available for the few I/O flows dispatched to each medium. Alternatively one can use an I/O controller. Linux provides two mechanisms designed to throttle some I/O streams to allow others to meet their bandwidth and latency requirements. These mechanisms work, but they come at a cost: a loss of as much as 80% of total available I/O bandwidth. 
I have run some tests to demonstrate this problem; some upcoming improvements to the [62]bfq I/O scheduler promise to improve the situation considerably.<br/><br/>Throttling does guarantee control, even on drives that happen to be highly utilized but, as will be seen, it has a hard time actually ensuring that drives are highly utilized. Even with greedy I/O flows, throttling easily ends up utilizing as little as 20% of the available speed of a flash-based drive. Such a speed loss may be particularly problematic with lower-end storage. On the opposite end, it is also disappointing with high-end hardware, as the Linux block I/O stack itself has been [63]redesigned from the ground up to fully utilize the high speed of modern, fast storage. In addition, throttling fails to guarantee the expected bandwidths if I/O contains both reads and writes, or is sporadic in nature.<br/><br/>On the bright side, there now seems to be an effective alternative for controlling I/O: the proportional-share policy provided by the bfq I/O scheduler. It enables nearly 100% storage bandwidth utilization, at least with some of the workloads that are problematic for throttling. An upcoming version of bfq may be able to achieve this result with almost all workloads. Finally, bfq guarantees bandwidths with all workloads. The current limitation of bfq is that its execution overhead becomes significant at speeds above 400,000 I/O operations per second on commodity CPUs.<br/><br/>Using the bfq I/O scheduler, Linux can now guarantee low latency to lightweight flows containing sporadic, short I/O. No throughput issues arise, and no configuration is required. This capability benefits important, time-sensitive tasks, such as video or audio streaming, as well as executing commands or starting applications. Although benchmarks are not available yet, these guarantees might also be provided by the newly proposed [64]I/O latency controller . 
It allows administrators to set target latencies for I/O requests originating from each group of processes, and favors the groups with the lowest target latency.<br/><br/>The testbed<br/><br/>I ran the tests with an ext4 filesystem mounted on a PLEXTOR PX-256M5S SSD, which features a peak rate of ~160MB/s with random I/O, and of ~500MB/s with sequential I/O. I used blk-mq, in Linux 4.18. The system was equipped with a 2.4GHz Intel Core i7-2760QM CPU and 1.3GHz DDR3 DRAM. In such a system, a single thread doing synchronous reads reaches a throughput of 23MB/s.<br/><br/>For the purposes of these tests, each process is considered to be in one of two groups, termed &quot;target&quot; and &quot;interferers&quot;. A target is a single-process, I/O-bound group whose I/O is focused on. In particular, I measure the I/O throughput enjoyed by this group to get the minimum bandwidth delivered to the group. An interferer is a single-process group whose role is to generate additional I/O that interferes with the I/O of the target. The tested workloads contain one target and multiple interferers.<br/><br/>The single process in each group either reads or writes, through asynchronous (buffered) operations, to one file — different from the file read or written by any other process — after invalidating the buffer cache for the file. I define a reader or writer process as either &quot;random&quot; or &quot;sequential&quot;, depending on whether it reads or writes its file at random positions or sequentially. Finally, an interferer is defined as being either &quot;active&quot; or &quot;inactive&quot; depending on whether it performs I/O during the test. When an interferer is mentioned, it is assumed that the interferer is active.<br/><br/>Workloads are defined so as to try to cover the combinations that, I believe, most influence the performance of the storage device and of the I/O policies. 
For brevity, in this article I show results for only two groups of workloads:<br/><br/>Static sequential : four synchronous sequential readers or four asynchronous sequential writers, plus five inactive interferers.<br/><br/>Static random : four synchronous random readers, all with a block size equal to 4k, plus five inactive interferers.<br/><br/>To create each workload, I considered, for each mix of interferers in the group, two possibilities for the target: it could be either a random or a sequential synchronous reader. In [65]a longer version of this article [PDF] , you will also find results for workloads with varying degrees of I/O randomness, and for dynamic workloads (containing sporadic I/O sources). These extra results confirm the losses of throughput and I/O control for throttling that are shown here.<br/><br/>I/O policies<br/><br/>Linux provides two I/O-control mechanisms for guaranteeing (a minimum) bandwidth, or at least fairness, to long-lived flows: the throttling and proportional-share I/O policies. With throttling, one can set a maximum bandwidth limit — &quot;max limit&quot; for brevity — for the I/O of each group. Max limits can be used, in an indirect way, to provide the service guarantee at the focus of this article. For example, to guarantee minimum bandwidths to I/O flows, a group can be guaranteed a minimum bandwidth by limiting the maximum bandwidth of all the other groups.<br/><br/>Unfortunately, max limits have two drawbacks in terms of throughput. First, if some groups do not use their allocated bandwidth, that bandwidth cannot be reclaimed by other active groups. Second, limits must comply with the worst-case speed of the device, namely, its random-I/O peak rate. Such limits will clearly leave a lot of throughput unused with workloads that otherwise would drive the device to higher throughput levels. Maximizing throughput is simply not a goal of max limits. So, for brevity, test results with max limits are not shown here. 
You can find these results, plus a more detailed description of the above drawbacks, in the long version of this article.<br/><br/>Because of these drawbacks, a new, still experimental, low limit has been added to the throttling policy. If a group is assigned a low limit, then the throttling policy automatically limits the I/O of the other groups in such a way to guarantee to the group a minimum bandwidth equal to its assigned low limit. This new throttling mechanism throttles no group as long as every group is getting at least its assigned minimum bandwidth. I tested this mechanism, but did not consider the interesting problem of guaranteeing minimum bandwidths while, at the same time, enforcing maximum bandwidths.<br/><br/>The other I/O policy available in Linux, proportional share, provides weighted fairness. Each group is assigned a weight, and should receive a portion of the total throughput proportional to its weight. This scheme guarantees minimum bandwidths in the same way that low limits do in throttling. In particular, it guarantees to each group a minimum bandwidth equal to the ratio between the weight of the group, and the sum of the weights of all the groups that may be active at the same time.<br/><br/>The actual implementation of the proportional-share policy, on a given drive, depends on what flavor of the block layer is in use for that drive. If the drive is using the legacy block interface, the policy is implemented by the cfq I/O scheduler. Unfortunately, cfq fails to control bandwidths with flash-based storage, especially on drives featuring command queueing. This case is not considered in these tests. With drives using the multiqueue interface, proportional share is implemented by bfq. This is the combination considered in the tests.<br/><br/>To benchmark both throttling (low limits) and proportional share, I tested, for each workload, the combinations of I/O policies and I/O schedulers reported in the table below. 
In the end, there are three test cases for each workload. In addition, for some workloads, I considered two versions of bfq for the proportional-share policy.<br/><br/>Name<br/><br/>I/O policy<br/><br/>Scheduler<br/><br/>Parameter for target<br/><br/>Parameter for each of the four active interferers<br/><br/>Parameter for each of the five inactive interferers<br/><br/>Sum of parameters<br/><br/>low-none<br/><br/>Throttling with low limits<br/><br/>none<br/><br/>10MB/s<br/><br/>10MB/s (tot: 40)<br/><br/>20MB/s (tot: 100)<br/><br/>150MB/s<br/><br/>prop-bfq<br/><br/>Proportional share<br/><br/>bfq<br/><br/>300<br/><br/>100 (tot: 400)<br/><br/>200 (tot: 1000)<br/><br/>1700<br/><br/>For low limits, I report results with only none as the I/O scheduler, because the results are the same with kyber and mq-deadline.<br/><br/>The capabilities of the storage medium and of low limits drove the policy configurations. In particular:<br/><br/>The configuration of the target and of the active interferers for low-none is the one for which low-none provides its best possible minimum-bandwidth guarantee to the target: 10MB/s, guaranteed if all interferers are readers. 
Results remain the same regardless of the values used for target latency and idle time; I set them to 100µs and 1000µs, respectively, for every group.<br/><br/>Low limits for inactive interferers are set to twice the limits for active interferers, to pose greater difficulties to the policy.<br/><br/>I chose weights for prop-bfq so as to guarantee about the same minimum bandwidth as low-none to the target, in the same only-reader worst case as for low-none and to preserve, between the weights of active and inactive interferers, the same ratio as between the low limits of active and inactive interferers.<br/><br/>Full details on configurations can be found in the long version of this article.<br/><br/>Each workload was run ten times for each policy, plus ten times without any I/O control, i.e., with none as I/O scheduler and no I/O policy in use. For each run, I measured the I/O throughput of the target (which reveals the bandwidth provided to the target), the cumulative I/O throughput of the interferers, and the total I/O throughput. These quantities fluctuated very little during each run, as well as across different runs. Thus in the graphs I report only averages over per-run average throughputs. In particular, for the case of no I/O control, I report only the total I/O throughput, to give an idea of the throughput that can be reached without imposing any control.<br/><br/>Results<br/><br/>This plot shows throughput results for the simplest group of workloads: the static-sequential set.<br/><br/>With a random reader as the target against sequential readers as interferers, low-none does guarantee the configured low limit to the target. Yet it reaches only a low total throughput. The throughput of the random reader evidently oscillates around 10MB/s during the test. This implies that it is at least slightly below 10MB/s for a significant percentage of the time. 
But when this happens, the low-limit mechanism limits the maximum bandwidth of every active group to the low limit set for the group, i.e., to just 10MB/s. The end result is a total throughput lower than 10% of the throughput reached without I/O control.<br/><br/>That said, the high throughput achieved without I/O control is obtained by choking the random I/O of the target in favor of the sequential I/O of the interferers. Thus, it is probably more interesting to compare low-none throughput with the throughput reachable while actually guaranteeing 10MB/s to the target. The target is a single, synchronous, random reader, which reaches 23MB/s while active. So, to guarantee 10MB/s to the target, it is enough to serve it for about half of the time, and the interferers for the other half. Since the device reaches ~500MB/s with the sequential I/O of the interferers, the resulting throughput with this service scheme would be (500+23)/2, or about 260MB/s. low-none thus reaches less than 20% of the total throughput that could be reached while still preserving the target bandwidth.<br/><br/>prop-bfq provides the target with a slightly higher throughput than low-none. This makes it harder for prop-bfq to reach a high total throughput, because prop-bfq serves more random I/O (from the target) than low-none. Nevertheless, prop-bfq gets a much higher total throughput than low-none. According to the above estimate, this throughput is about 90% of the maximum throughput that could be reached, for this workload, without violating service guarantees. The reason for this good result is that bfq provides an effective implementation of the proportional-share service policy. At any time, each active group is granted a fraction of the current total throughput, and the sum of these fractions is equal to one; so group bandwidths naturally saturate the available total throughput at all times.<br/><br/>Things change with the second workload: a random reader against sequential writers. 
Now low-none reaches a much higher total throughput than prop-bfq. low-none serves much more sequential (write) I/O than prop-bfq because writes somehow break the low-limit mechanisms and prevail over the reads of the target. Conceivably, this happens because writes tend to both starve reads in the OS (mainly by eating all available I/O tags) and to cheat on their completion time in the drive. In contrast, bfq is intentionally configured to privilege reads, to counter these issues.<br/><br/>In particular, low-none gets an even higher throughput than no I/O control at all because it penalizes the random I/O of the target even more than the no-controller configuration.<br/><br/>Finally, with the last two workloads, prop-bfq reaches even higher total throughput than with the first two. It happens because the target also does sequential I/O, and serving sequential I/O is much more beneficial for throughput than serving random I/O. With these two workloads, the total throughput is, respectively, close to or much higher than that reached without I/O control. For the last workload, the total throughput is much higher because, differently from none, bfq privileges reads over asynchronous writes, and reads yield a higher throughput than writes. In contrast, low-none still gets lower or much lower throughput than prop-bfq, because of the same issues that hinder low-none throughput with the first two workloads.<br/><br/>As for bandwidth guarantees, with readers as interferers (third workload), prop-bfq, as expected, gives the target a fraction of the total throughput proportional to its weight. bfq approximates perfect proportional-share bandwidth distribution among groups doing I/O of the same type (reads or writes) and with the same locality (sequential or random). 
With the last workload, prop-bfq gives much more throughput to the reader than to all the interferers, because interferers are asynchronous writers, and bfq privileges reads.<br/><br/>The second group of workloads (static random), is the one, among all the workloads considered, for which prop-bfq performs worst. Results are shown below:<br/><br/>This chart reports results not only for mainline bfq, but also for an improved version of bfq which is currently under public testing. As can be seen, with only random readers, prop-bfq reaches a much lower total throughput than low-none. This happens because of the Achilles heel of the bfq I/O scheduler. If the process in service does synchronous I/O and has a higher weight than some other process, then, to give strong bandwidth guarantees to that process, bfq plugs I/O dispatching every time the process temporarily stops issuing I/O requests. In this respect, processes actually have differentiated weights and do synchronous I/O in the workloads tested. So bfq systematically performs I/O plugging for them. Unfortunately, this plugging empties the internal queues of the drive, which kills throughput with random I/O. And the I/O of all processes in these workloads is also random.<br/><br/>The situation reverses with a sequential reader as target. Yet, the most interesting results come from the new version of bfq, containing small changes to counter exactly the above weakness. This version recovers most of the throughput loss with the workload made of only random I/O and more; with the second workload, where the target is a sequential reader, it reaches about 3.7 times the total throughput of low-none.<br/><br/>When the main concern is the latency of flows containing short I/O, Linux seems now rather high performing, thanks to the bfq I/O scheduler and the I/O latency controller. 
But if the requirement is to provide explicit bandwidth guarantees (or just fairness) to I/O flows, then one must be ready to give up much or most of the speed of the storage media. bfq helps with some workloads, but loses most of the throughput with workloads consisting of mostly random I/O. Fortunately, there is apparently hope for much better performance since an improvement, still under development, seems to enable bfq to reach a high throughput with all workloads tested so far.<br/><br/>[I wish to thank Vivek Goyal for enabling me to make this article much more fair and sound.]<br/><br/>[66]Comments (4 posted)<br/><br/>[67]KDE&#x27;s onboarding initiative, one year later<br/><br/>August 24, 2018<br/><br/>This article was contributed by Marta Rybczyńska<br/><br/>[68]Akademy<br/><br/>In 2017, the KDE community decided on [69]three goals to concentrate on for the next few years. One of them was [70]streamlining the onboarding of new contributors (the others were [71]improving usability and [72]privacy ). During [73]Akademy , the yearly KDE conference that was held in Vienna in August, Neofytos Kolokotronis shared the status of the onboarding goal, the work done during the last year, and further plans. While it is a complicated process in a project as big and diverse as KDE, numerous improvements have been already made.<br/><br/>Two of the three KDE community goals were proposed by relative newcomers. Kolokotronis was one of those, having joined the [74]KDE Promo team not long before proposing the focus on onboarding. He had previously been involved with [75]Chakra Linux , a distribution based on KDE software. The fact that new members of the community proposed strategic goals was also noted in the [76]Sunday keynote by Claudia Garad .<br/><br/>Proper onboarding adds excitement to the contribution process and increases retention, he explained. 
When we look at [77]the definition of onboarding , it is a process in which the new contributors acquire knowledge, skills, and behaviors so that they can contribute effectively. Kolokotronis proposed to see it also as socialization: integration into the project&#x27;s relationships, culture, structure, and procedures.<br/><br/>The gains from proper onboarding are many. The project can grow by attracting new blood with new perspectives and solutions. The community maintains its health and stays vibrant. Another important advantage of efficient onboarding is that replacing current contributors becomes easier when they change interests, jobs, or leave the project for whatever reason. Finally, successful onboarding adds new advocates to the project.<br/><br/>Achievements so far and future plans<br/><br/>The team started with ideas for a centralized onboarding process for the whole of KDE. They found out quickly that this would not work because KDE is &quot;very decentralized&quot;, so it is hard to provide tools and procedures that are going to work for the whole project. According to Kolokotronis, other characteristics of KDE that impact onboarding are high diversity, remote and online teams, and hundreds of contributors in dozens of projects and teams. In addition, new contributors already know in which area they want to take part and they prefer specific information that will be directly useful for them.<br/><br/>So the team changed its approach; several changes have since been proposed and implemented. The [78]Get Involved page, which is expected to be one of the resources new contributors read first, has been rewritten. For the [79]Junior Jobs page , the team is [80] [81]discussing what the generic content for KDE as a whole should be. The team simplified [82]Phabricator registration , which resulted in documenting the process better. 
Another part of the work includes the [83]KDE Bugzilla ; it includes, for example, initiatives to limit the number of states of a ticket or remove obsolete products.<br/><br/>The [84]Plasma Mobile team is heavily involved in the onboarding goal. The Plasma Mobile developers have simplified their development environment setup and created an [85]interactive &quot;Get Involved&quot; page. In addition, the Plasma team changed the way task descriptions are written; they now contain more detail, so that it is easier to get involved. The basic description should be short and clear, and it should include details of the problem and possible solutions. The developers try to share the list of skills necessary to fulfill the tasks and include clear links to the technical resources needed.<br/><br/>Kolokotronis and team also identified a new potential source of contributors for KDE: distributions using KDE. They have the advantage of already knowing and using the software. The next idea the team is working on is to make sure that setting up a development environment is easy. The team plans to work on this during a dedicated sprint this autumn.<br/><br/>Searching for new contributors<br/><br/>Kolokotronis plans to search for new contributors at the periphery of the project, among the &quot;skilled enthusiasts&quot;: loyal users who actually care about the project. They &quot;can make wonders&quot;, he said. Those individuals may also be less confident or shy, have trouble making the first step, and need guidance. The project leaders should take that into account.<br/><br/>In addition, newcomers are all different. Kolokotronis provided a long list of how contributors differ, including skills and knowledge, motives and interests, and time and dedication. His advice is to &quot;try to find their superpower&quot;, the skills they have that are missing in the team. 
Those &quot;superpowers&quot; can then be used for the benefit of the project.<br/><br/>If a project does nothing else, he said, it can start with its documentation. However, this does not only mean code documentation. Writing down the procedures or information about the internal work of the project, like who is working on what, is an important part of a project&#x27;s documentation and helps newcomers. There should also be guidelines on how to start, especially setting up the development environment.<br/><br/>The first thing the project leaders should do, according to Kolokotronis, is to spend time on introducing newcomers to the project. Ideally every new contributor should be assigned mentors — more experienced members who can help them when needed. The mentors and project leaders should find tasks that are interesting for each person. Answering an audience question on suggestions for shy new contributors, he recommended even more mentoring. It is also very helpful to make sure that newcomers have enough to read, but &quot;avoid RTFM&quot;, he highlighted. It is also easy for a new contributor &quot;to fly away&quot;, he said. The solution is to keep requesting things and be proactive.<br/><br/>What can the project do?<br/><br/>Kolokotronis suggested a number of actions for a project when it wants to improve its onboarding. The first step is preparation: the project leaders should know the team&#x27;s and the project&#x27;s needs. Long-term planning is important, too. It is not enough to wait for contributors to come — the project should be proactive, which means reaching out to candidates, suggesting appropriate tasks and, finally, making people available for the newcomers if they need help.<br/><br/>This leads to the next step: to be a mentor. Kolokotronis suggests being a &quot;great host&quot;, but also trying to phase out the dependency on the mentor rapidly. &quot;We have been all newcomers&quot;, he said. It can be intimidating to join an existing group. 
Onboarding creates a sense of belonging which, in turn, increases retention.<br/><br/>The last step proposed was to be strategic. This includes thinking about the emotions you want newcomers to feel. Kolokotronis explained the strategic part with an example. The overall goal is (surprise!) to improve onboarding of new contributors. An intermediate objective might be to keep the newcomers after they have made their first commit. If your strategy is to keep them confident and proud, you can use different tactics like praise and acknowledgment of the work in public. Another useful tactic may be assigning simple tasks, according to the skill of the contributor.<br/><br/>To summarize, the most important thing, according to Kolokotronis, is to respond quickly and spend time with new contributors. This time should be used to explain procedures, and to introduce the people and culture. It is also essential to guide first contributions and praise contributors&#x27; skill and effort. Increase the difficulty of tasks over time to keep contributors motivated and challenged. And finally, he said, &quot;turn them into mentors&quot;.<br/><br/>Kolokotronis acknowledges that onboarding &quot;takes time&quot; and &quot;everyone complains&quot; about it. However, he is convinced that it is beneficial in the long term and that it decreases developer turnover.<br/><br/>Advice to newcomers<br/><br/>Kolokotronis concluded with some suggestions for newcomers to a project. They should try to be persistent and to not get discouraged when something goes wrong. Building connections from the very beginning is helpful. He suggests asking questions as if you were already a member &quot;and things will be fine&quot;. 
However, accept criticism if it happens.<br/><br/>One of the next actions of the onboarding team will be to collect feedback from newcomers and experienced contributors to see if they agree on the ideas and processes introduced so far.<br/><br/>[86]Comments (none posted)<br/><br/>[87]Sharing and archiving data sets with Dat<br/><br/>August 27, 2018<br/><br/>This article was contributed by Antoine Beaupré<br/><br/>[88]Dat is a new peer-to-peer protocol that uses some of the concepts of [89]BitTorrent and Git. Dat primarily targets researchers and open-data activists as it is a great tool for sharing, archiving, and cataloging large data sets. But it can also be used to implement decentralized web applications in a novel way.<br/><br/>Dat quick primer<br/><br/>Dat is written in JavaScript, so it can be installed with npm , but there are [90]standalone binary builds and a [91]desktop application (as an AppImage). An [92]online viewer can be used to inspect data for those who do not want to install arbitrary binaries on their computers.<br/><br/>The command-line application allows basic operations like downloading existing data sets and sharing your own. Dat uses a 32-byte hex string that is an [93]ed25519 public key , which is used to discover and find content on the net. For example, this will download some sample data:  $ dat clone \<br/><br/>dat://778f8d955175c92e4ced5e4f5563f69bfec0c86cc6f670352c457943666fe639 \<br/><br/>~/Downloads/dat-demo<br/><br/>Similarly, the share command is used to share content. It indexes the files in a given directory and creates a new unique address like the one above. The share command starts a server that uses multiple discovery mechanisms (currently, the [94]Mainline Distributed Hash Table (DHT), a [95]custom DNS server , and multicast DNS) to announce the content to its peers. 
This is how another user, armed with that public key, can download that content with dat clone or mirror the files continuously with dat sync .<br/><br/>So far, this looks a lot like BitTorrent [96]magnet links updated with 21st century cryptography. But Dat adds revisions on top of that, so modifications are automatically shared through the swarm. That is important for public data sets as those are often dynamic in nature. Revisions also make it possible to use [97]Dat as a backup system by saving the data incrementally using an [98]archiver .<br/><br/>While Dat is designed to work on larger data sets, processing them for sharing may take a while. For example, sharing the Linux kernel source code required about five minutes as Dat worked on indexing all of the files. This is comparable to the performance offered by [99]IPFS and BitTorrent. Data sets with more or larger files may take quite a bit more time.<br/><br/>One advantage that Dat has over IPFS is that it doesn&#x27;t duplicate the data. When IPFS imports new data, it duplicates the files into ~/.ipfs . For collections of small files like the kernel, this is not a huge problem, but for larger files like videos or music, it&#x27;s a significant limitation. IPFS eventually implemented a solution to this [100]problem in the form of the experimental [101]filestore feature , but it&#x27;s not enabled by default. Even with that feature enabled, though, changes to data sets are not automatically tracked. In comparison, Dat operation on dynamic data feels much lighter. The downside is that each set needs its own dat share process.<br/><br/>Like any peer-to-peer system, Dat needs at least one peer to stay online to offer the content, which is impractical for mobile devices. Hosting providers like [102]Hashbase (which is a [103]pinning service in Dat jargon) can help users keep content online without running their own [104]server . 
The closest parallel in the traditional web ecosystem would probably be content distribution networks (CDN) although pinning services are not necessarily geographically distributed and a CDN does not necessarily retain a complete copy of a website.  [105]<br/><br/>A web browser called [106]Beaker , based on the [107]Electron framework, can access Dat content natively without going through a pinning service. Furthermore, Beaker is essential to get any of the [108]Dat applications working, as they fundamentally rely on dat:// URLs to do their magic. This means that Dat applications won&#x27;t work for most users unless they install that special web browser. There is a [109]Firefox extension called &quot; [110]dat-fox &quot; for people who don&#x27;t want to install yet another browser, but it requires installing a [111]helper program . The extension will be able to load dat:// URLs but many applications will still not work. For example, the [112]photo gallery application completely fails with dat-fox.<br/><br/>Dat-based applications look promising from a privacy point of view. Because of its peer-to-peer nature, users regain control over where their data is stored: either on their own computer, an online server, or by a trusted third party. But considering the protocol is not well established in current web browsers, I foresee difficulties in adoption of that aspect of the Dat ecosystem. Beyond that, it is rather disappointing that Dat applications cannot run natively in a web browser given that JavaScript is designed exactly for that.<br/><br/>Dat privacy<br/><br/>An advantage Dat has over other peer-to-peer protocols like BitTorrent is end-to-end encryption. I was originally concerned by the encryption design when reading the [113]academic paper [PDF] :<br/><br/>It is up to client programs to make design decisions around which discovery networks they trust. 
For example if a Dat client decides to use the BitTorrent DHT to discover peers, and they are searching for a publicly shared Dat key (e.g. a key cited publicly in a published scientific paper) with known contents, then because of the privacy design of the BitTorrent DHT it becomes public knowledge what key that client is searching for.<br/><br/>So in other words, to share a secret file with another user, the public key is transmitted over a secure side-channel, only to then leak during the discovery process. Fortunately, the public Dat key is not directly used during discovery as it is [114]hashed with BLAKE2B . Still, the security model of Dat assumes the public key is private, which is a rather counterintuitive concept that might upset cryptographers and confuse users who are frequently encouraged to type such strings in address bars and search engines as part of the Dat experience. There is a [115]security &amp; privacy FAQ in the Dat documentation warning about this problem:<br/><br/>One of the key elements of Dat privacy is that the public key is never used in any discovery network. The public key is hashed, creating the discovery key. Whenever peers attempt to connect to each other, they use the discovery key.<br/><br/>Data is encrypted using the public key, so it is important that this key stays secure.<br/><br/>There are other privacy issues outlined in the document; it states that &quot; Dat faces similar privacy risks as BitTorrent &quot;:<br/><br/>When you download a dataset, your IP address is exposed to the users sharing that dataset. This may lead to honeypot servers collecting IP addresses, as we&#x27;ve seen in Bittorrent. However, with dataset sharing we can create a web of trust model where specific institutions are trusted as primary sources for datasets, diminishing the sharing of IP addresses.<br/><br/>A Dat blog post refers to this issue as [116]reader privacy and it is, indeed, a sensitive issue in peer-to-peer networks. 
It is how BitTorrent users are discovered and served scary verbiage from lawyers, after all. But Dat makes this a little better because, to join a swarm, you must know what you are looking for already, which means peers who can look at swarm activity only include users who know the secret public key. This works well for secret content, but for larger, public data sets, it is a real problem; it is why the Dat project has [117]avoided creating a Wikipedia mirror so far.<br/><br/>I found another privacy issue that is not documented in the security FAQ during my review of the protocol. As mentioned earlier, the [118]Dat discovery protocol routinely phones home to DNS servers operated by the Dat project. This implies that the default discovery servers (and an attacker watching over their traffic) know who is publishing or seeking content, in essence discovering the &quot;social network&quot; behind Dat. This discovery mechanism can be disabled in clients, but a similar privacy issue applies to the DHT as well, although that is distributed so it doesn&#x27;t require trust of the Dat project itself.<br/><br/>Considering those aspects of the protocol, privacy-conscious users will probably want to use Tor or other anonymization techniques to work around those concerns.<br/><br/>The future of Dat<br/><br/>[119]Dat 2.0 was released in June 2017 with performance improvements and protocol changes. [120]Dat Enhancement Proposals (DEPs) guide the project&#x27;s future development; most work is currently geared toward implementing the draft &quot; [121]multi-writer proposal &quot; in [122]HyperDB . Without multi-writer support, only the original publisher of a Dat can modify it. According to Joe Hand, co-executive-director of [123]Code for Science &amp; Society (CSS) and Dat core developer, in an IRC chat, &quot;supporting multiwriter is a big requirement for lots of folks&quot;. 
For example, while Dat might allow Alice to share her research results with Bob, he cannot modify or contribute back to those results. The multi-writer extension allows for Alice to assign trust to Bob so he can have write access to the data.<br/><br/>Unfortunately, the current proposal doesn&#x27;t solve the &quot; hard problems &quot; of &quot; conflict merges and secure key distribution &quot;. The former will be worked out through user interface tweaks, but the latter is a classic problem that security projects have typically trouble finding solutions for—Dat is no exception. How will Alice securely trust Bob? The OpenPGP web of trust? Hexadecimal fingerprints read over the phone? Dat doesn&#x27;t provide a magic solution to this problem.<br/><br/>Another thing limiting adoption is that Dat is not packaged in any distribution that I could find (although I [124]requested it in Debian ) and, considering the speed of change of the JavaScript ecosystem, this is unlikely to change any time soon. A [125]Rust implementation of the Dat protocol has started, however, which might be easier to package than the multitude of [126]Node.js modules. In terms of mobile device support, there is an experimental Android web browser with Dat support called [127]Bunsen , which somehow doesn&#x27;t run on my phone. Some adventurous users have successfully run Dat in [128]Termux . I haven&#x27;t found an app running on iOS at this point.<br/><br/>Even beyond platform support, distributed protocols like Dat have a tough slope to climb against the virtual monopoly of more centralized protocols, so it remains to be seen how popular those tools will be. Hand says Dat is supported by multiple non-profit organizations. Beyond CSS, [129]Blue Link Labs is working on the Beaker Browser as a self-funded startup and a grass-roots organization, [130]Digital Democracy , has contributed to the project. 
The [131]Internet Archive has [132]announced a collaboration between itself, CSS, and the California Digital Library to launch a pilot project to see &quot; how members of a cooperative, decentralized network can leverage shared services to ensure data preservation while reducing storage costs and increasing replication counts &quot;.<br/><br/>Hand said adoption in academia has been &quot;slow but steady&quot; and that the [133]Dat in the Lab project has helped identify areas that could help researchers adopt the project. Unfortunately, as is the case with many free-software projects, he said that &quot;our team is definitely a bit limited on bandwidth to push for bigger adoption&quot;. Hand said that the project received a grant from [134]Mozilla Open Source Support to improve its documentation, which will be a big help.<br/><br/>Ultimately, Dat suffers from a problem common to all peer-to-peer applications, which is naming. Dat addresses are not exactly intuitive: humans do not remember strings of 64 hexadecimal characters well. For this, Dat took a [135]similar approach to IPFS by using DNS TXT records and /.well-known URL paths to bridge existing, human-readable names with Dat hashes. So this sacrifices a part of the decentralized nature of the project in favor of usability.<br/><br/>I have tested a lot of distributed protocols like Dat in the past and I am not sure Dat is a clear winner. It certainly has advantages over IPFS in terms of usability and resource usage, but the lack of packages on most platforms is a big limit to adoption for most people. This means it will be difficult to share content with my friends and family with Dat anytime soon, which would probably be my primary use case for the project. 
Until the protocol reaches the wider adoption that BitTorrent has seen in terms of platform support, I will probably wait before switching everything over to this promising project.<br/><br/>[136]Comments (11 posted)<br/><br/>Page editor : Jonathan Corbet<br/><br/>Inside this week&#x27;s LWN.net Weekly Edition<br/><br/>[137]Briefs : OpenSSH 7.8; 4.19-rc1; Which stable?; Netdev 0x12; Bison 3.1; Quotes; ...<br/><br/>[138]Announcements : Newsletters; events; security updates; kernel patches; ...  Next page : [139]Brief items&gt;&gt;<br/><br/><br/><br/>[1] https://lwn.net/Articles/763743/<br/><br/>[2] https://lwn.net/Articles/763626/<br/><br/>[3] https://lwn.net/Articles/763641/<br/><br/>[4] https://lwn.net/Articles/763106/<br/><br/>[5] https://lwn.net/Articles/763603/<br/><br/>[6] https://lwn.net/Articles/763175/<br/><br/>[7] https://lwn.net/Articles/763492/<br/><br/>[8] https://lwn.net/Articles/763254/<br/><br/>[9] https://lwn.net/Articles/763255/<br/><br/>[10] https://lwn.net/Articles/763743/#Comments<br/><br/>[11] https://lwn.net/Articles/763626/<br/><br/>[12] http://julialang.org/<br/><br/>[13] https://julialang.org/blog/2018/08/one-point-zero<br/><br/>[14] https://julialang.org/benchmarks/<br/><br/>[15] https://juliacomputing.com/<br/><br/>[16] https://en.wikipedia.org/wiki/Read%E2%80%93eval%E2%80%93print_loop<br/><br/>[17] http://llvm.org/<br/><br/>[18] http://www.3blue1brown.com/essence-of-linear-algebra-page/<br/><br/>[19] http://www.netlib.org/lapack/<br/><br/>[20] https://lwn.net/Articles/657157/<br/><br/>[21] https://julialang.org/publications/julia-fresh-approach-BEKS.pdf<br/><br/>[22] https://lwn.net/Articles/738915/<br/><br/>[23] https://pypy.org/<br/><br/>[24] https://github.com/JuliaPy/PyCall.jl<br/><br/>[25] https://github.com/JuliaInterop/RCall.jl<br/><br/>[26] https://docs.julialang.org/en/stable/<br/><br/>[27] https://julialang.org/learning/<br/><br/>[28] http://bogumilkaminski.pl/files/julia_express.pdf<br/><br/>[29] 
https://docs.julialang.org/en/stable/manual/noteworthy-differences/#Noteworthy-differences-from-Python-1<br/><br/>[30] https://lwn.net/Articles/746386/<br/><br/>[31] https://github.com/JuliaLang/IJulia.jl<br/><br/>[32] https://lwn.net/Articles/764001/<br/><br/>[33] https://lwn.net/Articles/763626/#Comments<br/><br/>[34] https://lwn.net/Articles/763641/<br/><br/>[35] https://lwn.net/Archives/ConferenceByYear/#2018-Linux_Security_Summit_NA<br/><br/>[36] https://events.linuxfoundation.org/events/linux-security-summit-north-america-2018/<br/><br/>[37] https://kernsec.org/wiki/index.php/Kernel_Self_Protection_Project<br/><br/>[38] https://lwn.net/Articles/763644/<br/><br/>[39] https://raphlinus.github.io/programming/rust/2018/08/17/undefined-behavior.html<br/><br/>[40] https://lwn.net/Articles/749064/<br/><br/>[41] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=02361bc77888<br/><br/>[42] https://lore.kernel.org/lkml/CA+55aFzCG-zNmZwX4A2FQpadafLfEzK6CC=qPXydAacU1RqZWA@mail.gmail.com/T/#u<br/><br/>[43] https://lwn.net/Articles/758245/<br/><br/>[44] https://lwn.net/Articles/718888/<br/><br/>[45] https://lwn.net/Articles/744507/<br/><br/>[46] https://outflux.net/slides/2018/lss/danger.pdf<br/><br/>[47] https://lwn.net/Articles/763641/#Comments<br/><br/>[48] https://lwn.net/Articles/763106/<br/><br/>[49] https://lwn.net/Articles/763497/<br/><br/>[50] https://lwn.net/Articles/762566/<br/><br/>[51] https://lwn.net/Articles/761118/<br/><br/>[52] https://git.kernel.org/linus/d5791044d2e5749ef4de84161cec5532e2111540<br/><br/>[53] https://lwn.net/ml/linux-kernel/20180630000253.70103-1-sque@chromium.org/<br/><br/>[54] https://git.kernel.org/linus/771c035372a036f83353eef46dbb829780330234<br/><br/>[55] https://lwn.net/Articles/745073/<br/><br/>[56] https://lwn.net/ml/linux-kernel/CA+55aFxFjAmrFpwQmEHCthHOzgidCKnod+cNDEE+3Spu9o1s3w@mail.gmail.com/<br/><br/>[57] https://lwn.net/Articles/759499/<br/><br/>[58] https://lwn.net/Articles/762355/<br/><br/>[59] 
https://lwn.net/ml/linux-fsdevel/20180823223145.GK6515@ZenIV.linux.org.uk/<br/><br/>[60] https://lwn.net/Articles/763106/#Comments<br/><br/>[61] https://lwn.net/Articles/763603/<br/><br/>[62] https://lwn.net/Articles/601799/<br/><br/>[63] https://lwn.net/Articles/552904<br/><br/>[64] https://lwn.net/Articles/758963/<br/><br/>[65] http://algogroup.unimore.it/people/paolo/pub-docs/extended-lat-bw-throughput.pdf<br/><br/>[66] https://lwn.net/Articles/763603/#Comments<br/><br/>[67] https://lwn.net/Articles/763175/<br/><br/>[68] https://lwn.net/Archives/ConferenceByYear/#2018-Akademy<br/><br/>[69] https://dot.kde.org/2017/11/30/kdes-goals-2018-and-beyond<br/><br/>[70] https://phabricator.kde.org/T7116<br/><br/>[71] https://phabricator.kde.org/T6831<br/><br/>[72] https://phabricator.kde.org/T7050<br/><br/>[73] https://akademy.kde.org/<br/><br/>[74] https://community.kde.org/Promo<br/><br/>[75] https://www.chakralinux.org/<br/><br/>[76] https://conf.kde.org/en/Akademy2018/public/events/79<br/><br/>[77] https://en.wikipedia.org/wiki/Onboarding<br/><br/>[78] https://community.kde.org/Get_Involved<br/><br/>[79] https://community.kde.org/KDE/Junior_Jobs<br/><br/>[80] https://lwn.net/Articles/763189/<br/><br/>[81] https://phabricator.kde.org/T8686<br/><br/>[82] https://phabricator.kde.org/T7646<br/><br/>[83] https://bugs.kde.org/<br/><br/>[84] https://www.plasma-mobile.org/index.html<br/><br/>[85] https://www.plasma-mobile.org/findyourway<br/><br/>[86] https://lwn.net/Articles/763175/#Comments<br/><br/>[87] https://lwn.net/Articles/763492/<br/><br/>[88] https://datproject.org<br/><br/>[89] https://www.bittorrent.com/<br/><br/>[90] https://github.com/datproject/dat/releases<br/><br/>[91] https://docs.datproject.org/install<br/><br/>[92] https://datbase.org/<br/><br/>[93] https://ed25519.cr.yp.to/<br/><br/>[94] https://en.wikipedia.org/wiki/Mainline_DHT<br/><br/>[95] https://github.com/mafintosh/dns-discovery<br/><br/>[96] 
https://en.wikipedia.org/wiki/Magnet_URI_scheme<br/><br/>[97] https://blog.datproject.org/2017/10/13/using-dat-for-automatic-file-backups/<br/><br/>[98] https://github.com/mafintosh/hypercore-archiver<br/><br/>[99] https://ipfs.io/<br/><br/>[100] https://github.com/ipfs/go-ipfs/issues/875<br/><br/>[101] https://github.com/ipfs/go-ipfs/blob/master/docs/experimental-features.md#ipfs-filestore<br/><br/>[102] https://hashbase.io/<br/><br/>[103] https://github.com/datprotocol/DEPs/blob/master/proposals/0003-http-pinning-service-api.md<br/><br/>[104] https://docs.datproject.org/server<br/><br/>[105] https://lwn.net/Articles/763544/<br/><br/>[106] https://beakerbrowser.com/<br/><br/>[107] https://electronjs.org/<br/><br/>[108] https://github.com/beakerbrowser/explore<br/><br/>[109] https://addons.mozilla.org/en-US/firefox/addon/dat-p2p-protocol/<br/><br/>[110] https://github.com/sammacbeth/dat-fox<br/><br/>[111] https://github.com/sammacbeth/dat-fox-helper<br/><br/>[112] https://github.com/beakerbrowser/dat-photos-app<br/><br/>[113] https://github.com/datproject/docs/raw/master/papers/dat-paper.pdf<br/><br/>[114] https://github.com/datprotocol/DEPs/blob/653e0cf40233b5d474cddc04235577d9d55b2934/proposals/0000-peer-discovery.md#discovery-keys<br/><br/>[115] https://docs.datproject.org/security<br/><br/>[116] https://blog.datproject.org/2016/12/12/reader-privacy-on-the-p2p-web/<br/><br/>[117] https://blog.datproject.org/2017/12/10/dont-ship/<br/><br/>[118] https://github.com/datprotocol/DEPs/pull/7<br/><br/>[119] https://blog.datproject.org/2017/06/01/dat-sleep-release/<br/><br/>[120] https://github.com/datprotocol/DEPs<br/><br/>[121] https://github.com/datprotocol/DEPs/blob/master/proposals/0008-multiwriter.md<br/><br/>[122] https://github.com/mafintosh/hyperdb<br/><br/>[123] https://codeforscience.org/<br/><br/>[124] https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=890565<br/><br/>[125] https://github.com/datrs<br/><br/>[126] https://nodejs.org/en/<br/><br/>[127] 
https://bunsenbrowser.github.io/#!index.md<br/><br/>[128] https://termux.com/<br/><br/>[129] https://bluelinklabs.com/<br/><br/>[130] https://www.digital-democracy.org/<br/><br/>[131] https://archive.org<br/><br/>[132] https://blog.archive.org/2018/06/05/internet-archive-code-for-science-and-society-and-california-digital-library-to-partner-on-a-data-sharing-and-preservation-pilot-project/<br/><br/>[133] https://github.com/codeforscience/Dat-in-the-Lab<br/><br/>[134] https://www.mozilla.org/en-US/moss/<br/><br/>[135] https://github.com/datprotocol/DEPs/blob/master/proposals/0005-dns.md<br/><br/>[136] https://lwn.net/Articles/763492/#Comments<br/><br/>[137] https://lwn.net/Articles/763254/<br/><br/>[138] https://lwn.net/Articles/763255/<br/><br/>[139] https://lwn.net/Articles/763254/
+       </div>
+<hr/>
+</div>
+</body>
diff --git a/test/expected/LWN/0000763603 b/test/expected/LWN/0000763603

new file mode 100644 (file)

index 0000000..82ae8c2
--- /dev/null
+++ b/test/expected/LWN/0000763603
@@ -0,0 +1,13 @@
+      [$] MEASURING (AND FIXING) I/O-CONTROLLER THROUGHPUT LOSS    \r
+\r
+  [Kernel] Aug 29, 2018 21:20 UTC (Wed) (corbet)\r
+\r
+  o News link: https://lwn.net/Articles/763603\r
+  o Source link: \r
+\r
+\r
+    [$]   Sorry,   this  article  is  currently  available  to  LWN\r
+    suscribers only [https://lwn.net/subscribe/].\r
+\r
+\r
+\r
diff --git a/test/expected/LWN/0000763603.header b/test/expected/LWN/0000763603.header

new file mode 100644 (file)

index 0000000..b547597
--- /dev/null
+++ b/test/expected/LWN/0000763603.header
@@ -0,0 +1,19 @@
+0[$] Measuring (and fixing) I/O-controller throughput loss     null/LWN/0000763603             70\r
+i  [Kernel] Aug 29, 2018 21:20 UTC (Wed) (corbet)\r
+i\r
+i    Many  services,  from  web hosting and video streaming to cloud\r
+i    storage,  need  to  move  data  to  and from storage. They also\r
+i    often  require  that  each  per-client I/O flow be guaranteed a\r
+i    non-zero   amount  of  bandwidth  and  a  bounded  latency.  An\r
+i    expensive  way to provide these guarantees is to over-provision\r
+i    storage  resources,  keeping  each  resource underutilized, and\r
+i    thus  have  plenty of bandwidth available for the few I/O flows\r
+i    dispatched  to  each  medium.  Alternatively one can use an I/O\r
+i    controller.  Linux provides two mechanisms designed to throttle\r
+i    some  I/O  streams  to allow others to meet their bandwidth and\r
+i    latency  requirements.  These mechanisms work, but they come at\r
+i    a  cost:  a  loss  of  as  much  as  80% of total available I/O\r
+i    bandwidth.  I  have run some tests to demonstrate this problem;\r
+i    some  upcoming improvements to the bfq I/O scheduler promise to\r
+i    improve the situation considerably.\r
+i\r
diff --git a/test/expected/LWN/0000763603.header.html b/test/expected/LWN/0000763603.header.html

new file mode 100644 (file)

index 0000000..10fb39a
--- /dev/null
+++ b/test/expected/LWN/0000763603.header.html
@@ -0,0 +1,20 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta http-equiv='content-type' content='text/html; charset=utf-8'>
+  <meta name='viewport' content='width=device-width, initial-scale=1.0'>
+  <style type='text/css'>
+    body { margin: 1em 15%; }
+  </style>
+</head>
+<body>
+<div class='story-header'>
+       <h1><a href='0000763603.html'>[$] Measuring (and fixing) I/O-controller throughput loss</a></h1>
+       <div class='details'>([Kernel] Aug 29, 2018 21:20 UTC (Wed) (corbet))</div>
+       <br/>
+       <div class='content' style='text-align: justify'>
+               Many services, from web hosting and video streaming to cloud storage, need to move data to and from storage. They also often require that each per-client I/O flow be guaranteed a non-zero amount of bandwidth and a bounded latency. An expensive way to provide these guarantees is to over-provision storage resources, keeping each resource underutilized, and thus have plenty of bandwidth available for the few I/O flows dispatched to each medium. Alternatively one can use an I/O controller. Linux provides two mechanisms designed to throttle some I/O streams to allow others to meet their bandwidth and latency requirements. These mechanisms work, but they come at a cost: a loss of as much as 80% of total available I/O bandwidth. I have run some tests to demonstrate this problem; some upcoming improvements to the bfq I/O scheduler promise to improve the situation considerably.
+       </div>
+<hr/>
+</div>
+</body>
diff --git a/test/expected/LWN/0000763603.html b/test/expected/LWN/0000763603.html

new file mode 100644 (file)

index 0000000..d0dfc41
--- /dev/null
+++ b/test/expected/LWN/0000763603.html
@@ -0,0 +1,25 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta http-equiv='content-type' content='text/html; charset=utf-8'>
+  <meta name='viewport' content='width=device-width, initial-scale=1.0'>
+  <style type='text/css'>
+    body { margin: 1em 15%; }
+  </style>
+</head>
+<body>
+<div class='story'>
+       <h1>[$] Measuring (and fixing) I/O-controller throughput loss</h1>
+       <div class='details'>([Kernel] Aug 29, 2018 21:20 UTC (Wed) (corbet))</div>
+       <br/>
+    <ul>
+        <li>News link: <a href='https://lwn.net/Articles/763603'>https://lwn.net/Articles/763603</a></li>
+        <li>Source link: <a href=''></a></li>
+    </ul>
+       <br/>
+       <div class='content' style='text-align: justify'>
+               [$] Sorry, this article is currently available to LWN suscribers only [https://lwn.net/subscribe/].
+       </div>
+<hr/>
+</div>
+</body>
diff --git a/test/expected/LWN/0000763729 b/test/expected/LWN/0000763729

new file mode 100644 (file)

index 0000000..745ba29
--- /dev/null
+++ b/test/expected/LWN/0000763729
@@ -0,0 +1,13 @@
+                 [$] PROTECTING FILES WITH FS-VERITY               \r
+\r
+  [Kernel] Aug 30, 2018 18:50 UTC (Thu) (corbet)\r
+\r
+  o News link: https://lwn.net/Articles/763729\r
+  o Source link: \r
+\r
+\r
+    [$]   Sorry,   this  article  is  currently  available  to  LWN\r
+    suscribers only [https://lwn.net/subscribe/].\r
+\r
+\r
+\r
diff --git a/test/expected/LWN/0000763729.header b/test/expected/LWN/0000763729.header

new file mode 100644 (file)

index 0000000..934f640
--- /dev/null
+++ b/test/expected/LWN/0000763729.header
@@ -0,0 +1,13 @@
+0[$] Protecting files with fs-verity   null/LWN/0000763729             70\r
+i  [Kernel] Aug 30, 2018 18:50 UTC (Thu) (corbet)\r
+i\r
+i    The  developers  of  the  Android system have, among their many\r
+i    goals,  the  wish  to  better  protect  Android devices against\r
+i    persistent  compromise.  It is bad if a device is taken over by\r
+i    an  attacker; it's worse if it remains compromised even after a\r
+i    reboot.  Numerous  mechanisms  for  ensuring  the  integrity of\r
+i    installed  system files have been proposed and implemented over\r
+i    the  years.  But it seems there is always room for one more; to\r
+i    fill  that  space, the fs-verity mechanism is being proposed as\r
+i    a way to protect individual files from malicious modification.\r
+i\r
diff --git a/test/expected/LWN/0000763729.header.html b/test/expected/LWN/0000763729.header.html

new file mode 100644 (file)

index 0000000..c110703
--- /dev/null
+++ b/test/expected/LWN/0000763729.header.html
@@ -0,0 +1,20 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta http-equiv='content-type' content='text/html; charset=utf-8'>
+  <meta name='viewport' content='width=device-width, initial-scale=1.0'>
+  <style type='text/css'>
+    body { margin: 1em 15%; }
+  </style>
+</head>
+<body>
+<div class='story-header'>
+       <h1><a href='0000763729.html'>[$] Protecting files with fs-verity</a></h1>
+       <div class='details'>([Kernel] Aug 30, 2018 18:50 UTC (Thu) (corbet))</div>
+       <br/>
+       <div class='content' style='text-align: justify'>
+               The developers of the Android system have, among their many goals, the wish to better protect Android devices against persistent compromise. It is bad if a device is taken over by an attacker; it&#x27;s worse if it remains compromised even after a reboot. Numerous mechanisms for ensuring the integrity of installed system files have been proposed and implemented over the years. But it seems there is always room for one more; to fill that space, the fs-verity mechanism is being proposed as a way to protect individual files from malicious modification.
+       </div>
+<hr/>
+</div>
+</body>
diff --git a/test/expected/LWN/0000763729.html b/test/expected/LWN/0000763729.html

new file mode 100644 (file)

index 0000000..01ceedf
--- /dev/null
+++ b/test/expected/LWN/0000763729.html
@@ -0,0 +1,25 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta http-equiv='content-type' content='text/html; charset=utf-8'>
+  <meta name='viewport' content='width=device-width, initial-scale=1.0'>
+  <style type='text/css'>
+    body { margin: 1em 15%; }
+  </style>
+</head>
+<body>
+<div class='story'>
+       <h1>[$] Protecting files with fs-verity</h1>
+       <div class='details'>([Kernel] Aug 30, 2018 18:50 UTC (Thu) (corbet))</div>
+       <br/>
+    <ul>
+        <li>News link: <a href='https://lwn.net/Articles/763729'>https://lwn.net/Articles/763729</a></li>
+        <li>Source link: <a href=''></a></li>
+    </ul>
+       <br/>
+       <div class='content' style='text-align: justify'>
+               [$] Sorry, this article is currently available to LWN suscribers only [https://lwn.net/subscribe/].
+       </div>
+<hr/>
+</div>
+</body>
diff --git a/test/expected/LWN/0000763789 b/test/expected/LWN/0000763789

new file mode 100644 (file)

index 0000000..4a05b04
--- /dev/null
+++ b/test/expected/LWN/0000763789
@@ -0,0 +1,13 @@
+          [$] LWN.NET WEEKLY EDITION FOR SEPTEMBER 6, 2018         \r
+\r
+  \r
+\r
+  o News link: https://lwn.net/Articles/763789/\r
+  o Source link: \r
+\r
+\r
+    [$]   Sorry,   this  article  is  currently  available  to  LWN\r
+    suscribers only [https://lwn.net/subscribe/].\r
+\r
+\r
+\r
diff --git a/test/expected/LWN/0000763789.header b/test/expected/LWN/0000763789.header

new file mode 100644 (file)

index 0000000..9bb9525
--- /dev/null
+++ b/test/expected/LWN/0000763789.header
@@ -0,0 +1,3 @@
+0[$] LWN.net Weekly Edition for September 6, 2018      null/LWN/0000763789             70\r
+i  \r
+i\r
diff --git a/test/expected/LWN/0000763789.header.html b/test/expected/LWN/0000763789.header.html

new file mode 100644 (file)

index 0000000..ad726d4
--- /dev/null
+++ b/test/expected/LWN/0000763789.header.html
@@ -0,0 +1,20 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta http-equiv='content-type' content='text/html; charset=utf-8'>
+  <meta name='viewport' content='width=device-width, initial-scale=1.0'>
+  <style type='text/css'>
+    body { margin: 1em 15%; }
+  </style>
+</head>
+<body>
+<div class='story-header'>
+       <h1><a href='0000763789.html'>[$] LWN.net Weekly Edition for September 6, 2018</a></h1>
+       <div class='details'></div>
+       <br/>
+       <div class='content' style='text-align: justify'>
+               
+       </div>
+<hr/>
+</div>
+</body>
diff --git a/test/expected/LWN/0000763789.html b/test/expected/LWN/0000763789.html

new file mode 100644 (file)

index 0000000..0638c5f
--- /dev/null
+++ b/test/expected/LWN/0000763789.html
@@ -0,0 +1,25 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta http-equiv='content-type' content='text/html; charset=utf-8'>
+  <meta name='viewport' content='width=device-width, initial-scale=1.0'>
+  <style type='text/css'>
+    body { margin: 1em 15%; }
+  </style>
+</head>
+<body>
+<div class='story'>
+       <h1>[$] LWN.net Weekly Edition for September 6, 2018</h1>
+       <div class='details'></div>
+       <br/>
+    <ul>
+        <li>News link: <a href='https://lwn.net/Articles/763789/'>https://lwn.net/Articles/763789/</a></li>
+        <li>Source link: <a href=''></a></li>
+    </ul>
+       <br/>
+       <div class='content' style='text-align: justify'>
+               [$] Sorry, this article is currently available to LWN suscribers only [https://lwn.net/subscribe/].
+       </div>
+<hr/>
+</div>
+</body>
diff --git a/test/expected/LWN/0000763987 b/test/expected/LWN/0000763987

new file mode 100644 (file)

index 0000000..aa76279
--- /dev/null
+++ b/test/expected/LWN/0000763987
@@ -0,0 +1,19 @@
+                      KERNEL PREPATCH 4.19-RC2                     \r
+\r
+  [Kernel] Sep 2, 2018 22:29 UTC (Sun) (corbet)\r
+\r
+  o News link: https://lwn.net/Articles/763987/\r
+  o Source link: \r
+\r
+\r
+    The  [1]4.19-rc2  kernel  prepatch  is  out  for  testing. " As\r
+    usual,  the  rc2  release  is pretty small. People are taking a\r
+    breather  after  the  merge  window, and it takes a bit of time\r
+    for bug reports to start coming in and get identified. "\r
+    \r
+    \r
+    \r
+    [1] https://lwn.net/Articles/763988/\r
+\r
+\r
+\r
diff --git a/test/expected/LWN/0000763987.header b/test/expected/LWN/0000763987.header

new file mode 100644 (file)

index 0000000..8fc90d4
--- /dev/null
+++ b/test/expected/LWN/0000763987.header
@@ -0,0 +1,8 @@
+0Kernel prepatch 4.19-rc2      null/LWN/0000763987             70\r
+i  [Kernel] Sep 2, 2018 22:29 UTC (Sun) (corbet)\r
+i\r
+i    The  4.19-rc2  kernel  prepatch  is out for testing. "As usual,\r
+i    the  rc2  release is pretty small. People are taking a breather\r
+i    after  the  merge  window,  and  it takes a bit of time for bug\r
+i    reports to start coming in and get identified."\r
+i\r
diff --git a/test/expected/LWN/0000763987.header.html b/test/expected/LWN/0000763987.header.html

new file mode 100644 (file)

index 0000000..76f8132
--- /dev/null
+++ b/test/expected/LWN/0000763987.header.html
@@ -0,0 +1,20 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta http-equiv='content-type' content='text/html; charset=utf-8'>
+  <meta name='viewport' content='width=device-width, initial-scale=1.0'>
+  <style type='text/css'>
+    body { margin: 1em 15%; }
+  </style>
+</head>
+<body>
+<div class='story-header'>
+       <h1><a href='0000763987.html'>Kernel prepatch 4.19-rc2</a></h1>
+       <div class='details'>([Kernel] Sep 2, 2018 22:29 UTC (Sun) (corbet))</div>
+       <br/>
+       <div class='content' style='text-align: justify'>
+               The 4.19-rc2 kernel prepatch is out for testing. &quot;As usual, the rc2 release is pretty small. People are taking a breather after the merge window, and it takes a bit of time for bug reports to start coming in and get identified.&quot;
+       </div>
+<hr/>
+</div>
+</body>
diff --git a/test/expected/LWN/0000763987.html b/test/expected/LWN/0000763987.html

new file mode 100644 (file)

index 0000000..e73655b
--- /dev/null
+++ b/test/expected/LWN/0000763987.html
@@ -0,0 +1,25 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta http-equiv='content-type' content='text/html; charset=utf-8'>
+  <meta name='viewport' content='width=device-width, initial-scale=1.0'>
+  <style type='text/css'>
+    body { margin: 1em 15%; }
+  </style>
+</head>
+<body>
+<div class='story'>
+       <h1>Kernel prepatch 4.19-rc2</h1>
+       <div class='details'>([Kernel] Sep 2, 2018 22:29 UTC (Sun) (corbet))</div>
+       <br/>
+    <ul>
+        <li>News link: <a href='https://lwn.net/Articles/763987/'>https://lwn.net/Articles/763987/</a></li>
+        <li>Source link: <a href=''></a></li>
+    </ul>
+       <br/>
+       <div class='content' style='text-align: justify'>
+               The [1]4.19-rc2 kernel prepatch is out for testing. &quot; As usual, the rc2 release is pretty small. People are taking a breather after the merge window, and it takes a bit of time for bug reports to start coming in and get identified. &quot;<br/><br/><br/><br/>[1] https://lwn.net/Articles/763988/
+       </div>
+<hr/>
+</div>
+</body>
diff --git a/test/expected/LWN/0000764001 b/test/expected/LWN/0000764001

new file mode 100644 (file)

index 0000000..f0c680c
--- /dev/null
+++ b/test/expected/LWN/0000764001
@@ -0,0 +1,13 @@
+          [$] AN INTRODUCTION TO THE JULIA LANGUAGE, PART 2        \r
+\r
+  [Development] Sep 4, 2018 15:57 UTC (Tue) (jake)\r
+\r
+  o News link: https://lwn.net/Articles/764001\r
+  o Source link: \r
+\r
+\r
+    [$]   Sorry,   this  article  is  currently  available  to  LWN\r
+    suscribers only [https://lwn.net/subscribe/].\r
+\r
+\r
+\r
diff --git a/test/expected/LWN/0000764001.header b/test/expected/LWN/0000764001.header

new file mode 100644 (file)

index 0000000..529c9fa
--- /dev/null
+++ b/test/expected/LWN/0000764001.header
@@ -0,0 +1,12 @@
+0[$] An introduction to the Julia language, part 2     null/LWN/0000764001             70\r
+i  [Development] Sep 4, 2018 15:57 UTC (Tue) (jake)\r
+i\r
+i    Part  1 of this series introduced the Julia project's goals and\r
+i    development  process, along with the language syntax, including\r
+i    the  basics  of  control flow, data types, and, in more detail,\r
+i    how  to  work with arrays. In this part, user-defined functions\r
+i    and  the central concept of multiple dispatch are described. It\r
+i    will  also survey Julia's module and package system, cover some\r
+i    syntax  features,  show how to make plots, and briefly dip into\r
+i    macros and distributed computing.\r
+i\r
diff --git a/test/expected/LWN/0000764001.header.html b/test/expected/LWN/0000764001.header.html

new file mode 100644 (file)

index 0000000..8611c17
--- /dev/null
+++ b/test/expected/LWN/0000764001.header.html
@@ -0,0 +1,20 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta http-equiv='content-type' content='text/html; charset=utf-8'>
+  <meta name='viewport' content='width=device-width, initial-scale=1.0'>
+  <style type='text/css'>
+    body { margin: 1em 15%; }
+  </style>
+</head>
+<body>
+<div class='story-header'>
+       <h1><a href='0000764001.html'>[$] An introduction to the Julia language, part 2</a></h1>
+       <div class='details'>([Development] Sep 4, 2018 15:57 UTC (Tue) (jake))</div>
+       <br/>
+       <div class='content' style='text-align: justify'>
+               Part 1 of this series introduced the Julia project&#x27;s goals and development process, along with the language syntax, including the basics of control flow, data types, and, in more detail, how to work with arrays. In this part, user-defined functions and the central concept of multiple dispatch are described. It will also survey Julia&#x27;s module and package system, cover some syntax features, show how to make plots, and briefly dip into macros and distributed computing.
+       </div>
+<hr/>
+</div>
+</body>
diff --git a/test/expected/LWN/0000764001.html b/test/expected/LWN/0000764001.html

new file mode 100644 (file)

index 0000000..cbf08fd
--- /dev/null
+++ b/test/expected/LWN/0000764001.html
@@ -0,0 +1,25 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta http-equiv='content-type' content='text/html; charset=utf-8'>
+  <meta name='viewport' content='width=device-width, initial-scale=1.0'>
+  <style type='text/css'>
+    body { margin: 1em 15%; }
+  </style>
+</head>
+<body>
+<div class='story'>
+       <h1>[$] An introduction to the Julia language, part 2</h1>
+       <div class='details'>([Development] Sep 4, 2018 15:57 UTC (Tue) (jake))</div>
+       <br/>
+    <ul>
+        <li>News link: <a href='https://lwn.net/Articles/764001'>https://lwn.net/Articles/764001</a></li>
+        <li>Source link: <a href=''></a></li>
+    </ul>
+       <br/>
+       <div class='content' style='text-align: justify'>
+               [$] Sorry, this article is currently available to LWN suscribers only [https://lwn.net/subscribe/].
+       </div>
+<hr/>
+</div>
+</body>
diff --git a/test/expected/LWN/0000764046 b/test/expected/LWN/0000764046

new file mode 100644 (file)

index 0000000..03dc5ce
--- /dev/null
+++ b/test/expected/LWN/0000764046
@@ -0,0 +1,490 @@
+                     SECURITY UPDATES FOR MONDAY                   \r
+\r
+  [Security] Sep 3, 2018 15:41 UTC (Mon) (ris)\r
+\r
+  o News link: https://lwn.net/Articles/764046\r
+  o Source link: \r
+\r
+\r
+    Dist.\r
+    \r
+    ID\r
+    \r
+    Release\r
+    \r
+    Package\r
+    \r
+    Date\r
+    \r
+    Debian\r
+    \r
+    [1]DLA-1492-1\r
+    \r
+    LTS\r
+    \r
+    dojo\r
+    \r
+    2018-09-03\r
+    \r
+    Debian\r
+    \r
+    [2]DLA-1487-1\r
+    \r
+    LTS\r
+    \r
+    libtirpc\r
+    \r
+    2018-08-31\r
+    \r
+    Debian\r
+    \r
+    [3]DLA-1488-1\r
+    \r
+    LTS\r
+    \r
+    mariadb-10.0\r
+    \r
+    2018-08-31\r
+    \r
+    Debian\r
+    \r
+    [4]DLA-1490-1\r
+    \r
+    LTS\r
+    \r
+    php5\r
+    \r
+    2018-09-01\r
+    \r
+    Debian\r
+    \r
+    [5]DSA-4283-1\r
+    \r
+    stable\r
+    \r
+    ruby-json-jwt\r
+    \r
+    2018-08-31\r
+    \r
+    Debian\r
+    \r
+    [6]DLA-1488-1\r
+    \r
+    LTS\r
+    \r
+    spice\r
+    \r
+    2018-08-31\r
+    \r
+    Debian\r
+    \r
+    [7]DLA-1486-1\r
+    \r
+    LTS\r
+    \r
+    spice\r
+    \r
+    2018-09-01\r
+    \r
+    Debian\r
+    \r
+    [8]DLA-1489-1\r
+    \r
+    LTS\r
+    \r
+    spice-gtk\r
+    \r
+    2018-09-01\r
+    \r
+    Debian\r
+    \r
+    [9]DLA-1491-1\r
+    \r
+    LTS\r
+    \r
+    tomcat8\r
+    \r
+    2018-09-02\r
+    \r
+    Debian\r
+    \r
+    [10]DSA-4282-1\r
+    \r
+    stable\r
+    \r
+    trafficserver\r
+    \r
+    2018-08-31\r
+    \r
+    Fedora\r
+    \r
+    [11]FEDORA-2018-33fef25ed1\r
+    \r
+    F28\r
+    \r
+    ghc-hakyll\r
+    \r
+    2018-08-31\r
+    \r
+    Fedora\r
+    \r
+    [12]FEDORA-2018-33fef25ed1\r
+    \r
+    F28\r
+    \r
+    ghc-hs-bibutils\r
+    \r
+    2018-08-31\r
+    \r
+    Fedora\r
+    \r
+    [13]FEDORA-2018-07083800ac\r
+    \r
+    F28\r
+    \r
+    ghostscript\r
+    \r
+    2018-09-02\r
+    \r
+    Fedora\r
+    \r
+    [14]FEDORA-2018-77e610115a\r
+    \r
+    F28\r
+    \r
+    mariadb\r
+    \r
+    2018-08-31\r
+    \r
+    Fedora\r
+    \r
+    [15]FEDORA-2018-33fef25ed1\r
+    \r
+    F28\r
+    \r
+    pandoc-citeproc\r
+    \r
+    2018-08-31\r
+    \r
+    Fedora\r
+    \r
+    [16]FEDORA-2018-f2b24ce26e\r
+    \r
+    F28\r
+    \r
+    phpMyAdmin\r
+    \r
+    2018-08-31\r
+    \r
+    Fedora\r
+    \r
+    [17]FEDORA-2018-915602df63\r
+    \r
+    F27\r
+    \r
+    xen\r
+    \r
+    2018-08-31\r
+    \r
+    Mageia\r
+    \r
+    [18]MGASA-2018-0366\r
+    \r
+    6\r
+    \r
+    java-1.8.0-openjdk\r
+    \r
+    2018-09-02\r
+    \r
+    Mageia\r
+    \r
+    [19]MGASA-2018-0361\r
+    \r
+    6\r
+    \r
+    libarchive\r
+    \r
+    2018-08-31\r
+    \r
+    Mageia\r
+    \r
+    [20]MGASA-2018-0367\r
+    \r
+    6\r
+    \r
+    libgd\r
+    \r
+    2018-09-02\r
+    \r
+    Mageia\r
+    \r
+    [21]MGASA-2018-0356\r
+    \r
+    6\r
+    \r
+    libraw\r
+    \r
+    2018-08-31\r
+    \r
+    Mageia\r
+    \r
+    [22]MGASA-2018-0364\r
+    \r
+    6\r
+    \r
+    libxcursor\r
+    \r
+    2018-08-31\r
+    \r
+    Mageia\r
+    \r
+    [23]MGASA-2018-0359\r
+    \r
+    5\r
+    \r
+    mariadb\r
+    \r
+    2018-08-31\r
+    \r
+    Mageia\r
+    \r
+    [24]MGASA-2018-0355\r
+    \r
+    5, 6\r
+    \r
+    mercurial\r
+    \r
+    2018-08-31\r
+    \r
+    Mageia\r
+    \r
+    [25]MGASA-2018-0363\r
+    \r
+    6\r
+    \r
+    openssh\r
+    \r
+    2018-08-31\r
+    \r
+    Mageia\r
+    \r
+    [26]MGASA-2018-0365\r
+    \r
+    6\r
+    \r
+    openssl\r
+    \r
+    2018-09-02\r
+    \r
+    Mageia\r
+    \r
+    [27]MGASA-2018-0358\r
+    \r
+    6\r
+    \r
+    poppler\r
+    \r
+    2018-08-31\r
+    \r
+    Mageia\r
+    \r
+    [28]MGASA-2018-0362\r
+    \r
+    6\r
+    \r
+    quazip\r
+    \r
+    2018-08-31\r
+    \r
+    Mageia\r
+    \r
+    [29]MGASA-2018-0357\r
+    \r
+    6\r
+    \r
+    squirrelmail\r
+    \r
+    2018-08-31\r
+    \r
+    Mageia\r
+    \r
+    [30]MGASA-2018-0360\r
+    \r
+    6\r
+    \r
+    virtualbox\r
+    \r
+    2018-08-31\r
+    \r
+    openSUSE\r
+    \r
+    [31]openSUSE-SU-2018:2590-1\r
+    \r
+    42.3\r
+    \r
+    cobbler\r
+    \r
+    2018-09-03\r
+    \r
+    openSUSE\r
+    \r
+    [32]openSUSE-SU-2018:2592-1\r
+    \r
+    15.0\r
+    \r
+    libressl\r
+    \r
+    2018-09-03\r
+    \r
+    openSUSE\r
+    \r
+    [33]openSUSE-SU-2018:2587-1\r
+    \r
+    42.3\r
+    \r
+    wireshark\r
+    \r
+    2018-09-02\r
+    \r
+    openSUSE\r
+    \r
+    [34]openSUSE-SU-2018:2591-1\r
+    \r
+    15.0 42.3\r
+    \r
+    zutils\r
+    \r
+    2018-09-03\r
+    \r
+    SUSE\r
+    \r
+    [35]SUSE-SU-2018:2576-1\r
+    \r
+    OS7\r
+    \r
+    OpenStack\r
+    \r
+    2018-08-31\r
+    \r
+    SUSE\r
+    \r
+    [36]SUSE-SU-2018:2578-1\r
+    \r
+    OS7\r
+    \r
+    couchdb\r
+    \r
+    2018-08-31\r
+    \r
+    SUSE\r
+    \r
+    [37]SUSE-SU-2018:2574-1\r
+    \r
+    SLE11\r
+    \r
+    java-1_7_0-ibm\r
+    \r
+    2018-08-31\r
+    \r
+    SUSE\r
+    \r
+    [38]SUSE-SU-2018:2583-1\r
+    \r
+    SLE11\r
+    \r
+    java-1_7_1-ibm\r
+    \r
+    2018-08-31\r
+    \r
+    SUSE\r
+    \r
+    [39]SUSE-SU-2018:2584-1\r
+    \r
+    SLE12\r
+    \r
+    spice\r
+    \r
+    2018-08-31\r
+    \r
+    \r
+    \r
+    [1] https://lwn.net/Articles/764007/\r
+    \r
+    [2] https://lwn.net/Articles/764008/\r
+    \r
+    [3] https://lwn.net/Articles/764009/\r
+    \r
+    [4] https://lwn.net/Articles/764010/\r
+    \r
+    [5] https://lwn.net/Articles/764011/\r
+    \r
+    [6] https://lwn.net/Articles/764013/\r
+    \r
+    [7] https://lwn.net/Articles/764012/\r
+    \r
+    [8] https://lwn.net/Articles/764014/\r
+    \r
+    [9] https://lwn.net/Articles/764015/\r
+    \r
+    [10] https://lwn.net/Articles/764016/\r
+    \r
+    [11] https://lwn.net/Articles/764017/\r
+    \r
+    [12] https://lwn.net/Articles/764018/\r
+    \r
+    [13] https://lwn.net/Articles/764019/\r
+    \r
+    [14] https://lwn.net/Articles/764020/\r
+    \r
+    [15] https://lwn.net/Articles/764021/\r
+    \r
+    [16] https://lwn.net/Articles/764022/\r
+    \r
+    [17] https://lwn.net/Articles/764023/\r
+    \r
+    [18] https://lwn.net/Articles/764024/\r
+    \r
+    [19] https://lwn.net/Articles/764025/\r
+    \r
+    [20] https://lwn.net/Articles/764026/\r
+    \r
+    [21] https://lwn.net/Articles/764027/\r
+    \r
+    [22] https://lwn.net/Articles/764028/\r
+    \r
+    [23] https://lwn.net/Articles/764029/\r
+    \r
+    [24] https://lwn.net/Articles/764030/\r
+    \r
+    [25] https://lwn.net/Articles/764031/\r
+    \r
+    [26] https://lwn.net/Articles/764032/\r
+    \r
+    [27] https://lwn.net/Articles/764033/\r
+    \r
+    [28] https://lwn.net/Articles/764034/\r
+    \r
+    [29] https://lwn.net/Articles/764035/\r
+    \r
+    [30] https://lwn.net/Articles/764036/\r
+    \r
+    [31] https://lwn.net/Articles/764037/\r
+    \r
+    [32] https://lwn.net/Articles/764038/\r
+    \r
+    [33] https://lwn.net/Articles/764039/\r
+    \r
+    [34] https://lwn.net/Articles/764040/\r
+    \r
+    [35] https://lwn.net/Articles/764044/\r
+    \r
+    [36] https://lwn.net/Articles/764041/\r
+    \r
+    [37] https://lwn.net/Articles/764042/\r
+    \r
+    [38] https://lwn.net/Articles/764043/\r
+    \r
+    [39] https://lwn.net/Articles/764045/\r
+\r
+\r
+\r
diff --git a/test/expected/LWN/0000764046.header b/test/expected/LWN/0000764046.header

new file mode 100644 (file)

index 0000000..095afeb
--- /dev/null
+++ b/test/expected/LWN/0000764046.header
@@ -0,0 +1,13 @@
+0Security updates for Monday   null/LWN/0000764046             70\r
+i  [Security] Sep 3, 2018 15:41 UTC (Mon) (ris)\r
+i\r
+i    Security  updates  have  been issued by Debian (dojo, libtirpc,\r
+i    mariadb-10.0,  php5,  ruby-json-jwt, spice, spice-gtk, tomcat8,\r
+i    and   trafficserver),   Fedora   (ghc-hakyll,  ghc-hs-bibutils,\r
+i    ghostscript,  mariadb,  pandoc-citeproc,  phpMyAdmin, and xen),\r
+i    Mageia    (java-1.8.0-openjdk,   libarchive,   libgd,   libraw,\r
+i    libxcursor,  mariadb,  mercurial,  openssh,  openssl,  poppler,\r
+i    quazip,   squirrelmail,  and  virtualbox),  openSUSE  (cobbler,\r
+i    libressl,   wireshark,   and   zutils),   and   SUSE  (couchdb,\r
+i    java-1_7_0-ibm, java-1_7_1-ibm, OpenStack, and spice).\r
+i\r
diff --git a/test/expected/LWN/0000764046.header.html b/test/expected/LWN/0000764046.header.html

new file mode 100644 (file)

index 0000000..d42349b
--- /dev/null
+++ b/test/expected/LWN/0000764046.header.html
@@ -0,0 +1,20 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta http-equiv='content-type' content='text/html; charset=utf-8'>
+  <meta name='viewport' content='width=device-width, initial-scale=1.0'>
+  <style type='text/css'>
+    body { margin: 1em 15%; }
+  </style>
+</head>
+<body>
+<div class='story-header'>
+       <h1><a href='0000764046.html'>Security updates for Monday</a></h1>
+       <div class='details'>([Security] Sep 3, 2018 15:41 UTC (Mon) (ris))</div>
+       <br/>
+       <div class='content' style='text-align: justify'>
+               Security updates have been issued by Debian (dojo, libtirpc, mariadb-10.0, php5, ruby-json-jwt, spice, spice-gtk, tomcat8, and trafficserver), Fedora (ghc-hakyll, ghc-hs-bibutils, ghostscript, mariadb, pandoc-citeproc, phpMyAdmin, and xen), Mageia (java-1.8.0-openjdk, libarchive, libgd, libraw, libxcursor, mariadb, mercurial, openssh, openssl, poppler, quazip, squirrelmail, and virtualbox), openSUSE (cobbler, libressl, wireshark, and zutils), and SUSE (couchdb, java-1_7_0-ibm, java-1_7_1-ibm, OpenStack, and spice).
+       </div>
+<hr/>
+</div>
+</body>
diff --git a/test/expected/LWN/0000764046.html b/test/expected/LWN/0000764046.html

new file mode 100644 (file)

index 0000000..4384209
--- /dev/null
+++ b/test/expected/LWN/0000764046.html
@@ -0,0 +1,25 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta http-equiv='content-type' content='text/html; charset=utf-8'>
+  <meta name='viewport' content='width=device-width, initial-scale=1.0'>
+  <style type='text/css'>
+    body { margin: 1em 15%; }
+  </style>
+</head>
+<body>
+<div class='story'>
+       <h1>Security updates for Monday</h1>
+       <div class='details'>([Security] Sep 3, 2018 15:41 UTC (Mon) (ris))</div>
+       <br/>
+    <ul>
+        <li>News link: <a href='https://lwn.net/Articles/764046'>https://lwn.net/Articles/764046</a></li>
+        <li>Source link: <a href=''></a></li>
+    </ul>
+       <br/>
+       <div class='content' style='text-align: justify'>
+               Dist.<br/><br/>ID<br/><br/>Release<br/><br/>Package<br/><br/>Date<br/><br/>Debian<br/><br/>[1]DLA-1492-1<br/><br/>LTS<br/><br/>dojo<br/><br/>2018-09-03<br/><br/>Debian<br/><br/>[2]DLA-1487-1<br/><br/>LTS<br/><br/>libtirpc<br/><br/>2018-08-31<br/><br/>Debian<br/><br/>[3]DLA-1488-1<br/><br/>LTS<br/><br/>mariadb-10.0<br/><br/>2018-08-31<br/><br/>Debian<br/><br/>[4]DLA-1490-1<br/><br/>LTS<br/><br/>php5<br/><br/>2018-09-01<br/><br/>Debian<br/><br/>[5]DSA-4283-1<br/><br/>stable<br/><br/>ruby-json-jwt<br/><br/>2018-08-31<br/><br/>Debian<br/><br/>[6]DLA-1488-1<br/><br/>LTS<br/><br/>spice<br/><br/>2018-08-31<br/><br/>Debian<br/><br/>[7]DLA-1486-1<br/><br/>LTS<br/><br/>spice<br/><br/>2018-09-01<br/><br/>Debian<br/><br/>[8]DLA-1489-1<br/><br/>LTS<br/><br/>spice-gtk<br/><br/>2018-09-01<br/><br/>Debian<br/><br/>[9]DLA-1491-1<br/><br/>LTS<br/><br/>tomcat8<br/><br/>2018-09-02<br/><br/>Debian<br/><br/>[10]DSA-4282-1<br/><br/>stable<br/><br/>trafficserver<br/><br/>2018-08-31<br/><br/>Fedora<br/><br/>[11]FEDORA-2018-33fef25ed1<br/><br/>F28<br/><br/>ghc-hakyll<br/><br/>2018-08-31<br/><br/>Fedora<br/><br/>[12]FEDORA-2018-33fef25ed1<br/><br/>F28<br/><br/>ghc-hs-bibutils<br/><br/>2018-08-31<br/><br/>Fedora<br/><br/>[13]FEDORA-2018-07083800ac<br/><br/>F28<br/><br/>ghostscript<br/><br/>2018-09-02<br/><br/>Fedora<br/><br/>[14]FEDORA-2018-77e610115a<br/><br/>F28<br/><br/>mariadb<br/><br/>2018-08-31<br/><br/>Fedora<br/><br/>[15]FEDORA-2018-33fef25ed1<br/><br/>F28<br/><br/>pandoc-citeproc<br/><br/>2018-08-31<br/><br/>Fedora<br/><br/>[16]FEDORA-2018-f2b24ce26e<br/><br/>F28<br/><br/>phpMyAdmin<br/><br/>2018-08-31<br/><br/>Fedora<br/><br/>[17]FEDORA-2018-915602df63<br/><br/>F27<br/><br/>xen<br/><br/>2018-08-31<br/><br/>Mageia<br/><br/>[18]MGASA-2018-0366<br/><br/>6<br/><br/>java-1.8.0-openjdk<br/><br/>2018-09-02<br/><br/>Mageia<br/><br/>[19]MGASA-2018-0361<br/><br/>6<br/><br/>libarchive<br/><br/>2018-08-31<br/><br/>Mageia<br/><br/>[20]MGASA-2018-0367<br/><br/>6<br/><br/>libgd<br/>
<br/>2018-09-02<br/><br/>Mageia<br/><br/>[21]MGASA-2018-0356<br/><br/>6<br/><br/>libraw<br/><br/>2018-08-31<br/><br/>Mageia<br/><br/>[22]MGASA-2018-0364<br/><br/>6<br/><br/>libxcursor<br/><br/>2018-08-31<br/><br/>Mageia<br/><br/>[23]MGASA-2018-0359<br/><br/>5<br/><br/>mariadb<br/><br/>2018-08-31<br/><br/>Mageia<br/><br/>[24]MGASA-2018-0355<br/><br/>5, 6<br/><br/>mercurial<br/><br/>2018-08-31<br/><br/>Mageia<br/><br/>[25]MGASA-2018-0363<br/><br/>6<br/><br/>openssh<br/><br/>2018-08-31<br/><br/>Mageia<br/><br/>[26]MGASA-2018-0365<br/><br/>6<br/><br/>openssl<br/><br/>2018-09-02<br/><br/>Mageia<br/><br/>[27]MGASA-2018-0358<br/><br/>6<br/><br/>poppler<br/><br/>2018-08-31<br/><br/>Mageia<br/><br/>[28]MGASA-2018-0362<br/><br/>6<br/><br/>quazip<br/><br/>2018-08-31<br/><br/>Mageia<br/><br/>[29]MGASA-2018-0357<br/><br/>6<br/><br/>squirrelmail<br/><br/>2018-08-31<br/><br/>Mageia<br/><br/>[30]MGASA-2018-0360<br/><br/>6<br/><br/>virtualbox<br/><br/>2018-08-31<br/><br/>openSUSE<br/><br/>[31]openSUSE-SU-2018:2590-1<br/><br/>42.3<br/><br/>cobbler<br/><br/>2018-09-03<br/><br/>openSUSE<br/><br/>[32]openSUSE-SU-2018:2592-1<br/><br/>15.0<br/><br/>libressl<br/><br/>2018-09-03<br/><br/>openSUSE<br/><br/>[33]openSUSE-SU-2018:2587-1<br/><br/>42.3<br/><br/>wireshark<br/><br/>2018-09-02<br/><br/>openSUSE<br/><br/>[34]openSUSE-SU-2018:2591-1<br/><br/>15.0 42.3<br/><br/>zutils<br/><br/>2018-09-03<br/><br/>SUSE<br/><br/>[35]SUSE-SU-2018:2576-1<br/><br/>OS7<br/><br/>OpenStack<br/><br/>2018-08-31<br/><br/>SUSE<br/><br/>[36]SUSE-SU-2018:2578-1<br/><br/>OS7<br/><br/>couchdb<br/><br/>2018-08-31<br/><br/>SUSE<br/><br/>[37]SUSE-SU-2018:2574-1<br/><br/>SLE11<br/><br/>java-1_7_0-ibm<br/><br/>2018-08-31<br/><br/>SUSE<br/><br/>[38]SUSE-SU-2018:2583-1<br/><br/>SLE11<br/><br/>java-1_7_1-ibm<br/><br/>2018-08-31<br/><br/>SUSE<br/><br/>[39]SUSE-SU-2018:2584-1<br/><br/>SLE12<br/><br/>spice<br/><br/>2018-08-31<br/><br/><br/><br/>[1] https://lwn.net/Articles/764007/<br/><br/>[2] 
https://lwn.net/Articles/764008/<br/><br/>[3] https://lwn.net/Articles/764009/<br/><br/>[4] https://lwn.net/Articles/764010/<br/><br/>[5] https://lwn.net/Articles/764011/<br/><br/>[6] https://lwn.net/Articles/764013/<br/><br/>[7] https://lwn.net/Articles/764012/<br/><br/>[8] https://lwn.net/Articles/764014/<br/><br/>[9] https://lwn.net/Articles/764015/<br/><br/>[10] https://lwn.net/Articles/764016/<br/><br/>[11] https://lwn.net/Articles/764017/<br/><br/>[12] https://lwn.net/Articles/764018/<br/><br/>[13] https://lwn.net/Articles/764019/<br/><br/>[14] https://lwn.net/Articles/764020/<br/><br/>[15] https://lwn.net/Articles/764021/<br/><br/>[16] https://lwn.net/Articles/764022/<br/><br/>[17] https://lwn.net/Articles/764023/<br/><br/>[18] https://lwn.net/Articles/764024/<br/><br/>[19] https://lwn.net/Articles/764025/<br/><br/>[20] https://lwn.net/Articles/764026/<br/><br/>[21] https://lwn.net/Articles/764027/<br/><br/>[22] https://lwn.net/Articles/764028/<br/><br/>[23] https://lwn.net/Articles/764029/<br/><br/>[24] https://lwn.net/Articles/764030/<br/><br/>[25] https://lwn.net/Articles/764031/<br/><br/>[26] https://lwn.net/Articles/764032/<br/><br/>[27] https://lwn.net/Articles/764033/<br/><br/>[28] https://lwn.net/Articles/764034/<br/><br/>[29] https://lwn.net/Articles/764035/<br/><br/>[30] https://lwn.net/Articles/764036/<br/><br/>[31] https://lwn.net/Articles/764037/<br/><br/>[32] https://lwn.net/Articles/764038/<br/><br/>[33] https://lwn.net/Articles/764039/<br/><br/>[34] https://lwn.net/Articles/764040/<br/><br/>[35] https://lwn.net/Articles/764044/<br/><br/>[36] https://lwn.net/Articles/764041/<br/><br/>[37] https://lwn.net/Articles/764042/<br/><br/>[38] https://lwn.net/Articles/764043/<br/><br/>[39] https://lwn.net/Articles/764045/
+       </div>
+<hr/>
+</div>
+</body>
diff --git a/test/expected/LWN/0000764048 b/test/expected/LWN/0000764048

new file mode 100644 (file)

index 0000000..2e2225d
--- /dev/null
+++ b/test/expected/LWN/0000764048
@@ -0,0 +1,13 @@
+                 [$] LIFE BEHIND THE TINFOIL CURTAIN               \r
+\r
+  [Security] Sep 5, 2018 22:11 UTC (Wed) (jake)\r
+\r
+  o News link: https://lwn.net/Articles/764048\r
+  o Source link: \r
+\r
+\r
+    [$]   Sorry,   this  article  is  currently  available  to  LWN\r
+    suscribers only [https://lwn.net/subscribe/].\r
+\r
+\r
+\r
diff --git a/test/expected/LWN/0000764048.header b/test/expected/LWN/0000764048.header

new file mode 100644 (file)

index 0000000..c44db41
--- /dev/null
+++ b/test/expected/LWN/0000764048.header
@@ -0,0 +1,13 @@
+0[$] Life behind the tinfoil curtain   null/LWN/0000764048             70\r
+i  [Security] Sep 5, 2018 22:11 UTC (Wed) (jake)\r
+i\r
+i    Security  and  convenience  rarely go hand-in-hand, but if your\r
+i    job  (or  life) requires extraordinary care against potentially\r
+i    targeted  attacks,  the  security side of that tradeoff may win\r
+i    out.  If  so, running a system like Qubes OS on your desktop or\r
+i    CopperheadOS  on  your  phone  might  make sense, which is just\r
+i    what  Konstantin  Ryabitsev,  Linux Foundation (LF) director of\r
+i    IT  security, has done. He reported on the experience in a talk\r
+i    [YouTube  video]  entitled "Life Behind the Tinfoil Curtain" at\r
+i    the 2018 Linux Security Summit North America.\r
+i\r
diff --git a/test/expected/LWN/0000764048.header.html b/test/expected/LWN/0000764048.header.html

new file mode 100644 (file)

index 0000000..4c8a08e
--- /dev/null
+++ b/test/expected/LWN/0000764048.header.html
@@ -0,0 +1,20 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta http-equiv='content-type' content='text/html; charset=utf-8'>
+  <meta name='viewport' content='width=device-width, initial-scale=1.0'>
+  <style type='text/css'>
+    body { margin: 1em 15%; }
+  </style>
+</head>
+<body>
+<div class='story-header'>
+       <h1><a href='0000764048.html'>[$] Life behind the tinfoil curtain</a></h1>
+       <div class='details'>([Security] Sep 5, 2018 22:11 UTC (Wed) (jake))</div>
+       <br/>
+       <div class='content' style='text-align: justify'>
+               Security and convenience rarely go hand-in-hand, but if your job (or life) requires extraordinary care against potentially targeted attacks, the security side of that tradeoff may win out. If so, running a system like Qubes OS on your desktop or CopperheadOS on your phone might make sense, which is just what Konstantin Ryabitsev, Linux Foundation (LF) director of IT security, has done. He reported on the experience in a talk [YouTube video] entitled &quot;Life Behind the Tinfoil Curtain&quot; at the 2018 Linux Security Summit North America.
+       </div>
+<hr/>
+</div>
+</body>
diff --git a/test/expected/LWN/0000764048.html b/test/expected/LWN/0000764048.html

new file mode 100644 (file)

index 0000000..8675aee
--- /dev/null
+++ b/test/expected/LWN/0000764048.html
@@ -0,0 +1,25 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta http-equiv='content-type' content='text/html; charset=utf-8'>
+  <meta name='viewport' content='width=device-width, initial-scale=1.0'>
+  <style type='text/css'>
+    body { margin: 1em 15%; }
+  </style>
+</head>
+<body>
+<div class='story'>
+       <h1>[$] Life behind the tinfoil curtain</h1>
+       <div class='details'>([Security] Sep 5, 2018 22:11 UTC (Wed) (jake))</div>
+       <br/>
+    <ul>
+        <li>News link: <a href='https://lwn.net/Articles/764048'>https://lwn.net/Articles/764048</a></li>
+        <li>Source link: <a href=''></a></li>
+    </ul>
+       <br/>
+       <div class='content' style='text-align: justify'>
+               [$] Sorry, this article is currently available to LWN suscribers only [https://lwn.net/subscribe/].
+       </div>
+<hr/>
+</div>
+</body>
diff --git a/test/expected/LWN/0000764055 b/test/expected/LWN/0000764055

new file mode 100644 (file)

index 0000000..5136596
--- /dev/null
+++ b/test/expected/LWN/0000764055
@@ -0,0 +1,239 @@
+         TOPICS SOUGHT FOR THE KERNEL AND MAINTAINER SUMMITS       \r
+\r
+  [Kernel] Sep 3, 2018 19:07 UTC (Mon) (corbet)\r
+\r
+  o News link: https://lwn.net/Articles/764055\r
+  o Source link: \r
+\r
+\r
+    The  annual  Maintainer  and  Kernel  Summits  will  be held in\r
+    Vancouver,  BC  on  November  12 to 15, in conjunction with the\r
+    Linux  Plumbers  Conference.  The  program committee is looking\r
+    for  topics  for  both  summits;  read on for details on how to\r
+    submit  ideas and, perhaps, get an invitation to the Maintainer\r
+    Summit.\r
+    \r
+    From :\r
+    \r
+    "Theodore Y. Ts'o" <tytso-AT-mit.edu>\r
+    \r
+    To :\r
+    \r
+    linux-kernel-AT-vger.kernel.org, linux-fsdevel-AT-vger.kernel.-\r
+    org,      linux-mm-AT-kvack.org,     netdev-AT-vger.kernel.org,\r
+    linux-block-AT-vger.kernel.org\r
+    \r
+    Subject :\r
+    \r
+    Maintainer / Kernel Summit 2018 planning kick-off\r
+    \r
+    Date :\r
+    \r
+    Thu, 30 Aug 2018 17:35:17 -0400\r
+    \r
+    Message-ID :\r
+    \r
+    <20180830213517.GA19110@thunk.org>\r
+    \r
+    Archive-link :\r
+    \r
+    [1]Article\r
+    \r
+    [  Feel  free  to  forward  this  to other Linux kernel mailing\r
+    lists as\r
+    \r
+    appropriate -- Ted ]\r
+    \r
+    This  year,  the  Maintainer  and  Kernel  Summit  will  be  in\r
+    Vancouver,\r
+    \r
+    B.C.,  November  12th  -- 15th. The Maintainer's summit will be\r
+    held on\r
+    \r
+    Monday,  November  12th,  in  Vancouver, immediately before the\r
+    Linux\r
+    \r
+    Plumber's Conference (LPC) November 13th -- 15th.\r
+    \r
+    For  the  past  few  years, before 2017, we've scheduled mostly\r
+    management\r
+    \r
+    and  development  process  issues  on  the  first  day. We then\r
+    opened up\r
+    \r
+    the  second  day  of  the Kernel Summit to all attendees of the\r
+    conference\r
+    \r
+    with  which the Kernel Summit has been colocated, and called it\r
+    the\r
+    \r
+    "Open  Technical  Day".  This is something that just made sense\r
+    in order\r
+    \r
+    to assure that all of the necessary people needed to discuss a\r
+    \r
+    particular technical issue could be in the room.\r
+    \r
+    Starting  last  year  in Prague, we took the next logical step,\r
+    and split\r
+    \r
+    the Kernel Summit in two. The "Maintainer's Summit" is an\r
+    \r
+    invite-only,  half-day  event,  where the primary focus will be\r
+    process\r
+    \r
+    issues  of  Linux  Kernel Development. It will be limited to 30\r
+    invitees\r
+    \r
+    and  a  handful  of  sponsored attendees. This makes it smaller\r
+    than the\r
+    \r
+    first  few  kernel  summits  (which  were  limited to around 50\r
+    attendees).\r
+    \r
+    The "Kernel Summit" is now organized as a track which is run in\r
+    \r
+    parallel   with   the  other  tracks  at  the  Linux  Plumber's\r
+    Conference, and\r
+    \r
+    is open to all registered attendees of Plumbers. Much as how we\r
+    \r
+    organized  the  Kernel  Summit  "open technical day" in 2016 in\r
+    Santa Fe,\r
+    \r
+    the  Kernel Summit schedule will be synchronized with the other\r
+    tracks\r
+    \r
+    at  the  Plumber's  Conference,  and  it  will  be  open to all\r
+    registered\r
+    \r
+    Plumber's attendees.\r
+    \r
+    Linus  has  suggested  the  following ten people as the core of\r
+    the people\r
+    \r
+    he  would  like  invited  to the Maintainer's Summit, which was\r
+    calculated\r
+    \r
+    from statistics from his git tree.\r
+    \r
+    David Miller\r
+    \r
+    Dave Airlie\r
+    \r
+    Greg KH\r
+    \r
+    Arnd Bergmann\r
+    \r
+    Ingo Molnar\r
+    \r
+    Mauro Carvalho Chehab\r
+    \r
+    Takashi Iwai\r
+    \r
+    Thomas Gleixner\r
+    \r
+    Andrew Morton\r
+    \r
+    Olof Johansson\r
+    \r
+    As  we  did  last  year, there will be a mini-program committee\r
+    that will\r
+    \r
+    be pick enough names to bring the total number of 30 for the\r
+    \r
+    Maintainer's  Summit.  That  program  committee will consist of\r
+    Arnd\r
+    \r
+    Bergmann,  Thomas  Gleixner,  Greg  KH,  Paul McKenney, and Ted\r
+    Ts'o.\r
+    \r
+    We  will use the rest of names on the list generated by Linus's\r
+    script\r
+    \r
+    as  a  starting  point  of  people to be considered. People who\r
+    suggest\r
+    \r
+    topics  that  should  be  discussed  on the Maintainer's summit\r
+    will also\r
+    \r
+    be  added  to  the  list.  To  make  topic  suggestions for the\r
+    Maintainer's\r
+    \r
+    Summit,  please  send e-mail to the ksummit-discuss list with a\r
+    subject\r
+    \r
+    prefix of [MAINTAINERS SUMMIT].\r
+    \r
+    The  other job of the program committee will be to organize the\r
+    program\r
+    \r
+    for  the  Kernel  Summit.  The  goal of the Kernel Summit track\r
+    will be to\r
+    \r
+    provide  a  forum  to  discuss  specific  technical issues that\r
+    would be\r
+    \r
+    easier  to  resolve  in  person  than  over e-mail. The program\r
+    committee\r
+    \r
+    will  also  consider  "information  sharing" topics if they are\r
+    clearly of\r
+    \r
+    interest  to  the  wider  development community (i.e., advanced\r
+    training\r
+    \r
+    in topics that would be useful to kernel developers).\r
+    \r
+    To  suggest  a  topic  for  the  Kernel Summit, please tag your\r
+    e-mail with\r
+    \r
+    [TECH  TOPIC]. As before, please use a separate e-mail for each\r
+    topic,\r
+    \r
+    and send the topic suggestions to:\r
+    \r
+    ksummit-discuss@lists.linuxfoundation.org\r
+    \r
+    People  who  submit topic suggestions before September 21st and\r
+    which\r
+    \r
+    are  accepted,  will  be  given  a  free admission to the Linux\r
+    Plumbers\r
+    \r
+    Conference.\r
+    \r
+    We  will  reserving  roughly  half  the Kernel Summit slots for\r
+    last-minute\r
+    \r
+    discussions   that   will  be  scheduled  during  the  week  of\r
+    Plumber's, in an\r
+    \r
+    "unconference  style".  This  was extremely popular in Santa Fe\r
+    and in\r
+    \r
+    Prague,  since  it  allowed  ideas  that  came  up  in  hallway\r
+    discussions,\r
+    \r
+    and   in  Plumber's  Miniconference,  to  be  given  scheduled,\r
+    dedicated\r
+    \r
+    times for that discussion.\r
+    \r
+    If  you  were  not  subscribed on to the kernel-discuss mailing\r
+    list from\r
+    \r
+    last  year  (or  if  you  had removed yourself after the kernel\r
+    summit),\r
+    \r
+    you can subscribe to the discuss list using mailman:\r
+    \r
+    https://lists.linuxfoundation.org/mailman/listinfo/ksummi...\r
+    \r
+    \r
+    \r
+    [1] https://lwn.net/ml/linux-kernel/20180830213517.GA19110@thu-\r
+    nk.org\r
+\r
+\r
+\r
diff --git a/test/expected/LWN/0000764055.header b/test/expected/LWN/0000764055.header

new file mode 100644 (file)

index 0000000..0b2a9b6
--- /dev/null
+++ b/test/expected/LWN/0000764055.header
@@ -0,0 +1,10 @@
+0Topics sought for the Kernel and Maintainer Summits   null/LWN/0000764055             70\r
+i  [Kernel] Sep 3, 2018 19:07 UTC (Mon) (corbet)\r
+i\r
+i    The  annual  Maintainer  and  Kernel  Summits  will  be held in\r
+i    Vancouver,  BC  on  November 12  to 15, in conjunction with the\r
+i    Linux  Plumbers  Conference.  The  program committee is looking\r
+i    for  topics  for  both  summits;  read on for details on how to\r
+i    submit  ideas and, perhaps, get an invitation to the Maintainer\r
+i    Summit.\r
+i\r
diff --git a/test/expected/LWN/0000764055.header.html b/test/expected/LWN/0000764055.header.html

new file mode 100644 (file)

index 0000000..359a99c
--- /dev/null
+++ b/test/expected/LWN/0000764055.header.html
@@ -0,0 +1,20 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta http-equiv='content-type' content='text/html; charset=utf-8'>
+  <meta name='viewport' content='width=device-width, initial-scale=1.0'>
+  <style type='text/css'>
+    body { margin: 1em 15%; }
+  </style>
+</head>
+<body>
+<div class='story-header'>
+       <h1><a href='0000764055.html'>Topics sought for the Kernel and Maintainer Summits</a></h1>
+       <div class='details'>([Kernel] Sep 3, 2018 19:07 UTC (Mon) (corbet))</div>
+       <br/>
+       <div class='content' style='text-align: justify'>
+               The annual Maintainer and Kernel Summits will be held in Vancouver, BC on November 12 to 15, in conjunction with the Linux Plumbers Conference. The program committee is looking for topics for both summits; read on for details on how to submit ideas and, perhaps, get an invitation to the Maintainer Summit.
+       </div>
+<hr/>
+</div>
+</body>
diff --git a/test/expected/LWN/0000764055.html b/test/expected/LWN/0000764055.html

new file mode 100644 (file)

index 0000000..14008c9
--- /dev/null
+++ b/test/expected/LWN/0000764055.html
@@ -0,0 +1,25 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta http-equiv='content-type' content='text/html; charset=utf-8'>
+  <meta name='viewport' content='width=device-width, initial-scale=1.0'>
+  <style type='text/css'>
+    body { margin: 1em 15%; }
+  </style>
+</head>
+<body>
+<div class='story'>
+       <h1>Topics sought for the Kernel and Maintainer Summits</h1>
+       <div class='details'>([Kernel] Sep 3, 2018 19:07 UTC (Mon) (corbet))</div>
+       <br/>
+    <ul>
+        <li>News link: <a href='https://lwn.net/Articles/764055'>https://lwn.net/Articles/764055</a></li>
+        <li>Source link: <a href=''></a></li>
+    </ul>
+       <br/>
+       <div class='content' style='text-align: justify'>
+               The annual Maintainer and Kernel Summits will be held in Vancouver, BC on November 12 to 15, in conjunction with the Linux Plumbers Conference. The program committee is looking for topics for both summits; read on for details on how to submit ideas and, perhaps, get an invitation to the Maintainer Summit.<br/><br/>From :<br/><br/>&quot;Theodore Y. Ts&#x27;o&quot; &lt;tytso-AT-mit.edu&gt;<br/><br/>To :<br/><br/>linux-kernel-AT-vger.kernel.org, linux-fsdevel-AT-vger.kernel.org, linux-mm-AT-kvack.org, netdev-AT-vger.kernel.org, linux-block-AT-vger.kernel.org<br/><br/>Subject :<br/><br/>Maintainer / Kernel Summit 2018 planning kick-off<br/><br/>Date :<br/><br/>Thu, 30 Aug 2018 17:35:17 -0400<br/><br/>Message-ID :<br/><br/>&lt;20180830213517.GA19110@thunk.org&gt;<br/><br/>Archive-link :<br/><br/>[1]Article<br/><br/><h2>Feel free to forward this to other Linux kernel mailing lists as<br/><br/>appropriate -- Ted</h2><br/><br/>This year, the Maintainer and Kernel Summit will be in Vancouver,<br/><br/>B.C., November 12th -- 15th. The Maintainer&#x27;s summit will be held on<br/><br/>Monday, November 12th, in Vancouver, immediately before the Linux<br/><br/>Plumber&#x27;s Conference (LPC) November 13th -- 15th.<br/><br/>For the past few years, before 2017, we&#x27;ve scheduled mostly management<br/><br/>and development process issues on the first day. We then opened up<br/><br/>the second day of the Kernel Summit to all attendees of the conference<br/><br/>with which the Kernel Summit has been colocated, and called it the<br/><br/>&quot;Open Technical Day&quot;. This is something that just made sense in order<br/><br/>to assure that all of the necessary people needed to discuss a<br/><br/>particular technical issue could be in the room.<br/><br/>Starting last year in Prague, we took the next logical step, and split<br/><br/>the Kernel Summit in two. 
The &quot;Maintainer&#x27;s Summit&quot; is an<br/><br/>invite-only, half-day event, where the primary focus will be process<br/><br/>issues of Linux Kernel Development. It will be limited to 30 invitees<br/><br/>and a handful of sponsored attendees. This makes it smaller than the<br/><br/>first few kernel summits (which were limited to around 50 attendees).<br/><br/>The &quot;Kernel Summit&quot; is now organized as a track which is run in<br/><br/>parallel with the other tracks at the Linux Plumber&#x27;s Conference, and<br/><br/>is open to all registered attendees of Plumbers. Much as how we<br/><br/>organized the Kernel Summit &quot;open technical day&quot; in 2016 in Santa Fe,<br/><br/>the Kernel Summit schedule will be synchronized with the other tracks<br/><br/>at the Plumber&#x27;s Conference, and it will be open to all registered<br/><br/>Plumber&#x27;s attendees.<br/><br/>Linus has suggested the following ten people as the core of the people<br/><br/>he would like invited to the Maintainer&#x27;s Summit, which was calculated<br/><br/>from statistics from his git tree.<br/><br/>David Miller<br/><br/>Dave Airlie<br/><br/>Greg KH<br/><br/>Arnd Bergmann<br/><br/>Ingo Molnar<br/><br/>Mauro Carvalho Chehab<br/><br/>Takashi Iwai<br/><br/>Thomas Gleixner<br/><br/>Andrew Morton<br/><br/>Olof Johansson<br/><br/>As we did last year, there will be a mini-program committee that will<br/><br/>be pick enough names to bring the total number of 30 for the<br/><br/>Maintainer&#x27;s Summit. That program committee will consist of Arnd<br/><br/>Bergmann, Thomas Gleixner, Greg KH, Paul McKenney, and Ted Ts&#x27;o.<br/><br/>We will use the rest of names on the list generated by Linus&#x27;s script<br/><br/>as a starting point of people to be considered. People who suggest<br/><br/>topics that should be discussed on the Maintainer&#x27;s summit will also<br/><br/>be added to the list. 
To make topic suggestions for the Maintainer&#x27;s<br/><br/>Summit, please send e-mail to the ksummit-discuss list with a subject<br/><br/>prefix of [MAINTAINERS SUMMIT].<br/><br/>The other job of the program committee will be to organize the program<br/><br/>for the Kernel Summit. The goal of the Kernel Summit track will be to<br/><br/>provide a forum to discuss specific technical issues that would be<br/><br/>easier to resolve in person than over e-mail. The program committee<br/><br/>will also consider &quot;information sharing&quot; topics if they are clearly of<br/><br/>interest to the wider development community (i.e., advanced training<br/><br/>in topics that would be useful to kernel developers).<br/><br/>To suggest a topic for the Kernel Summit, please tag your e-mail with<br/><br/>[TECH TOPIC]. As before, please use a separate e-mail for each topic,<br/><br/>and send the topic suggestions to:<br/><br/>ksummit-discuss@lists.linuxfoundation.org<br/><br/>People who submit topic suggestions before September 21st and which<br/><br/>are accepted, will be given a free admission to the Linux Plumbers<br/><br/>Conference.<br/><br/>We will reserving roughly half the Kernel Summit slots for last-minute<br/><br/>discussions that will be scheduled during the week of Plumber&#x27;s, in an<br/><br/>&quot;unconference style&quot;. This was extremely popular in Santa Fe and in<br/><br/>Prague, since it allowed ideas that came up in hallway discussions,<br/><br/>and in Plumber&#x27;s Miniconference, to be given scheduled, dedicated<br/><br/>times for that discussion.<br/><br/>If you were not subscribed on to the kernel-discuss mailing list from<br/><br/>last year (or if you had removed yourself after the kernel summit),<br/><br/>you can subscribe to the discuss list using mailman:<br/><br/>https://lists.linuxfoundation.org/mailman/listinfo/ksummi...<br/><br/><br/><br/>[1] https://lwn.net/ml/linux-kernel/20180830213517.GA19110@thunk.org
+       </div>
+<hr/>
+</div>
+</body>
diff --git a/test/expected/LWN/0000764057 b/test/expected/LWN/0000764057

new file mode 100644 (file)

index 0000000..58a24b1
--- /dev/null
+++ b/test/expected/LWN/0000764057
@@ -0,0 +1,13 @@
+    [$] IDA: SIMPLIFYING THE COMPLEX TASK OF ALLOCATING INTEGERS   \r
+\r
+  [Kernel] Sep 4, 2018 0:15 UTC (Tue) (corbet)\r
+\r
+  o News link: https://lwn.net/Articles/764057\r
+  o Source link: \r
+\r
+\r
+    [$]   Sorry,   this  article  is  currently  available  to  LWN\r
+    suscribers only [https://lwn.net/subscribe/].\r
+\r
+\r
+\r
diff --git a/test/expected/LWN/0000764057.header b/test/expected/LWN/0000764057.header

new file mode 100644 (file)

index 0000000..eb103f9
--- /dev/null
+++ b/test/expected/LWN/0000764057.header
@@ -0,0 +1,16 @@
+0[$] IDA: simplifying the complex task of allocating integers  null/LWN/0000764057             70\r
+i  [Kernel] Sep 4, 2018 0:15 UTC (Tue) (corbet)\r
+i\r
+i    It  is  common  for kernel code to generate unique integers for\r
+i    identifiers.  When  one plugs in a flash drive, it will show up\r
+i    as  /dev/sdN;  that  N (a letter derived from a number) must be\r
+i    generated  in  the  kernel, and it should not already be in use\r
+i    for  another  drive or unpleasant things will happen. One might\r
+i    think  that  generating  such  numbers would not be a difficult\r
+i    task,  but  that  turns  out  not to be the case, especially in\r
+i    situations  where  many  numbers  must be tracked. The IDA (for\r
+i    "ID  allocator", perhaps) API exists to handle this specialized\r
+i    task.  In  past  kernels, it has managed to make the process of\r
+i    getting  an unused number surprisingly complex; the 4.19 kernel\r
+i    has a new IDA API that simplifies things considerably.\r
+i\r
diff --git a/test/expected/LWN/0000764057.header.html b/test/expected/LWN/0000764057.header.html

new file mode 100644 (file)

index 0000000..80f1739
--- /dev/null
+++ b/test/expected/LWN/0000764057.header.html
@@ -0,0 +1,20 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta http-equiv='content-type' content='text/html; charset=utf-8'>
+  <meta name='viewport' content='width=device-width, initial-scale=1.0'>
+  <style type='text/css'>
+    body { margin: 1em 15%; }
+  </style>
+</head>
+<body>
+<div class='story-header'>
+       <h1><a href='0000764057.html'>[$] IDA: simplifying the complex task of allocating integers</a></h1>
+       <div class='details'>([Kernel] Sep 4, 2018 0:15 UTC (Tue) (corbet))</div>
+       <br/>
+       <div class='content' style='text-align: justify'>
+               It is common for kernel code to generate unique integers for identifiers. When one plugs in a flash drive, it will show up as /dev/sdN; that N (a letter derived from a number) must be generated in the kernel, and it should not already be in use for another drive or unpleasant things will happen. One might think that generating such numbers would not be a difficult task, but that turns out not to be the case, especially in situations where many numbers must be tracked. The IDA (for &quot;ID allocator&quot;, perhaps) API exists to handle this specialized task. In past kernels, it has managed to make the process of getting an unused number surprisingly complex; the 4.19 kernel has a new IDA API that simplifies things considerably.
+       </div>
+<hr/>
+</div>
+</body>
diff --git a/test/expected/LWN/0000764057.html b/test/expected/LWN/0000764057.html

new file mode 100644 (file)

index 0000000..cba93c8
--- /dev/null
+++ b/test/expected/LWN/0000764057.html
@@ -0,0 +1,25 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta http-equiv='content-type' content='text/html; charset=utf-8'>
+  <meta name='viewport' content='width=device-width, initial-scale=1.0'>
+  <style type='text/css'>
+    body { margin: 1em 15%; }
+  </style>
+</head>
+<body>
+<div class='story'>
+       <h1>[$] IDA: simplifying the complex task of allocating integers</h1>
+       <div class='details'>([Kernel] Sep 4, 2018 0:15 UTC (Tue) (corbet))</div>
+       <br/>
+    <ul>
+        <li>News link: <a href='https://lwn.net/Articles/764057'>https://lwn.net/Articles/764057</a></li>
+        <li>Source link: <a href=''></a></li>
+    </ul>
+       <br/>
+       <div class='content' style='text-align: justify'>
+               [$] Sorry, this article is currently available to LWN suscribers only [https://lwn.net/subscribe/].
+       </div>
+<hr/>
+</div>
+</body>
diff --git a/test/expected/LWN/0000764130 b/test/expected/LWN/0000764130

new file mode 100644 (file)

index 0000000..a01bd05
--- /dev/null
+++ b/test/expected/LWN/0000764130
@@ -0,0 +1,263 @@
+                    SECURITY UPDATES FOR TUESDAY                   \r
+\r
+  [Security] Sep 4, 2018 15:14 UTC (Tue) (ris)\r
+\r
+  o News link: https://lwn.net/Articles/764130\r
+  o Source link: \r
+\r
+\r
+    Dist.\r
+    \r
+    ID\r
+    \r
+    Release\r
+    \r
+    Package\r
+    \r
+    Date\r
+    \r
+    openSUSE\r
+    \r
+    [1]openSUSE-SU-2018:2600-1\r
+    \r
+    15.0\r
+    \r
+    ImageMagick\r
+    \r
+    2018-09-04\r
+    \r
+    openSUSE\r
+    \r
+    [2]openSUSE-SU-2018:2597-1\r
+    \r
+    42.3\r
+    \r
+    libressl\r
+    \r
+    2018-09-04\r
+    \r
+    openSUSE\r
+    \r
+    [3]openSUSE-SU-2018:2599-1\r
+    \r
+    15.0\r
+    \r
+    postgresql10\r
+    \r
+    2018-09-04\r
+    \r
+    openSUSE\r
+    \r
+    [4]openSUSE-SU-2018:2598-1\r
+    \r
+    15.0\r
+    \r
+    spice\r
+    \r
+    2018-09-04\r
+    \r
+    openSUSE\r
+    \r
+    [5]openSUSE-SU-2018:2602-1\r
+    \r
+    42.3\r
+    \r
+    spice\r
+    \r
+    2018-09-04\r
+    \r
+    openSUSE\r
+    \r
+    [6]openSUSE-SU-2018:2601-1\r
+    \r
+    42.3\r
+    \r
+    spice-gtk\r
+    \r
+    2018-09-04\r
+    \r
+    Red Hat\r
+    \r
+    [7]RHSA-2018:2616-01\r
+    \r
+    EL7\r
+    \r
+    RHGS WA\r
+    \r
+    2018-09-04\r
+    \r
+    Red Hat\r
+    \r
+    [8]RHSA-2018:2608-01\r
+    \r
+    EL6\r
+    \r
+    Red Hat Gluster Storage\r
+    \r
+    2018-09-04\r
+    \r
+    Red Hat\r
+    \r
+    [9]RHSA-2018:2607-01\r
+    \r
+    EL7\r
+    \r
+    Red Hat Gluster Storage\r
+    \r
+    2018-09-04\r
+    \r
+    Red Hat\r
+    \r
+    [10]RHSA-2018:2626-01\r
+    \r
+    EL7\r
+    \r
+    Red Hat Virtualization\r
+    \r
+    2018-09-04\r
+    \r
+    Red Hat\r
+    \r
+    [11]RHSA-2018:2615-01\r
+    \r
+    EL7\r
+    \r
+    collectd\r
+    \r
+    2018-09-04\r
+    \r
+    Red Hat\r
+    \r
+    [12]RHSA-2018:2645-01\r
+    \r
+    EL6.7\r
+    \r
+    kernel\r
+    \r
+    2018-09-04\r
+    \r
+    Red Hat\r
+    \r
+    [13]RHSA-2018:2643-01\r
+    \r
+    EL7\r
+    \r
+    rhvm-appliance\r
+    \r
+    2018-09-04\r
+    \r
+    Red Hat\r
+    \r
+    [14]RHSA-2018:2612-01\r
+    \r
+    EL6\r
+    \r
+    samba\r
+    \r
+    2018-09-04\r
+    \r
+    Red Hat\r
+    \r
+    [15]RHSA-2018:2613-01\r
+    \r
+    EL7\r
+    \r
+    samba\r
+    \r
+    2018-09-04\r
+    \r
+    SUSE\r
+    \r
+    [16]SUSE-SU-2018:2603-1\r
+    \r
+    OS7\r
+    \r
+    crowbar,     crowbar-core,    crowbar-ha,    crowbar-openstack,\r
+    crowbar-ui\r
+    \r
+    2018-09-04\r
+    \r
+    SUSE\r
+    \r
+    [17]SUSE-SU-2018:2596-1\r
+    \r
+    SLE12\r
+    \r
+    kernel\r
+    \r
+    2018-09-03\r
+    \r
+    SUSE\r
+    \r
+    [18]SUSE-SU-2018:2595-1\r
+    \r
+    SLE12\r
+    \r
+    spice\r
+    \r
+    2018-09-03\r
+    \r
+    SUSE\r
+    \r
+    [19]SUSE-SU-2018:2594-1\r
+    \r
+    SLE12\r
+    \r
+    spice-gtk\r
+    \r
+    2018-09-03\r
+    \r
+    SUSE\r
+    \r
+    [20]SUSE-SU-2018:2593-1\r
+    \r
+    SLE12\r
+    \r
+    spice-gtk\r
+    \r
+    2018-09-03\r
+    \r
+    \r
+    \r
+    [1] https://lwn.net/Articles/764119/\r
+    \r
+    [2] https://lwn.net/Articles/764120/\r
+    \r
+    [3] https://lwn.net/Articles/764121/\r
+    \r
+    [4] https://lwn.net/Articles/764122/\r
+    \r
+    [5] https://lwn.net/Articles/764123/\r
+    \r
+    [6] https://lwn.net/Articles/764124/\r
+    \r
+    [7] https://lwn.net/Articles/764115/\r
+    \r
+    [8] https://lwn.net/Articles/764113/\r
+    \r
+    [9] https://lwn.net/Articles/764112/\r
+    \r
+    [10] https://lwn.net/Articles/764114/\r
+    \r
+    [11] https://lwn.net/Articles/764110/\r
+    \r
+    [12] https://lwn.net/Articles/764111/\r
+    \r
+    [13] https://lwn.net/Articles/764116/\r
+    \r
+    [14] https://lwn.net/Articles/764117/\r
+    \r
+    [15] https://lwn.net/Articles/764118/\r
+    \r
+    [16] https://lwn.net/Articles/764125/\r
+    \r
+    [17] https://lwn.net/Articles/764126/\r
+    \r
+    [18] https://lwn.net/Articles/764127/\r
+    \r
+    [19] https://lwn.net/Articles/764129/\r
+    \r
+    [20] https://lwn.net/Articles/764128/\r
+\r
+\r
+\r
diff --git a/test/expected/LWN/0000764130.header b/test/expected/LWN/0000764130.header

new file mode 100644 (file)

index 0000000..af3b6b3
--- /dev/null
+++ b/test/expected/LWN/0000764130.header
@@ -0,0 +1,10 @@
+0Security updates for Tuesday  null/LWN/0000764130             70\r
+i  [Security] Sep 4, 2018 15:14 UTC (Tue) (ris)\r
+i\r
+i    Security  updates  have  been  issued by openSUSE (ImageMagick,\r
+i    libressl,   postgresql10,   spice,   and  spice-gtk),  Red  Hat\r
+i    (collectd,   kernel,   Red   Hat   Gluster   Storage,  Red  Hat\r
+i    Virtualization,  RHGS  WA, rhvm-appliance, and samba), and SUSE\r
+i    (crowbar,    crowbar-core,    crowbar-ha,    crowbar-openstack,\r
+i    crowbar-ui, kernel, spice, and spice-gtk).\r
+i\r
diff --git a/test/expected/LWN/0000764130.header.html b/test/expected/LWN/0000764130.header.html

new file mode 100644 (file)

index 0000000..792d886
--- /dev/null
+++ b/test/expected/LWN/0000764130.header.html
@@ -0,0 +1,20 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta http-equiv='content-type' content='text/html; charset=utf-8'>
+  <meta name='viewport' content='width=device-width, initial-scale=1.0'>
+  <style type='text/css'>
+    body { margin: 1em 15%; }
+  </style>
+</head>
+<body>
+<div class='story-header'>
+       <h1><a href='0000764130.html'>Security updates for Tuesday</a></h1>
+       <div class='details'>([Security] Sep 4, 2018 15:14 UTC (Tue) (ris))</div>
+       <br/>
+       <div class='content' style='text-align: justify'>
+               Security updates have been issued by openSUSE (ImageMagick, libressl, postgresql10, spice, and spice-gtk), Red Hat (collectd, kernel, Red Hat Gluster Storage, Red Hat Virtualization, RHGS WA, rhvm-appliance, and samba), and SUSE (crowbar, crowbar-core, crowbar-ha, crowbar-openstack, crowbar-ui, kernel, spice, and spice-gtk).
+       </div>
+<hr/>
+</div>
+</body>
diff --git a/test/expected/LWN/0000764130.html b/test/expected/LWN/0000764130.html

new file mode 100644 (file)

index 0000000..cd24a80
--- /dev/null
+++ b/test/expected/LWN/0000764130.html
@@ -0,0 +1,25 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta http-equiv='content-type' content='text/html; charset=utf-8'>
+  <meta name='viewport' content='width=device-width, initial-scale=1.0'>
+  <style type='text/css'>
+    body { margin: 1em 15%; }
+  </style>
+</head>
+<body>
+<div class='story'>
+       <h1>Security updates for Tuesday</h1>
+       <div class='details'>([Security] Sep 4, 2018 15:14 UTC (Tue) (ris))</div>
+       <br/>
+    <ul>
+        <li>News link: <a href='https://lwn.net/Articles/764130'>https://lwn.net/Articles/764130</a></li>
+        <li>Source link: <a href=''></a></li>
+    </ul>
+       <br/>
+       <div class='content' style='text-align: justify'>
+               Dist.<br/><br/>ID<br/><br/>Release<br/><br/>Package<br/><br/>Date<br/><br/>openSUSE<br/><br/>[1]openSUSE-SU-2018:2600-1<br/><br/>15.0<br/><br/>ImageMagick<br/><br/>2018-09-04<br/><br/>openSUSE<br/><br/>[2]openSUSE-SU-2018:2597-1<br/><br/>42.3<br/><br/>libressl<br/><br/>2018-09-04<br/><br/>openSUSE<br/><br/>[3]openSUSE-SU-2018:2599-1<br/><br/>15.0<br/><br/>postgresql10<br/><br/>2018-09-04<br/><br/>openSUSE<br/><br/>[4]openSUSE-SU-2018:2598-1<br/><br/>15.0<br/><br/>spice<br/><br/>2018-09-04<br/><br/>openSUSE<br/><br/>[5]openSUSE-SU-2018:2602-1<br/><br/>42.3<br/><br/>spice<br/><br/>2018-09-04<br/><br/>openSUSE<br/><br/>[6]openSUSE-SU-2018:2601-1<br/><br/>42.3<br/><br/>spice-gtk<br/><br/>2018-09-04<br/><br/>Red Hat<br/><br/>[7]RHSA-2018:2616-01<br/><br/>EL7<br/><br/>RHGS WA<br/><br/>2018-09-04<br/><br/>Red Hat<br/><br/>[8]RHSA-2018:2608-01<br/><br/>EL6<br/><br/>Red Hat Gluster Storage<br/><br/>2018-09-04<br/><br/>Red Hat<br/><br/>[9]RHSA-2018:2607-01<br/><br/>EL7<br/><br/>Red Hat Gluster Storage<br/><br/>2018-09-04<br/><br/>Red Hat<br/><br/>[10]RHSA-2018:2626-01<br/><br/>EL7<br/><br/>Red Hat Virtualization<br/><br/>2018-09-04<br/><br/>Red Hat<br/><br/>[11]RHSA-2018:2615-01<br/><br/>EL7<br/><br/>collectd<br/><br/>2018-09-04<br/><br/>Red Hat<br/><br/>[12]RHSA-2018:2645-01<br/><br/>EL6.7<br/><br/>kernel<br/><br/>2018-09-04<br/><br/>Red Hat<br/><br/>[13]RHSA-2018:2643-01<br/><br/>EL7<br/><br/>rhvm-appliance<br/><br/>2018-09-04<br/><br/>Red Hat<br/><br/>[14]RHSA-2018:2612-01<br/><br/>EL6<br/><br/>samba<br/><br/>2018-09-04<br/><br/>Red Hat<br/><br/>[15]RHSA-2018:2613-01<br/><br/>EL7<br/><br/>samba<br/><br/>2018-09-04<br/><br/>SUSE<br/><br/>[16]SUSE-SU-2018:2603-1<br/><br/>OS7<br/><br/>crowbar, crowbar-core, crowbar-ha, crowbar-openstack, 
crowbar-ui<br/><br/>2018-09-04<br/><br/>SUSE<br/><br/>[17]SUSE-SU-2018:2596-1<br/><br/>SLE12<br/><br/>kernel<br/><br/>2018-09-03<br/><br/>SUSE<br/><br/>[18]SUSE-SU-2018:2595-1<br/><br/>SLE12<br/><br/>spice<br/><br/>2018-09-03<br/><br/>SUSE<br/><br/>[19]SUSE-SU-2018:2594-1<br/><br/>SLE12<br/><br/>spice-gtk<br/><br/>2018-09-03<br/><br/>SUSE<br/><br/>[20]SUSE-SU-2018:2593-1<br/><br/>SLE12<br/><br/>spice-gtk<br/><br/>2018-09-03<br/><br/><br/><br/>[1] https://lwn.net/Articles/764119/<br/><br/>[2] https://lwn.net/Articles/764120/<br/><br/>[3] https://lwn.net/Articles/764121/<br/><br/>[4] https://lwn.net/Articles/764122/<br/><br/>[5] https://lwn.net/Articles/764123/<br/><br/>[6] https://lwn.net/Articles/764124/<br/><br/>[7] https://lwn.net/Articles/764115/<br/><br/>[8] https://lwn.net/Articles/764113/<br/><br/>[9] https://lwn.net/Articles/764112/<br/><br/>[10] https://lwn.net/Articles/764114/<br/><br/>[11] https://lwn.net/Articles/764110/<br/><br/>[12] https://lwn.net/Articles/764111/<br/><br/>[13] https://lwn.net/Articles/764116/<br/><br/>[14] https://lwn.net/Articles/764117/<br/><br/>[15] https://lwn.net/Articles/764118/<br/><br/>[16] https://lwn.net/Articles/764125/<br/><br/>[17] https://lwn.net/Articles/764126/<br/><br/>[18] https://lwn.net/Articles/764127/<br/><br/>[19] https://lwn.net/Articles/764129/<br/><br/>[20] https://lwn.net/Articles/764128/
+       </div>
+<hr/>
+</div>
+</body>
diff --git a/test/expected/LWN/0000764131 b/test/expected/LWN/0000764131

new file mode 100644 (file)

index 0000000..94161c5
--- /dev/null
+++ b/test/expected/LWN/0000764131
@@ -0,0 +1,13 @@
+            [$] LEARNING ABOUT GO INTERNALS AT GOPHERCON           \r
+\r
+  [Front] Sep 5, 2018 19:20 UTC (Wed) (jake)\r
+\r
+  o News link: https://lwn.net/Articles/764131\r
+  o Source link: \r
+\r
+\r
+    [$]   Sorry,   this  article  is  currently  available  to  LWN\r
+    suscribers only [https://lwn.net/subscribe/].\r
+\r
+\r
+\r
diff --git a/test/expected/LWN/0000764131.header b/test/expected/LWN/0000764131.header

new file mode 100644 (file)

index 0000000..402f3cd
--- /dev/null
+++ b/test/expected/LWN/0000764131.header
@@ -0,0 +1,14 @@
+0[$] Learning about Go internals at GopherCon  null/LWN/0000764131             70\r
+i  [Front] Sep 5, 2018 19:20 UTC (Wed) (jake)\r
+i\r
+i    GopherCon   is  the  major  conference  for  the  Go  language,\r
+i    attended  by  1600  dedicated  "gophers", as the members of its\r
+i    community  like  to  call  themselves.  Held  for the last five\r
+i    years   in   Denver,   it   attracts  programmers,  open-source\r
+i    contributors,  and  technical  managers  from  all  over  North\r
+i    America  and the world. GopherCon's highly-technical program is\r
+i    an  intense  mix  of  Go internals and programming tutorials, a\r
+i    few  of  which we will explore in this article. Subscribers can\r
+i    read  on  for  a  report  from  GopherCon  by guest author Josh\r
+i    Berkus.\r
+i\r
diff --git a/test/expected/LWN/0000764131.header.html b/test/expected/LWN/0000764131.header.html

new file mode 100644 (file)

index 0000000..763ec98
--- /dev/null
+++ b/test/expected/LWN/0000764131.header.html
@@ -0,0 +1,20 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta http-equiv='content-type' content='text/html; charset=utf-8'>
+  <meta name='viewport' content='width=device-width, initial-scale=1.0'>
+  <style type='text/css'>
+    body { margin: 1em 15%; }
+  </style>
+</head>
+<body>
+<div class='story-header'>
+       <h1><a href='0000764131.html'>[$] Learning about Go internals at GopherCon</a></h1>
+       <div class='details'>([Front] Sep 5, 2018 19:20 UTC (Wed) (jake))</div>
+       <br/>
+       <div class='content' style='text-align: justify'>
+               GopherCon is the major conference for the Go language, attended by 1600 dedicated &quot;gophers&quot;, as the members of its community like to call themselves. Held for the last five years in Denver, it attracts programmers, open-source contributors, and technical managers from all over North America and the world. GopherCon&#x27;s highly-technical program is an intense mix of Go internals and programming tutorials, a few of which we will explore in this article. Subscribers can read on for a report from GopherCon by guest author Josh Berkus.
+       </div>
+<hr/>
+</div>
+</body>
diff --git a/test/expected/LWN/0000764131.html b/test/expected/LWN/0000764131.html

new file mode 100644 (file)

index 0000000..ca9872f
--- /dev/null
+++ b/test/expected/LWN/0000764131.html
@@ -0,0 +1,25 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta http-equiv='content-type' content='text/html; charset=utf-8'>
+  <meta name='viewport' content='width=device-width, initial-scale=1.0'>
+  <style type='text/css'>
+    body { margin: 1em 15%; }
+  </style>
+</head>
+<body>
+<div class='story'>
+       <h1>[$] Learning about Go internals at GopherCon</h1>
+       <div class='details'>([Front] Sep 5, 2018 19:20 UTC (Wed) (jake))</div>
+       <br/>
+    <ul>
+        <li>News link: <a href='https://lwn.net/Articles/764131'>https://lwn.net/Articles/764131</a></li>
+        <li>Source link: <a href=''></a></li>
+    </ul>
+       <br/>
+       <div class='content' style='text-align: justify'>
+               [$] Sorry, this article is currently available to LWN suscribers only [https://lwn.net/subscribe/].
+       </div>
+<hr/>
+</div>
+</body>
diff --git a/test/expected/LWN/0000764182 b/test/expected/LWN/0000764182

new file mode 100644 (file)

index 0000000..4a44393
--- /dev/null
+++ b/test/expected/LWN/0000764182
@@ -0,0 +1,92 @@
+                   SECURITY UPDATES FOR WEDNESDAY                  \r
+\r
+  [Security] Sep 5, 2018 15:01 UTC (Wed) (ris)\r
+\r
+  o News link: https://lwn.net/Articles/764182\r
+  o Source link: \r
+\r
+\r
+    Dist.\r
+    \r
+    ID\r
+    \r
+    Release\r
+    \r
+    Package\r
+    \r
+    Date\r
+    \r
+    Debian\r
+    \r
+    [1]DSA-4284-1\r
+    \r
+    stable\r
+    \r
+    lcms2\r
+    \r
+    2018-09-04\r
+    \r
+    openSUSE\r
+    \r
+    [2]openSUSE-SU-2018:2623-1\r
+    \r
+    42.3\r
+    \r
+    yubico-piv-tool\r
+    \r
+    2018-09-05\r
+    \r
+    Oracle\r
+    \r
+    [3]ELSA-2018-4208\r
+    \r
+    OL6\r
+    \r
+    kernel\r
+    \r
+    2018-09-04\r
+    \r
+    Oracle\r
+    \r
+    [4]ELSA-2018-4208\r
+    \r
+    OL7\r
+    \r
+    kernel\r
+    \r
+    2018-09-04\r
+    \r
+    SUSE\r
+    \r
+    [5]SUSE-SU-2018:2608-1\r
+    \r
+    cobbler\r
+    \r
+    2018-09-04\r
+    \r
+    SUSE\r
+    \r
+    [6]SUSE-SU-2018:2615-1\r
+    \r
+    SLE11\r
+    \r
+    kvm\r
+    \r
+    2018-09-05\r
+    \r
+    \r
+    \r
+    [1] https://lwn.net/Articles/764176/\r
+    \r
+    [2] https://lwn.net/Articles/764177/\r
+    \r
+    [3] https://lwn.net/Articles/764178/\r
+    \r
+    [4] https://lwn.net/Articles/764179/\r
+    \r
+    [5] https://lwn.net/Articles/764180/\r
+    \r
+    [6] https://lwn.net/Articles/764181/\r
+\r
+\r
+\r
diff --git a/test/expected/LWN/0000764182.header b/test/expected/LWN/0000764182.header

new file mode 100644 (file)

index 0000000..feedb1b
--- /dev/null
+++ b/test/expected/LWN/0000764182.header
@@ -0,0 +1,7 @@
+0Security updates for Wednesday        null/LWN/0000764182             70\r
+i  [Security] Sep 5, 2018 15:01 UTC (Wed) (ris)\r
+i\r
+i    Security  updates  have been issued by Debian (lcms2), openSUSE\r
+i    (yubico-piv-tool),  Oracle  (kernel),  and  SUSE  (cobbler  and\r
+i    kvm).\r
+i\r
diff --git a/test/expected/LWN/0000764182.header.html b/test/expected/LWN/0000764182.header.html

new file mode 100644 (file)

index 0000000..4958638
--- /dev/null
+++ b/test/expected/LWN/0000764182.header.html
@@ -0,0 +1,20 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta http-equiv='content-type' content='text/html; charset=utf-8'>
+  <meta name='viewport' content='width=device-width, initial-scale=1.0'>
+  <style type='text/css'>
+    body { margin: 1em 15%; }
+  </style>
+</head>
+<body>
+<div class='story-header'>
+       <h1><a href='0000764182.html'>Security updates for Wednesday</a></h1>
+       <div class='details'>([Security] Sep 5, 2018 15:01 UTC (Wed) (ris))</div>
+       <br/>
+       <div class='content' style='text-align: justify'>
+               Security updates have been issued by Debian (lcms2), openSUSE (yubico-piv-tool), Oracle (kernel), and SUSE (cobbler and kvm).
+       </div>
+<hr/>
+</div>
+</body>
diff --git a/test/expected/LWN/0000764182.html b/test/expected/LWN/0000764182.html

new file mode 100644 (file)

index 0000000..76b7e83
--- /dev/null
+++ b/test/expected/LWN/0000764182.html
@@ -0,0 +1,25 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta http-equiv='content-type' content='text/html; charset=utf-8'>
+  <meta name='viewport' content='width=device-width, initial-scale=1.0'>
+  <style type='text/css'>
+    body { margin: 1em 15%; }
+  </style>
+</head>
+<body>
+<div class='story'>
+       <h1>Security updates for Wednesday</h1>
+       <div class='details'>([Security] Sep 5, 2018 15:01 UTC (Wed) (ris))</div>
+       <br/>
+    <ul>
+        <li>News link: <a href='https://lwn.net/Articles/764182'>https://lwn.net/Articles/764182</a></li>
+        <li>Source link: <a href=''></a></li>
+    </ul>
+       <br/>
+       <div class='content' style='text-align: justify'>
+               Dist.<br/><br/>ID<br/><br/>Release<br/><br/>Package<br/><br/>Date<br/><br/>Debian<br/><br/>[1]DSA-4284-1<br/><br/>stable<br/><br/>lcms2<br/><br/>2018-09-04<br/><br/>openSUSE<br/><br/>[2]openSUSE-SU-2018:2623-1<br/><br/>42.3<br/><br/>yubico-piv-tool<br/><br/>2018-09-05<br/><br/>Oracle<br/><br/>[3]ELSA-2018-4208<br/><br/>OL6<br/><br/>kernel<br/><br/>2018-09-04<br/><br/>Oracle<br/><br/>[4]ELSA-2018-4208<br/><br/>OL7<br/><br/>kernel<br/><br/>2018-09-04<br/><br/>SUSE<br/><br/>[5]SUSE-SU-2018:2608-1<br/><br/>cobbler<br/><br/>2018-09-04<br/><br/>SUSE<br/><br/>[6]SUSE-SU-2018:2615-1<br/><br/>SLE11<br/><br/>kvm<br/><br/>2018-09-05<br/><br/><br/><br/>[1] https://lwn.net/Articles/764176/<br/><br/>[2] https://lwn.net/Articles/764177/<br/><br/>[3] https://lwn.net/Articles/764178/<br/><br/>[4] https://lwn.net/Articles/764179/<br/><br/>[5] https://lwn.net/Articles/764180/<br/><br/>[6] https://lwn.net/Articles/764181/
+       </div>
+<hr/>
+</div>
+</body>
diff --git a/test/expected/LWN/0000764184 b/test/expected/LWN/0000764184

new file mode 100644 (file)

index 0000000..ff6288f
--- /dev/null
+++ b/test/expected/LWN/0000764184
@@ -0,0 +1,26 @@
+                       A SET OF STABLE KERNELS                     \r
+\r
+  [Kernel] Sep 5, 2018 15:15 UTC (Wed) (ris)\r
+\r
+  o News link: https://lwn.net/Articles/764184/\r
+  o Source link: \r
+\r
+\r
+    Greg  Kroah-Hartman  has  released  stable  kernels [1]4.18.6 ,\r
+    [2]4.14.68  ,  [3]4.9.125 , [4]4.4.154 , and [5]3.18.121 . They\r
+    all contain important fixes and users should upgrade.\r
+    \r
+    \r
+    \r
+    [1] https://lwn.net/Articles/764185/\r
+    \r
+    [2] https://lwn.net/Articles/764186/\r
+    \r
+    [3] https://lwn.net/Articles/764187/\r
+    \r
+    [4] https://lwn.net/Articles/764188/\r
+    \r
+    [5] https://lwn.net/Articles/764189/\r
+\r
+\r
+\r
diff --git a/test/expected/LWN/0000764184.header b/test/expected/LWN/0000764184.header

new file mode 100644 (file)

index 0000000..91c215e
--- /dev/null
+++ b/test/expected/LWN/0000764184.header
@@ -0,0 +1,7 @@
+0A set of stable kernels       null/LWN/0000764184             70\r
+i  [Kernel] Sep 5, 2018 15:15 UTC (Wed) (ris)\r
+i\r
+i    Greg   Kroah-Hartman   has   released  stable  kernels  4.18.6,\r
+i    4.14.68,  4.9.125,  4.4.154,  and  3.18.121.  They  all contain\r
+i    important fixes and users should upgrade.\r
+i\r
diff --git a/test/expected/LWN/0000764184.header.html b/test/expected/LWN/0000764184.header.html

new file mode 100644 (file)

index 0000000..7c47529
--- /dev/null
+++ b/test/expected/LWN/0000764184.header.html
@@ -0,0 +1,20 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta http-equiv='content-type' content='text/html; charset=utf-8'>
+  <meta name='viewport' content='width=device-width, initial-scale=1.0'>
+  <style type='text/css'>
+    body { margin: 1em 15%; }
+  </style>
+</head>
+<body>
+<div class='story-header'>
+       <h1><a href='0000764184.html'>A set of stable kernels</a></h1>
+       <div class='details'>([Kernel] Sep 5, 2018 15:15 UTC (Wed) (ris))</div>
+       <br/>
+       <div class='content' style='text-align: justify'>
+               Greg Kroah-Hartman has released stable kernels 4.18.6, 4.14.68, 4.9.125, 4.4.154, and 3.18.121. They all contain important fixes and users should upgrade.
+       </div>
+<hr/>
+</div>
+</body>
diff --git a/test/expected/LWN/0000764184.html b/test/expected/LWN/0000764184.html

new file mode 100644 (file)

index 0000000..94aed84
--- /dev/null
+++ b/test/expected/LWN/0000764184.html
@@ -0,0 +1,25 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta http-equiv='content-type' content='text/html; charset=utf-8'>
+  <meta name='viewport' content='width=device-width, initial-scale=1.0'>
+  <style type='text/css'>
+    body { margin: 1em 15%; }
+  </style>
+</head>
+<body>
+<div class='story'>
+       <h1>A set of stable kernels</h1>
+       <div class='details'>([Kernel] Sep 5, 2018 15:15 UTC (Wed) (ris))</div>
+       <br/>
+    <ul>
+        <li>News link: <a href='https://lwn.net/Articles/764184/'>https://lwn.net/Articles/764184/</a></li>
+        <li>Source link: <a href=''></a></li>
+    </ul>
+       <br/>
+       <div class='content' style='text-align: justify'>
+               Greg Kroah-Hartman has released stable kernels [1]4.18.6 , [2]4.14.68 , [3]4.9.125 , [4]4.4.154 , and [5]3.18.121 . They all contain important fixes and users should upgrade.<br/><br/><br/><br/>[1] https://lwn.net/Articles/764185/<br/><br/>[2] https://lwn.net/Articles/764186/<br/><br/>[3] https://lwn.net/Articles/764187/<br/><br/>[4] https://lwn.net/Articles/764188/<br/><br/>[5] https://lwn.net/Articles/764189/
+       </div>
+<hr/>
+</div>
+</body>
diff --git a/test/expected/LWN/0000764200 b/test/expected/LWN/0000764200

new file mode 100644 (file)

index 0000000..20882cb
--- /dev/null
+++ b/test/expected/LWN/0000764200
@@ -0,0 +1,13 @@
+             [$] WRITING NETWORK FLOW DISSECTORS IN BPF            \r
+\r
+  [Kernel] Sep 6, 2018 15:59 UTC (Thu) (corbet)\r
+\r
+  o News link: https://lwn.net/Articles/764200\r
+  o Source link: \r
+\r
+\r
+    [$]   Sorry,   this  article  is  currently  available  to  LWN\r
+    suscribers only [https://lwn.net/subscribe/].\r
+\r
+\r
+\r
diff --git a/test/expected/LWN/0000764200.header b/test/expected/LWN/0000764200.header

new file mode 100644 (file)

index 0000000..9cd70e2
--- /dev/null
+++ b/test/expected/LWN/0000764200.header
@@ -0,0 +1,13 @@
+0[$] Writing network flow dissectors in BPF    null/LWN/0000764200             70\r
+i  [Kernel] Sep 6, 2018 15:59 UTC (Thu) (corbet)\r
+i\r
+i    Network  packet  headers  contain  a great deal of information,\r
+i    but  the  kernel  often only needs a subset of that information\r
+i    to  be  able to perform filtering or associate any given packet\r
+i    with  a  flow.  The  piece  of  code that follows the different\r
+i    layers  of  packet  encapsulation to find the important data is\r
+i    called  a  flow  dissector.  In current Linux kernels, the flow\r
+i    dissector  is  written  in  C.  A  patch  set has been proposed\r
+i    recently  to  implement  it  in  BPF  with  the  clear  goal of\r
+i    improving security, flexibility, and maybe even performance.\r
+i\r
diff --git a/test/expected/LWN/0000764200.header.html b/test/expected/LWN/0000764200.header.html

new file mode 100644 (file)

index 0000000..cfcf95a
--- /dev/null
+++ b/test/expected/LWN/0000764200.header.html
@@ -0,0 +1,20 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta http-equiv='content-type' content='text/html; charset=utf-8'>
+  <meta name='viewport' content='width=device-width, initial-scale=1.0'>
+  <style type='text/css'>
+    body { margin: 1em 15%; }
+  </style>
+</head>
+<body>
+<div class='story-header'>
+       <h1><a href='0000764200.html'>[$] Writing network flow dissectors in BPF</a></h1>
+       <div class='details'>([Kernel] Sep 6, 2018 15:59 UTC (Thu) (corbet))</div>
+       <br/>
+       <div class='content' style='text-align: justify'>
+               Network packet headers contain a great deal of information, but the kernel often only needs a subset of that information to be able to perform filtering or associate any given packet with a flow. The piece of code that follows the different layers of packet encapsulation to find the important data is called a flow dissector. In current Linux kernels, the flow dissector is written in C. A patch set has been proposed recently to implement it in BPF with the clear goal of improving security, flexibility, and maybe even performance.
+       </div>
+<hr/>
+</div>
+</body>
diff --git a/test/expected/LWN/0000764200.html b/test/expected/LWN/0000764200.html

new file mode 100644 (file)

index 0000000..800444f
--- /dev/null
+++ b/test/expected/LWN/0000764200.html
@@ -0,0 +1,25 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta http-equiv='content-type' content='text/html; charset=utf-8'>
+  <meta name='viewport' content='width=device-width, initial-scale=1.0'>
+  <style type='text/css'>
+    body { margin: 1em 15%; }
+  </style>
+</head>
+<body>
+<div class='story'>
+       <h1>[$] Writing network flow dissectors in BPF</h1>
+       <div class='details'>([Kernel] Sep 6, 2018 15:59 UTC (Thu) (corbet))</div>
+       <br/>
+    <ul>
+        <li>News link: <a href='https://lwn.net/Articles/764200'>https://lwn.net/Articles/764200</a></li>
+        <li>Source link: <a href=''></a></li>
+    </ul>
+       <br/>
+       <div class='content' style='text-align: justify'>
+               [$] Sorry, this article is currently available to LWN suscribers only [https://lwn.net/subscribe/].
+       </div>
+<hr/>
+</div>
+</body>
diff --git a/test/expected/LWN/0000764202 b/test/expected/LWN/0000764202

new file mode 100644 (file)

index 0000000..b690a82
--- /dev/null
+++ b/test/expected/LWN/0000764202
@@ -0,0 +1,23 @@
+                        FIREFOX 62.0 RELEASED                      \r
+\r
+  [Development] Sep 5, 2018 17:31 UTC (Wed) (ris)\r
+\r
+  o News link: https://lwn.net/Articles/764202/\r
+  o Source link: \r
+\r
+\r
+    Mozilla  has  released Firefox 62.0, with several new features.\r
+    The  Firefox  Home (default New Tab) allows users to display up\r
+    to  4  rows  of  top sites, Pocket stories, and highlights; for\r
+    those  using containers there is menu option to reopen a tab in\r
+    a  different  container;  Firefox  63 will remove all trust for\r
+    Symantec-issued  certificates,  and  it  is optional in Firefox\r
+    62;  FreeBSD  support for WebAuthn was added; and more. See the\r
+    [1]release notes for details.\r
+    \r
+    \r
+    \r
+    [1] https://www.mozilla.org/en-US/firefox/62.0/releasenotes/\r
+\r
+\r
+\r
diff --git a/test/expected/LWN/0000764202.header b/test/expected/LWN/0000764202.header

new file mode 100644 (file)

index 0000000..f8dc850
--- /dev/null
+++ b/test/expected/LWN/0000764202.header
@@ -0,0 +1,12 @@
+0Firefox 62.0 released null/LWN/0000764202             70\r
+i  [Development] Sep 5, 2018 17:31 UTC (Wed) (ris)\r
+i\r
+i    Mozilla  has  released Firefox 62.0, with several new features.\r
+i    The  Firefox  Home (default New Tab) allows users to display up\r
+i    to  4  rows  of  top sites, Pocket stories, and highlights; for\r
+i    those  using containers there is menu option to reopen a tab in\r
+i    a  different  container;  Firefox  63 will remove all trust for\r
+i    Symantec-issued  certificates,  and  it  is optional in Firefox\r
+i    62;  FreeBSD  support for WebAuthn was added; and more. See the\r
+i    release notes for details.\r
+i\r
diff --git a/test/expected/LWN/0000764202.header.html b/test/expected/LWN/0000764202.header.html

new file mode 100644 (file)

index 0000000..06e752e
--- /dev/null
+++ b/test/expected/LWN/0000764202.header.html
@@ -0,0 +1,20 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta http-equiv='content-type' content='text/html; charset=utf-8'>
+  <meta name='viewport' content='width=device-width, initial-scale=1.0'>
+  <style type='text/css'>
+    body { margin: 1em 15%; }
+  </style>
+</head>
+<body>
+<div class='story-header'>
+       <h1><a href='0000764202.html'>Firefox 62.0 released</a></h1>
+       <div class='details'>([Development] Sep 5, 2018 17:31 UTC (Wed) (ris))</div>
+       <br/>
+       <div class='content' style='text-align: justify'>
+               Mozilla has released Firefox 62.0, with several new features. The Firefox Home (default New Tab) allows users to display up to 4 rows of top sites, Pocket stories, and highlights; for those using containers there is menu option to reopen a tab in a different container; Firefox 63 will remove all trust for Symantec-issued certificates, and it is optional in Firefox 62; FreeBSD support for WebAuthn was added; and more. See the release notes for details.
+       </div>
+<hr/>
+</div>
+</body>
diff --git a/test/expected/LWN/0000764202.html b/test/expected/LWN/0000764202.html

new file mode 100644 (file)

index 0000000..de063a5
--- /dev/null
+++ b/test/expected/LWN/0000764202.html
@@ -0,0 +1,25 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta http-equiv='content-type' content='text/html; charset=utf-8'>
+  <meta name='viewport' content='width=device-width, initial-scale=1.0'>
+  <style type='text/css'>
+    body { margin: 1em 15%; }
+  </style>
+</head>
+<body>
+<div class='story'>
+       <h1>Firefox 62.0 released</h1>
+       <div class='details'>([Development] Sep 5, 2018 17:31 UTC (Wed) (ris))</div>
+       <br/>
+    <ul>
+        <li>News link: <a href='https://lwn.net/Articles/764202/'>https://lwn.net/Articles/764202/</a></li>
+        <li>Source link: <a href=''></a></li>
+    </ul>
+       <br/>
+       <div class='content' style='text-align: justify'>
+               Mozilla has released Firefox 62.0, with several new features. The Firefox Home (default New Tab) allows users to display up to 4 rows of top sites, Pocket stories, and highlights; for those using containers there is menu option to reopen a tab in a different container; Firefox 63 will remove all trust for Symantec-issued certificates, and it is optional in Firefox 62; FreeBSD support for WebAuthn was added; and more. See the [1]release notes for details.<br/><br/><br/><br/>[1] https://www.mozilla.org/en-US/firefox/62.0/releasenotes/
+       </div>
+<hr/>
+</div>
+</body>
diff --git a/test/expected/LWN/0000764209 b/test/expected/LWN/0000764209

new file mode 100644 (file)

index 0000000..3fbbdd3
--- /dev/null
+++ b/test/expected/LWN/0000764209
@@ -0,0 +1,13 @@
+         [$] STRENGTHENING USER-SPACE SPECTRE V2 PROTECTION        \r
+\r
+  [Kernel] Sep 5, 2018 21:47 UTC (Wed) (corbet)\r
+\r
+  o News link: https://lwn.net/Articles/764209\r
+  o Source link: \r
+\r
+\r
+    [$]   Sorry,   this  article  is  currently  available  to  LWN\r
+    suscribers only [https://lwn.net/subscribe/].\r
+\r
+\r
+\r
diff --git a/test/expected/LWN/0000764209.header b/test/expected/LWN/0000764209.header

new file mode 100644 (file)

index 0000000..d0410ed
--- /dev/null
+++ b/test/expected/LWN/0000764209.header
@@ -0,0 +1,15 @@
+0[$] Strengthening user-space Spectre v2 protection    null/LWN/0000764209             70\r
+i  [Kernel] Sep 5, 2018 21:47 UTC (Wed) (corbet)\r
+i\r
+i    The  Spectre  variant  2  vulnerability  allows the speculative\r
+i    execution   of  incorrect  (in  an  attacker-controllable  way)\r
+i    indirect  branch  predictions,  resulting  in  the  ability  to\r
+i    exfiltrate  information  via side channels. The kernel has been\r
+i    reasonably  well  protected  against this variant since shortly\r
+i    after  its  disclosure in January. It is, however, possible for\r
+i    user-space  processes  to  use Spectre v2 to attack each other;\r
+i    thus  far,  the  mainline  kernel has offered relatively little\r
+i    protection  against  such  attacks. A recent proposal from Jiri\r
+i    Kosina  may  change  that  situation,  but there are still some\r
+i    disagreements around the details.\r
+i\r
diff --git a/test/expected/LWN/0000764209.header.html b/test/expected/LWN/0000764209.header.html

new file mode 100644 (file)

index 0000000..0e94410
--- /dev/null
+++ b/test/expected/LWN/0000764209.header.html
@@ -0,0 +1,20 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta http-equiv='content-type' content='text/html; charset=utf-8'>
+  <meta name='viewport' content='width=device-width, initial-scale=1.0'>
+  <style type='text/css'>
+    body { margin: 1em 15%; }
+  </style>
+</head>
+<body>
+<div class='story-header'>
+       <h1><a href='0000764209.html'>[$] Strengthening user-space Spectre v2 protection</a></h1>
+       <div class='details'>([Kernel] Sep 5, 2018 21:47 UTC (Wed) (corbet))</div>
+       <br/>
+       <div class='content' style='text-align: justify'>
+               The Spectre variant 2 vulnerability allows the speculative execution of incorrect (in an attacker-controllable way) indirect branch predictions, resulting in the ability to exfiltrate information via side channels. The kernel has been reasonably well protected against this variant since shortly after its disclosure in January. It is, however, possible for user-space processes to use Spectre v2 to attack each other; thus far, the mainline kernel has offered relatively little protection against such attacks. A recent proposal from Jiri Kosina may change that situation, but there are still some disagreements around the details.
+       </div>
+<hr/>
+</div>
+</body>
diff --git a/test/expected/LWN/0000764209.html b/test/expected/LWN/0000764209.html

new file mode 100644 (file)

index 0000000..c24f3ff
--- /dev/null
+++ b/test/expected/LWN/0000764209.html
@@ -0,0 +1,25 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta http-equiv='content-type' content='text/html; charset=utf-8'>
+  <meta name='viewport' content='width=device-width, initial-scale=1.0'>
+  <style type='text/css'>
+    body { margin: 1em 15%; }
+  </style>
+</head>
+<body>
+<div class='story'>
+       <h1>[$] Strengthening user-space Spectre v2 protection</h1>
+       <div class='details'>([Kernel] Sep 5, 2018 21:47 UTC (Wed) (corbet))</div>
+       <br/>
+    <ul>
+        <li>News link: <a href='https://lwn.net/Articles/764209'>https://lwn.net/Articles/764209</a></li>
+        <li>Source link: <a href=''></a></li>
+    </ul>
+       <br/>
+       <div class='content' style='text-align: justify'>
+               [$] Sorry, this article is currently available to LWN suscribers only [https://lwn.net/subscribe/].
+       </div>
+<hr/>
+</div>
+</body>
diff --git a/test/expected/LWN/0000764219 b/test/expected/LWN/0000764219

new file mode 100644 (file)

index 0000000..b91238e
--- /dev/null
+++ b/test/expected/LWN/0000764219
@@ -0,0 +1,161 @@
+                         GNOME 3.30 RELEASED                       \r
+\r
+  [Development] Sep 5, 2018 21:17 UTC (Wed) (ris)\r
+\r
+  o News link: https://lwn.net/Articles/764219\r
+  o Source link: \r
+\r
+\r
+    The  GNOME  Project  has  announced  the  release of GNOME 3.30\r
+    "Almería".   "   This   release  brings  automatic  updates  in\r
+    Software,   more   games,   and  a  new  Podcasts  application.\r
+    Improvements  to  core  GNOME  applications  include  a refined\r
+    location  and  search  bar  in  Files, a [Thunderbolt] panel in\r
+    Settings,  support  for  remoting  using RDP in Boxes, and many\r
+    more. " The [1]release notes contain more information.\r
+    \r
+    From :\r
+    \r
+    Matthias         Clasen         via         devel-announce-list\r
+    <devel-announce-list-AT-gnome.org>\r
+    \r
+    To :\r
+    \r
+    gnome-announce-list-AT-gnome.org, devel-announce-list-AT-gnome-\r
+    .org\r
+    \r
+    Subject :\r
+    \r
+    GNOME 3.30 released\r
+    \r
+    Date :\r
+    \r
+    Wed, 5 Sep 2018 16:41:54 -0400\r
+    \r
+    Message-ID :\r
+    \r
+    <CAFwd_vCdnMhopZsZMq2M-N7DfQbUheTCfDb--Lgn6rrAXPyfdQ@mail.gmai-\r
+    l.com>\r
+    \r
+    Cc :\r
+    \r
+    Matthias Clasen <matthias.clasen-AT-gmail.com>\r
+    \r
+    Archive-link :\r
+    \r
+    [2]Article\r
+    \r
+    The  GNOME  Project  is  proud to announce the release of GNOME\r
+    3.30, “Almería”\r
+    \r
+    This  release brings automatic updates in Software, more games,\r
+    and a new\r
+    \r
+    Podcasts application.\r
+    \r
+    Improvements  to  core  GNOME  applications  include  a refined\r
+    location and\r
+    \r
+    search\r
+    \r
+    bar  in  Files,  a  Thunderbold  panel in Settings, support for\r
+    remoting using\r
+    \r
+    RDP\r
+    \r
+    in Boxes, and many more.\r
+    \r
+    More  information  about the changes in GNOME 3.30 can be found\r
+    in the\r
+    \r
+    release notes:\r
+    \r
+    https://help.gnome.org/misc/release-notes/3.30/\r
+    \r
+    For  the  release  team,  this release is particularly exciting\r
+    because it is\r
+    \r
+    the\r
+    \r
+    first  one  that has been produced and verified with our new CI\r
+    infrastructure\r
+    \r
+    in gitlab.gnome.org.\r
+    \r
+    GNOME  3.30 will be available shortly in many distributions. If\r
+    you want to\r
+    \r
+    try it\r
+    \r
+    today,  you  can  use  the soon-to-be-released Fedora 29 or the\r
+    openSUSE nightly\r
+    \r
+    live images which will both include GNOME 3.30 very soon.\r
+    \r
+    https://www.gnome.org/getting-gnome/\r
+    \r
+    http://download.opensuse.org/repositories/GNOME:/Medias/i...\r
+    \r
+    To  try the very latest developments in GNOME, you can also use\r
+    Fedora\r
+    \r
+    Silverblue,\r
+    \r
+    whose rawhide branch always includes the latest GNOME packages.\r
+    \r
+    https://kojipkgs.fedoraproject.org/compose/rawhide/latest...\r
+    \r
+    If  you are interested in building applications for GNOME 3.30,\r
+    look for the\r
+    \r
+    GNOME  3.30  Flatpak  SDK,  which  will  be  available  in  the\r
+    sdk.gnome.org\r
+    \r
+    repository\r
+    \r
+    soon.\r
+    \r
+    This  six-month  effort wouldn't have been possible without the\r
+    whole\r
+    \r
+    GNOME  community,  made  of  contributors  and friends from all\r
+    around the\r
+    \r
+    world:  developers, designers, documentation writers, usability\r
+    and\r
+    \r
+    accessibility  specialists, translators, maintainers, students,\r
+    system\r
+    \r
+    administrators,  companies,  artists, testers and last, but not\r
+    least, our\r
+    \r
+    users.\r
+    \r
+    GNOME  would  not  exist  without  all  of  you.  Thank  you to\r
+    everyone!\r
+    \r
+    Our  next release, GNOME 3.32, is planned for March 2019. Until\r
+    then,\r
+    \r
+    enjoy GNOME 3.30!\r
+    \r
+    The GNOME Release Team\r
+    \r
+    --\r
+    \r
+    devel-announce-list mailing list\r
+    \r
+    devel-announce-list@gnome.org\r
+    \r
+    https://mail.gnome.org/mailman/listinfo/devel-announce-list\r
+    \r
+    \r
+    \r
+    [1] https://help.gnome.org/misc/release-notes/3.30/\r
+    \r
+    [2] http://www.mail-archive.com/search?l=mid&q=CAFwd_vCdnMhopZ-\r
+    sZMq2M-N7DfQbUheTCfDb--Lgn6rrAXPyfdQ%40mail.gmail.com\r
+\r
+\r
+\r
diff --git a/test/expected/LWN/0000764219.header b/test/expected/LWN/0000764219.header

new file mode 100644 (file)

index 0000000..4985147
--- /dev/null
+++ b/test/expected/LWN/0000764219.header
@@ -0,0 +1,11 @@
+0GNOME 3.30 released   null/LWN/0000764219             70\r
+i  [Development] Sep 5, 2018 21:17 UTC (Wed) (ris)\r
+i\r
+i    The  GNOME  Project  has  announced  the  release of GNOME 3.30\r
+i    "Almería".  "This release brings automatic updates in Software,\r
+i    more  games,  and  a  new Podcasts application. Improvements to\r
+i    core  GNOME  applications include a refined location and search\r
+i    bar  in  Files,  a [Thunderbolt] panel in Settings, support for\r
+i    remoting  using RDP in Boxes, and many more." The release notes\r
+i    contain more information.\r
+i\r
diff --git a/test/expected/LWN/0000764219.header.html b/test/expected/LWN/0000764219.header.html

new file mode 100644 (file)

index 0000000..b143c4c
--- /dev/null
+++ b/test/expected/LWN/0000764219.header.html
@@ -0,0 +1,20 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta http-equiv='content-type' content='text/html; charset=utf-8'>
+  <meta name='viewport' content='width=device-width, initial-scale=1.0'>
+  <style type='text/css'>
+    body { margin: 1em 15%; }
+  </style>
+</head>
+<body>
+<div class='story-header'>
+       <h1><a href='0000764219.html'>GNOME 3.30 released</a></h1>
+       <div class='details'>([Development] Sep 5, 2018 21:17 UTC (Wed) (ris))</div>
+       <br/>
+       <div class='content' style='text-align: justify'>
+               The GNOME Project has announced the release of GNOME 3.30 &quot;Almería&quot;. &quot;This release brings automatic updates in Software, more games, and a new Podcasts application. Improvements to core GNOME applications include a refined location and search bar in Files, a [Thunderbolt] panel in Settings, support for remoting using RDP in Boxes, and many more.&quot; The release notes contain more information.
+       </div>
+<hr/>
+</div>
+</body>
diff --git a/test/expected/LWN/0000764219.html b/test/expected/LWN/0000764219.html

new file mode 100644 (file)

index 0000000..d82900d
--- /dev/null
+++ b/test/expected/LWN/0000764219.html
@@ -0,0 +1,25 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta http-equiv='content-type' content='text/html; charset=utf-8'>
+  <meta name='viewport' content='width=device-width, initial-scale=1.0'>
+  <style type='text/css'>
+    body { margin: 1em 15%; }
+  </style>
+</head>
+<body>
+<div class='story'>
+       <h1>GNOME 3.30 released</h1>
+       <div class='details'>([Development] Sep 5, 2018 21:17 UTC (Wed) (ris))</div>
+       <br/>
+    <ul>
+        <li>News link: <a href='https://lwn.net/Articles/764219'>https://lwn.net/Articles/764219</a></li>
+        <li>Source link: <a href=''></a></li>
+    </ul>
+       <br/>
+       <div class='content' style='text-align: justify'>
+               The GNOME Project has announced the release of GNOME 3.30 &quot;Almería&quot;. &quot; This release brings automatic updates in Software, more games, and a new Podcasts application. Improvements to core GNOME applications include a refined location and search bar in Files, a [Thunderbolt] panel in Settings, support for remoting using RDP in Boxes, and many more. &quot; The [1]release notes contain more information.<br/><br/>From :<br/><br/>Matthias Clasen via devel-announce-list &lt;devel-announce-list-AT-gnome.org&gt;<br/><br/>To :<br/><br/>gnome-announce-list-AT-gnome.org, devel-announce-list-AT-gnome.org<br/><br/>Subject :<br/><br/>GNOME 3.30 released<br/><br/>Date :<br/><br/>Wed, 5 Sep 2018 16:41:54 -0400<br/><br/>Message-ID :<br/><br/>&lt;CAFwd_vCdnMhopZsZMq2M-N7DfQbUheTCfDb--Lgn6rrAXPyfdQ@mail.gmail.com&gt;<br/><br/>Cc :<br/><br/>Matthias Clasen &lt;matthias.clasen-AT-gmail.com&gt;<br/><br/>Archive-link :<br/><br/>[2]Article<br/><br/>The GNOME Project is proud to announce the release of GNOME 3.30, “Almería”<br/><br/>This release brings automatic updates in Software, more games, and a new<br/><br/>Podcasts application.<br/><br/>Improvements to core GNOME applications include a refined location and<br/><br/>search<br/><br/>bar in Files, a Thunderbold panel in Settings, support for remoting using<br/><br/>RDP<br/><br/>in Boxes, and many more.<br/><br/>More information about the changes in GNOME 3.30 can be found in the<br/><br/>release notes:<br/><br/>https://help.gnome.org/misc/release-notes/3.30/<br/><br/>For the release team, this release is particularly exciting because it is<br/><br/>the<br/><br/>first one that has been produced and verified with our new CI infrastructure<br/><br/>in gitlab.gnome.org.<br/><br/>GNOME 3.30 will be available shortly in many distributions. 
If you want to<br/><br/>try it<br/><br/>today, you can use the soon-to-be-released Fedora 29 or the openSUSE nightly<br/><br/>live images which will both include GNOME 3.30 very soon.<br/><br/>https://www.gnome.org/getting-gnome/<br/><br/>http://download.opensuse.org/repositories/GNOME:/Medias/i...<br/><br/>To try the very latest developments in GNOME, you can also use Fedora<br/><br/>Silverblue,<br/><br/>whose rawhide branch always includes the latest GNOME packages.<br/><br/>https://kojipkgs.fedoraproject.org/compose/rawhide/latest...<br/><br/>If you are interested in building applications for GNOME 3.30, look for the<br/><br/>GNOME 3.30 Flatpak SDK, which will be available in the sdk.gnome.org<br/><br/>repository<br/><br/>soon.<br/><br/>This six-month effort wouldn&#x27;t have been possible without the whole<br/><br/>GNOME community, made of contributors and friends from all around the<br/><br/>world: developers, designers, documentation writers, usability and<br/><br/>accessibility specialists, translators, maintainers, students, system<br/><br/>administrators, companies, artists, testers and last, but not least, our<br/><br/>users.<br/><br/>GNOME would not exist without all of you. Thank you to everyone!<br/><br/>Our next release, GNOME 3.32, is planned for March 2019. Until then,<br/><br/>enjoy GNOME 3.30!<br/><br/>The GNOME Release Team<br/><br/>--<br/><br/>devel-announce-list mailing list<br/><br/>devel-announce-list@gnome.org<br/><br/>https://mail.gnome.org/mailman/listinfo/devel-announce-list<br/><br/><br/><br/>[1] https://help.gnome.org/misc/release-notes/3.30/<br/><br/>[2] http://www.mail-archive.com/search?l=mid&amp;q=CAFwd_vCdnMhopZsZMq2M-N7DfQbUheTCfDb--Lgn6rrAXPyfdQ%40mail.gmail.com
+       </div>
+<hr/>
+</div>
+</body>
diff --git a/test/expected/LWN/0000764300 b/test/expected/LWN/0000764300

new file mode 100644 (file)

index 0000000..64a081b
--- /dev/null
+++ b/test/expected/LWN/0000764300
@@ -0,0 +1,312 @@
+                    SECURITY UPDATES FOR THURSDAY                  \r
+\r
+  [Security] Sep 6, 2018 13:55 UTC (Thu) (jake)\r
+\r
+  o News link: https://lwn.net/Articles/764300\r
+  o Source link: \r
+\r
+\r
+    Dist.\r
+    \r
+    ID\r
+    \r
+    Release\r
+    \r
+    Package\r
+    \r
+    Date\r
+    \r
+    Debian\r
+    \r
+    [1]DSA-4286-1\r
+    \r
+    stable\r
+    \r
+    curl\r
+    \r
+    2018-09-05\r
+    \r
+    Debian\r
+    \r
+    [2]DLA-1494-1\r
+    \r
+    LTS\r
+    \r
+    gdm3\r
+    \r
+    2018-09-05\r
+    \r
+    Debian\r
+    \r
+    [3]DLA-1495-1\r
+    \r
+    LTS\r
+    \r
+    git-annex\r
+    \r
+    2018-09-05\r
+    \r
+    Debian\r
+    \r
+    [4]DLA-1496-1\r
+    \r
+    LTS\r
+    \r
+    lcms2\r
+    \r
+    2018-09-06\r
+    \r
+    Debian\r
+    \r
+    [5]DSA-4285-1\r
+    \r
+    stable\r
+    \r
+    sympa\r
+    \r
+    2018-09-05\r
+    \r
+    Fedora\r
+    \r
+    [6]FEDORA-2018-38bdbafa96\r
+    \r
+    F28\r
+    \r
+    discount\r
+    \r
+    2018-09-06\r
+    \r
+    Fedora\r
+    \r
+    [7]FEDORA-2018-fe437a98d6\r
+    \r
+    F27\r
+    \r
+    dolphin-emu\r
+    \r
+    2018-09-06\r
+    \r
+    Fedora\r
+    \r
+    [8]FEDORA-2018-5bf744beee\r
+    \r
+    F28\r
+    \r
+    gd\r
+    \r
+    2018-09-06\r
+    \r
+    Fedora\r
+    \r
+    [9]FEDORA-2018-fac5420dd1\r
+    \r
+    F27\r
+    \r
+    obs-build\r
+    \r
+    2018-09-06\r
+    \r
+    Fedora\r
+    \r
+    [10]FEDORA-2018-fac5420dd1\r
+    \r
+    F27\r
+    \r
+    osc\r
+    \r
+    2018-09-06\r
+    \r
+    Fedora\r
+    \r
+    [11]FEDORA-2018-4f0b7d1251\r
+    \r
+    F27\r
+    \r
+    tcpflow\r
+    \r
+    2018-09-06\r
+    \r
+    Fedora\r
+    \r
+    [12]FEDORA-2018-5ad77cc979\r
+    \r
+    F28\r
+    \r
+    tcpflow\r
+    \r
+    2018-09-06\r
+    \r
+    Fedora\r
+    \r
+    [13]FEDORA-2018-7626df1731\r
+    \r
+    F27\r
+    \r
+    yara\r
+    \r
+    2018-09-06\r
+    \r
+    Fedora\r
+    \r
+    [14]FEDORA-2018-8344cb89ac\r
+    \r
+    F28\r
+    \r
+    yara\r
+    \r
+    2018-09-06\r
+    \r
+    openSUSE\r
+    \r
+    [15]openSUSE-SU-2018:2628-1\r
+    \r
+    15.0\r
+    \r
+    wireshark\r
+    \r
+    2018-09-05\r
+    \r
+    Slackware\r
+    \r
+    [16]SSA:2018-249-01\r
+    \r
+    curl\r
+    \r
+    2018-09-06\r
+    \r
+    Slackware\r
+    \r
+    [17]SSA:2018-249-03\r
+    \r
+    firefox\r
+    \r
+    2018-09-06\r
+    \r
+    Slackware\r
+    \r
+    [18]SSA:2018-249-02\r
+    \r
+    ghostscript\r
+    \r
+    2018-09-06\r
+    \r
+    Slackware\r
+    \r
+    [19]SSA:2018-249-04\r
+    \r
+    thunderbird\r
+    \r
+    2018-09-06\r
+    \r
+    SUSE\r
+    \r
+    [20]SUSE-SU-2018:2630-1\r
+    \r
+    SLE15\r
+    \r
+    apache-pdfbox\r
+    \r
+    2018-09-06\r
+    \r
+    SUSE\r
+    \r
+    [21]SUSE-SU-2018:2629-1\r
+    \r
+    curl\r
+    \r
+    2018-09-05\r
+    \r
+    SUSE\r
+    \r
+    [22]SUSE-SU-2018:2632-1\r
+    \r
+    OS7 SLE12\r
+    \r
+    dovecot22\r
+    \r
+    2018-09-06\r
+    \r
+    SUSE\r
+    \r
+    [23]SUSE-SU-2018:2631-1\r
+    \r
+    OS7 SLE12\r
+    \r
+    libvirt\r
+    \r
+    2018-09-06\r
+    \r
+    Ubuntu\r
+    \r
+    [24]USN-3759-2\r
+    \r
+    12.04\r
+    \r
+    libtirpc\r
+    \r
+    2018-09-05\r
+    \r
+    Ubuntu\r
+    \r
+    [25]USN-3759-1\r
+    \r
+    14.04 16.04 18.04\r
+    \r
+    libtirpc\r
+    \r
+    2018-09-05\r
+    \r
+    \r
+    \r
+    [1] https://lwn.net/Articles/764275/\r
+    \r
+    [2] https://lwn.net/Articles/764276/\r
+    \r
+    [3] https://lwn.net/Articles/764277/\r
+    \r
+    [4] https://lwn.net/Articles/764278/\r
+    \r
+    [5] https://lwn.net/Articles/764279/\r
+    \r
+    [6] https://lwn.net/Articles/764280/\r
+    \r
+    [7] https://lwn.net/Articles/764281/\r
+    \r
+    [8] https://lwn.net/Articles/764282/\r
+    \r
+    [9] https://lwn.net/Articles/764283/\r
+    \r
+    [10] https://lwn.net/Articles/764284/\r
+    \r
+    [11] https://lwn.net/Articles/764285/\r
+    \r
+    [12] https://lwn.net/Articles/764286/\r
+    \r
+    [13] https://lwn.net/Articles/764287/\r
+    \r
+    [14] https://lwn.net/Articles/764288/\r
+    \r
+    [15] https://lwn.net/Articles/764289/\r
+    \r
+    [16] https://lwn.net/Articles/764290/\r
+    \r
+    [17] https://lwn.net/Articles/764292/\r
+    \r
+    [18] https://lwn.net/Articles/764291/\r
+    \r
+    [19] https://lwn.net/Articles/764293/\r
+    \r
+    [20] https://lwn.net/Articles/764294/\r
+    \r
+    [21] https://lwn.net/Articles/764295/\r
+    \r
+    [22] https://lwn.net/Articles/764296/\r
+    \r
+    [23] https://lwn.net/Articles/764297/\r
+    \r
+    [24] https://lwn.net/Articles/764298/\r
+    \r
+    [25] https://lwn.net/Articles/764299/\r
+\r
+\r
+\r
diff --git a/test/expected/LWN/0000764300.header b/test/expected/LWN/0000764300.header

new file mode 100644 (file)

index 0000000..a34ccd1
--- /dev/null
+++ b/test/expected/LWN/0000764300.header
@@ -0,0 +1,10 @@
+0Security updates for Thursday null/LWN/0000764300             70\r
+i  [Security] Sep 6, 2018 13:55 UTC (Thu) (jake)\r
+i\r
+i    Security  updates  have  been  issued  by  Debian  (curl, gdm3,\r
+i    git-annex,  lcms2,  and  sympa), Fedora (discount, dolphin-emu,\r
+i    gd,  obs-build,  osc, tcpflow, and yara), openSUSE (wireshark),\r
+i    Slackware  (curl,  firefox, ghostscript, and thunderbird), SUSE\r
+i    (apache-pdfbox,  curl,  dovecot22,  and  libvirt),  and  Ubuntu\r
+i    (libtirpc).\r
+i\r
diff --git a/test/expected/LWN/0000764300.header.html b/test/expected/LWN/0000764300.header.html

new file mode 100644 (file)

index 0000000..9bed822
--- /dev/null
+++ b/test/expected/LWN/0000764300.header.html
@@ -0,0 +1,20 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta http-equiv='content-type' content='text/html; charset=utf-8'>
+  <meta name='viewport' content='width=device-width, initial-scale=1.0'>
+  <style type='text/css'>
+    body { margin: 1em 15%; }
+  </style>
+</head>
+<body>
+<div class='story-header'>
+       <h1><a href='0000764300.html'>Security updates for Thursday</a></h1>
+       <div class='details'>([Security] Sep 6, 2018 13:55 UTC (Thu) (jake))</div>
+       <br/>
+       <div class='content' style='text-align: justify'>
+               Security updates have been issued by Debian (curl, gdm3, git-annex, lcms2, and sympa), Fedora (discount, dolphin-emu, gd, obs-build, osc, tcpflow, and yara), openSUSE (wireshark), Slackware (curl, firefox, ghostscript, and thunderbird), SUSE (apache-pdfbox, curl, dovecot22, and libvirt), and Ubuntu (libtirpc).
+       </div>
+<hr/>
+</div>
+</body>
diff --git a/test/expected/LWN/0000764300.html b/test/expected/LWN/0000764300.html

new file mode 100644 (file)

index 0000000..968dbd3
--- /dev/null
+++ b/test/expected/LWN/0000764300.html
@@ -0,0 +1,25 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta http-equiv='content-type' content='text/html; charset=utf-8'>
+  <meta name='viewport' content='width=device-width, initial-scale=1.0'>
+  <style type='text/css'>
+    body { margin: 1em 15%; }
+  </style>
+</head>
+<body>
+<div class='story'>
+       <h1>Security updates for Thursday</h1>
+       <div class='details'>([Security] Sep 6, 2018 13:55 UTC (Thu) (jake))</div>
+       <br/>
+    <ul>
+        <li>News link: <a href='https://lwn.net/Articles/764300'>https://lwn.net/Articles/764300</a></li>
+        <li>Source link: <a href=''></a></li>
+    </ul>
+       <br/>
+       <div class='content' style='text-align: justify'>
+               Dist.<br/><br/>ID<br/><br/>Release<br/><br/>Package<br/><br/>Date<br/><br/>Debian<br/><br/>[1]DSA-4286-1<br/><br/>stable<br/><br/>curl<br/><br/>2018-09-05<br/><br/>Debian<br/><br/>[2]DLA-1494-1<br/><br/>LTS<br/><br/>gdm3<br/><br/>2018-09-05<br/><br/>Debian<br/><br/>[3]DLA-1495-1<br/><br/>LTS<br/><br/>git-annex<br/><br/>2018-09-05<br/><br/>Debian<br/><br/>[4]DLA-1496-1<br/><br/>LTS<br/><br/>lcms2<br/><br/>2018-09-06<br/><br/>Debian<br/><br/>[5]DSA-4285-1<br/><br/>stable<br/><br/>sympa<br/><br/>2018-09-05<br/><br/>Fedora<br/><br/>[6]FEDORA-2018-38bdbafa96<br/><br/>F28<br/><br/>discount<br/><br/>2018-09-06<br/><br/>Fedora<br/><br/>[7]FEDORA-2018-fe437a98d6<br/><br/>F27<br/><br/>dolphin-emu<br/><br/>2018-09-06<br/><br/>Fedora<br/><br/>[8]FEDORA-2018-5bf744beee<br/><br/>F28<br/><br/>gd<br/><br/>2018-09-06<br/><br/>Fedora<br/><br/>[9]FEDORA-2018-fac5420dd1<br/><br/>F27<br/><br/>obs-build<br/><br/>2018-09-06<br/><br/>Fedora<br/><br/>[10]FEDORA-2018-fac5420dd1<br/><br/>F27<br/><br/>osc<br/><br/>2018-09-06<br/><br/>Fedora<br/><br/>[11]FEDORA-2018-4f0b7d1251<br/><br/>F27<br/><br/>tcpflow<br/><br/>2018-09-06<br/><br/>Fedora<br/><br/>[12]FEDORA-2018-5ad77cc979<br/><br/>F28<br/><br/>tcpflow<br/><br/>2018-09-06<br/><br/>Fedora<br/><br/>[13]FEDORA-2018-7626df1731<br/><br/>F27<br/><br/>yara<br/><br/>2018-09-06<br/><br/>Fedora<br/><br/>[14]FEDORA-2018-8344cb89ac<br/><br/>F28<br/><br/>yara<br/><br/>2018-09-06<br/><br/>openSUSE<br/><br/>[15]openSUSE-SU-2018:2628-1<br/><br/>15.0<br/><br/>wireshark<br/><br/>2018-09-05<br/><br/>Slackware<br/><br/>[16]SSA:2018-249-01<br/><br/>curl<br/><br/>2018-09-06<br/><br/>Slackware<br/><br/>[17]SSA:2018-249-03<br/><br/>firefox<br/><br/>2018-09-06<br/><br/>Slackware<br/><br/>[18]SSA:2018-249-02<br/><br/>ghostscript<br/><br/>2018-09-06<br/><br/>Slackware<br/><br/>[19]SSA:2018-249-04<br/><br/>thunderbird<br/><br/>2018-09-06<br/><br/>SUSE<br/><br/>[20]SUSE-SU-2018:2630-1<br/><br/>SLE15<br/><br/>apache-pdfbox<br/><br/>2018-09-06<br/><br/>SUSE<br/><br/>[21]SUSE-SU-2018:2629-1<br/><br/>curl<br/><br/>2018-09-05<br/><br/>SUSE<br/><br/>[22]SUSE-SU-2018:2632-1<br/><br/>OS7 SLE12<br/><br/>dovecot22<br/><br/>2018-09-06<br/><br/>SUSE<br/><br/>[23]SUSE-SU-2018:2631-1<br/><br/>OS7 SLE12<br/><br/>libvirt<br/><br/>2018-09-06<br/><br/>Ubuntu<br/><br/>[24]USN-3759-2<br/><br/>12.04<br/><br/>libtirpc<br/><br/>2018-09-05<br/><br/>Ubuntu<br/><br/>[25]USN-3759-1<br/><br/>14.04 16.04 18.04<br/><br/>libtirpc<br/><br/>2018-09-05<br/><br/><br/><br/>[1] https://lwn.net/Articles/764275/<br/><br/>[2] https://lwn.net/Articles/764276/<br/><br/>[3] https://lwn.net/Articles/764277/<br/><br/>[4] https://lwn.net/Articles/764278/<br/><br/>[5] https://lwn.net/Articles/764279/<br/><br/>[6] https://lwn.net/Articles/764280/<br/><br/>[7] https://lwn.net/Articles/764281/<br/><br/>[8] https://lwn.net/Articles/764282/<br/><br/>[9] https://lwn.net/Articles/764283/<br/><br/>[10] https://lwn.net/Articles/764284/<br/><br/>[11] https://lwn.net/Articles/764285/<br/><br/>[12] https://lwn.net/Articles/764286/<br/><br/>[13] https://lwn.net/Articles/764287/<br/><br/>[14] https://lwn.net/Articles/764288/<br/><br/>[15] https://lwn.net/Articles/764289/<br/><br/>[16] https://lwn.net/Articles/764290/<br/><br/>[17] https://lwn.net/Articles/764292/<br/><br/>[18] https://lwn.net/Articles/764291/<br/><br/>[19] https://lwn.net/Articles/764293/<br/><br/>[20] https://lwn.net/Articles/764294/<br/><br/>[21] https://lwn.net/Articles/764295/<br/><br/>[22] https://lwn.net/Articles/764296/<br/><br/>[23] https://lwn.net/Articles/764297/<br/><br/>[24] https://lwn.net/Articles/764298/<br/><br/>[25] https://lwn.net/Articles/764299/
+       </div>
+<hr/>
+</div>
+</body>
diff --git a/test/expected/LWN/0000764321 b/test/expected/LWN/0000764321

new file mode 100644 (file)

index 0000000..df52d21
--- /dev/null
+++ b/test/expected/LWN/0000764321
@@ -0,0 +1,88 @@
+      THE HIDDEN BENEFIT OF GIVING BACK TO OPEN SOURCE SOFTWARE    \r
+                         (WORKING KNOWLEDGE)                       \r
+\r
+  [Briefs] Sep 6, 2018 16:56 UTC (Thu) (corbet)\r
+\r
+  o News link: https://lwn.net/Articles/764321/\r
+  o Source link: \r
+\r
+\r
+    The  Harvard  Business  School's  "Working  Knowledge" site has\r
+    [1]an  article  arguing  that it can pay for companies to allow\r
+    their  developers  to  contribute  back  to  the projects whose\r
+    software  they  use. " And that presents an interesting dilemma\r
+    for  firms  that rely heavily on open source. Should they allow\r
+    employees  on  company  time  to  make updates and edits to the\r
+    software  for  community use that could be used by competitors?\r
+    New  research  by  Assistant Professor Frank Nagle, a member of\r
+    the  Strategy  Unit  at  Harvard  Business  School,  shows that\r
+    paying  employees  to  contribute  to  such software boosts the\r
+    company’s  productivity  from  using the software by as much as\r
+    100 percent, when compared with free-riding competitors. "\r
+    \r
+    \r
+    \r
+    [1] https://hbswk.hbs.edu/item/the-hidden-benefit-of-giving-ba-\r
+    ck-to-open-source-software\r
+\r
+\r
+  ** The Hidden Benefit of Giving Back to Open Source Software\r
+     (Working Knowledge)\r
+     \r
+     This is no surprise to me. Most of the open source software\r
+     improvements that might help a competitor are too general in\r
+     nature to really be giving the other guys a competitive\r
+     advantage.\r
+     \r
+     For instance, if Lyft contributed Linux kernel or PHP or Apache\r
+     or whatever fixes, the benefit to Lyft of having that improved\r
+     expertise far exceeds the general benefit to competitor Uber.\r
+     \r
+\r
+     ** The Hidden Benefit of Giving Back to Open Source Software\r
+        (Working Knowledge)\r
+        \r
+        This is no surprise to me. Most of the open source software\r
+        improvements that might help a competitor are too general in\r
+        nature to really be giving the other guys a competitive\r
+        advantage.\r
+        \r
+        For instance, if Lyft contributed Linux kernel or PHP or\r
+        Apache or whatever fixes, the benefit to Lyft of having that\r
+        improved expertise far exceeds the general benefit to\r
+        competitor Uber.\r
+        \r
+\r
+\r
+     ** The Hidden Benefit of Giving Back to Open Source Software\r
+        (Working Knowledge)\r
+        \r
+        Even having to debate it seems so farcical. If you're worried\r
+        about people who "do the same thing", the software they use\r
+        is not the main differentiator. How your company is\r
+        organized, how you treat your people and your customers, how\r
+        you organized projects etc are huge, and software is\r
+        ultimately minor. Fixes and changes to software? Incredibly\r
+        minor.\r
+        \r
+\r
+\r
+     ** The Hidden Benefit of Giving Back to Open Source Software\r
+        (Working Knowledge)\r
+        \r
+        Perhaps this is too dismissive, as there is the part about\r
+        letting your programmers do their job to the best of their\r
+        ability. That seems pretty big.\r
+        \r
+\r
+        ** The Hidden Benefit of Giving Back to Open Source Software\r
+           (Working Knowledge)\r
+           \r
+           Perhaps this is too dismissive, as there is the part about\r
+           letting your programmers do their job to the best of their\r
+           ability. That seems pretty big.\r
+           \r
+\r
+\r
+\r
+\r
diff --git a/test/expected/LWN/0000764321.header b/test/expected/LWN/0000764321.header

new file mode 100644 (file)

index 0000000..9cdc345
--- /dev/null
+++ b/test/expected/LWN/0000764321.header
@@ -0,0 +1,16 @@
+0The Hidden Benefit of Giving Back to Open Source Software (Working Knowledge) null/LWN/0000764321             70\r
+i  [Briefs] Sep 6, 2018 16:56 UTC (Thu) (corbet)\r
+i\r
+i    The  Harvard  Business School's "Working Knowledge" site has an\r
+i    article  arguing  that  it can pay for companies to allow their\r
+i    developers  to  contribute  back to the projects whose software\r
+i    they  use.  "And that presents an interesting dilemma for firms\r
+i    that  rely  heavily on open source. Should they allow employees\r
+i    on  company  time to make updates and edits to the software for\r
+i    community  use  that could be used by competitors? New research\r
+i    by  Assistant  Professor  Frank Nagle, a member of the Strategy\r
+i    Unit  at  Harvard  Business School, shows that paying employees\r
+i    to   contribute   to   such   software   boosts  the  company’s\r
+i    productivity  from  using  the  software  by  as  much  as  100\r
+i    percent, when compared with free-riding competitors."\r
+i\r
diff --git a/test/expected/LWN/0000764321.header.html b/test/expected/LWN/0000764321.header.html

new file mode 100644 (file)

index 0000000..eb8b490
--- /dev/null
+++ b/test/expected/LWN/0000764321.header.html
@@ -0,0 +1,20 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta http-equiv='content-type' content='text/html; charset=utf-8'>
+  <meta name='viewport' content='width=device-width, initial-scale=1.0'>
+  <style type='text/css'>
+    body { margin: 1em 15%; }
+  </style>
+</head>
+<body>
+<div class='story-header'>
+       <h1><a href='0000764321.html'>The Hidden Benefit of Giving Back to Open Source Software (Working Knowledge)</a></h1>
+       <div class='details'>([Briefs] Sep 6, 2018 16:56 UTC (Thu) (corbet))</div>
+       <br/>
+       <div class='content' style='text-align: justify'>
+               The Harvard Business School&#x27;s &quot;Working Knowledge&quot; site has an article arguing that it can pay for companies to allow their developers to contribute back to the projects whose software they use. &quot;And that presents an interesting dilemma for firms that rely heavily on open source. Should they allow employees on company time to make updates and edits to the software for community use that could be used by competitors? New research by Assistant Professor Frank Nagle, a member of the Strategy Unit at Harvard Business School, shows that paying employees to contribute to such software boosts the company’s productivity from using the software by as much as 100 percent, when compared with free-riding competitors.&quot;
+       </div>
+<hr/>
+</div>
+</body>
diff --git a/test/expected/LWN/0000764321.html b/test/expected/LWN/0000764321.html

new file mode 100644 (file)

index 0000000..a424258
--- /dev/null
+++ b/test/expected/LWN/0000764321.html
@@ -0,0 +1,50 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta http-equiv='content-type' content='text/html; charset=utf-8'>
+  <meta name='viewport' content='width=device-width, initial-scale=1.0'>
+  <style type='text/css'>
+    body { margin: 1em 15%; }
+  </style>
+</head>
+<body>
+<div class='story'>
+       <h1>The Hidden Benefit of Giving Back to Open Source Software (Working Knowledge)</h1>
+       <div class='details'>([Briefs] Sep 6, 2018 16:56 UTC (Thu) (corbet))</div>
+       <br/>
+    <ul>
+        <li>News link: <a href='https://lwn.net/Articles/764321/'>https://lwn.net/Articles/764321/</a></li>
+        <li>Source link: <a href=''></a></li>
+    </ul>
+       <br/>
+       <div class='content' style='text-align: justify'>
+               The Harvard Business School&#x27;s &quot;Working Knowledge&quot; site has [1]an article arguing that it can pay for companies to allow their developers to contribute back to the projects whose software they use. &quot; And that presents an interesting dilemma for firms that rely heavily on open source. Should they allow employees on company time to make updates and edits to the software for community use that could be used by competitors? New research by Assistant Professor Frank Nagle, a member of the Strategy Unit at Harvard Business School, shows that paying employees to contribute to such software boosts the company’s productivity from using the software by as much as 100 percent, when compared with free-riding competitors. &quot;<br/><br/><br/><br/>[1] https://hbswk.hbs.edu/item/the-hidden-benefit-of-giving-back-to-open-source-software
+       </div>
+<hr/>
+  <div class='comment' style='display: block; margin-left: 80px'>
+    <h2>The Hidden Benefit of Giving Back to Open Source Software (Working Knowledge)</h2>
+    <div class='by' style='font-style: italic;'></div>
+    <div class='comment_content'><p></p><p>This is no surprise to me. Most of the open source software improvements that might help a competitor are too general in nature to really be giving the other guys a competitive advantage.</p><p></p><p>For instance, if Lyft contributed Linux kernel or PHP or Apache or whatever fixes, the benefit to Lyft of having that improved expertise far exceeds the general benefit to competitor Uber.</p><p></p></div>
+    <div class='comment' style='display: block; margin-left: 80px'>
+      <h2>The Hidden Benefit of Giving Back to Open Source Software (Working Knowledge)</h2>
+      <div class='by' style='font-style: italic;'></div>
+      <div class='comment_content'><p></p><p>This is no surprise to me. Most of the open source software improvements that might help a competitor are too general in nature to really be giving the other guys a competitive advantage.</p><p></p><p>For instance, if Lyft contributed Linux kernel or PHP or Apache or whatever fixes, the benefit to Lyft of having that improved expertise far exceeds the general benefit to competitor Uber.</p><p></p></div>
+    </div>
+    <div class='comment' style='display: block; margin-left: 80px'>
+      <h2>The Hidden Benefit of Giving Back to Open Source Software (Working Knowledge)</h2>
+      <div class='by' style='font-style: italic;'></div>
+      <div class='comment_content'><p></p><p>Even having to debate it seems so farcical. If you're worried about people who "do the same thing", the software they use is not the main differentiator. How your company is organized, how you treat your people and your customers, how you organized projects etc are huge, and software is ultimately minor. Fixes and changes to software? Incredibly minor.</p><p></p></div>
+    </div>
+    <div class='comment' style='display: block; margin-left: 80px'>
+      <h2>The Hidden Benefit of Giving Back to Open Source Software (Working Knowledge)</h2>
+      <div class='by' style='font-style: italic;'></div>
+      <div class='comment_content'><p></p><p>Perhaps this is too dismissive, as there is the part about letting your programmers do their job to the best of their ability. That seems pretty big.</p><p></p></div>
+      <div class='comment' style='display: block; margin-left: 80px'>
+        <h2>The Hidden Benefit of Giving Back to Open Source Software (Working Knowledge)</h2>
+        <div class='by' style='font-style: italic;'></div>
+        <div class='comment_content'><p></p><p>Perhaps this is too dismissive, as there is the part about letting your programmers do their job to the best of their ability. That seems pretty big.</p><p></p></div>
+      </div>
+    </div>
+  </div>
+</div>
+</body>
diff --git a/test/source/LWN/Articles/763252.html b/test/source/LWN/Articles/763252.html

new file mode 100644 (file)

index 0000000..6658210
--- /dev/null
+++ b/test/source/LWN/Articles/763252.html
@@ -0,0 +1,1864 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+        "http://www.w3.org/TR/html4/loose.dtd">
+        <html>
+        <head><title>LWN.net Weekly Edition for August 30, 2018 [LWN.net]</title>
+        <link rel="next" href="/Articles/763254/"/>
+<meta name="twitter:card" content="summary" />
+<meta name="twitter:site" content="@lwnnet" />
+<meta name="twitter:title" content="LWN.net Weekly Edition for August 30, 2018" />
+<meta name="twitter:description" content="Julia; C considered dangerous; 4.19 Merge window; I/O controller throughput; KDE onboarding; Dat." />
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<meta HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8">
+        <link rel="icon" href="/images/favicon.png" type="image/png">
+        <link rel="alternate" type="application/rss+xml" title="LWN.net headlines" href="https://lwn.net/headlines/newrss">
+        <link rel="stylesheet" href="/CSS/lwn">
+<link rel="stylesheet" href="/CSS/nosub">
+<link rel="stylesheet" href="/CSS/pure-min">
+           <!--[if lte IE 8]>
+             <link rel="stylesheet" href="/CSS/grids-responsive-old-ie-min">
+           <![endif]-->
+           <!--[if gt IE 8]><!-->
+             <link rel="stylesheet" href="/CSS/grids-responsive-min">
+           <!--<![endif]-->
+           <link rel="stylesheet" href="/CSS/pure-lwn">
+           
+        
+<script type="text/javascript">var p="http",d="static";if(document.location.protocol=="https:"){p+="s";d="engine";}var z=document.createElement("script");z.type="text/javascript";z.async=true;z.src=p+"://"+d+".adzerk.net/ados.js";var s=document.getElementsByTagName("script")[0];s.parentNode.insertBefore(z,s);</script>
+<script type="text/javascript">
+var ados_keywords = ados_keywords || [];
+if( location.protocol=='https:' ) {
+        ados_keywords.push('T:SSL');
+} else {
+        ados_keywords.push('T:HTTP');
+}
+
+var ados = ados || {};
+ados.run = ados.run || [];
+ados.run.push(function() {
+
+ados_add_placement(4669, 20979, "azk13321_leaderboard", 4).setZone(16026);
+
+ados_add_placement(4669, 20979, "azk93271_right_zone", [5,10,6]).setZone(16027);
+
+ados_add_placement(4669, 20979, "azk31017_tracking", 20).setZone(20995);
+
+
+
+ados_setKeywords(ados_keywords.join(', ')); 
+ados_load();
+});</script>
+
+        </head>
+        <body bgcolor="#ffffff" link="Blue" VLINK="Green" alink="Green">
+        <a name="t"></a>
+<div id="menu"><a href="/"><img src="https://static.lwn.net/images/logo/barepenguin-70.png" class="logo"
+                 border="0" alt="LWN.net Logo">
+           <font class="logo">LWN<br>.net</font>
+           <font class="logobl">News from the source</font></a>
+           <a href="/"><img src="https://static.lwn.net/images/lcorner-ss.png" class="sslogo"
+                 border="0" alt="LWN"></a><div class="navmenu-container">
+           <ul class="navmenu">
+        <li><a class="navmenu" href="#t"><b>Content</b></a><ul><li><a href="/current/">Weekly Edition</a></li><li><a href="/Archives/">Archives</a></li><li><a href="/Search/">Search</a></li><li><a href="/Kernel/">Kernel</a></li><li><a href="/Security/">Security</a></li><li><a href="/Distributions/">Distributions</a></li><li><a href="/Calendar/">Events calendar</a></li><li><a href="/Comments/unread">Unread comments</a></li><li><hr></li><li><a href="/op/FAQ.lwn">LWN FAQ</a></li><li><a href="/op/AuthorGuide.lwn">Write for us</a></li></ul></li>
+<li><a class="navmenu" href="#t"><b>Edition</b></a><ul><li><a href="/Articles/763252/">⇒Front page</a></li><li><a href="/Articles/763254/">Brief items</a></li><li><a href="/Articles/763255/">Announcements</a></li><li><a href="/Articles/763252/bigpage">One big page</a></li><li><a href="/Articles/762816/">Previous week</a></li><li><a href="/Articles/763789/">Following week</a></li></ul></li>
+</ul></div>
+</div> <!-- menu -->
+<div class="pure-g not-handset" style="margin-left: 10.5em">
+           <div class="not-print">
+             <div id="azk13321_leaderboard"></div>
+           </div>
+           </div>
+        <div class="topnav-container">
+<div class="not-handset"><form action="https://lwn.net/Login/" method="post" name="loginform"
+                 class="loginform">
+        <b>User:</b> <input type="text" name="Username" value="" size="8" /> <b>Password:</b> <input type="password" name="Password" size="8" /> <input type="hidden" name="target" value="/Articles/763252/" /> <input type="submit" name="submit" value="Log in" /></form> |
+           <form action="https://lwn.net/subscribe/" method="post" class="loginform">
+           <input type="submit" name="submit" value="Subscribe" />
+           </form> |
+           <form action="https://lwn.net/Login/newaccount" method="post" class="loginform">
+           <input type="submit" name="submit" value="Register" />
+           </form>
+        </div>
+               <div class="handset-only">
+               <a href="/subscribe/"><b>Subscribe</b></a> /
+               <a href="/Login/"><b>Log in</b></a> /
+               <a href="/Login/newaccount"><b>New account</b></a>
+               </div>
+               </div><div class="pure-grid maincolumn">
+<div class="lwn-u-1 pure-u-md-19-24">
+<div class="PageHeadline">
+<h1>LWN.net Weekly Edition for August 30, 2018</h1>
+</div>
+<div class="ArticleText">
+<a name="763743"></a><h2 class="SummaryHL"><a href="/Articles/763743/">Welcome to the LWN.net Weekly Edition for August 30, 2018</a></h2>
+
+This edition contains the following feature content:
+                   <p>
+                   <ul class="spacylist">
+
+  <li> <a href="/Articles/763626/">An introduction to the Julia language,
+  part 1</a>:  Julia is a language designed for intensive numerical
+  calculations; this article gives an overview of its core features.
+
+  <li> <a href="/Articles/763641/">C considered dangerous</a>: a Linux
+  Security Summit talk on what is being done to make the use of C in the
+  kernel safer.
+
+  <li> <a href="/Articles/763106/">The second half of the 4.19 merge
+  window</a>:  the final features merged (or not merged) before the merge
+  window closed for this cycle.
+
+
+  <li> <a href="/Articles/763603/">Measuring (and fixing) I/O-controller
+  throughput loss</a>: the kernel's I/O controllers can provide useful
+  bandwidth guarantees, but at a significant cost in throughput.
+
+  <li> <a href="/Articles/763175/">KDE's onboarding initiative, one year
+  later</a>: what has gone right in KDE's effort to make it easier for
+  contributors to join the project, and what remains to be done.
+
+  <li> <a href="/Articles/763492/">Sharing and archiving data sets with
+  Dat</a>:  an innovative approach to addressing and sharing data on the
+  net. 
+
+</ul>
+
+<p>
+                       This week's edition also includes these inner pages:
+                       <p>
+                       <ul class="spacylist">
+
+<li> <a href="/Articles/763254/">Brief items</a>: Brief news items from throughout the community.
+
+<li> <a href="/Articles/763255/">Announcements</a>: Newsletters, conferences, security updates, patches, and more.
+
+</ul>
+                       <p>
+                       Please enjoy this week's edition, and, as always, thank you for
+                       supporting LWN.net.
+<p><a href="/Articles/763743/#Comments">Comments (none posted)</a>
+<p>
+<a name="763626"></a><h2 class="SummaryHL"><a href="/Articles/763626/">An introduction to the Julia language, part 1</a></h2>
+
+<div class="GAByline">
+           <p>August 28, 2018</p>
+           <p>This article was contributed by Lee Phillips</p>
+           </div>
+<p><a href="http://julialang.org/">Julia</a> is a young computer language
+aimed at serving the needs of scientists, engineers, and other
+practitioners of numerically intensive programming. It was first publicly
+released in 2012. After an intense period of language development, version
+1.0 was <a
+href="https://julialang.org/blog/2018/08/one-point-zero">released</a> on
+August&nbsp;8. The 1.0 release promises years of language
+stability; users can be confident that developments in the 1.x series will
+not break their code. 
+    This is the first part of a two-part article introducing the world of Julia.
+    This part will introduce enough of the language syntax and constructs to
+    allow you to begin to write simple programs. The following installment will
+    acquaint you with the additional pieces needed to create real projects, and to
+    make use of Julia's ecosystem.
+
+<h4>Goals and history</h4>
+
+<p>The Julia project has ambitious goals. It wants the language to perform
+about as well as Fortran or C when running numerical algorithms, while
+remaining as pleasant to program in as Python. I believe the project has
+met these goals and is poised to see increasing adoption by numerical
+researchers, especially now that an official, stable release is
+available.</p>
+
+<p>The Julia project maintains a <a
+href="https://julialang.org/benchmarks/">micro-benchmark page</a> that compares its
+numerical performance against both statically compiled languages (C,
+Fortran) and dynamically typed languages (R, Python). While it's certainly
+possible to argue about the relevance and fairness of particular
+benchmarks, the data overall supports the Julia team's contention that Julia
+has generally achieved parity with Fortran and C; the benchmark
+source code is available.</p>
+
+<p>Julia began as research in computer science at MIT; its creators are
+Alan Edelman, Stefan Karpinski, Jeff Bezanson, and Viral Shah. These four
+ remain active developers of the language. They, along with Keno Fischer,
+co-founder and CTO of <a href="https://juliacomputing.com/">Julia
+Computing</a>, were kind enough to share their thoughts with us about
+the language.  I'll be drawing
+on their comments later on; for now, let's get a taste of
+what Julia code looks like.</p>
+
+<h4>Getting started</h4>
+
+<p>To explore Julia initially, start up its standard <a
+href="https://en.wikipedia.org/wiki/Read%E2%80%93eval%E2%80%93print_loop">read-eval-print
+loop</a> (REPL)
+by typing <code>julia</code> at the terminal, assuming that you have installed
+it. You will then be
+able to interact with what will seem to be an interpreted language — but,
+behind the scenes, those commands are being compiled by a 
+just-in-time (JIT) compiler that uses the <a href="http://llvm.org/">LLVM
+compiler framework</a>. This allows Julia to be interactive, while turning
+the code into fast, native machine instructions. However, the JIT compiler
+passes sometimes introduce noticeable delays at the REPL, especially when
+using a function for the first time.</p>
+
+<p>To run a Julia program non-interactively, execute a command like:
+<pre>
+    $ julia script.jl &lt;args&gt;
+</pre>
+ 
+<p>Julia has all the usual data structures: numbers of various types
+(including complex and rational numbers), multidimensional arrays,
+dictionaries, strings, and characters. Functions are first-class: they can
+be passed as arguments to other functions, can be members of arrays,
+and so on.</p>
+
+<p>Julia embraces Unicode. Strings, which are enclosed in double quotes,
+are arrays of Unicode characters, which are enclosed in single quotes. The
+&quot;<tt>*</tt>&quot; operator is used for string and character concatenation. Thus
+'a' and 'β' are characters, and 'aβ' is a syntax error. &quot;a&quot; and
+&quot;β&quot; are strings, as are &quot;aβ&quot;, 'a' * 'β', and
+&quot;a&quot; * &quot;β&quot; — all evaluate to the same string.
+
+<p>Variable and function names can contain non-ASCII characters. This, along
+with Julia's clever syntax that understands numbers prepended to variables
+to mean multiplication, goes a long way to allowing the numerical scientist
+to write code that more closely resembles the compact mathematical notation
+of the equations that usually lie behind it.</p>
+
+<pre>
+    julia&gt; ε₁ = 0.01
+    0.01
+
+    julia&gt; ε₂ = 0.02
+    0.02
+
+    julia&gt; 2ε₁ + 3ε₂
+    0.08
+</pre>
+
+<p>And where does Julia come down on the age-old debate of what do about
+<tt>1/2</tt>? In Fortran and Python&nbsp;2, this will get you 0, since 1 and 2 are
+integers, and the result is rounded down to the integer 0. This was deemed
+inconsistent, and confusing to some, so it was changed in Python&nbsp;3 to
+return 0.5 — which is what you 
+get in Julia, too.</p>
+
+<p>While we're on the subject of fractions, Julia can handle rational
+numbers, with a special syntax: <tt>3//5&nbsp;+&nbsp;2//3</tt> returns
+<tt>19//15</tt>, while <tt>3/5&nbsp;+&nbsp;2/3</tt> 
+gets you the floating-point answer 1.2666666666666666. Internally, Julia
+thinks of a rational number in its reduced form, so the expression
+<tt>6//8&nbsp;==&nbsp;3//4</tt> returns <code>true</code>, and <code>numerator(6//8)</code> returns
+<code>3</code>.</p>
+
+<h4>Arrays</h4>
+
+<p>Arrays are enclosed in square brackets and indexed with an iterator that
+can contain a step value:</p>
+
+<pre>
+    julia&gt; a = [1, 2, 3, 4, 5, 6]
+    6-element Array{Int64,1}:
+     1
+     2
+     3
+     4
+     5
+     6
+
+    julia&gt; a[1:2:end]
+    3-element Array{Int64,1}:          
+     1
+     3
+     5
+</pre>
+
+<p>As you can see, indexing starts at one, and the useful <code>end</code>
+index means the obvious thing. When you define a variable in the REPL,
+Julia replies with the type and value of the assigned data; you can suppress this output by ending your input line with a semicolon.</p>
+
+<p>Since arrays are such a vital part of numerical computation, and Julia
+makes them easy to work with, we'll spend a bit more time with them than the other data structures.</p>
+
+<p>To illustrate the syntax, we can start with a couple of 2D arrays, defined at the REPL:</p>
+
+<pre>
+    julia&gt; a = [1 2 3; 4 5 6]
+    2×3 Array{Int64,2}:
+     1  2  3
+     4  5  6
+
+    julia&gt; z = [-1 -2 -3; -4 -5 -6];
+</pre>
+
+<p>Indexing is as expected:</p>
+
+<pre>
+    julia&gt; a[1, 2]
+    2
+</pre>
+
+<p>You can glue arrays together horizontally:</p>
+
+<pre>
+    julia&gt; [a z]
+    2×6 Array{Int64,2}:
+     1  2  3  -1  -2  -3
+     4  5  6  -4  -5  -6
+</pre>
+
+<p>And vertically:</p>
+
+<pre>
+    julia&gt; [a; z]
+    4×3 Array{Int64,2}:
+      1   2   3
+      4   5   6
+     -1  -2  -3
+     -4  -5  -6
+</pre>
+
+<p>Julia has all the usual operators for handling arrays, and <a
+href="http://www.3blue1brown.com/essence-of-linear-algebra-page/">linear
+algebra</a> functions that work with matrices (2D arrays). The linear
+algebra functions are part of Julia's standard library, but need to be
+imported with a command like "<code>using LinearAlgebra</code>", which is a detail
+omitted from the current documentation. The functions include such things as
+determinants, matrix inverses, eigenvalues and eigenvectors, many kinds of
+matrix factorizations, etc. Julia has not reinvented the wheel here, but
+wisely uses the <a href="http://www.netlib.org/lapack/">LAPACK</a> Fortran
+library of battle-tested linear algebra routines.</p>
+
+<p>The extension of arithmetic operators to arrays is usually intuitive:</p>
+
+<pre>
+    julia&gt; a + z
+    2×3 Array{Int64,2}:
+     0  0  0
+     0  0  0
+</pre>
+
+<p>And the numerical prepending syntax works with arrays, too:</p>
+
+<pre>
+    julia&gt; 3a + 4z
+    2×3 Array{Int64,2}:
+     -1  -2  -3
+     -4  -5  -6
+</pre>
+
+<p>Putting a multiplication operator between two matrices gets you matrix
+multiplication:</p>
+
+<pre>
+    julia&gt; a * transpose(a)
+    2×2 Array{Int64,2}:
+     14  32
+     32  77
+</pre>
+
+<p>You can &quot;broadcast&quot; numbers to cover all the elements in an
+array by prepending the usual arithmetic operators with a dot:</p>
+
+<pre>
+    julia&gt; 1 .+ a
+    2×3 Array{Int64,2}:
+     2  3  4
+     5  6  7
+</pre>
+
+<p>Note that the language only actually requires the dot for some
+operators, but not for others, such as &quot;*&quot; and &quot;/&quot;. The
+reasons for this are arcane, and it probably makes sense to be consistent
+and use the dot whenever you intend broadcasting. Note also that the
+current version of the official documentation is incorrect in claiming that
+you may omit the dot from &quot;+&quot; and &quot;-&quot;; in fact, this
+now gives an error.</p>
+
+<p>You can use the dot notation to turn any function into one that operates
+on each element of an array:</p>
+
+<pre>
+    julia&gt; round.(sin.([0, π/2, π, 3π/2, 2π]))
+    5-element Array{Float64,1}:
+      0.0
+      1.0
+      0.0
+     -1.0
+     -0.0
+</pre>
+
+<p>The example above illustrates chaining two dotted functions
+together. The Julia compiler turns expressions like this into
+&quot;fused&quot; operations: instead of applying each function in turn to
+create a new array that is passed to the next function, the compiler
+combines the functions into a single compound function that is applied once
+over the array, creating a significant optimization.</p>
+
+<p>You can use this dot notation with any function, including your own, to
+turn it into a version that operates element-wise over arrays.</p>
+
+<p>Dictionaries (associative arrays) can be defined with several
+syntaxes. Here's one:</p>
+
+<pre>
+    julia&gt; d1 = Dict(&quot;A&quot;=&gt;1, &quot;B&quot;=&gt;2)
+    Dict{String,Int64} with 2 entries:
+      &quot;B&quot; =&gt; 2
+      &quot;A&quot; =&gt; 1
+</pre>
+
+<p>You may have noticed that the code snippets so far have not included any
+type declarations. Every value in Julia has a type, but the compiler will
+infer types if they are not specified. It is generally not necessary to
+declare types for performance, but type declarations sometimes serve other
+purposes, that we'll return to later. Julia has a deep and sophisticated
+type system, including user-defined types and C-like structs. Types can
+have behaviors associated with them, and can inherit behaviors from other
+types. The best thing about Julia's type system is that you can ignore it
+entirely, use just a few pieces of it, or spend weeks studying its
+design.</p>
+
+<h4>Control flow</h4>
+
+<p>Julia code is organized in blocks, which can indicate control flow,
+function definitions, and other code units. Blocks are terminated with the
+<code>end</code> keyword, and indentation is not significant. Statements
+are separated either with newlines or semicolons.</p>
+
+<p>Julia has the typical control flow constructs; here is a
+<code>while</code> block:</p>
+
+<pre>
+    julia&gt; i = 1;
+
+    julia&gt; while i &lt; 5
+              print(i)
+              global i = i + 1
+          end
+    1234
+</pre>
+
+<p>Notice the <code>global</code> keyword. Most blocks in Julia introduce a
+local scope for variables; without this keyword here, we would get an error
+about an undefined variable.</p>
+
+<p>Julia has the usual <code>if</code> statements and <code>for</code>
+loops that use the same iterators that we introduced above for array
+indexing. We can also iterate over collections:</p>
+
+<pre>
+    julia&gt; for i ∈ [&#39;a&#39;, &#39;b&#39;, &#39;c&#39;]
+              println(i)
+          end
+    a
+    b
+    c
+</pre>
+
+<p>In place of the fancy math symbol in this <code>for</code> loop, we can
+use &quot;<tt>=</tt>&quot; or &quot;<tt>in</tt>&quot;. If you want to use
+the math symbol but 
+have no convenient way to type it, the REPL will help you: type
+&quot;<tt>\in</tt>&quot; and the TAB key, and the symbol appears; you can type many
+<a href="/Articles/657157/">LaTeX</a> expressions into the
+REPL in this way.</p>
+
+<h4>Development of Julia</h4>
+
+<p>The language is developed on GitHub, with over 700 contributors. The
+Julia team mentioned in their email to us that the decision to use GitHub
+has been particularly good for Julia, as it streamlined the process for
+many of their contributors, who are scientists or domain experts in various
+fields, rather than professional software developers.</p>
+
+<p>The creators of Julia have <a
+href="https://julialang.org/publications/julia-fresh-approach-BEKS.pdf">published
+[PDF]</a>
+a detailed “mission statement” for the language, describing their aims and
+motivations. A key issue that they wanted their language to solve is what
+they called the &quot;two-language problem.&quot; This situation is
+familiar to anyone who has used Python or another dynamic language on a
+demanding numerical problem. To get good performance, you will wind up
+rewriting the numerically intensive parts of the program in C or Fortran,
+dealing with the interface between the two languages, and may still be
+disappointed in the overhead presented by calling the foreign routines from
+your original code.
+
+<p>
+For Python, <a
+href="/Articles/738915/">NumPy and SciPy</a> wrap many
+numerical routines, written in Fortran or C, for efficient use from that
+language, but you can only take advantage of this if your calculation fits
+the pattern of an available routine; in more general cases, where you will
+have to write a loop over your data, you are stuck with Python's native
+performance, which is orders of magnitude slower. If you switch to an
+alternative, faster implementation of Python, such as <a
+href="https://pypy.org/">PyPy</a>, the numerical libraries may not be
+compatible; NumPy became available for PyPy only within about the past
+year.</p>
+
+<p>Julia solves the two-language problem by being as expressive and simple
+to program in as a dynamic scripting language, while having the native
+performance of a static, compiled language. There is no need to write
+numerical libraries in a second language, but C or Fortran library routines
+can be called using a facility that Julia has built-in. Other languages,
+such as <a href="https://github.com/JuliaPy/PyCall.jl">Python</a> or <a
+href="https://github.com/JuliaInterop/RCall.jl">R</a>, can also interoperate
+easily with Julia using external packages.</p>
+
+<h4>Documentation</h4>
+
+<p>There are many resources to turn to to learn the language. There is an
+extensive and detailed <a
+href="https://docs.julialang.org/en/stable/">manual</a> at Julia
+headquarters, and this may be a good place to start. However, although the
+first few chapters provide a gentle introduction, the material soon becomes
+dense and, at times, hard to follow, with references to concepts that are
+not explained until later chapters. Fortunately, there is a <a
+href="https://julialang.org/learning/">&quot;learning&quot; link</a> at the
+top of the Julia home page, which takes you to a long list of videos,
+tutorials, books, articles, and classes both about Julia and that use Julia
+in teaching subjects such as numerical analysis. There is also a fairly good
+ <a
+href="http://bogumilkaminski.pl/files/julia_express.pdf">cheat-sheet [PDF]</a>, which was
+just updated for v. 1.0.</p>
+
+<p>If you're coming from Python, <a
+href="https://docs.julialang.org/en/stable/manual/noteworthy-differences/#Noteworthy-differences-from-Python-1">this
+list</a> of noteworthy differences between Python and Julia syntax will
+probably be useful.</p>
+
+<p>Some of the linked tutorials are in the form of <a
+href="https://lwn.net/Articles/746386/">Jupyter notebooks</a> — indeed,
+the name "Jupyter" is formed from "Julia",
+"Python", and "R", which are the three original languages supported by
+the interface. The <a href="https://github.com/JuliaLang/IJulia.jl">Julia
+kernel for Jupyter</a> was recently upgraded to support v. 1.0. Judicious
+sampling of a variety of documentation sources, combined with liberal
+experimentation, may be the best way of learning the language. Jupyter
+makes this experimentation more inviting for those who enjoy the web-based
+interface, but the REPL that comes with Julia helps a great deal in this
+regard by providing, for instance, TAB completion and an extensive help
+system invoked by simply pressing the &quot;?&quot; key.</p>
+
+<h4>Stay tuned</h4>
+
+<p>
+    The <a href="/Articles/764001/">next installment</a> in this two-part series will explain how Julia is
+    organized around the concept of "multiple dispatch". You will learn how to
+    create functions and make elementary use of Julia's type system. We'll see how
+    to install packages and use modules, and how to make graphs. Finally, Part 2
+    will briefly survey the important topics of macros and distributed computing.
+<p><a href="/Articles/763626/#Comments">Comments (80 posted)</a>
+<p>
+<a name="763641"></a><h2 class="SummaryHL"><a href="/Articles/763641/">C considered dangerous</a></h2>
+
+<div class="FeatureByline">
+           By <b>Jake Edge</b><br>August 29, 2018
+           <hr>
+<a href="/Archives/ConferenceByYear/#2018-Linux_Security_Summit_NA">LSS NA</a>
+</div>
+<p>
+At the North America edition of the <a
+href="https://events.linuxfoundation.org/events/linux-security-summit-north-america-2018/">2018
+Linux Security Summit</a> (LSS NA), which was held in late August in Vancouver,
+Canada, Kees Cook gave a presentation on some of the dangers that come with
+programs written in C.  In particular, of course, the Linux kernel is
+mostly written in C, which means that the security of our systems rests on
+a somewhat dangerous foundation.  But there are things that can be done to
+help firm things up by "<span>Making C Less Dangerous</span>" as the title
+of his talk suggested.
+</p>
+
+<p>
+He began with a brief summary of the work that he and others are doing as
+part of the <a
+href="https://kernsec.org/wiki/index.php/Kernel_Self_Protection_Project">Kernel
+Self Protection Project</a> (KSPP).  The goal of the project is to get
+kernel protections merged into the mainline.  These protections are not
+targeted at protecting user-space processes from other (possibly rogue)
+processes, but are, instead, focused on protecting the kernel from
+user-space code.  There are around 12 organizations and ten individuals
+working on roughly 20 different technologies as part of the KSPP, he said.   The
+progress has been "slow and steady", he said, which is how he thinks it
+should go.
+</p>
+
+<a href="/Articles/763644/">
+<img src="https://static.lwn.net/images/2018/lssna-cook-sm.jpg" border=0 hspace=5 align="right"
+alt="[Kees Cook]" title="Kees Cook" width=214 height=300>
+</a>
+
+<p>
+One of the main problems is that C is treated mostly like a fancy assembler.
+The kernel developers do this because they want the kernel to be as fast
+and as small as possible.  There are other reasons, too, such as the need to do
+architecture-specific tasks that lack a C API (e.g. setting up page tables,
+switching to 64-bit mode).
+</p>
+
+<p>
+But there is lots of undefined behavior in C.  This "operational baggage"
+can lead to various problems.  In addition, C has a weak standard library
+with multiple utility functions that have various pitfalls.  In C, the content
+of uninitialized automatic variables is undefined, but in the machine code that it
+gets translated to, the value is whatever happened to be in that memory
+location before.  In C, a function pointer can be called even if the type
+of the pointer does not match the type of the function being
+called—assembly doesn't care, it just jumps to a location, he said.
+</p>
+
+<p>
+The APIs in the standard library are also bad in many cases.  He asked: why
+is there no argument to <tt>memcpy()</tt> to specify the maximum
+destination length?  He noted a recent <a
+href="https://raphlinus.github.io/programming/rust/2018/08/17/undefined-behavior.html">blog
+post</a> from Raph Levien entitled "With Undefined Behavior, Anything is
+Possible".  That obviously resonated with Cook, as he pointed out his
+T-shirt—with the title and artwork from the post.
+</p>
+
+<h4>Less danger</h4>
+
+<p>
+He then moved on to some things that kernel developers can do (and are
+doing) to get away from some of the dangers of C.  He began with
+variable-length arrays (VLAs), which can be used to overflow the stack to
+access 
+data outside of its region.  Even if the stack has a guard page, VLAs can
+be used to jump past it to write into other memory, which can then be used
+by some other kind of attack.  The C language is "perfectly fine with
+this".  It is easy to find uses of VLAs with the <tt>-Wvla</tt> flag, however.
+</p>
+
+<p>
+But it turns out that VLAs are <a href="/Articles/749064/">not just bad
+from a security perspective</a>, 
+they are also slow.  In a micro-benchmark associated with a <a
+href="https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=02361bc77888">patch
+removing a VLA</a>, a 13% performance boost came from using a fixed-size
+array.  He dug in a bit further and found that much more code is being
+generated to handle a VLA, which explains the speed increase.  Since Linus
+Torvalds has <a
+href="https://lore.kernel.org/lkml/CA+55aFzCG-zNmZwX4A2FQpadafLfEzK6CC=qPXydAacU1RqZWA@mail.gmail.com/T/#u">declared
+that VLAs should be removed</a> from the kernel because they cause security
+problems and also slow the kernel down, Cook said "don't use VLAs".
+</p>
+
+<p>
+Another problem area is <tt>switch</tt> statements, in particular where
+there is no <tt>break</tt> for a <tt>case</tt>.  That could mean that the
+programmer expects and wants to fall through to the next case or it could
+be that the <tt>break</tt> was simply forgotten.  There is a way to get a
+warning from the compiler for fall-throughs, but there needs to be a way to
+mark those that are truly meant to be that way.  A special fall-through
+"statement" in the form of a comment is what has been agreed on within the
+static-analysis community.  He and others have been going through each of
+the places where there is no <tt>break</tt> to add these comments (or a
+<tt>break</tt>); they 
+have "found a lot of bugs this way", he said.
+</p>
+
+<p>
+Uninitialized local variables will generate a warning, but not if the
+variable is passed in by reference.  There are some GCC plugins that will
+automatically initialize these variables, but there are also patches for
+both GCC and Clang to provide a compiler option to do so.  Neither of those
+is upstream yet, but Torvalds has praised the effort so the kernel would
+likely use the option.  An interesting side
+effect that came about while investigating this was a warning he got about
+unreachable code when he 
+enabled the auto-initialization.  There were two variables declared just
+after a <tt>switch</tt> (and outside of any <tt>case</tt>), where they
+would never be reached.
+</p>
+
+<p>
+Arithmetic overflow is another undefined behavior in C that can cause various
+ problems.  GCC can check for signed overflow, which performs well
+(the overhead is in the noise, he said), but adding warning messages for it does grow
+the kernel by 6%;  making the overflow abort, instead, only adds 0.1%.
+Clang can check for both signed and unsigned overflow; signed overflow is
+undefined, while unsigned overflow is defined, but often unexpected.
+Marking places where unsigned overflow is expected is needed; it
+would be nice to get those annotations put into the kernel, Cook said.
+</p>
+
+<p>
+Explicit bounds checking is expensive.  Doing it for
+<tt>copy_{to,from}_user()</tt> is a less than 1% performance hit, but
+adding it to the <tt>strcpy()</tt> and <tt>memcpy()</tt> families is
+around a 2% hit.  Pre-Meltdown that would have been a totally impossible
+performance regression for security, he said; post-Meltdown, since it is
+less than 5%, maybe there is a chance to add this checking.
+</p>
+
+<p>
+Better APIs would help as well.  He pointed to the evolution of
+<tt>strcpy()</tt>, through <tt>str<b>n</b>cpy()</tt> and
+<tt>str<b>l</b>cpy()</tt> (each with their own bounds flaws) to
+<tt>str<b>s</b>cpy()</tt>, which seems to be "OK so far".  He also mentioned
+<tt>memcpy()</tt> again as a poor API with respect to bounds checking.
+</p>
+
+<p>
+Hardware support for bounds checking is available in the application
+data integrity (ADI) feature for SPARC and is coming for Arm; it may also be
+available for Intel processors at some point.  These all use a form of
+"memory tagging", where allocations get a tag that is stored in the
+high-order byte of the address.   An offset from the address can be checked
+by the hardware to see if it still falls within the allocated region based
+on the tag.
+</p>
+
+<p>
+Control-flow integrity (CFI) has become more of an issue lately because
+much of what attackers had used in the past has been marked as "no execute"
+so they are turning to using existing code "gadgets" already present in the
+kernel by hijacking existing indirect function calls.  In C, you can just call
+pointers without regard to the type as it just treats them as an 
+address to jump to.  Clang has a CFI-sanitize feature that enforces the
+function prototype to restrict the calls that can be made.  It is done at
+runtime and is not perfect, in part because there are lots of functions in
+the kernel that take one unsigned long parameter and return an unsigned long.
+</p>
+
+<p>
+Attacks on CFI have both a "forward edge", which is what CFI sanitize
+tries to handle, and a "backward edge" that comes from manipulating the stack
+values, the return address in particular.  Clang has two methods available
+to prevent the stack manipulation.  The first is the "safe stack", which
+puts various important items (e.g. "safe" variables, register spills, and
+the return address) on a separate stack.  Alternatively, the "shadow stack"
+feature creates a separate stack just for return addresses.
+</p>
+
+<p>
+One problem with these other stacks is that they are still writable, so if
+an attacker can find them in memory, they can still perform their attacks.
+Hardware-based protections, like Intel's Control-Flow Enforcement
+Technology (CET), <a href="/Articles/758245/">provide a read-only shadow
+call stack</a> for return addresses.  Another hardware protection is <a
+href="/Articles/718888/">pointer authentication</a> for Arm, which adds a
+kind of encrypted tag to the return address that can be verified before it
+is used.
+</p>
+
+<h4>Status and challenges</h4>
+
+<p>
+Cook then went through the current status of handling these different
+problems in the kernel.  VLAs are almost completely gone, he said, just a
+few remain in the crypto subsystem; he hopes those VLAs will be gone by 4.20 (or
+whatever the number of the next kernel release turns out to be).  Once that
+happens, he plans to turn on <tt>-Wvla</tt> for the kernel build so that
+none creep back in.
+</p>
+
+<p>
+There has been steady progress made on marking fall-through cases in
+<tt>switch</tt> statements.  Only 745 remain to be handled of the 2311 that
+existed when this work started; each one requires scrutiny to determine
+what the author's intent is.  Auto-initialized local variables can be done
+using compiler plugins, but that is "not quite what we want", he said.
+More compiler support would be helpful there.  For arithmetic overflow, it
+would be nice to see GCC get support for the unsigned case, but memory
+allocations are now doing explicit overflow checking at this point. 
+</p>
+
+<p>
+Bounds checking has seen some "crying about performance hits", so we are
+waiting impatiently for hardware support, he said.  CFI forward-edge
+protection needs <a href="/Articles/744507/">link-time optimization</a>
+(LTO) support for Clang in the kernel, but it is currently working on
+Android.  For backward-edge mitigation, the Clang shadow call stack is
+working on Android, but we are impatiently waiting for hardware support for
+that too.
+</p>
+
+<p>
+There are a number of challenges in doing security development for the
+kernel, Cook said.  There are cultural boundaries due to conservatism
+within the kernel community; that requires patiently working and reworking
+features in order to get them upstream.  There are, of course, technical
+challenges because of the complexity of security changes;  those kinds of
+problems can be solved.  There are also resource limitations in terms of
+developers, testers, reviewers, and so on.  KSPP and the other kernel
+security developers are still making that "slow but steady" progress.
+</p>
+
+<p>
+Cook's <a href="https://outflux.net/slides/2018/lss/danger.pdf">slides
+[PDF]</a> are available for interested readers; before long, there should
+be a video available of the talk as well.
+
+<p>
+[I would like to thank LWN's travel sponsor, the Linux Foundation, for
+travel assistance to attend the Linux Security Summit in Vancouver.]
+<p><a href="/Articles/763641/#Comments">Comments (70 posted)</a>
+<p>
+<a name="763106"></a><h2 class="SummaryHL"><a href="/Articles/763106/">The second half of the 4.19 merge window</a></h2>
+
+<div class="FeatureByline">
+           By <b>Jonathan Corbet</b><br>August 26, 2018
+           </div>
+By the time Linus Torvalds <a href="/Articles/763497/">released
+4.19-rc1</a> and closed 
+the merge window for this development cycle, 12,317 non-merge
+changesets had found their way into the mainline; about 4,800 of those
+landed after <a href="/Articles/762566/">last week's summary</a> was
+written.  As tends to be the case 
+late in the merge window, many of those changes were fixes for the bigger
+patches that went in early, but there were also a number of new features
+added.  Some of the more significant changes include:
+<br clear="all">
+<p>
+
+<h4>Core kernel</h4>
+<p>
+<ul class="spacylist">
+
+<li> The full set of patches adding <a
+     href="/Articles/761118/">control-group awareness to the out-of-memory
+     killer</a> has <i>not</i> been merged due to ongoing disagreements,
+     but one piece of it has: there is a new <tt>memory.oom.group</tt>
+     control knob that will cause all processes within a control group to
+     be killed in an out-of-memory situation.
+<li> A new set of protections has been added to prevent an attacker from
+     fooling a program into writing to an existing file or FIFO.  An open
+     with the <tt>O_CREAT</tt> flag to a file or FIFO in a world-writable,
+     sticky 
+     directory (e.g. <tt>/tmp</tt>) will fail if the owner of the opening
+     process is not the owner of either the target file or the containing
+     directory.  This behavior, disabled by default, is controlled by the
+     new <tt>protected_regular</tt> and <tt>protected_fifos</tt> sysctl
+     knobs.
+
+</ul>
+
+<h4>Filesystems and block layer</h4>
+<p>
+<ul class="spacylist">
+
+<li> The dm-integrity device-mapper target can now use a separate device
+     for metadata storage.
+<li> EROFS, the "enhanced read-only filesystem", has been added to the
+     staging tree.  It is "<span>a lightweight read-only file system with
+     modern designs (eg. page-sized blocks, inline xattrs/data, etc.) for
+     scenarios which need high-performance read-only requirements,
+     eg. firmwares in mobile phone or LIVECDs</span>"
+<li> The new "metadata copy-up" feature in overlayfs will avoid copying a
+     file's contents to the upper layer on a metadata-only change.  See <a
+     href="https://git.kernel.org/linus/d5791044d2e5749ef4de84161cec5532e2111540">this
+     commit</a> for details.
+
+</ul>
+<p>
+
+<h4>Hardware support</h4>
+<p>
+<ul class="spacylist">
+
+<li> <b>Graphics</b>:
+     Qualcomm Adreno A6xx GPUs.
+
+<li> <b>Industrial I/O</b>:
+     Spreadtrum SC27xx series PMIC analog-to-digital converters,
+     Analog Devices AD5758 digital-to-analog converters,
+     Intersil ISL29501 time-of-flight sensors,
+     Silicon Labs SI1133 UV index/ambient light sensor chips, and
+     Bosch Sensortec BME680 sensors.
+
+
+<li> <b>Miscellaneous</b>:
+     Generic ADC-based resistive touchscreens,
+     Generic ASIC devices via the Google <a
+     href="/ml/linux-kernel/20180630000253.70103-1-sque@chromium.org/">Gasket
+     framework</a>,
+     Analog Devices ADGS1408/ADGS1409 multiplexers,
+     Actions Semi Owl SoCs DMA controllers,
+     MEN 16Z069 watchdog timers,
+     Rohm BU21029 touchscreen controllers,
+     Cirrus Logic CS47L35, CS47L85, CS47L90, and CS47L91 codecs,
+     Cougar 500k gaming keyboards,
+     Qualcomm GENI-based I2C controllers,
+     Actions Semiconductor Owl I2C controllers,
+     ChromeOS EC-based USBPD chargers, and
+     Analog Devices ADP5061 battery chargers.
+
+<li> <b>USB</b>:
+     Nuvoton NPCM7XX on-chip EHCI USB controllers,
+     Broadcom Stingray PCIe PHYs, and
+     Renesas R-Car generation 3 PCIe PHYs.
+
+<li> There is also a new subsystem for the abstraction of GNSS (global
+     navigation satellite systems — GPS, for example) receivers in the
+     kernel.  To date, such devices have been handled with an abundance of
+     user-space drivers; the hope is to bring some order in this area.
+     Support for u-blox and SiRFstar receivers has been added as well.
+
+</ul>
+
+
+<p>
+<h4>Kernel internal</h4>
+<p>
+<ul class="spacylist">
+
+<li> The <tt>__deprecated</tt> marker, used to mark interfaces that should
+     no longer be used, has been deprecated and removed from the kernel
+     entirely.  <a
+     href="https://git.kernel.org/linus/771c035372a036f83353eef46dbb829780330234">Torvalds
+     said</a>: "<span>They are not useful.  They annoy
+     everybody, and nobody ever does anything about them, because it's
+     always 'somebody elses problem'.  And when people start thinking that
+     warnings are normal, they stop looking at them, and the real warnings
+     that mean something go unnoticed.</span>"
+<li> The minimum version of GCC required by the kernel has been moved up to
+     4.6.
+
+</ul>
+<p>
+
+There are a couple of significant changes that failed to get in this time
+around, including the <a
+href="/Articles/745073/">XArray</a> data structure.  The patches are
+thought to be ready, but they had the bad luck to be based on a tree that
+failed to be merged for other reasons, so Torvalds <a
+href="/ml/linux-kernel/CA+55aFxFjAmrFpwQmEHCthHOzgidCKnod+cNDEE+3Spu9o1s3w@mail.gmail.com/">didn't
+even look at them</a>.  That, in turn, blocks another set of patches intended to
+enable migration of slab-allocated objects.
+<p>
+The other big deferral is the <a href="/Articles/759499/">new system-call
+API for filesystem mounting</a>.  Despite ongoing <a
+href="/Articles/762355/">concerns</a> about what happens when the same
+low-level device is mounted multiple times with conflicting options, Al
+Viro sent <a
+href="/ml/linux-fsdevel/20180823223145.GK6515@ZenIV.linux.org.uk/">a pull
+request</a> to send this work upstream.  The ensuing discussion made it
+clear that there is still not a consensus in this area, though, so it seems
+that this work has to wait for another cycle.
+<p>
+Assuming all goes well, the kernel will stabilize over the coming weeks and
+the final 4.19 release will happen in mid-October.
+<p><a href="/Articles/763106/#Comments">Comments (1 posted)</a>
+<p>
+<a name="763603"></a><h2 class="SummaryHL"><a href="/Articles/763603/">Measuring (and fixing) I/O-controller throughput loss</a></h2>
+
+<div class="GAByline">
+           <p>August 29, 2018</p>
+           <p>This article was contributed by Paolo Valente</p>
+           </div>
+<p>Many services, from web hosting and video streaming to cloud storage,
+need to move data to and from storage.  They also often require that each per-client
+I/O flow be guaranteed a non-zero amount of bandwidth and a bounded latency. An
+expensive way to provide these guarantees is to over-provision
+storage resources, keeping each resource underutilized, and thus
+have plenty of bandwidth available for the few I/O flows dispatched to
+each medium. Alternatively one can use an I/O controller.  Linux provides
+two mechanisms designed to throttle some I/O streams to allow others to
+meet their bandwidth and latency requirements.  These mechanisms work, but
+they come at a cost: a loss of as much as 80% of total available I/O
+bandwidth.  I have run some tests to demonstrate this problem; some
+upcoming improvements to the <a href="/Articles/601799/">bfq I/O
+scheduler</a> promise to improve the situation considerably.
+<p>
+
+<p>Throttling does guarantee control, even on drives that happen to be
+highly utilized but, as will be seen, it has a hard time
+actually ensuring that drives are highly utilized. Even with greedy I/O
+flows, throttling
+easily ends up utilizing as little as 20% of the available speed of a
+flash-based drive.
+
+Such a speed loss may be particularly problematic with lower-end
+storage. On the opposite end, it is also disappointing with
+high-end hardware, as the Linux block I/O stack itself has been
+<a href="/Articles/552904">redesigned from the ground up</a> to fully utilize the
+high speed of modern, fast storage.  In
+addition, throttling fails to guarantee the expected bandwidths if I/O
+contains both reads and writes, or is sporadic in nature.
+
+<p>On the bright side, there now seems to be an effective alternative for
+controlling I/O: the proportional-share policy provided by the bfq I/O
+scheduler. It enables nearly 100% storage bandwidth utilization,
+at least with some of the workloads that are problematic for
+throttling. An upcoming version of bfq may be able to
+achieve this result with almost all workloads. Finally, bfq
+guarantees bandwidths with all workloads. The current limitation of
+bfq is that its execution overhead becomes significant at speeds above
+400,000 I/O operations per second on commodity CPUs.
+
+<p>Using the bfq I/O scheduler, Linux can now guarantee
+low latency to lightweight flows containing sporadic, short I/O. No
+throughput issues arise, and no configuration is required. This
+capability benefits important, time-sensitive tasks, such as 
+video or audio streaming, as well as executing commands or starting
+applications. 
+
+Although benchmarks are not available yet, these guarantees might also be
+provided by the newly proposed <a href="/Articles/758963/">I/O latency
+controller</a>.  It allows administrators to set target latencies for I/O
+requests originating from each group of processes, and favors the
+groups with the lowest target latency.
+
+<h4>The testbed</h4>
+
+<p>I ran the tests with an ext4 filesystem mounted on a PLEXTOR
+PX-256M5S SSD, which features a peak rate of ~160MB/s with random I/O,
+and of ~500MB/s with sequential I/O. I used blk-mq, in Linux
+4.18.  The system was equipped with a 2.4GHz Intel Core i7-2760QM
+CPU and 1.3GHz DDR3 DRAM. In such a system, a single thread doing
+synchronous reads reaches a throughput of 23MB/s.
+
+<p>
+For the purposes of these tests, each process is considered to be in one of
+two groups, termed "target" and "interferers".
+A target is a single-process, I/O-bound group whose I/O is focused on. In
+particular, I measure the I/O throughput enjoyed by this group to get
+the minimum bandwidth delivered to the group.
+An interferer is a single-process group whose role is to generate
+additional I/O that interferes with the I/O of the target.
+The tested workloads contain one target and multiple interferers.
+
+<p>The single process in each group either reads or writes, through
+asynchronous (buffered) operations, to one file — different from the file read
+or written by any other process — after invalidating the buffer cache
+for the file.  I define a reader or writer process as either "random" or
+"sequential", depending on whether it reads or writes its file at random
+positions or sequentially.
+Finally, an interferer is defined as being either "active" or "inactive"
+depending on whether it performs I/O during the test. When an
+interferer is mentioned, it is assumed that the interferer is active.
+
+<p>Workloads are defined so as to try to cover the combinations that, I
+believe, most influence the performance of the storage device and of
+the I/O policies. For brevity, in this article I show results for only
+two groups of workloads:
+<p>
+<ul class="spacylist">
+
+<li> <b>Static sequential</b>: four synchronous sequential readers or four
+     asynchronous sequential writers, plus five inactive interferers.
+     
+<li> <b>Static random</b>: four synchronous random readers, all with a block
+     size equal to 4k, plus five inactive interferers.
+</ul>
+
+<p>To create each workload, I considered, for each mix of
+interferers in the group, two possibilities for the target: it could be
+either a random or a sequential synchronous reader.
+
+In <a
+href="http://algogroup.unimore.it/people/paolo/pub-docs/extended-lat-bw-throughput.pdf">a
+longer version of this article [PDF]</a>, you will also find results
+for workloads with varying degrees of I/O randomness, and for
+dynamic workloads (containing sporadic I/O sources). These extra results
+confirm the losses of throughput and I/O control for throttling that
+are shown here.
+
+<h4>I/O policies</h4>
+
+<p>Linux provides two I/O-control mechanisms for guaranteeing (a minimum)
+bandwidth, or at least fairness, to long-lived flows: the throttling
+and proportional-share I/O policies.
+With throttling, one can set a maximum bandwidth limit — "max limit" for
+brevity — for the I/O of each group. Max limits can be used,
+in an indirect way, to provide the service guarantee at the focus of this
+article.  For example, to guarantee minimum bandwidths to I/O flows, a group can
+be guaranteed a minimum bandwidth by limiting the maximum bandwidth of
+all the other groups.
+
+<p>Unfortunately, max limits have two drawbacks in terms of
+throughput. First, if some groups do not use their allocated bandwidth,
+that bandwidth cannot be reclaimed by other active groups. Second,
+limits must comply with the worst-case speed of the device, namely,
+its random-I/O peak rate. Such limits will clearly leave a lot of
+throughput unused with workloads that otherwise would drive the
+device to higher throughput levels.
+
+Maximizing throughput is simply not a goal of max limits. So, for
+brevity, test results with max limits are not shown here. You can
+find these results, plus a more detailed description of the above
+drawbacks, in the long version of this article.
+
+<p>Because of these drawbacks, a new, still experimental, low limit
+has been added to the throttling policy. If a group is
+assigned a low limit, then the throttling policy automatically
+limits the I/O of the other groups in such a way as to
+guarantee to the group a minimum bandwidth equal to its assigned low
+limit. This new throttling mechanism throttles no group as long as
+every group is getting at least its assigned minimum bandwidth. I tested
+this mechanism, but did not consider the interesting problem
+of guaranteeing minimum bandwidths while, at the same time, enforcing
+maximum bandwidths.
+
+<p>The other I/O policy available in Linux, proportional share,
+provides weighted fairness. Each group is assigned a weight, and should
+receive a portion of the total throughput proportional to its weight.
+This scheme guarantees minimum bandwidths in the same way that low limits do
+in throttling. In particular, it guarantees to each group a minimum
+bandwidth equal to the ratio between the weight of the group, and the
+sum of the weights of all the groups that may be active at the same
+time.
+
+<p>The actual implementation of the proportional-share policy, on a given
+drive, depends on what flavor of the block layer is in use for that
+drive. If the drive is using the legacy block interface, the policy is
+implemented by 
+the cfq I/O scheduler. Unfortunately, cfq fails to control
+bandwidths with flash-based storage, especially on drives featuring
+command queueing.  This case is not considered in these tests. With
+drives using the multiqueue interface,
+proportional share is implemented by bfq. This is the
+combination considered in the tests.
+
+<p>To benchmark both throttling (low limits) and proportional share, I
+tested, for each workload, the combinations of I/O policies and I/O
+schedulers reported in the table below.  In the end, there are three test
+cases for each workload. In addition, for some workloads, I considered two
+versions of bfq for the proportional-share policy.
+
+<blockquote>
+<table class="OddEven">
+<tr>
+<th align="left" width="14%" valign="top">Name </th>
+<th align="left" width="14%" valign="top">I/O policy </th>
+<th align="left" width="14%" valign="top">Scheduler </th>
+<th align="left" width="14%" valign="top">Parameter for target </th>
+<th align="left" width="14%" valign="top">Parameter for each
+of the four active interferers </th>
+<th align="left" width="14%" valign="top">Parameter for each of the five inactive
+interferers </th>
+<th align="left" width="14%" valign="top">Sum of parameters</th>
+</tr>
+<tr>
+<td align="left" width="14%" valign="top">low-none</td>
+<td align="left" width="14%" valign="top">Throttling with low limits</td>
+<td align="left" width="14%" valign="top">none</td>
+<td align="left" width="14%" valign="top">10MB/s</td>
+<td align="left" width="14%" valign="top">10MB/s
+(tot: 40)</td>
+<td align="left" width="14%" valign="top">20MB/s (tot: 100)</td>
+<td align="left" width="14%" valign="top">150MB/s</td>
+</tr>
+<tr>
+<td align="left" width="14%" valign="top">prop-bfq</td>
+<td align="left" width="14%" valign="top">Proportional share</td>
+<td align="left" width="14%" valign="top">bfq</td>
+<td align="left" width="14%" valign="top">300</td>
+<td align="left" width="14%" valign="top">100 (tot: 400)</td>
+<td align="left" width="14%" valign="top">200
+(tot: 1000)</td>
+<td align="left" width="14%" valign="top">1700</td>
+</tr>
+</table>
+</blockquote>
+
+
+
+
+<p>For low limits, I report results with only none as the I/O scheduler,
+because the results are the same with kyber and mq-deadline.
+
+<p>The capabilities of the storage medium and of low limits drove the policy
+configurations. In particular:
+
+<ul class="spacylist">
+
+<li> The configuration of the target and of the active interferers for
+low-none is the one for which low-none provides
+its best possible minimum-bandwidth guarantee to the target: 10MB/s,
+guaranteed if all interferers are readers.
+Results remain the same regardless of the values used for target
+latency and idle time; I set them to 100µs and
+1000µs, respectively, for every group.</li>
+
+<li> Low limits for inactive interferers are set to twice the limits for
+active interferers, to pose greater difficulties to the
+policy.</li>
+
+<li> I chose weights for prop-bfq so as to guarantee about the same
+minimum bandwidth as low-none to the target, in the same
+only-reader worst case as for low-none and to preserve, between
+the weights of active and inactive interferers, the same ratio as
+between the low limits of active and inactive interferers.</li>
+</ul>
+<p>Full details on configurations can be found in the long version of this
+article. 
+
+<p>Each workload was run ten times for each policy, plus ten times without
+any I/O control, i.e., with none as I/O scheduler and no I/O policy in
+use. For each run, I measured the I/O throughput of the target (which
+reveals the bandwidth provided to the target), the cumulative I/O
+throughput of the interferers, and the total I/O throughput. These
+quantities fluctuated very little during each run, as well as across
+different runs. Thus in the graphs I report only averages over per-run
+average throughputs. In particular, for the case of no I/O control, I
+report only the total I/O throughput, to give an idea of the throughput
+that can be reached without imposing any control.
+
+<h4>Results</h4>
+
+<p>
+This plot shows throughput results for the simplest group of
+workloads: the static-sequential set.
+
+<blockquote>
+<img src="https://static.lwn.net/images/2018/iocontrol/fig1.png" alt="[Figure 1]" class="photo">
+</blockquote>
+<p>
+
+With a random reader as
+the target against sequential readers as interferers, low-none does
+guarantee the configured low limit to the target. Yet it reaches only a
+low total throughput. The throughput of
+the random reader evidently oscillates around 10MB/s during the test.
+This implies that it is at least slightly below 10MB/s for a significant
+percentage of the time. But when this happens, the low-limit mechanism
+limits the maximum bandwidth of every active group to the low limit set
+for the group, i.e., to just 10MB/s.
+The end result is a total throughput lower than 10% of the throughput
+reached without I/O control.
+<p>
+That said, the high throughput achieved without I/O control is
+obtained by choking the random I/O of the target in favor of
+the sequential I/O of the interferers.  Thus, it
+is probably more interesting to compare low-none throughput with the
+throughput reachable while actually guaranteeing 10MB/s to the target.
+The target is a single, synchronous, random reader, which reaches 23MB/s while
+active. So, to guarantee 10MB/s to the target, it is enough to
+serve it for about half of the time, and the interferers for the other
+half. Since the device reaches ~500MB/s with the sequential I/O of the
+interferers, the resulting throughput with this service scheme would be
+(500+23)/2, or about 260MB/s. low-none thus reaches less than 20%
+of the 
+total throughput that could be reached while still preserving the target
+bandwidth.
+
+<p>prop-bfq provides the target with a slightly higher throughput than
+low-none. This makes it harder for prop-bfq to reach a high total
+throughput, because prop-bfq serves more random I/O (from the target)
+than low-none. Nevertheless, prop-bfq gets a much higher total
+throughput than low-none. According to the above estimate, this
+throughput is about 90% of the maximum throughput that could be reached,
+for this workload, without violating service guarantees. The reason for
+this good result is that bfq provides an effective implementation of
+the proportional-share service policy. At any time, each active group is
+granted a fraction of the current total throughput, and the sum of these
+fractions is equal to one; so group bandwidths naturally saturate the
+available total throughput at all times.
+
+<p>Things change with the second workload: a random reader against
+sequential writers. Now low-none reaches a much higher total
+throughput than prop-bfq.  low-none serves
+much more sequential (write) I/O than prop-bfq because writes somehow
+break the low-limit mechanisms and prevail over the reads of the target.
+Conceivably, this happens because writes tend to both starve reads in
+the OS (mainly by eating all available I/O tags) and to cheat on their
+completion time in the drive. In contrast, bfq is intentionally
+configured to privilege reads, to counter these issues.
+
+<p>In particular, low-none gets an even higher throughput than no
+I/O control at all because it penalizes the random I/O of the target even more
+than the no-controller configuration.
+
+<p>Finally, with the last two workloads, prop-bfq reaches even
+higher total throughput than with the first two. It happens
+because the target also does sequential I/O, and serving sequential
+I/O is much more beneficial for throughput than serving random I/O. With
+these two workloads, the total throughput is, respectively, close to or
+much higher than that reached without I/O control. For the last
+workload, the total throughput is much higher because, differently from
+none, bfq privileges reads over asynchronous writes, and reads yield
+a higher throughput than writes. In contrast, low-none still gets
+lower or much lower throughput than prop-bfq, because of the same
+issues that hinder low-none throughput with the first two workloads.
+
+<p>As for bandwidth guarantees, with readers as interferers (third
+workload), prop-bfq, as expected, gives the target a fraction of the
+total throughput proportional to its weight.  bfq approximates
+perfect proportional-share bandwidth distribution among groups doing I/O
+of the same type (reads or writes) and with the same locality
+(sequential or random). With the last workload, prop-bfq gives much
+more throughput to the reader than to all the interferers, because
+interferers are asynchronous writers, and bfq privileges reads.
+
+<p>The second group of workloads (static random), is the one, among all
+the workloads considered, for which prop-bfq performs worst.
+Results are shown below:
+<p>
+<blockquote>
+<img src="https://static.lwn.net/images/2018/iocontrol/fig2.png" alt="[Figure 2]" class="photo">
+</blockquote>
+<p>
+
+This chart
+reports results not only for mainline bfq, but also for an
+improved version of 
+bfq which is currently under public testing.
+As can be seen, with only random readers, prop-bfq reaches a
+much lower total throughput than low-none. This happens because of
+the Achilles heel of the bfq I/O scheduler. If the process in service
+does synchronous I/O and has a higher weight than some other process, then, to
+give strong bandwidth guarantees to that process, bfq plugs I/O
+dispatching every time the process temporarily stops issuing
+I/O requests. In this respect, processes actually have differentiated
+weights and do synchronous I/O in the workloads tested. So bfq
+systematically performs I/O plugging for them. Unfortunately, this
+plugging empties the internal queues of the drive, which kills
+throughput with random I/O. And the I/O of all processes in these
+workloads is also random.
+
+<p>The situation reverses with a sequential reader as target. Yet, the most
+interesting results come from the new version of bfq, containing
+small changes to counter exactly the above weakness. This
+version recovers most of the throughput loss with the workload made of
+only random I/O and more; with the second workload, where the target is
+a sequential reader, it reaches about 3.7 times the total throughput of
+low-none.
+<p>
+
+When the main concern is the latency of flows containing short I/O,
+Linux seems now rather high performing, thanks to the bfq I/O
+scheduler and the I/O latency controller. But if the
+requirement is to provide explicit bandwidth guarantees (or just fairness) to
+I/O flows, then one must be ready to give up much or most of the speed of
+the storage media. bfq helps with some workloads, but loses most of
+the throughput with workloads consisting of mostly random
+I/O. Fortunately, there is apparently hope for much better
+performance since an improvement, still under development, seems to
+enable bfq to reach a high throughput with all workloads tested so
+far.
+
+
+
+<p>
+[ I wish to thank Vivek Goyal for enabling me to make this article
+much more fair and sound.]<div class="MakeALink">
+               <table align="right"><tr><td>
+               <form action="/SubscriberLink/MakeLink" method="post">
+               <input type="hidden" name="articleid" value="763603">
+               <input type="submit" value="Send a free link"></form>
+               </td></tr></table>
+               </div>
+               <br clear="all">
+               
+<p><a href="/Articles/763603/#Comments">Comments (4 posted)</a>
+<p>
+<a name="763175"></a><h2 class="SummaryHL"><a href="/Articles/763175/">KDE's onboarding initiative, one year later</a></h2>
+
+<div class="GAByline">
+           <p>August 24, 2018</p>
+           <p>This article was contributed by Marta Rybczyńska</p>
+           <hr>
+<a href="/Archives/ConferenceByYear/#2018-Akademy">Akademy</a>
+</div>
+<p>In 2017, the KDE community decided on <a
+href="https://dot.kde.org/2017/11/30/kdes-goals-2018-and-beyond">three
+goals</a>
+to concentrate on for the next few years. One of them was <a
+href="https://phabricator.kde.org/T7116">streamlining the onboarding of new
+contributors</a> (the others were <a
+href="https://phabricator.kde.org/T6831">improving  
+usability</a> and <a href="https://phabricator.kde.org/T7050">privacy</a>).
+During <a href="https://akademy.kde.org/">Akademy</a>, the yearly KDE
+conference 
+that was held in Vienna in August, Neofytos Kolokotronis shared the status
+of the
+onboarding goal, the work done during the last year, and further plans.
+While it is a complicated process in a project as big and diverse as KDE,
+numerous improvements have already been made.</p>
+
+<p>Two of the three KDE community goals were proposed by relative
+newcomers. Kolokotronis was one of those, having joined the <a
+href="https://community.kde.org/Promo">KDE Promo team</a>
+not long before proposing
+the focus on onboarding. He had previously been involved with <a
+href="https://www.chakralinux.org/">Chakra  
+Linux</a>, a distribution based on KDE software. The fact that new
+members of the community proposed strategic goals was also noted in the <a
+href="https://conf.kde.org/en/Akademy2018/public/events/79">Sunday keynote
+by   Claudia Garad</a>.</p>
+
+<p>Proper onboarding adds excitement to the contribution process and
+increases retention, he explained. When we look at <a
+href="https://en.wikipedia.org/wiki/Onboarding">the definition of
+onboarding</a>,
+it is a process in which the new contributors acquire knowledge, skills, and
+behaviors so that they can contribute effectively. Kolokotronis proposed
+to see it also as socialization: integration into the project's relationships,
+culture, structure, and procedures.</p>
+
+<p>The gains from proper onboarding are many.  The project can grow by
+attracting new blood with new perspectives and solutions. The community
+maintains its health and stays vibrant. Another important advantage of
+efficient onboarding is that replacing current contributors becomes easier
+when they change interests, jobs, or leave the project for whatever reason.
+Finally, successful onboarding adds new advocates to the project.</p>
+
+<h4>Achievements so far and future plans</h4>
+
+<p>The team started with ideas for a centralized onboarding process for the
+whole of KDE. They found out quickly that this would not work because KDE
+is "very decentralized", so it is hard to provide tools and
+procedures that are going to work for the whole project. According to
+Kolokotronis, other characteristics of KDE that impact onboarding are high
+diversity, remote and online teams, and hundreds of contributors in dozens of
+projects and teams. In addition, new contributors already know in which
+area they want to take part and they prefer specific information that will
+be directly useful for them.</p>
+
+<p>So the team changed its approach; several changes have since been proposed
+and implemented. The <a href="https://community.kde.org/Get_Involved">Get
+Involved</a> page, which is expected to be one of the resources new
+contributors read first, has been rewritten. For the <a
+href="https://community.kde.org/KDE/Junior_Jobs">Junior Jobs page</a>, the
+team is
+
+<a href="/Articles/763189/"><img
+src="https://static.lwn.net/images/conf/2018/akademy/NeofytosKolokotronis-sm.jpg" alt="[Neofytos
+Kolokotronis]" title="Neofytos Kolokotronis" class="rthumb"></a>
+
+
+<a
+href="https://phabricator.kde.org/T8686">discussing</a> what the  
+generic content for KDE as a whole should be. The team simplified <a
+href="https://phabricator.kde.org/T7646">Phabricator registration</a>,
+which
+resulted in documenting the process better. Another part of the work
+includes the <a href="https://bugs.kde.org/">KDE Bugzilla</a>; it includes,
+for example, initiatives to limit the number of
+states of a ticket or remove obsolete products.</p>
+
+<p>The <a href="https://www.plasma-mobile.org/index.html">Plasma Mobile</a>
+team is heavily involved in the onboarding goal.  The Plasma Mobile
+developers have simplified their 
+development environment setup and created an <a
+href="https://www.plasma-mobile.org/findyourway">interactive "Get
+Involved"</a> page. In addition, the Plasma team changed the way task
+descriptions are written; they now contain more detail, so that it is
+easier to get
+involved. The basic description should be short and clear, and it should include
+details of the problem and possible solutions. The developers try to
+share the list of skills necessary to fulfill the tasks and include clear
+links to the technical resources needed.</p>
+
+<p>Kolokotronis and team also identified a new potential source of 
+contributors for KDE: distributions using
+KDE. They have the advantage of already knowing and using the software.
+
+The next idea the team is working on is to make sure that setting up a
+development environment is easy.  The team plans to work on this during a
+dedicated sprint this autumn.</p>
+
+<h4>Searching for new contributors</h4>
+
+<p>Kolokotronis plans to search for new contributors at the periphery of the
+project, among the "skilled enthusiasts": loyal users who actually care
+about the project. They "can make wonders", he said. Those
+individuals may also be less confident or shy, have trouble making the
+first step, and need guidance. The project leaders should take that into
+account.</p>
+
+<p>In addition, newcomers are all different. Kolokotronis
+provided a long list of how contributors differ,
+including skills and knowledge, motives and
+interests, and time and dedication. His advice is to "try to find their
+superpower", the skills they have that are missing in the team. Those
+"superpowers" can then be used for the benefit of the project.</p>
+
+<p>If a project does nothing else, he said, it can start with its documentation.
+However, this does not only mean code documentation. Writing down the
+procedures or information about the internal work of the project, like who
+is working on what, is an important part of a project's documentation and helps
+newcomers. There should also be guidelines on how to start, especially
+setting up the development environment.</p>
+
+<p>The first thing the project leaders should do, according to
+Kolokotronis, is to spend time on introducing newcomers to the project.
+Ideally every new contributor should be assigned mentors &mdash; more
+experienced members who can help them when needed. The mentors and project
+leaders should find tasks that are interesting for each person. Answering
+an audience question on suggestions for shy new
+contributors, he recommended even more mentoring. It is also very helpful
+to make sure that newcomers have enough to read, but "avoid RTFM", he highlighted. It
+is also easy for a new contributor "to fly away", he said. The solution is
+to keep requesting things and be proactive.</p>
+
+<h4>What can the project do?</h4>
+
+<p>Kolokotronis suggested a number of actions for a project when it wants to
+improve its onboarding. The first step is preparation: the project
+leaders should know the team's and the project's needs. Long-term
+planning is important, too. It is not enough to wait for contributors to
+come &mdash; the project should be proactive, which means reaching out to
+candidates, suggesting appropriate tasks and, finally, making people
+available for the newcomers if they need help.</p>
+
+<p>This leads to the next step: to be a mentor. Kolokotronis suggests being a
+"great host", but also trying to phase out the dependency on the mentor
+rapidly. "We have
+been all newcomers", he said. It can be intimidating to join an existing
+group. Onboarding creates a sense of belonging which, in turn, increases
+retention.</p>
+
+<p>The last step proposed was to be strategic. This includes thinking about
+the emotions you want newcomers to feel. Kolokotronis explained the
+strategic part with an example. The overall goal is (surprise!) improve
+onboarding 
+of new contributors. An intermediate objective might be to keep the
+newcomers after they have made their first commit. If your strategy is to keep them
+confident and proud, you can use different tactics like praise and
+acknowledgment of the work in public. Another useful tactic may be assigning
+simple tasks, according to the skill of the contributor.</p>
+
+<p>To summarize, the most important thing, according to Kolokotronis, is to
+respond quickly and spend time with new contributors. This time should be
+used to explain procedures, and to introduce the people and culture. It is also
+essential to guide first contributions and praise contributors' skill and
+effort.
+Increase the difficulty of tasks over time to keep contributors motivated and
+challenged. And finally, he said,
+"turn them into mentors".</p>
+
+<p>Kolokotronis acknowledges that onboarding "takes time" and "everyone
+complains" about it. However, he is convinced that it is beneficial in the
+long term
+and that it decreases developer turnover.</p>
+
+<h4>Advice to newcomers</h4>
+
+<p>Kolokotronis concluded with some suggestions for newcomers to a
+project. They should try 
+to be persistent and to not get discouraged when something goes wrong.
+Building connections from the very beginning is helpful. He suggests 
+asking questions as if you were already a member "and things will be fine".
+However, accept criticism if it happens.</p>
+
+<p>One of the next actions of the onboarding team will be to collect
+feedback from newcomers and experienced contributors to see if they agree
+on the ideas and processes introduced so far.</p>
+<p><a href="/Articles/763175/#Comments">Comments (none posted)</a>
+<p>
+<a name="763492"></a><h2 class="SummaryHL"><a href="/Articles/763492/">Sharing and archiving data sets with Dat</a></h2>
+
+<div class="GAByline">
+           <p>August 27, 2018</p>
+           <p>This article was contributed by Antoine Beaupré</p>
+           </div>
+<p><a href="https://datproject.org">Dat</a> is a new peer-to-peer protocol
+that uses some of the concepts of
+<a href="https://www.bittorrent.com/">BitTorrent</a> and Git. Dat primarily
+targets researchers and 
+open-data activists as it is a great tool for sharing, archiving, and
+cataloging large data sets. But it can also be used to implement
+decentralized web applications in a novel way.</p>
+
+<h4>Dat quick primer</h4>
+
+<p>Dat is written in JavaScript, so it can be installed with <code>npm</code>, but
+there are <a href="https://github.com/datproject/dat/releases">standalone
+binary builds</a> and 
+a <a href="https://docs.datproject.org/install">desktop application</a> (as an AppImage). An <a href="https://datbase.org/">online viewer</a> can
+be used to inspect data for those who do not want to install
+arbitrary binaries on their computers.</p>
+
+<p>The command-line application allows basic operations like downloading
+existing data sets and sharing your own.
+Dat uses a 32-byte hex string that is an <a
+href="https://ed25519.cr.yp.to/">ed25519 public key</a>, which is 
+used to discover and find content on the net.
+For example, this will
+download some sample data:</p>
+
+<pre>
+    $ dat clone \
+      dat://778f8d955175c92e4ced5e4f5563f69bfec0c86cc6f670352c457943666fe639 \
+      ~/Downloads/dat-demo
+</pre>
+
+<p>Similarly, the <code>share</code> command is used to share content. It indexes
+the files in a given directory and creates a new unique address like
+the one above.  The <code>share</code>
+command starts a server that uses multiple discovery mechanisms (currently, the <a href="https://en.wikipedia.org/wiki/Mainline_DHT">Mainline Distributed
+Hash Table</a> (DHT), a <a href="https://github.com/mafintosh/dns-discovery">custom DNS server</a>, and
+multicast DNS) to announce the content to its peers. This is how
+another user, armed with that public key, can download that content
+with <code>dat&nbsp;clone</code> or mirror the files continuously with
+<code>dat&nbsp;sync</code>.</p> 
+
+<p>So far, this looks a lot like BitTorrent <a href="https://en.wikipedia.org/wiki/Magnet_URI_scheme">magnet links</a> updated
+with 21st century cryptography. But Dat adds revisions on top of that,
+so modifications are automatically shared through the swarm. That is
+important for public data sets as those
+are often dynamic in nature. Revisions also make it possible to use
+<a href="https://blog.datproject.org/2017/10/13/using-dat-for-automatic-file-backups/">Dat as a backup system</a> by saving the data incrementally using an
+<a href="https://github.com/mafintosh/hypercore-archiver">archiver</a>.</p>
+
+<p>While Dat is designed to work on larger data sets, processing them
+for sharing may take a while. For example, sharing the Linux
+kernel source code required about five minutes as Dat worked on
+indexing all of the files. This is comparable to the performance offered by
+<a href="https://ipfs.io/">IPFS</a> and BitTorrent. Data sets with
+more or larger files may take quite a bit more time.
+
+<p>
+One advantage that Dat has over IPFS is that it
+doesn't duplicate the data. When IPFS imports new data, it duplicates
+the files into <code>~/.ipfs</code>. For collections of small files like the
+kernel, this is not a huge problem, but for larger files like videos or
+music, it's a significant limitation. IPFS eventually implemented a
+solution to this <a href="https://github.com/ipfs/go-ipfs/issues/875">problem</a> in the form of the experimental
+<a href="https://github.com/ipfs/go-ipfs/blob/master/docs/experimental-features.md#ipfs-filestore">filestore feature</a>, but it's not enabled by default. Even with
+that feature enabled, though, changes to data sets are not automatically
+tracked. In comparison, Dat operation on dynamic data feels much
+lighter. The downside is that each set needs its own <code>dat share</code>
+process.</p>
+
+<p>Like any peer-to-peer system, Dat needs at least one peer to stay online to
+offer the content, which is impractical for mobile devices. Hosting
+providers like <a href="https://hashbase.io/">Hashbase</a> (which is a <a href="https://github.com/datprotocol/DEPs/blob/master/proposals/0003-http-pinning-service-api.md">pinning service</a> in Dat
+jargon) can help users keep content online without running their own
+<a href="https://docs.datproject.org/server">server</a>. The closest parallel in the traditional web ecosystem
+would probably be content distribution networks (CDN) although pinning
+services are not necessarily geographically distributed and a CDN does
+not necessarily retain a complete copy of a website.</p>
+
+<a href="/Articles/763544/">
+<img src="https://static.lwn.net/images/2018/dat-photoapp-sm.png" border=0 hspace=5 align="right"
+width=300 height=392 alt="[Photo app]" title="Photo app">
+</a>
+
+<p>A web browser called <a href="https://beakerbrowser.com/">Beaker</a>, based on the <a href="https://electronjs.org/">Electron</a> framework,
+can access Dat content natively without going through a pinning
+service. Furthermore, Beaker is essential to get any of the <a
+href="https://github.com/beakerbrowser/explore">Dat 
+applications</a> working, as they fundamentally rely on <code>dat://</code> URLs
+to do their magic. This means that Dat applications won't work for
+most users unless they install that special web browser. There is a
+<a href="https://addons.mozilla.org/en-US/firefox/addon/dat-p2p-protocol/">Firefox extension</a> called "<a href="https://github.com/sammacbeth/dat-fox">dat-fox</a>" for people who don't want
+to install yet another browser, but it requires installing a
+<a href="https://github.com/sammacbeth/dat-fox-helper">helper program</a>. The extension will be able to load <code>dat://</code> URLs
+but many applications will still not work. For example, the <a
+href="https://github.com/beakerbrowser/dat-photos-app">photo gallery 
+application</a> completely fails with dat-fox.</p>
+
+<p>Dat-based applications look promising from a privacy point of view.
+Because of its peer-to-peer nature, users regain control over where
+their data is stored: either on their own computer, an online server, or
+by a trusted third party. But considering the protocol is not well
+established in current web browsers, I foresee difficulties in
+adoption of that aspect of the Dat ecosystem. Beyond that, it is rather
+disappointing that Dat applications cannot run natively in a web
+browser given that JavaScript is designed exactly for that.</p>
+
+<h4>Dat privacy</h4>
+
+<p>An advantage Dat has over other peer-to-peer protocols like BitTorrent
+is end-to-end encryption. I was originally concerned by the encryption
+design when reading the <a
+href="https://github.com/datproject/docs/raw/master/papers/dat-paper.pdf">academic
+paper [PDF]</a>:</p> 
+
+<div class="BigQuote">
+  <p>It is up to client programs to make design decisions around which
+  discovery networks they trust. For example if a Dat client decides
+  to use the BitTorrent DHT to discover peers, and
+  they are searching 
+  for a publicly shared Dat key (e.g. a key cited publicly in a
+  published scientific paper) with known contents, then because of the
+  privacy design of the BitTorrent DHT it becomes public knowledge
+  what key that client is searching for.</p>
+</div>
+
+<p>So in other words, to share a secret file with another user, the
+public key is transmitted over a secure side-channel, only to then
+leak during the discovery process. Fortunately, the public Dat key
+is not directly used during discovery as it is <a
+href="https://github.com/datprotocol/DEPs/blob/653e0cf40233b5d474cddc04235577d9d55b2934/proposals/0000-peer-discovery.md#discovery-keys">hashed 
+with BLAKE2B</a>. Still, the security model of Dat assumes the public
+key is private, which is a rather counterintuitive concept that might upset
+cryptographers and confuse users who are frequently encouraged to type
+such strings in address bars and search engines as part of the Dat
+experience. There is a <a
+href="https://docs.datproject.org/security">security &amp; privacy FAQ</a>
+in the Dat 
+documentation warning about this problem:</p>
+
+<div class="BigQuote">
+  <p>One of the key elements of Dat privacy is that the public key is
+  never used in any discovery network. The public key is hashed,
+  creating the discovery key. Whenever peers attempt to connect to
+  each other, they use the discovery key.</p>
+  
+  <p>Data is encrypted using the public key, so it is important that this
+  key stays secure.</p>
+</div>
+
+<p>There are other privacy issues outlined in the
+document; it states that "<span>Dat faces similar privacy risks as
+BitTorrent</span>":</p>
+
+<div class="BigQuote">
+  <p>When you download a dataset, your IP address is exposed to the users
+  sharing that dataset. This may lead to honeypot servers collecting
+  IP addresses, as we've seen in Bittorrent. However, with dataset
+  sharing we can create a web of trust model where specific
+  institutions are trusted as primary sources for datasets,
+  diminishing the sharing of IP addresses.</p>
+</div>
+
+<p>A Dat blog post refers to this issue as <a href="https://blog.datproject.org/2016/12/12/reader-privacy-on-the-p2p-web/">reader privacy</a> and it
+is, indeed, a sensitive issue in peer-to-peer networks. It is how
+BitTorrent users are discovered and served scary verbiage from lawyers,
+after all. But Dat makes this a little better because, to join a swarm,
+you must know what you are looking for already, which means peers who
+can look at swarm activity only include users who know the secret
+public key. This works well for secret content, but for larger, public
+data sets, it is a real problem; it is why the Dat project has <a
+href="https://blog.datproject.org/2017/12/10/dont-ship/">avoided 
+creating a Wikipedia mirror</a> so far.</p>
+
+<p>I found another privacy issue that is not documented in the security FAQ
+during my review of the protocol. As mentioned earlier,
+the <a href="https://github.com/datprotocol/DEPs/pull/7">Dat discovery
+protocol</a> routinely 
+    phones home to DNS servers operated by the Dat project.
+This implies that the default discovery servers (and an
+attacker watching over their traffic) know who is publishing or seeking
+content, in essence discovering the "social network" behind Dat. This
+discovery mechanism can be disabled in clients, but a similar privacy
+issue applies to the DHT as well, although that is distributed so it
+doesn't require trust of the Dat project itself.</p>
+
+<p>Considering those aspects of the protocol, privacy-conscious users
+will probably want to use Tor or other anonymization techniques to
+work around those concerns.</p>
+
+<h4>The future of Dat</h4>
+
+<p><a href="https://blog.datproject.org/2017/06/01/dat-sleep-release/">Dat 2.0 was released in June 2017</a> with performance improvements and
+protocol changes. <a href="https://github.com/datprotocol/DEPs">Dat
+Enhancement Proposals</a> (DEPs) guide the project's
+future development;  most work is currently geared toward
+implementing the draft "<a href="https://github.com/datprotocol/DEPs/blob/master/proposals/0008-multiwriter.md">multi-writer proposal</a>" in
+<a href="https://github.com/mafintosh/hyperdb">HyperDB</a>. Without
+multi-writer support, only the 
+original publisher of a Dat can modify it. According to Joe Hand,
+co-executive-director of <a href="https://codeforscience.org/">Code for Science &amp; Society</a> (CSS) and
+Dat core developer, in an IRC chat, "supporting multiwriter is a big requirement for lots
+of folks". For example, while Dat might allow Alice to share her
+research results with Bob, he cannot modify or contribute back to those
+results. The multi-writer extension allows for Alice to assign trust
+to Bob so he can have write access to the data.
+
+<p>
+Unfortunately, the
+current proposal doesn't solve the "<span>hard problems</span>" of
+"<span>conflict merges 
+and secure key distribution</span>". The former will be worked out through
+user interface tweaks, but the latter is a classic problem that security
+projects have typically trouble finding 
+solutions for—Dat is no exception. How will Alice securely trust
+Bob? The OpenPGP web of trust? Hexadecimal fingerprints read over the
+phone? Dat doesn't provide a magic solution to this problem.</p>
+
+<p>Another thing limiting adoption is that Dat is not packaged in any
+distribution that I could find (although I <a href="https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=890565">requested it in
+Debian</a>) and, considering the speed of change of the JavaScript
+ecosystem, this is unlikely to change any time soon. A <a
+href="https://github.com/datrs">Rust 
+implementation</a> of the Dat protocol has started, however, which
+might be easier to package than the multitude of <a href="https://nodejs.org/en/">Node.js</a>
+modules. In terms of mobile device support, there is an experimental
+Android web browser with Dat support called <a href="https://bunsenbrowser.github.io/#!index.md">Bunsen</a>, which somehow
+doesn't run on my phone. Some adventurous users have successfully run Dat
+in <a href="https://termux.com/">Termux</a>. I haven't found an app
+running on iOS at this 
+point.</p>
+
+<p>Even beyond platform support, distributed protocols like Dat have a
+tough slope to climb against the virtual monopoly of more centralized
+protocols, so it remains to be seen how popular those tools will
+be. Hand says Dat is supported by multiple non-profit
+organizations. Beyond CSS, <a href="https://bluelinklabs.com/">Blue Link Labs</a> is working on the
+Beaker Browser as a self-funded startup and a grass-roots
+organization, <a href="https://www.digital-democracy.org/">Digital Democracy</a>, has contributed to the
+project. The <a href="https://archive.org">Internet Archive</a> has <a href="https://blog.archive.org/2018/06/05/internet-archive-code-for-science-and-society-and-california-digital-library-to-partner-on-a-data-sharing-and-preservation-pilot-project/">announced a collaboration</a>
+between itself, CSS, and the California Digital Library to launch a pilot
+project to see "<span>how members of a cooperative, decentralized
+network can leverage shared services to ensure data preservation while
+reducing storage costs and increasing replication counts</span>".
+
+<p>
+Hand said
+adoption in academia has been "slow but steady" and that the <a href="https://github.com/codeforscience/Dat-in-the-Lab">Dat in
+the Lab project</a> has helped identify areas that could help
+researchers adopt the project. Unfortunately, as is the case with many
+free-software projects, he said that "our team is definitely a bit
+limited on bandwidth to push for bigger adoption". Hand
+said that the project received a grant from <a
+href="https://www.mozilla.org/en-US/moss/">Mozilla Open Source 
+Support</a> to improve its documentation, which will be a big help.</p>
+
+<p>Ultimately, Dat suffers from a problem common to all peer-to-peer
+applications, which is naming. Dat addresses are not exactly
+intuitive: humans do not remember strings of 64 hexadecimal characters
+well. For this, Dat took a <a href="https://github.com/datprotocol/DEPs/blob/master/proposals/0005-dns.md">similar approach</a> to IPFS by using
+DNS <tt>TXT</tt> records and <code>/.well-known</code> URL paths to bridge existing,
+human-readable names with Dat hashes. So this sacrifices a part of the
+decentralized nature of the project in favor of usability.</p>
+
+<p>I have tested a lot of distributed protocols like Dat in the past and
+I am not sure Dat is a clear winner. It certainly has advantages over
+IPFS in terms of usability and resource usage, but the lack of
+packages on most platforms is a big limit to adoption for most
+people. This means it will be difficult to share content with my
+friends and family with Dat anytime soon, which would probably be my
+primary use case for the project. Until the protocol reaches the wider
+adoption that BitTorrent has seen in terms of platform support, I will
+probably wait before switching everything over to this
+promising project.</p>
+<p><a href="/Articles/763492/#Comments">Comments (11 posted)</a>
+<p>
+<p>
+<b>Page editor</b>: Jonathan Corbet<br>
+<h2>Inside this week's LWN.net Weekly Edition</h2>
+<ul>
+<li> <a href="/Articles/763254/">Briefs</a>: OpenSSH 7.8; 4.19-rc1; Which stable?; Netdev 0x12; Bison 3.1; Quotes; ...
+            <li> <a href="/Articles/763255/">Announcements</a>: Newsletters; events; security updates; kernel patches; ...
+            </ul>
+<b>Next page</b>:
+                  <a href="/Articles/763254/">Brief items&gt;&gt;</a><br>
+                  
+</div> <!-- ArticleText -->
+</div>
+<div class="lwn-u-1 pure-u-md-1-6 not-print">
+<div id="azk93271_right_zone"></div>
+</div>
+</div> <!-- pure-grid -->
+
+        <br clear="all">
+        <center>
+        <P>
+        <font size="-2">
+        Copyright &copy; 2018, Eklektix, Inc.<BR>
+        
+        Comments and public postings are copyrighted by their creators.<br>
+        Linux  is a registered trademark of Linus Torvalds<br>
+        </font>
+        </center>
+        
+            <script type="text/javascript">
+            var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
+            document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
+            </script>
+            <script type="text/javascript">
+            try {
+            var pageTracker = _gat._getTracker("UA-2039382-1");
+            pageTracker._trackPageview();
+            } catch(err) {}</script>
+            
+        </body></html>
+        
+\ No newline at end of file
diff --git a/test/source/LWN/Articles/763987.html b/test/source/LWN/Articles/763987.html

new file mode 100644 (file)

index 0000000..153bfda
--- /dev/null
+++ b/test/source/LWN/Articles/763987.html
@@ -0,0 +1,136 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+        "http://www.w3.org/TR/html4/loose.dtd">
+        <html>
+        <head><title>Kernel prepatch 4.19-rc2 [LWN.net]</title>
+        <meta name="twitter:card" content="summary" />
+<meta name="twitter:site" content="@lwnnet" />
+<meta name="twitter:title" content="Kernel prepatch 4.19-rc2" />
+<meta name="twitter:description" content="The 4.19-rc2 kernel prepatch is out for
+testing.
+&quot;As usual, the rc2 release is pretty small. People are taking a
+breather after the merge window, and it takes a bit of time for bug
+reports to start coming in and get identified.&quot;" />
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<meta HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8">
+        <link rel="icon" href="/images/favicon.png" type="image/png">
+        <link rel="alternate" type="application/rss+xml" title="LWN.net headlines" href="https://lwn.net/headlines/newrss">
+<link rel="alternate" type="application/rss+xml" title="Comments posted to this article" href="https://lwn.net/headlines/763987/">
+        <link rel="stylesheet" href="/CSS/lwn">
+<link rel="stylesheet" href="/CSS/nosub">
+<link rel="stylesheet" href="/CSS/pure-min">
+           <!--[if lte IE 8]>
+             <link rel="stylesheet" href="/CSS/grids-responsive-old-ie-min">
+           <![endif]-->
+           <!--[if gt IE 8]><!-->
+             <link rel="stylesheet" href="/CSS/grids-responsive-min">
+           <!--<![endif]-->
+           <link rel="stylesheet" href="/CSS/pure-lwn">
+           
+        
+<script type="text/javascript">var p="http",d="static";if(document.location.protocol=="https:"){p+="s";d="engine";}var z=document.createElement("script");z.type="text/javascript";z.async=true;z.src=p+"://"+d+".adzerk.net/ados.js";var s=document.getElementsByTagName("script")[0];s.parentNode.insertBefore(z,s);</script>
+<script type="text/javascript">
+var ados_keywords = ados_keywords || [];
+if( location.protocol=='https:' ) {
+        ados_keywords.push('T:SSL');
+} else {
+        ados_keywords.push('T:HTTP');
+}
+
+var ados = ados || {};
+ados.run = ados.run || [];
+ados.run.push(function() {
+
+ados_add_placement(4669, 20979, "azk13321_leaderboard", 4).setZone(16026);
+
+ados_add_placement(4669, 20979, "azk93271_right_zone", [5,10,6]).setZone(16027);
+
+ados_add_placement(4669, 20979, "azk31017_tracking", 20).setZone(20995);
+
+
+
+ados_setKeywords(ados_keywords.join(', ')); 
+ados_load();
+});</script>
+
+        </head>
+        <body bgcolor="#ffffff" link="Blue" VLINK="Green" alink="Green">
+        <a name="t"></a>
+<div id="menu"><a href="/"><img src="https://static.lwn.net/images/logo/barepenguin-70.png" class="logo"
+                 border="0" alt="LWN.net Logo">
+           <font class="logo">LWN<br>.net</font>
+           <font class="logobl">News from the source</font></a>
+           <a href="/"><img src="https://static.lwn.net/images/lcorner-ss.png" class="sslogo"
+                 border="0" alt="LWN"></a><div class="navmenu-container">
+           <ul class="navmenu">
+        <li><a class="navmenu" href="#t"><b>Content</b></a><ul><li><a href="/current/">Weekly Edition</a></li><li><a href="/Archives/">Archives</a></li><li><a href="/Search/">Search</a></li><li><a href="/Kernel/">Kernel</a></li><li><a href="/Security/">Security</a></li><li><a href="/Distributions/">Distributions</a></li><li><a href="/Calendar/">Events calendar</a></li><li><a href="/Comments/unread">Unread comments</a></li><li><hr></li><li><a href="/op/FAQ.lwn">LWN FAQ</a></li><li><a href="/op/AuthorGuide.lwn">Write for us</a></li></ul></li>
+</ul></div>
+</div> <!-- menu -->
+<div class="pure-g not-handset" style="margin-left: 10.5em">
+           <div class="not-print">
+             <div id="azk13321_leaderboard"></div>
+           </div>
+           </div>
+        <div class="topnav-container">
+<div class="not-handset"><form action="https://lwn.net/Login/" method="post" name="loginform"
+                 class="loginform">
+        <b>User:</b> <input type="text" name="Username" value="" size="8" /> <b>Password:</b> <input type="password" name="Password" size="8" /> <input type="hidden" name="target" value="/Articles/763987/" /> <input type="submit" name="submit" value="Log in" /></form> |
+           <form action="https://lwn.net/subscribe/" method="post" class="loginform">
+           <input type="submit" name="submit" value="Subscribe" />
+           </form> |
+           <form action="https://lwn.net/Login/newaccount" method="post" class="loginform">
+           <input type="submit" name="submit" value="Register" />
+           </form>
+        </div>
+               <div class="handset-only">
+               <a href="/subscribe/"><b>Subscribe</b></a> /
+               <a href="/Login/"><b>Log in</b></a> /
+               <a href="/Login/newaccount"><b>New account</b></a>
+               </div>
+               </div><div class="pure-grid maincolumn">
+<div class="lwn-u-1 pure-u-md-19-24">
+<div class="PageHeadline">
+<h1>Kernel prepatch 4.19-rc2</h1>
+<div class="Byline">[Posted September 2, 2018 by corbet]
+               <p>
+               </div>
+</div>
+<div class="ArticleText">
+The <a href="/Articles/763988/">4.19-rc2</a> kernel prepatch is out for
+testing.
+"<span>As usual, the rc2 release is pretty small. People are taking a
+breather after the merge window, and it takes a bit of time for bug
+reports to start coming in and get identified.</span>"<hr width="60%" align="left">
+           (<a href="https://lwn.net/Login/?target=/Articles/763987/">Log in</a> to post comments)
+           <p>
+           
+</div> <!-- ArticleText -->
+<p><a name="Comments"></a>
+</div>
+<div class="lwn-u-1 pure-u-md-1-6 not-print">
+<div id="azk93271_right_zone"></div>
+</div>
+</div> <!-- pure-grid -->
+
+        <br clear="all">
+        <center>
+        <P>
+        <font size="-2">
+        Copyright &copy; 2018, Eklektix, Inc.<BR>
+        
+        Comments and public postings are copyrighted by their creators.<br>
+        Linux  is a registered trademark of Linus Torvalds<br>
+        </font>
+        </center>
+        
+            <script type="text/javascript">
+            var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
+            document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
+            </script>
+            <script type="text/javascript">
+            try {
+            var pageTracker = _gat._getTracker("UA-2039382-1");
+            pageTracker._trackPageview();
+            } catch(err) {}</script>
+            
+        </body></html>
+        
+\ No newline at end of file
diff --git a/test/source/LWN/Articles/764046.html b/test/source/LWN/Articles/764046.html

new file mode 100644 (file)

index 0000000..0a6c792
--- /dev/null
+++ b/test/source/LWN/Articles/764046.html
@@ -0,0 +1,329 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+        "http://www.w3.org/TR/html4/loose.dtd">
+        <html>
+        <head><title>Security updates for Monday [LWN.net]</title>
+        <meta name="twitter:card" content="summary" />
+<meta name="twitter:site" content="@lwnnet" />
+<meta name="twitter:title" content="Security updates for Monday" />
+<meta name="twitter:description" content="Security updates have been issued by Debian (dojo, libtirpc, mariadb-10.0, php5, ruby-json-jwt, spice, spice-gtk, tomcat8, and trafficserver), Fedora (ghc-hakyll, ghc-hs-bibutils, ghostscript, mariadb, pandoc-citeproc, phpMyAdmin, and xen), Mageia (java-1.8.0-openjdk, libarchive, libgd, libraw, libxcursor, mariadb, mercurial, openssh, openssl, poppler, quazip, squirrelmail, and virtualbox), openSUSE (cobbler, libressl, wireshark, and zutils), and SUSE (couchdb, java-1_7_0-ibm, java-1_7_1-ibm, OpenStack, and spice).
+" />
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<meta HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8">
+        <link rel="icon" href="/images/favicon.png" type="image/png">
+        <link rel="alternate" type="application/rss+xml" title="LWN.net headlines" href="https://lwn.net/headlines/newrss">
+        <link rel="stylesheet" href="/CSS/lwn">
+<link rel="stylesheet" href="/CSS/nosub">
+<link rel="stylesheet" href="/CSS/pure-min">
+           <!--[if lte IE 8]>
+             <link rel="stylesheet" href="/CSS/grids-responsive-old-ie-min">
+           <![endif]-->
+           <!--[if gt IE 8]><!-->
+             <link rel="stylesheet" href="/CSS/grids-responsive-min">
+           <!--<![endif]-->
+           <link rel="stylesheet" href="/CSS/pure-lwn">
+           
+        
+<script type="text/javascript">var p="http",d="static";if(document.location.protocol=="https:"){p+="s";d="engine";}var z=document.createElement("script");z.type="text/javascript";z.async=true;z.src=p+"://"+d+".adzerk.net/ados.js";var s=document.getElementsByTagName("script")[0];s.parentNode.insertBefore(z,s);</script>
+<script type="text/javascript">
+var ados_keywords = ados_keywords || [];
+if( location.protocol=='https:' ) {
+        ados_keywords.push('T:SSL');
+} else {
+        ados_keywords.push('T:HTTP');
+}
+
+var ados = ados || {};
+ados.run = ados.run || [];
+ados.run.push(function() {
+
+ados_add_placement(4669, 20979, "azk13321_leaderboard", 4).setZone(16026);
+
+ados_add_placement(4669, 20979, "azk93271_right_zone", [5,10,6]).setZone(16027);
+
+ados_add_placement(4669, 20979, "azk31017_tracking", 20).setZone(20995);
+
+
+
+ados_setKeywords(ados_keywords.join(', ')); 
+ados_load();
+});</script>
+
+        </head>
+        <body bgcolor="#ffffff" link="Blue" VLINK="Green" alink="Green">
+        <a name="t"></a>
+<div id="menu"><a href="/"><img src="https://static.lwn.net/images/logo/barepenguin-70.png" class="logo"
+                 border="0" alt="LWN.net Logo">
+           <font class="logo">LWN<br>.net</font>
+           <font class="logobl">News from the source</font></a>
+           <a href="/"><img src="https://static.lwn.net/images/lcorner-ss.png" class="sslogo"
+                 border="0" alt="LWN"></a><div class="navmenu-container">
+           <ul class="navmenu">
+        <li><a class="navmenu" href="#t"><b>Content</b></a><ul><li><a href="/current/">Weekly Edition</a></li><li><a href="/Archives/">Archives</a></li><li><a href="/Search/">Search</a></li><li><a href="/Kernel/">Kernel</a></li><li><a href="/Security/">Security</a></li><li><a href="/Distributions/">Distributions</a></li><li><a href="/Calendar/">Events calendar</a></li><li><a href="/Comments/unread">Unread comments</a></li><li><hr></li><li><a href="/op/FAQ.lwn">LWN FAQ</a></li><li><a href="/op/AuthorGuide.lwn">Write for us</a></li></ul></li>
+</ul></div>
+</div> <!-- menu -->
+<div class="pure-g not-handset" style="margin-left: 10.5em">
+           <div class="not-print">
+             <div id="azk13321_leaderboard"></div>
+           </div>
+           </div>
+        <div class="topnav-container">
+<div class="not-handset"><form action="https://lwn.net/Login/" method="post" name="loginform"
+                 class="loginform">
+        <b>User:</b> <input type="text" name="Username" value="" size="8" /> <b>Password:</b> <input type="password" name="Password" size="8" /> <input type="hidden" name="target" value="/Articles/764046/" /> <input type="submit" name="submit" value="Log in" /></form> |
+           <form action="https://lwn.net/subscribe/" method="post" class="loginform">
+           <input type="submit" name="submit" value="Subscribe" />
+           </form> |
+           <form action="https://lwn.net/Login/newaccount" method="post" class="loginform">
+           <input type="submit" name="submit" value="Register" />
+           </form>
+        </div>
+               <div class="handset-only">
+               <a href="/subscribe/"><b>Subscribe</b></a> /
+               <a href="/Login/"><b>Log in</b></a> /
+               <a href="/Login/newaccount"><b>New account</b></a>
+               </div>
+               </div><div class="pure-grid maincolumn">
+<div class="lwn-u-1 pure-u-md-19-24">
+<div class="PageHeadline">
+<h1>Security updates for Monday</h1>
+</div>
+<div class="ArticleText">
+<table class="OddEven">
+               <tr><th align="left">Dist.</th>
+                   <th align="left">ID</th>
+                   <th align="left">Release</th>
+                   <th align="left">Package</th>
+                   <th align="left">Date</th></tr>
+<tr><td align="left">Debian</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764007/">DLA-1492-1</a></td>
+                       <td align="left">LTS</td>
+                       <td align="left">dojo</td>
+                       <td>2018-09-03</td>
+                       </tr><tr><td align="left">Debian</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764008/">DLA-1487-1</a></td>
+                       <td align="left">LTS</td>
+                       <td align="left">libtirpc</td>
+                       <td>2018-08-31</td>
+                       </tr><tr><td align="left">Debian</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764009/">DLA-1488-1</a></td>
+                       <td align="left">LTS</td>
+                       <td align="left">mariadb-10.0</td>
+                       <td>2018-08-31</td>
+                       </tr><tr><td align="left">Debian</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764010/">DLA-1490-1</a></td>
+                       <td align="left">LTS</td>
+                       <td align="left">php5</td>
+                       <td>2018-09-01</td>
+                       </tr><tr><td align="left">Debian</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764011/">DSA-4283-1</a></td>
+                       <td align="left">stable</td>
+                       <td align="left">ruby-json-jwt</td>
+                       <td>2018-08-31</td>
+                       </tr><tr><td align="left">Debian</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764013/">DLA-1488-1</a></td>
+                       <td align="left">LTS</td>
+                       <td align="left">spice</td>
+                       <td>2018-08-31</td>
+                       </tr><tr><td align="left">Debian</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764012/">DLA-1486-1</a></td>
+                       <td align="left">LTS</td>
+                       <td align="left">spice</td>
+                       <td>2018-09-01</td>
+                       </tr><tr><td align="left">Debian</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764014/">DLA-1489-1</a></td>
+                       <td align="left">LTS</td>
+                       <td align="left">spice-gtk</td>
+                       <td>2018-09-01</td>
+                       </tr><tr><td align="left">Debian</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764015/">DLA-1491-1</a></td>
+                       <td align="left">LTS</td>
+                       <td align="left">tomcat8</td>
+                       <td>2018-09-02</td>
+                       </tr><tr><td align="left">Debian</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764016/">DSA-4282-1</a></td>
+                       <td align="left">stable</td>
+                       <td align="left">trafficserver</td>
+                       <td>2018-08-31</td>
+                       </tr><tr><td align="left">Fedora</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764017/">FEDORA-2018-33fef25ed1</a></td>
+                       <td align="left">F28</td>
+                       <td align="left">ghc-hakyll</td>
+                       <td>2018-08-31</td>
+                       </tr><tr><td align="left">Fedora</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764018/">FEDORA-2018-33fef25ed1</a></td>
+                       <td align="left">F28</td>
+                       <td align="left">ghc-hs-bibutils</td>
+                       <td>2018-08-31</td>
+                       </tr><tr><td align="left">Fedora</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764019/">FEDORA-2018-07083800ac</a></td>
+                       <td align="left">F28</td>
+                       <td align="left">ghostscript</td>
+                       <td>2018-09-02</td>
+                       </tr><tr><td align="left">Fedora</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764020/">FEDORA-2018-77e610115a</a></td>
+                       <td align="left">F28</td>
+                       <td align="left">mariadb</td>
+                       <td>2018-08-31</td>
+                       </tr><tr><td align="left">Fedora</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764021/">FEDORA-2018-33fef25ed1</a></td>
+                       <td align="left">F28</td>
+                       <td align="left">pandoc-citeproc</td>
+                       <td>2018-08-31</td>
+                       </tr><tr><td align="left">Fedora</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764022/">FEDORA-2018-f2b24ce26e</a></td>
+                       <td align="left">F28</td>
+                       <td align="left">phpMyAdmin</td>
+                       <td>2018-08-31</td>
+                       </tr><tr><td align="left">Fedora</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764023/">FEDORA-2018-915602df63</a></td>
+                       <td align="left">F27</td>
+                       <td align="left">xen</td>
+                       <td>2018-08-31</td>
+                       </tr><tr><td align="left">Mageia</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764024/">MGASA-2018-0366</a></td>
+                       <td align="left">6</td>
+                       <td align="left">java-1.8.0-openjdk</td>
+                       <td>2018-09-02</td>
+                       </tr><tr><td align="left">Mageia</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764025/">MGASA-2018-0361</a></td>
+                       <td align="left">6</td>
+                       <td align="left">libarchive</td>
+                       <td>2018-08-31</td>
+                       </tr><tr><td align="left">Mageia</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764026/">MGASA-2018-0367</a></td>
+                       <td align="left">6</td>
+                       <td align="left">libgd</td>
+                       <td>2018-09-02</td>
+                       </tr><tr><td align="left">Mageia</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764027/">MGASA-2018-0356</a></td>
+                       <td align="left">6</td>
+                       <td align="left">libraw</td>
+                       <td>2018-08-31</td>
+                       </tr><tr><td align="left">Mageia</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764028/">MGASA-2018-0364</a></td>
+                       <td align="left">6</td>
+                       <td align="left">libxcursor</td>
+                       <td>2018-08-31</td>
+                       </tr><tr><td align="left">Mageia</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764029/">MGASA-2018-0359</a></td>
+                       <td align="left">5</td>
+                       <td align="left">mariadb</td>
+                       <td>2018-08-31</td>
+                       </tr><tr><td align="left">Mageia</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764030/">MGASA-2018-0355</a></td>
+                       <td align="left">5, 6</td>
+                       <td align="left">mercurial</td>
+                       <td>2018-08-31</td>
+                       </tr><tr><td align="left">Mageia</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764031/">MGASA-2018-0363</a></td>
+                       <td align="left">6</td>
+                       <td align="left">openssh</td>
+                       <td>2018-08-31</td>
+                       </tr><tr><td align="left">Mageia</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764032/">MGASA-2018-0365</a></td>
+                       <td align="left">6</td>
+                       <td align="left">openssl</td>
+                       <td>2018-09-02</td>
+                       </tr><tr><td align="left">Mageia</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764033/">MGASA-2018-0358</a></td>
+                       <td align="left">6</td>
+                       <td align="left">poppler</td>
+                       <td>2018-08-31</td>
+                       </tr><tr><td align="left">Mageia</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764034/">MGASA-2018-0362</a></td>
+                       <td align="left">6</td>
+                       <td align="left">quazip</td>
+                       <td>2018-08-31</td>
+                       </tr><tr><td align="left">Mageia</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764035/">MGASA-2018-0357</a></td>
+                       <td align="left">6</td>
+                       <td align="left">squirrelmail</td>
+                       <td>2018-08-31</td>
+                       </tr><tr><td align="left">Mageia</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764036/">MGASA-2018-0360</a></td>
+                       <td align="left">6</td>
+                       <td align="left">virtualbox</td>
+                       <td>2018-08-31</td>
+                       </tr><tr><td align="left">openSUSE</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764037/">openSUSE-SU-2018:2590-1</a></td>
+                       <td align="left">42.3</td>
+                       <td align="left">cobbler</td>
+                       <td>2018-09-03</td>
+                       </tr><tr><td align="left">openSUSE</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764038/">openSUSE-SU-2018:2592-1</a></td>
+                       <td align="left">15.0</td>
+                       <td align="left">libressl</td>
+                       <td>2018-09-03</td>
+                       </tr><tr><td align="left">openSUSE</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764039/">openSUSE-SU-2018:2587-1</a></td>
+                       <td align="left">42.3</td>
+                       <td align="left">wireshark</td>
+                       <td>2018-09-02</td>
+                       </tr><tr><td align="left">openSUSE</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764040/">openSUSE-SU-2018:2591-1</a></td>
+                       <td align="left">15.0 42.3</td>
+                       <td align="left">zutils</td>
+                       <td>2018-09-03</td>
+                       </tr><tr><td align="left">SUSE</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764044/">SUSE-SU-2018:2576-1</a></td>
+                       <td align="left">OS7 </td>
+                       <td align="left">OpenStack</td>
+                       <td>2018-08-31</td>
+                       </tr><tr><td align="left">SUSE</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764041/">SUSE-SU-2018:2578-1</a></td>
+                       <td align="left">OS7 </td>
+                       <td align="left">couchdb</td>
+                       <td>2018-08-31</td>
+                       </tr><tr><td align="left">SUSE</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764042/">SUSE-SU-2018:2574-1</a></td>
+                       <td align="left">SLE11</td>
+                       <td align="left">java-1_7_0-ibm</td>
+                       <td>2018-08-31</td>
+                       </tr><tr><td align="left">SUSE</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764043/">SUSE-SU-2018:2583-1</a></td>
+                       <td align="left">SLE11</td>
+                       <td align="left">java-1_7_1-ibm</td>
+                       <td>2018-08-31</td>
+                       </tr><tr><td align="left">SUSE</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764045/">SUSE-SU-2018:2584-1</a></td>
+                       <td align="left">SLE12</td>
+                       <td align="left">spice</td>
+                       <td>2018-08-31</td>
+                       </tr></table>
+<hr width="60%" align="left">
+           (<a href="https://lwn.net/Login/?target=/Articles/764046/">Log in</a> to post comments)
+           <p>
+           
+</div> <!-- ArticleText -->
+<p><a name="Comments"></a>
+
+</div> <!-- ArticleText -->
+</div>
+<div class="lwn-u-1 pure-u-md-1-6 not-print">
+<div id="azk93271_right_zone"></div>
+</div>
+</div> <!-- pure-grid -->
+
+        <br clear="all">
+        <center>
+        <P>
+        <font size="-2">
+        Copyright &copy; 2018, Eklektix, Inc.<BR>
+        
+        Comments and public postings are copyrighted by their creators.<br>
+        Linux  is a registered trademark of Linus Torvalds<br>
+        </font>
+        </center>
+        
+            <script type="text/javascript">
+            var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
+            document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
+            </script>
+            <script type="text/javascript">
+            try {
+            var pageTracker = _gat._getTracker("UA-2039382-1");
+            pageTracker._trackPageview();
+            } catch(err) {}</script>
+            
+        </body></html>
+        
+\ No newline at end of file
diff --git a/test/source/LWN/Articles/764055.html b/test/source/LWN/Articles/764055.html

new file mode 100644 (file)

index 0000000..f793db2
--- /dev/null
+++ b/test/source/LWN/Articles/764055.html
@@ -0,0 +1,242 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+        "http://www.w3.org/TR/html4/loose.dtd">
+        <html>
+        <head><title>Topics sought for the Kernel and Maintainer Summits [LWN.net]</title>
+        <meta name="twitter:card" content="summary" />
+<meta name="twitter:site" content="@lwnnet" />
+<meta name="twitter:title" content="Topics sought for the Kernel and Maintainer Summits" />
+<meta name="twitter:description" content="The annual Maintainer and Kernel Summits will be held in Vancouver, BC on
+November 12 to 15, in conjunction with the Linux Plumbers Conference.
+The program committee is looking for topics for both summits; read on for
+details on how to submit ideas and, perhaps, get an invitation to the
+Maintainer Summit.
+" />
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<meta HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8">
+        <link rel="icon" href="/images/favicon.png" type="image/png">
+        <link rel="alternate" type="application/rss+xml" title="LWN.net headlines" href="https://lwn.net/headlines/newrss">
+<link rel="alternate" type="application/rss+xml" title="Comments posted to this article" href="https://lwn.net/headlines/764055/">
+        <link rel="stylesheet" href="/CSS/lwn">
+<link rel="stylesheet" href="/CSS/nosub">
+<link rel="stylesheet" href="/CSS/pure-min">
+           <!--[if lte IE 8]>
+             <link rel="stylesheet" href="/CSS/grids-responsive-old-ie-min">
+           <![endif]-->
+           <!--[if gt IE 8]><!-->
+             <link rel="stylesheet" href="/CSS/grids-responsive-min">
+           <!--<![endif]-->
+           <link rel="stylesheet" href="/CSS/pure-lwn">
+           
+        
+<script type="text/javascript">var p="http",d="static";if(document.location.protocol=="https:"){p+="s";d="engine";}var z=document.createElement("script");z.type="text/javascript";z.async=true;z.src=p+"://"+d+".adzerk.net/ados.js";var s=document.getElementsByTagName("script")[0];s.parentNode.insertBefore(z,s);</script>
+<script type="text/javascript">
+var ados_keywords = ados_keywords || [];
+if( location.protocol=='https:' ) {
+        ados_keywords.push('T:SSL');
+} else {
+        ados_keywords.push('T:HTTP');
+}
+
+var ados = ados || {};
+ados.run = ados.run || [];
+ados.run.push(function() {
+
+ados_add_placement(4669, 20979, "azk13321_leaderboard", 4).setZone(16026);
+
+ados_add_placement(4669, 20979, "azk93271_right_zone", [5,10,6]).setZone(16027);
+
+ados_add_placement(4669, 20979, "azk31017_tracking", 20).setZone(20995);
+
+
+
+ados_setKeywords(ados_keywords.join(', ')); 
+ados_load();
+});</script>
+
+        </head>
+        <body bgcolor="#ffffff" link="Blue" VLINK="Green" alink="Green">
+        <a name="t"></a>
+<div id="menu"><a href="/"><img src="https://static.lwn.net/images/logo/barepenguin-70.png" class="logo"
+                 border="0" alt="LWN.net Logo">
+           <font class="logo">LWN<br>.net</font>
+           <font class="logobl">News from the source</font></a>
+           <a href="/"><img src="https://static.lwn.net/images/lcorner-ss.png" class="sslogo"
+                 border="0" alt="LWN"></a><div class="navmenu-container">
+           <ul class="navmenu">
+        <li><a class="navmenu" href="#t"><b>Content</b></a><ul><li><a href="/current/">Weekly Edition</a></li><li><a href="/Archives/">Archives</a></li><li><a href="/Search/">Search</a></li><li><a href="/Kernel/">Kernel</a></li><li><a href="/Security/">Security</a></li><li><a href="/Distributions/">Distributions</a></li><li><a href="/Calendar/">Events calendar</a></li><li><a href="/Comments/unread">Unread comments</a></li><li><hr></li><li><a href="/op/FAQ.lwn">LWN FAQ</a></li><li><a href="/op/AuthorGuide.lwn">Write for us</a></li></ul></li>
+<li><a class="navmenu" href="#t"><b>Edition</b></a><ul><li><a href="/Articles/763791/">Return to the Briefs page</a></li></ul></li>
+</ul></div>
+</div> <!-- menu -->
+<div class="pure-g not-handset" style="margin-left: 10.5em">
+           <div class="not-print">
+             <div id="azk13321_leaderboard"></div>
+           </div>
+           </div>
+        <div class="topnav-container">
+<div class="not-handset"><form action="https://lwn.net/Login/" method="post" name="loginform"
+                 class="loginform">
+        <b>User:</b> <input type="text" name="Username" value="" size="8" /> <b>Password:</b> <input type="password" name="Password" size="8" /> <input type="hidden" name="target" value="/Articles/764055/" /> <input type="submit" name="submit" value="Log in" /></form> |
+           <form action="https://lwn.net/subscribe/" method="post" class="loginform">
+           <input type="submit" name="submit" value="Subscribe" />
+           </form> |
+           <form action="https://lwn.net/Login/newaccount" method="post" class="loginform">
+           <input type="submit" name="submit" value="Register" />
+           </form>
+        </div>
+               <div class="handset-only">
+               <a href="/subscribe/"><b>Subscribe</b></a> /
+               <a href="/Login/"><b>Log in</b></a> /
+               <a href="/Login/newaccount"><b>New account</b></a>
+               </div>
+               </div><div class="pure-grid maincolumn">
+<div class="lwn-u-1 pure-u-md-19-24">
+<div class="PageHeadline">
+<h1>Topics sought for the Kernel and Maintainer Summits</h1>
+</div>
+<div class="ArticleText">
+The annual Maintainer and Kernel Summits will be held in Vancouver, BC on
+November&nbsp;12 to&nbsp;15, in conjunction with the Linux Plumbers Conference.
+The program committee is looking for topics for both summits; read on for
+details on how to submit ideas and, perhaps, get an invitation to the
+Maintainer Summit.
+<p><hr><p>
+<table>
+<tr><td valign="top"><b>From</b>:</td>
+             <td>&nbsp;</td><td valign="top">"Theodore Y. Ts'o" &lt;tytso-AT-mit.edu&gt;</td></tr>
+<tr><td valign="top"><b>To</b>:</td>
+             <td>&nbsp;</td><td valign="top">linux-kernel-AT-vger.kernel.org, linux-fsdevel-AT-vger.kernel.org,        linux-mm-AT-kvack.org, netdev-AT-vger.kernel.org,        linux-block-AT-vger.kernel.org</td></tr>
+<tr><td valign="top"><b>Subject</b>:</td>
+             <td>&nbsp;</td><td valign="top">Maintainer / Kernel Summit 2018 planning kick-off</td></tr>
+<tr><td valign="top"><b>Date</b>:</td>
+             <td>&nbsp;</td><td valign="top">Thu, 30 Aug 2018 17:35:17 -0400</td></tr>
+<tr><td valign="top"><b>Message-ID</b>:</td>
+             <td>&nbsp;</td><td valign="top">&lt;20180830213517.GA19110@thunk.org&gt;</td></tr>
+<tr><td valign="top"><b>Archive-link</b>:</td>
+             <td>&nbsp;</td><td valign="top"><a href="https://lwn.net/ml/linux-kernel/20180830213517.GA19110@thunk.org">Article</a></td></tr>
+</table><p>
+<pre>
+[ Feel free to forward this to other Linux kernel mailing lists as
+  appropriate -- Ted ]
+
+This year, the Maintainer and Kernel Summit will be in Vancouver,
+B.C., November 12th -- 15th.  The Maintainer's summit will be held on
+Monday, November 12th, in Vancouver, immediately before the Linux
+Plumber's Conference (LPC) November 13th -- 15th.
+
+For the past few years, before 2017, we've scheduled mostly management
+and development process issues on the first day.  We then opened up
+the second day of the Kernel Summit to all attendees of the conference
+with which the Kernel Summit has been colocated, and called it the
+"Open Technical Day".  This is something that just made sense in order
+to assure that all of the necessary people needed to discuss a
+particular technical issue could be in the room.
+
+Starting last year in Prague, we took the next logical step, and split
+the Kernel Summit in two.  The "Maintainer's Summit" is an
+invite-only, half-day event, where the primary focus will be process
+issues of Linux Kernel Development.  It will be limited to 30 invitees
+and a handful of sponsored attendees.  This makes it smaller than the
+first few kernel summits (which were limited to around 50 attendees).
+
+The "Kernel Summit" is now organized as a track which is run in
+parallel with the other tracks at the Linux Plumber's Conference, and
+is open to all registered attendees of Plumbers.  Much as how we
+organized the Kernel Summit "open technical day" in 2016 in Santa Fe,
+the Kernel Summit schedule will be synchronized with the other tracks
+at the Plumber's Conference, and it will be open to all registered
+Plumber's attendees.
+
+Linus has suggested the following ten people as the core of the people
+he would like invited to the Maintainer's Summit, which was calculated
+from statistics from his git tree.
+
+       David Miller
+       Dave Airlie
+       Greg KH
+       Arnd Bergmann
+       Ingo Molnar
+       Mauro Carvalho Chehab
+       Takashi Iwai
+       Thomas Gleixner
+       Andrew Morton
+       Olof Johansson
+
+As we did last year, there will be a mini-program committee that will
+be pick enough names to bring the total number of 30 for the
+Maintainer's Summit.  That program committee will consist of Arnd
+Bergmann, Thomas Gleixner, Greg KH, Paul McKenney, and Ted Ts'o.
+
+We will use the rest of names on the list generated by Linus's script
+as a starting point of people to be considered.  People who suggest
+topics that should be discussed on the Maintainer's summit will also
+be added to the list.  To make topic suggestions for the Maintainer's
+Summit, please send e-mail to the ksummit-discuss list with a subject
+prefix of [MAINTAINERS SUMMIT].
+
+
+The other job of the program committee will be to organize the program
+for the Kernel Summit.  The goal of the Kernel Summit track will be to
+provide a forum to discuss specific technical issues that would be
+easier to resolve in person than over e-mail.  The program committee
+will also consider "information sharing" topics if they are clearly of
+interest to the wider development community (i.e., advanced training
+in topics that would be useful to kernel developers).
+
+To suggest a topic for the Kernel Summit, please tag your e-mail with
+[TECH TOPIC].  As before, please use a separate e-mail for each topic,
+and send the topic suggestions to:
+
+       ksummit-discuss@lists.linuxfoundation.org
+
+People who submit topic suggestions before September 21st and which
+are accepted, will be given a free admission to the Linux Plumbers
+Conference.
+
+We will reserving roughly half the Kernel Summit slots for last-minute
+discussions that will be scheduled during the week of Plumber's, in an
+"unconference style".  This was extremely popular in Santa Fe and in
+Prague, since it allowed ideas that came up in hallway discussions,
+and in Plumber's Miniconference, to be given scheduled, dedicated
+times for that discussion.
+
+
+If you were not subscribed on to the kernel-discuss mailing list from
+last year (or if you had removed yourself after the kernel summit),
+you can subscribe to the discuss list using mailman:
+
+   <a href="https://lists.linuxfoundation.org/mailman/listinfo/ksummit-discuss">https://lists.linuxfoundation.org/mailman/listinfo/ksummi...</a></pre>
+<hr width="60%" align="left">
+           (<a href="https://lwn.net/Login/?target=/Articles/764055/">Log in</a> to post comments)
+           <p>
+           
+</div> <!-- ArticleText -->
+<p><a name="Comments"></a>
+</div>
+<div class="lwn-u-1 pure-u-md-1-6 not-print">
+<div id="azk93271_right_zone"></div>
+</div>
+</div> <!-- pure-grid -->
+
+        <br clear="all">
+        <center>
+        <P>
+        <font size="-2">
+        Copyright &copy; 2018, Eklektix, Inc.<BR>
+        
+        Comments and public postings are copyrighted by their creators.<br>
+        Linux  is a registered trademark of Linus Torvalds<br>
+        </font>
+        </center>
+        
+            <script type="text/javascript">
+            var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
+            document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
+            </script>
+            <script type="text/javascript">
+            try {
+            var pageTracker = _gat._getTracker("UA-2039382-1");
+            pageTracker._trackPageview();
+            } catch(err) {}</script>
+            
+        </body></html>
+        
+\ No newline at end of file
diff --git a/test/source/LWN/Articles/764130.html b/test/source/LWN/Articles/764130.html

new file mode 100644 (file)

index 0000000..2fbae67
--- /dev/null
+++ b/test/source/LWN/Articles/764130.html
@@ -0,0 +1,234 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+        "http://www.w3.org/TR/html4/loose.dtd">
+        <html>
+        <head><title>Security updates for Tuesday [LWN.net]</title>
+        <meta name="twitter:card" content="summary" />
+<meta name="twitter:site" content="@lwnnet" />
+<meta name="twitter:title" content="Security updates for Tuesday" />
+<meta name="twitter:description" content="Security updates have been issued by openSUSE (ImageMagick, libressl, postgresql10, spice, and spice-gtk), Red Hat (collectd, kernel, Red Hat Gluster Storage, Red Hat Virtualization, RHGS WA, rhvm-appliance, and samba), and SUSE (crowbar, crowbar-core, crowbar-ha, crowbar-openstack, crowbar-ui, kernel, spice, and spice-gtk).
+" />
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<meta HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8">
+        <link rel="icon" href="/images/favicon.png" type="image/png">
+        <link rel="alternate" type="application/rss+xml" title="LWN.net headlines" href="https://lwn.net/headlines/newrss">
+        <link rel="stylesheet" href="/CSS/lwn">
+<link rel="stylesheet" href="/CSS/nosub">
+<link rel="stylesheet" href="/CSS/pure-min">
+           <!--[if lte IE 8]>
+             <link rel="stylesheet" href="/CSS/grids-responsive-old-ie-min">
+           <![endif]-->
+           <!--[if gt IE 8]><!-->
+             <link rel="stylesheet" href="/CSS/grids-responsive-min">
+           <!--<![endif]-->
+           <link rel="stylesheet" href="/CSS/pure-lwn">
+           
+        
+<script type="text/javascript">var p="http",d="static";if(document.location.protocol=="https:"){p+="s";d="engine";}var z=document.createElement("script");z.type="text/javascript";z.async=true;z.src=p+"://"+d+".adzerk.net/ados.js";var s=document.getElementsByTagName("script")[0];s.parentNode.insertBefore(z,s);</script>
+<script type="text/javascript">
+var ados_keywords = ados_keywords || [];
+if( location.protocol=='https:' ) {
+        ados_keywords.push('T:SSL');
+} else {
+        ados_keywords.push('T:HTTP');
+}
+
+var ados = ados || {};
+ados.run = ados.run || [];
+ados.run.push(function() {
+
+ados_add_placement(4669, 20979, "azk13321_leaderboard", 4).setZone(16026);
+
+ados_add_placement(4669, 20979, "azk93271_right_zone", [5,10,6]).setZone(16027);
+
+ados_add_placement(4669, 20979, "azk31017_tracking", 20).setZone(20995);
+
+
+
+ados_setKeywords(ados_keywords.join(', ')); 
+ados_load();
+});</script>
+
+        </head>
+        <body bgcolor="#ffffff" link="Blue" VLINK="Green" alink="Green">
+        <a name="t"></a>
+<div id="menu"><a href="/"><img src="https://static.lwn.net/images/logo/barepenguin-70.png" class="logo"
+                 border="0" alt="LWN.net Logo">
+           <font class="logo">LWN<br>.net</font>
+           <font class="logobl">News from the source</font></a>
+           <a href="/"><img src="https://static.lwn.net/images/lcorner-ss.png" class="sslogo"
+                 border="0" alt="LWN"></a><div class="navmenu-container">
+           <ul class="navmenu">
+        <li><a class="navmenu" href="#t"><b>Content</b></a><ul><li><a href="/current/">Weekly Edition</a></li><li><a href="/Archives/">Archives</a></li><li><a href="/Search/">Search</a></li><li><a href="/Kernel/">Kernel</a></li><li><a href="/Security/">Security</a></li><li><a href="/Distributions/">Distributions</a></li><li><a href="/Calendar/">Events calendar</a></li><li><a href="/Comments/unread">Unread comments</a></li><li><hr></li><li><a href="/op/FAQ.lwn">LWN FAQ</a></li><li><a href="/op/AuthorGuide.lwn">Write for us</a></li></ul></li>
+</ul></div>
+</div> <!-- menu -->
+<div class="pure-g not-handset" style="margin-left: 10.5em">
+           <div class="not-print">
+             <div id="azk13321_leaderboard"></div>
+           </div>
+           </div>
+        <div class="topnav-container">
+<div class="not-handset"><form action="https://lwn.net/Login/" method="post" name="loginform"
+                 class="loginform">
+        <b>User:</b> <input type="text" name="Username" value="" size="8" /> <b>Password:</b> <input type="password" name="Password" size="8" /> <input type="hidden" name="target" value="/Articles/764130/" /> <input type="submit" name="submit" value="Log in" /></form> |
+           <form action="https://lwn.net/subscribe/" method="post" class="loginform">
+           <input type="submit" name="submit" value="Subscribe" />
+           </form> |
+           <form action="https://lwn.net/Login/newaccount" method="post" class="loginform">
+           <input type="submit" name="submit" value="Register" />
+           </form>
+        </div>
+               <div class="handset-only">
+               <a href="/subscribe/"><b>Subscribe</b></a> /
+               <a href="/Login/"><b>Log in</b></a> /
+               <a href="/Login/newaccount"><b>New account</b></a>
+               </div>
+               </div><div class="pure-grid maincolumn">
+<div class="lwn-u-1 pure-u-md-19-24">
+<div class="PageHeadline">
+<h1>Security updates for Tuesday</h1>
+</div>
+<div class="ArticleText">
+<table class="OddEven">
+               <tr><th align="left">Dist.</th>
+                   <th align="left">ID</th>
+                   <th align="left">Release</th>
+                   <th align="left">Package</th>
+                   <th align="left">Date</th></tr>
+<tr><td align="left">openSUSE</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764119/">openSUSE-SU-2018:2600-1</a></td>
+                       <td align="left">15.0</td>
+                       <td align="left">ImageMagick</td>
+                       <td>2018-09-04</td>
+                       </tr><tr><td align="left">openSUSE</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764120/">openSUSE-SU-2018:2597-1</a></td>
+                       <td align="left">42.3</td>
+                       <td align="left">libressl</td>
+                       <td>2018-09-04</td>
+                       </tr><tr><td align="left">openSUSE</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764121/">openSUSE-SU-2018:2599-1</a></td>
+                       <td align="left">15.0</td>
+                       <td align="left">postgresql10</td>
+                       <td>2018-09-04</td>
+                       </tr><tr><td align="left">openSUSE</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764122/">openSUSE-SU-2018:2598-1</a></td>
+                       <td align="left">15.0</td>
+                       <td align="left">spice</td>
+                       <td>2018-09-04</td>
+                       </tr><tr><td align="left">openSUSE</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764123/">openSUSE-SU-2018:2602-1</a></td>
+                       <td align="left">42.3</td>
+                       <td align="left">spice</td>
+                       <td>2018-09-04</td>
+                       </tr><tr><td align="left">openSUSE</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764124/">openSUSE-SU-2018:2601-1</a></td>
+                       <td align="left">42.3</td>
+                       <td align="left">spice-gtk</td>
+                       <td>2018-09-04</td>
+                       </tr><tr><td align="left">Red Hat</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764115/">RHSA-2018:2616-01</a></td>
+                       <td align="left">EL7</td>
+                       <td align="left">RHGS WA</td>
+                       <td>2018-09-04</td>
+                       </tr><tr><td align="left">Red Hat</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764113/">RHSA-2018:2608-01</a></td>
+                       <td align="left">EL6</td>
+                       <td align="left">Red Hat Gluster Storage</td>
+                       <td>2018-09-04</td>
+                       </tr><tr><td align="left">Red Hat</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764112/">RHSA-2018:2607-01</a></td>
+                       <td align="left">EL7</td>
+                       <td align="left">Red Hat Gluster Storage</td>
+                       <td>2018-09-04</td>
+                       </tr><tr><td align="left">Red Hat</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764114/">RHSA-2018:2626-01</a></td>
+                       <td align="left">EL7</td>
+                       <td align="left">Red Hat Virtualization</td>
+                       <td>2018-09-04</td>
+                       </tr><tr><td align="left">Red Hat</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764110/">RHSA-2018:2615-01</a></td>
+                       <td align="left">EL7</td>
+                       <td align="left">collectd</td>
+                       <td>2018-09-04</td>
+                       </tr><tr><td align="left">Red Hat</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764111/">RHSA-2018:2645-01</a></td>
+                       <td align="left">EL6.7</td>
+                       <td align="left">kernel</td>
+                       <td>2018-09-04</td>
+                       </tr><tr><td align="left">Red Hat</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764116/">RHSA-2018:2643-01</a></td>
+                       <td align="left">EL7</td>
+                       <td align="left">rhvm-appliance</td>
+                       <td>2018-09-04</td>
+                       </tr><tr><td align="left">Red Hat</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764117/">RHSA-2018:2612-01</a></td>
+                       <td align="left">EL6</td>
+                       <td align="left">samba</td>
+                       <td>2018-09-04</td>
+                       </tr><tr><td align="left">Red Hat</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764118/">RHSA-2018:2613-01</a></td>
+                       <td align="left">EL7</td>
+                       <td align="left">samba</td>
+                       <td>2018-09-04</td>
+                       </tr><tr><td align="left">SUSE</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764125/">SUSE-SU-2018:2603-1</a></td>
+                       <td align="left">OS7 </td>
+                       <td align="left">crowbar, crowbar-core, crowbar-ha, crowbar-openstack, crowbar-ui</td>
+                       <td>2018-09-04</td>
+                       </tr><tr><td align="left">SUSE</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764126/">SUSE-SU-2018:2596-1</a></td>
+                       <td align="left">SLE12</td>
+                       <td align="left">kernel</td>
+                       <td>2018-09-03</td>
+                       </tr><tr><td align="left">SUSE</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764127/">SUSE-SU-2018:2595-1</a></td>
+                       <td align="left">SLE12</td>
+                       <td align="left">spice</td>
+                       <td>2018-09-03</td>
+                       </tr><tr><td align="left">SUSE</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764129/">SUSE-SU-2018:2594-1</a></td>
+                       <td align="left">SLE12</td>
+                       <td align="left">spice-gtk</td>
+                       <td>2018-09-03</td>
+                       </tr><tr><td align="left">SUSE</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764128/">SUSE-SU-2018:2593-1</a></td>
+                       <td align="left">SLE12</td>
+                       <td align="left">spice-gtk</td>
+                       <td>2018-09-03</td>
+                       </tr></table>
+<hr width="60%" align="left">
+           (<a href="https://lwn.net/Login/?target=/Articles/764130/">Log in</a> to post comments)
+           <p>
+           
+</div> <!-- ArticleText -->
+<p><a name="Comments"></a>
+
+</div> <!-- ArticleText -->
+</div>
+<div class="lwn-u-1 pure-u-md-1-6 not-print">
+<div id="azk93271_right_zone"></div>
+</div>
+</div> <!-- pure-grid -->
+
+        <br clear="all">
+        <center>
+        <P>
+        <font size="-2">
+        Copyright &copy; 2018, Eklektix, Inc.<BR>
+        
+        Comments and public postings are copyrighted by their creators.<br>
+        Linux  is a registered trademark of Linus Torvalds<br>
+        </font>
+        </center>
+        
+            <script type="text/javascript">
+            var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
+            document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
+            </script>
+            <script type="text/javascript">
+            try {
+            var pageTracker = _gat._getTracker("UA-2039382-1");
+            pageTracker._trackPageview();
+            } catch(err) {}</script>
+            
+        </body></html>
+        
+\ No newline at end of file
diff --git a/test/source/LWN/Articles/764182.html b/test/source/LWN/Articles/764182.html

new file mode 100644 (file)

index 0000000..aaf1c1b
--- /dev/null
+++ b/test/source/LWN/Articles/764182.html
@@ -0,0 +1,164 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+        "http://www.w3.org/TR/html4/loose.dtd">
+        <html>
+        <head><title>Security updates for Wednesday [LWN.net]</title>
+        <meta name="twitter:card" content="summary" />
+<meta name="twitter:site" content="@lwnnet" />
+<meta name="twitter:title" content="Security updates for Wednesday" />
+<meta name="twitter:description" content="Security updates have been issued by Debian (lcms2), openSUSE (yubico-piv-tool), Oracle (kernel), and SUSE (cobbler and kvm).
+" />
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<meta HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8">
+        <link rel="icon" href="/images/favicon.png" type="image/png">
+        <link rel="alternate" type="application/rss+xml" title="LWN.net headlines" href="https://lwn.net/headlines/newrss">
+        <link rel="stylesheet" href="/CSS/lwn">
+<link rel="stylesheet" href="/CSS/nosub">
+<link rel="stylesheet" href="/CSS/pure-min">
+           <!--[if lte IE 8]>
+             <link rel="stylesheet" href="/CSS/grids-responsive-old-ie-min">
+           <![endif]-->
+           <!--[if gt IE 8]><!-->
+             <link rel="stylesheet" href="/CSS/grids-responsive-min">
+           <!--<![endif]-->
+           <link rel="stylesheet" href="/CSS/pure-lwn">
+           
+        
+<script type="text/javascript">var p="http",d="static";if(document.location.protocol=="https:"){p+="s";d="engine";}var z=document.createElement("script");z.type="text/javascript";z.async=true;z.src=p+"://"+d+".adzerk.net/ados.js";var s=document.getElementsByTagName("script")[0];s.parentNode.insertBefore(z,s);</script>
+<script type="text/javascript">
+var ados_keywords = ados_keywords || [];
+if( location.protocol=='https:' ) {
+        ados_keywords.push('T:SSL');
+} else {
+        ados_keywords.push('T:HTTP');
+}
+
+var ados = ados || {};
+ados.run = ados.run || [];
+ados.run.push(function() {
+
+ados_add_placement(4669, 20979, "azk13321_leaderboard", 4).setZone(16026);
+
+ados_add_placement(4669, 20979, "azk93271_right_zone", [5,10,6]).setZone(16027);
+
+ados_add_placement(4669, 20979, "azk31017_tracking", 20).setZone(20995);
+
+
+
+ados_setKeywords(ados_keywords.join(', ')); 
+ados_load();
+});</script>
+
+        </head>
+        <body bgcolor="#ffffff" link="Blue" VLINK="Green" alink="Green">
+        <a name="t"></a>
+<div id="menu"><a href="/"><img src="https://static.lwn.net/images/logo/barepenguin-70.png" class="logo"
+                 border="0" alt="LWN.net Logo">
+           <font class="logo">LWN<br>.net</font>
+           <font class="logobl">News from the source</font></a>
+           <a href="/"><img src="https://static.lwn.net/images/lcorner-ss.png" class="sslogo"
+                 border="0" alt="LWN"></a><div class="navmenu-container">
+           <ul class="navmenu">
+        <li><a class="navmenu" href="#t"><b>Content</b></a><ul><li><a href="/current/">Weekly Edition</a></li><li><a href="/Archives/">Archives</a></li><li><a href="/Search/">Search</a></li><li><a href="/Kernel/">Kernel</a></li><li><a href="/Security/">Security</a></li><li><a href="/Distributions/">Distributions</a></li><li><a href="/Calendar/">Events calendar</a></li><li><a href="/Comments/unread">Unread comments</a></li><li><hr></li><li><a href="/op/FAQ.lwn">LWN FAQ</a></li><li><a href="/op/AuthorGuide.lwn">Write for us</a></li></ul></li>
+</ul></div>
+</div> <!-- menu -->
+<div class="pure-g not-handset" style="margin-left: 10.5em">
+           <div class="not-print">
+             <div id="azk13321_leaderboard"></div>
+           </div>
+           </div>
+        <div class="topnav-container">
+<div class="not-handset"><form action="https://lwn.net/Login/" method="post" name="loginform"
+                 class="loginform">
+        <b>User:</b> <input type="text" name="Username" value="" size="8" /> <b>Password:</b> <input type="password" name="Password" size="8" /> <input type="hidden" name="target" value="/Articles/764182/" /> <input type="submit" name="submit" value="Log in" /></form> |
+           <form action="https://lwn.net/subscribe/" method="post" class="loginform">
+           <input type="submit" name="submit" value="Subscribe" />
+           </form> |
+           <form action="https://lwn.net/Login/newaccount" method="post" class="loginform">
+           <input type="submit" name="submit" value="Register" />
+           </form>
+        </div>
+               <div class="handset-only">
+               <a href="/subscribe/"><b>Subscribe</b></a> /
+               <a href="/Login/"><b>Log in</b></a> /
+               <a href="/Login/newaccount"><b>New account</b></a>
+               </div>
+               </div><div class="pure-grid maincolumn">
+<div class="lwn-u-1 pure-u-md-19-24">
+<div class="PageHeadline">
+<h1>Security updates for Wednesday</h1>
+</div>
+<div class="ArticleText">
+<table class="OddEven">
+               <tr><th align="left">Dist.</th>
+                   <th align="left">ID</th>
+                   <th align="left">Release</th>
+                   <th align="left">Package</th>
+                   <th align="left">Date</th></tr>
+<tr><td align="left">Debian</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764176/">DSA-4284-1</a></td>
+                       <td align="left">stable</td>
+                       <td align="left">lcms2</td>
+                       <td>2018-09-04</td>
+                       </tr><tr><td align="left">openSUSE</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764177/">openSUSE-SU-2018:2623-1</a></td>
+                       <td align="left">42.3</td>
+                       <td align="left">yubico-piv-tool</td>
+                       <td>2018-09-05</td>
+                       </tr><tr><td align="left">Oracle</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764178/">ELSA-2018-4208</a></td>
+                       <td align="left">OL6</td>
+                       <td align="left">kernel</td>
+                       <td>2018-09-04</td>
+                       </tr><tr><td align="left">Oracle</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764179/">ELSA-2018-4208</a></td>
+                       <td align="left">OL7</td>
+                       <td align="left">kernel</td>
+                       <td>2018-09-04</td>
+                       </tr><tr><td align="left">SUSE</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764180/">SUSE-SU-2018:2608-1</a></td>
+                       <td align="left"></td>
+                       <td align="left">cobbler</td>
+                       <td>2018-09-04</td>
+                       </tr><tr><td align="left">SUSE</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764181/">SUSE-SU-2018:2615-1</a></td>
+                       <td align="left">SLE11</td>
+                       <td align="left">kvm</td>
+                       <td>2018-09-05</td>
+                       </tr></table>
+<hr width="60%" align="left">
+           (<a href="https://lwn.net/Login/?target=/Articles/764182/">Log in</a> to post comments)
+           <p>
+           
+</div> <!-- ArticleText -->
+<p><a name="Comments"></a>
+
+</div> <!-- ArticleText -->
+</div>
+<div class="lwn-u-1 pure-u-md-1-6 not-print">
+<div id="azk93271_right_zone"></div>
+</div>
+</div> <!-- pure-grid -->
+
+        <br clear="all">
+        <center>
+        <P>
+        <font size="-2">
+        Copyright &copy; 2018, Eklektix, Inc.<BR>
+        
+        Comments and public postings are copyrighted by their creators.<br>
+        Linux  is a registered trademark of Linus Torvalds<br>
+        </font>
+        </center>
+        
+            <script type="text/javascript">
+            var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
+            document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
+            </script>
+            <script type="text/javascript">
+            try {
+            var pageTracker = _gat._getTracker("UA-2039382-1");
+            pageTracker._trackPageview();
+            } catch(err) {}</script>
+            
+        </body></html>
+        
+\ No newline at end of file
diff --git a/test/source/LWN/Articles/764184.html b/test/source/LWN/Articles/764184.html

new file mode 100644 (file)

index 0000000..a81daf3
--- /dev/null
+++ b/test/source/LWN/Articles/764184.html
@@ -0,0 +1,135 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+        "http://www.w3.org/TR/html4/loose.dtd">
+        <html>
+        <head><title>A set of stable kernels [LWN.net]</title>
+        <meta name="twitter:card" content="summary" />
+<meta name="twitter:site" content="@lwnnet" />
+<meta name="twitter:title" content="A set of stable kernels" />
+<meta name="twitter:description" content="Greg Kroah-Hartman has released stable kernels 4.18.6, 4.14.68, 4.9.125, 4.4.154, and 3.18.121. They all contain important fixes and
+users should upgrade." />
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<meta HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8">
+        <link rel="icon" href="/images/favicon.png" type="image/png">
+        <link rel="alternate" type="application/rss+xml" title="LWN.net headlines" href="https://lwn.net/headlines/newrss">
+<link rel="alternate" type="application/rss+xml" title="Comments posted to this article" href="https://lwn.net/headlines/764184/">
+        <link rel="stylesheet" href="/CSS/lwn">
+<link rel="stylesheet" href="/CSS/nosub">
+<link rel="stylesheet" href="/CSS/pure-min">
+           <!--[if lte IE 8]>
+             <link rel="stylesheet" href="/CSS/grids-responsive-old-ie-min">
+           <![endif]-->
+           <!--[if gt IE 8]><!-->
+             <link rel="stylesheet" href="/CSS/grids-responsive-min">
+           <!--<![endif]-->
+           <link rel="stylesheet" href="/CSS/pure-lwn">
+           
+        
+<script type="text/javascript">var p="http",d="static";if(document.location.protocol=="https:"){p+="s";d="engine";}var z=document.createElement("script");z.type="text/javascript";z.async=true;z.src=p+"://"+d+".adzerk.net/ados.js";var s=document.getElementsByTagName("script")[0];s.parentNode.insertBefore(z,s);</script>
+<script type="text/javascript">
+var ados_keywords = ados_keywords || [];
+if( location.protocol=='https:' ) {
+        ados_keywords.push('T:SSL');
+} else {
+        ados_keywords.push('T:HTTP');
+}
+
+var ados = ados || {};
+ados.run = ados.run || [];
+ados.run.push(function() {
+
+ados_add_placement(4669, 20979, "azk13321_leaderboard", 4).setZone(16026);
+
+ados_add_placement(4669, 20979, "azk93271_right_zone", [5,10,6]).setZone(16027);
+
+ados_add_placement(4669, 20979, "azk31017_tracking", 20).setZone(20995);
+
+
+
+ados_setKeywords(ados_keywords.join(', ')); 
+ados_load();
+});</script>
+
+        </head>
+        <body bgcolor="#ffffff" link="Blue" VLINK="Green" alink="Green">
+        <a name="t"></a>
+<div id="menu"><a href="/"><img src="https://static.lwn.net/images/logo/barepenguin-70.png" class="logo"
+                 border="0" alt="LWN.net Logo">
+           <font class="logo">LWN<br>.net</font>
+           <font class="logobl">News from the source</font></a>
+           <a href="/"><img src="https://static.lwn.net/images/lcorner-ss.png" class="sslogo"
+                 border="0" alt="LWN"></a><div class="navmenu-container">
+           <ul class="navmenu">
+        <li><a class="navmenu" href="#t"><b>Content</b></a><ul><li><a href="/current/">Weekly Edition</a></li><li><a href="/Archives/">Archives</a></li><li><a href="/Search/">Search</a></li><li><a href="/Kernel/">Kernel</a></li><li><a href="/Security/">Security</a></li><li><a href="/Distributions/">Distributions</a></li><li><a href="/Calendar/">Events calendar</a></li><li><a href="/Comments/unread">Unread comments</a></li><li><hr></li><li><a href="/op/FAQ.lwn">LWN FAQ</a></li><li><a href="/op/AuthorGuide.lwn">Write for us</a></li></ul></li>
+</ul></div>
+</div> <!-- menu -->
+<div class="pure-g not-handset" style="margin-left: 10.5em">
+           <div class="not-print">
+             <div id="azk13321_leaderboard"></div>
+           </div>
+           </div>
+        <div class="topnav-container">
+<div class="not-handset"><form action="https://lwn.net/Login/" method="post" name="loginform"
+                 class="loginform">
+        <b>User:</b> <input type="text" name="Username" value="" size="8" /> <b>Password:</b> <input type="password" name="Password" size="8" /> <input type="hidden" name="target" value="/Articles/764184/" /> <input type="submit" name="submit" value="Log in" /></form> |
+           <form action="https://lwn.net/subscribe/" method="post" class="loginform">
+           <input type="submit" name="submit" value="Subscribe" />
+           </form> |
+           <form action="https://lwn.net/Login/newaccount" method="post" class="loginform">
+           <input type="submit" name="submit" value="Register" />
+           </form>
+        </div>
+               <div class="handset-only">
+               <a href="/subscribe/"><b>Subscribe</b></a> /
+               <a href="/Login/"><b>Log in</b></a> /
+               <a href="/Login/newaccount"><b>New account</b></a>
+               </div>
+               </div><div class="pure-grid maincolumn">
+<div class="lwn-u-1 pure-u-md-19-24">
+<div class="PageHeadline">
+<h1>A set of stable kernels</h1>
+<div class="Byline">[Posted September 5, 2018 by ris]
+               <p>
+               </div>
+</div>
+<div class="ArticleText">
+Greg Kroah-Hartman has released stable kernels <a
+href="/Articles/764185/">4.18.6</a>, <a
+href="/Articles/764186/">4.14.68</a>, <a
+href="/Articles/764187/">4.9.125</a>, <a
+href="/Articles/764188/">4.4.154</a>, and <a
+href="/Articles/764189/">3.18.121</a>. They all contain important fixes and
+users should upgrade.<hr width="60%" align="left">
+           (<a href="https://lwn.net/Login/?target=/Articles/764184/">Log in</a> to post comments)
+           <p>
+           
+</div> <!-- ArticleText -->
+<p><a name="Comments"></a>
+</div>
+<div class="lwn-u-1 pure-u-md-1-6 not-print">
+<div id="azk93271_right_zone"></div>
+</div>
+</div> <!-- pure-grid -->
+
+        <br clear="all">
+        <center>
+        <P>
+        <font size="-2">
+        Copyright &copy; 2018, Eklektix, Inc.<BR>
+        
+        Comments and public postings are copyrighted by their creators.<br>
+        Linux  is a registered trademark of Linus Torvalds<br>
+        </font>
+        </center>
+        
+            <script type="text/javascript">
+            var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
+            document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
+            </script>
+            <script type="text/javascript">
+            try {
+            var pageTracker = _gat._getTracker("UA-2039382-1");
+            pageTracker._trackPageview();
+            } catch(err) {}</script>
+            
+        </body></html>
+        
+\ No newline at end of file
diff --git a/test/source/LWN/Articles/764202.html b/test/source/LWN/Articles/764202.html

new file mode 100644 (file)

index 0000000..141a5a8
--- /dev/null
+++ b/test/source/LWN/Articles/764202.html
@@ -0,0 +1,142 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+        "http://www.w3.org/TR/html4/loose.dtd">
+        <html>
+        <head><title>Firefox 62.0 released [LWN.net]</title>
+        <meta name="twitter:card" content="summary" />
+<meta name="twitter:site" content="@lwnnet" />
+<meta name="twitter:title" content="Firefox 62.0 released" />
+<meta name="twitter:description" content="Mozilla has released Firefox 62.0, with several new features. The Firefox
+Home (default New Tab) allows users to display up to 4 rows of top sites,
+Pocket stories, and highlights; for those using containers there is menu
+option to reopen a tab in a different container; Firefox 63 will remove all
+trust for Symantec-issued certificates, and it is optional in Firefox
+62; FreeBSD support for WebAuthn was added; and more. See the release
+notes for details." />
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<meta HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8">
+        <link rel="icon" href="/images/favicon.png" type="image/png">
+        <link rel="alternate" type="application/rss+xml" title="LWN.net headlines" href="https://lwn.net/headlines/newrss">
+<link rel="alternate" type="application/rss+xml" title="Comments posted to this article" href="https://lwn.net/headlines/764202/">
+        <link rel="stylesheet" href="/CSS/lwn">
+<link rel="stylesheet" href="/CSS/nosub">
+<link rel="stylesheet" href="/CSS/pure-min">
+           <!--[if lte IE 8]>
+             <link rel="stylesheet" href="/CSS/grids-responsive-old-ie-min">
+           <![endif]-->
+           <!--[if gt IE 8]><!-->
+             <link rel="stylesheet" href="/CSS/grids-responsive-min">
+           <!--<![endif]-->
+           <link rel="stylesheet" href="/CSS/pure-lwn">
+           
+        
+<script type="text/javascript">var p="http",d="static";if(document.location.protocol=="https:"){p+="s";d="engine";}var z=document.createElement("script");z.type="text/javascript";z.async=true;z.src=p+"://"+d+".adzerk.net/ados.js";var s=document.getElementsByTagName("script")[0];s.parentNode.insertBefore(z,s);</script>
+<script type="text/javascript">
+var ados_keywords = ados_keywords || [];
+if( location.protocol=='https:' ) {
+        ados_keywords.push('T:SSL');
+} else {
+        ados_keywords.push('T:HTTP');
+}
+
+var ados = ados || {};
+ados.run = ados.run || [];
+ados.run.push(function() {
+
+ados_add_placement(4669, 20979, "azk13321_leaderboard", 4).setZone(16026);
+
+ados_add_placement(4669, 20979, "azk93271_right_zone", [5,10,6]).setZone(16027);
+
+ados_add_placement(4669, 20979, "azk31017_tracking", 20).setZone(20995);
+
+
+
+ados_setKeywords(ados_keywords.join(', ')); 
+ados_load();
+});</script>
+
+        </head>
+        <body bgcolor="#ffffff" link="Blue" VLINK="Green" alink="Green">
+        <a name="t"></a>
+<div id="menu"><a href="/"><img src="https://static.lwn.net/images/logo/barepenguin-70.png" class="logo"
+                 border="0" alt="LWN.net Logo">
+           <font class="logo">LWN<br>.net</font>
+           <font class="logobl">News from the source</font></a>
+           <a href="/"><img src="https://static.lwn.net/images/lcorner-ss.png" class="sslogo"
+                 border="0" alt="LWN"></a><div class="navmenu-container">
+           <ul class="navmenu">
+        <li><a class="navmenu" href="#t"><b>Content</b></a><ul><li><a href="/current/">Weekly Edition</a></li><li><a href="/Archives/">Archives</a></li><li><a href="/Search/">Search</a></li><li><a href="/Kernel/">Kernel</a></li><li><a href="/Security/">Security</a></li><li><a href="/Distributions/">Distributions</a></li><li><a href="/Calendar/">Events calendar</a></li><li><a href="/Comments/unread">Unread comments</a></li><li><hr></li><li><a href="/op/FAQ.lwn">LWN FAQ</a></li><li><a href="/op/AuthorGuide.lwn">Write for us</a></li></ul></li>
+<li><a class="navmenu" href="#t"><b>Edition</b></a><ul><li><a href="/Articles/763791/">Return to the Briefs page</a></li></ul></li>
+</ul></div>
+</div> <!-- menu -->
+<div class="pure-g not-handset" style="margin-left: 10.5em">
+           <div class="not-print">
+             <div id="azk13321_leaderboard"></div>
+           </div>
+           </div>
+        <div class="topnav-container">
+<div class="not-handset"><form action="https://lwn.net/Login/" method="post" name="loginform"
+                 class="loginform">
+        <b>User:</b> <input type="text" name="Username" value="" size="8" /> <b>Password:</b> <input type="password" name="Password" size="8" /> <input type="hidden" name="target" value="/Articles/764202/" /> <input type="submit" name="submit" value="Log in" /></form> |
+           <form action="https://lwn.net/subscribe/" method="post" class="loginform">
+           <input type="submit" name="submit" value="Subscribe" />
+           </form> |
+           <form action="https://lwn.net/Login/newaccount" method="post" class="loginform">
+           <input type="submit" name="submit" value="Register" />
+           </form>
+        </div>
+               <div class="handset-only">
+               <a href="/subscribe/"><b>Subscribe</b></a> /
+               <a href="/Login/"><b>Log in</b></a> /
+               <a href="/Login/newaccount"><b>New account</b></a>
+               </div>
+               </div><div class="pure-grid maincolumn">
+<div class="lwn-u-1 pure-u-md-19-24">
+<div class="PageHeadline">
+<h1>Firefox 62.0 released</h1>
+<div class="Byline">[Posted September 5, 2018 by ris]
+               <p>
+               </div>
+</div>
+<div class="ArticleText">
+Mozilla has released Firefox 62.0, with several new features. The Firefox
+Home (default New Tab) allows users to display up to 4 rows of top sites,
+Pocket stories, and highlights; for those using containers there is menu
+option to reopen a tab in a different container; Firefox 63 will remove all
+trust for Symantec-issued certificates, and it is optional in Firefox
+62; FreeBSD support for WebAuthn was added; and more. See the <a
+href="https://www.mozilla.org/en-US/firefox/62.0/releasenotes/">release
+notes</a> for details.<hr width="60%" align="left">
+           (<a href="https://lwn.net/Login/?target=/Articles/764202/">Log in</a> to post comments)
+           <p>
+           
+</div> <!-- ArticleText -->
+<p><a name="Comments"></a>
+</div>
+<div class="lwn-u-1 pure-u-md-1-6 not-print">
+<div id="azk93271_right_zone"></div>
+</div>
+</div> <!-- pure-grid -->
+
+        <br clear="all">
+        <center>
+        <P>
+        <font size="-2">
+        Copyright &copy; 2018, Eklektix, Inc.<BR>
+        
+        Comments and public postings are copyrighted by their creators.<br>
+        Linux  is a registered trademark of Linus Torvalds<br>
+        </font>
+        </center>
+        
+            <script type="text/javascript">
+            var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
+            document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
+            </script>
+            <script type="text/javascript">
+            try {
+            var pageTracker = _gat._getTracker("UA-2039382-1");
+            pageTracker._trackPageview();
+            } catch(err) {}</script>
+            
+        </body></html>
+        
+\ No newline at end of file
diff --git a/test/source/LWN/Articles/764219.html b/test/source/LWN/Articles/764219.html

new file mode 100644 (file)

index 0000000..346deb3
--- /dev/null
+++ b/test/source/LWN/Articles/764219.html
@@ -0,0 +1,217 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+        "http://www.w3.org/TR/html4/loose.dtd">
+        <html>
+        <head><title>GNOME 3.30 released [LWN.net]</title>
+        <meta name="twitter:card" content="summary" />
+<meta name="twitter:site" content="@lwnnet" />
+<meta name="twitter:title" content="GNOME 3.30 released" />
+<meta name="twitter:description" content="The GNOME Project has announced the release of GNOME 3.30
+&quot;Almería&quot;. &quot;This release brings automatic updates in Software, more
+games, and a new Podcasts application. Improvements to core GNOME
+applications include a refined location and search bar in Files, a
+[Thunderbolt] panel in Settings, support for remoting using RDP in Boxes, and
+many more.&quot; The release notes
+contain more information.
+" />
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<meta HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8">
+        <link rel="icon" href="/images/favicon.png" type="image/png">
+        <link rel="alternate" type="application/rss+xml" title="LWN.net headlines" href="https://lwn.net/headlines/newrss">
+<link rel="alternate" type="application/rss+xml" title="Comments posted to this article" href="https://lwn.net/headlines/764219/">
+        <link rel="stylesheet" href="/CSS/lwn">
+<link rel="stylesheet" href="/CSS/nosub">
+<link rel="stylesheet" href="/CSS/pure-min">
+           <!--[if lte IE 8]>
+             <link rel="stylesheet" href="/CSS/grids-responsive-old-ie-min">
+           <![endif]-->
+           <!--[if gt IE 8]><!-->
+             <link rel="stylesheet" href="/CSS/grids-responsive-min">
+           <!--<![endif]-->
+           <link rel="stylesheet" href="/CSS/pure-lwn">
+           
+        
+<script type="text/javascript">var p="http",d="static";if(document.location.protocol=="https:"){p+="s";d="engine";}var z=document.createElement("script");z.type="text/javascript";z.async=true;z.src=p+"://"+d+".adzerk.net/ados.js";var s=document.getElementsByTagName("script")[0];s.parentNode.insertBefore(z,s);</script>
+<script type="text/javascript">
+var ados_keywords = ados_keywords || [];
+if( location.protocol=='https:' ) {
+        ados_keywords.push('T:SSL');
+} else {
+        ados_keywords.push('T:HTTP');
+}
+
+var ados = ados || {};
+ados.run = ados.run || [];
+ados.run.push(function() {
+
+ados_add_placement(4669, 20979, "azk13321_leaderboard", 4).setZone(16026);
+
+ados_add_placement(4669, 20979, "azk93271_right_zone", [5,10,6]).setZone(16027);
+
+ados_add_placement(4669, 20979, "azk31017_tracking", 20).setZone(20995);
+
+
+
+ados_setKeywords(ados_keywords.join(', ')); 
+ados_load();
+});</script>
+
+        </head>
+        <body bgcolor="#ffffff" link="Blue" VLINK="Green" alink="Green">
+        <a name="t"></a>
+<div id="menu"><a href="/"><img src="https://static.lwn.net/images/logo/barepenguin-70.png" class="logo"
+                 border="0" alt="LWN.net Logo">
+           <font class="logo">LWN<br>.net</font>
+           <font class="logobl">News from the source</font></a>
+           <a href="/"><img src="https://static.lwn.net/images/lcorner-ss.png" class="sslogo"
+                 border="0" alt="LWN"></a><div class="navmenu-container">
+           <ul class="navmenu">
+        <li><a class="navmenu" href="#t"><b>Content</b></a><ul><li><a href="/current/">Weekly Edition</a></li><li><a href="/Archives/">Archives</a></li><li><a href="/Search/">Search</a></li><li><a href="/Kernel/">Kernel</a></li><li><a href="/Security/">Security</a></li><li><a href="/Distributions/">Distributions</a></li><li><a href="/Calendar/">Events calendar</a></li><li><a href="/Comments/unread">Unread comments</a></li><li><hr></li><li><a href="/op/FAQ.lwn">LWN FAQ</a></li><li><a href="/op/AuthorGuide.lwn">Write for us</a></li></ul></li>
+<li><a class="navmenu" href="#t"><b>Edition</b></a><ul><li><a href="/Articles/763791/">Return to the Briefs page</a></li></ul></li>
+</ul></div>
+</div> <!-- menu -->
+<div class="pure-g not-handset" style="margin-left: 10.5em">
+           <div class="not-print">
+             <div id="azk13321_leaderboard"></div>
+           </div>
+           </div>
+        <div class="topnav-container">
+<div class="not-handset"><form action="https://lwn.net/Login/" method="post" name="loginform"
+                 class="loginform">
+        <b>User:</b> <input type="text" name="Username" value="" size="8" /> <b>Password:</b> <input type="password" name="Password" size="8" /> <input type="hidden" name="target" value="/Articles/764219/" /> <input type="submit" name="submit" value="Log in" /></form> |
+           <form action="https://lwn.net/subscribe/" method="post" class="loginform">
+           <input type="submit" name="submit" value="Subscribe" />
+           </form> |
+           <form action="https://lwn.net/Login/newaccount" method="post" class="loginform">
+           <input type="submit" name="submit" value="Register" />
+           </form>
+        </div>
+               <div class="handset-only">
+               <a href="/subscribe/"><b>Subscribe</b></a> /
+               <a href="/Login/"><b>Log in</b></a> /
+               <a href="/Login/newaccount"><b>New account</b></a>
+               </div>
+               </div><div class="pure-grid maincolumn">
+<div class="lwn-u-1 pure-u-md-19-24">
+<div class="PageHeadline">
+<h1>GNOME 3.30 released</h1>
+</div>
+<div class="ArticleText">
+The GNOME Project has announced the release of GNOME 3.30
+"Almería". "<span>This release brings automatic updates in Software, more
+games, and a new Podcasts application. Improvements to core GNOME
+applications include a refined location and search bar in Files, a
+[Thunderbolt] panel in Settings, support for remoting using RDP in Boxes, and
+many more.</span>" The <a
+href="https://help.gnome.org/misc/release-notes/3.30/">release notes</a>
+contain more information.
+<p><hr><p>
+<table>
+<tr><td valign="top"><b>From</b>:</td>
+             <td>&nbsp;</td><td valign="top">Matthias Clasen via devel-announce-list &lt;devel-announce-list-AT-gnome.org&gt;</td></tr>
+<tr><td valign="top"><b>To</b>:</td>
+             <td>&nbsp;</td><td valign="top">gnome-announce-list-AT-gnome.org, devel-announce-list-AT-gnome.org</td></tr>
+<tr><td valign="top"><b>Subject</b>:</td>
+             <td>&nbsp;</td><td valign="top">GNOME 3.30 released</td></tr>
+<tr><td valign="top"><b>Date</b>:</td>
+             <td>&nbsp;</td><td valign="top">Wed, 5 Sep 2018 16:41:54 -0400</td></tr>
+<tr><td valign="top"><b>Message-ID</b>:</td>
+             <td>&nbsp;</td><td valign="top">&lt;CAFwd_vCdnMhopZsZMq2M-N7DfQbUheTCfDb--Lgn6rrAXPyfdQ@mail.gmail.com&gt;</td></tr>
+<tr><td valign="top"><b>Cc</b>:</td>
+             <td>&nbsp;</td><td valign="top">Matthias Clasen &lt;matthias.clasen-AT-gmail.com&gt;</td></tr>
+<tr><td valign="top"><b>Archive-link</b>:</td>
+             <td>&nbsp;</td><td valign="top"><a href="http://www.mail-archive.com/search?l=mid&q=CAFwd_vCdnMhopZsZMq2M-N7DfQbUheTCfDb--Lgn6rrAXPyfdQ%40mail.gmail.com">Article</a></td></tr>
+</table><p>
+<pre>
+The GNOME Project is proud to announce the release of GNOME 3.30, “Almería”
+
+This release brings automatic updates in Software, more games, and a new
+Podcasts application.
+
+Improvements to core GNOME applications include a refined location and
+search
+bar in Files, a Thunderbold panel in Settings, support for remoting using
+RDP
+in Boxes, and many more.
+
+More information about the changes in GNOME 3.30 can be found in the
+release notes:
+
+ <a href="https://help.gnome.org/misc/release-notes/3.30/">https://help.gnome.org/misc/release-notes/3.30/</a>
+
+For the release team, this release is particularly exciting because it is
+the
+first one that has been produced and verified with our new CI infrastructure
+in gitlab.gnome.org.
+
+GNOME 3.30 will be available shortly in many distributions. If you want to
+try it
+today, you can use the soon-to-be-released Fedora 29 or the openSUSE nightly
+live images which will both include GNOME 3.30 very soon.
+
+ <a href="https://www.gnome.org/getting-gnome/">https://www.gnome.org/getting-gnome/</a>
+
+<a href="http://download.opensuse.org/repositories/GNOME:/Medias/images/iso/?P=GNOME_Next*">http://download.opensuse.org/repositories/GNOME:/Medias/i...</a>
+
+To try the very latest developments in GNOME, you can also use Fedora
+Silverblue,
+whose rawhide branch always includes the latest GNOME packages.
+
+
+<a href="https://kojipkgs.fedoraproject.org/compose/rawhide/latest-Fedora-Rawhide/compose/Silverblue/x86_64/iso/">https://kojipkgs.fedoraproject.org/compose/rawhide/latest...</a>
+
+If you are interested in building applications for GNOME 3.30, look for the
+GNOME 3.30 Flatpak SDK, which will be available in the sdk.gnome.org
+repository
+soon.
+
+This six-month effort wouldn't have been possible without the whole
+GNOME community, made of contributors and friends from all around the
+world: developers, designers, documentation writers, usability and
+accessibility specialists, translators, maintainers, students, system
+administrators, companies, artists, testers and last, but not least, our
+users.
+GNOME would not exist without all of you. Thank you to everyone!
+
+Our next release, GNOME 3.32, is planned for March 2019. Until then,
+enjoy GNOME 3.30!
+
+The GNOME Release Team
+-- 
+devel-announce-list mailing list
+devel-announce-list@gnome.org
+<a href="https://mail.gnome.org/mailman/listinfo/devel-announce-list">https://mail.gnome.org/mailman/listinfo/devel-announce-list</a></pre>
+<hr width="60%" align="left">
+           (<a href="https://lwn.net/Login/?target=/Articles/764219/">Log in</a> to post comments)
+           <p>
+           
+</div> <!-- ArticleText -->
+<p><a name="Comments"></a>
+</div>
+<div class="lwn-u-1 pure-u-md-1-6 not-print">
+<div id="azk93271_right_zone"></div>
+</div>
+</div> <!-- pure-grid -->
+
+        <br clear="all">
+        <center>
+        <P>
+        <font size="-2">
+        Copyright &copy; 2018, Eklektix, Inc.<BR>
+        
+        Comments and public postings are copyrighted by their creators.<br>
+        Linux  is a registered trademark of Linus Torvalds<br>
+        </font>
+        </center>
+        
+            <script type="text/javascript">
+            var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
+            document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
+            </script>
+            <script type="text/javascript">
+            try {
+            var pageTracker = _gat._getTracker("UA-2039382-1");
+            pageTracker._trackPageview();
+            } catch(err) {}</script>
+            
+        </body></html>
+        
+\ No newline at end of file
diff --git a/test/source/LWN/Articles/764300.html b/test/source/LWN/Articles/764300.html

new file mode 100644 (file)

index 0000000..e1d7bcc
--- /dev/null
+++ b/test/source/LWN/Articles/764300.html
@@ -0,0 +1,259 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+        "http://www.w3.org/TR/html4/loose.dtd">
+        <html>
+        <head><title>Security updates for Thursday [LWN.net]</title>
+        <meta name="twitter:card" content="summary" />
+<meta name="twitter:site" content="@lwnnet" />
+<meta name="twitter:title" content="Security updates for Thursday" />
+<meta name="twitter:description" content="Security updates have been issued by Debian (curl, gdm3, git-annex, lcms2, and sympa), Fedora (discount, dolphin-emu, gd, obs-build, osc, tcpflow, and yara), openSUSE (wireshark), Slackware (curl, firefox, ghostscript, and thunderbird), SUSE (apache-pdfbox, curl, dovecot22, and libvirt), and Ubuntu (libtirpc).
+" />
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<meta HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8">
+        <link rel="icon" href="/images/favicon.png" type="image/png">
+        <link rel="alternate" type="application/rss+xml" title="LWN.net headlines" href="https://lwn.net/headlines/newrss">
+        <link rel="stylesheet" href="/CSS/lwn">
+<link rel="stylesheet" href="/CSS/nosub">
+<link rel="stylesheet" href="/CSS/pure-min">
+           <!--[if lte IE 8]>
+             <link rel="stylesheet" href="/CSS/grids-responsive-old-ie-min">
+           <![endif]-->
+           <!--[if gt IE 8]><!-->
+             <link rel="stylesheet" href="/CSS/grids-responsive-min">
+           <!--<![endif]-->
+           <link rel="stylesheet" href="/CSS/pure-lwn">
+           
+        
+<script type="text/javascript">var p="http",d="static";if(document.location.protocol=="https:"){p+="s";d="engine";}var z=document.createElement("script");z.type="text/javascript";z.async=true;z.src=p+"://"+d+".adzerk.net/ados.js";var s=document.getElementsByTagName("script")[0];s.parentNode.insertBefore(z,s);</script>
+<script type="text/javascript">
+var ados_keywords = ados_keywords || [];
+if( location.protocol=='https:' ) {
+        ados_keywords.push('T:SSL');
+} else {
+        ados_keywords.push('T:HTTP');
+}
+
+var ados = ados || {};
+ados.run = ados.run || [];
+ados.run.push(function() {
+
+ados_add_placement(4669, 20979, "azk13321_leaderboard", 4).setZone(16026);
+
+ados_add_placement(4669, 20979, "azk93271_right_zone", [5,10,6]).setZone(16027);
+
+ados_add_placement(4669, 20979, "azk31017_tracking", 20).setZone(20995);
+
+
+
+ados_setKeywords(ados_keywords.join(', ')); 
+ados_load();
+});</script>
+
+        </head>
+        <body bgcolor="#ffffff" link="Blue" VLINK="Green" alink="Green">
+        <a name="t"></a>
+<div id="menu"><a href="/"><img src="https://static.lwn.net/images/logo/barepenguin-70.png" class="logo"
+                 border="0" alt="LWN.net Logo">
+           <font class="logo">LWN<br>.net</font>
+           <font class="logobl">News from the source</font></a>
+           <a href="/"><img src="https://static.lwn.net/images/lcorner-ss.png" class="sslogo"
+                 border="0" alt="LWN"></a><div class="navmenu-container">
+           <ul class="navmenu">
+        <li><a class="navmenu" href="#t"><b>Content</b></a><ul><li><a href="/current/">Weekly Edition</a></li><li><a href="/Archives/">Archives</a></li><li><a href="/Search/">Search</a></li><li><a href="/Kernel/">Kernel</a></li><li><a href="/Security/">Security</a></li><li><a href="/Distributions/">Distributions</a></li><li><a href="/Calendar/">Events calendar</a></li><li><a href="/Comments/unread">Unread comments</a></li><li><hr></li><li><a href="/op/FAQ.lwn">LWN FAQ</a></li><li><a href="/op/AuthorGuide.lwn">Write for us</a></li></ul></li>
+</ul></div>
+</div> <!-- menu -->
+<div class="pure-g not-handset" style="margin-left: 10.5em">
+           <div class="not-print">
+             <div id="azk13321_leaderboard"></div>
+           </div>
+           </div>
+        <div class="topnav-container">
+<div class="not-handset"><form action="https://lwn.net/Login/" method="post" name="loginform"
+                 class="loginform">
+        <b>User:</b> <input type="text" name="Username" value="" size="8" /> <b>Password:</b> <input type="password" name="Password" size="8" /> <input type="hidden" name="target" value="/Articles/764300/" /> <input type="submit" name="submit" value="Log in" /></form> |
+           <form action="https://lwn.net/subscribe/" method="post" class="loginform">
+           <input type="submit" name="submit" value="Subscribe" />
+           </form> |
+           <form action="https://lwn.net/Login/newaccount" method="post" class="loginform">
+           <input type="submit" name="submit" value="Register" />
+           </form>
+        </div>
+               <div class="handset-only">
+               <a href="/subscribe/"><b>Subscribe</b></a> /
+               <a href="/Login/"><b>Log in</b></a> /
+               <a href="/Login/newaccount"><b>New account</b></a>
+               </div>
+               </div><div class="pure-grid maincolumn">
+<div class="lwn-u-1 pure-u-md-19-24">
+<div class="PageHeadline">
+<h1>Security updates for Thursday</h1>
+</div>
+<div class="ArticleText">
+<table class="OddEven">
+               <tr><th align="left">Dist.</th>
+                   <th align="left">ID</th>
+                   <th align="left">Release</th>
+                   <th align="left">Package</th>
+                   <th align="left">Date</th></tr>
+<tr><td align="left">Debian</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764275/">DSA-4286-1</a></td>
+                       <td align="left">stable</td>
+                       <td align="left">curl</td>
+                       <td>2018-09-05</td>
+                       </tr><tr><td align="left">Debian</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764276/">DLA-1494-1</a></td>
+                       <td align="left">LTS</td>
+                       <td align="left">gdm3</td>
+                       <td>2018-09-05</td>
+                       </tr><tr><td align="left">Debian</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764277/">DLA-1495-1</a></td>
+                       <td align="left">LTS</td>
+                       <td align="left">git-annex</td>
+                       <td>2018-09-05</td>
+                       </tr><tr><td align="left">Debian</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764278/">DLA-1496-1</a></td>
+                       <td align="left">LTS</td>
+                       <td align="left">lcms2</td>
+                       <td>2018-09-06</td>
+                       </tr><tr><td align="left">Debian</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764279/">DSA-4285-1</a></td>
+                       <td align="left">stable</td>
+                       <td align="left">sympa</td>
+                       <td>2018-09-05</td>
+                       </tr><tr><td align="left">Fedora</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764280/">FEDORA-2018-38bdbafa96</a></td>
+                       <td align="left">F28</td>
+                       <td align="left">discount</td>
+                       <td>2018-09-06</td>
+                       </tr><tr><td align="left">Fedora</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764281/">FEDORA-2018-fe437a98d6</a></td>
+                       <td align="left">F27</td>
+                       <td align="left">dolphin-emu</td>
+                       <td>2018-09-06</td>
+                       </tr><tr><td align="left">Fedora</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764282/">FEDORA-2018-5bf744beee</a></td>
+                       <td align="left">F28</td>
+                       <td align="left">gd</td>
+                       <td>2018-09-06</td>
+                       </tr><tr><td align="left">Fedora</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764283/">FEDORA-2018-fac5420dd1</a></td>
+                       <td align="left">F27</td>
+                       <td align="left">obs-build</td>
+                       <td>2018-09-06</td>
+                       </tr><tr><td align="left">Fedora</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764284/">FEDORA-2018-fac5420dd1</a></td>
+                       <td align="left">F27</td>
+                       <td align="left">osc</td>
+                       <td>2018-09-06</td>
+                       </tr><tr><td align="left">Fedora</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764285/">FEDORA-2018-4f0b7d1251</a></td>
+                       <td align="left">F27</td>
+                       <td align="left">tcpflow</td>
+                       <td>2018-09-06</td>
+                       </tr><tr><td align="left">Fedora</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764286/">FEDORA-2018-5ad77cc979</a></td>
+                       <td align="left">F28</td>
+                       <td align="left">tcpflow</td>
+                       <td>2018-09-06</td>
+                       </tr><tr><td align="left">Fedora</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764287/">FEDORA-2018-7626df1731</a></td>
+                       <td align="left">F27</td>
+                       <td align="left">yara</td>
+                       <td>2018-09-06</td>
+                       </tr><tr><td align="left">Fedora</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764288/">FEDORA-2018-8344cb89ac</a></td>
+                       <td align="left">F28</td>
+                       <td align="left">yara</td>
+                       <td>2018-09-06</td>
+                       </tr><tr><td align="left">openSUSE</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764289/">openSUSE-SU-2018:2628-1</a></td>
+                       <td align="left">15.0</td>
+                       <td align="left">wireshark</td>
+                       <td>2018-09-05</td>
+                       </tr><tr><td align="left">Slackware</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764290/">SSA:2018-249-01</a></td>
+                       <td align="left"></td>
+                       <td align="left">curl</td>
+                       <td>2018-09-06</td>
+                       </tr><tr><td align="left">Slackware</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764292/">SSA:2018-249-03</a></td>
+                       <td align="left"></td>
+                       <td align="left">firefox</td>
+                       <td>2018-09-06</td>
+                       </tr><tr><td align="left">Slackware</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764291/">SSA:2018-249-02</a></td>
+                       <td align="left"></td>
+                       <td align="left">ghostscript</td>
+                       <td>2018-09-06</td>
+                       </tr><tr><td align="left">Slackware</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764293/">SSA:2018-249-04</a></td>
+                       <td align="left"></td>
+                       <td align="left">thunderbird</td>
+                       <td>2018-09-06</td>
+                       </tr><tr><td align="left">SUSE</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764294/">SUSE-SU-2018:2630-1</a></td>
+                       <td align="left">SLE15</td>
+                       <td align="left">apache-pdfbox</td>
+                       <td>2018-09-06</td>
+                       </tr><tr><td align="left">SUSE</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764295/">SUSE-SU-2018:2629-1</a></td>
+                       <td align="left"></td>
+                       <td align="left">curl</td>
+                       <td>2018-09-05</td>
+                       </tr><tr><td align="left">SUSE</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764296/">SUSE-SU-2018:2632-1</a></td>
+                       <td align="left">OS7 SLE12</td>
+                       <td align="left">dovecot22</td>
+                       <td>2018-09-06</td>
+                       </tr><tr><td align="left">SUSE</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764297/">SUSE-SU-2018:2631-1</a></td>
+                       <td align="left">OS7 SLE12</td>
+                       <td align="left">libvirt</td>
+                       <td>2018-09-06</td>
+                       </tr><tr><td align="left">Ubuntu</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764298/">USN-3759-2</a></td>
+                       <td align="left">12.04</td>
+                       <td align="left">libtirpc</td>
+                       <td>2018-09-05</td>
+                       </tr><tr><td align="left">Ubuntu</td>
+                       <td align="left"><a href="https://lwn.net/Articles/764299/">USN-3759-1</a></td>
+                       <td align="left">14.04 16.04 18.04</td>
+                       <td align="left">libtirpc</td>
+                       <td>2018-09-05</td>
+                       </tr></table>
+<hr width="60%" align="left">
+           (<a href="https://lwn.net/Login/?target=/Articles/764300/">Log in</a> to post comments)
+           <p>
+           
+</div> <!-- ArticleText -->
+<p><a name="Comments"></a>
+
+</div> <!-- ArticleText -->
+</div>
+<div class="lwn-u-1 pure-u-md-1-6 not-print">
+<div id="azk93271_right_zone"></div>
+</div>
+</div> <!-- pure-grid -->
+
+        <br clear="all">
+        <center>
+        <P>
+        <font size="-2">
+        Copyright &copy; 2018, Eklektix, Inc.<BR>
+        
+        Comments and public postings are copyrighted by their creators.<br>
+        Linux  is a registered trademark of Linus Torvalds<br>
+        </font>
+        </center>
+        
+            <script type="text/javascript">
+            var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
+            document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
+            </script>
+            <script type="text/javascript">
+            try {
+            var pageTracker = _gat._getTracker("UA-2039382-1");
+            pageTracker._trackPageview();
+            } catch(err) {}</script>
+            
+        </body></html>
+        
+\ No newline at end of file
diff --git a/test/source/LWN/Articles/764321.html b/test/source/LWN/Articles/764321.html

new file mode 100644 (file)

index 0000000..7d51dae
--- /dev/null
+++ b/test/source/LWN/Articles/764321.html
@@ -0,0 +1,216 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+        "http://www.w3.org/TR/html4/loose.dtd">
+        <html>
+        <head><title>The Hidden Benefit of Giving Back to Open Source Software (Working Knowledge) [LWN.net]</title>
+        <meta name="twitter:card" content="summary" />
+<meta name="twitter:site" content="@lwnnet" />
+<meta name="twitter:title" content="The Hidden Benefit of Giving Back to Open Source Software (Working Knowledge)" />
+<meta name="twitter:description" content="The Harvard Business School's &quot;Working Knowledge&quot; site has an
+article arguing that it can pay for companies to allow their developers
+to contribute back to the projects whose software they use.
+&quot;And that presents an interesting dilemma for firms that rely heavily
+on open source. Should they allow employees on company time to make updates
+and edits to the software for community use that could be used by
+competitors? New research by Assistant Professor Frank Nagle, a member of
+the Strategy Unit at Harvard Business School, shows that paying employees
+to contribute to such software boosts the company’s productivity from using
+the software by as much as 100 percent, when compared with free-riding
+competitors.&quot;" />
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<meta HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8">
+        <link rel="icon" href="/images/favicon.png" type="image/png">
+        <link rel="alternate" type="application/rss+xml" title="LWN.net headlines" href="https://lwn.net/headlines/newrss">
+<link rel="alternate" type="application/rss+xml" title="Comments posted to this article" href="https://lwn.net/headlines/764321/">
+        <link rel="stylesheet" href="/CSS/lwn">
+<link rel="stylesheet" href="/CSS/nosub">
+<link rel="stylesheet" href="/CSS/pure-min">
+           <!--[if lte IE 8]>
+             <link rel="stylesheet" href="/CSS/grids-responsive-old-ie-min">
+           <![endif]-->
+           <!--[if gt IE 8]><!-->
+             <link rel="stylesheet" href="/CSS/grids-responsive-min">
+           <!--<![endif]-->
+           <link rel="stylesheet" href="/CSS/pure-lwn">
+           
+        
+<script type="text/javascript">var p="http",d="static";if(document.location.protocol=="https:"){p+="s";d="engine";}var z=document.createElement("script");z.type="text/javascript";z.async=true;z.src=p+"://"+d+".adzerk.net/ados.js";var s=document.getElementsByTagName("script")[0];s.parentNode.insertBefore(z,s);</script>
+<script type="text/javascript">
+var ados_keywords = ados_keywords || [];
+if( location.protocol=='https:' ) {
+        ados_keywords.push('T:SSL');
+} else {
+        ados_keywords.push('T:HTTP');
+}
+
+var ados = ados || {};
+ados.run = ados.run || [];
+ados.run.push(function() {
+
+ados_add_placement(4669, 20979, "azk13321_leaderboard", 4).setZone(16026);
+
+ados_add_placement(4669, 20979, "azk93271_right_zone", [5,10,6]).setZone(16027);
+
+ados_add_placement(4669, 20979, "azk31017_tracking", 20).setZone(20995);
+
+
+
+ados_setKeywords(ados_keywords.join(', ')); 
+ados_load();
+});</script>
+
+        </head>
+        <body bgcolor="#ffffff" link="Blue" VLINK="Green" alink="Green">
+        <a name="t"></a>
+<div id="menu"><a href="/"><img src="https://static.lwn.net/images/logo/barepenguin-70.png" class="logo"
+                 border="0" alt="LWN.net Logo">
+           <font class="logo">LWN<br>.net</font>
+           <font class="logobl">News from the source</font></a>
+           <a href="/"><img src="https://static.lwn.net/images/lcorner-ss.png" class="sslogo"
+                 border="0" alt="LWN"></a><div class="navmenu-container">
+           <ul class="navmenu">
+        <li><a class="navmenu" href="#t"><b>Content</b></a><ul><li><a href="/current/">Weekly Edition</a></li><li><a href="/Archives/">Archives</a></li><li><a href="/Search/">Search</a></li><li><a href="/Kernel/">Kernel</a></li><li><a href="/Security/">Security</a></li><li><a href="/Distributions/">Distributions</a></li><li><a href="/Calendar/">Events calendar</a></li><li><a href="/Comments/unread">Unread comments</a></li><li><hr></li><li><a href="/op/FAQ.lwn">LWN FAQ</a></li><li><a href="/op/AuthorGuide.lwn">Write for us</a></li></ul></li>
+</ul></div>
+</div> <!-- menu -->
+<div class="pure-g not-handset" style="margin-left: 10.5em">
+           <div class="not-print">
+             <div id="azk13321_leaderboard"></div>
+           </div>
+           </div>
+        <div class="topnav-container">
+<div class="not-handset"><form action="https://lwn.net/Login/" method="post" name="loginform"
+                 class="loginform">
+        <b>User:</b> <input type="text" name="Username" value="" size="8" /> <b>Password:</b> <input type="password" name="Password" size="8" /> <input type="hidden" name="target" value="/Articles/764321/" /> <input type="submit" name="submit" value="Log in" /></form> |
+           <form action="https://lwn.net/subscribe/" method="post" class="loginform">
+           <input type="submit" name="submit" value="Subscribe" />
+           </form> |
+           <form action="https://lwn.net/Login/newaccount" method="post" class="loginform">
+           <input type="submit" name="submit" value="Register" />
+           </form>
+        </div>
+               <div class="handset-only">
+               <a href="/subscribe/"><b>Subscribe</b></a> /
+               <a href="/Login/"><b>Log in</b></a> /
+               <a href="/Login/newaccount"><b>New account</b></a>
+               </div>
+               </div><div class="pure-grid maincolumn">
+<div class="lwn-u-1 pure-u-md-19-24">
+<div class="PageHeadline">
+<h1>The Hidden Benefit of Giving Back to Open Source Software (Working Knowledge)</h1>
+<div class="Byline">[Posted September 6, 2018 by corbet]
+               <p>
+               </div>
+</div>
+<div class="ArticleText">
+The Harvard Business School's "Working Knowledge" site has <a
+href="https://hbswk.hbs.edu/item/the-hidden-benefit-of-giving-back-to-open-source-software">an
+article</a> arguing that it can pay for companies to allow their developers
+to contribute back to the projects whose software they use.
+"<span>And that presents an interesting dilemma for firms that rely heavily
+on open source. Should they allow employees on company time to make updates
+and edits to the software for community use that could be used by
+competitors? New research by Assistant Professor Frank Nagle, a member of
+the Strategy Unit at Harvard Business School, shows that paying employees
+to contribute to such software boosts the company’s productivity from using
+the software by as much as 100 percent, when compared with free-riding
+competitors.</span>"<hr width="60%" align="left">
+           (<a href="https://lwn.net/Login/?target=/Articles/764321/">Log in</a> to post comments)
+           <p>
+           
+</div> <!-- ArticleText -->
+<p><a name="Comments"></a>
+
+<a name="CommAnchor764326"></a>
+<div class="CommentBox">
+  <p class="CommentTitle">The Hidden Benefit of Giving Back to Open Source Software (Working Knowledge)</p>
+  <div class="CommentBody">
+    <p class="CommentPoster">Posted Sep 6, 2018 18:43 UTC (Thu) by <b>sjfriedl</b> (subscriber, #10111)
+       [<a href="/Articles/764326/">Link</a>]
+    </p>
+    <div class="FormattedComment">
+This is no surprise to me.  Most of the open source software improvements that might help a competitor are too general in nature to really be giving the other guys a competitive advantage.<br>
+<p>
+For instance, if Lyft contributed Linux kernel or PHP or Apache or whatever fixes, the benefit to Lyft of having that improved expertise far exceeds the general benefit to competitor Uber.<br>
+</div>
+
+  </div>
+  <p>
+  <div class="CommentReplyButton">
+    <form action="/Articles/764326/comment" method="post">
+      <input type="submit" value="Reply to this comment">
+    </form>
+  </div>
+  
+</div>
+
+<a name="CommAnchor764334"></a>
+<div class="CommentBox">
+  <p class="CommentTitle">The Hidden Benefit of Giving Back to Open Source Software (Working Knowledge)</p>
+  <div class="CommentBody">
+    <p class="CommentPoster">Posted Sep 6, 2018 21:42 UTC (Thu) by <b>k8to</b> (subscriber, #15413)
+       [<a href="/Articles/764334/">Link</a>]
+    </p>
+    <div class="FormattedComment">
+Even having to debate it seems so farcical.  If you're worried about people who "do the same thing", the software they use is not the main differentiator.  How your company is organized, how you treat your people and your customers, how you organized projects etc are huge, and software is ultimately minor.  Fixes and changes to software? Incredibly minor.<br>
+</div>
+
+  </div>
+  <p>
+  <div class="CommentReplyButton">
+    <form action="/Articles/764334/comment" method="post">
+      <input type="submit" value="Reply to this comment">
+    </form>
+  </div>
+  
+</div>
+<div class="Comment">
+
+<a name="CommAnchor764337"></a>
+<div class="CommentBox">
+  <p class="CommentTitle">The Hidden Benefit of Giving Back to Open Source Software (Working Knowledge)</p>
+  <div class="CommentBody">
+    <p class="CommentPoster">Posted Sep 6, 2018 21:56 UTC (Thu) by <b>k8to</b> (subscriber, #15413)
+       [<a href="/Articles/764337/">Link</a>]
+    </p>
+    <div class="FormattedComment">
+Perhaps this is too dismissive, as there is the part about letting your programmers do their job to the best of their ability.  That seems pretty big.<br>
+</div>
+
+  </div>
+  <p>
+  <div class="CommentReplyButton">
+    <form action="/Articles/764337/comment" method="post">
+      <input type="submit" value="Reply to this comment">
+    </form>
+  </div>
+  
+</div>
+</div>
+</div>
+<div class="lwn-u-1 pure-u-md-1-6 not-print">
+<div id="azk93271_right_zone"></div>
+</div>
+</div> <!-- pure-grid -->
+
+        <br clear="all">
+        <center>
+        <P>
+        <font size="-2">
+        Copyright &copy; 2018, Eklektix, Inc.<BR>
+        
+        Comments and public postings are copyrighted by their creators.<br>
+        Linux  is a registered trademark of Linus Torvalds<br>
+        </font>
+        </center>
+        
+            <script type="text/javascript">
+            var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
+            document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
+            </script>
+            <script type="text/javascript">
+            try {
+            var pageTracker = _gat._getTracker("UA-2039382-1");
+            pageTracker._trackPageview();
+            } catch(err) {}</script>
+            
+        </body></html>
+        
+\ No newline at end of file
diff --git a/test/source/LWN/index.html b/test/source/LWN/index.html

new file mode 100644 (file)

index 0000000..bf54316
--- /dev/null
+++ b/test/source/LWN/index.html
@@ -0,0 +1,502 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+        "http://www.w3.org/TR/html4/loose.dtd">
+        <html>
+        <head><title>Welcome to LWN.net [LWN.net]</title>
+        <meta name="viewport" content="width=device-width, initial-scale=1">
+<meta HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8">
+        <link rel="icon" href="/images/favicon.png" type="image/png">
+        <link rel="alternate" type="application/rss+xml" title="LWN.net headlines" href="https://lwn.net/headlines/newrss">
+        <link rel="stylesheet" href="/CSS/lwn">
+<link rel="stylesheet" href="/CSS/nosub">
+<link rel="stylesheet" href="/CSS/pure-min">
+           <!--[if lte IE 8]>
+             <link rel="stylesheet" href="/CSS/grids-responsive-old-ie-min">
+           <![endif]-->
+           <!--[if gt IE 8]><!-->
+             <link rel="stylesheet" href="/CSS/grids-responsive-min">
+           <!--<![endif]-->
+           <link rel="stylesheet" href="/CSS/pure-lwn">
+           
+        
+<script type="text/javascript">var p="http",d="static";if(document.location.protocol=="https:"){p+="s";d="engine";}var z=document.createElement("script");z.type="text/javascript";z.async=true;z.src=p+"://"+d+".adzerk.net/ados.js";var s=document.getElementsByTagName("script")[0];s.parentNode.insertBefore(z,s);</script>
+<script type="text/javascript">
+var ados_keywords = ados_keywords || [];
+if( location.protocol=='https:' ) {
+        ados_keywords.push('T:SSL');
+} else {
+        ados_keywords.push('T:HTTP');
+}
+
+var ados = ados || {};
+ados.run = ados.run || [];
+ados.run.push(function() {
+
+ados_add_placement(4669, 20979, "azk13321_leaderboard", 4).setZone(16026);
+
+ados_add_placement(4669, 20979, "azk93271_right_zone", [5,10,6]).setZone(16027);
+
+ados_add_placement(4669, 20979, "azk31017_tracking", 20).setZone(20995);
+
+ados_keywords.push('S:Homepage');
+
+ados_setKeywords(ados_keywords.join(', ')); 
+ados_load();
+});</script>
+
+        </head>
+        <body bgcolor="#ffffff" link="Blue" VLINK="Green" alink="Green">
+        <a name="t"></a>
+<div id="menu"><a href="/"><img src="https://static.lwn.net/images/logo/barepenguin-70.png" class="logo"
+                 border="0" alt="LWN.net Logo">
+           <font class="logo">LWN<br>.net</font>
+           <font class="logobl">News from the source</font></a>
+           <a href="/"><img src="https://static.lwn.net/images/lcorner-ss.png" class="sslogo"
+                 border="0" alt="LWN"></a><div class="navmenu-container">
+           <ul class="navmenu">
+        <li><a class="navmenu" href="#t"><b>Content</b></a><ul><li><a href="/current/">Weekly Edition</a></li><li><a href="/Archives/">Archives</a></li><li><a href="/Search/">Search</a></li><li><a href="/Kernel/">Kernel</a></li><li><a href="/Security/">Security</a></li><li><a href="/Distributions/">Distributions</a></li><li><a href="/Calendar/">Events calendar</a></li><li><a href="/Comments/unread">Unread comments</a></li><li><hr></li><li><a href="/op/FAQ.lwn">LWN FAQ</a></li><li><a href="/op/AuthorGuide.lwn">Write for us</a></li></ul></li>
+</ul></div>
+</div> <!-- menu -->
+<div class="pure-g not-handset" style="margin-left: 10.5em">
+           <div class="not-print">
+             <div id="azk13321_leaderboard"></div>
+           </div>
+           </div>
+        <div class="topnav-container">
+<div class="not-handset"><form action="https://lwn.net/Login/" method="post" name="loginform"
+                 class="loginform">
+        <b>User:</b> <input type="text" name="Username" value="" size="8" /> <b>Password:</b> <input type="password" name="Password" size="8" /> <input type="hidden" name="target" value="" /> <input type="submit" name="submit" value="Log in" /></form> |
+           <form action="https://lwn.net/subscribe/" method="post" class="loginform">
+           <input type="submit" name="submit" value="Subscribe" />
+           </form> |
+           <form action="https://lwn.net/Login/newaccount" method="post" class="loginform">
+           <input type="submit" name="submit" value="Register" />
+           </form>
+        </div>
+               <div class="handset-only">
+               <a href="/subscribe/"><b>Subscribe</b></a> /
+               <a href="/Login/"><b>Log in</b></a> /
+               <a href="/Login/newaccount"><b>New account</b></a>
+               </div>
+               </div><div class="pure-grid maincolumn">
+<div class="lwn-u-1 pure-u-md-19-24">
+<div class="PageHeadline">
+<h1>Welcome to LWN.net</h1>
+</div>
+<div class="ArticleText">
+<div class="pure-u-1">
+       <blockquote style="margin-top: 0">
+       LWN.net is a reader-supported news site dedicated to producing the best
+       coverage from within the Linux and free software development communities.
+       See <a href="/op/FAQ.lwn">the LWN FAQ</a> for more information, and
+       please consider <a href="/subscribe/Info">subscribing</a> to gain full
+       access and support our activities.
+       </blockquote>
+       <p>
+       </div><div class="pure-u-1 pure-u-md-1-2 fp-feature">
+<div class="Headline"><b>[<font class="Subscription">$</font>] Writing network flow dissectors in BPF</b></div>
+        
+        <div class="BlurbListing">
+        <font size="-1">[Kernel] Posted Sep 6, 2018 15:59 UTC (Thu) by corbet</font>
+        <p>
+        Network packet headers contain a great deal of information, but the
+kernel often only needs a subset of that information to be able to perform
+filtering or associate any given packet with a flow. The piece of code that
+follows the different layers of packet encapsulation to find the important
+data is called a flow dissector. In current Linux kernels, the <a
+href="https://elixir.bootlin.com/linux/v4.18.6/source/net/core/flow_dissector.c">flow
+dissector</a>
+is written in C. A <a href="/Articles/763938/">patch set</a> has been
+proposed recently to implement it in BPF with the clear goal of improving
+security, flexibility, and maybe even performance.
+
+        <p>
+        <a href="/Articles/764200/">Full Story</a> (<a href="/Articles/764200/#Comments">comments: 1</a>)
+<p>
+</div>
+</div><div class="pure-u-1 pure-u-md-11-24">
+<div class="Headline"><b>The Hidden Benefit of Giving Back to Open Source Software (Working Knowledge)</b></div>
+        
+        <div class="BlurbListing">
+        <font size="-1">[Briefs] Posted Sep 6, 2018 16:56 UTC (Thu) by corbet</font>
+        <p>
+        The Harvard Business School's "Working Knowledge" site has <a
+href="https://hbswk.hbs.edu/item/the-hidden-benefit-of-giving-back-to-open-source-software">an
+article</a> arguing that it can pay for companies to allow their developers
+to contribute back to the projects whose software they use.
+"<span>And that presents an interesting dilemma for firms that rely heavily
+on open source. Should they allow employees on company time to make updates
+and edits to the software for community use that could be used by
+competitors? New research by Assistant Professor Frank Nagle, a member of
+the Strategy Unit at Harvard Business School, shows that paying employees
+to contribute to such software boosts the company’s productivity from using
+the software by as much as 100 percent, when compared with free-riding
+competitors.</span>"
+        <p>
+        <a href="/Articles/764321/">Comments (3 posted)</a>
+<p>
+</div>
+</div>
+<div class="pure-u-1 pure-u-md-1-2 fp-feature">
+<div class="Headline"><b>[<font class="Subscription">$</font>] LWN.net Weekly Edition for September 6, 2018</b></div>
+        
+        <font size="-1">Posted Sep 6, 2018 3:03 UTC (Thu)</font><p>
+        The LWN.net Weekly Edition for September 6, 2018 is available.
+<p>
+        <b>Inside this week's LWN.net Weekly Edition</b>
+        <ul>
+<li> <a href="/Articles/763789/">Front</a>: Life behind the tinfoil curtain; User-space Spectre protection; fs-verity; IDA; Julia part 2; GopherCon.
+            <li> <a href="/Articles/763791/">Briefs</a>: Tink; Kernel &amp; Maintainer Summit topics; LMDE 3; Firefox 62; GNOME 3.30; Quotes; ...
+            <li> <a href="/Articles/763792/">Announcements</a>: Newsletters; events; security updates; kernel patches; ...
+            </ul>
+<a href="/Articles/763789/">Read more</a>
+<p>
+
+</div><div class="pure-u-1 pure-u-md-11-24">
+<div class="Headline"><b>Security updates for Thursday</b></div>
+        
+        <div class="BlurbListing">
+        <font size="-1">[Security] Posted Sep 6, 2018 13:55 UTC (Thu) by jake</font>
+        <p>
+        Security updates have been issued by <b>Debian</b> (curl, gdm3, git-annex, lcms2, and sympa), <b>Fedora</b> (discount, dolphin-emu, gd, obs-build, osc, tcpflow, and yara), <b>openSUSE</b> (wireshark), <b>Slackware</b> (curl, firefox, ghostscript, and thunderbird), <b>SUSE</b> (apache-pdfbox, curl, dovecot22, and libvirt), and <b>Ubuntu</b> (libtirpc).
+
+        <p>
+        <a href="/Articles/764300/">Full Story</a> (<a href="/Articles/764300/#Comments">comments: none</a>)
+<p>
+</div>
+</div>
+<div class="pure-u-1 pure-u-md-1-2 fp-feature">
+<div class="Headline"><b>[<font class="Subscription">$</font>] Life behind the tinfoil curtain</b></div>
+        
+        <div class="BlurbListing">
+        <font size="-1">[Security] Posted Sep 5, 2018 22:11 UTC (Wed) by jake</font>
+        <p>
+        <p>
+Security and convenience rarely go hand-in-hand, but if your job (or life)
+requires extraordinary care against potentially targeted attacks, the
+security side of that tradeoff may win out.  If so, running a system like
+<a href="https://www.qubes-os.org/">Qubes&nbsp;OS</a> on your desktop or <a
+href="https://copperhead.co/">CopperheadOS</a> on your phone might make sense,
+which is just what Konstantin Ryabitsev, Linux Foundation (LF) director of IT
+security, has done.  He reported on the experience in a <a
+href="https://www.youtube.com/watch?v=8cU4hQg6GvU&index=6&list=PLbzoR-pLrL6rOT6m50HdJFYUHyvA9lurI&t=0s">talk
+[YouTube video]</a> entitled "Life Behind the Tinfoil Curtain" at the 2018
+<a
+href="https://events.linuxfoundation.org/events/linux-security-summit-north-america-2018/">Linux
+Security Summit North America</a>.
+
+        <p>
+        <a href="/Articles/764048/">Full Story</a> (<a href="/Articles/764048/#Comments">comments: 9</a>)
+<p>
+</div>
+</div><div class="pure-u-1 pure-u-md-11-24">
+<div class="Headline"><b>GNOME 3.30 released</b></div>
+        
+        <div class="BlurbListing">
+        <font size="-1">[Development] Posted Sep 5, 2018 21:17 UTC (Wed) by ris</font>
+        <p>
+        The GNOME Project has announced the release of GNOME 3.30
+"Almería". "<span>This release brings automatic updates in Software, more
+games, and a new Podcasts application. Improvements to core GNOME
+applications include a refined location and search bar in Files, a
+[Thunderbolt] panel in Settings, support for remoting using RDP in Boxes, and
+many more.</span>" The <a
+href="https://help.gnome.org/misc/release-notes/3.30/">release notes</a>
+contain more information.
+
+        <p>
+        <a href="/Articles/764219/">Full Story</a> (<a href="/Articles/764219/#Comments">comments: none</a>)
+<p>
+</div>
+</div>
+<div class="pure-u-1 pure-u-md-1-2 fp-feature">
+<div class="Headline"><b>[<font class="Subscription">$</font>] Strengthening user-space Spectre v2 protection</b></div>
+        
+        <div class="BlurbListing">
+        <font size="-1">[Kernel] Posted Sep 5, 2018 21:47 UTC (Wed) by corbet</font>
+        <p>
+        The Spectre variant 2 vulnerability allows the speculative execution of
+incorrect (in an attacker-controllable way) indirect branch predictions,
+resulting in 
+the ability to exfiltrate information via side channels.  The kernel has
+been reasonably well protected against this variant since shortly after its
+disclosure in January.  It is, however, possible for user-space processes
+to use Spectre v2 to attack each other; thus far, the mainline kernel has
+offered relatively little protection against such attacks.  A recent <a
+href="/ml/linux-kernel/nycvar.YFH.7.76.1809041619510.15880@cbobk.fhfr.pm/">proposal</a>
+from Jiri Kosina may change that situation, but there are still some
+disagreements around the details.
+
+        <p>
+        <a href="/Articles/764209/">Full Story</a> (<a href="/Articles/764209/#Comments">comments: 1</a>)
+<p>
+</div>
+</div><div class="pure-u-1 pure-u-md-11-24">
+<div class="Headline"><b>Firefox 62.0 released</b></div>
+        
+        <div class="BlurbListing">
+        <font size="-1">[Development] Posted Sep 5, 2018 17:31 UTC (Wed) by ris</font>
+        <p>
+        Mozilla has released Firefox 62.0, with several new features. The Firefox
+Home (default New Tab) allows users to display up to 4 rows of top sites,
+Pocket stories, and highlights; for those using containers there is menu
+option to reopen a tab in a different container; Firefox 63 will remove all
+trust for Symantec-issued certificates, and it is optional in Firefox
+62; FreeBSD support for WebAuthn was added; and more. See the <a
+href="https://www.mozilla.org/en-US/firefox/62.0/releasenotes/">release
+notes</a> for details.
+        <p>
+        <a href="/Articles/764202/">Comments (none posted)</a>
+<p>
+</div>
+</div>
+<div class="pure-u-1 pure-u-md-1-2 fp-feature">
+<div class="Headline"><b>[<font class="Subscription">$</font>] Learning about Go internals at GopherCon</b></div>
+        
+        <div class="BlurbListing">
+        <font size="-1">[Front] Posted Sep 5, 2018 19:20 UTC (Wed) by jake</font>
+        <p>
+        <p><a href="https://www.gophercon.com/">GopherCon</a> is the major
+conference for the <a href="https://golang.org/">Go language</a>, attended
+by 1600  
+dedicated "gophers", as the members of its community like to call
+themselves.  Held for the last five years in Denver, it attracts programmers,
+open-source contributors, and technical managers from all over North
+America and the world. GopherCon's highly-technical program is an intense
+mix of Go internals and programming tutorials, a few of which we will
+explore in this article.
+<p>
+Subscribers can read on for a report from GopherCon by guest author Josh
+Berkus.
+
+        <p>
+        <a href="/Articles/764131/">Full Story</a> (<a href="/Articles/764131/#Comments">comments: 10</a>)
+<p>
+</div>
+</div><div class="pure-u-1 pure-u-md-11-24">
+<div class="Headline"><b>A set of stable kernels</b></div>
+        
+        <div class="BlurbListing">
+        <font size="-1">[Kernel] Posted Sep 5, 2018 15:15 UTC (Wed) by ris</font>
+        <p>
+        Greg Kroah-Hartman has released stable kernels <a
+href="/Articles/764185/">4.18.6</a>, <a
+href="/Articles/764186/">4.14.68</a>, <a
+href="/Articles/764187/">4.9.125</a>, <a
+href="/Articles/764188/">4.4.154</a>, and <a
+href="/Articles/764189/">3.18.121</a>. They all contain important fixes and
+users should upgrade.
+        <p>
+        <a href="/Articles/764184/">Comments (none posted)</a>
+<p>
+</div>
+</div>
+<div class="pure-u-1 pure-u-md-1-2 fp-feature">
+<div class="Headline"><b>[<font class="Subscription">$</font>] An introduction to the Julia language, part 2</b></div>
+        
+        <div class="BlurbListing">
+        <font size="-1">[Development] Posted Sep 4, 2018 15:57 UTC (Tue) by jake</font>
+        <p>
+        <p><a
+href="/Articles/763626/">Part 1</a> of this series introduced 
+the <a href="http://julialang.org/">Julia</a> project's goals and
+development process, along with
+the language syntax, including the basics of control flow, data
+types, and, in more detail, how to work with arrays. In this part, 
+user-defined functions and the central 
+concept of multiple dispatch are described. It will also survey Julia's
+module and 
+package system, cover some syntax features, show how to make
+plots, and briefly dip into macros and distributed computing.
+
+        <p>
+        <a href="/Articles/764001/">Full Story</a> (<a href="/Articles/764001/#Comments">comments: 7</a>)
+<p>
+</div>
+</div><div class="pure-u-1 pure-u-md-11-24">
+<div class="Headline"><b>Security updates for Wednesday</b></div>
+        
+        <div class="BlurbListing">
+        <font size="-1">[Security] Posted Sep 5, 2018 15:01 UTC (Wed) by ris</font>
+        <p>
+        Security updates have been issued by <b>Debian</b> (lcms2), <b>openSUSE</b> (yubico-piv-tool), <b>Oracle</b> (kernel), and <b>SUSE</b> (cobbler and kvm).
+
+        <p>
+        <a href="/Articles/764182/">Full Story</a> (<a href="/Articles/764182/#Comments">comments: none</a>)
+<p>
+</div>
+</div>
+<div class="pure-u-1 pure-u-md-1-2 fp-feature">
+<div class="Headline"><b>[<font class="Subscription">$</font>] IDA: simplifying the complex task of allocating integers</b></div>
+        
+        <div class="BlurbListing">
+        <font size="-1">[Kernel] Posted Sep 4, 2018 0:15 UTC (Tue) by corbet</font>
+        <p>
+        It is common for kernel code to generate unique integers for identifiers.
+When one plugs in a flash drive, it will show up as
+<tt>/dev/sdN</tt>; that <tt>N</tt> (a letter derived from a
+number) must be generated in the
+kernel, and it should not already be in use for another drive or unpleasant
+things will happen.  One might think that generating such numbers would not
+be a difficult task, but that turns out not to be the case, especially in
+situations where many numbers must be tracked.  The IDA (for "ID
+allocator", perhaps) API exists to handle this specialized task.  In past
+kernels, it has managed to make the process of getting an unused number
+surprisingly 
+complex; the 4.19 kernel has a new IDA API that simplifies things
+considerably.
+
+        <p>
+        <a href="/Articles/764057/">Full Story</a> (<a href="/Articles/764057/#Comments">comments: 8</a>)
+<p>
+</div>
+</div><div class="pure-u-1 pure-u-md-11-24">
+<div class="Headline"><b>Security updates for Tuesday</b></div>
+        
+        <div class="BlurbListing">
+        <font size="-1">[Security] Posted Sep 4, 2018 15:14 UTC (Tue) by ris</font>
+        <p>
+        Security updates have been issued by <b>openSUSE</b> (ImageMagick, libressl, postgresql10, spice, and spice-gtk), <b>Red Hat</b> (collectd, kernel, Red Hat Gluster Storage, Red Hat Virtualization, RHGS WA, rhvm-appliance, and samba), and <b>SUSE</b> (crowbar, crowbar-core, crowbar-ha, crowbar-openstack, crowbar-ui, kernel, spice, and spice-gtk).
+
+        <p>
+        <a href="/Articles/764130/">Full Story</a> (<a href="/Articles/764130/#Comments">comments: none</a>)
+<p>
+</div>
+</div>
+<div class="pure-u-1 pure-u-md-1-2 fp-feature">
+<div class="Headline"><b>[<font class="Subscription">$</font>] Protecting files with fs-verity</b></div>
+        
+        <div class="BlurbListing">
+        <font size="-1">[Kernel] Posted Aug 30, 2018 18:50 UTC (Thu) by corbet</font>
+        <p>
+        The developers of the Android system have, among their many goals, the wish
+to better protect Android devices against persistent compromise.  It is bad
+if a device is taken over by an attacker; it's worse if it remains
+compromised even after a reboot.  Numerous mechanisms for ensuring the
+integrity of  installed system files have been proposed and implemented
+over the years.  But it seems there is always room for one more; to fill
+that space, the <a
+href="/ml/linux-fsdevel/20180824161642.1144-1-ebiggers@kernel.org/">fs-verity</a>
+mechanism is being proposed as a way to protect individual files from
+malicious modification.
+
+        <p>
+        <a href="/Articles/763729/">Full Story</a> (<a href="/Articles/763729/#Comments">comments: 6</a>)
+<p>
+</div>
+</div><div class="pure-u-1 pure-u-md-11-24">
+<div class="Headline"><b>Topics sought for the Kernel and Maintainer Summits</b></div>
+        
+        <div class="BlurbListing">
+        <font size="-1">[Kernel] Posted Sep 3, 2018 19:07 UTC (Mon) by corbet</font>
+        <p>
+        The annual Maintainer and Kernel Summits will be held in Vancouver, BC on
+November&nbsp;12 to&nbsp;15, in conjunction with the Linux Plumbers Conference.
+The program committee is looking for topics for both summits; read on for
+details on how to submit ideas and, perhaps, get an invitation to the
+Maintainer Summit.
+
+        <p>
+        <a href="/Articles/764055/">Full Story</a> (<a href="/Articles/764055/#Comments">comments: none</a>)
+<p>
+</div>
+</div>
+<div class="pure-u-1 pure-u-md-1-2 fp-feature">
+<div class="Headline"><b>LWN.net Weekly Edition for August 30, 2018</b></div>
+        
+        <font size="-1">Posted Aug 30, 2018 1:06 UTC (Thu)</font><p>
+        The LWN.net Weekly Edition for August 30, 2018 is available.
+<p>
+        <b>Inside this week's LWN.net Weekly Edition</b>
+        <ul>
+<li> <a href="/Articles/763252/">Front</a>: Julia; C considered dangerous; 4.19 Merge window; I/O controller throughput; KDE onboarding; Dat.
+            <li> <a href="/Articles/763254/">Briefs</a>: OpenSSH 7.8; 4.19-rc1; Which stable?; Netdev 0x12; Bison 3.1; Quotes; ...
+            <li> <a href="/Articles/763255/">Announcements</a>: Newsletters; events; security updates; kernel patches; ...
+            </ul>
+<a href="/Articles/763252/">Read more</a>
+<p>
+
+</div><div class="pure-u-1 pure-u-md-11-24">
+<div class="Headline"><b>Security updates for Monday</b></div>
+        
+        <div class="BlurbListing">
+        <font size="-1">[Security] Posted Sep 3, 2018 15:41 UTC (Mon) by ris</font>
+        <p>
+        Security updates have been issued by <b>Debian</b> (dojo, libtirpc, mariadb-10.0, php5, ruby-json-jwt, spice, spice-gtk, tomcat8, and trafficserver), <b>Fedora</b> (ghc-hakyll, ghc-hs-bibutils, ghostscript, mariadb, pandoc-citeproc, phpMyAdmin, and xen), <b>Mageia</b> (java-1.8.0-openjdk, libarchive, libgd, libraw, libxcursor, mariadb, mercurial, openssh, openssl, poppler, quazip, squirrelmail, and virtualbox), <b>openSUSE</b> (cobbler, libressl, wireshark, and zutils), and <b>SUSE</b> (couchdb, java-1_7_0-ibm, java-1_7_1-ibm, OpenStack, and spice).
+
+        <p>
+        <a href="/Articles/764046/">Full Story</a> (<a href="/Articles/764046/#Comments">comments: none</a>)
+<p>
+</div>
+</div>
+<div class="pure-u-1 pure-u-md-1-2 fp-feature">
+<div class="Headline"><b>[<font class="Subscription">$</font>] Measuring (and fixing) I/O-controller throughput loss</b></div>
+        
+        <div class="BlurbListing">
+        <font size="-1">[Kernel] Posted Aug 29, 2018 21:20 UTC (Wed) by corbet</font>
+        <p>
+        Many services, from web hosting and video streaming to cloud storage,
+need to move data to and from storage.  They also often require that each per-client
+I/O flow be guaranteed a non-zero amount of bandwidth and a bounded latency. An
+expensive way to provide these guarantees is to over-provision
+storage resources, keeping each resource underutilized, and thus
+have plenty of bandwidth available for the few I/O flows dispatched to
+each medium. Alternatively one can use an I/O controller.  Linux provides
+two mechanisms designed to throttle some I/O streams to allow others to
+meet their bandwidth and latency requirements.  These mechanisms work, but
+they come at a cost: a loss of as much as 80% of total available I/O
+bandwidth.  I have run some tests to demonstrate this problem; some
+upcoming improvements to the <a href="/Articles/601799/">bfq I/O
+scheduler</a> promise to improve the situation considerably.
+
+        <p>
+        <a href="/Articles/763603/">Full Story</a> (<a href="/Articles/763603/#Comments">comments: 4</a>)
+<p>
+</div>
+</div><div class="pure-u-1 pure-u-md-11-24">
+<div class="Headline"><b>Kernel prepatch 4.19-rc2</b></div>
+        
+        <div class="BlurbListing">
+        <font size="-1">[Kernel] Posted Sep 2, 2018 22:29 UTC (Sun) by corbet</font>
+        <p>
+        The <a href="/Articles/763988/">4.19-rc2</a> kernel prepatch is out for
+testing.
+"<span>As usual, the rc2 release is pretty small. People are taking a
+breather after the merge window, and it takes a bit of time for bug
+reports to start coming in and get identified.</span>"
+        <p>
+        <a href="/Articles/763987/">Comments (none posted)</a>
+<p>
+</div>
+</div>
+<p><a href="/Articles/?offset=6">--&gt; More news items</a>
+       
+</div> <!-- ArticleText -->
+</div>
+<div class="lwn-u-1 pure-u-md-1-6 not-print">
+<div id="azk93271_right_zone"></div>
+</div>
+</div> <!-- pure-grid -->
+
+        <br clear="all">
+        <center>
+        <P>
+        <font size="-2">
+        Copyright &copy; 2018, Eklektix, Inc.<BR>
+        
+        Comments and public postings are copyrighted by their creators.<br>
+        Linux  is a registered trademark of Linus Torvalds<br>
+        </font>
+        </center>
+        
+            <script type="text/javascript">
+            var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
+            document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
+            </script>
+            <script type="text/javascript">
+            try {
+            var pageTracker = _gat._getTracker("UA-2039382-1");
+            pageTracker._trackPageview();
+            } catch(err) {}</script>
+            
+        </body></html>
+        
+\ No newline at end of file
author	Niki Roo <niki@nikiroo.be>
	Sun, 23 Sep 2018 04:04:04 +0000 (06:04 +0200)
committer	Niki Roo <niki@nikiroo.be>
	Sun, 23 Sep 2018 04:04:04 +0000 (06:04 +0200)
changelog.md		patch \| blob \| blame \| history
libs/nikiroo-utils-4.4.3-sources.jar	[moved from libs/nikiroo-utils-4.4.2-sources.jar with 98% similarity]	patch \| blob \| blame \| history
src/be/nikiroo/gofetch/test/TestBase.java		patch \| blob \| blame \| history
src/be/nikiroo/gofetch/test/TestLWN.java		patch \| blob \| blame \| history
test/expected/LWN/0000763252	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000763252.header	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000763252.header.html	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000763252.html	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000763603	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000763603.header	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000763603.header.html	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000763603.html	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000763729	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000763729.header	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000763729.header.html	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000763729.html	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000763789	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000763789.header	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000763789.header.html	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000763789.html	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000763987	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000763987.header	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000763987.header.html	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000763987.html	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764001	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764001.header	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764001.header.html	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764001.html	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764046	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764046.header	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764046.header.html	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764046.html	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764048	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764048.header	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764048.header.html	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764048.html	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764055	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764055.header	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764055.header.html	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764055.html	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764057	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764057.header	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764057.header.html	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764057.html	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764130	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764130.header	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764130.header.html	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764130.html	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764131	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764131.header	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764131.header.html	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764131.html	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764182	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764182.header	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764182.header.html	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764182.html	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764184	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764184.header	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764184.header.html	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764184.html	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764200	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764200.header	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764200.header.html	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764200.html	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764202	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764202.header	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764202.header.html	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764202.html	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764209	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764209.header	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764209.header.html	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764209.html	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764219	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764219.header	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764219.header.html	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764219.html	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764300	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764300.header	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764300.header.html	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764300.html	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764321	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764321.header	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764321.header.html	[new file with mode: 0644]	patch \| blob
test/expected/LWN/0000764321.html	[new file with mode: 0644]	patch \| blob
test/source/LWN/Articles/763252.html	[new file with mode: 0644]	patch \| blob
test/source/LWN/Articles/763987.html	[new file with mode: 0644]	patch \| blob
test/source/LWN/Articles/764046.html	[new file with mode: 0644]	patch \| blob
test/source/LWN/Articles/764055.html	[new file with mode: 0644]	patch \| blob
test/source/LWN/Articles/764130.html	[new file with mode: 0644]	patch \| blob
test/source/LWN/Articles/764182.html	[new file with mode: 0644]	patch \| blob
test/source/LWN/Articles/764184.html	[new file with mode: 0644]	patch \| blob
test/source/LWN/Articles/764202.html	[new file with mode: 0644]	patch \| blob
test/source/LWN/Articles/764219.html	[new file with mode: 0644]	patch \| blob
test/source/LWN/Articles/764300.html	[new file with mode: 0644]	patch \| blob
test/source/LWN/Articles/764321.html	[new file with mode: 0644]	patch \| blob
test/source/LWN/index.html	[new file with mode: 0644]	patch \| blob