Scala Parallel Collections by Aleksandar Prokopec - EPFL Algorithms Study
1.08k likes | 1.2k Views
Dive into the world of Scala parallel collections with EPFL's Aleksandar Prokopec, exploring nested parallelism, recursive algorithms, and efficient parallel processing for faster computation times.
Scala Parallel Collections by Aleksandar Prokopec - EPFL Algorithms Study
E N D
Presentation Transcript
Scala Parallel Collections Aleksandar Prokopec EPFL
Scala collections for { s <- surnames; n <- names; if s endsWith n } yield (n, s) McDonald
Scala collections for { s <- surnames; n <- names; if s endsWith n } yield (n, s) 1040 ms
Scala parallel collections for { s <- surnames; n <- names; if s endsWith n } yield (n, s)
Scala parallel collections for { s <- surnames.par; n <- names.par; if s endsWith n } yield (n, s)
Scala parallel collections for { s <- surnames.par; n <- names.par; if s endsWith n } yield (n, s) 2 cores 575 ms
Scala parallel collections for { s <- surnames.par; n <- names.par; if s endsWith n } yield (n, s) 4 cores 305 ms
for comprehensions surnames.par.flatMap { s => names.par.filter(n => s endsWith n).map(n => (n, s)) }
for comprehensions: nested parallelized bulk operations surnames.par.flatMap { s => names.par.filter(n => s endsWith n).map(n => (n, s)) }
Nested parallelism: parallel within parallel surnames.par.flatMap { s => surnameToCollection(s) /* may invoke parallel ops */ } composition
Nested parallelism: going recursive def vowel(c: Char): Boolean = ...
Nested parallelism: going recursive def vowel(c: Char): Boolean = ...; def gen(n: Int, acc: Seq[String]): Seq[String] = if (n == 0) acc
Nested parallelism: going recursive def vowel(c: Char): Boolean = ...; def gen(n: Int, acc: Seq[String]): Seq[String] = if (n == 0) acc else for (s <- gen(n - 1, acc); c <- 'a' to 'z') yield recursive algorithms
Nested parallelism: going recursive def vowel(c: Char): Boolean = ...; def gen(n: Int, acc: Seq[String]): Seq[String] = if (n == 0) acc else for (s <- gen(n - 1, acc); c <- 'a' to 'z') yield if (s.length == 0) s + c
Nested parallelism: going recursive def vowel(c: Char): Boolean = ...; def gen(n: Int, acc: Seq[String]): Seq[String] = if (n == 0) acc else for (s <- gen(n - 1, acc); c <- 'a' to 'z') yield if (s.length == 0) s + c else if (vowel(s.last) && !vowel(c)) s + c else if (!vowel(s.last) && vowel(c)) s + c
Nested parallelism: going recursive def vowel(c: Char): Boolean = ...; def gen(n: Int, acc: Seq[String]): Seq[String] = if (n == 0) acc else for (s <- gen(n - 1, acc); c <- 'a' to 'z') yield if (s.length == 0) s + c else if (vowel(s.last) && !vowel(c)) s + c else if (!vowel(s.last) && vowel(c)) s + c else s; gen(5, Array(""))
Nested parallelism: going recursive def vowel(c: Char): Boolean = ...; def gen(n: Int, acc: Seq[String]): Seq[String] = if (n == 0) acc else for (s <- gen(n - 1, acc); c <- 'a' to 'z') yield if (s.length == 0) s + c else if (vowel(s.last) && !vowel(c)) s + c else if (!vowel(s.last) && vowel(c)) s + c else s; gen(5, Array("")) 1545 ms
Nested parallelism: going recursive def vowel(c: Char): Boolean = ...; def gen(n: Int, acc: ParSeq[String]): ParSeq[String] = if (n == 0) acc else for (s <- gen(n - 1, acc); c <- 'a' to 'z') yield if (s.length == 0) s + c else if (vowel(s.last) && !vowel(c)) s + c else if (!vowel(s.last) && vowel(c)) s + c else s; gen(5, ParArray(""))
Nested parallelism: going recursive def vowel(c: Char): Boolean = ...; def gen(n: Int, acc: ParSeq[String]): ParSeq[String] = if (n == 0) acc else for (s <- gen(n - 1, acc); c <- 'a' to 'z') yield if (s.length == 0) s + c else if (vowel(s.last) && !vowel(c)) s + c else if (!vowel(s.last) && vowel(c)) s + c else s; gen(5, ParArray("")) 1 core 1575 ms
Nested parallelism: going recursive def vowel(c: Char): Boolean = ...; def gen(n: Int, acc: ParSeq[String]): ParSeq[String] = if (n == 0) acc else for (s <- gen(n - 1, acc); c <- 'a' to 'z') yield if (s.length == 0) s + c else if (vowel(s.last) && !vowel(c)) s + c else if (!vowel(s.last) && vowel(c)) s + c else s; gen(5, ParArray("")) 2 cores 809 ms
Nested parallelism: going recursive def vowel(c: Char): Boolean = ...; def gen(n: Int, acc: ParSeq[String]): ParSeq[String] = if (n == 0) acc else for (s <- gen(n - 1, acc); c <- 'a' to 'z') yield if (s.length == 0) s + c else if (vowel(s.last) && !vowel(c)) s + c else if (!vowel(s.last) && vowel(c)) s + c else s; gen(5, ParArray("")) 4 cores 530 ms
Character count: use case for foldLeft val txt: String = ...; txt.foldLeft(0) { case (a, ' ') => a case (a, c) => a + 1 }
Character count: use case for foldLeft txt.foldLeft(0) { case (a, ' ') => a case (a, c) => a + 1 } going left to right – not parallelizable! 2 4 3 5 1 0 6 A B C D E F _ + 1
Character count: use case for foldLeft txt.foldLeft(0) { case (a, ' ') => a case (a, c) => a + 1 } going left to right – not really necessary 6 _ + _ 2 1 0 3 A B C 0 2 3 1 D E F _ + 1 _ + 1
Character count: in parallel txt.fold(0) { case (a, ' ') => a case (a, c) => a + 1 }
Character count: in parallel txt.fold(0) { case (a, ' ') => a case (a, c) => a + 1 } : (Int, Char) => Int 2 1 1 3 A B C 3 2 1 1 A B C _ + 1
Character count: fold not applicable txt.fold(0) { case (a, ' ') => a case (a, c) => a + 1 } ! (Int, Int) => Int 3 3 _ + _ 3 2 1 3 A B C 3 2 1 3 A B C
Character count: use case for aggregate txt.aggregate(0)({ case (a, ' ') => a case (a, c) => a + 1 }, _ + _)
Character count: use case for aggregate txt.aggregate(0)({ case (a, ' ') => a case (a, c) => a + 1 }, _ + _) 3 3 _ + _ 3 1 1 2 A B C 3 2 1 3 A B C _ + 1
Character count: use case for aggregate txt.aggregate(0)({ case (a, ' ') => a case (a, c) => a + 1 }, _ + _) aggregation element 3 3 _ + _ 1 2 3 1 A B B C 3 2 3 1 A B C _ + 1
Character count: use case for aggregate txt.aggregate(0)({ case (a, ' ') => a case (a, c) => a + 1 }, _ + _) aggregation aggregation aggregation element 3 3 _ + _ 1 2 3 1 A B B C 3 2 3 1 A B C _ + 1
Word count: another use case for foldLeft txt.foldLeft((0, true)) { case ((wc, _), ' ') => (wc, true) case ((wc, true), x) => (wc + 1, false) case ((wc, false), x) => (wc, false) }
Word count: initial accumulation txt.foldLeft((0, true)) { case ((wc, _), ' ') => (wc, true) case ((wc, true), x) => (wc + 1, false) case ((wc, false), x) => (wc, false) } 0 words so far; last character was a space “Folding me softly.”
Word count: a space txt.foldLeft((0, true)) { case ((wc, _), ' ') => (wc, true) case ((wc, true), x) => (wc + 1, false) case ((wc, false), x) => (wc, false) } last seen character is a space “Folding me softly.”
Word count: a non-space txt.foldLeft((0, true)) { case ((wc, _), ' ') => (wc, true) case ((wc, true), x) => (wc + 1, false) case ((wc, false), x) => (wc, false) } last seen character was a space – a new word “Folding me softly.”
Word count: a non-space txt.foldLeft((0, true)) { case ((wc, _), ' ') => (wc, true) case ((wc, true), x) => (wc + 1, false) case ((wc, false), x) => (wc, false) } last seen character wasn’t a space – no new word “Folding me softly.”
Word count: in parallel P1 P2 "Folding me " "softly."
Word count: in parallel P1 P2 "Folding me " "softly." wc = 2; rs = 1 wc = 1; ls = 0
Word count: in parallel P1 P2 "Folding me " "softly." wc = 2; rs = 1 wc = 1; ls = 0 wc = 3
Word count: must assume arbitrary partitions P1 P2 "Foldin" "g me softly." wc = 1; rs = 0 wc = 3; ls = 0
Word count: must assume arbitrary partitions P1 P2 "Foldin" "g me softly." wc = 1; rs = 0 wc = 3; ls = 0 wc = 3
Word count: initial aggregation txt.par.aggregate((0, 0, 0))
Word count: initial aggregation txt.par.aggregate((0, 0, 0)) # spaces on the left # words # spaces on the right
Word count: initial aggregation txt.par.aggregate((0, 0, 0)) # spaces on the left # words # spaces on the right ""
Word count: aggregation aggregation ... }, { case ((0, 0, 0), res) => res case (res, (0, 0, 0)) => res "" "softly." "Folding me" ""