scala parallel collections
Download
Skip this Video
Download Presentation
Scala Parallel Collections

Loading in 2 Seconds...

play fullscreen
1 / 108

Scala Parallel Collections - PowerPoint PPT Presentation


  • 59 Views
  • Uploaded on

Scala Parallel Collections. Aleksandar Prokopec EPFL. Scala collections. for { s <- surnames n <- names if s endsWith n } yield (n, s). Mc Donald. Scala collections. for { s <- surnames n <- names if s endsWith n } yield (n, s). 1040 ms.

loader
I am the owner, or an agent authorized to act on behalf of the owner, of the copyrighted work described.
captcha
Download Presentation

PowerPoint Slideshow about ' Scala Parallel Collections' - tilly


An Image/Link below is provided (as is) to download presentation

Download Policy: Content on the Website is provided to you AS IS for your information and personal use and may not be sold / licensed / shared on other websites without getting consent from its author. While downloading, if for some reason you are not able to download a presentation, the publisher may have deleted the file from their server.


- - - - - - - - - - - - - - - - - - - - - - - - - - E N D - - - - - - - - - - - - - - - - - - - - - - - - - -
Presentation Transcript
scala parallel collections

Scala Parallel Collections

Aleksandar Prokopec

EPFL

scala collections
Scala collections

for {

s <- surnames

n <- names

if s endsWith n

} yield (n, s)

McDonald

scala collections1
Scala collections

for {

s <- surnames

n <- names

if s endsWith n

} yield (n, s)

1040 ms

scala parallel collections1
Scala parallel collections

for {

s <- surnames

n <- names

if s endsWith n

} yield (n, s)

scala parallel collections2
Scala parallel collections

for {

s <- surnames.par

n <- names.par

if s endsWith n

} yield (n, s)

scala parallel collections3
Scala parallel collections

for {

s <- surnames.par

n <- names.par

if s endsWith n

} yield (n, s)

2 cores

575 ms

scala parallel collections4
Scala parallel collections

for {

s <- surnames.par

n <- names.par

if s endsWith n

} yield (n, s)

4 cores

305 ms

for comprehensions
for comprehensions

surnames.par.flatMap { s =>

names.par

.filter(n => s endsWith n)

.map(n => (n, s))

}

for comprehensions nested parallelized bulk operations
for comprehensions – nested parallelized bulk operations

surnames.par.flatMap { s =>

names.par

.filter(n => s endsWith n)

.map(n => (n, s))

}

nested parallelism parallel within parallel
Nested parallelism – parallel within parallel

surnames.par.flatMap { s =>

surnameToCollection(s)

// may invoke parallel ops

}

composition

nested parallelism going recursive
Nested parallelism – going recursive

def vowel(c: Char): Boolean = ...

nested parallelism going recursive1
Nested parallelism – going recursive

def vowel(c: Char): Boolean = ...

def gen(n: Int, acc: Seq[String]): Seq[String] =

if (n == 0) acc

nested parallelism going recursive2
Nested parallelism – going recursive

def vowel(c: Char): Boolean = ...

def gen(n: Int, acc: Seq[String]): Seq[String] =

if (n == 0) acc

else for (s <- gen(n - 1, acc); c <- 'a' to 'z') yield

recursive algorithms

nested parallelism going recursive3
Nested parallelism – going recursive

def vowel(c: Char): Boolean = ...

def gen(n: Int, acc: Seq[String]): Seq[String] =

if (n == 0) acc

else for (s <- gen(n - 1, acc); c <- 'a' to 'z') yield

if (s.length == 0) s + c

nested parallelism going recursive4
Nested parallelism – going recursive

def vowel(c: Char): Boolean = ...

def gen(n: Int, acc: Seq[String]): Seq[String] =

if (n == 0) acc

else for (s <- gen(n - 1, acc); c <- 'a' to 'z') yield

if (s.length == 0) s + c

else if (vowel(s.last) && !vowel(c)) s + c

else if (!vowel(s.last) && vowel(c)) s + c

nested parallelism going recursive5
Nested parallelism – going recursive

def vowel(c: Char): Boolean = ...

def gen(n: Int, acc: Seq[String]): Seq[String] =

if (n == 0) acc

else for (s <- gen(n - 1, acc); c <- 'a' to 'z') yield

if (s.length == 0) s + c

else if (vowel(s.last) && !vowel(c)) s + c

else if (!vowel(s.last) && vowel(c)) s + c

else s

gen(5, Array(""))

nested parallelism going recursive6
Nested parallelism – going recursive

def vowel(c: Char): Boolean = ...

def gen(n: Int, acc: Seq[String]): Seq[String] =

if (n == 0) acc

else for (s <- gen(n - 1, acc); c <- 'a' to 'z') yield

if (s.length == 0) s + c

else if (vowel(s.last) && !vowel(c)) s + c

else if (!vowel(s.last) && vowel(c)) s + c

else s

gen(5, Array(""))

1545 ms

nested parallelism going recursive7
Nested parallelism – going recursive

def vowel(c: Char): Boolean = ...

def gen(n: Int, acc: ParSeq[String]): ParSeq[String] =

if (n == 0) acc

else for (s <- gen(n - 1, acc); c <- 'a' to 'z') yield

if (s.length == 0) s + c

else if (vowel(s.last) && !vowel(c)) s + c

else if (!vowel(s.last) && vowel(c)) s + c

else s

gen(5, ParArray(""))

nested parallelism going recursive8
Nested parallelism – going recursive

def vowel(c: Char): Boolean = ...

def gen(n: Int, acc: ParSeq[String]): ParSeq[String] =

if (n == 0) acc

else for (s <- gen(n - 1, acc); c <- 'a' to 'z') yield

if (s.length == 0) s + c

else if (vowel(s.last) && !vowel(c)) s + c

else if (!vowel(s.last) && vowel(c)) s + c

else s

gen(5, ParArray(""))

1 core

1575 ms

nested parallelism going recursive9
Nested parallelism – going recursive

def vowel(c: Char): Boolean = ...

def gen(n: Int, acc: ParSeq[String]): ParSeq[String] =

if (n == 0) acc

else for (s <- gen(n - 1, acc); c <- 'a' to 'z') yield

if (s.length == 0) s + c

else if (vowel(s.last) && !vowel(c)) s + c

else if (!vowel(s.last) && vowel(c)) s + c

else s

gen(5, ParArray(""))

2 cores

809 ms

nested parallelism going recursive10
Nested parallelism – going recursive

def vowel(c: Char): Boolean = ...

def gen(n: Int, acc: ParSeq[String]): ParSeq[String] =

if (n == 0) acc

else for (s <- gen(n - 1, acc); c <- 'a' to 'z') yield

if (s.length == 0) s + c

else if (vowel(s.last) && !vowel(c)) s + c

else if (!vowel(s.last) && vowel(c)) s + c

else s

gen(5, ParArray(""))

4 cores

530 ms

character count use case for foldleft
Character count – use case for foldLeft

val txt: String = ...

txt.foldLeft(0) {

case (a, ' ') => a

case (a, c) => a + 1

}

character count use case for foldleft1
Character count – use case for foldLeft

txt.foldLeft(0) {

case (a, ' ') => a

case (a, c) => a + 1

}

going left to right - not parallelizable!

2

4

3

5

1

0

6

A

B

C

D

E

F

_ + 1

character count use case for foldleft2
Character count – use case for foldLeft

txt.foldLeft(0) {

case (a, ' ') => a

case (a, c) => a + 1

}

going left to right – not really necessary

6

_ + _

2

1

0

3

A

B

C

0

2

3

1

D

E

F

_ + 1

_ + 1

character count in parallel
Character count – in parallel

txt.fold(0) {

case (a, ' ') => a

case (a, c) => a + 1

}

character count in parallel1
Character count – in parallel

txt.fold(0) {

case (a, ' ') => a

case (a, c) => a + 1

}

: (Int, Char) => Int

2

1

1

3

A

B

C

3

2

1

1

A

B

C

_ + 1

character count fold not applicable
Character count – fold not applicable

txt.fold(0) {

case (a, ' ') => a

case (a, c) => a + 1

}

! (Int, Int) => Int

3

3

_ + _

3

2

1

3

A

B

C

3

2

1

3

A

B

C

character count use case for aggregate
Character count – use case for aggregate

txt.aggregate(0)({

case (a, ' ') => a

case (a, c) => a + 1

}, _ + _)

character count use case for aggregate1
Character count – use case for aggregate

txt.aggregate(0)({

case (a, ' ') => a

case (a, c) => a + 1

}, _ + _)

3

3

_ + _

3

1

1

2

A

B

C

3

2

1

3

A

B

C

_ + 1

character count use case for aggregate2
Character count – use case for aggregate

txt.aggregate(0)({

case (a, ' ') => a

case (a, c) => a + 1

}, _ + _)

aggregation  element

3

3

_ + _

1

2

3

1

A

B

B

C

3

2

3

1

A

B

C

_ + 1

character count use case for aggregate3
Character count – use case for aggregate

txt.aggregate(0)({

case (a, ' ') => a

case (a, c) => a + 1

}, _ + _)

aggregation  aggregation

aggregation  element

3

3

_ + _

1

2

3

1

A

B

B

C

3

2

3

1

A

B

C

_ + 1

word count another use case for foldleft
Word count – another use case for foldLeft

txt.foldLeft((0, true)) {

case ((wc, _), ' ') => (wc, true)

case ((wc, true), x) => (wc + 1, false)

case ((wc, false), x) => (wc, false)

}

word count initial accumulation
Word count – initial accumulation

txt.foldLeft((0, true)) {

case ((wc, _), ' ') => (wc, true)

case ((wc, true), x) => (wc + 1, false)

case ((wc, false), x) => (wc, false)

}

0 words so far

last character was a space

“Folding me softly.”

word count a space
Word count – a space

txt.foldLeft((0, true)) {

case ((wc, _), ' ') => (wc, true)

case ((wc, true), x) => (wc + 1, false)

case ((wc, false), x) => (wc, false)

}

last seen character is a space

“Folding me softly.”

word count a non space
Word count – a non space

txt.foldLeft((0, true)) {

case ((wc, _), ' ') => (wc, true)

case ((wc, true), x) => (wc + 1, false)

case ((wc, false), x) => (wc, false)

}

last seen character was a space – a new word

“Folding me softly.”

word count a non space1
Word count – a non space

txt.foldLeft((0, true)) {

case ((wc, _), ' ') => (wc, true)

case ((wc, true), x) => (wc + 1, false)

case ((wc, false), x) => (wc, false)

}

last seen character wasn’t a space – no new word

“Folding me softly.”

word count in parallel
Word count – in parallel

P1

P2

“Folding me “

“softly.“

word count in parallel1
Word count – in parallel

P1

P2

“Folding me “

“softly.“

wc = 2; rs = 1

wc = 1; ls = 0

word count in parallel2
Word count – in parallel

P1

P2

“Folding me “

“softly.“

wc = 2; rs = 1

wc = 1; ls = 0

wc = 3

word count must assume arbitrary partitions
Word count – must assume arbitrary partitions

P1

P2

“Foldin“

“g me softly.“

wc = 1; rs = 0

wc = 3; ls = 0

word count must assume arbitrary partitions1
Word count – must assume arbitrary partitions

P1

P2

“Foldin“

“g me softly.“

wc = 1; rs = 0

wc = 3; ls = 0

wc = 3

word count initial aggregation
Word count – initial aggregation

txt.par.aggregate((0, 0, 0))

word count initial aggregation1
Word count – initial aggregation

txt.par.aggregate((0, 0, 0))

# spaces on the left

#words

# spaces on the right

word count initial aggregation2
Word count – initial aggregation

txt.par.aggregate((0, 0, 0))

# spaces on the left

#words

# spaces on the right

””

word count aggregation aggregation
Word count – aggregation ⊕ aggregation

...

}, {

case ((0, 0, 0), res) => res

case (res, (0, 0, 0)) => res

““

“softly.“

“Folding me“

““

word count aggregation aggregation1
Word count – aggregation ⊕ aggregation

...

}, {

case ((0, 0, 0), res) => res

case (res, (0, 0, 0)) => res

case ((lls, lwc, 0), (0, rwc, rrs)) =>

(lls, lwc + rwc - 1, rrs)

“Folding m“

“e softly.“

word count aggregation aggregation2
Word count – aggregation ⊕ aggregation

...

}, {

case ((0, 0, 0), res) => res

case (res, (0, 0, 0)) => res

case ((lls, lwc, 0), (0, rwc, rrs)) =>

(lls, lwc + rwc - 1, rrs)

case ((lls, lwc, _), (_, rwc, rrs)) =>

(lls, lwc + rwc, rrs)

“Folding me”

“ softly.“

word count aggregation element
Word count – aggregation ⊕ element

txt.par.aggregate((0, 0, 0))({

case ((ls, 0, _), ' ') => (ls + 1, 0, ls + 1)

0 words and a space – add one more space each side

”_”

word count aggregation element1
Word count – aggregation ⊕ element

txt.par.aggregate((0, 0, 0))({

case ((ls, 0, _), ' ') => (ls + 1, 0, ls + 1)

case ((ls, 0, _), c) => (ls, 1, 0)

0 words and a non-space – one word, no spaces on the right side

” m”

word count aggregation element2
Word count – aggregation ⊕ element

txt.par.aggregate((0, 0, 0))({

case ((ls, 0, _), ' ') => (ls + 1, 0, ls + 1)

case ((ls, 0, _), c) => (ls, 1, 0)

case ((ls, wc, rs), ' ') => (ls, wc, rs + 1)

nonzero words and a space – one more space on the right side

” me_”

word count aggregation element3
Word count – aggregation ⊕ element

txt.par.aggregate((0, 0, 0))({

case ((ls, 0, _), ' ') => (ls + 1, 0, ls + 1)

case ((ls, 0, _), c) => (ls, 1, 0)

case ((ls, wc, rs), ' ') => (ls, wc, rs + 1)

case ((ls, wc, 0), c) => (ls, wc, 0)

nonzero words, last non-space and current non-space – no change

” me sof”

word count aggregation element4
Word count – aggregation ⊕ element

txt.par.aggregate((0, 0, 0))({

case ((ls, 0, _), ' ') => (ls + 1, 0, ls + 1)

case ((ls, 0, _), c) => (ls, 1, 0)

case ((ls, wc, rs), ' ') => (ls, wc, rs + 1)

case ((ls, wc, 0), c) => (ls, wc, 0)

case ((ls, wc, rs), c) => (ls, wc + 1, 0)

nonzero words, last space and current non-space – one more word

” me s”

word count in parallel3
Word count – in parallel

txt.par.aggregate((0, 0, 0))({

case ((ls, 0, _), ' ') => (ls + 1, 0, ls + 1)

case ((ls, 0, _), c) => (ls, 1, 0)

case ((ls, wc, rs), ' ') => (ls, wc, rs + 1)

case ((ls, wc, 0), c) => (ls, wc, 0)

case ((ls, wc, rs), c) => (ls, wc + 1, 0)

}, {

case ((0, 0, 0), res) => res

case (res, (0, 0, 0)) => res

case ((lls, lwc, 0), (0, rwc, rrs)) =>

(lls, lwc + rwc - 1, rrs)

case ((lls, lwc, _), (_, rwc, rrs)) =>

(lls, lwc + rwc, rrs)

})

word count using parallel strings
Word count – using parallel strings?

txt.par.aggregate((0, 0, 0))({

case ((ls, 0, _), ' ') => (ls + 1, 0, ls + 1)

case ((ls, 0, _), c) => (ls, 1, 0)

case ((ls, wc, rs), ' ') => (ls, wc, rs + 1)

case ((ls, wc, 0), c) => (ls, wc, 0)

case ((ls, wc, rs), c) => (ls, wc + 1, 0)

}, {

case ((0, 0, 0), res) => res

case (res, (0, 0, 0)) => res

case ((lls, lwc, 0), (0, rwc, rrs)) =>

(lls, lwc + rwc - 1, rrs)

case ((lls, lwc, _), (_, rwc, rrs)) =>

(lls, lwc + rwc, rrs)

})

word count string not really parallelizable1
Word count – string not really parallelizable

scala> (txt: String).par

collection.parallel.ParSeq[Char] = ParArray(…)

word count string not really parallelizable2
Word count – string not really parallelizable

scala> (txt: String).par

collection.parallel.ParSeq[Char] = ParArray(…)

different internal representation!

word count string not really parallelizable3
Word count – string not really parallelizable

scala> (txt: String).par

collection.parallel.ParSeq[Char] = ParArray(…)

different internal representation!

ParArray

word count string not really parallelizable4
Word count – string not really parallelizable

scala> (txt: String).par

collection.parallel.ParSeq[Char] = ParArray(…)

different internal representation!

ParArray

 copy string contents into an array

conversions going parallel
Conversions – going parallel

// par is efficient – no copying

mutable.{Array, ArrayBuffer, ArraySeq}

mutable.{HashMap, HashSet}

immutable.{Vector, Range}

immutable.{HashMap, HashSet}

conversions going parallel1
Conversions – going parallel

// par is efficient – no copying

mutable.{Array, ArrayBuffer, ArraySeq}

mutable.{HashMap, HashSet}

immutable.{Vector, Range}

immutable.{HashMap, HashSet}

most other collections construct a new parallel collection!

custom collection
Custom collection

class ParString(val str: String)

custom collection1
Custom collection

class ParString(val str: String)

extends parallel.immutable.ParSeq[Char] {

custom collection2
Custom collection

class ParString(val str: String)

extends parallel.immutable.ParSeq[Char] {

def apply(i: Int) = str.charAt(i)

def length = str.length

custom collection3
Custom collection

class ParString(val str: String)

extends parallel.immutable.ParSeq[Char] {

def apply(i: Int) = str.charAt(i)

def length = str.length

def seq = new WrappedString(str)

custom collection4
Custom collection

class ParString(val str: String)

extends parallel.immutable.ParSeq[Char] {

def apply(i: Int) = str.charAt(i)

def length = str.length

def seq = new WrappedString(str)

def splitter: Splitter[Char]

custom collection5
Custom collection

class ParString(val str: String)

extends parallel.immutable.ParSeq[Char] {

def apply(i: Int) = str.charAt(i)

def length = str.length

def seq = new WrappedString(str)

def splitter =

new ParStringSplitter(0, str.length)

custom collection splitter definition
Custom collection – splitter definition

class ParStringSplitter(var i: Int, len: Int)

extends Splitter[Char] {

custom collection splitters are iterators
Custom collection – splitters are iterators

class ParStringSplitter(var i: Int, len: Int)

extends Splitter[Char] {

def hasNext = i < len

def next = {

val r = str.charAt(i)

i += 1

r

}

custom collection splitters must be duplicated
Custom collection – splitters must be duplicated

...

def dup = new ParStringSplitter(i, len)

custom collection splitters know how many elements remain
Custom collection – splitters know how many elements remain

...

def dup = new ParStringSplitter(i, len)

def remaining = len - i

custom collection splitters can be split
Custom collection – splitters can be split

...

def psplit(sizes: Int*): Seq[ParStringSplitter] = {

val splitted = new ArrayBuffer[ParStringSplitter]

for (sz <- sizes) {

val next = (i + sz) min len

splitted += new ParStringSplitter(i, next)

i = next

}

splitted

}

word count now with parallel strings
Word count – now with parallel strings

new ParString(txt).aggregate((0, 0, 0))({

case ((ls, 0, _), ' ') => (ls + 1, 0, ls + 1)

case ((ls, 0, _), c) => (ls, 1, 0)

case ((ls, wc, rs), ' ') => (ls, wc, rs + 1)

case ((ls, wc, 0), c) => (ls, wc, 0)

case ((ls, wc, rs), c) => (ls, wc + 1, 0)

}, {

case ((0, 0, 0), res) => res

case (res, (0, 0, 0)) => res

case ((lls, lwc, 0), (0, rwc, rrs)) =>

(lls, lwc + rwc - 1, rrs)

case ((lls, lwc, _), (_, rwc, rrs)) =>

(lls, lwc + rwc, rrs)

})

word count performance
Word count – performance

txt.foldLeft((0, true)) {

case ((wc, _), ' ') => (wc, true)

case ((wc, true), x) => (wc + 1, false)

case ((wc, false), x) => (wc, false)

}

new ParString(txt).aggregate((0, 0, 0))({

case ((ls, 0, _), ' ') => (ls + 1, 0, ls + 1)

case ((ls, 0, _), c) => (ls, 1, 0)

case ((ls, wc, rs), ' ') => (ls, wc, rs + 1)

case ((ls, wc, 0), c) => (ls, wc, 0)

case ((ls, wc, rs), c) => (ls, wc + 1, 0)

}, {

case ((0, 0, 0), res) => res

case (res, (0, 0, 0)) => res

case ((lls, lwc, 0), (0, rwc, rrs)) =>

(lls, lwc + rwc - 1, rrs)

case ((lls, lwc, _), (_, rwc, rrs)) =>

(lls, lwc + rwc, rrs)

})

100 ms

cores: 1 2 4

time: 137 ms 70 ms 35 ms

hierarchy
Hierarchy

GenTraversable

GenIterable

Traversable

GenSeq

Iterable

ParIterable

Seq

ParSeq

hierarchy1
Hierarchy

def nonEmpty(sq: Seq[String]) = {

val res = new mutable.ArrayBuffer[String]()

for (s <- sq) {

if (s.nonEmpty) res += s

}

res

}

hierarchy2
Hierarchy

def nonEmpty(sq: ParSeq[String]) = {

val res = new mutable.ArrayBuffer[String]()

for (s <- sq) {

if (s.nonEmpty) res += s

}

res

}

hierarchy3
Hierarchy

def nonEmpty(sq: ParSeq[String]) = {

val res = new mutable.ArrayBuffer[String]()

for (s <- sq) {

if (s.nonEmpty) res += s

}

res

}

side-effects!

ArrayBuffer is not synchronized!

hierarchy4
Hierarchy

def nonEmpty(sq: ParSeq[String]) = {

val res = new mutable.ArrayBuffer[String]()

for (s <- sq) {

if (s.nonEmpty) res += s

}

res

}

Seq

side-effects!

ArrayBuffer is not synchronized!

ParSeq

hierarchy5
Hierarchy

def nonEmpty(sq: GenSeq[String]) = {

val res = new mutable.ArrayBuffer[String]()

for (s <- sq) {

if (s.nonEmpty) res.synchronized {

res += s

}

}

res

}

thank you
Thank you!

Examples at:

git://github.com/axel22/sd.git

accessors vs transformers some methods need more than just splitters
Accessors vs. transformers – some methods need more than just splitters

foreach, reduce, find, sameElements, indexOf, corresponds, forall, exists, max, min, sum, count, …

map, flatMap, filter, partition, ++, take, drop, span, zip, patch, padTo, …

accessors vs transformers some methods need more than just splitters1
Accessors vs. transformers – some methods need more than just splitters

foreach, reduce, find, sameElements, indexOf, corresponds, forall, exists, max, min, sum, count, …

map, flatMap, filter, partition, ++, take, drop, span, zip, patch, padTo, …

These return collections!

accessors vs transformers some methods need more than just splitters2
Accessors vs. transformers – some methods need more than just splitters

foreach, reduce, find, sameElements, indexOf, corresponds, forall, exists, max, min, sum, count, …

map, flatMap, filter, partition, ++, take, drop, span, zip, patch, padTo, …

Sequential collections – builders

accessors vs transformers some methods need more than just splitters3
Accessors vs. transformers – some methods need more than just splitters

foreach, reduce, find, sameElements, indexOf, corresponds, forall, exists, max, min, sum, count, …

map, flatMap, filter, partition, ++, take, drop, span, zip, patch, padTo, …

Sequential collections – builders

Parallel collections – combiners

builders building a sequential collection
Builders – building a sequential collection

+=

+=

+=

1

2

2

3

4

4

5

6

6

7

Nil

ListBuilder

Nil

result

combiners building parallel collections
Combiners – building parallel collections

trait Combiner[-Elem, +To]

extends Builder[Elem, To] {

def combine[N <: Elem, NewTo >: To]

(other: Combiner[N, NewTo]):

Combiner[N, NewTo]

}

combiners building parallel collections1
Combiners – building parallel collections

trait Combiner[-Elem, +To]

extends Builder[Elem, To] {

def combine[N <: Elem, NewTo >: To]

(other: Combiner[N, NewTo]):

Combiner[N, NewTo]

}

Combiner

Combiner

Combiner

combiners building parallel collections
Combiners – building parallel collections

trait Combiner[-Elem, +To]

extends Builder[Elem, To] {

def combine[N <: Elem, NewTo >: To]

(other: Combiner[N, NewTo]):

Combiner[N, NewTo]

}

either use an efficient merge operation or do lazy evaluation

parallel arrays
Parallel arrays

copy

2

4

6

8

8

0

2

2

merge

allocate

merge

merge

1, 2, 3, 4

5, 6, 7, 8

3, 1, 8, 0

2, 2, 1, 9

2, 4

6, 8

8, 0

2, 2

parallel hash tables1
Parallel hash tables

e.g. calling filter

ParHashMap

0

1

2

4

5

7

8

9

parallel hash tables2
Parallel hash tables

e.g. calling filter

ParHashCombiner

ParHashCombiner

ParHashMap

0

0

1

1

2

4

4

5

5

7

7

8

9

9

parallel hash tables3
Parallel hash tables

ParHashCombiner

ParHashCombiner

0

1

4

5

7

9

ParHashMap

0

1

2

4

5

7

8

9

parallel hash tables4
Parallel hash tables

0

1

4

5

7

9

ParHashCombiner

ParHashCombiner

0

1

4

5

7

9

ParHashMap

0

1

2

4

5

7

8

9

parallel hash tables5
Parallel hash tables

0

1

4

5

7

9

How to merge?

ParHashCombiner

ParHashCombiner

ParHashMap

parallel hash tables6
Parallel hash tables

buckets!

ParHashCombiner

ParHashCombiner

ParHashMap

0

0

1

1

2

4

4

5

5

7

7

8

9

9

0 = 0000₂

1 = 0001₂

4 = 0100₂

parallel hash tables7
Parallel hash tables

ParHashCombiner

ParHashCombiner

combine

0

4

5

9

1

7

parallel hash tables8
Parallel hash tables

ParHashCombiner

no copying!

ParHashCombiner

ParHashCombiner

0

4

5

9

1

7

parallel hash tables9
Parallel hash tables

ParHashCombiner

0

4

9

1

5

7

parallel hash tables10
Parallel hash tables

ParHashMap

0

1

4

5

7

9

ad