|
@@ -3,8 +3,7 @@ package com.mediahub
|
3
|
3
|
import akka.NotUsed
|
4
|
4
|
import akka.actor.ActorSystem
|
5
|
5
|
import akka.stream.alpakka.csv.scaladsl.{CsvParsing, CsvToMap}
|
6
|
|
-import akka.stream.javadsl.FileIO
|
7
|
|
-import akka.stream.scaladsl.{Compression, Framing, Sink, Source}
|
|
6
|
+import akka.stream.scaladsl.{Compression, FileIO, Flow, Sink, Source}
|
8
|
7
|
import akka.util.ByteString
|
9
|
8
|
import org.springframework.stereotype.Component
|
10
|
9
|
|
|
@@ -12,7 +11,7 @@ import java.io.File
|
12
|
11
|
import java.nio.file.Paths
|
13
|
12
|
import scala.collection.immutable
|
14
|
13
|
import scala.concurrent.duration.DurationInt
|
15
|
|
-import scala.concurrent.{Await, ExecutionContext, Future}
|
|
14
|
+import scala.concurrent.{Await, Future}
|
16
|
15
|
import scala.language.postfixOps
|
17
|
16
|
|
18
|
17
|
|
|
@@ -25,19 +24,33 @@ class FileReader {
|
25
|
24
|
val titlePrincipalsResource: File = new File("src/main/resources/title.principals.tsv.gz")
|
26
|
25
|
val nameBasicsResource: File = new File("src/main/resources/name.basics.tsv.gz")
|
27
|
26
|
|
|
27
|
/**
 * Builds a stream of the decompressed bytes of a gzip-compressed file.
 *
 * The file is read in chunks of up to 1 MiB and every chunk is passed through gunzip.
 *
 * @param file the gzip-compressed file to read
 * @return a source emitting the decompressed content as `ByteString` chunks
 */
def fileSource(file: File) = {
  val readChunkSize = 1 * 1024 * 1024
  val compressedBytes = FileIO.fromPath(Paths.get(file.getPath), readChunkSize)
  compressedBytes.via(Compression.gunzip())
}
|
|
32
|
+
|
|
33
|
/**
 * Flow that turns raw bytes into one `Map[String, String]` per parsed line.
 *
 * Lines are scanned as tab-delimited records with the double quote used as both the quote
 * and the escape character. No header placeholder is supplied to the map conversion.
 */
val lineParser: Flow[ByteString, Map[String, String], NotUsed] = {
  val tabSeparatedLines = CsvParsing.lineScanner(
    delimiter = CsvParsing.Tab,
    quoteChar = CsvParsing.DoubleQuote,
    escapeChar = CsvParsing.DoubleQuote
  )
  val rowsAsMaps = CsvToMap.toMapAsStringsCombineAll(headerPlaceholder = Option.empty)
  tabSeparatedLines.via(rowsAsMaps)
}
|
|
38
|
+
|
|
39
|
+ //working search not optimized
|
28
|
40
|
// Chain of three lookups: title name -> title id -> ids of the people attached to that
// title -> the people themselves. These are plain vals, so each lookup is evaluated as
// soon as this point of the enclosing body runs.
val titleId: String = getIdOfTitle(titleBasicsResource, "Carmencita")
// Interpolate instead of passing two arguments: println("value ", x) is auto-tupled
// and prints "(value ,x)".
println(s"value $titleId")

val personsIdList: List[String] = getIdOfPersons(titlePrincipalsResource, titleId)
println(s"value $personsIdList")

val personsList: List[String] = getPersons(nameBasicsResource, personsIdList)
println(s"value $personsList")
|
36
|
48
|
|
37
|
49
|
|
|
50
|
+ //working do not change for now
|
38
|
51
|
def getIdOfTitle(file: File, titleName: String): String = {
|
39
|
52
|
val result: Future[Option[String]] = Source.single(file)
|
40
|
|
- .flatMapConcat(f => FileIO.fromPath(Paths.get(f.getPath), 1 * 1024 * 1024))
|
|
53
|
+ .flatMapConcat(f => FileIO.fromPath(Paths.get(f.getPath), 1 * 1024 * 1024).reduce((a, b) => a ++ b))
|
41
|
54
|
.via(Compression.gunzip())
|
42
|
55
|
.via(CsvParsing.lineScanner(CsvParsing.Tab, CsvParsing.DoubleQuote, CsvParsing.DoubleQuote))
|
43
|
56
|
.via(CsvToMap.toMapAsStringsCombineAll(headerPlaceholder = Option.empty))
|
|
@@ -49,6 +62,19 @@ class FileReader {
|
49
|
62
|
result.value.get.get.get
|
50
|
63
|
}
|
51
|
64
|
|
|
65
|
/**
 * Collects the `tconst` value of every row whose `primaryTitle` equals `titleName`.
 *
 * The decompressed bytes are parsed as one continuous stream. Parsing each chunk on its
 * own would cut rows at chunk boundaries and treat the first line of every chunk as a
 * header, and taking only the first element would ignore everything after the first chunk.
 *
 * Despite the name, this call blocks the calling thread until the stream completes,
 * for at most five minutes.
 *
 * @param file      gzip-compressed, tab-separated file to search
 * @param titleName exact value to look for in the `primaryTitle` column
 * @return one entry per matching row: the row's `tconst` value, or `None` when the row
 *         has no `tconst` key; empty when nothing matches
 */
def getIdOfTitleAsync(file: File, titleName: String): immutable.Iterable[Option[String]] = {
  val matchingIds: Future[immutable.Iterable[Option[String]]] = fileSource(file)
    .via(lineParser)
    .filter(row => row.getOrElse("primaryTitle", "") == titleName)
    .map(row => row.get("tconst"))
    .runWith(Sink.collection)
  Await.result(matchingIds, 5.minutes)
}
|
52
|
78
|
|
53
|
79
|
def getIdOfPersons(file: File, titleId: String): List[String] = {
|
54
|
80
|
val result: Future[immutable.Iterable[Option[String]]] = Source.single(file)
|