Browse Source

last async working but not parsing all file

Antoine Dorian 1 year ago
parent
commit
894e908f1f
1 changed files with 49 additions and 35 deletions
  1. 49
    35
      src/main/scala/com/mediahub/FileReader.scala

+ 49
- 35
src/main/scala/com/mediahub/FileReader.scala View File

2
 
2
 
3
 import akka.NotUsed
3
 import akka.NotUsed
4
 import akka.actor.ActorSystem
4
 import akka.actor.ActorSystem
5
+import akka.stream.ActorAttributes.supervisionStrategy
6
+import akka.stream.Supervision.resumingDecider
5
 import akka.stream.alpakka.csv.scaladsl.{CsvParsing, CsvToMap}
7
 import akka.stream.alpakka.csv.scaladsl.{CsvParsing, CsvToMap}
6
 import akka.stream.scaladsl.{Compression, FileIO, Flow, Sink, Source}
8
 import akka.stream.scaladsl.{Compression, FileIO, Flow, Sink, Source}
7
 import akka.util.ByteString
9
 import akka.util.ByteString
24
   val titlePrincipalsResource: File = new File("src/main/resources/title.principals.tsv.gz")
26
   val titlePrincipalsResource: File = new File("src/main/resources/title.principals.tsv.gz")
25
   val nameBasicsResource: File = new File("src/main/resources/name.basics.tsv.gz")
27
   val nameBasicsResource: File = new File("src/main/resources/name.basics.tsv.gz")
26
 
28
 
27
-  def fileSource(file: File) = {
28
-    FileIO
29
-      .fromPath(Paths.get(file.getPath), 1 * 1024 * 1024)
29
+  def fileSource(file: File): Source[ByteString, NotUsed] = {
30
+    Source.single(file)
31
+      .flatMapConcat(f => FileIO.fromPath(Paths.get(f.getPath), 1 * 1024 * 1024 * 2))
30
       .via(Compression.gunzip())
32
       .via(Compression.gunzip())
31
   }
33
   }
32
 
34
 
37
   }
39
   }
38
 
40
 
39
   //working search not optimized
41
   //working search not optimized
40
-  val titleId: String = getIdOfTitle(titleBasicsResource, "Carmencita")
42
+  val titleId: String = getIdOfTitleAsync(titleBasicsResource, "Carmencita")
41
   println("value ", titleId)
43
   println("value ", titleId)
42
 
44
 
43
-  val personsIdList: List[String] = getIdOfPersons(titlePrincipalsResource, titleId)
45
+  val personsIdList: List[String] = getIdOfPersonsAsync(titlePrincipalsResource, titleId).toList.flatten
44
   println("value ", personsIdList)
46
   println("value ", personsIdList)
45
 
47
 
46
-  val personsList: List[String] = getPersons(nameBasicsResource, personsIdList)
48
+  val personsList = getPersons(nameBasicsResource, personsIdList)
47
   println("value ", personsList)
49
   println("value ", personsList)
48
 
50
 
49
 
51
 
50
-  //working do not change for now
51
-  def getIdOfTitle(file: File, titleName: String): String = {
52
-    val result: Future[Option[String]] = Source.single(file)
53
-      .flatMapConcat(f => FileIO.fromPath(Paths.get(f.getPath), 1 * 1024 * 1024).reduce((a, b) => a ++ b))
54
-      .via(Compression.gunzip())
55
-      .via(CsvParsing.lineScanner(CsvParsing.Tab, CsvParsing.DoubleQuote, CsvParsing.DoubleQuote))
56
-      .via(CsvToMap.toMapAsStringsCombineAll(headerPlaceholder = Option.empty))
57
-      .filter(row => row.getOrElse("primaryTitle", "") == titleName)
58
-      .map(a => a.get("tconst"))
52
+  def getIdOfTitleAsync(file: File, titleName: String): String = {
53
+    val result = fileSource(file)
54
+      .mapAsync(50) {
55
+        res =>
56
+          Source.single(res)
57
+            .via(lineParser)
58
+            .filter(row => row.getOrElse("primaryTitle", "") == titleName)
59
+            .map(a => a.get("tconst"))
60
+            .withAttributes(supervisionStrategy(resumingDecider))
61
+            .runWith(Sink.head)
62
+      }
63
+      .withAttributes(supervisionStrategy(resumingDecider))
59
       .runWith(Sink.head)
64
       .runWith(Sink.head)
60
 
65
 
61
     Await.result(result, 5 minutes)
66
     Await.result(result, 5 minutes)
62
     result.value.get.get.get
67
     result.value.get.get.get
63
   }
68
   }
64
 
69
 
65
-  def getIdOfTitleAsync(file: File, titleName: String): immutable.Iterable[Option[String]] = {
66
-    val result = fileSource(file)
67
-      .mapAsync(10) {
68
-        result =>
69
-          Source.single(result)
70
+  def getIdOfPersonsAsync(file: File, titleId: String): immutable.Iterable[Option[String]] = {
71
+    val te = fileSource(file)
72
+      .mapAsync(50) {
73
+        res =>
74
+          Source.single(res)
70
             .via(lineParser)
75
             .via(lineParser)
71
-            .filter(row => row.getOrElse("primaryTitle", "") == titleName)
72
-            .map(a => a.get("tconst"))
76
+            .filter(row => row.getOrElse("tconst", "") == titleId)
77
+            .map(a => a.get("nconst"))
78
+            .withAttributes(supervisionStrategy(resumingDecider))
73
             .runWith(Sink.collection)
79
             .runWith(Sink.collection)
74
       }
80
       }
81
+      .withAttributes(supervisionStrategy(resumingDecider))
75
       .runWith(Sink.head)
82
       .runWith(Sink.head)
76
-    Await.result(result, 5 minutes)
83
+
84
+    Await.result(te, 5 minutes)
77
   }
85
   }
78
 
86
 
79
-  def getIdOfPersons(file: File, titleId: String): List[String] = {
87
+  def getPersons(file: File, personsIdList: List[String]): List[String] = {
80
     val result: Future[immutable.Iterable[Option[String]]] = Source.single(file)
88
     val result: Future[immutable.Iterable[Option[String]]] = Source.single(file)
81
       .flatMapConcat(f => FileIO.fromPath(Paths.get(f.getPath), 1 * 1024 * 1024))
89
       .flatMapConcat(f => FileIO.fromPath(Paths.get(f.getPath), 1 * 1024 * 1024))
82
       .via(Compression.gunzip())
90
       .via(Compression.gunzip())
83
       .via(CsvParsing.lineScanner(CsvParsing.Tab, CsvParsing.DoubleQuote, CsvParsing.DoubleQuote))
91
       .via(CsvParsing.lineScanner(CsvParsing.Tab, CsvParsing.DoubleQuote, CsvParsing.DoubleQuote))
84
       .via(CsvToMap.toMapAsStringsCombineAll(headerPlaceholder = Option.empty))
92
       .via(CsvToMap.toMapAsStringsCombineAll(headerPlaceholder = Option.empty))
85
-      .filter(row => row.getOrElse("tconst", "") == titleId)
86
-      .map(a => a.get("nconst"))
93
+      .filter(row => personsIdList.contains(row.getOrElse("nconst", "")))
94
+      .map(a => a.get("primaryName"))
87
       .runWith(Sink.collection)
95
       .runWith(Sink.collection)
88
 
96
 
89
     Await.result(result, 5 minutes)
97
     Await.result(result, 5 minutes)
90
     result.value.get.get.toList.flatten
98
     result.value.get.get.toList.flatten
91
   }
99
   }
92
 
100
 
93
-  def getPersons(file: File, personsIdList: List[String]): List[String] = {
94
-    val result: Future[immutable.Iterable[Option[String]]] = Source.single(file)
101
+ /* def getPersonsAsync(file: File, personsIdList: List[String]) = {
102
+    val test = Source.single(file)
95
       .flatMapConcat(f => FileIO.fromPath(Paths.get(f.getPath), 1 * 1024 * 1024))
103
       .flatMapConcat(f => FileIO.fromPath(Paths.get(f.getPath), 1 * 1024 * 1024))
96
       .via(Compression.gunzip())
104
       .via(Compression.gunzip())
97
-      .via(CsvParsing.lineScanner(CsvParsing.Tab, CsvParsing.DoubleQuote, CsvParsing.DoubleQuote))
98
-      .via(CsvToMap.toMapAsStringsCombineAll(headerPlaceholder = Option.empty))
99
-      .filter(row => personsIdList.contains(row.getOrElse("nconst", "")))
100
-      .map(a => a.get("primaryName"))
105
+      .mapAsync(2) {
106
+        res =>
107
+          Source.single(res)
108
+            .via(CsvParsing.lineScanner(CsvParsing.Tab, CsvParsing.DoubleQuote, CsvParsing.DoubleQuote))
109
+            .via(CsvToMap.toMapAsStringsCombineAll(headerPlaceholder = Option.empty))
110
+            .filter(row => row.getOrElse("nconst", "") == personsIdList.head)
111
+            .map(a => a.get("primaryName"))
112
+            .runWith(Sink.collection)
113
+      }
101
       .runWith(Sink.collection)
114
       .runWith(Sink.collection)
102
 
115
 
103
-    Await.result(result, 5 minutes)
104
-    result.value.get.get.toList.flatten
116
+    Await.result(test, 5 minutes)
117
+    test.value.get.get.toList.flatten
105
   }
118
   }
106
-}
119
+  */
120
+}

Powered by TurnKey Linux.