|
@@ -0,0 +1,146 @@
|
|
1
|
+package com.mediahub
|
|
2
|
+
|
|
3
|
+import akka.NotUsed
|
|
4
|
+import akka.actor.ActorSystem
|
|
5
|
+import akka.stream.ActorAttributes.supervisionStrategy
|
|
6
|
+import akka.stream.Supervision.resumingDecider
|
|
7
|
+import akka.stream.alpakka.csv.scaladsl.{CsvParsing, CsvToMap}
|
|
8
|
+import akka.stream.scaladsl.{Compression, FileIO, Flow, Sink, Source}
|
|
9
|
+import akka.util.ByteString
|
|
10
|
+import org.springframework.stereotype.Component
|
|
11
|
+
|
|
12
|
+import java.io.File
|
|
13
|
+import java.nio.charset.StandardCharsets
|
|
14
|
+import java.nio.file.Paths
|
|
15
|
+import scala.collection.{immutable, mutable}
|
|
16
|
+import scala.concurrent.Await
|
|
17
|
+import scala.concurrent.duration.DurationInt
|
|
18
|
+import scala.language.postfixOps
|
|
19
|
+import scala.util.Try
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+@Component
|
|
23
|
+class MovieQueryService extends MovieServiceTrait {
|
|
24
|
+
|
|
25
|
+ implicit val system: ActorSystem = ActorSystem()
|
|
26
|
+
|
|
27
|
+ val titleBasicsResource: File = new File("src/main/resources/title.basics.tsv.gz")
|
|
28
|
+ val titlePrincipalsResource: File = new File("src/main/resources/title.principals.tsv.gz")
|
|
29
|
+ val nameBasicsResource: File = new File("src/main/resources/name.basics.tsv.gz")
|
|
30
|
+ val titleEpisodeResource: File = new File("src/main/resources/title.episode.tsv.gz")
|
|
31
|
+
|
|
32
|
+ def fileSource(file: File): Source[ByteString, NotUsed] = {
|
|
33
|
+ Source.single(file)
|
|
34
|
+ .flatMapConcat(f => FileIO.fromPath(Paths.get(f.getPath), 1 * 1024 * 1024 * 2))
|
|
35
|
+ .via(Compression.gunzip())
|
|
36
|
+ }
|
|
37
|
+
|
|
38
|
+ val lineParser: Flow[ByteString, Map[String, String], NotUsed] = {
|
|
39
|
+ CsvParsing
|
|
40
|
+ .lineScanner(CsvParsing.Tab, CsvParsing.DoubleQuote, CsvParsing.DoubleQuote)
|
|
41
|
+ .via(CsvToMap.toMapAsStringsCombineAll(headerPlaceholder = Option.empty))
|
|
42
|
+ }
|
|
43
|
+
|
|
44
|
+ override def principalsForMovieName(name: String): List[Principal] = {
|
|
45
|
+
|
|
46
|
+ val titleId: String = getIdOfTitle(titleBasicsResource, name)
|
|
47
|
+ val personsIdList: List[String] = getIdOfPersons(titlePrincipalsResource, titleId).toList.flatten
|
|
48
|
+ val personsList = getPersons(nameBasicsResource, personsIdList)
|
|
49
|
+ personsList
|
|
50
|
+ }
|
|
51
|
+
|
|
52
|
+ override def tvSeriesWithGreatestNumberOfEpisodes(): List[TvSeries] = {
|
|
53
|
+ val seriesTitleMap = getSeriesTitle(titleEpisodeResource)
|
|
54
|
+ val titleListMaxEpisode = getTitleByIds(titleBasicsResource, seriesTitleMap)
|
|
55
|
+ titleListMaxEpisode
|
|
56
|
+ }
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+ def getIdOfTitle(file: File, titleName: String): String = {
|
|
61
|
+ val result = fileSource(file)
|
|
62
|
+ .mapAsync(50) {
|
|
63
|
+ res =>
|
|
64
|
+ Source.single(res)
|
|
65
|
+ .via(lineParser)
|
|
66
|
+ .filter(row => row.getOrElse("primaryTitle", "") == titleName)
|
|
67
|
+ .map(a => a.get("tconst"))
|
|
68
|
+ .withAttributes(supervisionStrategy(resumingDecider))
|
|
69
|
+ .runWith(Sink.head)
|
|
70
|
+ }
|
|
71
|
+ .withAttributes(supervisionStrategy(resumingDecider))
|
|
72
|
+ .runWith(Sink.head)
|
|
73
|
+
|
|
74
|
+ Await.result(result, 5 minutes)
|
|
75
|
+ result.value.get.get.get
|
|
76
|
+ }
|
|
77
|
+
|
|
78
|
+ def getTitleByIds(file: File, titleIdMap: mutable.Map[String, Int]): List[TvSeries] = {
|
|
79
|
+ val result = Source.single(file)
|
|
80
|
+ .flatMapConcat(f => FileIO.fromPath(Paths.get(f.getPath), 1 * 1024 * 1024))
|
|
81
|
+ .via(Compression.gunzip())
|
|
82
|
+ .via(CsvParsing.lineScanner(CsvParsing.Tab, CsvParsing.DoubleQuote, CsvParsing.DoubleQuote))
|
|
83
|
+ .via(CsvToMap.toMapAsStringsCombineAll(StandardCharsets.UTF_8, Option.empty))
|
|
84
|
+ .filter(row => titleIdMap.keySet.toList.contains(row.getOrElse("tconst", "")))
|
|
85
|
+ .map(a => TvSeries(a("originalTitle"), Try(a.getOrElse("startYear","").toInt).getOrElse(0), a.getOrElse("endYear","").toIntOption, a.get("genres").toList))
|
|
86
|
+ .runWith(Sink.collection)
|
|
87
|
+
|
|
88
|
+ Await.result(result, 5 minutes)
|
|
89
|
+ result.value.get.get.toList
|
|
90
|
+ }
|
|
91
|
+
|
|
92
|
+ def getIdOfPersons(file: File, titleId: String): immutable.Iterable[Option[String]] = {
|
|
93
|
+ val result = fileSource(file)
|
|
94
|
+ .mapAsync(50) {
|
|
95
|
+ res =>
|
|
96
|
+ Source.single(res)
|
|
97
|
+ .via(lineParser)
|
|
98
|
+ .filter(row => row.getOrElse("tconst", "") == titleId)
|
|
99
|
+ .map(a => a.get("nconst"))
|
|
100
|
+ .withAttributes(supervisionStrategy(resumingDecider))
|
|
101
|
+ .runWith(Sink.collection)
|
|
102
|
+ }
|
|
103
|
+ .withAttributes(supervisionStrategy(resumingDecider))
|
|
104
|
+ .runWith(Sink.head)
|
|
105
|
+
|
|
106
|
+ Await.result(result, 5 minutes)
|
|
107
|
+ }
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+ def getPersons(file: File, personsIdList: List[String]): List[Principal] = {
|
|
111
|
+ val result = Source.single(file)
|
|
112
|
+ .flatMapConcat(f => FileIO.fromPath(Paths.get(f.getPath), 1 * 1024 * 1024))
|
|
113
|
+ .via(Compression.gunzip())
|
|
114
|
+ .via(CsvParsing.lineScanner(CsvParsing.Tab, CsvParsing.DoubleQuote, CsvParsing.DoubleQuote))
|
|
115
|
+ .via(CsvToMap.toMapAsStringsCombineAll(StandardCharsets.UTF_8, Option.empty))
|
|
116
|
+ .filter(row => personsIdList.contains(row.getOrElse("nconst", "")))
|
|
117
|
+ .map(a => Principal(a("primaryName"), Try(a("birthYear").toInt).getOrElse(0), a("deathYear").toIntOption, a.get("primaryProfession").toList))
|
|
118
|
+ .runWith(Sink.collection)
|
|
119
|
+
|
|
120
|
+ Await.result(result, 5 minutes)
|
|
121
|
+ result.value.get.get.toList
|
|
122
|
+ }
|
|
123
|
+
|
|
124
|
+ def getSeriesTitle(file: File): mutable.Map[String, Int] = {
|
|
125
|
+
|
|
126
|
+ val seriesMap = collection.mutable.Map("0" -> 0)
|
|
127
|
+ val maxSeriesMap = collection.mutable.Map("0" -> 0)
|
|
128
|
+
|
|
129
|
+ val result = Source.single(file)
|
|
130
|
+ .flatMapConcat(f => FileIO.fromPath(Paths.get(f.getPath), 1 * 1024 * 1024))
|
|
131
|
+ .via(Compression.gunzip())
|
|
132
|
+ .via(CsvParsing.lineScanner(CsvParsing.Tab, CsvParsing.DoubleQuote, CsvParsing.DoubleQuote))
|
|
133
|
+ .via(CsvToMap.toMapAsStringsCombineAll(StandardCharsets.UTF_8, Option.empty))
|
|
134
|
+ .map(row => seriesMap += seriesMap.get(row("parentTconst")).map(x => row("parentTconst") -> (x + 1)).getOrElse(row("parentTconst") -> 1))
|
|
135
|
+ .runWith(Sink.seq)
|
|
136
|
+
|
|
137
|
+ Await.result(result, 5 minutes)
|
|
138
|
+
|
|
139
|
+ for (_ <- 1 to 10) {
|
|
140
|
+ maxSeriesMap.addOne(seriesMap.maxBy(x => x._2)._1 -> seriesMap.maxBy(x => x._2)._2)
|
|
141
|
+ seriesMap.remove(seriesMap.maxBy(x => x._2)._1)
|
|
142
|
+ }
|
|
143
|
+ maxSeriesMap
|
|
144
|
+ }
|
|
145
|
+
|
|
146
|
+}
|