Browse Source

first commit

placidenduwayo 1 year ago
parent
commit
928fb60edf

+ 1
- 1
pom.xml View File

65
             <artifactId>scala-logging-slf4j_2.11</artifactId>
65
             <artifactId>scala-logging-slf4j_2.11</artifactId>
66
             <version>2.1.2</version>
66
             <version>2.1.2</version>
67
         </dependency>
67
         </dependency>
68
-        
68
+
69
     </dependencies>
69
     </dependencies>
70
 
70
 
71
     <build>
71
     <build>

+ 8
- 0
src/main/java/fr/natan/akkastreamfileprocessingapi/controller/MovieController.java View File

34
         akkaStreamFilesProcessing.getAllPersons();
34
         akkaStreamFilesProcessing.getAllPersons();
35
         return new ResponseEntity<>("is running", HttpStatus.OK);
35
         return new ResponseEntity<>("is running", HttpStatus.OK);
36
     }
36
     }
37
+
38
+    @RequestMapping(value = "/persons/{personID}", method = RequestMethod.GET)
39
+    private ResponseEntity<String> getPersonByID(@PathVariable(name = "personID") String nconst){
40
+       akkaStreamFilesProcessing.getPersonById(nconst);
41
+
42
+       return new ResponseEntity<>("is running", HttpStatus.OK);
43
+
44
+    }
37
 }
45
 }

+ 5
- 0
src/main/scala/fr/natan/akkastreamfileprocessingapi/jsonformat/ToJsonMapping.scala View File

1
+package fr.natan.akkastreamfileprocessingapi.jsonformat
2
+
3
+case class ToJsonMapping() {
4
+
5
+}

+ 8
- 7
src/main/scala/fr/natan/akkastreamfileprocessingapi/models/Person.scala View File

4
                          nconst: String,
4
                          nconst: String,
5
                          primaryName: String,
5
                          primaryName: String,
6
                          birthYear: String,
6
                          birthYear: String,
7
-                         dearthYear: String,
7
+                         deathYear: String,
8
                          primaryProfession: List[String],
8
                          primaryProfession: List[String],
9
                          knownForTitles: List[String]
9
                          knownForTitles: List[String]
10
                        ){
10
                        ){
11
   override def toString: String = {
11
   override def toString: String = {
12
-    "Person[ nconst:"+nconst+
13
-      ", primaryName:"+primaryName+
14
-    ", birthYear:"+birthYear+
15
-    ", dearthYear:"+dearthYear+
16
-    ", primaryProfession:"+primaryProfession+
17
-    ", knownForTitles:"+knownForTitles
12
+    "Person[ person-ID:"+nconst+
13
+      ", primary-name:"+primaryName+
14
+    ", birth-year:"+birthYear+
15
+    ", dearth year:"+deathYear+
16
+    ", primary profession:"+primaryProfession+
17
+    ", known for titles:"+knownForTitles+
18
+    "]"
18
   }
19
   }
19
 }
20
 }

+ 2
- 2
src/main/scala/fr/natan/akkastreamfileprocessingapi/models/TvSeries.scala View File

9
                      startYear: String,
9
                      startYear: String,
10
                     endYear: String,
10
                     endYear: String,
11
                      runtimeMinutes: String,
11
                      runtimeMinutes: String,
12
-                     genre: String
12
+                     genres: String
13
                    ) {
13
                    ) {
14
 
14
 
15
   override def toString: String = {
15
   override def toString: String = {
21
       ", start year:"+startYear+
21
       ", start year:"+startYear+
22
     ", end year:"+endYear+
22
     ", end year:"+endYear+
23
       ", runtime minutes:"+runtimeMinutes+
23
       ", runtime minutes:"+runtimeMinutes+
24
-      ", genre:"+genre+"]"
24
+      ", genre:"+genres+"]"
25
   }
25
   }
26
 }
26
 }

+ 72
- 56
src/main/scala/fr/natan/akkastreamfileprocessingapi/service/AkkaStreamComponents.scala View File

1
 package fr.natan.akkastreamfileprocessingapi.service
1
 package fr.natan.akkastreamfileprocessingapi.service
2
 
2
 
3
 import akka.actor.ActorSystem
3
 import akka.actor.ActorSystem
4
+import akka.stream.alpakka.csv.scaladsl.{CsvParsing, CsvToMap}
4
 import akka.stream.javadsl.Framing
5
 import akka.stream.javadsl.Framing
5
 import akka.stream.scaladsl.{Compression, FileIO, Flow, Sink, Source}
6
 import akka.stream.scaladsl.{Compression, FileIO, Flow, Sink, Source}
6
 import akka.util.ByteString
7
 import akka.util.ByteString
19
 
20
 
20
   implicit val actor: ActorSystem = ActorSystem("AkkaStreamActor")
21
   implicit val actor: ActorSystem = ActorSystem("AkkaStreamActor")
21
 
22
 
22
-  private def convertToTvSerie(array: Array[String]): TvSeries = {
23
+  private def convertToTvSerie(map: Map[String, String]): TvSeries = {
23
     val tvSerie: TvSeries = TvSeries(
24
     val tvSerie: TvSeries = TvSeries(
24
-      array(0),
25
-      array(1),
26
-      array(2),
27
-      array(3),
28
-      array(4),
29
-      array(5),
30
-      array(6),
31
-      array(7),
32
-      array(8))
25
+      map("tconst"),
26
+      map("titleType"),
27
+      map("primaryTitle"),
28
+      map("originalTitle"),
29
+      map("isAdult"),
30
+      map("startYear"),
31
+      map("endYear"),
32
+      map("runtimeMinutes"),
33
+      map("genres")
34
+    )
33
 
35
 
34
     tvSerie
36
     tvSerie
35
   }
37
   }
36
 
38
 
37
-  private def convertToPerson(array: Array[String]): Person ={
39
+  private def convertToPerson(map: Map[String, String]): Person ={
38
     Person(
40
     Person(
39
-      array(0),
40
-      array(1),
41
-      array(2),
42
-      array(3),
43
-      array(4).split(",").toList,
44
-      array(5).split(",").toList,
41
+      map("nconst"),
42
+      map("primaryName"),
43
+      map("birthYear"),
44
+      map("deathYear"),
45
+      map("primaryProfession").split(",").toList,
46
+      map("knownForTitles").split(",").toList,
45
     )
47
     )
46
   }
48
   }
47
 
49
 
48
   //flows
50
   //flows
49
 
51
 
50
-  def buildTvSerieFlow(movieID: String): Flow[String, TvSeries, NotUsed] = {
52
+  def buildTvSerieFlow(): Flow[Map[String, String], TvSeries, NotUsed] = {
51
 
53
 
52
-    val tvFlow: Flow[String, TvSeries, NotUsed] =
53
-      Flow[String].filter(rows => !rows.contains(movieID))
54
+    val tvFlow: Flow[Map[String, String], TvSeries, NotUsed] =
55
+      Flow[Map[String, String]]
54
         .map(row => {
56
         .map(row => {
55
-          val movie: Array[String] = row.split(separator)
56
-          convertToTvSerie(movie)
57
+          convertToTvSerie(row)
57
         })
58
         })
58
     tvFlow
59
     tvFlow
59
   }
60
   }
60
 
61
 
61
-  def buildPersonFlow(personID: String): Flow[String, Person, NotUsed]={
62
-    val personFlow: Flow[String, Person, NotUsed] =
63
-      Flow[String]
64
-        .filterNot((rows: String)=>{
65
-          rows.contains(personID)
66
-        })
67
-        .map((row: String)=>{
68
-          convertToPerson(row.split(separator))
62
+  def buildPersonFlow(): Flow[Map[String, String], Person, NotUsed] = {
63
+    val personFlow: Flow[Map[String, String], Person, NotUsed] =
64
+      Flow[Map[String, String]]
65
+        .map((rowMap: Map[String, String]) => {
66
+          convertToPerson(rowMap)
69
         })
67
         })
70
 
68
 
71
     personFlow
69
     personFlow
72
   }
70
   }
73
-  def buildfilterByMoviePrimaryTitleFlow(moviePrimaryTitle: String): Flow[String, TvSeries, NotUsed] = {
74
-    val filterFlow: Flow[String, TvSeries, NotUsed] =
75
-      Flow[String]
76
-        .filter((rows: String) => {
77
-          rows.contains(moviePrimaryTitle)
71
+
72
+  def filterByMoviePrimaryTitleFlow(moviePrimaryTitle: String): Flow[Map[String, String], TvSeries, NotUsed] = {
73
+    val filterFlow: Flow[Map[String, String], TvSeries, NotUsed] = Flow[Map[String, String]]
74
+        .filter((rows: Map[String, String]) => {
75
+          rows.getOrElse("primaryTitle","")==moviePrimaryTitle
78
         })
76
         })
79
-        .map(row => {
80
-          val movie: Array[String] = row.split("\t")
81
-          convertToTvSerie(movie)
77
+        .map(rowMap => {
78
+          convertToTvSerie(map = rowMap)
82
         })
79
         })
83
 
80
 
84
     filterFlow
81
     filterFlow
85
   }
82
   }
83
+
84
+  def filterByPersonID(nconst: String): Flow[Map[String, String], Person, NotUsed]={
85
+    val personFilter: Flow[Map[String, String], Person, NotUsed]=
86
+      Flow[Map[String, String]]
87
+        .filter((rowMap:Map[String, String])=>{
88
+          rowMap.getOrElse("nconst","")==nconst
89
+        })
90
+        .map(rowMap=>{
91
+          convertToPerson(map = rowMap)
92
+        })
93
+
94
+    personFilter
95
+  }
96
+
86
   //source
97
   //source
87
-  def buildSource(inputFile: File): Source[String, NotUsed] = {
98
+  def buildSource(inputFile: File): Source[Map[String, String], NotUsed] = {
88
 
99
 
89
-    var source: Source[String, NotUsed] = null
100
+    var datasource: Source[Map[String, String], NotUsed] = null
90
 
101
 
91
     if (!fileExists(inputFile.getPath)) {
102
     if (!fileExists(inputFile.getPath)) {
92
       return null
103
       return null
93
     }
104
     }
94
-    source = Source
105
+    datasource = Source
95
       .single(inputFile)
106
       .single(inputFile)
96
-      .flatMapConcat(
97
-        (file: File) =>
98
-          FileIO.fromPath(Paths.get(inputFile.getPath)
99
-          )
107
+      .flatMapConcat((filename: File) => {
108
+        FileIO.fromPath(
109
+          Paths.get(filename.getPath)
110
+        )
111
+      }
100
       )
112
       )
101
       .via(Compression.gunzip())
113
       .via(Compression.gunzip())
102
-      .via(
103
-        Framing.delimiter(ByteString("\n"), 4096)
104
-          .map(byteString => byteString.utf8String)
105
-      )
114
+      .via(CsvParsing.lineScanner(CsvParsing.Tab, CsvParsing.DoubleQuote))
115
+      .via(CsvToMap.toMapAsStrings())
106
 
116
 
107
-    source
117
+    datasource
108
   }
118
   }
109
 
119
 
110
-  def buildAndValidateSource(inputFile: File): Source[String, NotUsed] = {
120
+  def buildAndValidateSource(inputFile: File): Source[Map[String, String], NotUsed] = {
111
 
121
 
112
-    val source: Source[String, NotUsed] = buildSource(inputFile = inputFile)
122
+    val source: Source[Map[String, String], NotUsed] = buildSource(inputFile = inputFile)
113
     if (source == null) {
123
     if (source == null) {
114
       throw new FileNotFoundException(filename = inputFile.getPath)
124
       throw new FileNotFoundException(filename = inputFile.getPath)
115
     }
125
     }
126
     tvSeriesSink
136
     tvSeriesSink
127
   }
137
   }
128
 
138
 
129
-  def buildPersonSink(logger: Logger): Sink[Person, Future[Done]] ={
130
-    val personSink :Sink[Person, Future[Done]] = Sink
131
-      .foreach[Person]((person: Person)=>{
139
+  def buildPersonsSink(logger: Logger): Sink[Person,Future[Done]] = {
140
+    val listPersonsSink: Sink[Person, Future[Done]]=
141
+      Sink.foreach[Person]((person: Person)=>{
132
         logger.info(s"${person.toString}")
142
         logger.info(s"${person.toString}")
133
       })
143
       })
134
 
144
 
135
-    personSink
145
+    listPersonsSink
146
+  }
147
+
148
+  def buildPersonSink(logger: Logger): Sink[Person, Future[Done]] = {
149
+    Sink.foreach[Person](
150
+      (person: Person) => logger.info(s"${person.toString}")
151
+    )
136
   }
152
   }
137
 }
153
 }

+ 6
- 0
src/main/scala/fr/natan/akkastreamfileprocessingapi/service/AkkaStreamFileProcessing.scala View File

1
 package fr.natan.akkastreamfileprocessingapi.service
1
 package fr.natan.akkastreamfileprocessingapi.service
2
 
2
 
3
+import akka.Done
4
+
5
+import scala.concurrent.Future
6
+
3
 trait AkkaStreamFileProcessing {
7
 trait AkkaStreamFileProcessing {
4
 
8
 
5
   def getAllMovies()
9
   def getAllMovies()
6
   def getMoviesByTitle (movieTitle: String)
10
   def getMoviesByTitle (movieTitle: String)
7
 
11
 
8
   def getAllPersons()
12
   def getAllPersons()
13
+
14
+  def getPersonById(nconst: String)
9
 }
15
 }

+ 49
- 20
src/main/scala/fr/natan/akkastreamfileprocessingapi/service/AkkaStreamFileProcessingImpl.scala View File

6
 import com.typesafe.scalalogging.slf4j.Logger
6
 import com.typesafe.scalalogging.slf4j.Logger
7
 import fr.natan.akkastreamfileprocessingapi.datasource.Datasource.{nameBasics, titleBasics}
7
 import fr.natan.akkastreamfileprocessingapi.datasource.Datasource.{nameBasics, titleBasics}
8
 import fr.natan.akkastreamfileprocessingapi.models.{Person, TvSeries}
8
 import fr.natan.akkastreamfileprocessingapi.models.{Person, TvSeries}
9
-import fr.natan.akkastreamfileprocessingapi.service.AkkaStreamComponents.{buildAndValidateSource, buildPersonFlow, buildPersonSink, buildSource, buildTvSerieFlow, buildTvSeriesSink, buildfilterByMoviePrimaryTitleFlow}
9
+import fr.natan.akkastreamfileprocessingapi.service.AkkaStreamComponents.{
10
+  buildAndValidateSource,
11
+  buildPersonFlow,
12
+  buildPersonSink,
13
+  buildPersonsSink,
14
+  buildSource, buildTvSerieFlow,
15
+  buildTvSeriesSink, filterByMoviePrimaryTitleFlow,
16
+  filterByPersonID
17
+}
10
 import org.slf4j.LoggerFactory
18
 import org.slf4j.LoggerFactory
11
 import org.springframework.stereotype.Component
19
 import org.springframework.stereotype.Component
12
 
20
 
22
 
30
 
23
   override def getAllMovies(): Unit = {
31
   override def getAllMovies(): Unit = {
24
 
32
 
25
-    val source: Source[String, NotUsed] = buildAndValidateSource(inputFile = titleBasics)
33
+    val source: Source[Map[String, String], NotUsed] = buildAndValidateSource(inputFile = titleBasics)
26
     val sink: Sink[TvSeries, Future[Done]] = buildTvSeriesSink(logger = logger)
34
     val sink: Sink[TvSeries, Future[Done]] = buildTvSeriesSink(logger = logger)
27
 
35
 
28
 
36
 
30
 
38
 
31
     //graph sink->flow->sink
39
     //graph sink->flow->sink
32
     source
40
     source
33
-      .via(flow = buildTvSerieFlow(movieID = "tconst"))
41
+      .via(flow = buildTvSerieFlow())
34
       .runWith(sink = sink)
42
       .runWith(sink = sink)
35
       .andThen {
43
       .andThen {
36
-      case Success(value) =>
37
-        val elapsedTime: Long = (System.currentTimeMillis() - startingTime) / 1000
38
-        logger.info(s"$value: successfully processing file, elapsed time $titleBasics: $elapsedTime sec")
39
-      case Failure(error: Error) => logger.error(s"$error")
40
-    }
44
+        case Success(value) =>
45
+          val elapsedTime: Long = (System.currentTimeMillis() - startingTime) / 1000
46
+          logger.info(s"$value: successfully processing file, elapsed time $titleBasics: $elapsedTime sec")
47
+        case Failure(error: Error) => logger.error(s"$error")
48
+      }
41
 
49
 
42
   }
50
   }
43
 
51
 
44
   override def getMoviesByTitle(moviePrimaryTitle: String): Unit = {
52
   override def getMoviesByTitle(moviePrimaryTitle: String): Unit = {
45
 
53
 
46
-    val tvSeriesSource: Source[String, NotUsed] = buildAndValidateSource(inputFile = titleBasics)
54
+    /*val tvSeriesSource: Source[String, NotUsed] = buildAndValidateSource(inputFile = titleBasics)
47
     val tvSeriesSink: Sink[TvSeries, Future[Done]] = buildTvSeriesSink(logger = logger)
55
     val tvSeriesSink: Sink[TvSeries, Future[Done]] = buildTvSeriesSink(logger = logger)
48
 
56
 
49
     val filterByMovieTitleFlow: Flow[String, TvSeries, NotUsed] =
57
     val filterByMovieTitleFlow: Flow[String, TvSeries, NotUsed] =
50
-      buildfilterByMoviePrimaryTitleFlow(moviePrimaryTitle = moviePrimaryTitle)
58
+      filterByMoviePrimaryTitleFlow(moviePrimaryTitle = moviePrimaryTitle)
51
 
59
 
52
     val startTime: Long = System.currentTimeMillis()
60
     val startTime: Long = System.currentTimeMillis()
53
-    val listTvSeries: Future[Done]= tvSeriesSource
61
+    val listTvSeries: Future[Done] = tvSeriesSource
54
       .via(flow = filterByMovieTitleFlow)
62
       .via(flow = filterByMovieTitleFlow)
55
       .runWith(sink = tvSeriesSink)
63
       .runWith(sink = tvSeriesSink)
56
       .andThen {
64
       .andThen {
57
-      case Success(value) =>
58
-        val elapsedTime: Long = (System.currentTimeMillis() - startTime) / 1000
59
-        logger.info(s"$value: successfully processing file, elapsed time $titleBasics: $elapsedTime sec")
60
-      case Failure(error: Error) => logger.error(s"$error")
61
-    }
65
+        case Success(value) =>
66
+          val elapsedTime: Long = (System.currentTimeMillis() - startTime) / 1000
67
+          logger.info(s"$value: successfully processing file, elapsed time $titleBasics: $elapsedTime sec")
68
+        case Failure(error: Error) => logger.error(s"$error")
69
+      }*/
62
   }
70
   }
63
 
71
 
64
   override def getAllPersons(): Unit = {
72
   override def getAllPersons(): Unit = {
65
-    val personSource: Source[String, NotUsed]= buildSource(inputFile = nameBasics)
66
-    val personSink: Sink[Person, Future[Done]] = buildPersonSink(logger = logger)
73
+    val personSource: Source[Map[String, String], NotUsed] = buildSource(inputFile = nameBasics)
74
+    val personSink: Sink[Person, Future[Done]] = buildPersonsSink(logger = logger)
67
 
75
 
68
     //graph
76
     //graph
77
+    val startTime: Long = System.currentTimeMillis()
69
     personSource
78
     personSource
70
-      .via(flow = buildPersonFlow(personID = "nconst"))
79
+      .via(flow = buildPersonFlow())
71
       .runWith(sink = personSink)
80
       .runWith(sink = personSink)
72
       .andThen {
81
       .andThen {
73
         case Success(value) =>
82
         case Success(value) =>
74
-          logger.info(s"$value: successfully processing file")
83
+          val elapsedTime: Long = (System.currentTimeMillis() - startTime) / 1000
84
+          logger.info(s"$value: successfully processing file $elapsedTime sec")
75
         case Failure(error: Error) => logger.error(s"$error")
85
         case Failure(error: Error) => logger.error(s"$error")
76
       }
86
       }
77
   }
87
   }
88
+
89
+  override def getPersonById(nconst: String): Unit = {
90
+    val source: Source[Map[String, String], NotUsed] = buildSource(inputFile = nameBasics)
91
+    val sink: Sink[Person, Future[Done]] = buildPersonSink(logger = logger)
92
+
93
+    val startTime: Long = System.currentTimeMillis()
94
+    source
95
+      .via(flow = filterByPersonID(nconst = nconst))
96
+      .runWith(sink = sink)
97
+      .andThen {
98
+        case Success(value) => {
99
+          val elapsedTime: Long = (System.currentTimeMillis()-startTime)/1000
100
+          logger.info(s"$value: Successfully processed, elapsed time: $elapsedTime")
101
+        }
102
+        case Failure(exception) => logger.error(s"$exception: Fail")
103
+      }
104
+  }
105
+
106
+
78
 }
107
 }
79
 
108
 
80
 
109
 

Powered by TurnKey Linux.