diff --git a/src/ObjectDetectionViaSavedModelBundle.java b/src/ObjectDetectionViaSavedModelBundle.java
new file mode 100644
index 0000000..d9b96e5
--- /dev/null
+++ b/src/ObjectDetectionViaSavedModelBundle.java
@@ -0,0 +1,213 @@
+import java.io.IOException;
+import java.nio.FloatBuffer;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.Arrays;
+
+import org.opencv.core.Core;
+import org.tensorflow.DataType;
+import org.tensorflow.Graph;
+import org.tensorflow.Output;
+import org.tensorflow.SavedModelBundle;
+import org.tensorflow.Session;
+import org.tensorflow.Tensor;
+import org.tensorflow.TensorFlow;
+import org.tensorflow.types.UInt8;
+
+/**
+ * Runs an exported TensorFlow SavedModel on one test image and prints the
+ * tensor fetched from "detection_boxes:0".
+ */
+public class ObjectDetectionViaSavedModelBundle {
+
+    /**
+     * Reads the test image, converts it to a tensor, feeds it to "image_tensor:0"
+     * and prints the fetched "detection_boxes:0" tensor.
+     *
+     * @param args unused
+     * @throws Exception if loading or running the model fails
+     */
+    public static void main(String[] args) throws Exception {
+        /*System.out.println("Reading model from TensorFlow...");
+
+        System.loadLibrary(Core.NATIVE_LIBRARY_NAME);
+
+        ObjectDetector objectDetector = new ObjectDetector();
+
+
+        objectDetector.testGetLayerTypes();
+        objectDetector.testGetLayer();
+        objectDetector.testImage();*/
+
+        final String value = "Hello from " + TensorFlow.version();
+        System.out.println(value);
+
+        byte[] imageBytes = readAllBytesOrExit(Paths.get("/home/dpapp/tensorflow-1.5.0/models/raccoon_dataset/test_images/ironOre_test_9.jpg"));
+
+        // Closing the bundle releases its graph and session, so they are not closed
+        // separately; the tensors are closed by the same try-with-resources.
+        try (Tensor image = constructAndExecuteGraphToNormalizeImage(imageBytes);
+                SavedModelBundle load = SavedModelBundle.load("/home/dpapp/tensorflow-1.5.0/models/raccoon_dataset/results/checkpoint_23826/saved_model/", "serve");
+                Tensor result = load.session().runner()
+                        .feed("image_tensor:0", image)
+                        .fetch("detection_boxes:0").run().get(0)) {
+            System.out.println(formatFloatTensor(result));
+        }
+
+        System.out.println("Done...");
+    }
+
+    /**
+     * Renders a float tensor as text. A scalar is rendered as its single value;
+     * any other tensor as its shape followed by its flattened elements, because
+     * floatValue() is only defined for scalar tensors.
+     *
+     * @param tensor tensor to render; must hold floats
+     * @return printable form of the tensor
+     */
+    private static String formatFloatTensor(Tensor tensor) {
+        if (tensor.numDimensions() == 0) {
+            return String.valueOf(tensor.floatValue());
+        }
+        float[] values = new float[tensor.numElements()];
+        tensor.writeTo(FloatBuffer.wrap(values));
+        return Arrays.toString(tensor.shape()) + " " + Arrays.toString(values);
+    }
+
+    /**
+     * Reads the whole file at the given path; on an I/O error prints a message
+     * to stderr and terminates the JVM with exit status 1.
+     *
+     * @param path file to read
+     * @return the file contents
+     */
+    private static byte[] readAllBytesOrExit(Path path) {
+        try {
+            return Files.readAllBytes(path);
+        } catch (IOException e) {
+            System.err.println("Failed to read [" + path + "]: " + e.getMessage());
+            System.exit(1);
+        }
+        // Only reached if System.exit returns; needed to satisfy the compiler.
+        return null;
+    }
+
+    /**
+     * Builds and runs a small graph that decodes JPEG bytes (3 channels), casts
+     * them to float, adds a leading batch dimension, resizes bilinearly to
+     * 224x224 and normalises with (value - mean) / scale.
+     *
+     * NOTE(review): main feeds this float tensor to "image_tensor:0"; confirm that
+     * the SavedModel input accepts float data of this size.
+     *
+     * @param imageBytes encoded JPEG image
+     * @return the normalised image tensor; the caller closes it
+     */
+    private static Tensor constructAndExecuteGraphToNormalizeImage(byte[] imageBytes) {
+        try (Graph g = new Graph()) {
+            GraphBuilder b = new GraphBuilder(g);
+            // Resize target and normalisation constants. The sample this was taken from
+            // describes them as specific to the pre-trained model at
+            // https://storage.googleapis.com/download.tensorflow.org/models/inception5h.zip
+            // (images scaled to 224x224, colors converted with (value - Mean)/Scale).
+            // NOTE(review): confirm they suit the SavedModel loaded in main.
+            final int H = 224;
+            final int W = 224;
+            final float mean = 117f;
+            final float scale = 1f;
+
+            // Since the graph is being constructed once per execution here, we can use a constant for the
+            // input image. If the graph were to be re-used for multiple input images, a placeholder would
+            // have been more appropriate.
+            final Output input = b.constant("input", imageBytes);
+            final Output output =
+                    b.div(
+                            b.sub(
+                                    b.resizeBilinear(
+                                            b.expandDims(
+                                                    b.cast(b.decodeJpeg(input, 3), Float.class),
+                                                    b.constant("make_batch", 0)),
+                                            b.constant("size", new int[] {H, W})),
+                                    b.constant("mean", mean)),
+                            b.constant("scale", scale));
+            try (Session s = new Session(g)) {
+                return s.runner().fetch(output.op().name()).run().get(0).expect(Float.class);
+            }
+        }
+    }
+
+    /** Helper that adds the operations used above to a Graph. */
+    static class GraphBuilder {
+        private final Graph g;
+
+        GraphBuilder(Graph g) {
+            this.g = g;
+        }
+
+        Output div(Output x, Output y) {
+            return binaryOp("Div", x, y);
+        }
+
+        Output sub(Output x, Output y) {
+            return binaryOp("Sub", x, y);
+        }
+
+        Output resizeBilinear(Output images, Output size) {
+            return binaryOp("ResizeBilinear", images, size);
+        }
+
+        Output expandDims(Output input, Output dim) {
+            return binaryOp("ExpandDims", input, dim);
+        }
+
+        Output cast(Output value, Class type) {
+            DataType dtype = DataType.fromClass(type);
+            return g.opBuilder("Cast", "Cast")
+                    .addInput(value)
+                    .setAttr("DstT", dtype)
+                    .build()
+                    .output(0);
+        }
+
+        Output decodeJpeg(Output contents, long channels) {
+            return g.opBuilder("DecodeJpeg", "DecodeJpeg")
+                    .addInput(contents)
+                    .setAttr("channels", channels)
+                    .build()
+                    .output(0);
+        }
+
+        /** Adds a Const op named {@code name}; the temporary tensor is closed after the op is built. */
+        Output constant(String name, Object value, Class type) {
+            try (Tensor t = Tensor.create(value, type)) {
+                return g.opBuilder("Const", name)
+                        .setAttr("dtype", DataType.fromClass(type))
+                        .setAttr("value", t)
+                        .build()
+                        .output(0);
+            }
+        }
+
+        Output constant(String name, byte[] value) {
+            return this.constant(name, value, String.class);
+        }
+
+        Output constant(String name, int value) {
+            return this.constant(name, value, Integer.class);
+        }
+
+        Output constant(String name, int[] value) {
+            return this.constant(name, value, Integer.class);
+        }
+
+        Output constant(String name, float value) {
+            return this.constant(name, value, Float.class);
+        }
+
+        /** Adds an op of the given type, also used as its name, with two inputs and returns output 0. */
+        private Output binaryOp(String type, Output in1, Output in2) {
+            return g.opBuilder(type, type).addInput(in1).addInput(in2).build().output(0);
+        }
+    }
+}
diff --git a/src/ObjectDetector.java b/src/ObjectDetector.java
index b4744db..ad952f8 100644
--- a/src/ObjectDetector.java
+++ b/src/ObjectDetector.java
@@ -4,11 +4,16 @@ import static org.junit.Assert.assertNotNull;
 
 import java.awt.List;
 import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
 import java.util.ArrayList;
 
 import org.opencv.core.Core;
 import org.opencv.core.CvType;
 import org.opencv.core.Mat;
+import org.opencv.core.Point;
 import org.opencv.core.Scalar;
 import org.opencv.core.Size;
 import org.opencv.dnn.DictValue;
@@ -17,6 +22,14 @@ import org.opencv.dnn.Layer;
 import org.opencv.dnn.Net;
 import org.opencv.imgcodecs.Imgcodecs;
 import org.opencv.imgproc.Imgproc;
+import org.tensorflow.DataType;
+import org.tensorflow.Graph;
+import org.tensorflow.Output;
+import org.tensorflow.SavedModelBundle;
+import org.tensorflow.Session;
+import org.tensorflow.Tensor;
+import org.tensorflow.TensorFlow;
+import org.tensorflow.types.UInt8;
 
 public class ObjectDetector {
 
@@ -29,7 +42,7 @@ public class ObjectDetector {
     public ObjectDetector() throws Exception {
         this.inputImagePath = "/home/dpapp/tensorflow-1.5.0/models/raccoon_dataset/test_images/ironOre_test_9.jpg";
         this.inputModelPath = "/home/dpapp/tensorflow-1.5.0/models/raccoon_dataset/results/checkpoint_23826/frozen_graph_inference.pb";
-        this.inputModelArgumentsPath = "/home/dpapp/tensorflow-1.5.0/models/raccoon_dataset/generated_graph.pbtxt";
+        this.inputModelArgumentsPath = "/home/dpapp/tensorflow-1.5.0/models/raccoon_dataset/training/graph.pbtxt";
 
         File f = new File(inputImagePath);
         if(!f.exists()) throw new Exception("Test image is missing: " + inputImagePath);
@@ -41,6 +54,9 @@ public class ObjectDetector {
         net = Dnn.readNetFromTensorflow(inputModelPath, inputModelArgumentsPath);
     }
 
+
+
+
     public void testGetLayerTypes() {
         ArrayList layertypes = new ArrayList();
         net.getLayerTypes(layertypes);
@@ -64,31 +80,86 @@ public class ObjectDetector {
         assertEquals("Layer name does not match the expected value!", testLayerName, layer.get_name());
     }
 
-    public void testImage() throws Exception {
-        Mat rawImage = Imgcodecs.imread(inputImagePath);
-        Mat grayImage = new Mat();
-        Imgproc.cvtColor(rawImage, grayImage, Imgproc.COLOR_RGB2GRAY);
-        assertNotNull("Loading image from file failed!", rawImage);
+    /**
+     * Runs the network on the configured input image and draws a labelled box for
+     * every detection whose confidence exceeds the threshold.
+     *
+     * @return the loaded image; boxes are drawn on subFrame, a submat of it
+     * @throws Exception if the image cannot be loaded
+     */
+    public Mat testImage() throws Exception {
+        final int IN_WIDTH = 300;
+        final int IN_HEIGHT = 300;
+        final float WH_RATIO = (float)IN_WIDTH / IN_HEIGHT;
+        final double IN_SCALE_FACTOR = 0.007843;
+        final double MEAN_VAL = 127.5;
+        final double THRESHOLD = 0.2;
+        Mat frame = Imgcodecs.imread(inputImagePath);
+        // imread reports a failed load with an empty Mat rather than null, so test
+        // for emptiness before the image is used.
+        if (frame.empty()) {
+            throw new Exception("Loading image from file failed!");
+        }
+        Imgproc.cvtColor(frame, frame, Imgproc.COLOR_RGBA2RGB);
 
-        Mat image = new Mat();
-
-        Imgproc.resize(grayImage, image, new Size(224, 224));
+        Mat blob = Dnn.blobFromImage(frame, IN_SCALE_FACTOR,
+                new Size(IN_WIDTH, IN_HEIGHT),
+                new Scalar(MEAN_VAL, MEAN_VAL, MEAN_VAL), false, false);
+        net.setInput(blob);
+        Mat detections = net.forward();
 
-        Mat inputBlob = Dnn.blobFromImage(image);
-
-        assertNotNull("Converting image to blob failed!", inputBlob);
+        int cols = frame.cols();
+        int rows = frame.rows();
 
-        Mat inputBlobP = new Mat();
-
-        Core.subtract(inputBlob, new Scalar(117.0), inputBlobP);
-
-
-        net.setInput(inputBlobP);
+        Size cropSize;
+        if ((float)cols / rows > WH_RATIO) {
+            cropSize = new Size(rows * WH_RATIO, rows);
+        } else {
+            cropSize = new Size(cols, cols / WH_RATIO);
+        }
 
 
-        Mat result = net.forward();
+        int y1 = (int)(rows - cropSize.height) / 2;
+        int y2 = (int)(y1 + cropSize.height);
+        int x1 = (int)(cols - cropSize.width) / 2;
+        int x2 = (int)(x1 + cropSize.width);
+        Mat subFrame = frame.submat(y1, y2, x1, x2);
 
-        assertNotNull("Net returned no result!", result);
+        cols = subFrame.cols();
+        rows = subFrame.rows();
+
+        detections = detections.reshape(1, (int)detections.total() / 7);
+
+        for (int i = 0; i < detections.rows(); ++i) {
+            double confidence = detections.get(i, 2)[0];
+            if (confidence > THRESHOLD) {
+                // Columns 3-6 of the row are scaled by the crop size to get the box corners;
+                // column 1 (the class id) is not read because the label text is fixed.
+
+                int xLeftBottom = (int)(detections.get(i, 3)[0] * cols);
+                int yLeftBottom = (int)(detections.get(i, 4)[0] * rows);
+                int xRightTop = (int)(detections.get(i, 5)[0] * cols);
+                int yRightTop = (int)(detections.get(i, 6)[0] * rows);
+
+                // Draw rectangle around detected object.
+                Imgproc.rectangle(subFrame, new Point(xLeftBottom, yLeftBottom),
+                        new Point(xRightTop, yRightTop),
+                        new Scalar(0, 255, 0));
+                String label = "ironOre" + ": " + confidence;
+                int[] baseLine = new int[1];
+                Size labelSize = Imgproc.getTextSize(label, Core.FONT_HERSHEY_SIMPLEX, 0.5, 1, baseLine);
+
+                // Draw background for label.
+                Imgproc.rectangle(subFrame, new Point(xLeftBottom, yLeftBottom - labelSize.height),
+                        new Point(xLeftBottom + labelSize.width, yLeftBottom + baseLine[0]),
+                        new Scalar(255, 255, 255), Core.FILLED);
+
+                // Write class name and confidence.
+                Imgproc.putText(subFrame, label, new Point(xLeftBottom, yLeftBottom),
+                        Core.FONT_HERSHEY_SIMPLEX, 0.5, new Scalar(0, 0, 0));
+            }
+        }
+        return frame;
     }
 
 
@@ -103,7 +174,150 @@ public class ObjectDetector {
         objectDetector.testGetLayerTypes();
         objectDetector.testGetLayer();
         objectDetector.testImage();
-
+        /*
+
+        final int IMG_SIZE = 128;
+        final String value = "Hello from " + TensorFlow.version();
+
+        byte[] imageBytes = readAllBytesOrExit(Paths.get("/home/dpapp/tensorflow-1.5.0/models/raccoon_dataset/test_images/ironOre_test_9.jpg"));
+        Tensor image = constructAndExecuteGraphToNormalizeImage(imageBytes);
+
+        SavedModelBundle load = SavedModelBundle.load("/home/dpapp/tensorflow-1.5.0/models/raccoon_dataset/SavedModel/saved_model.pb");
+
+        long[] sitio2;
+        try (Graph g = load.graph()) {
+            try (Session s = load.session();
+                Tensor result = s.runner()
+                .feed("image_tensor", image)
+                .fetch("detection_boxes").run().get(0))
+            {
+                sitio2 = (long[]) result.copyTo(new long[1]);
+                System.out.print(sitio2[0]+"\n");
+            }
+        }
+        load.close();
+        */
         System.out.println("Done...");
     }
+
+    /** Reads the whole file, or prints the failure to stderr and exits with status 1. */
+    private static byte[] readAllBytesOrExit(Path path) {
+        try {
+            return Files.readAllBytes(path);
+        } catch (IOException e) {
+            System.err.println("Failed to read [" + path + "]: " + e.getMessage());
+            System.exit(1);
+        }
+        return null;
+    }
+
+    /** Decodes the JPEG bytes into a float tensor resized to 224x224 and normalised as (value - mean) / scale. */
+    private static Tensor constructAndExecuteGraphToNormalizeImage(byte[] imageBytes) {
+        try (Graph g = new Graph()) {
+            GraphBuilder b = new GraphBuilder(g);
+            // Some constants specific to the pre-trained model at:
+            // https://storage.googleapis.com/download.tensorflow.org/models/inception5h.zip
+            //
+            // - The model was trained with images scaled to 224x224 pixels.
+            // - The colors, represented as R, G, B in 1-byte each were converted to
+            //   float using (value - Mean)/Scale.
+            final int H = 224;
+            final int W = 224;
+            final float mean = 117f;
+            final float scale = 1f;
+
+            // Since the graph is being constructed once per execution here, we can use a constant for the
+            // input image. If the graph were to be re-used for multiple input images, a placeholder would
+            // have been more appropriate.
+            final Output input = b.constant("input", imageBytes);
+            final Output output =
+                    b.div(
+                            b.sub(
+                                    b.resizeBilinear(
+                                            b.expandDims(
+                                                    b.cast(b.decodeJpeg(input, 3), Float.class),
+                                                    b.constant("make_batch", 0)),
+                                            b.constant("size", new int[] {H, W})),
+                                    b.constant("mean", mean)),
+                            b.constant("scale", scale));
+            try (Session s = new Session(g)) {
+                return s.runner().fetch(output.op().name()).run().get(0).expect(Float.class);
+            }
+        }
+    }
+
+    /** Helper that adds the operations used above to a Graph. */
+    static class GraphBuilder {
+        GraphBuilder(Graph g) {
+            this.g = g;
+        }
+
+        Output div(Output x, Output y) {
+            return binaryOp("Div", x, y);
+        }
+
+        Output sub(Output x, Output y) {
+            return binaryOp("Sub", x, y);
+        }
+
+        Output resizeBilinear(Output images, Output size) {
+            return binaryOp3("ResizeBilinear", images, size);
+        }
+
+        Output expandDims(Output input, Output dim) {
+            return binaryOp3("ExpandDims", input, dim);
+        }
+
+        Output cast(Output value, Class type) {
+            DataType dtype = DataType.fromClass(type);
+            return g.opBuilder("Cast", "Cast")
+                    .addInput(value)
+                    .setAttr("DstT", dtype)
+                    .build()
+                    .output(0);
+        }
+
+        Output decodeJpeg(Output contents, long channels) {
+            return g.opBuilder("DecodeJpeg", "DecodeJpeg")
+                    .addInput(contents)
+                    .setAttr("channels", channels)
+                    .build()
+                    .output(0);
+        }
+
+        Output constant(String name, Object value, Class type) {
+            try (Tensor t = Tensor.create(value, type)) {
+                return g.opBuilder("Const", name)
+                        .setAttr("dtype", DataType.fromClass(type))
+                        .setAttr("value", t)
+                        .build()
+                        .output(0);
+            }
+        }
+        Output constant(String name, byte[] value) {
+            return this.constant(name, value, String.class);
+        }
+
+        Output constant(String name, int value) {
+            return this.constant(name, value, Integer.class);
+        }
+
+        Output constant(String name, int[] value) {
+            return this.constant(name, value, Integer.class);
+        }
+
+        Output constant(String name, float value) {
+            return this.constant(name, value, Float.class);
+        }
+
+        private Output binaryOp(String type, Output in1, Output in2) {
+            return g.opBuilder(type, type).addInput(in1).addInput(in2).build().output(0);
+        }
+
+        private Output binaryOp3(String type, Output in1, Output in2) {
+            return g.opBuilder(type, type).addInput(in1).addInput(in2).build().output(0);
+        }
+        private Graph g;
+    }
+
 }
\ No newline at end of file
diff --git a/target/classes/ObjectDetectionViaSavedModelBundle$GraphBuilder.class b/target/classes/ObjectDetectionViaSavedModelBundle$GraphBuilder.class
new file mode 100644
index 0000000..3a331a4
Binary files /dev/null and b/target/classes/ObjectDetectionViaSavedModelBundle$GraphBuilder.class differ
diff --git a/target/classes/ObjectDetectionViaSavedModelBundle.class b/target/classes/ObjectDetectionViaSavedModelBundle.class
new file mode 100644
index 0000000..d83669b
Binary files /dev/null and b/target/classes/ObjectDetectionViaSavedModelBundle.class differ
diff --git a/target/classes/ObjectDetector$GraphBuilder.class b/target/classes/ObjectDetector$GraphBuilder.class
new file mode 100644
index 0000000..cc159e0
Binary files /dev/null and b/target/classes/ObjectDetector$GraphBuilder.class differ
diff --git a/target/classes/ObjectDetector.class b/target/classes/ObjectDetector.class
index 8264198..d15481b 100644
Binary files a/target/classes/ObjectDetector.class and b/target/classes/ObjectDetector.class differ