Augmented Reality in OpenHPS
Our paper "FidMark: A Fiducial Marker Ontology for Semantically Describing Visual Markers" has been accepted to be presented at the ESWC2024 conference later in May. With this paper we propose an ontology that describes visual fiducial markers used in Augmented Reality (AR). With this description we aim to enable collaboration of multiple AR devices within same reference space.
To accompany the paper, we created a demonstrator that relies heavily on OpenHPS for both data serialization and data processing.
We started by defining a `FiducialMarker` as a type of reference space:
```typescript
@SerializableObject({
    rdf: {
        type: fidmark.FiducialMarker
    }
})
export class FiducialMarker extends ReferenceSpace {
    @SerializableMember({
        rdf: {
            predicate: fidmark.markerData,
            datatype: xsd.string
        },
    })
    data?: string;

    @SerializableMember({
        rdf: {
            predicate: fidmark.markerIdentifier,
            datatype: xsd.integer
        },
        numberType: NumberType.INTEGER
    })
    identifier?: number;

    @SerializableMember({
        rdf: {
            predicate: fidmark.hasDictionary
        }
    })
    dictionary?: MarkerDictionary;

    origin?: MarkerOrigin;

    @SerializableMember({
        rdf: {
            predicate: [fidmark.hasHeight],
            serializer: (value: number) => {
                return RDFBuilder.blankNode()
                    .add(rdf.type, qudt.QuantityValue)
                    .add(qudt.unit, LengthUnit.MILLIMETER)
                    .add(qudt.numericValue, value, xsd.double)
                    .build();
            },
            deserializer: (thing: Thing) => {
                const unit = RDFSerializer.deserialize(thing.predicates[qudt.unit][0] as Thing, LengthUnit);
                return unit.convert(parseFloat(thing.predicates[qudt.numericValue][0].value), LengthUnit.MILLIMETER);
            },
        },
    })
    height?: number;

    @SerializableMember({
        rdf: {
            predicate: [fidmark.hasWidth],
            serializer: (value: number) => {
                return RDFBuilder.blankNode()
                    .add(rdf.type, qudt.QuantityValue)
                    .add(qudt.unit, LengthUnit.MILLIMETER)
                    .add(qudt.numericValue, value, xsd.double)
                    .build();
            },
            deserializer: (thing: Thing) => {
                const unit = RDFSerializer.deserialize(thing.predicates[qudt.unit][0] as Thing, LengthUnit);
                return unit.convert(parseFloat(thing.predicates[qudt.numericValue][0].value), LengthUnit.MILLIMETER);
            },
        },
    })
    width?: number;

    @SerializableMember({
        rdf: {
            predicate: fidmark.hasImageDesciptor
        }
    })
    imageDescriptor?: ImageDescriptor;
}
```
As our goal with this paper was to represent these fiducial markers as semantic data, we used `@openhps/rdf` to serialize and deserialize the objects to and from linked data.
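As a minimal sketch of how this looks in practice (the marker values below are hypothetical, and we assume `RDFSerializer.serialize()` mirrors the `RDFSerializer.deserialize()` call used in the class above):

```typescript
import { RDFSerializer } from '@openhps/rdf';

// Hypothetical example: a 40 mm ArUco marker with identifier 5
const marker = new FiducialMarker();
marker.identifier = 5;
marker.width = 40;   // stored in millimeters, serialized as a qudt:QuantityValue
marker.height = 40;

// Assumption: serialize() is the counterpart of the deserialize() call used above
const thing = RDFSerializer.serialize(marker);
console.log(thing);
```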
With our `FiducialMarker` reference space defined, we created two new processing nodes: one for detecting ArUco markers within a video frame using js-aruco, and one for displaying 3D models positioned relative to these markers using Three.js.
Both nodes are combined in a simple model that starts with a WebRTC video source capturing the camera feed, processes each frame to detect fiducial markers, and finally superimposes 3D objects relative to these markers.
```typescript
ModelBuilder.create()
    // Create a video source
    .from(new VideoSource({
        fps: 30,
        uid: "video",
        videoSource: video,                     // Video element
        autoPlay: true,
        height: window.innerHeight,
        facingMode: { ideal: "environment" },   // Back-facing camera
    }))
    // Add all virtual objects and detectable markers to the data frame.
    // These are objects we will try to detect or display.
    .via(new CallbackNode(frame => {
        markers.forEach(marker => {
            frame.addObject(marker);
        });
        objects.forEach(virtualObject => {
            frame.addObject(virtualObject);
        });
    }))
    // Detect ArUco markers
    .via(new ArUcoMarkerDetection())
    // Display virtual objects relative to detected markers
    .via(new ThreeJSNode({ canvas }))
    .to()
    .build();
```
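For completeness, the `markers` and `objects` arrays used in the `CallbackNode` above could be populated as follows. This is a hedged sketch: in the demonstrator these objects are deserialized from their FidMark descriptions, and the `Relative3DPosition` constructor arguments shown here are an assumption.

```typescript
// Hypothetical marker to detect (in the demo, deserialized from its FidMark description)
const marker = new FiducialMarker();
marker.uid = "marker-5";
marker.identifier = 5;
marker.width = 40;   // millimeters
marker.height = 40;

// Hypothetical virtual object positioned 20 mm above the marker
const cube = new VirtualObject("cube");
cube.addRelativePosition(new Relative3DPosition(marker, 0, 0, 20));

const markers = [marker];
const objects = [cube];
```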
Our `ArUcoMarkerDetection` node loops through all markers that we are trying to detect. Once it finds a marker with the correct identifier, it determines the marker's position and orientation and adds them to the data frame before pushing it to the next node in the network:
```typescript
export class ArUcoMarkerDetection<InOut extends ImageFrame<ImageData>> extends ProcessingNode<InOut, InOut> {
    // Mapping of the ontology dictionaries to the names js-aruco uses
    mapping: any = {
        [fidmark.DICT_CHILLITAGS]: 'CHILITAGS',
        [fidmark.DICT_MIP_36h12]: 'ARUCO_MIP_36h12',
        [fidmark.DICT_ARUCO_ORIGINAL]: 'ARUCO_DEFAULT_OPENCV',
        [fidmark.DICT_4X4_1000]: 'ARUCO_4X4_1000',
    };
    // Cache of AR detectors and estimators
    protected detectors: Map<string, AR.Detector> = new Map();
    protected poseEstimators: Map<number, POS.Posit> = new Map();

    process(frame: InOut): Promise<InOut> {
        return new Promise((resolve) => {
            // Loop through all objects that "can" be detected
            frame.getObjects().forEach(markerObject => {
                if (markerObject instanceof FiducialMarker) {
                    markerObject.position = undefined;
                    const dictionaryName = this.mapping[(markerObject.dictionary as any).rdf.uri];
                    const detector = this.detectors.get(dictionaryName) ?? new AR.Detector({
                        dictionaryName: dictionaryName
                    });
                    // Create cache when it does not exist
                    if (!this.detectors.has(dictionaryName)) {
                        this.detectors.set(dictionaryName, detector);
                    }
                    if (!this.poseEstimators.has(markerObject.width)) {
                        this.poseEstimators.set(markerObject.width, new POS.Posit(markerObject.width, frame.image.width));
                    }
                }
            });

            this.detectors.forEach((detector, dictionaryName) => {
                // Detect the marker in the frame image
                const markers = detector.detect(frame.image);
                if (markers.length > 0) {
                    // Marker(s) detected, determine if they are the markers we are looking for
                    markers.forEach((marker: AR.Marker) => {
                        const markerObject = frame.getObjects().find(o => {
                            return o instanceof FiducialMarker && o.identifier === marker.id &&
                                this.mapping[(o.dictionary as any).rdf.uri] === dictionaryName;
                        }) as FiducialMarker;
                        // Only process markers with the correct identifier
                        if (markerObject && markerObject.identifier === marker.id) {
                            const posit = this.poseEstimators.get(markerObject.width);
                            const corners = marker.corners;
                            // Center the corner coordinates on the image
                            for (let i = 0; i < corners.length; ++i) {
                                const corner = corners[i];
                                corner.x = corner.x - (frame.image.width / 2);
                                corner.y = (frame.image.height / 2) - corner.y;
                            }
                            const pose = posit.pose(corners);
                            const translation = pose.bestTranslation;
                            const rotation = pose.bestRotation;
                            // Set the position of the marker
                            markerObject.setPosition(new Absolute3DPosition(
                                translation[0],
                                translation[1],
                                -translation[2],
                                LengthUnit.MILLIMETER
                            ));
                            // Set the orientation of the marker
                            markerObject.position.setOrientation(Orientation.fromEuler({
                                x: -Math.asin(-rotation[1][2]),
                                y: -Math.atan2(rotation[0][2], rotation[2][2]),
                                z: Math.atan2(rotation[1][0], rotation[1][1])
                            }));
                        }
                    });
                }
            });
            resolve(frame);
        });
    }
}
```
Finally, our `ThreeJSNode` is a processing node that creates a Three.js scene containing the virtual objects. These virtual objects are positioned relative to the markers detected by our `ArUcoMarkerDetection` node.
```typescript
export class ThreeJSNode extends ImageProcessingNode<any, any> {
    declare protected options: ThreeJSNodeOptions;
    protected canvas: HTMLCanvasElement;
    protected renderer: THREE.WebGLRenderer;
    protected camera: THREE.PerspectiveCamera;
    protected scene: THREE.Scene;

    constructor(options?: ThreeJSNodeOptions) {
        super(options);
        this.once('build', this._onBuild.bind(this));
    }

    private _onBuild(): void {
        // Prepare the canvas to draw
        this.canvas = this.options.canvas;
        this.renderer = new THREE.WebGLRenderer({
            antialias: true,
            alpha: true,
            canvas: this.options.canvas
        });
        this.renderer.setClearColor(0xffffff, 1);
        this.camera = new THREE.PerspectiveCamera();
        this.scene = new THREE.Scene();
        this.scene.add(this.camera);
    }

    processImage(image: ImageData, frame: DataFrame): Promise<ImageData> {
        return new Promise((resolve) => {
            this.renderer.setSize(image.width, image.height);
            const cameraObject = frame.source as PerspectiveCameraObject;
            this.camera.fov = cameraObject.fov;
            this.camera.aspect = image.width / image.height;
            this.camera.near = 1;
            this.camera.far = cameraObject.far;

            this.scene = new THREE.Scene();
            this.scene.add(this.camera);
            this.scene.add(new THREE.AmbientLight(0xffffff, 1));

            // Loop through all markers
            frame.getObjects().forEach(marker => {
                // Only process detected markers (with a position)
                if (marker instanceof FiducialMarker && marker.position !== undefined) {
                    // Get all virtual objects in the data frame ...
                    const virtualObjects = frame.getObjects(VirtualObject).filter(obj => {
                        // ... that are positioned relative to this marker
                        return obj.getRelativePosition(marker.uid) !== undefined;
                    });
                    virtualObjects.forEach(object => {
                        const position = (object.getRelativePosition(marker.uid, Relative3DPosition.name) as Relative3DPosition);
                        if (position) {
                            // Create a 3D mesh of the objects
                            const mesh = object.geometry.gltf.scene;
                            mesh.rotation.setFromRotationMatrix(marker.position.orientation.toRotationMatrix() as any);
                            mesh.position.set(...marker.position.toVector3()
                                .add(position.toVector3(LengthUnit.MILLIMETER)
                                    .applyQuaternion(marker.position.orientation))
                                .toArray());
                            mesh.scale.x = marker.width;
                            mesh.scale.y = marker.height;
                            mesh.scale.z = (marker.width + marker.height) / 2;
                            // ... and add it to the 3D scene
                            this.scene.add(mesh);
                        }
                    });
                }
            });

            this.scene.background = new THREE.Texture(image);
            this.scene.background.needsUpdate = true;
            // Render the scene with the virtual 3D objects positioned relative to the markers
            this.renderer.render(this.scene, this.camera);
            resolve(image);
        });
    }
}

export interface ThreeJSNodeOptions extends ImageProcessingOptions {
    canvas: HTMLCanvasElement;
}
```
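One detail not shown in the model above: `processImage()` reads the camera intrinsics from the frame's source object. Below is a sketch of how such a source object might be supplied, under two assumptions that are not confirmed by the snippets above: that `PerspectiveCameraObject` exposes `fov` and `far` as plain properties, and that `VideoSource` accepts a `source` object that it attaches to each frame.

```typescript
// Hypothetical camera description matching the properties read in processImage()
const cameraObject = new PerspectiveCameraObject("camera");
cameraObject.fov = 70;    // vertical field of view in degrees
cameraObject.far = 5000;  // far clipping plane, same unit as marker positions (mm)

// Assumption: the source object is attached to every frame as frame.source
const source = new VideoSource({
    uid: "video",
    source: cameraObject,
    videoSource: video,
});
```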