{
  "name": "CLAMS NFA Wrapper",
  "description": "Wraps the [NVIDIA NeMo Forced Aligner tool](https://docs.nvidia.com/nemo-framework/user-guide/latest/nemotoolkit/tools/nemo_forced_aligner.html) to temporally align transcribed text with its audio source.",
  "app_version": "v1.0",
  "mmif_version": "1.2.0",
  "analyzer_version": "2.7.3",
  "app_license": "Apache 2.0",
  "analyzer_license": "Apache 2.0",
  "identifier": "http://apps.clams.ai/nfa-wrapper/v1.0",
  "url": "https://github.com/clamsproject/app-nfa-wrapper",
  "input": [
    [
      {
        "@type": "http://clams.ai/vocabulary/type/AudioDocument/v2",
        "required": true
      },
      {
        "@type": "http://clams.ai/vocabulary/type/VideoDocument/v2",
        "required": true
      }
    ],
    {
      "@type": "http://clams.ai/vocabulary/type/TextDocument/v2",
      "description": "Text content transcribed from audio input with no existing annotations.",
      "required": true
    }
  ],
  "output": [
    {
      "@type": "http://clams.ai/vocabulary/type/Token/v1",
      "description": "A token produced by splitting the original text on whitespace. The `text` property stores the string value of the token, the `start` and `end` properties indicate the position of the token in the entire text, and the `document` property identifies the source text document."
    },
    {
      "@type": "http://clams.ai/vocabulary/type/TimeFrame/v6",
      "description": "TimeFrame annotation representing the source audio segment corresponding to a given transcribed token, with `start` and `end` times given in milliseconds.",
      "properties": {
        "frameType": "speech",
        "timeUnit": "milliseconds"
      }
    },
    {
      "@type": "http://clams.ai/vocabulary/type/Alignment/v1",
      "description": "Alignment between `Token` and `TimeFrame` annotations."
    }
  ],
  "parameters": [
    {
      "name": "model",
      "description": "NeMo ASR model to use. Choices: fc_hybrid, parakeet, conformer, fc_ctc. By default, the fc_hybrid model will be used.",
      "type": "string",
      "choices": [
        "fc_hybrid",
        "parakeet",
        "conformer",
        "fc_ctc"
      ],
      "default": "fc_hybrid",
      "multivalued": false
    },
    {
      "name": "pretty",
      "description": "The JSON body of the HTTP response will be re-formatted with 2-space indentation",
      "type": "boolean",
      "default": false,
      "multivalued": false
    },
    {
      "name": "runningTime",
      "description": "The running time of the app will be recorded in the view metadata",
      "type": "boolean",
      "default": true,
      "multivalued": false
    },
    {
      "name": "hwFetch",
      "description": "The hardware information (architecture, GPU and vRAM) will be recorded in the view metadata",
      "type": "boolean",
      "default": false,
      "multivalued": false
    },
    {
      "name": "tfSamplingMode",
      "description": "Sampling mode for TimeFrame annotations. Has no effect when the app does not process TimeFrames. \"representatives\" uses all representative timepoints if present, otherwise skips the TimeFrame. \"single\" uses the middle representative if present, otherwise extracts an image from the midpoint of the start/end interval (midpoint is calculated by floor division of the sum of start and end). \"all\" uses all target timepoints if present, otherwise extracts all images from the time interval.",
      "type": "string",
      "choices": [
        "representatives",
        "single",
        "all"
      ],
      "default": "representatives",
      "multivalued": false
    }
  ]
}