scan.js 2.69 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131
'use strict'

var licenses = []
  .concat(require('spdx-license-ids'))
  .concat(require('spdx-license-ids/deprecated'))
var exceptions = require('spdx-exceptions')

module.exports = function (source) {
  var index = 0

  function hasMore () {
    return index < source.length
  }

  // `value` can be a regexp or a string.
  // If it is recognized, the matching source string is returned and
  // the index is incremented. Otherwise `undefined` is returned.
  function read (value) {
    if (value instanceof RegExp) {
      var chars = source.slice(index)
      var match = chars.match(value)
      if (match) {
        index += match[0].length
        return match[0]
      }
    } else {
      if (source.indexOf(value, index) === index) {
        index += value.length
        return value
      }
    }
  }

  function skipWhitespace () {
    read(/[ ]*/)
  }

  function operator () {
    var string
    var possibilities = ['WITH', 'AND', 'OR', '(', ')', ':', '+']
    for (var i = 0; i < possibilities.length; i++) {
      string = read(possibilities[i])
      if (string) {
        break
      }
    }

    if (string === '+' && index > 1 && source[index - 2] === ' ') {
      throw new Error('Space before `+`')
    }

    return string && {
      type: 'OPERATOR',
      string: string
    }
  }

  function idstring () {
    return read(/[A-Za-z0-9-.]+/)
  }

  function expectIdstring () {
    var string = idstring()
    if (!string) {
      throw new Error('Expected idstring at offset ' + index)
    }
    return string
  }

  function documentRef () {
    if (read('DocumentRef-')) {
      var string = expectIdstring()
      return {type: 'DOCUMENTREF', string: string}
    }
  }

  function licenseRef () {
    if (read('LicenseRef-')) {
      var string = expectIdstring()
      return {type: 'LICENSEREF', string: string}
    }
  }

  function identifier () {
    var begin = index
    var string = idstring()

    if (licenses.indexOf(string) !== -1) {
      return {
        type: 'LICENSE',
        string: string
      }
    } else if (exceptions.indexOf(string) !== -1) {
      return {
        type: 'EXCEPTION',
        string: string
      }
    }

    index = begin
  }

  // Tries to read the next token. Returns `undefined` if no token is
  // recognized.
  function parseToken () {
    // Ordering matters
    return (
      operator() ||
      documentRef() ||
      licenseRef() ||
      identifier()
    )
  }

  var tokens = []
  while (hasMore()) {
    skipWhitespace()
    if (!hasMore()) {
      break
    }

    var token = parseToken()
    if (!token) {
      throw new Error('Unexpected `' + source[index] +
                      '` at offset ' + index)
    }

    tokens.push(token)
  }
  return tokens
}