{"version":3,"file":"sdf-parser.min.js","sources":["../node_modules/ensure-string/lib-esm/index.js","../node_modules/isutf8/dist/index.esm.js","../src/getEntriesBoundaries.js","../src/util/getMolecule.js","../src/iterator.browser.js","../src/parse.js"],"sourcesContent":["import isutf8 from 'isutf8';\n/**\n * Ensure that the data is string. If it is an ArrayBuffer it will be converted to string using TextDecoder.\n * @param blob\n * @param options\n * @returns\n */\nexport function ensureString(blob, options = {}) {\n    if (typeof blob === 'string') {\n        return blob;\n    }\n    if (ArrayBuffer.isView(blob) || blob instanceof ArrayBuffer) {\n        const { encoding = guessEncoding(blob) } = options;\n        const decoder = new TextDecoder(encoding);\n        return decoder.decode(blob);\n    }\n    throw new TypeError(`blob must be a string, ArrayBuffer or ArrayBufferView`);\n}\nfunction guessEncoding(blob) {\n    const uint8 = ArrayBuffer.isView(blob)\n        ? new Uint8Array(blob.buffer, blob.byteOffset, blob.byteLength)\n        : new Uint8Array(blob);\n    if (uint8.length >= 2) {\n        if (uint8[0] === 0xfe && uint8[1] === 0xff) {\n            return 'utf-16be';\n        }\n        if (uint8[0] === 0xff && uint8[1] === 0xfe) {\n            return 'utf-16le';\n        }\n    }\n    //@ts-expect-error an ArrayBuffer is also ok\n    if (!isutf8(blob))\n        return 'latin1';\n    return 'utf-8';\n}\n//# sourceMappingURL=index.js.map","/*\n    https://tools.ietf.org/html/rfc3629\n\n    UTF8-char = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4\n\n    UTF8-1    = %x00-7F\n\n    UTF8-2    = %xC2-DF UTF8-tail\n\n    UTF8-3    = %xE0 %xA0-BF UTF8-tail\n                %xE1-EC 2( UTF8-tail )\n                %xED %x80-9F UTF8-tail\n                %xEE-EF 2( UTF8-tail )\n\n    UTF8-4    = %xF0 %x90-BF 2( UTF8-tail )\n                %xF1-F3 3( UTF8-tail )\n                %xF4 %x80-8F 2( UTF8-tail )\n\n    UTF8-tail = %x80-BF\n*/\n/**\n * Check if a Node.js Buffer or Uint8Array is UTF-8.\n */\nfunction isUtf8(buf) {\n    if (!buf) {\n        return false;\n    }\n    var i = 0;\n    var len = buf.length;\n    while (i < len) {\n        // UTF8-1 = %x00-7F\n        if (buf[i] <= 0x7F) {\n            i++;\n            continue;\n        }\n        // UTF8-2 = %xC2-DF UTF8-tail\n        if (buf[i] >= 0xC2 && buf[i] <= 0xDF) {\n            // if(buf[i + 1] >= 0x80 && buf[i + 1] <= 0xBF) {\n            if (buf[i + 1] >> 6 === 2) {\n                i += 2;\n                continue;\n            }\n            else {\n                return false;\n            }\n        }\n        // UTF8-3 = %xE0 %xA0-BF UTF8-tail\n        // UTF8-3 = %xED %x80-9F UTF8-tail\n        if (((buf[i] === 0xE0 && buf[i + 1] >= 0xA0 && buf[i + 1] <= 0xBF) ||\n            (buf[i] === 0xED && buf[i + 1] >= 0x80 && buf[i + 1] <= 0x9F)) && buf[i + 2] >> 6 === 2) {\n            i += 3;\n            continue;\n        }\n        // UTF8-3 = %xE1-EC 2( UTF8-tail )\n        // UTF8-3 = %xEE-EF 2( UTF8-tail )\n        if (((buf[i] >= 0xE1 && buf[i] <= 0xEC) ||\n            (buf[i] >= 0xEE && buf[i] <= 0xEF)) &&\n            buf[i + 1] >> 6 === 2 &&\n            buf[i + 2] >> 6 === 2) {\n            i += 3;\n            continue;\n        }\n        // UTF8-4 = %xF0 %x90-BF 2( UTF8-tail )\n        //          %xF1-F3 3( UTF8-tail )\n        //          %xF4 %x80-8F 2( UTF8-tail )\n        if (((buf[i] === 0xF0 && buf[i + 1] >= 0x90 && buf[i + 1] <= 0xBF) ||\n            (buf[i] >= 0xF1 && buf[i] <= 0xF3 && buf[i + 1] >> 6 === 2) ||\n            (buf[i] === 0xF4 && buf[i + 1] >= 0x80 && buf[i + 1] <= 0x8F)) &&\n            buf[i + 2] >> 6 === 2 &&\n            buf[i + 3] >> 6 === 2) {\n            i += 4;\n            continue;\n        }\n        return false;\n    }\n    return true;\n}\n\nexport { isUtf8 as default };\n","export function getEntriesBoundaries(string, substring, eol) {\n  const res = [];\n  let previous = 0;\n  let next = 0;\n  while (next !== -1) {\n    next = string.indexOf(substring, previous);\n    if (next !== -1) {\n      res.push([previous, next]);\n      const nextMatch = string.indexOf(eol, next + substring.length);\n      if (nextMatch === -1) {\n        next = -1;\n      } else {\n        previous = nextMatch + eol.length;\n        next = previous;\n      }\n    } else {\n      res.push([previous, string.length]);\n    }\n  }\n  return res;\n}\n","export function getMolecule(sdfPart, labels, currentLabels, options) {\n  let parts = sdfPart.split(`${options.eol}>`);\n  if (parts.length === 0 || parts[0].length <= 5) return;\n  let molecule = {};\n  molecule.molfile = parts[0] + options.eol;\n  for (let j = 1; j < parts.length; j++) {\n    let lines = parts[j].split(options.eol);\n    let from = lines[0].indexOf('<');\n    let to = lines[0].indexOf('>');\n    let label = lines[0].substring(from + 1, to);\n    currentLabels.push(label);\n    if (!labels[label]) {\n      labels[label] = {\n        counter: 0,\n        isNumeric: options.dynamicTyping,\n        keep: false,\n      };\n      if (\n        (!options.exclude || options.exclude.indexOf(label) === -1) &&\n        (!options.include || options.include.indexOf(label) > -1)\n      ) {\n        labels[label].keep = true;\n        if (options.modifiers[label]) {\n          labels[label].modifier = options.modifiers[label];\n        }\n        if (options.forEach[label]) {\n          labels[label].forEach = options.forEach[label];\n        }\n      }\n    }\n    if (labels[label].keep) {\n      for (let k = 1; k < lines.length - 1; k++) {\n        if (molecule[label]) {\n          molecule[label] += options.eol + lines[k];\n        } else {\n          molecule[label] = lines[k];\n        }\n      }\n      if (labels[label].modifier) {\n        let modifiedValue = labels[label].modifier(molecule[label]);\n        if (modifiedValue === undefined || modifiedValue === null) {\n          delete molecule[label];\n        } else {\n          molecule[label] = modifiedValue;\n        }\n      }\n      if (labels[label].isNumeric) {\n        if (!isFinite(molecule[label]) || molecule[label].match(/^0[0-9]/)) {\n          labels[label].isNumeric = false;\n        }\n      }\n    }\n  }\n  return molecule;\n}\n","export function iterator() {\n  throw new Error('Iterator not implemented in the browser');\n}\n","import { ensureString } from 'ensure-string';\n\nimport { getEntriesBoundaries } from './getEntriesBoundaries';\nimport { getMolecule } from './util/getMolecule';\n/**\n *  Parse a SDF file\n * @param {string|ArrayBuffer|Uint8Array} sdf SDF file to parse\n * @param {object} [options={}]\n * @param {string[]} [options.include] List of fields to include\n * @param {string[]} [options.exclude] List of fields to exclude\n * @param {Function} [options.filter] Callback allowing to filter the molecules\n * @param {boolean} [options.dynamicTyping] Dynamically type the data\n * @param {object} [options.modifiers] Object containing callbacks to apply on some specific fields\n * @param {boolean} [options.mixedEOL=false] Set to true if you know there is a mixture between \\r\\n and \\n\n * @param {string} [options.eol] Specify the end of line character. Default will be the one found in the file\n */\nexport function parse(sdf, options = {}) {\n  options = { ...options };\n  if (options.modifiers === undefined) options.modifiers = {};\n  if (options.forEach === undefined) options.forEach = {};\n  if (options.dynamicTyping === undefined) options.dynamicTyping = true;\n\n  sdf = ensureString(sdf);\n  if (typeof sdf !== 'string') {\n    throw new TypeError('Parameter \"sdf\" must be a string');\n  }\n\n  if (options.eol === undefined) {\n    options.eol = '\\n';\n    if (options.mixedEOL) {\n      sdf = sdf.replace(/\\r\\n/g, '\\n');\n      sdf = sdf.replace(/\\r/g, '\\n');\n    } else {\n      // we will find the delimiter in order to be much faster and not use regular expression\n      let header = sdf.substr(0, 1000);\n      if (header.indexOf('\\r\\n') > -1) {\n        options.eol = '\\r\\n';\n      } else if (header.indexOf('\\r') > -1) {\n        options.eol = '\\r';\n      }\n    }\n  }\n\n  let entriesBoundaries = getEntriesBoundaries(\n    sdf,\n    `${options.eol}$$$$`,\n    options.eol,\n  );\n  let molecules = [];\n  let labels = {};\n\n  let start = Date.now();\n\n  for (let i = 0; i < entriesBoundaries.length; i++) {\n    let sdfPart = sdf.substring(...entriesBoundaries[i]);\n\n    let currentLabels = [];\n    const molecule = getMolecule(sdfPart, labels, currentLabels, options);\n    if (!molecule) continue;\n    if (!options.filter || options.filter(molecule)) {\n      molecules.push(molecule);\n      // only now we can increase the counter\n      for (let j = 0; j < currentLabels.length; j++) {\n        labels[currentLabels[j]].counter++;\n      }\n    }\n  }\n  // all numeric fields should be converted to numbers\n  for (let label in labels) {\n    let currentLabel = labels[label];\n    if (currentLabel.isNumeric) {\n      currentLabel.minValue = Infinity;\n      currentLabel.maxValue = -Infinity;\n      for (let j = 0; j < molecules.length; j++) {\n        if (molecules[j][label]) {\n          let value = parseFloat(molecules[j][label]);\n          molecules[j][label] = value;\n          if (value > currentLabel.maxValue) {\n            currentLabel.maxValue = value;\n          }\n          if (value < currentLabel.minValue) {\n            currentLabel.minValue = value;\n          }\n        }\n      }\n    }\n  }\n\n  // we check that a label is in all the records\n  for (let key in labels) {\n    if (labels[key].counter === molecules.length) {\n      labels[key].always = true;\n    } else {\n      labels[key].always = false;\n    }\n  }\n\n  let statistics = [];\n  for (let key in labels) {\n    let statistic = labels[key];\n    statistic.label = key;\n    statistics.push(statistic);\n  }\n\n  return {\n    time: Date.now() - start,\n    molecules,\n    labels: Object.keys(labels),\n    statistics,\n  };\n}\n"],"names":["ensureString","blob","options","ArrayBuffer","isView","encoding","guessEncoding","TextDecoder","decode","TypeError","uint8","Uint8Array","buffer","byteOffset","byteLength","length","buf","i","len","isutf8","getEntriesBoundaries","string","substring","eol","res","previous","next","indexOf","push","nextMatch","getMolecule","sdfPart","labels","currentLabels","parts","split","molecule","molfile","j","lines","from","to","label","counter","isNumeric","dynamicTyping","keep","exclude","include","modifiers","modifier","forEach","k","modifiedValue","isFinite","match","Error","sdf","arguments","undefined","mixedEOL","replace","header","substr","entriesBoundaries","molecules","start","Date","now","filter","currentLabel","minValue","Infinity","maxValue","value","parseFloat","key","always","statistics","statistic","time","Object","keys"],"mappings":"iPAiBM,SAAUA,EACdC,GACiC,IAAjCC,yDAA+B,CAAA,EAE/B,GAAoB,iBAATD,EACT,OAAOA,EAET,GAAIE,YAAYC,OAAOH,IAASA,aAAgBE,YAAa,CAC3D,MAAME,SAAEA,EAAWC,EAAcL,IAAUC,EAE3C,OADgB,IAAIK,YAAYF,GACjBG,OAAOP,GAExB,MAAM,IAAIQ,UAAU,wDACtB,CAEA,SAASH,EAAcL,GACrB,MAAMS,EAAQP,YAAYC,OAAOH,GAC7B,IAAIU,WAAWV,EAAKW,OAAQX,EAAKY,WAAYZ,EAAKa,YAClD,IAAIH,WAAWV,GACnB,GAAIS,EAAMK,QAAU,EAAG,CACrB,GAAiB,MAAbL,EAAM,IAA4B,MAAbA,EAAM,GAC7B,MAAO,WAET,GAAiB,MAAbA,EAAM,IAA4B,MAAbA,EAAM,GAC7B,MAAO,WAIX,OCtBF,SAAgBM,GACZ,IAAKA,EACD,OAAO,EAIX,IAFA,IAAIC,EAAI,EACJC,EAAMF,EAAID,OACPE,EAAIC,GAEP,GAAIF,EAAIC,IAAM,IACVA,QADJ,CAKA,GAAID,EAAIC,IAAM,KAAQD,EAAIC,IAAM,IAAM,CAElC,GAAID,EAAIC,EAAI,IAAM,GAAM,EAAG,CACvBA,GAAK,EACL,QACJ,CAEI,OAAO,CAEf,CAGA,IAAiB,MAAXD,EAAIC,IAAeD,EAAIC,EAAI,IAAM,KAAQD,EAAIC,EAAI,IAAM,KAC7C,MAAXD,EAAIC,IAAeD,EAAIC,EAAI,IAAM,KAAQD,EAAIC,EAAI,IAAM,MAAUD,EAAIC,EAAI,IAAM,GAAM,EACtFA,GAAK,OAKT,IAAMD,EAAIC,IAAM,KAAQD,EAAIC,IAAM,KAC7BD,EAAIC,IAAM,KAAQD,EAAIC,IAAM,MAC7BD,EAAIC,EAAI,IAAM,GAAM,GACpBD,EAAIC,EAAI,IAAM,GAAM,EACpBA,GAAK,MAJT,CAUA,KAAiB,MAAXD,EAAIC,IAAeD,EAAIC,EAAI,IAAM,KAAQD,EAAIC,EAAI,IAAM,KACxDD,EAAIC,IAAM,KAAQD,EAAIC,IAAM,KAAQD,EAAIC,EAAI,IAAM,GAAM,GAC7C,MAAXD,EAAIC,IAAeD,EAAIC,EAAI,IAAM,KAAQD,EAAIC,EAAI,IAAM,MACxDD,EAAIC,EAAI,IAAM,GAAM,GACpBD,EAAIC,EAAI,IAAM,GAAM,EAIxB,OAAO,EAHHA,GAAK,CATT,CA3BA,CAyCJ,OAAO,CACX,CD/BOE,CAAOlB,GAEL,QAFmB,QAG5B,CEhDO,SAASmB,EAAqBC,EAAQC,EAAWC,GACtD,MAAMC,EAAM,GACZ,IAAIC,EAAW,EACXC,EAAO,EACX,MAAiB,IAAVA,GAEL,GADAA,EAAOL,EAAOM,QAAQL,EAAWG,IACnB,IAAVC,EAAa,CACfF,EAAII,KAAK,CAACH,EAAUC,IACpB,MAAMG,EAAYR,EAAOM,QAAQJ,EAAKG,EAAOJ,EAAUP,SACpC,IAAfc,EACFH,GAAQ,GAERD,EAAWI,EAAYN,EAAIR,OAC3BW,EAAOD,EAEX,MACED,EAAII,KAAK,CAACH,EAAUJ,EAAON,SAG/B,OAAOS,CACT,CCpBO,SAASM,EAAYC,EAASC,EAAQC,EAAe/B,GAC1D,IAAIgC,EAAQH,EAAQI,MAAO,GAAEjC,EAAQqB,QACrC,GAAqB,IAAjBW,EAAMnB,QAAgBmB,EAAM,GAAGnB,QAAU,EAAG,OAChD,IAAIqB,EAAW,CAAA,EACfA,EAASC,QAAUH,EAAM,GAAKhC,EAAQqB,IACtC,IAAK,IAAIe,EAAI,EAAGA,EAAIJ,EAAMnB,OAAQuB,IAAK,CACrC,IAAIC,EAAQL,EAAMI,GAAGH,MAAMjC,EAAQqB,KAC/BiB,EAAOD,EAAM,GAAGZ,QAAQ,KACxBc,EAAKF,EAAM,GAAGZ,QAAQ,KACtBe,EAAQH,EAAM,GAAGjB,UAAUkB,EAAO,EAAGC,GAqBzC,GApBAR,EAAcL,KAAKc,GACdV,EAAOU,KACVV,EAAOU,GAAS,CACdC,QAAS,EACTC,UAAW1C,EAAQ2C,cACnBC,MAAM,GAGJ5C,EAAQ6C,UAA+C,IAApC7C,EAAQ6C,QAAQpB,QAAQe,IAC3CxC,EAAQ8C,WAAW9C,EAAQ8C,QAAQrB,QAAQe,IAAU,KAEvDV,EAAOU,GAAOI,MAAO,EACjB5C,EAAQ+C,UAAUP,KACpBV,EAAOU,GAAOQ,SAAWhD,EAAQ+C,UAAUP,IAEzCxC,EAAQiD,QAAQT,KAClBV,EAAOU,GAAOS,QAAUjD,EAAQiD,QAAQT,MAI1CV,EAAOU,GAAOI,KAAM,CACtB,IAAK,IAAIM,EAAI,EAAGA,EAAIb,EAAMxB,OAAS,EAAGqC,IAChChB,EAASM,GACXN,EAASM,IAAUxC,EAAQqB,IAAMgB,EAAMa,GAEvChB,EAASM,GAASH,EAAMa,GAG5B,GAAIpB,EAAOU,GAAOQ,SAAU,CAC1B,IAAIG,EAAgBrB,EAAOU,GAAOQ,SAASd,EAASM,IAChDW,eACKjB,EAASM,GAEhBN,EAASM,GAASW,CAEtB,CACIrB,EAAOU,GAAOE,YACXU,SAASlB,EAASM,MAAWN,EAASM,GAAOa,MAAM,aACtDvB,EAAOU,GAAOE,WAAY,GAGhC,CACF,CACA,OAAOR,CACT,YCtDO,WACL,MAAM,IAAIoB,MAAM,0CAClB,UCcO,SAAeC,GAAmB,IAAdvD,EAAUwD,UAAA3C,OAAA,QAAA4C,IAAAD,UAAA,GAAAA,UAAA,GAAA,CAAA,EAOnC,GANAxD,EAAU,IAAKA,QACWyD,IAAtBzD,EAAQ+C,YAAyB/C,EAAQ+C,UAAY,CAAA,QACjCU,IAApBzD,EAAQiD,UAAuBjD,EAAQiD,QAAU,CAAA,QACvBQ,IAA1BzD,EAAQ2C,gBAA6B3C,EAAQ2C,eAAgB,GAG9C,iBADnBY,EAAMzD,EAAayD,IAEjB,MAAM,IAAIhD,UAAU,oCAGtB,QAAoBkD,IAAhBzD,EAAQqB,IAEV,GADArB,EAAQqB,IAAM,KACVrB,EAAQ0D,SAEVH,GADAA,EAAMA,EAAII,QAAQ,QAAS,OACjBA,QAAQ,MAAO,UACpB,CAEL,IAAIC,EAASL,EAAIM,OAAO,EAAG,KACvBD,EAAOnC,QAAQ,SAAW,EAC5BzB,EAAQqB,IAAM,OACLuC,EAAOnC,QAAQ,OAAS,IACjCzB,EAAQqB,IAAM,KAElB,CAGF,IAAIyC,EAAoB5C,EACtBqC,EACC,GAAEvD,EAAQqB,UACXrB,EAAQqB,KAEN0C,EAAY,GACZjC,EAAS,CAAA,EAETkC,EAAQC,KAAKC,MAEjB,IAAK,IAAInD,EAAI,EAAGA,EAAI+C,EAAkBjD,OAAQE,IAAK,CACjD,IAEIgB,EAAgB,GACpB,MAAMG,EAAWN,EAHH2B,EAAInC,aAAa0C,EAAkB/C,IAGXe,EAAQC,EAAe/B,GAC7D,GAAKkC,KACAlC,EAAQmE,QAAUnE,EAAQmE,OAAOjC,IAAW,CAC/C6B,EAAUrC,KAAKQ,GAEf,IAAK,IAAIE,EAAI,EAAGA,EAAIL,EAAclB,OAAQuB,IACxCN,EAAOC,EAAcK,IAAIK,SAE7B,CACF,CAEA,IAAK,IAAID,KAASV,EAAQ,CACxB,IAAIsC,EAAetC,EAAOU,GAC1B,GAAI4B,EAAa1B,UAAW,CAC1B0B,EAAaC,SAAWC,IACxBF,EAAaG,UAAYD,IACzB,IAAK,IAAIlC,EAAI,EAAGA,EAAI2B,EAAUlD,OAAQuB,IACpC,GAAI2B,EAAU3B,GAAGI,GAAQ,CACvB,IAAIgC,EAAQC,WAAWV,EAAU3B,GAAGI,IACpCuB,EAAU3B,GAAGI,GAASgC,EAClBA,EAAQJ,EAAaG,WACvBH,EAAaG,SAAWC,GAEtBA,EAAQJ,EAAaC,WACvBD,EAAaC,SAAWG,EAE5B,CAEJ,CACF,CAGA,IAAK,IAAIE,KAAO5C,EACVA,EAAO4C,GAAKjC,UAAYsB,EAAUlD,OACpCiB,EAAO4C,GAAKC,QAAS,EAErB7C,EAAO4C,GAAKC,QAAS,EAIzB,IAAIC,EAAa,GACjB,IAAK,IAAIF,KAAO5C,EAAQ,CACtB,IAAI+C,EAAY/C,EAAO4C,GACvBG,EAAUrC,MAAQkC,EAClBE,EAAWlD,KAAKmD,EAClB,CAEA,MAAO,CACLC,KAAMb,KAAKC,MAAQF,EACnBD,YACAjC,OAAQiD,OAAOC,KAAKlD,GACpB8C,aAEJ"}