{"version":3,"file":"sdf-parser.min.js","sources":["../node_modules/ensure-string/lib-esm/index.js","../src/getEntriesBoundaries.js","../src/parse.js"],"sourcesContent":["/**\n * Ensure that the data is string. If it is an ArrayBuffer it will be converted to string using TextDecoder.\n * @param blob\n * @param options\n * @returns\n */\nexport function ensureString(blob, options = {}) {\n    if (typeof blob === 'string') {\n        return blob;\n    }\n    if (ArrayBuffer.isView(blob) || blob instanceof ArrayBuffer) {\n        const { encoding = guessEncoding(blob) } = options;\n        const decoder = new TextDecoder(encoding);\n        return decoder.decode(blob);\n    }\n    throw new TypeError(`blob must be a string, ArrayBuffer or ArrayBufferView`);\n}\nfunction guessEncoding(blob) {\n    const uint8 = ArrayBuffer.isView(blob)\n        ? new Uint8Array(blob.buffer, blob.byteOffset, blob.byteLength)\n        : new Uint8Array(blob);\n    if (uint8.length >= 2) {\n        if (uint8[0] === 0xfe && uint8[1] === 0xff) {\n            return 'utf-16be';\n        }\n        if (uint8[0] === 0xff && uint8[1] === 0xfe) {\n            return 'utf-16le';\n        }\n    }\n    return 'utf-8';\n}\n//# sourceMappingURL=index.js.map","export function getEntriesBoundaries(string, substring, eol) {\n  const res = [];\n  let previous = 0;\n  let next = 0;\n  while (next !== -1) {\n    next = string.indexOf(substring, previous);\n    if (next !== -1) {\n      res.push([previous, next]);\n      previous = next =\n        string.indexOf(eol, next + substring.length) + eol.length;\n    } else {\n      res.push([previous, string.length]);\n    }\n  }\n  return res;\n}\n","import { ensureString } from 'ensure-string';\n\nimport { getEntriesBoundaries } from './getEntriesBoundaries';\n/**\n *  Parse a SDF file\n * @param {string|ArrayBuffer|Uint8Array} sdf SDF file to parse\n * @param {any} [options={}]\n * @param {array<string>} [options.include] List of fields to include\n * @param {array<string>} [options.exclude] List of fields to exclude\n * @param {boolean} [options.dynamicTyping] Dynamically type the data\n * @param {object} [options.modifiers] Object containing callbacks to apply on some specific fields\n * @param {boolean} [options.mixedEOL=false] Set to true if you know there is a mixture between \\r\\n and \\n\n */\nexport function parse(sdf, options = {}) {\n  const {\n    include,\n    exclude,\n    filter,\n    modifiers = {},\n    forEach = {},\n    dynamicTyping = true,\n  } = options;\n\n  sdf = ensureString(sdf);\n  if (typeof sdf !== 'string') {\n    throw new TypeError('Parameter \"sdf\" must be a string');\n  }\n\n  let eol = '\\n';\n  if (options.mixedEOL) {\n    sdf = sdf.replace(/\\r\\n/g, '\\n');\n    sdf = sdf.replace(/\\r/g, '\\n');\n  } else {\n    // we will find the delimiter in order to be much faster and not use regular expression\n    let header = sdf.substr(0, 1000);\n    if (header.indexOf('\\r\\n') > -1) {\n      eol = '\\r\\n';\n    } else if (header.indexOf('\\r') > -1) {\n      eol = '\\r';\n    }\n  }\n\n  let entriesBoundaries = getEntriesBoundaries(sdf, `${eol}$$$$`, eol);\n  let molecules = [];\n  let labels = {};\n\n  let start = Date.now();\n\n  for (let i = 0; i < entriesBoundaries.length; i++) {\n    let sdfPart = sdf.substring(...entriesBoundaries[i]);\n    let parts = sdfPart.split(`${eol}>`);\n    if (parts.length > 0 && parts[0].length > 5) {\n      let molecule = {};\n      let currentLabels = [];\n      molecule.molfile = parts[0] + eol;\n      for (let j = 1; j < parts.length; j++) {\n        let lines = parts[j].split(eol);\n        let from = lines[0].indexOf('<');\n        let to = lines[0].indexOf('>');\n        let label = lines[0].substring(from + 1, to);\n        currentLabels.push(label);\n        if (!labels[label]) {\n          labels[label] = {\n            counter: 0,\n            isNumeric: dynamicTyping,\n            keep: false,\n          };\n          if (\n            (!exclude || exclude.indexOf(label) === -1) &&\n            (!include || include.indexOf(label) > -1)\n          ) {\n            labels[label].keep = true;\n            if (modifiers[label]) {\n              labels[label].modifier = modifiers[label];\n            }\n            if (forEach[label]) {\n              labels[label].forEach = forEach[label];\n            }\n          }\n        }\n        if (labels[label].keep) {\n          for (let k = 1; k < lines.length - 1; k++) {\n            if (molecule[label]) {\n              molecule[label] += eol + lines[k];\n            } else {\n              molecule[label] = lines[k];\n            }\n          }\n          if (labels[label].modifier) {\n            let modifiedValue = labels[label].modifier(molecule[label]);\n            if (modifiedValue === undefined || modifiedValue === null) {\n              delete molecule[label];\n            } else {\n              molecule[label] = modifiedValue;\n            }\n          }\n          if (labels[label].isNumeric) {\n            if (\n              !isFinite(molecule[label]) ||\n              molecule[label].match(/^0[0-9]/)\n            ) {\n              labels[label].isNumeric = false;\n            }\n          }\n        }\n      }\n      if (!filter || filter(molecule)) {\n        molecules.push(molecule);\n        // only now we can increase the counter\n        for (let j = 0; j < currentLabels.length; j++) {\n          labels[currentLabels[j]].counter++;\n        }\n      }\n    }\n  }\n\n  // all numeric fields should be converted to numbers\n  for (let label in labels) {\n    let currentLabel = labels[label];\n    if (currentLabel.isNumeric) {\n      currentLabel.minValue = Infinity;\n      currentLabel.maxValue = -Infinity;\n      for (let j = 0; j < molecules.length; j++) {\n        if (molecules[j][label]) {\n          let value = parseFloat(molecules[j][label]);\n          molecules[j][label] = value;\n          if (value > currentLabel.maxValue) {\n            currentLabel.maxValue = value;\n          }\n          if (value < currentLabel.minValue) {\n            currentLabel.minValue = value;\n          }\n        }\n      }\n    }\n  }\n\n  // we check that a label is in all the records\n  for (let key in labels) {\n    if (labels[key].counter === molecules.length) {\n      labels[key].always = true;\n    } else {\n      labels[key].always = false;\n    }\n  }\n\n  let statistics = [];\n  for (let key in labels) {\n    let statistic = labels[key];\n    statistic.label = key;\n    statistics.push(statistic);\n  }\n\n  return {\n    time: Date.now() - start,\n    molecules: molecules,\n    labels: Object.keys(labels),\n    statistics: statistics,\n  };\n}\n"],"names":["ensureString","blob","options","ArrayBuffer","isView","encoding","guessEncoding","TextDecoder","decode","TypeError","uint8","Uint8Array","buffer","byteOffset","byteLength","length","getEntriesBoundaries","string","substring","eol","res","previous","next","indexOf","push","sdf","include","exclude","filter","modifiers","forEach","dynamicTyping","mixedEOL","replace","header","substr","entriesBoundaries","molecules","labels","start","Date","now","i","sdfPart","parts","split","molecule","currentLabels","molfile","j","lines","from","to","label","counter","isNumeric","keep","modifier","k","modifiedValue","isFinite","match","currentLabel","minValue","Infinity","maxValue","value","parseFloat","key","always","statistics","statistic","time","Object","keys"],"mappings":"iPAaM,SAAUA,EACdC,GACAC,IAAAA,yDAA+B,GAE/B,GAAoB,iBAATD,EACT,OAAOA,EAELE,GAAAA,YAAYC,OAAOH,IAASA,aAAgBE,YAAa,CACrD,MAAAE,SAAUA,EAAGC,EAAcL,IAAUC,EAE3C,OADgB,IAAIK,YAAYF,GACjBG,OAAOP,GAExB,MAAM,IAAIQ,UAAU,yDAGtB,SAASH,EAAcL,GACrB,MAAMS,EAAQP,YAAYC,OAAOH,GAC7B,IAAIU,WAAWV,EAAKW,OAAQX,EAAKY,WAAYZ,EAAKa,YAClD,IAAIH,WAAWV,GACnB,GAAIS,EAAMK,QAAU,EAAG,CACrB,GAAiB,MAAbL,EAAM,IAA4B,MAAbA,EAAM,GAC7B,MAAO,WAET,GAAiB,MAAbA,EAAM,IAA4B,MAAbA,EAAM,GAC7B,MAAO,WAGX,MAAO,QCxCF,SAASM,EAAqBC,EAAQC,EAAWC,GAChDC,MAAAA,EAAM,GACRC,IAAAA,EAAW,EACXC,EAAO,EACX,MAAiB,IAAVA,GACLA,EAAOL,EAAOM,QAAQL,EAAWG,IACnB,IAAVC,GACFF,EAAII,KAAK,CAACH,EAAUC,IACpBD,EAAWC,EACTL,EAAOM,QAAQJ,EAAKG,EAAOJ,EAAUH,QAAUI,EAAIJ,QAErDK,EAAII,KAAK,CAACH,EAAUJ,EAAOF,SAG/B,OAAOK,UCDF,SAAeK,GAAKvB,IAAAA,yDAAU,GAC7B,MAAAwB,QACJA,EADIC,QAEJA,EAFIC,OAGJA,EAHIC,UAIJA,EAAY,GAJRC,QAKJA,EAAU,GALNC,cAMJA,GAAgB,GACd7B,EAGJ,GAAmB,iBADnBuB,EAAMzB,EAAayB,IAEjB,MAAM,IAAIhB,UAAU,oCAGlBU,IAAAA,EAAM,KACNjB,GAAAA,EAAQ8B,SAEVP,GADAA,EAAMA,EAAIQ,QAAQ,QAAS,OACjBA,QAAQ,MAAO,UACpB,CAEDC,IAAAA,EAAST,EAAIU,OAAO,EAAG,KACvBD,EAAOX,QAAQ,SAAW,EAC5BJ,EAAM,OACGe,EAAOX,QAAQ,OAAS,IACjCJ,EAAM,MAINiB,IAAAA,EAAoBpB,EAAqBS,EAAM,GAAEN,QAAWA,GAC5DkB,EAAY,GACZC,EAAS,GAETC,EAAQC,KAAKC,MAEjB,IAAK,IAAIC,EAAI,EAAGA,EAAIN,EAAkBrB,OAAQ2B,IAAK,CAC7CC,IACAC,EADUnB,EAAIP,aAAakB,EAAkBM,IAC7BG,MAAO,GAAE1B,MAC7B,GAAIyB,EAAM7B,OAAS,GAAK6B,EAAM,GAAG7B,OAAS,EAAG,CACvC+B,IAAAA,EAAW,GACXC,EAAgB,GACpBD,EAASE,QAAUJ,EAAM,GAAKzB,EAC9B,IAAK,IAAI8B,EAAI,EAAGA,EAAIL,EAAM7B,OAAQkC,IAAK,CACjCC,IAAAA,EAAQN,EAAMK,GAAGJ,MAAM1B,GACvBgC,EAAOD,EAAM,GAAG3B,QAAQ,KACxB6B,EAAKF,EAAM,GAAG3B,QAAQ,KACtB8B,EAAQH,EAAM,GAAGhC,UAAUiC,EAAO,EAAGC,GAqBzC,GApBAL,EAAcvB,KAAK6B,GACdf,EAAOe,KACVf,EAAOe,GAAS,CACdC,QAAS,EACTC,UAAWxB,EACXyB,MAAM,GAGJ7B,IAAuC,IAA5BA,EAAQJ,QAAQ8B,IAC3B3B,KAAWA,EAAQH,QAAQ8B,IAAU,KAEvCf,EAAOe,GAAOG,MAAO,EACjB3B,EAAUwB,KACZf,EAAOe,GAAOI,SAAW5B,EAAUwB,IAEjCvB,EAAQuB,KACVf,EAAOe,GAAOvB,QAAUA,EAAQuB,MAIlCf,EAAOe,GAAOG,KAAM,CACtB,IAAK,IAAIE,EAAI,EAAGA,EAAIR,EAAMnC,OAAS,EAAG2C,IAChCZ,EAASO,GACXP,EAASO,IAAUlC,EAAM+B,EAAMQ,GAE/BZ,EAASO,GAASH,EAAMQ,GAG5B,GAAIpB,EAAOe,GAAOI,SAAU,CAC1B,IAAIE,EAAgBrB,EAAOe,GAAOI,SAASX,EAASO,IAChDM,MAAAA,SACKb,EAASO,GAEhBP,EAASO,GAASM,EAGlBrB,EAAOe,GAAOE,YAEbK,SAASd,EAASO,MACnBP,EAASO,GAAOQ,MAAM,aAEtBvB,EAAOe,GAAOE,WAAY,KAKlC,IAAK3B,GAAUA,EAAOkB,GAAW,CAC/BT,EAAUb,KAAKsB,GAEf,IAAK,IAAIG,EAAI,EAAGA,EAAIF,EAAchC,OAAQkC,IACxCX,EAAOS,EAAcE,IAAIK,YAOjC,IAAK,IAAID,KAASf,EAAQ,CACxB,IAAIwB,EAAexB,EAAOe,GACtBS,GAAAA,EAAaP,UAAW,CAC1BO,EAAaC,SAAWC,EAAAA,EACxBF,EAAaG,UAAYD,EAAAA,EACzB,IAAK,IAAIf,EAAI,EAAGA,EAAIZ,EAAUtB,OAAQkC,IACpC,GAAIZ,EAAUY,GAAGI,GAAQ,CACnBa,IAAAA,EAAQC,WAAW9B,EAAUY,GAAGI,IACpChB,EAAUY,GAAGI,GAASa,EAClBA,EAAQJ,EAAaG,WACvBH,EAAaG,SAAWC,GAEtBA,EAAQJ,EAAaC,WACvBD,EAAaC,SAAWG,KAQlC,IAAK,IAAIE,KAAO9B,EACVA,EAAO8B,GAAKd,UAAYjB,EAAUtB,OACpCuB,EAAO8B,GAAKC,QAAS,EAErB/B,EAAO8B,GAAKC,QAAS,EAIrBC,IAAAA,EAAa,GACjB,IAAK,IAAIF,KAAO9B,EAAQ,CACtB,IAAIiC,EAAYjC,EAAO8B,GACvBG,EAAUlB,MAAQe,EAClBE,EAAW9C,KAAK+C,GAGX,MAAA,CACLC,KAAMhC,KAAKC,MAAQF,EACnBF,UAAWA,EACXC,OAAQmC,OAAOC,KAAKpC,GACpBgC,WAAYA"}