import mammoth from "https://cdn.jsdelivr.net/npm/mammoth@1.9.0/+esm"
// Input declarations: one file picker for the Word document followed by the
// clean-up toggles. The order matches the positional array that `make`
// destructures (presumably the host passes the values through in this order —
// confirm against the host before reordering).
export const take = [
  {
    type: "file",
    label: "Document",
    accept: ".doc,.docx,application/msword",
    read: "buffer",
  },
  // Each toggle is listed as [label, default state]
  ...[
    ["Strip excess backslashes", true],
    ["Remove excess new lines", true],
    ["Extract footnotes", true],
    ["Remove internal links", true],
    ["Remove anchor tags around URLs", true],
    ["Embed images as base64 strings", false],
  ].map(([label, value]) => ({ type: "toggle", label, value })),
]
// Converts the uploaded Word document to Markdown with Mammoth, then runs
// whichever clean-up passes are switched on. Resolves to a single "code"
// output holding the Markdown, or to undefined when no document was supplied.
export const make = async ([
  arrayBuffer,
  stripSlashes,
  stripLines,
  extractNotes,
  stripInternalLinks,
  flattenLinks,
  base64img,
]) => {
  // Nothing to convert until a file has been provided
  if (!arrayBuffer) return

  // Unless base64 embedding is requested, swap each image for a short
  // placeholder comment naming its content type; a null converter leaves
  // the choice to Mammoth
  let convertImage = null
  if (!base64img) {
    convertImage = mammoth.images.imgElement((image) => ({
      src: `<!-- ${image.contentType} -->`,
    }))
  }

  const converted = await mammoth.convertToMarkdown(
    { arrayBuffer },
    { convertImage }
  )

  // Clean-up passes in the order they must run, paired with their toggles
  const passes = [
    [stripSlashes, "stripSlashes"],
    [stripLines, "stripLines"],
    [extractNotes, "extractNotes"],
    [stripInternalLinks, "stripInternalLinks"],
    [flattenLinks, "flattenLinks"],
  ]
  let value = converted.value
  for (const [enabled, pass] of passes) {
    if (enabled) value = format[pass](value)
  }
  return [{ type: "code", value }]
}
// Clean-up passes applied to the Markdown produced by Mammoth. Every pass
// takes the Markdown string and returns the transformed string.
const format = {
  stripSlashes: (value) => {
    // By default, Mammoth backslashes every ambiguous Markdown character.
    // Drop those escaping backslashes, but keep one backslash out of every
    // `\\` pair: a pair is how an escaped *literal* backslash appears
    // (assuming Mammoth escapes backslashes too), and removing both would
    // silently delete it (e.g. `C:\\Users` must become `C:\Users`).
    return value.replace(/\\(\\)?/g, (_, literal = "") => literal)
  },
  stripLines: (value) => {
    // Never allow more than two newlines in a row: first drop spaces that
    // lead a line, then collapse any whitespace run after a blank line, and
    // finally trim both ends of the document
    return value
      .replace(/\n +/g, "\n")
      .replace(/\n\n\s*/g, "\n\n")
      .trim()
  },
  extractNotes: (value) => {
    // Remove the default formatting around footnotes and endnotes, converting
    // them to use the semi-conventional [^N] syntax.
    // `match` returns null (not an empty array) when the document has no
    // notes, so fall back to an empty list instead of throwing.
    const notes = value.match(/\[\[\d+\]\]\(#\w+-\d+\)/g) || []
    for (const note of notes) {
      // A reference looks like [[N]](#kind-N); pull out the N
      const index = note.slice(2, note.indexOf("]"))
      value = value
        // In-text reference -> [^N]
        .replace(note, `[^${index}]`)
        // Numbered list item carrying the note's anchor -> "[^N]: " definition
        .replace(
          new RegExp(`${index}. <a id="\\w+-${index}"></a> `, "s"),
          `[^${index}]: `
        )
        // Drop the "back to reference" arrow link that ends the note
        .replace(new RegExp(`\\[↑\\]\\(#[\\w-]+-${index}\\)`), "")
    }
    return value
  },
  stripInternalLinks: (value) => {
    // Remove internal links (with a leading #), keeping only their text, and
    // remove their related, empty anchors
    return value
      .replace(/\[([^\]]+)\]\(#[^\)]+\)/g, "$1")
      .replace(/<a id="[^"]+"><\/a>/g, "")
  },
  flattenLinks: (value) => {
    // If the text and URL match, don't wrap them in Markdown syntax; any
    // other link is left exactly as it was
    return value.replace(/\[([^\]]+)\]\(([^\)]+)\)/g, (link, text, url) => {
      return text === url ? text : link
    })
  },
}