Source: middleware/datasetTaskList.middleware.js

/**
 * @module middleware-dataset-tasklist
 *
 * @description Middleware responsible for assembling and presenting a dataset's task list.
 *
 * The notion of "tasks" used here means some actions that an LPA needs to take to improve
 * the quality of the data.
 *
 * A task is added to the list when:
 * - data ingestion pipeline has problems accessing the endpoint URL (this happens outside of this application)
 * - data ingestion pipeline found issues with the data (again, happens outside of this application,
 *   but we can query the results in datasette)
 * - an 'expectation' failed (happens outside this application, but we can query the results)
 */

import {
  addEntityCountsToResources,
  expectationFetcher,
  expectations,
  fetchDatasetInfo,
  fetchEntityCount,
  fetchEntityIssueCounts,
  fetchEntryIssueCounts,
  fetchOrgInfo, fetchResources, fetchSources,
  logPageError,
  noop,
  validateOrgAndDatasetQueryParams
} from './common.middleware.js'
import { fetchOne, renderTemplate } from './middleware.builders.js'
import performanceDbApi from '../services/performanceDbApi.js'
import { statusToTagClass } from '../filters/filters.js'
import '../types/datasette.js'
import logger from '../utils/logger.js'
import { types } from '../utils/logging.js'
import { isFeatureEnabled } from '../utils/features.js'
import config from '../../config/index.js'
import pluralize from 'pluralize'

/**
 * Middleware built with `fetchOne` that runs the performance DB resource status
 * query for the LPA and dataset found in `req.params`.
 *
 * NOTE(review): the outcome is presumably stored on the request under the
 * `resourceStatus` key named by `result` — confirm in middleware.builders.js.
 */
export const fetchResourceStatus = fetchOne({
  query: ({ params: { lpa, dataset } }) => performanceDbApi.resourceStatusQuery(lpa, dataset),
  result: 'resourceStatus'
})

/**
 * Middleware created by `expectationFetcher` for the `entitiesOutOfBounds`
 * expectation, with details requested.
 *
 * `prepareTasks` reads `req.expectationOutOfBounds`; presumably that is where the
 * fetcher stores its rows (via the `result` key) — confirm in common.middleware.js.
 * This step is only part of the middleware chain when the
 * `expectationOutOfBoundsTask` feature is enabled.
 */
const fetchOutOfBoundsExpectations = expectationFetcher({
  expectation: expectations.entitiesOutOfBounds,
  includeDetails: true,
  result: 'expectationOutOfBounds'
})

/**
 * Wraps a status label into the `{ tag: { text, classes } }` shape used for task statuses.
 *
 * @param {string} status - Status label to display (e.g. "Error", "Needs fixing")
 * @returns {{ tag: { text: string, classes: * } }} the label together with whatever
 *   `statusToTagClass` returns for it
 */
function getStatusTag (status) {
  const classes = statusToTagClass(status)
  const tag = { text: status, classes }
  return { tag }
}

// Issue types for which `prepareTasks` passes the entry count of the first resource
// (`resources[0].entry_count`, or 0 when there are no resources) as `rowCount`,
// instead of the total entity count.
const SPECIAL_ISSUE_TYPES = ['reference values are not unique']

/**
 * Builds the task message shown when the "entities out of bounds" expectation failed.
 *
 * The noun comes from `config.datasetsConfig[dataset].entityDisplayName`
 * (`base` + pluralised `variable`), falling back to "entity" when the dataset
 * has no display name configured.
 *
 * @param {string} dataset dataset slug
 * @param {number} [count] how many entities out of bounds were found; when it is
 *   null/undefined the number is left out and the plural form is used
 * @returns {string} task message
 */
export function entityOutOfBoundsMessage (dataset, count) {
  const fallbackName = { variable: 'entity', base: '' }
  const { base, variable } = config.datasetsConfig[dataset]?.entityDisplayName ?? fallbackName

  // a missing count is pluralised as if it were 2, i.e. always plural
  const noun = pluralize(variable, count ?? 2, false)
  const displayName = `${base ?? ''} ${noun}`.trim()
  const shownCount = count ?? ''

  // an empty count leaves two consecutive spaces behind; collapse them
  const message = `You have ${shownCount} ${displayName} outside of your boundary`
  return message.replace(/ {2}/, ' ')
}

/**
 * Middleware. Builds the dataset's task list and stores it in `req.taskList`.
 *
 * Tasks are produced in this order:
 * 1. one "Needs fixing" task per issue count row (entry issues first, then entity issues);
 *    rows whose `issue_type` or `field` is empty or undefined are skipped
 * 2. one "Error" task per source whose `status` is falsy or >= 300
 * 3. a single "Needs fixing" task when `req.expectationOutOfBounds` is non-empty
 *
 * @param {Object} req The request object.
 * @param {Object} req.parsedParams parsed request parameters
 * @param {string} req.parsedParams.lpa The LPA (Local Planning Authority) associated with the request.
 * @param {string} req.parsedParams.dataset The name of the dataset associated with the request.
 * @param {Object} req.entityCount total entity count under `count` field
 * @param {Object[]} req.resources resources; the first one's `entry_count` is used for special issue types
 * @param {Object[]} req.sources sources; `status` and `endpoint` are read from each
 * @param {Object[]} req.entryIssueCounts issue count rows (`field`, `issue_type`, `count`) for entries
 * @param {Object[]} req.entityIssueCounts issue count rows (`field`, `issue_type`, `count`) for entities
 * @param {Object[]} [req.expectationOutOfBounds] results of the out of bounds expectation
 * @param {string} req.expectationOutOfBounds[].dataset
 * @param {boolean} req.expectationOutOfBounds[].passed did the expectation pass
 * @param {number} req.expectationOutOfBounds[].expected
 * @param {number} req.expectationOutOfBounds[].actual used as the count in the task message
 * @param {Object[]} req.taskList OUT value
 * @param {Object} res - The response object.
 * @param {Function} next - The next middleware function.
 * @returns {undefined}
 */
export const prepareTasks = (req, res, next) => {
  const { lpa, dataset } = req.parsedParams
  const {
    entityCount,
    resources,
    sources,
    entryIssueCounts,
    entityIssueCounts,
    expectationOutOfBounds = []
  } = req

  // builds `/organisations/<lpa>/<dataset>/<...segments>` with every segment URI-encoded
  const datasetHref = (...segments) => {
    const encoded = [lpa, dataset, ...segments].map((segment) => encodeURIComponent(segment))
    return `/organisations/${encoded.join('/')}`
  }

  const hasValue = (value) => value !== '' && value !== undefined

  // asks performanceDbApi for the task title, falling back to a generic one if that throws
  const issueTitle = (details) => {
    try {
      return performanceDbApi.getTaskMessage({ ...details, dataset })
    } catch (e) {
      logger.warn('Failed to generate task title', {
        type: types.App,
        errorMessage: e.message,
        errorStack: e.stack,
        params: details
      })
      const { num_issues: count, issue_type: type } = details
      return `${count} issue${count > 1 ? 's' : ''} of type ${type}`
    }
  }

  const issueTasks = [...entryIssueCounts, ...entityIssueCounts]
    .filter((issue) => hasValue(issue.issue_type) && hasValue(issue.field))
    .map(({ field, issue_type: type, count }) => {
      // special case issue types are counted against the first resource's entries,
      // everything else against the total number of entities
      const entityTotal = entityCount.count
      let rowCount
      if (!SPECIAL_ISSUE_TYPES.includes(type)) {
        rowCount = entityTotal
      } else if (resources.length > 0) {
        rowCount = resources[0].entry_count
      } else {
        rowCount = 0
      }

      const text = issueTitle({ num_issues: count, rowCount, field, issue_type: type })

      return {
        title: { text },
        href: datasetHref(type, field),
        status: getStatusTag('Needs fixing')
      }
    })

  // sources which couldn't be accessed
  const sourceTasks = []
  for (const source of sources) {
    const inaccessible = !source.status || source.status >= 300
    if (inaccessible) {
      sourceTasks.push({
        title: { text: 'There was an error accessing the URL' },
        href: datasetHref('endpoint-error', source.endpoint),
        status: getStatusTag('Error')
      })
    }
  }

  // only the first out of bounds result is used for the message
  const expectationTasks = expectationOutOfBounds.length > 0
    ? [{
        title: { text: entityOutOfBoundsMessage(dataset, expectationOutOfBounds[0].actual) },
        href: datasetHref('expectation', expectations.entitiesOutOfBounds.slug),
        status: getStatusTag('Needs fixing')
      }]
    : []

  req.taskList = [...issueTasks, ...sourceTasks, ...expectationTasks]

  next()
}

/**
 * Middleware. Collects the values the task list template needs into `req.templateParams`.
 *
 * @param {Object} req request
 * @param {Object[]} req.taskList task list, copied as `taskList`
 * @param {Object} req.orgInfo organisation info, exposed as `organisation`
 * @param {Object} req.dataset dataset info, copied as `dataset`
 * @param {Object} [req.templateParams] OUT param
 * @param {*} res
 * @param {*} next
 */
export const prepareDatasetTaskListTemplateParams = (req, res, next) => {
  req.templateParams = {
    taskList: req.taskList,
    organisation: req.orgInfo,
    dataset: req.dataset
  }

  next()
}

/**
 * Rendering step of the task list page, built with `renderTemplate`.
 *
 * Supplies `req.templateParams` as the parameters for the
 * `organisations/datasetTaskList.html` template.
 * NOTE(review): how `renderTemplate` uses `handlerName` is not visible here —
 * see middleware.builders.js.
 */
const getDatasetTaskList = renderTemplate({
  templateParams: ({ templateParams }) => templateParams,
  template: 'organisations/datasetTaskList.html',
  handlerName: 'getDatasetTaskList'
})

// The out of bounds expectation is only fetched when its feature flag is enabled;
// otherwise `noop` takes its place in the chain.
const outOfBoundsExpectationsStep = isFeatureEnabled('expectationOutOfBoundsTask')
  ? fetchOutOfBoundsExpectations
  : noop

/**
 * Middleware chain for the dataset task list page, in the order the steps are listed.
 */
export default [
  validateOrgAndDatasetQueryParams,

  // fetching steps (presumably these populate the `req` fields read further down)
  fetchOrgInfo,
  fetchSources,
  fetchDatasetInfo,
  fetchResources,
  outOfBoundsExpectationsStep,
  addEntityCountsToResources,
  fetchEntityCount,
  fetchEntityIssueCounts,
  fetchEntryIssueCounts,

  // build `req.taskList`, then `req.templateParams`, then render
  prepareTasks,
  prepareDatasetTaskListTemplateParams,
  getDatasetTaskList,

  logPageError
]