* Replace monitoring solution with kube-prometheus-stack dependency
* Enable pod monitors
* Download necessary chart dependencies for CI
* Actually run dependency update
* Enable missed podMonitor
* Disable alertmanager by default for feature parity

Related issues #294 #65

Supersedes #296 and #297

### Motivation

Our Helm chart is out of date. I propose we make a breaking change for the monitoring solution and start using the `kube-prometheus-stack` chart as a dependency. This should make upgrades easier and will let users leverage all of that chart's features.

This change will result in the removal of the StreamNative Grafana dashboards. We'll need to figure out the right way to address that. The apache/pulsar project has Grafana dashboards, but they have not been maintained. With this added dependency, we'll have the benefit of being able to use k8s `ConfigMap`s to configure Grafana dashboards.

### Modifications

* Remove old Prometheus and Grafana configuration
* Add kube-prometheus-stack chart as a dependency
* Enable several components by default. I am not opinionated on these, but the selection is based on the other values in the chart.

### Verifying this change

This is a large change that will require manual validation, and may break deployments. I propose that this triggers a Helm chart 3.0.0 release.
215 lines
11 KiB
Bash
215 lines
11 KiB
Bash
#!/usr/bin/env bash
|
|
#
|
|
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
#
|
|
# Resolve the repository layout relative to the directory containing this script.
BINDIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
PULSAR_HOME="$(cd "${BINDIR}/.." && pwd)"
CHARTS_HOME="${PULSAR_HOME}"

# Tool binaries are expected under output/bin of the charts checkout.
OUTPUT_BIN="${CHARTS_HOME}/output/bin"
KIND_BIN="${OUTPUT_BIN}/kind"
HELM="${OUTPUT_BIN}/helm"
KUBECTL="${OUTPUT_BIN}/kubectl"

# Kubernetes namespace and helm release name used by the CI helpers below.
NAMESPACE=pulsar
CLUSTER=pulsar-ci

# Random suffix for the kind cluster name ("pulsar-ci-<CLUSTER_ID>").
# uuidgen is not installed on every host; without a fallback the suffix would
# silently be empty, so try the Linux kernel UUID source and finally PID+RANDOM.
# The '|| true' keeps a missing tool from aborting callers that use 'set -e'.
CLUSTER_ID="$(uuidgen 2>/dev/null || true)"
if [[ -z "${CLUSTER_ID}" && -r /proc/sys/kernel/random/uuid ]]; then
  CLUSTER_ID="$(cat /proc/sys/kernel/random/uuid 2>/dev/null || true)"
fi
if [[ -z "${CLUSTER_ID}" ]]; then
  CLUSTER_ID="$$-${RANDOM}"
fi

# Directory that ci::collect_k8s_logs writes to; may be overridden from the environment.
K8S_LOGS_DIR="${K8S_LOGS_DIR:-/tmp/k8s-logs}"

# Prefer the locally downloaded binaries over anything else on the PATH.
export PATH="${OUTPUT_BIN}:${PATH}"
|
|
|
|
# On macOS the GNU tools used by this script (notably 'timeout') come from
# Homebrew packages; their "gnubin" directories are prepended to PATH here.
case "$OSTYPE" in
  darwin*)
    brew_gnubin_packages=(coreutils)

    # Without Homebrew there is nothing to look up: print the hint and stop.
    type -P brew &>/dev/null || {
      echo "On MacOSX, you must install required binaries with the following command:"
      echo "brew install" "${brew_gnubin_packages[@]}"
      exit 1
    }

    for dep in "${brew_gnubin_packages[@]}"; do
      path_element="$(brew --prefix)/opt/${dep}/libexec/gnubin"
      # A missing gnubin directory means the package is not installed.
      [[ -d "${path_element}" ]] || {
        echo "'${path_element}' is missing. Quick fix: 'brew install ${dep}'."
        echo "On MacOSX, you must install required binaries with the following command:"
        echo "brew install" "${brew_gnubin_packages[@]}"
        exit 1
      }
      PATH="${path_element}:$PATH"
    done
    export PATH
    ;;
esac
|
|
|
|
#######################################
# Creates the kind cluster used by CI by running hack/kind-cluster-build.sh.
# Globals:   CHARTS_HOME (read), CLUSTER_ID (read)
# Arguments: none
# Outputs:   progress messages on stdout
# Returns:   0 on success; the build script's exit status if it fails
#            (in which case the success message is not printed)
#######################################
function ci::create_cluster() {
  echo "Creating a kind cluster ..."
  # Quoted so that a checkout path containing spaces still works.
  "${CHARTS_HOME}/hack/kind-cluster-build.sh" --name "pulsar-ci-${CLUSTER_ID}" -c 1 -v 10 || return
  echo "Successfully created a kind cluster."
}
|
|
|
|
#######################################
# Deletes the kind cluster named "pulsar-ci-<CLUSTER_ID>".
# Globals:   CLUSTER_ID (read)
# Arguments: none
# Outputs:   progress messages on stdout
# Returns:   0 on success; the exit status of 'kind delete cluster' if it fails
#            (in which case the success message is not printed)
#######################################
function ci::delete_cluster() {
  echo "Deleting a kind cluster ..."
  kind delete cluster --name="pulsar-ci-${CLUSTER_ID}" || return
  echo "Successfully deleted a kind cluster."
}
|
|
|
|
#######################################
# Installs cert-manager and blocks until three of its pods are in phase Running.
# Globals:   KUBECTL (read), CHARTS_HOME (read)
# Arguments: none
# Outputs:   progress and diagnostic listings on stdout
# Returns:   0 once three pods are Running; the installer's exit status if
#            scripts/cert-manager/install-cert-manager.sh fails.
# Note:      the wait itself has no upper bound.
#######################################
function ci::install_cert_manager() {
  local running

  echo "Installing the cert-manager ..."
  "${KUBECTL}" create namespace cert-manager
  "${CHARTS_HOME}/scripts/cert-manager/install-cert-manager.sh" || return

  while true; do
    # --no-headers: otherwise the table header is counted as a pod and the
    # loop would already stop with only two pods running.
    running=$("${KUBECTL}" get pods -n cert-manager --no-headers --field-selector=status.phase=Running | wc -l)
    if [[ ${running} -ge 3 ]]; then
      break
    fi
    echo "$((running))"
    sleep 15
    # Diagnostics to make a stuck installation visible in the CI log.
    "${KUBECTL}" get pods -n cert-manager
    "${KUBECTL}" get events --sort-by=.lastTimestamp -A | tail -n 30 || true
  done
  echo "Successfully installed the cert manager."
}
|
|
|
|
#######################################
# Prints the last 100 log lines of every container of all pods and jobs
# labelled app=pulsar in the namespace.
# Globals:   KUBECTL (read), NAMESPACE (read)
# Arguments: none
# Outputs:   log lines on stdout
# Returns:   0; failures of individual 'kubectl logs' calls are ignored on
#            purpose because this is best-effort diagnostics.
#######################################
function ci::print_pod_logs() {
  local k8sobject

  echo "Logs for all pulsar containers:"
  # Object names are read line by line on fd 3 so they are never word-split or
  # glob-expanded, and kubectl's stdin stays untouched.
  while IFS= read -r k8sobject <&3; do
    [[ -n "${k8sobject}" ]] || continue
    "${KUBECTL}" logs -n "${NAMESPACE}" "${k8sobject}" --all-containers=true --ignore-errors=true --prefix=true --tail=100 || true
  done 3< <("${KUBECTL}" get pods,jobs -n "${NAMESPACE}" -l app=pulsar -o=name)
}
|
|
|
|
#######################################
# Saves logs, events and resource dumps of the namespace into K8S_LOGS_DIR.
# For every pod/job labelled app=pulsar the current and the previous container
# logs are written; file names carry the shell's PID ($$) as a suffix.
# Globals:   KUBECTL (read), NAMESPACE (read), K8S_LOGS_DIR (read)
# Arguments: none
# Outputs:   one status line on stdout; files under K8S_LOGS_DIR
# Returns:   non-zero if K8S_LOGS_DIR cannot be created; otherwise the status
#            of the last (best-effort) kubectl call, which is forced to 0.
#######################################
function ci::collect_k8s_logs() {
  local k8sobject filebase

  # Write with explicit paths instead of 'cd', so the caller's working
  # directory is left alone and nothing is written elsewhere if the directory
  # cannot be created.
  mkdir -p "${K8S_LOGS_DIR}" || return
  echo "Collecting k8s logs to ${K8S_LOGS_DIR}"
  while IFS= read -r k8sobject <&3; do
    [[ -n "${k8sobject}" ]] || continue
    # "pod/name" -> "pod_name" so the object name is usable as a file name.
    filebase="${K8S_LOGS_DIR}/${k8sobject//\//_}"
    "${KUBECTL}" logs -n "${NAMESPACE}" "${k8sobject}" --all-containers=true --ignore-errors=true --prefix=true > "${filebase}.$$.log.txt" || true
    "${KUBECTL}" logs -n "${NAMESPACE}" "${k8sobject}" --all-containers=true --ignore-errors=true --prefix=true --previous=true > "${filebase}.previous.$$.log.txt" || true
  done 3< <("${KUBECTL}" get pods,jobs -n "${NAMESPACE}" -l app=pulsar -o=name)
  "${KUBECTL}" get events --sort-by=.lastTimestamp -A > "${K8S_LOGS_DIR}/events.$$.log.txt" || true
  "${KUBECTL}" get events --sort-by=.lastTimestamp -A -o yaml > "${K8S_LOGS_DIR}/events.$$.log.yaml" || true
  "${KUBECTL}" get -n "${NAMESPACE}" all -o yaml > "${K8S_LOGS_DIR}/k8s_resources.$$.yaml" || true
}
|
|
|
|
# Prints how many lines of `kubectl get pods -n ${NAMESPACE} [extra args]`
# match the grep pattern in $1; remaining arguments go to `kubectl get pods`.
# `wc -l` is used instead of `grep -c` so the exit status stays 0 when nothing
# matches (callers may run under `set -e`).
function ci::_count_matching_pods() {
  local pattern=$1
  shift
  "${KUBECTL}" get pods -n "${NAMESPACE}" "$@" | grep -- "${pattern}" | wc -l
}

#######################################
# Installs the pulsar chart as helm release ${CLUSTER} into ${NAMESPACE} and
# waits until a broker pod and a proxy pod are Running and reachable.
# Steps: create namespace, install cert-manager, run prepare_helm_release.sh,
# `helm dependency update`, render the templates, `helm install`, then wait.
# Globals:   KUBECTL, HELM, CHARTS_HOME, NAMESPACE, CLUSTER (all read)
# Arguments: $1 - helm values file
#            $2 - extra options for prepare_helm_release.sh (one string,
#                 split on whitespace)
# Outputs:   progress and diagnostics on stdout, timeout messages on stderr
# Returns:   0 on success; calls `exit 1` when one of the waits times out
#######################################
function ci::install_pulsar_chart() {
  local value_file=$1
  local extra_opts=$2
  local chart_dir="${CHARTS_HOME}/charts/pulsar"
  local prepare_script="${CHARTS_HOME}/scripts/pulsar/prepare_helm_release.sh"
  local toolset="${CLUSTER}-toolset-0"
  local running total counter

  echo "Installing the pulsar chart"
  "${KUBECTL}" create namespace "${NAMESPACE}"
  ci::install_cert_manager
  # extra_opts is left unquoted on purpose: it may carry several options.
  # shellcheck disable=SC2086
  echo "${prepare_script}" -k "${CLUSTER}" -n "${NAMESPACE}" ${extra_opts}
  # shellcheck disable=SC2086
  "${prepare_script}" -k "${CLUSTER}" -n "${NAMESPACE}" ${extra_opts}
  sleep 10

  echo "${HELM}" dependency update "${chart_dir}"
  "${HELM}" dependency update "${chart_dir}"
  echo "${HELM}" install --set initialize=true --values "${value_file}" --namespace="${NAMESPACE}" "${CLUSTER}" "${chart_dir}"
  # Render the templates into the CI log before installing.
  "${HELM}" template --values "${value_file}" "${CLUSTER}" "${chart_dir}"
  "${HELM}" install --set initialize=true --values "${value_file}" --namespace="${NAMESPACE}" "${CLUSTER}" "${chart_dir}"

  echo "wait until broker is alive"
  running=$(ci::_count_matching_pods "${CLUSTER}-broker" --field-selector=status.phase=Running)
  counter=1
  while [[ ${running} -lt 1 ]]; do
    ((counter++))
    echo "$((running))"
    sleep 15
    "${KUBECTL}" get pods,jobs -n "${NAMESPACE}"
    "${KUBECTL}" get events --sort-by=.lastTimestamp -A | tail -n 30 || true
    # Pod logs are dumped on every 20th iteration. The timeout check sits
    # inside that branch, so it is only evaluated on multiples of 20 and
    # first fires when counter reaches 120.
    if [[ $((counter % 20)) -eq 0 ]]; then
      ci::print_pod_logs
      if [[ ${counter} -gt 100 ]]; then
        echo >&2 "Timeout waiting..."
        exit 1
      fi
    fi
    # NOTE(review): this only fires when more than one broker pod is listed
    # (in any phase); confirm whether '-ge 1' was intended.
    total=$(ci::_count_matching_pods "${CLUSTER}-broker")
    if [[ ${total} -gt 1 ]]; then
      "${KUBECTL}" describe pod -n "${NAMESPACE}" "${CLUSTER}-broker-0"
      "${KUBECTL}" logs -n "${NAMESPACE}" "${CLUSTER}-broker-0"
    fi
    running=$(ci::_count_matching_pods "${CLUSTER}-broker" --field-selector=status.phase=Running)
  done
  # The broker service must resolve from the toolset pod and report "OK".
  timeout 300s "${KUBECTL}" exec -n "${NAMESPACE}" "${toolset}" -- bash -c "until nslookup ${CLUSTER}-broker; do sleep 3; done" || { echo >&2 "Timeout waiting..."; ci::print_pod_logs; exit 1; }
  timeout 120s "${KUBECTL}" exec -n "${NAMESPACE}" "${toolset}" -- bash -c "until [ \"\$(curl -L http://${CLUSTER}-broker:8080/status.html)\" == \"OK\" ]; do sleep 3; done" || { echo >&2 "Timeout waiting..."; ci::print_pod_logs; exit 1; }

  running=$(ci::_count_matching_pods "${CLUSTER}-proxy" --field-selector=status.phase=Running)
  counter=1
  while [[ ${running} -lt 1 ]]; do
    ((counter++))
    echo "$((running))"
    sleep 15
    "${KUBECTL}" get pods,jobs -n "${NAMESPACE}"
    "${KUBECTL}" get events --sort-by=.lastTimestamp -A | tail -n 30 || true
    # Same structure as the broker wait: logs every 8th iteration, and the
    # timeout first fires when counter reaches 24.
    if [[ $((counter % 8)) -eq 0 ]]; then
      ci::print_pod_logs
      if [[ ${counter} -gt 16 ]]; then
        echo >&2 "Timeout waiting..."
        exit 1
      fi
    fi
    running=$(ci::_count_matching_pods "${CLUSTER}-proxy" --field-selector=status.phase=Running)
  done
  timeout 300s "${KUBECTL}" exec -n "${NAMESPACE}" "${toolset}" -- bash -c "until nslookup ${CLUSTER}-proxy; do sleep 3; done" || { echo >&2 "Timeout waiting..."; ci::print_pod_logs; exit 1; }
  # The proxy status check is currently disabled:
  # ${KUBECTL} exec -n ${NAMESPACE} ${CLUSTER}-toolset-0 -- bash -c 'until [ "$(curl -L http://pulsar-ci-proxy:8080/status.html)" == "OK" ]; do sleep 3; done'
}
|
|
|
|
#######################################
# Smoke test: creates tenant "pulsar-ci" and namespace "pulsar-ci/test", then
# produces one message to pulsar-ci/test/test-topic through the toolset pod.
# Before that it waits for the broker and proxy service names to resolve and
# prints bookie disk usage, bookie configuration and the bookie lists.
# Globals:   KUBECTL (read), NAMESPACE (read), CLUSTER (read)
# Arguments: none
# Outputs:   command output on stdout, timeout message on stderr
# Returns:   1 if a service name does not resolve within 300s; otherwise the
#            exit status of the final `pulsar-client produce`
#######################################
function ci::test_pulsar_producer() {
  local toolset="${CLUSTER}-toolset-0"
  local bookie="${CLUSTER}-bookie-0"
  local service

  sleep 120
  # Bounded like the same lookups in ci::install_pulsar_chart, so a broken
  # service cannot hang the job forever.
  for service in broker proxy; do
    timeout 300s "${KUBECTL}" exec -n "${NAMESPACE}" "${toolset}" -- bash -c "until nslookup ${CLUSTER}-${service}; do sleep 3; done" \
      || { echo >&2 "Timeout waiting..."; return 1; }
  done
  "${KUBECTL}" exec -n "${NAMESPACE}" "${bookie}" -- df -h
  "${KUBECTL}" exec -n "${NAMESPACE}" "${bookie}" -- cat conf/bookkeeper.conf
  "${KUBECTL}" exec -n "${NAMESPACE}" "${toolset}" -- bin/bookkeeper shell listbookies -rw
  "${KUBECTL}" exec -n "${NAMESPACE}" "${toolset}" -- bin/bookkeeper shell listbookies -ro
  "${KUBECTL}" exec -n "${NAMESPACE}" "${toolset}" -- bin/pulsar-admin tenants create pulsar-ci
  "${KUBECTL}" exec -n "${NAMESPACE}" "${toolset}" -- bin/pulsar-admin namespaces create pulsar-ci/test
  "${KUBECTL}" exec -n "${NAMESPACE}" "${toolset}" -- bin/pulsar-client produce -m "test-message" pulsar-ci/test/test-topic
}
|
|
|
|
#######################################
# Polls `pulsar-admin functions status` for pulsar-ci/test/test-function every
# 15 seconds until its numRunning field is at least 1.
# Globals:   KUBECTL (read), NAMESPACE (read), CLUSTER (read)
# Arguments: $1 - optional maximum number of polls (non-negative integer);
#                 0 or omitted means wait without limit
# Outputs:   the polled value and diagnostic listings on stdout,
#            timeout message on stderr
# Returns:   0 once the function is running, 1 when the poll limit is reached
#######################################
function ci::wait_function_running() {
  local max_attempts="${1:-0}"
  local attempts=0
  local num_running

  while true; do
    num_running=$("${KUBECTL}" exec -n "${NAMESPACE}" "${CLUSTER}-toolset-0" -- bash -c 'bin/pulsar-admin functions status --tenant pulsar-ci --namespace test --name test-function | bin/jq .numRunning')
    # Anything that is not a plain number (empty output, "null") counts as 0.
    if [[ "${num_running}" =~ ^[0-9]+$ ]] && (( 10#${num_running} >= 1 )); then
      return 0
    fi
    attempts=$((attempts + 1))
    if (( max_attempts > 0 && attempts >= max_attempts )); then
      echo >&2 "Timeout waiting..."
      return 1
    fi
    echo "${num_running}"
    sleep 15
    "${KUBECTL}" get pods -n "${NAMESPACE}" --field-selector=status.phase=Running
    "${KUBECTL}" get events --sort-by=.lastTimestamp -A | tail -n 30 || true
  done
}
|
|
|
|
#######################################
# Polls `pulsar-admin functions stats` for pulsar-ci/test/test-function every
# 15 seconds until its processedSuccessfullyTotal field is at least 1.
# Globals:   KUBECTL (read), NAMESPACE (read), CLUSTER (read)
# Arguments: $1 - optional maximum number of polls (non-negative integer);
#                 0 or omitted means wait without limit
# Outputs:   the polled value and the full stats output on stdout,
#            timeout message on stderr
# Returns:   0 once a message was processed, 1 when the poll limit is reached
#######################################
function ci::wait_message_processed() {
  local max_attempts="${1:-0}"
  local attempts=0
  local num_processed

  while true; do
    num_processed=$("${KUBECTL}" exec -n "${NAMESPACE}" "${CLUSTER}-toolset-0" -- bash -c 'bin/pulsar-admin functions stats --tenant pulsar-ci --namespace test --name test-function | bin/jq .processedSuccessfullyTotal')
    # Anything that is not a plain number (empty output, "null") counts as 0.
    if [[ "${num_processed}" =~ ^[0-9]+$ ]] && (( 10#${num_processed} >= 1 )); then
      return 0
    fi
    attempts=$((attempts + 1))
    if (( max_attempts > 0 && attempts >= max_attempts )); then
      echo >&2 "Timeout waiting..."
      return 1
    fi
    echo "${num_processed}"
    sleep 15
    # Print the complete stats so a stuck function can be diagnosed from the log.
    "${KUBECTL}" exec -n "${NAMESPACE}" "${CLUSTER}-toolset-0" -- bin/pulsar-admin functions stats --tenant pulsar-ci --namespace test --name test-function
  done
}
|
|
|
|
#######################################
# Smoke test: registers the example ExclamationFunction as
# pulsar-ci/test/test-function (input pulsar-ci/test/test_input, output
# pulsar-ci/test/test_output) through the toolset pod.
# Before that it waits for the broker and proxy service names to resolve and
# prints bookie disk usage and the bookie lists.
# Globals:   KUBECTL (read), NAMESPACE (read), CLUSTER (read)
# Arguments: none
# Outputs:   command output on stdout, timeout message on stderr
# Returns:   1 if a service name does not resolve within 300s; otherwise the
#            exit status of `pulsar-admin functions create`
#######################################
function ci::test_pulsar_function() {
  local toolset="${CLUSTER}-toolset-0"
  local bookie="${CLUSTER}-bookie-0"
  local service

  sleep 120
  # Bounded like the same lookups in ci::install_pulsar_chart, so a broken
  # service cannot hang the job forever.
  for service in broker proxy; do
    timeout 300s "${KUBECTL}" exec -n "${NAMESPACE}" "${toolset}" -- bash -c "until nslookup ${CLUSTER}-${service}; do sleep 3; done" \
      || { echo >&2 "Timeout waiting..."; return 1; }
  done
  "${KUBECTL}" exec -n "${NAMESPACE}" "${bookie}" -- df -h
  "${KUBECTL}" exec -n "${NAMESPACE}" "${toolset}" -- bin/bookkeeper shell listbookies -rw
  "${KUBECTL}" exec -n "${NAMESPACE}" "${toolset}" -- bin/bookkeeper shell listbookies -ro
  "${KUBECTL}" exec -n "${NAMESPACE}" "${toolset}" -- bin/pulsar-admin functions create --tenant pulsar-ci --namespace test --name test-function --inputs "pulsar-ci/test/test_input" --output "pulsar-ci/test/test_output" --parallelism 1 --classname org.apache.pulsar.functions.api.examples.ExclamationFunction --jar /pulsar/examples/api-examples.jar

  # wait until the function is running
  # TODO: re-enable function test
  # ci::wait_function_running
  # ${KUBECTL} exec -n ${NAMESPACE} ${CLUSTER}-toolset-0 -- bin/pulsar-client produce -m "hello pulsar function!" pulsar-ci/test/test_input
  # ci::wait_message_processed
}
|