Add a clusters page and clusters endpoints (for backups later)

PostHog · Aug 15, 2023 · 5c3b65c · 5c3b65c
1 parent ae7af08
commit 5c3b65c
Show file tree

Hide file tree

Showing 9 changed files with 105 additions and 5 deletions.
diff --git a/frontend/src/Layout.tsx b/frontend/src/Layout.tsx
@@ -6,6 +6,7 @@ import Schema from './pages/SchemaStats/SchemaStats'
 import QueryDetail from './pages/SlowQueries/QueryDetail'
 import SchemaTable from './pages/SchemaStats/SchemaTable'
 import Overview from './pages/Overview/Overview'
+import Clusters from './pages/Clusters/Clusters'
 import Errors from './pages/Errors/Errors'
 import { Switch, Route, useHistory } from 'react-router-dom'
 
@@ -14,6 +15,7 @@ import RunningQueries from './pages/RunningQueries/RunningQueries'
 import Logs from './pages/Logs/Logs'
 import {
     ApartmentOutlined,
+    CloudServerOutlined,
     CodeOutlined,
     DashboardOutlined,
     HddOutlined,
@@ -36,6 +38,7 @@ type MenuItem = Required<MenuProps>['items'][number]
 
 const items: MenuItem[] = [
     { key: '', icon: <HomeOutlined />, label: 'Overview' },
+    { key: 'clusters', label: 'Clusters', icon: <CloudServerOutlined /> },
     { key: 'query_performance', label: 'Query performance', icon: <ClockCircleOutlined /> },
     { key: 'running_queries', label: 'Running queries', icon: <DashboardOutlined /> },
     { key: 'schema', label: 'Schema stats', icon: <HddOutlined /> },
@@ -98,6 +101,7 @@ export default function AppLayout(): JSX.Element {
                     <Content style={{ margin: 'auto', display: 'block', width: '85%', marginTop: 20 }}>
                         <Switch>
                             <Route exact path="/" component={Overview}></Route>
+                            <Route exact path="/clusters" component={Clusters}></Route>
                             <Route exact path="/disk_usage">
                                 <DiskUsage />
                             </Route>

diff --git a/frontend/src/pages/Clusters/Clusters.tsx b/frontend/src/pages/Clusters/Clusters.tsx
@@ -0,0 +1,53 @@
+import React, { useEffect, useState } from 'react'
+import { Line } from '@ant-design/charts'
+import { Card, Col, Row, Tooltip, notification } from 'antd'
+import { InfoCircleOutlined } from '@ant-design/icons'
+
+interface Cluster {
+    cluster: string
+}
+
+interface Clusters {
+    clusters: Cluster[]
+}
+
+/**
+ * Clusters page.
+ *
+ * Fetches the cluster list from `/api/clusters` once on mount and renders each
+ * entry's `cluster` name as a list item. Any failure while loading (network
+ * error, non-2xx status, or a payload that is not an array) is reported through
+ * an antd error notification and leaves the list unchanged.
+ */
+export default function Clusters(): JSX.Element {
+    const [clusters, setClusters] = useState<Clusters>({
+        clusters: [],
+    })
+
+    const loadData = async (): Promise<void> => {
+        try {
+            const res = await fetch('/api/clusters')
+            // fetch() only rejects on network failures; HTTP error statuses must be checked explicitly.
+            if (!res.ok) {
+                throw new Error(`Request failed with status ${res.status}`)
+            }
+            const resJson: unknown = await res.json()
+            // The render below calls `.map`, which would throw on a non-array payload.
+            if (!Array.isArray(resJson)) {
+                throw new Error('Unexpected response shape for /api/clusters')
+            }
+            setClusters({ clusters: resJson })
+        } catch (err) {
+            notification.error({ message: 'Failed to load data' })
+        }
+    }
+
+    // Load once when the component mounts.
+    useEffect(() => {
+        loadData()
+    }, [])
+
+    return (
+        <div>
+            <h1 style={{ textAlign: 'left' }}>Clusters</h1>
+            <br />
+            <Row gutter={8} style={{ paddingBottom: 8 }}>
+                <ul>
+                    {clusters.clusters.map((cluster) => (
+                        <li key={cluster.cluster}>{cluster.cluster}</li>
+                    ))}
+                </ul>
+            </Row>
+            <br />
+        </div>
+    )
+}
diff --git a/frontend/src/pages/Clusters/tips.ts b/frontend/src/pages/Clusters/tips.ts
@@ -0,0 +1,14 @@
+export const clickhouseTips = [
+    `Consider benchmarking different join algorithms if your queries contain expensive joins. You may find that algorithms other than the default perform significantly better for your workloads.`,
+    `If you store JSON data in a VARCHAR column, consider materializing frequently accessed properties using materialized columns for much faster queries.`,
+    `You can use the log_comment setting to add metadata to queries that will show up on the query log, including on distributed queries. For instance, you can add a stringified JSON object as a comment to tag queries for analysis.`,
+    `Dictionaries can be an effective tool in large data migrations or backfills.`,
+    `Make sure you push as many of your query filters down to the innermost subquery for better performance. Unlike other databases, ClickHouse does not have a query planner, so you want to minimize the amount of data fetched from other shards.`,
+    `If a column stores values with low cardinality (e.g. country codes), use the LowCardinality data type to improve performance and reduce storage usage. A low cardinality VARCHAR would be defined as LowCardinality(VARCHAR) in the table creation query.`,
+    `quantile is not an exact function but rather a sampled approximation. Use quantileExactExclusive for exact results.`,
+    `ClickHouse is great at introspection, and its system tables contain a lot of metadata about the server. Learning what information is available where can be a great tool in debugging issues and mapping out areas of improvement. A lot of HouseWatch features are effectively wrappers over ClickHouse system tables.`,
+    `Killing a mutation with KILL MUTATION does not kill ongoing merges triggered by the mutation. If you absolutely need to stop ongoing merges as well, you should use SYSTEM STOP MERGES. However, you should not keep merges off for too long, as you may end up with too many parts unmerged, which is problematic for ClickHouse.`,
+    `Set mutations_sync=2 on a mutation to wait for all replicas to complete the mutation.`,
+    `ClickHouse does not support changing table engines in place, requiring you thus to create a new table and move data to it. However, rather than using INSERT to move the data over, you can use ATTACH PARTITION for near-instant operations instead, provided the tables contain the same "structure" i.e. same columns/ORDER BY/PARTITION BY.`,
+    `Consider benchmarking different compression algorithms for large columns for more efficient queries and storage usage.`,
+]
diff --git a/housewatch/api/analyze.py b/housewatch/api/analyze.py
@@ -268,7 +268,6 @@ def tables(self, request: Request):
 
     @action(detail=False, methods=["POST"])
     def natural_language_query(self, request: Request):
-
         table_schema_sql_conditions = []
         for full_table_name in request.data["tables_to_query"]:
             database, table = full_table_name.split(">>>>>")

diff --git a/housewatch/api/cluster.py b/housewatch/api/cluster.py
@@ -0,0 +1,17 @@
+import structlog
+from rest_framework.decorators import action
+from rest_framework.request import Request
+from rest_framework.response import Response
+from rest_framework.viewsets import GenericViewSet
+from housewatch.clickhouse import clusters
+
+
+logger = structlog.get_logger(__name__)
+
+
+class ClusterViewset(GenericViewSet):
+    def list(self, request: Request) -> Response:
+        return Response(clusters.get_clusters())
+
+    def retrieve(self, request: Request, pk: str) -> Response:
+        return Response(clusters.get_cluster(pk))
diff --git a/housewatch/api/instance.py b/housewatch/api/instance.py
@@ -1,7 +1,11 @@
 import structlog
+from rest_framework.decorators import action
+from rest_framework.request import Request
+from rest_framework.response import Response
 from rest_framework.viewsets import ModelViewSet
 from rest_framework.serializers import ModelSerializer
 from housewatch.models import Instance
+from housewatch.clickhouse import clusters
 
 
 logger = structlog.get_logger(__name__)

diff --git a/housewatch/celery.py b/housewatch/celery.py
@@ -17,10 +17,6 @@
 # Load task modules from all registered Django app configs.
 app.autodiscover_tasks()
 
-# Make sure Redis doesn't add too many connections
-# https://stackoverflow.com/questions/47106592/redis-connections-not-being-released-after-celery-task-is-complete
-app.conf.broker_pool_limit = 0
-
 app.steps["worker"].add(DjangoStructLogInitStep)
 
 

diff --git a/housewatch/clickhouse/clusters.py b/housewatch/clickhouse/clusters.py
@@ -0,0 +1,11 @@
+from housewatch.clickhouse.client import run_query
+
+
+def get_clusters():
+    QUERY = """Select cluster FROM system.clusters GROUP BY cluster"""
+    return run_query(QUERY)
+
+
+def get_cluster(cluster):
+    QUERY = """Select * FROM system.clusters WHERE cluster = '%(cluster_name)s' """
+    return run_query(QUERY, {"cluster_name": cluster})
diff --git a/housewatch/urls.py b/housewatch/urls.py
@@ -3,6 +3,7 @@
 from django.urls import path
 from rest_framework_extensions.routers import ExtendedDefaultRouter
 from housewatch.api.instance import InstanceViewset
+from housewatch.api.cluster import ClusterViewset
 from housewatch.api.analyze import AnalyzeViewset
 from housewatch.api.async_migration import AsyncMigrationsViewset
 from housewatch.views import healthz
@@ -19,6 +20,7 @@ def __init__(self, *args, **kwargs):
 
 router = DefaultRouterPlusPlus()
 router.register(r"api/instance", InstanceViewset, basename="instance")
+router.register(r"api/clusters", ClusterViewset, basename="cluster")
 router.register(r"api/analyze", AnalyzeViewset, basename="analyze")
 router.register(r"api/async_migrations", AsyncMigrationsViewset, basename="async_migrations")
 router.register(r"api/saved_queries", SavedQueryViewset, basename="saved_queries")