Skip to content

Commit

Permalink
Add a clusters page and clusters endpoints (for backups later)
Browse files Browse the repository at this point in the history
  • Loading branch information
fuziontech committed Aug 15, 2023
1 parent ae7af08 commit 5c3b65c
Show file tree
Hide file tree
Showing 9 changed files with 105 additions and 5 deletions.
4 changes: 4 additions & 0 deletions frontend/src/Layout.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import Schema from './pages/SchemaStats/SchemaStats'
import QueryDetail from './pages/SlowQueries/QueryDetail'
import SchemaTable from './pages/SchemaStats/SchemaTable'
import Overview from './pages/Overview/Overview'
import Clusters from './pages/Clusters/Clusters'
import Errors from './pages/Errors/Errors'
import { Switch, Route, useHistory } from 'react-router-dom'

Expand All @@ -14,6 +15,7 @@ import RunningQueries from './pages/RunningQueries/RunningQueries'
import Logs from './pages/Logs/Logs'
import {
ApartmentOutlined,
CloudServerOutlined,
CodeOutlined,
DashboardOutlined,
HddOutlined,
Expand All @@ -36,6 +38,7 @@ type MenuItem = Required<MenuProps>['items'][number]

const items: MenuItem[] = [
{ key: '', icon: <HomeOutlined />, label: 'Overview' },
{ key: 'clusters', label: 'Clusters', icon: <CloudServerOutlined /> },
{ key: 'query_performance', label: 'Query performance', icon: <ClockCircleOutlined /> },
{ key: 'running_queries', label: 'Running queries', icon: <DashboardOutlined /> },
{ key: 'schema', label: 'Schema stats', icon: <HddOutlined /> },
Expand Down Expand Up @@ -98,6 +101,7 @@ export default function AppLayout(): JSX.Element {
<Content style={{ margin: 'auto', display: 'block', width: '85%', marginTop: 20 }}>
<Switch>
<Route exact path="/" component={Overview}></Route>
<Route exact path="/clusters" component={Clusters}></Route>
<Route exact path="/disk_usage">
<DiskUsage />
</Route>
Expand Down
53 changes: 53 additions & 0 deletions frontend/src/pages/Clusters/Clusters.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import React, { useEffect, useState } from 'react'
import { Line } from '@ant-design/charts'
import { Card, Col, Row, Tooltip, notification } from 'antd'
import { InfoCircleOutlined } from '@ant-design/icons'

/** One row of the `/api/clusters` response, as consumed by this page. */
interface Cluster {
    cluster: string
}

/** Shape of the component state holding the fetched cluster rows. */
interface Clusters {
    clusters: Cluster[]
}

/**
 * Clusters page.
 *
 * Fetches `/api/clusters` once on mount and renders the `cluster` field of
 * each returned row as a bullet list. Any failure (network error, non-2xx
 * status, unparseable or non-array body) is reported through an error
 * notification and leaves the currently displayed list untouched.
 */
export default function Clusters(): JSX.Element {
    const [clusters, setClusters] = useState<Clusters>({
        clusters: [],
    })

    const loadData = async (): Promise<void> => {
        try {
            const res = await fetch('/api/clusters')
            // fetch() resolves on HTTP error statuses too, so they must be detected explicitly.
            if (!res.ok) {
                throw new Error(`Unexpected response status ${res.status}`)
            }
            const resJson: unknown = await res.json()
            // A non-array payload would make the .map() in the render below throw.
            if (!Array.isArray(resJson)) {
                throw new Error('Expected an array of clusters')
            }
            setClusters({ clusters: resJson as Cluster[] })
        } catch (err) {
            notification.error({ message: 'Failed to load data' })
        }
    }

    // Load once on mount; loadData handles its own errors, so the promise never rejects.
    useEffect(() => {
        loadData()
    }, [])

    return (
        <div>
            <h1 style={{ textAlign: 'left' }}>Clusters</h1>
            <br />
            <Row gutter={8} style={{ paddingBottom: 8 }}>
                <ul>
                    {clusters.clusters.map((cluster) => (
                        <li key={cluster.cluster}>{cluster.cluster}</li>
                    ))}
                </ul>
            </Row>
            <br />
        </div>
    )
}
14 changes: 14 additions & 0 deletions frontend/src/pages/Clusters/tips.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
/**
 * Short, user-facing ClickHouse tips.
 *
 * Each entry is a standalone piece of advice rendered as plain text.
 */
export const clickhouseTips = [
    `Consider benchmarking different join algorithms if your queries contain expensive joins. You may find that algorithms other than the default perform significantly better for your workloads.`,
    `If you store JSON data in a VARCHAR column, consider materializing frequently accessed properties using materialized columns for much faster queries.`,
    `You can use the log_comment setting to add metadata to queries that will show up on the query log, including on distributed queries. For instance, you can add a stringified JSON object as a comment to tag queries for analysis.`,
    `Dictionaries can be an effective tool in large data migrations or backfills.`,
    `Make sure you push as many of your query filters down to the innermost subquery for better performance. Unlike other databases, ClickHouse does not have a query planner, so you want to minimize the amount of data fetched from other shards.`,
    `If a column stores values with low cardinality (e.g. country codes), use the LowCardinality data type to improve performance and reduce storage usage. A low cardinality VARCHAR would be defined as LowCardinality(VARCHAR) in the table creation query.`,
    `quantile is not an exact function but rather a sampled approximation. Use quantileExactExclusive for exact results.`,
    `ClickHouse is great at introspection, and its system tables contain a lot of metadata about the server. Learning what information is available where can be a great tool in debugging issues and mapping out areas of improvement. A lot of HouseWatch features are effectively wrappers over ClickHouse system tables.`,
    `Killing a mutation with KILL MUTATION does not kill ongoing merges triggered by the mutation. If you absolutely need to stop ongoing merges as well, you should use SYSTEM STOP MERGES. However, you should not keep merges off for too long, as you may end up with too many parts unmerged, which is problematic for ClickHouse.`,
    `Set mutations_sync=2 on a mutation to wait for all replicas to complete the mutation.`,
    `ClickHouse does not support changing table engines in place, requiring you thus to create a new table and move data to it. However, rather than using INSERT to move the data over, you can use ATTACH PARTITION for near-instant operations instead, provided the tables contain the same "structure" i.e. same columns/ORDER BY/PARTITION BY.`,
    `Consider benchmarking different compression algorithms for large columns for more efficient queries and storage usage.`,
]
1 change: 0 additions & 1 deletion housewatch/api/analyze.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,6 @@ def tables(self, request: Request):

@action(detail=False, methods=["POST"])
def natural_language_query(self, request: Request):

table_schema_sql_conditions = []
for full_table_name in request.data["tables_to_query"]:
database, table = full_table_name.split(">>>>>")
Expand Down
17 changes: 17 additions & 0 deletions housewatch/api/cluster.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import structlog
from rest_framework.decorators import action
from rest_framework.request import Request
from rest_framework.response import Response
from rest_framework.viewsets import GenericViewSet
from housewatch.clickhouse import clusters


logger = structlog.get_logger(__name__)


class ClusterViewset(GenericViewSet):
    """Viewset whose handlers delegate to the `housewatch.clickhouse.clusters` helpers."""

    def list(self, request: Request) -> Response:
        """Wrap the result of `clusters.get_clusters()` in a `Response`."""
        all_clusters = clusters.get_clusters()
        return Response(all_clusters)

    def retrieve(self, request: Request, pk: str) -> Response:
        """Wrap the result of `clusters.get_cluster(pk)` in a `Response`."""
        cluster_rows = clusters.get_cluster(pk)
        return Response(cluster_rows)
4 changes: 4 additions & 0 deletions housewatch/api/instance.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
import structlog
from rest_framework.decorators import action
from rest_framework.request import Request
from rest_framework.response import Response
from rest_framework.viewsets import ModelViewSet
from rest_framework.serializers import ModelSerializer
from housewatch.models import Instance
from housewatch.clickhouse import clusters


logger = structlog.get_logger(__name__)
Expand Down
4 changes: 0 additions & 4 deletions housewatch/celery.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,6 @@
# Load task modules from all registered Django app configs.
app.autodiscover_tasks()

# Make sure Redis doesn't add too many connections
# https://stackoverflow.com/questions/47106592/redis-connections-not-being-released-after-celery-task-is-complete
app.conf.broker_pool_limit = 0

app.steps["worker"].add(DjangoStructLogInitStep)


Expand Down
11 changes: 11 additions & 0 deletions housewatch/clickhouse/clusters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from housewatch.clickhouse.client import run_query


def get_clusters():
    """Query `system.clusters` for its distinct `cluster` values and return `run_query`'s result."""
    return run_query("""Select cluster FROM system.clusters GROUP BY cluster""")


def get_cluster(cluster):
    """Query `system.clusters` for the rows whose `cluster` column equals `cluster`.

    Returns whatever `run_query` returns for that query.
    """
    # NOTE(review): the placeholder below is wrapped in literal single quotes.
    # Whether that is correct (and safe against injection) depends on how
    # run_query substitutes params, which is not visible here -- confirm the
    # value is escaped, since callers may pass a request-supplied identifier.
    query_params = {"cluster_name": cluster}
    return run_query("""Select * FROM system.clusters WHERE cluster = '%(cluster_name)s' """, query_params)
2 changes: 2 additions & 0 deletions housewatch/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from django.urls import path
from rest_framework_extensions.routers import ExtendedDefaultRouter
from housewatch.api.instance import InstanceViewset
from housewatch.api.cluster import ClusterViewset
from housewatch.api.analyze import AnalyzeViewset
from housewatch.api.async_migration import AsyncMigrationsViewset
from housewatch.views import healthz
Expand All @@ -19,6 +20,7 @@ def __init__(self, *args, **kwargs):

router = DefaultRouterPlusPlus()
router.register(r"api/instance", InstanceViewset, basename="instance")
router.register(r"api/clusters", ClusterViewset, basename="cluster")
router.register(r"api/analyze", AnalyzeViewset, basename="analyze")
router.register(r"api/async_migrations", AsyncMigrationsViewset, basename="async_migrations")
router.register(r"api/saved_queries", SavedQueryViewset, basename="saved_queries")
Expand Down

0 comments on commit 5c3b65c

Please sign in to comment.