From 8158ad1cbfcf5f616bca212ee0715ef995bd8c24 Mon Sep 17 00:00:00 2001 From: Kris McGinnes Date: Tue, 9 Jul 2024 20:39:44 -0500 Subject: [PATCH] Improve Gremlin schema sync performance --- Changelog.md | 3 ++ .../src/connector/gremlin/gremlinExplorer.ts | 46 ++++++++++++------- .../connector/gremlin/queries/fetchSchema.ts | 10 +++- .../templates/edgesSchemaTemplate.test.ts | 14 ++++-- .../gremlin/templates/edgesSchemaTemplate.ts | 13 +++++- .../templates/verticesSchemaTemplate.test.ts | 12 +++-- .../templates/verticesSchemaTemplate.ts | 34 +++++++++++--- .../connector/testUtils/globalMockFetch.ts | 6 +-- packages/graph-explorer/src/setupTests.ts | 6 ++- packages/graph-explorer/src/utils/index.ts | 1 + packages/graph-explorer/src/utils/logger.ts | 29 ++++++++++++ 11 files changed, 133 insertions(+), 41 deletions(-) create mode 100644 packages/graph-explorer/src/utils/logger.ts diff --git a/Changelog.md b/Changelog.md index f1d780b81..dd1624289 100644 --- a/Changelog.md +++ b/Changelog.md @@ -6,6 +6,9 @@ - Search in openCypher will now execute a single request when searching across all labels () +- Gremlin schema sync will be much faster on larger databases, thanks to + @dsaban-lightricks for his great suggestion in issue #225 + () **Bug Fixes and Minor Changes** diff --git a/packages/graph-explorer/src/connector/gremlin/gremlinExplorer.ts b/packages/graph-explorer/src/connector/gremlin/gremlinExplorer.ts index 31d4c1ec9..cd67175af 100644 --- a/packages/graph-explorer/src/connector/gremlin/gremlinExplorer.ts +++ b/packages/graph-explorer/src/connector/gremlin/gremlinExplorer.ts @@ -8,7 +8,7 @@ import { fetchDatabaseRequest } from "../fetchDatabaseRequest"; import { GraphSummary } from "./types"; import { v4 } from "uuid"; import { Explorer } from "../useGEFetchTypes"; -import { env } from "../../utils"; +import { logger } from "../../utils"; function _gremlinFetch(connection: ConnectionConfig, options: any) { return async (queryTemplate: string) => { @@ -30,41 +30,53 
@@ function _gremlinFetch(connection: ConnectionConfig, options: any) { }; } +async function fetchSummary( + connection: ConnectionConfig, + options: RequestInit +) { + try { + const response = await fetchDatabaseRequest( + connection, + `${connection.url}/pg/statistics/summary?mode=detailed`, + { + method: "GET", + ...options, + } + ); + return response.payload.graphSummary as GraphSummary; + } catch (error) { + logger.error( + "[Gremlin Explorer] Failed to gather summary statistics", + error + ); + } +} + export function createGremlinExplorer(connection: ConnectionConfig): Explorer { return { connection: connection, async fetchSchema(options) { - let summary; - try { - const response = await fetchDatabaseRequest( - connection, - `${connection.url}/pg/statistics/summary?mode=detailed`, - { - method: "GET", - ...options, - } - ); - summary = (response.payload.graphSummary as GraphSummary) || undefined; - } catch (e) { - if (env.DEV) { - console.error("[Summary API]", e); - } - } + logger.log("[Gremlin Explorer] Fetching schema..."); + const summary = await fetchSummary(connection, options); return fetchSchema(_gremlinFetch(connection, options), summary); }, async fetchVertexCountsByType(req, options) { + logger.log("[Gremlin Explorer] Fetching vertex counts by type..."); return fetchVertexTypeCounts(_gremlinFetch(connection, options), req); }, async fetchNeighbors(req, options) { + logger.log("[Gremlin Explorer] Fetching neighbors..."); return fetchNeighbors(_gremlinFetch(connection, options), req); }, async fetchNeighborsCount(req, options) { + logger.log("[Gremlin Explorer] Fetching neighbors count..."); return fetchNeighborsCount(_gremlinFetch(connection, options), req); }, async keywordSearch(req, options) { options ??= {}; options.queryId = v4(); + logger.log("[Gremlin Explorer] Fetching keyword search..."); return keywordSearch(_gremlinFetch(connection, options), req); }, }; diff --git a/packages/graph-explorer/src/connector/gremlin/queries/fetchSchema.ts 
b/packages/graph-explorer/src/connector/gremlin/queries/fetchSchema.ts index 068538240..ae9eb40d4 100644 --- a/packages/graph-explorer/src/connector/gremlin/queries/fetchSchema.ts +++ b/packages/graph-explorer/src/connector/gremlin/queries/fetchSchema.ts @@ -1,4 +1,4 @@ -import { sanitizeText } from "../../../utils"; +import { logger, sanitizeText } from "../../../utils"; import type { SchemaResponse } from "../../useGEFetchTypes"; import edgeLabelsTemplate from "../templates/edgeLabelsTemplate"; import edgesSchemaTemplate from "../templates/edgesSchemaTemplate"; @@ -87,6 +87,7 @@ const fetchVertexLabels = async ( gremlinFetch: GremlinFetch ): Promise> => { const labelsTemplate = vertexLabelsTemplate(); + logger.log("[Gremlin Explorer] Fetching vertex labels with counts..."); const data = await gremlinFetch(labelsTemplate); const values = data.result.data["@value"][0]["@value"]; @@ -121,6 +122,7 @@ const fetchVerticesAttributes = async ( types: labels, }); + logger.log("[Gremlin Explorer] Fetching vertices attributes..."); const response = await gremlinFetch(verticesTemplate); const verticesSchemas = response.result.data["@value"][0]["@value"]; @@ -163,6 +165,7 @@ const fetchEdgeLabels = async ( gremlinFetch: GremlinFetch ): Promise> => { const labelsTemplate = edgeLabelsTemplate(); + logger.log("[Gremlin Explorer] Fetching edge labels with counts..."); const data = await gremlinFetch(labelsTemplate); const values = data.result.data["@value"][0]["@value"]; @@ -187,6 +190,7 @@ const fetchEdgesAttributes = async ( const edgesTemplate = edgesSchemaTemplate({ types: labels, }); + logger.log("[Gremlin Explorer] Fetching edges attributes..."); const data = await gremlinFetch(edgesTemplate); const edgesSchemas = data.result.data["@value"][0]["@value"]; @@ -238,6 +242,8 @@ const fetchSchema = async ( summary?: GraphSummary ): Promise => { if (!summary) { + logger.log("[Gremlin Explorer] No summary statistics"); + const vertices = await fetchVerticesSchema(gremlinFetch); 
const totalVertices = vertices.reduce((total, vertex) => { return total + (vertex.total ?? 0); @@ -256,6 +262,8 @@ const fetchSchema = async ( }; } + logger.log("[Gremlin Explorer] Using summary statistics"); + const vertices = await fetchVerticesAttributes( gremlinFetch, summary.nodeLabels, diff --git a/packages/graph-explorer/src/connector/gremlin/templates/edgesSchemaTemplate.test.ts b/packages/graph-explorer/src/connector/gremlin/templates/edgesSchemaTemplate.test.ts index b36b9624a..bb6903618 100644 --- a/packages/graph-explorer/src/connector/gremlin/templates/edgesSchemaTemplate.test.ts +++ b/packages/graph-explorer/src/connector/gremlin/templates/edgesSchemaTemplate.test.ts @@ -1,14 +1,18 @@ import edgesSchemaTemplate from "./edgesSchemaTemplate"; +import { normalizeWithNoSpace as normalize } from "../../../utils/testing"; describe("Gremlin > edgesSchemaTemplate", () => { it("Should return a template with the projection of each type", () => { const template = edgesSchemaTemplate({ types: ["route", "contain"] }); - expect(template).toBe( - 'g.E().project("route","contain")' + - '.by(V().bothE("route").limit(1))' + - '.by(V().bothE("contain").limit(1))' + - ".limit(1)" + expect(normalize(template)).toBe( + normalize(` + g.E() + .project("route", "contain") + .by(V().bothE("route").limit(1)) + .by(V().bothE("contain").limit(1)) + .limit(1) + `) ); }); }); diff --git a/packages/graph-explorer/src/connector/gremlin/templates/edgesSchemaTemplate.ts b/packages/graph-explorer/src/connector/gremlin/templates/edgesSchemaTemplate.ts index 41fd88f36..dd684635a 100644 --- a/packages/graph-explorer/src/connector/gremlin/templates/edgesSchemaTemplate.ts +++ b/packages/graph-explorer/src/connector/gremlin/templates/edgesSchemaTemplate.ts @@ -1,3 +1,4 @@ +import dedent from "dedent"; import { uniq } from "lodash"; /** @@ -14,7 +15,15 @@ import { uniq } from "lodash"; * .limit(1) */ export default function edgesSchemaTemplate({ types }: { types: string[] }) { - const labels = 
uniq(types.flatMap(type => type.split("::"))); + // Labels with quotes + const labels = uniq(types.flatMap(type => type.split("::"))).map( + label => `"${label}"` + ); - return `g.E().project(${labels.map(l => `"${l}"`).join(",")})${labels.map(l => `.by(V().bothE("${l}").limit(1))`).join("")}.limit(1)`; + return dedent` + g.E() + .project(${labels.join(", ")}) + ${labels.map(label => `.by(V().bothE(${label}).limit(1))`).join("\n ")} + .limit(1) + `; } diff --git a/packages/graph-explorer/src/connector/gremlin/templates/verticesSchemaTemplate.test.ts b/packages/graph-explorer/src/connector/gremlin/templates/verticesSchemaTemplate.test.ts index f5eae5adb..e7718e00a 100644 --- a/packages/graph-explorer/src/connector/gremlin/templates/verticesSchemaTemplate.test.ts +++ b/packages/graph-explorer/src/connector/gremlin/templates/verticesSchemaTemplate.test.ts @@ -7,10 +7,14 @@ describe("Gremlin > verticesSchemaTemplate", () => { expect(normalize(template)).toBe( normalize(` - g.V().project("airport","country") - .by(V().hasLabel("airport").limit(1)) - .by(V().hasLabel("country").limit(1)) - .limit(1) + g.V().union( + __.hasLabel("airport").limit(1), + __.hasLabel("country").limit(1) + ) + .fold() + .project("airport", "country") + .by(unfold().hasLabel("airport")) + .by(unfold().hasLabel("country")) `) ); }); diff --git a/packages/graph-explorer/src/connector/gremlin/templates/verticesSchemaTemplate.ts b/packages/graph-explorer/src/connector/gremlin/templates/verticesSchemaTemplate.ts index 18621b188..e10b583a8 100644 --- a/packages/graph-explorer/src/connector/gremlin/templates/verticesSchemaTemplate.ts +++ b/packages/graph-explorer/src/connector/gremlin/templates/verticesSchemaTemplate.ts @@ -1,3 +1,4 @@ +import dedent from "dedent"; import { uniq } from "lodash"; /** @@ -5,16 +6,35 @@ import { uniq } from "lodash"; * one sample of each node label. 
* * @example - * types = ["route", "contain"] + * types = ["airport", "country"] * * g.V() - * .project("airport","country") - * .by(V().hasLabel("airport").limit(1)) - * .by(V().hasLabel("country").limit(1)) - * .limit(1) + * .union( + * __.hasLabel("airport").limit(1), + * __.hasLabel("country").limit(1) + * ) + * .fold() + * .project( + * "airport", "country" + * ) + * .by(unfold().hasLabel("airport")) + * .by(unfold().hasLabel("country")) */ export default function verticesSchemaTemplate({ types }: { types: string[] }) { - const labels = uniq(types.flatMap(type => type.split("::"))); + // Labels with quotes + const labels = uniq(types.flatMap(type => type.split("::"))).map( + label => `"${label}"` + ); - return `g.V().project(${labels.map(l => `"${l}"`).join(",")})${labels.map(l => `.by(V().hasLabel("${l}").limit(1))`).join("")}.limit(1)`; + return dedent` + g.V() + .union( + ${labels.map(label => `__.hasLabel(${label}).limit(1)`).join(",\n ")} + ) + .fold() + .project( + ${labels.join(",\n ")} + ) + ${labels.map(label => `.by(unfold().hasLabel(${label}))`).join("\n ")} + `; } diff --git a/packages/graph-explorer/src/connector/testUtils/globalMockFetch.ts b/packages/graph-explorer/src/connector/testUtils/globalMockFetch.ts index 95a1e6dc3..a83f3635c 100644 --- a/packages/graph-explorer/src/connector/testUtils/globalMockFetch.ts +++ b/packages/graph-explorer/src/connector/testUtils/globalMockFetch.ts @@ -2,9 +2,9 @@ import { shortHash } from "./shortHash"; const GREMLIN = "../gremlin/queries/__mock"; const RESPONSES_FILES_MAP: Record = { - "3e5ee5ec": `${GREMLIN}/vertices-schema.json`, + "6281d1a5": `${GREMLIN}/vertices-schema.json`, "186857e1": `${GREMLIN}/vertices-labels-and-counts.json`, - "5766be04": `${GREMLIN}/edges-schema.json`, + "2c38e2dd": `${GREMLIN}/edges-schema.json`, "7062d2e": `${GREMLIN}/edges-labels-and-counts.json`, "35be2501": `${GREMLIN}/should-return-1-random-node.json`, "54fa1494": 
`${GREMLIN}/should-return-airports-whose-code-matches-with-SFA.json`, @@ -23,7 +23,7 @@ const globalMockFetch = () => { const filePath = RESPONSES_FILES_MAP[key]; if (!filePath) { throw new Error( - `Failed to find a response file in the map for key '${key}'`, + `Failed to find a response file in the map for key '${key}' and URL '${url}'`, { cause: { url } } ); } diff --git a/packages/graph-explorer/src/setupTests.ts b/packages/graph-explorer/src/setupTests.ts index 0efba028d..db53deb1f 100644 --- a/packages/graph-explorer/src/setupTests.ts +++ b/packages/graph-explorer/src/setupTests.ts @@ -12,8 +12,10 @@ import "@testing-library/jest-dom/extend-expect"; // Mock the env module jest.mock("./utils/env", () => { return { - DEV: true, - PROD: false, + env: { + DEV: true, + PROD: false, + }, }; }); diff --git a/packages/graph-explorer/src/utils/index.ts b/packages/graph-explorer/src/utils/index.ts index dd06aea1e..4ef350a43 100644 --- a/packages/graph-explorer/src/utils/index.ts +++ b/packages/graph-explorer/src/utils/index.ts @@ -7,5 +7,6 @@ export { default as useClickOutside } from "./useClickOutside"; export { default as sanitizeText } from "./sanitizeText"; export { DEFAULT_SERVICE_TYPE } from "./constants"; export { default as escapeString } from "./escapeString"; +export { default as logger } from "./logger"; export * from "./set"; export * from "./env"; diff --git a/packages/graph-explorer/src/utils/logger.ts b/packages/graph-explorer/src/utils/logger.ts new file mode 100644 index 000000000..e99d31835 --- /dev/null +++ b/packages/graph-explorer/src/utils/logger.ts @@ -0,0 +1,29 @@ +/* eslint-disable no-console */ + +import { env } from "./env"; + +/* +# DEV NOTE + +This is a simple logging utility that will allow `console.log` calls any time +`env.DEV === true`. This will be useful for local development and debugging. 
/*
I can imagine a future where this logger has some additional functionality where
it can send errors to the server and maybe allow the user to enable debug logging
at runtime.
*/

/**
 * Minimal logging facade over the global `console`.
 *
 * Every method forwards `message` followed by each optional parameter as its
 * own argument, so `logger.error("msg", err)` produces the same output as
 * `console.error("msg", err)`. The rest parameters must be spread: passing the
 * rest array itself would print the extras wrapped in an array (and print a
 * stray `[]` when no extras are given).
 */
const logger = {
  /**
   * Calls `console.log` if the app is running in DEV mode.
   *
   * @param message Primary value or format string to log.
   * @param optionalParams Additional values forwarded individually.
   */
  log(message?: unknown, ...optionalParams: unknown[]): void {
    if (env.DEV) {
      console.log(message, ...optionalParams);
    }
  },
  /**
   * Calls `console.warn`.
   *
   * @param message Primary value or format string to log.
   * @param optionalParams Additional values forwarded individually.
   */
  warn(message?: unknown, ...optionalParams: unknown[]): void {
    console.warn(message, ...optionalParams);
  },
  /**
   * Calls `console.error`.
   *
   * @param message Primary value or format string to log.
   * @param optionalParams Additional values forwarded individually.
   */
  error(message?: unknown, ...optionalParams: unknown[]): void {
    console.error(message, ...optionalParams);
  },
};

export default logger;