Skip to content

Commit dcdb85a

Browse files
committed
[MNG-7592] String deduplication in model building
1 parent 8b3e640 commit dcdb85a

File tree

3 files changed

+399
-1
lines changed

3 files changed

+399
-1
lines changed

api/maven-api-model/src/main/mdo/maven.mdo

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1354,7 +1354,7 @@
13541354
*/
13551355
public String getManagementKey() {
// Lazily computed: the key is built on the first call and cached in the managementKey field.
13561356
if (managementKey == null) {
// Diff hunk: the assignment under "1357-" is the version removed by this commit,
// the assignment under "1357+" is its replacement.
1357-
managementKey = getGroupId() + ":" + getArtifactId() + ":" + getType() + (getClassifier() != null ? ":" + getClassifier() : "");
1357+
// Key format: groupId:artifactId:type, followed by ":classifier" only when a classifier is set.
// intern() returns the canonical pooled String, so equal keys share a single instance.
managementKey = (getGroupId() + ":" + getArtifactId() + ":" + getType() + (getClassifier() != null ? ":" + getClassifier() : "")).intern();
13581358
}
13591359
return managementKey;
13601360
}
Lines changed: 377 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,377 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.apache.maven.model.pom;
20+
21+
import java.io.InputStream;
import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import org.apache.maven.api.model.Model;
import org.apache.maven.model.v4.MavenStaxReader;
36+
37+
/**
38+
* A utility class that analyzes Maven POM files to identify memory usage patterns and potential memory optimizations.
39+
* This analyzer focuses on identifying duplicate strings and their memory impact across different paths in the POM structure.
40+
*
41+
* <p>The analyzer processes POM files recursively, tracking string occurrences and their locations within the POM structure.
42+
* It can identify areas where string deduplication could provide significant memory savings.</p>
43+
*
44+
* <p>Usage example:</p>
45+
* <pre>
46+
* PomMemoryAnalyzer analyzer = new PomMemoryAnalyzer();
47+
* Model model = reader.read(Files.newInputStream(pomPath));
48+
* analyzer.analyzePom(model);
49+
* analyzer.printAnalysis();
50+
* </pre>
51+
*
52+
* <p>The analysis output includes:</p>
53+
* <ul>
54+
* <li>Total memory usage per POM path</li>
55+
* <li>Potential memory savings through string deduplication</li>
56+
* <li>Most frequent string values and their occurrence counts</li>
57+
* <li>Statistics grouped by POM element types</li>
58+
* </ul>
59+
*
60+
* <p>This tool is particularly useful for identifying memory optimization opportunities
61+
* in large Maven multi-module projects where POM files may contain significant
62+
* duplicate content.</p>
63+
*/
64+
public class PomMemoryAnalyzer {
65+
private final Map<String, Map<String, StringStats>> pathStats = new HashMap<>();
66+
private final Map<String, Integer> globalStringFrequency = new HashMap<>();
67+
private int totalPoms = 0;
68+
69+
public static void main(String[] args) throws Exception {
70+
if (args.length < 1) {
71+
System.out.println("Usage: PomMemoryAnalyzer <directory-with-poms>");
72+
System.exit(1);
73+
}
74+
75+
Path rootDir = Paths.get(args[0]);
76+
PomMemoryAnalyzer analyzer = new PomMemoryAnalyzer();
77+
MavenStaxReader reader = new MavenStaxReader();
78+
79+
// Find all pom.xml files, excluding those under src/ or target/
80+
Files.walk(rootDir)
81+
.filter(path -> path.getFileName().toString().equals("pom.xml"))
82+
.filter(path -> !containsSrcOrTarget(path))
83+
.forEach(pomPath -> {
84+
try {
85+
Model model = reader.read(Files.newInputStream(pomPath));
86+
analyzer.analyzePom(model);
87+
} catch (Exception e) {
88+
System.err.println("Error processing " + pomPath + ": " + e.getMessage());
89+
}
90+
});
91+
92+
// Print analysis
93+
analyzer.printAnalysis();
94+
}
95+
96+
private static boolean containsSrcOrTarget(Path pomPath) {
97+
Path parent = pomPath.getParent();
98+
while (parent != null && parent.getFileName() != null) {
99+
String dirName = parent.getFileName().toString();
100+
if (dirName.equals("src") || dirName.equals("target")) {
101+
return true;
102+
}
103+
parent = parent.getParent();
104+
}
105+
return false;
106+
}
107+
108+
public void analyzePom(Model model) {
109+
totalPoms++;
110+
Set<Object> visited = new HashSet<>();
111+
processModelNode(model, "/project", "project", visited);
112+
}
113+
114+
private void processModelNode(Object node, String currentPath, String elementName, Set<Object> visited) {
115+
if (node == null || !visited.add(node)) {
116+
return;
117+
}
118+
119+
Class<?> clazz = node.getClass();
120+
while (clazz != null && !clazz.equals(Object.class)) {
121+
for (Field field : clazz.getDeclaredFields()) {
122+
// Skip static fields and synthetic fields
123+
if (Modifier.isStatic(field.getModifiers()) || field.isSynthetic()) {
124+
continue;
125+
}
126+
127+
try {
128+
field.setAccessible(true);
129+
Object value = field.get(node);
130+
if (value == null) continue;
131+
132+
String fullPath = currentPath + "/" + field.getName();
133+
134+
if (value instanceof String) {
135+
String strValue = (String) value;
136+
recordString(fullPath, strValue);
137+
globalStringFrequency.merge(strValue, 1, Integer::sum);
138+
} else if (value instanceof List) {
139+
List<?> list = (List<?>) value;
140+
for (Object item : list) {
141+
if (item != null) {
142+
String itemName = getSingular(field.getName());
143+
processModelNode(item, fullPath + "/" + itemName, itemName, visited);
144+
}
145+
}
146+
} else if (value instanceof Map) {
147+
Map<?, ?> map = (Map<?, ?>) value;
148+
for (Map.Entry<?, ?> entry : map.entrySet()) {
149+
if (entry.getValue() != null) {
150+
processModelNode(
151+
entry.getValue(),
152+
fullPath + "/" + entry.getKey(),
153+
entry.getKey().toString(),
154+
visited);
155+
}
156+
}
157+
} else if (!value.getClass().isPrimitive()
158+
&& !value.getClass().getName().startsWith("java.")) {
159+
processModelNode(value, fullPath, field.getName(), visited);
160+
}
161+
} catch (Exception e) {
162+
// Skip inaccessible or problematic fields
163+
}
164+
}
165+
clazz = clazz.getSuperclass();
166+
}
167+
}
168+
169+
private String getSingular(String plural) {
170+
if (plural.endsWith("ies")) {
171+
return plural.substring(0, plural.length() - 3) + "y";
172+
}
173+
if (plural.endsWith("s")) {
174+
return plural.substring(0, plural.length() - 1);
175+
}
176+
return plural;
177+
}
178+
179+
private void recordString(String path, String value) {
180+
pathStats
181+
.computeIfAbsent(path, k -> new HashMap<>())
182+
.computeIfAbsent(value, k -> new StringStats())
183+
.recordOccurrence(value);
184+
}
185+
186+
public List<PathAnalysis> getPathAnalysisSorted() {
187+
List<PathAnalysis> analysis = new ArrayList<>();
188+
189+
for (Map.Entry<String, Map<String, StringStats>> entry : pathStats.entrySet()) {
190+
String path = entry.getKey();
191+
Map<String, StringStats> stats = entry.getValue();
192+
193+
long uniqueStrings = stats.size();
194+
long totalOccurrences = stats.values().stream()
195+
.mapToLong(StringStats::getOccurrences)
196+
.sum();
197+
long totalMemory = stats.entrySet().stream()
198+
.mapToLong(e -> e.getKey().length() * e.getValue().getOccurrences() * 2L)
199+
.sum();
200+
long potentialSavings = stats.entrySet().stream()
201+
.mapToLong(e -> e.getKey().length() * 2L * (e.getValue().getOccurrences() - 1))
202+
.sum();
203+
204+
analysis.add(new PathAnalysis(
205+
path,
206+
uniqueStrings,
207+
totalOccurrences,
208+
totalMemory,
209+
potentialSavings,
210+
(double) totalOccurrences / uniqueStrings,
211+
getMostFrequentValues(stats, 5)));
212+
}
213+
214+
analysis.sort((a, b) -> Long.compare(b.potentialSavings, a.potentialSavings));
215+
return analysis;
216+
}
217+
218+
private List<ValueFrequency> getMostFrequentValues(Map<String, StringStats> stats, int limit) {
219+
return stats.entrySet().stream()
220+
.map(e -> new ValueFrequency(e.getKey(), e.getValue().getOccurrences()))
221+
.sorted((a, b) -> Long.compare(b.frequency, a.frequency))
222+
.limit(limit)
223+
.collect(Collectors.toList());
224+
}
225+
226+
public void printAnalysis() {
227+
System.out.printf("Analyzed %d POMs%n%n", totalPoms);
228+
229+
// First, get all paths
230+
List<PathAnalysis> allPaths = getPathAnalysisSorted();
231+
232+
// Create groups based on the final path component
233+
Map<String, List<PathAnalysis>> groupedPaths = new HashMap<>();
234+
Map<String, Map<String, Long>> groupValueFrequencies = new HashMap<>();
235+
236+
for (PathAnalysis path : allPaths) {
237+
String finalComponent = path.path.substring(path.path.lastIndexOf('/') + 1);
238+
239+
// Add path to its group
240+
groupedPaths.computeIfAbsent(finalComponent, k -> new ArrayList<>()).add(path);
241+
242+
// Aggregate value frequencies for the group
243+
Map<String, Long> groupFreqs = groupValueFrequencies.computeIfAbsent(finalComponent, k -> new HashMap<>());
244+
for (ValueFrequency vf : path.mostFrequentValues) {
245+
groupFreqs.merge(vf.value, vf.frequency, Long::sum);
246+
}
247+
}
248+
249+
// Create final group analyses and sort them by total savings
250+
List<GroupAnalysis> sortedGroups = groupedPaths.entrySet().stream()
251+
.map(entry -> {
252+
String groupName = entry.getKey();
253+
List<PathAnalysis> paths = entry.getValue();
254+
Map<String, Long> valueFreqs = groupValueFrequencies.get(groupName);
255+
256+
long totalSavings =
257+
paths.stream().mapToLong(p -> p.potentialSavings).sum();
258+
long totalMemory =
259+
paths.stream().mapToLong(p -> p.totalMemory).sum();
260+
long totalUnique = valueFreqs.size();
261+
long totalOccurrences =
262+
valueFreqs.values().stream().mapToLong(l -> l).sum();
263+
264+
List<ValueFrequency> topValues = valueFreqs.entrySet().stream()
265+
.map(e -> new ValueFrequency(e.getKey(), e.getValue()))
266+
.sorted((a, b) -> Long.compare(b.frequency, a.frequency))
267+
.limit(5)
268+
.collect(Collectors.toList());
269+
270+
return new GroupAnalysis(
271+
groupName, paths, totalUnique, totalOccurrences, totalMemory, totalSavings, topValues);
272+
})
273+
.sorted((a, b) -> Long.compare(b.totalSavings, a.totalSavings))
274+
.collect(Collectors.toList());
275+
276+
// Print each group
277+
for (GroupAnalysis group : sortedGroups) {
278+
System.out.printf("%nPaths ending with '%s':%n", group.name);
279+
System.out.printf("Total potential savings: %dKB%n", group.totalSavings / 1024);
280+
System.out.printf("Total memory: %dKB%n", group.totalMemory / 1024);
281+
System.out.printf("Total unique values: %d%n", group.totalUnique);
282+
System.out.printf("Total occurrences: %d%n", group.totalOccurrences);
283+
System.out.printf("Duplication ratio: %.2f%n", (double) group.totalOccurrences / group.totalUnique);
284+
285+
System.out.println("\nMost frequent values across all paths:");
286+
for (ValueFrequency v : group.mostFrequentValues) {
287+
System.out.printf(" %-70s %d times%n", v.value, v.frequency);
288+
}
289+
290+
System.out.println("\nIndividual paths:");
291+
System.out.println("----------------------------------------");
292+
for (PathAnalysis path : group.paths.stream()
293+
.sorted((a, b) -> Long.compare(b.potentialSavings, a.potentialSavings))
294+
.collect(Collectors.toList())) {
295+
System.out.printf(
296+
"%-90s %6dKB %6dKB%n", path.path, path.totalMemory / 1024, path.potentialSavings / 1024);
297+
}
298+
System.out.println();
299+
}
300+
}
301+
302+
private static class GroupAnalysis {
303+
final String name;
304+
final List<PathAnalysis> paths;
305+
final long totalUnique;
306+
final long totalOccurrences;
307+
final long totalMemory;
308+
final long totalSavings;
309+
final List<ValueFrequency> mostFrequentValues;
310+
311+
GroupAnalysis(
312+
String name,
313+
List<PathAnalysis> paths,
314+
long totalUnique,
315+
long totalOccurrences,
316+
long totalMemory,
317+
long totalSavings,
318+
List<ValueFrequency> mostFrequentValues) {
319+
this.name = name;
320+
this.paths = paths;
321+
this.totalUnique = totalUnique;
322+
this.totalOccurrences = totalOccurrences;
323+
this.totalMemory = totalMemory;
324+
this.totalSavings = totalSavings;
325+
this.mostFrequentValues = mostFrequentValues;
326+
}
327+
}
328+
329+
private static class StringStats {
330+
private long occurrences = 0;
331+
332+
public void recordOccurrence(String value) {
333+
occurrences++;
334+
}
335+
336+
public long getOccurrences() {
337+
return occurrences;
338+
}
339+
}
340+
341+
public static class PathAnalysis {
342+
public final String path;
343+
public final long uniqueStrings;
344+
public final long totalOccurrences;
345+
public final long totalMemory;
346+
public final long potentialSavings;
347+
public final double duplicationRatio;
348+
public final List<ValueFrequency> mostFrequentValues;
349+
350+
public PathAnalysis(
351+
String path,
352+
long uniqueStrings,
353+
long totalOccurrences,
354+
long totalMemory,
355+
long potentialSavings,
356+
double duplicationRatio,
357+
List<ValueFrequency> mostFrequentValues) {
358+
this.path = path;
359+
this.uniqueStrings = uniqueStrings;
360+
this.totalOccurrences = totalOccurrences;
361+
this.totalMemory = totalMemory;
362+
this.potentialSavings = potentialSavings;
363+
this.duplicationRatio = duplicationRatio;
364+
this.mostFrequentValues = mostFrequentValues;
365+
}
366+
}
367+
368+
public static class ValueFrequency {
369+
public final String value;
370+
public final long frequency;
371+
372+
public ValueFrequency(String value, long frequency) {
373+
this.value = value;
374+
this.frequency = frequency;
375+
}
376+
}
377+
}

0 commit comments

Comments
 (0)