Skip to content

Commit 4c9d145

Browse files
[modulevariable] Replace Module Block Structure with Module Variable Steps
* Reverts #3750's module block structure in Python and performs more on-the-fly interprocedural usage resolution.
* Adds the `MODULE` modifier to module-defining methods in JS and Ruby (Python already has this). Note: other frontends don't seem to have a module-defining method, though I may be wrong.
* Moves the import resolution query steps and classes to `semanticcpg` under `importresolver`, so that CPG entities of resolved imports can be retrieved on the fly.
* Modifies the existing `modulevariable` query steps so that `LOCAL` is now the base of the `ModuleVariable` node extension, and adds a number of queries around that.
* Modifies `SourcesToStartingPoints` to handle module variables similarly to fields for interprocedural flows.
1 parent 3ab8fe0 commit 4c9d145

File tree

37 files changed

+652
-381
lines changed

37 files changed

+652
-381
lines changed

dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/package.scala

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,8 @@ import io.shiftleft.semanticcpg.language.*
55

66
package object dataflowengineoss {
77

8-
def globalFromLiteral(lit: Literal): Iterator[Expression] = lit.start
9-
.where(_.inAssignment.method.nameExact("<module>", ":package"))
10-
.inAssignment
8+
def globalFromLiteral(lit: Literal): Iterator[Expression] = lit.start.inAssignment
9+
.where(_.method.isModule)
1110
.argument(1)
1211

1312
def identifierToFirstUsages(node: Identifier): List[Identifier] = node.refsTo.flatMap(identifiersFromCapturedScopes).l

dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/queryengine/SourcesToStartingPoints.scala

Lines changed: 68 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,10 @@ package io.joern.dataflowengineoss.queryengine
33
import io.joern.dataflowengineoss.globalFromLiteral
44
import io.joern.x2cpg.Defines
55
import io.shiftleft.codepropertygraph.Cpg
6-
import io.shiftleft.codepropertygraph.generated.Operators
7-
import io.shiftleft.codepropertygraph.generated.nodes._
8-
import io.shiftleft.semanticcpg.language._
6+
import io.shiftleft.codepropertygraph.generated.nodes.*
7+
import io.shiftleft.semanticcpg.language.*
8+
import io.shiftleft.semanticcpg.language.importresolver.{EvaluatedImport, ResolvedMember, ResolvedTypeDecl}
9+
import io.shiftleft.semanticcpg.language.operatorextension.OpNodes.Assignment
910
import io.shiftleft.semanticcpg.language.operatorextension.allAssignmentTypes
1011
import io.shiftleft.semanticcpg.utils.MemberAccess.isFieldAccess
1112
import org.slf4j.LoggerFactory
@@ -63,6 +64,16 @@ class SourceTravsToStartingPointsTask[NodeType](sourceTravs: IterableOnce[NodeTy
6364
class SourceToStartingPoints(src: StoredNode) extends RecursiveTask[List[CfgNode]] {
6465

6566
private val cpg = Cpg(src.graph())
67+
private lazy val memberToImportingModule: Map[Member, List[(String, Method)]] = cpg.call
68+
.where(_.method.isModule)
69+
.flatMap(x =>
70+
x.referencedImports
71+
.flatMap(extractAliasMemberPair)
72+
.map { case (alias, member) => member -> (alias, x.method) }
73+
)
74+
.groupBy(_._1)
75+
.map { case (member, xs) => member -> xs.map(_._2) }
76+
private val typeDeclToMembers = cpg.typeDecl.map { x => x.fullName -> x.member.l }.toMap
6677

6778
override def compute(): List[CfgNode] = sourceToStartingPoints(src)
6879

@@ -71,7 +82,12 @@ class SourceToStartingPoints(src: StoredNode) extends RecursiveTask[List[CfgNode
7182
case methodReturn: MethodReturn =>
7283
methodReturn.method.callIn.l
7384
case lit: Literal =>
74-
List(lit) ++ usages(targetsToClassIdentifierPair(literalToInitializedMembers(lit))) ++ globalFromLiteral(lit)
85+
val uses = usages(targetsToClassIdentifierPair(literalToInitializedMembers(lit)))
86+
val globals = globalFromLiteral(lit).flatMap {
87+
case x: Identifier if x.isModuleVariable => x +: moduleVarToUsages(x)
88+
case x => x :: Nil
89+
}.l
90+
lit :: (uses ++ globals)
7591
case member: Member =>
7692
usages(targetsToClassIdentifierPair(List(member)))
7793
case x: Declaration =>
@@ -91,17 +107,59 @@ class SourceToStartingPoints(src: StoredNode) extends RecursiveTask[List[CfgNode
91107

92108
private def withFieldAndIndexAccesses(nodes: List[CfgNode]): List[CfgNode] =
93109
nodes.flatMap {
94-
case identifier: Identifier =>
95-
List(identifier) ++ fieldAndIndexAccesses(identifier)
96-
case x => List(x)
110+
case moduleVar: Identifier if moduleVar.isModuleVariable => moduleVar :: moduleVarToUsages(moduleVar)
111+
case identifier: Identifier => identifier :: fieldAndIndexAccesses(identifier)
112+
case x => x :: Nil
97113
}
98114

99115
private def fieldAndIndexAccesses(identifier: Identifier): List[CfgNode] =
100116
identifier.method._identifierViaContainsOut
101117
.nameExact(identifier.name)
102118
.inCall
103119
.collect { case c if isFieldAccess(c.name) => c }
104-
.l
120+
.toList
121+
122+
private def extractAliasMemberPair(i: Import): Seq[(String, Member)] = {
123+
i.importedAs
124+
.map { alias =>
125+
i.call.tag
126+
.flatMap(EvaluatedImport.tagToEvaluatedImport)
127+
.flatMap {
128+
case ResolvedMember(basePath, memberName, _) =>
129+
cpg.typeDecl.fullNameExact(basePath).member.nameExact(memberName).map(m => alias -> m)
130+
case ResolvedTypeDecl(typeFullName, _) =>
131+
cpg.typeDecl.fullNameExact(typeFullName).member.map(m => alias -> m)
132+
case _ => Seq.empty
133+
}
134+
.toSeq
135+
}
136+
.getOrElse(Seq.empty)
137+
}
138+
139+
private def moduleVarToUsages(moduleVar: Identifier): List[CfgNode] = {
140+
typeDeclToMembers
141+
.getOrElse(moduleVar.method.fullName, List.empty)
142+
.nameExact(moduleVar.name)
143+
.flatMap(memberToImportingModule.get)
144+
.flatMap { xs =>
145+
xs.flatMap { case (alias, importingModule) =>
146+
val directAccess = importingModule
147+
.flatMap(_._identifierViaContainsOut.nameExact(alias))
148+
.sortBy(i => (i.lineNumber, i.columnNumber))
149+
.filterNot(notLeftHandOfAssignment)
150+
.headOption
151+
.toList
152+
val accessedAsFields = importingModule
153+
.flatMap(_.fieldAccess.where(_.fieldIdentifier.canonicalNameExact(alias))) // TODO: This does not check LHS
154+
.sortBy(i => (i.lineNumber, i.columnNumber))
155+
.filterNot(notLeftHandOfAssignment)
156+
.headOption
157+
.toList
158+
(directAccess ++ accessedAsFields).collectAll[CfgNode]
159+
}
160+
}
161+
.toList
162+
}
105163

106164
private def usages(pairs: List[(TypeDecl, AstNode)]): List[CfgNode] = {
107165
pairs.flatMap { case (typeDecl, astNode) =>
@@ -184,8 +242,8 @@ class SourceToStartingPoints(src: StoredNode) extends RecursiveTask[List[CfgNode
184242
// If a member shares the name of the identifier then we consider this as a member
185243
lit.method.typeDecl.member.name.toSet.contains(identifier.name) =>
186244
List(identifier)
187-
case call: Call if call.name == Operators.fieldAccess => call.ast.isFieldIdentifier.l
188-
case _ => List[Expression]()
245+
case call: Call if isFieldAccess(call.name) => call.ast.isFieldIdentifier.l
246+
case _ => List[Expression]()
189247
}
190248
.l
191249

joern-cli/frontends/jssrc2cpg/src/main/scala/io/joern/jssrc2cpg/astcreation/AstCreator.scala

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,9 @@ import io.joern.jssrc2cpg.parser.BabelJsonParser.ParseResult
77
import io.joern.jssrc2cpg.parser.BabelNodeInfo
88
import io.joern.jssrc2cpg.passes.Defines
99
import io.joern.x2cpg.datastructures.Stack.*
10-
import io.joern.x2cpg.utils.NodeBuilders.newMethodReturnNode
10+
import io.joern.x2cpg.utils.NodeBuilders.{newMethodReturnNode, newModifierNode}
1111
import io.joern.x2cpg.{Ast, AstCreatorBase, ValidationMode, AstNodeBuilder as X2CpgAstNodeBuilder}
12-
import io.shiftleft.codepropertygraph.generated.{EvaluationStrategies, NodeTypes}
12+
import io.shiftleft.codepropertygraph.generated.{EvaluationStrategies, ModifierTypes, NodeTypes}
1313
import io.shiftleft.codepropertygraph.generated.nodes.NewBlock
1414
import io.shiftleft.codepropertygraph.generated.nodes.NewFile
1515
import io.shiftleft.codepropertygraph.generated.nodes.NewMethod
@@ -124,7 +124,13 @@ class AstCreator(
124124
methodAstParentStack.pop()
125125

126126
functionTypeAndTypeDeclAst.withChild(
127-
methodAst(programMethod, List(Ast(thisParam)), blockAst(blockNode, methodChildren), methodReturn)
127+
methodAst(
128+
programMethod,
129+
Ast(thisParam) :: Nil,
130+
blockAst(blockNode, methodChildren),
131+
methodReturn,
132+
newModifierNode(ModifierTypes.MODULE) :: Nil
133+
)
128134
)
129135
}
130136

joern-cli/frontends/jssrc2cpg/src/main/scala/io/joern/jssrc2cpg/passes/JavaScriptTypeRecovery.scala

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ private class JavaScriptTypeRecovery(cpg: Cpg, state: XTypeRecoveryState) extend
3333
private class RecoverForJavaScriptFile(cpg: Cpg, cu: File, builder: DiffGraphBuilder, state: XTypeRecoveryState)
3434
extends RecoverForXCompilationUnit[File](cpg, cu, builder, state) {
3535

36+
import io.joern.x2cpg.passes.frontend.XTypeRecovery.AllNodeTypesFromNodeExt
37+
3638
override protected val pathSep = ':'
3739

3840
/** A heuristic method to determine if a call is a constructor or not.

joern-cli/frontends/jssrc2cpg/src/test/scala/io/joern/jssrc2cpg/dataflow/DataflowTest.scala

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -557,7 +557,8 @@ class DataflowTest extends DataFlowCodeToCpgSuite {
557557

558558
val sink = cpg.call.nameExact("fn")
559559
val src = cpg.literal("47")
560-
sink.reachableBy(src).size shouldBe 1
560+
// Deduplicated: other variants of the flow skip over certain lowerings, so several flows are found, but their source-sink pairs are equal
561+
sink.reachableBy(src).dedup.size shouldBe 1
561562
}
562563

563564
"Flow into method defined as lambda and assigned to constant" in {
@@ -679,13 +680,13 @@ class DataflowTest extends DataFlowCodeToCpgSuite {
679680
"literal to captured closure" in {
680681
val literalSource = cpg.literal.codeExact("\"https://test-api-service.com\"").l
681682
literalSource.size shouldBe 1
682-
sink.reachableBy(literalSource).size shouldBe 1
683+
sink.reachableBy(literalSource).dedup.size shouldBe 1
683684
}
684685

685686
"identifiers to captured closure" in {
686687
val identifierSource = cpg.identifier.nameExact("API_Endpoint").lineNumber(5).l
687688
identifierSource.size shouldBe 1
688-
sink.reachableBy(identifierSource).size shouldBe 1
689+
sink.reachableBy(identifierSource).dedup.size shouldBe 1
689690
}
690691

691692
"identifiers in the arg of the call" in {

joern-cli/frontends/pysrc2cpg/src/main/scala/io/joern/pysrc2cpg/ImportResolverPass.scala

Whitespace-only changes.

joern-cli/frontends/pysrc2cpg/src/main/scala/io/joern/pysrc2cpg/PythonImportResolverPass.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
package io.joern.pysrc2cpg
22

33
import better.files.File
4-
import io.joern.x2cpg.passes.frontend.ImportsPass.*
54
import io.joern.x2cpg.passes.frontend.XImportResolverPass
65
import io.shiftleft.codepropertygraph.Cpg
76
import io.shiftleft.codepropertygraph.generated.nodes.*
87
import io.shiftleft.semanticcpg.language.*
8+
import io.shiftleft.semanticcpg.language.importresolver.*
99

1010
import java.io.File as JFile
1111
import java.util.regex.Matcher

joern-cli/frontends/pysrc2cpg/src/main/scala/io/joern/pysrc2cpg/PythonTypeRecovery.scala

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
package io.joern.pysrc2cpg
22

3-
import io.joern.x2cpg.passes.frontend._
3+
import io.joern.x2cpg.passes.frontend.*
44
import io.shiftleft.codepropertygraph.Cpg
5-
import io.shiftleft.codepropertygraph.generated.nodes._
5+
import io.shiftleft.codepropertygraph.generated.nodes.*
66
import io.shiftleft.codepropertygraph.generated.{Operators, PropertyNames}
7-
import io.shiftleft.semanticcpg.language._
7+
import io.shiftleft.semanticcpg.language.*
8+
import io.shiftleft.semanticcpg.language.importresolver.*
89
import io.shiftleft.semanticcpg.language.operatorextension.OpNodes
910
import io.shiftleft.semanticcpg.language.operatorextension.OpNodes.FieldAccess
1011
import overflowdb.BatchedUpdate.DiffGraphBuilder
@@ -48,7 +49,6 @@ private class RecoverForPythonFile(cpg: Cpg, cu: File, builder: DiffGraphBuilder
4849

4950
override def visitImport(i: Import): Unit = {
5051
if (i.importedAs.isDefined && i.importedEntity.isDefined) {
51-
import io.joern.x2cpg.passes.frontend.ImportsPass.*
5252

5353
val entityName = i.importedAs.get
5454
i.call.tag.flatMap(EvaluatedImport.tagToEvaluatedImport).foreach {
@@ -184,7 +184,8 @@ private class RecoverForPythonFile(cpg: Cpg, cu: File, builder: DiffGraphBuilder
184184
}
185185
}
186186

187-
override protected def postSetTypeInformation(): Unit =
187+
override protected def postSetTypeInformation(): Unit = {
188+
super.postSetTypeInformation()
188189
cu.typeDecl
189190
.map(t => t -> t.inheritsFromTypeFullName.partition(itf => symbolTable.contains(LocalVar(itf))))
190191
.foreach { case (t, (identifierTypes, otherTypes)) =>
@@ -195,6 +196,7 @@ private class RecoverForPythonFile(cpg: Cpg, cu: File, builder: DiffGraphBuilder
195196
builder.setNodeProperty(t, PropertyNames.INHERITS_FROM_TYPE_FULL_NAME, resolvedTypes)
196197
}
197198
}
199+
}
198200

199201
override def prepopulateSymbolTable(): Unit = {
200202
cu.ast.isMethodRef.where(_.astSiblings.isIdentifier.nameExact("classmethod")).referencedMethod.foreach {
@@ -218,4 +220,14 @@ private class RecoverForPythonFile(cpg: Cpg, cu: File, builder: DiffGraphBuilder
218220
.headOption
219221
.getOrElse(super.visitIdentifierAssignedToTypeRef(i, t, rec))
220222

223+
override protected def handlePotentialFunctionPointer(
224+
funcPtr: Expression,
225+
baseTypes: Set[String],
226+
funcName: String,
227+
baseName: Option[String]
228+
): Unit = {
229+
if (funcName != "<module>")
230+
super.handlePotentialFunctionPointer(funcPtr, baseTypes, funcName, baseName)
231+
}
232+
221233
}

joern-cli/frontends/pysrc2cpg/src/test/scala/io/joern/pysrc2cpg/dataflow/DataFlowTests.scala

Lines changed: 45 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,7 @@ import io.joern.dataflowengineoss.semanticsloader.FlowSemantic
55
import io.joern.pysrc2cpg.PySrc2CpgFixture
66
import io.shiftleft.codepropertygraph.Cpg
77
import io.shiftleft.codepropertygraph.generated.nodes.{Literal, Member, Method}
8-
import io.shiftleft.semanticcpg.language._
9-
import org.scalatest.Ignore
8+
import io.shiftleft.semanticcpg.language.*
109

1110
import java.io.File
1211

@@ -589,4 +588,48 @@ class RegexDefinedFlowsDataFlowTests
589588
flows.size shouldBe 2
590589
}
591590

591+
"flow across interprocedural module variables" should {
592+
593+
"handle simple import and field-based usage of a literal" in {
594+
val cpg: Cpg = code(
595+
"""
596+
|a = 42
597+
|""".stripMargin,
598+
"foo.py"
599+
)
600+
.moreCode(
601+
"""
602+
|import foo
603+
|
604+
|print(foo.a)
605+
|""".stripMargin,
606+
"bar.py"
607+
)
608+
val source = cpg.literal("42").l
609+
val sink = cpg.call.name("print.*").l
610+
sink.reachableByFlows(source).size shouldBe 1
611+
}
612+
613+
"handle simple import and aliased usage of a literal" in {
614+
val cpg: Cpg = code(
615+
"""
616+
|a = 42
617+
|""".stripMargin,
618+
"foo.py"
619+
)
620+
.moreCode(
621+
"""
622+
|from foo import a as b
623+
|
624+
|print(b)
625+
|""".stripMargin,
626+
"bar.py"
627+
)
628+
val source = cpg.literal("42").l
629+
val sink = cpg.call.name("print.*").l
630+
sink.reachableByFlows(source).size shouldBe 1
631+
}
632+
633+
}
634+
592635
}

0 commit comments

Comments
 (0)