[edk2-devel] [PATCH] BaseTools: Library hashing fix and optimization for --hash feature

Christian Rodriguez posted 1 patch 9 weeks ago
Failed in applying to current master (apply log)
BaseTools/Source/Python/AutoGen/AutoGen.py   | 52 +++++++++++++++++++++++++++++++++++++++-------------
BaseTools/Source/Python/Common/GlobalData.py |  6 ++++++
BaseTools/Source/Python/build/build.py       |  7 ++++++-
3 files changed, 51 insertions(+), 14 deletions(-)

[edk2-devel] [PATCH] BaseTools: Library hashing fix and optimization for --hash feature

Posted by Christian Rodriguez 9 weeks ago
BZ: https://bugzilla.tianocore.org/show_bug.cgi?id=1788
Library hashing is now supported by the --hash feature. The --hash
feature implementation assumed that the hashing could be done in
place once per module, but that isn't true for libraries due to the
fact that they are built as dependencies. So on a clean build, we now
generate the .hash after the library dependencies are complete.
Added early escape as optimization, if hash already exists in memory.

Signed-off-by: Christian Rodriguez <christian.rodriguez@intel.com>
Cc: Bob Feng <bob.c.feng@intel.com>
Cc: Liming Gao <liming.gao@intel.com>
Cc: Yonghong Zhu <yonghong.zhu@intel.com>
---
 BaseTools/Source/Python/AutoGen/AutoGen.py   | 52 +++++++++++++++++++++++++++++++++++++++-------------
 BaseTools/Source/Python/Common/GlobalData.py |  6 ++++++
 BaseTools/Source/Python/build/build.py       |  7 ++++++-
 3 files changed, 51 insertions(+), 14 deletions(-)

diff --git a/BaseTools/Source/Python/AutoGen/AutoGen.py b/BaseTools/Source/Python/AutoGen/AutoGen.py
index 31721a6f9f..cf92d07be1 100644
--- a/BaseTools/Source/Python/AutoGen/AutoGen.py
+++ b/BaseTools/Source/Python/AutoGen/AutoGen.py
@@ -4093,13 +4093,20 @@ class ModuleAutoGen(AutoGen):
         return RetVal
 
     def GenModuleHash(self):
+        # Initialize a dictionary for each arch type
         if self.Arch not in GlobalData.gModuleHash:
             GlobalData.gModuleHash[self.Arch] = {}
-        if self.Name in GlobalData.gModuleHash[self.Arch] and GlobalData.gBinCacheSource and self.AttemptModuleCacheCopy():
-            return False
+
+        # Early exit if module or library has been hashed and is in memory
+        if self.Name in GlobalData.gModuleHash[self.Arch]:
+            return GlobalData.gModuleHash[self.Arch][self.Name].encode('utf-8')
+
+        # Initialze hash object
         m = hashlib.md5()
+
         # Add Platform level hash
         m.update(GlobalData.gPlatformHash.encode('utf-8'))
+
         # Add Package level hash
         if self.DependentPackageList:
             for Pkg in sorted(self.DependentPackageList, key=lambda x: x.PackageName):
@@ -4118,6 +4125,7 @@ class ModuleAutoGen(AutoGen):
         Content = f.read()
         f.close()
         m.update(Content)
+
         # Add Module's source files
         if self.SourceFileList:
             for File in sorted(self.SourceFileList, key=lambda x: str(x)):
@@ -4126,27 +4134,45 @@ class ModuleAutoGen(AutoGen):
                 f.close()
                 m.update(Content)
 
-        ModuleHashFile = path.join(self.BuildDir, self.Name + ".hash")
-        if self.Name not in GlobalData.gModuleHash[self.Arch]:
-            GlobalData.gModuleHash[self.Arch][self.Name] = m.hexdigest()
-        if GlobalData.gBinCacheSource and self.AttemptModuleCacheCopy():
-            return False
-        return SaveFileOnChange(ModuleHashFile, m.hexdigest(), False)
+        GlobalData.gModuleHash[self.Arch][self.Name] = m.hexdigest()
+
+        return GlobalData.gModuleHash[self.Arch][self.Name].encode('utf-8')
 
     ## Decide whether we can skip the ModuleAutoGen process
     def CanSkipbyHash(self):
+        # Hashing feature is off
+        if not GlobalData.gUseHashCache:
+            return False
+
+        # Initialize a dictionary for each arch type
+        if self.Arch not in GlobalData.gBuildHashSkipTracking:
+            GlobalData.gBuildHashSkipTracking[self.Arch] = dict()
+
         # If library or Module is binary do not skip by hash
         if self.IsBinaryModule:
             return False
+
         # .inc is contains binary information so do not skip by hash as well
         for f_ext in self.SourceFileList:
             if '.inc' in str(f_ext):
                 return False
-        if GlobalData.gUseHashCache:
-            # If there is a valid hash or function generated a valid hash; function will return False
-            # and the statement below will return True
-            return not self.GenModuleHash()
-        return False
+
+        # Use Cache, if exists and if Module has a copy in cache
+        if GlobalData.gBinCacheSource and self.AttemptModuleCacheCopy():
+            return True
+
+        # Early exit for libraries that haven't yet finished building
+        HashFile = path.join(self.BuildDir, self.Name + ".hash")
+        if self.IsLibrary and not os.path.exists(HashFile):
+            return False
+
+        # Return a Boolean based on if can skip by hash, either from memory or from IO.
+        if self.Name not in GlobalData.gBuildHashSkipTracking[self.Arch]:
+            # If hashes are the same, SaveFileOnChange() will return False.
+            GlobalData.gBuildHashSkipTracking[self.Arch][self.Name] = not SaveFileOnChange(HashFile, self.GenModuleHash(), False)
+            return GlobalData.gBuildHashSkipTracking[self.Arch][self.Name]
+        else:
+            return GlobalData.gBuildHashSkipTracking[self.Arch][self.Name]
 
     ## Decide whether we can skip the ModuleAutoGen process
     #  If any source file is newer than the module than we cannot skip
diff --git a/BaseTools/Source/Python/Common/GlobalData.py b/BaseTools/Source/Python/Common/GlobalData.py
index 79f23c892d..95e28a988f 100644
--- a/BaseTools/Source/Python/Common/GlobalData.py
+++ b/BaseTools/Source/Python/Common/GlobalData.py
@@ -112,3 +112,9 @@ gSikpAutoGenCache = set()
 # Dictionary for tracking Module build status as success or failure
 # False -> Fail : True -> Success
 gModuleBuildTracking = dict()
+
+# Dictionary of booleans that dictate whether a module or
+# library can be skiped
+# Top Dict:     Key: Arch Type              Value: Dictionary
+# Second Dict:  Key: Module\Library Name    Value: True\False
+gBuildHashSkipTracking = dict()
diff --git a/BaseTools/Source/Python/build/build.py b/BaseTools/Source/Python/build/build.py
index 7271570d29..97a4dc8ee2 100644
--- a/BaseTools/Source/Python/build/build.py
+++ b/BaseTools/Source/Python/build/build.py
@@ -593,7 +593,7 @@ class BuildTask:
     #
     def AddDependency(self, Dependency):
         for Dep in Dependency:
-            if not Dep.BuildObject.IsBinaryModule:
+            if not Dep.BuildObject.IsBinaryModule and not Dep.BuildObject.CanSkipbyHash():
                 self.DependencyList.append(BuildTask.New(Dep))    # BuildTask list
 
     ## The thread wrapper of LaunchCommand function
@@ -605,6 +605,11 @@ class BuildTask:
         try:
             self.BuildItem.BuildObject.BuildTime = LaunchCommand(Command, WorkingDir)
             self.CompleteFlag = True
+
+            # Run hash operation post dependency, to account for libs
+            if GlobalData.gUseHashCache and self.BuildItem.BuildObject.IsLibrary:
+                HashFile = path.join(self.BuildItem.BuildObject.BuildDir, self.BuildItem.BuildObject.Name + ".hash")
+                SaveFileOnChange(HashFile, self.BuildItem.BuildObject.GenModuleHash(), False)
         except:
             #
             # TRICK: hide the output of threads left running, so that the user can
-- 
2.19.1.windows.1


-=-=-=-=-=-=-=-=-=-=-=-
Groups.io Links: You receive all messages sent to this group.

View/Reply Online (#40613): https://edk2.groups.io/g/devel/message/40613
Mute This Topic: https://groups.io/mt/31621303/1787277
Group Owner: devel+owner@edk2.groups.io
Unsubscribe: https://edk2.groups.io/g/devel/unsub  [importer@patchew.org]
-=-=-=-=-=-=-=-=-=-=-=-

Re: [edk2-devel] [PATCH] BaseTools: Library hashing fix and optimization for --hash feature

Posted by Bob Feng 9 weeks ago
Hi Christian,

+        # Return a Boolean based on if can skip by hash, either from memory or from IO.
+        if self.Name not in GlobalData.gBuildHashSkipTracking[self.Arch]:
+            # If hashes are the same, SaveFileOnChange() will return False.
+            GlobalData.gBuildHashSkipTracking[self.Arch][self.Name] = not SaveFileOnChange(HashFile, self.GenModuleHash(), False)
+            return GlobalData.gBuildHashSkipTracking[self.Arch][self.Name]
+        else:
+            return


For SaveFileOnChange(HashFile, self.GenModuleHash(), False), the third parameter False should be True since the self.GenModuleHash() return byte types not str.

Thanks,
Bob


-----Original Message-----
From: Rodriguez, Christian 
Sent: Wednesday, May 15, 2019 1:56 AM
To: devel@edk2.groups.io
Cc: Feng, Bob C <bob.c.feng@intel.com>; Gao, Liming <liming.gao@intel.com>; Zhu, Yonghong <yonghong.zhu@intel.com>
Subject: [PATCH] BaseTools: Library hashing fix and optimization for --hash feature

BZ: https://bugzilla.tianocore.org/show_bug.cgi?id=1788
Library hashing is now supported by the --hash feature. The --hash feature implementation assumed that the hashing could be done in place once per module, but that isn't true for libraries due to the fact that they are built as dependencies. So on a clean build, we now generate the .hash after the library dependencies are complete.
Added early escape as optimization, if hash already exists in memory.

Signed-off-by: Christian Rodriguez <christian.rodriguez@intel.com>
Cc: Bob Feng <bob.c.feng@intel.com>
Cc: Liming Gao <liming.gao@intel.com>
Cc: Yonghong Zhu <yonghong.zhu@intel.com>
---
 BaseTools/Source/Python/AutoGen/AutoGen.py   | 52 +++++++++++++++++++++++++++++++++++++++-------------
 BaseTools/Source/Python/Common/GlobalData.py |  6 ++++++
 BaseTools/Source/Python/build/build.py       |  7 ++++++-
 3 files changed, 51 insertions(+), 14 deletions(-)

diff --git a/BaseTools/Source/Python/AutoGen/AutoGen.py b/BaseTools/Source/Python/AutoGen/AutoGen.py
index 31721a6f9f..cf92d07be1 100644
--- a/BaseTools/Source/Python/AutoGen/AutoGen.py
+++ b/BaseTools/Source/Python/AutoGen/AutoGen.py
@@ -4093,13 +4093,20 @@ class ModuleAutoGen(AutoGen):
         return RetVal
 
     def GenModuleHash(self):
+        # Initialize a dictionary for each arch type
         if self.Arch not in GlobalData.gModuleHash:
             GlobalData.gModuleHash[self.Arch] = {}
-        if self.Name in GlobalData.gModuleHash[self.Arch] and GlobalData.gBinCacheSource and self.AttemptModuleCacheCopy():
-            return False
+
+        # Early exit if module or library has been hashed and is in memory
+        if self.Name in GlobalData.gModuleHash[self.Arch]:
+            return 
+ GlobalData.gModuleHash[self.Arch][self.Name].encode('utf-8')
+
+        # Initialze hash object
         m = hashlib.md5()
+
         # Add Platform level hash
         m.update(GlobalData.gPlatformHash.encode('utf-8'))
+
         # Add Package level hash
         if self.DependentPackageList:
             for Pkg in sorted(self.DependentPackageList, key=lambda x: x.PackageName):
@@ -4118,6 +4125,7 @@ class ModuleAutoGen(AutoGen):
         Content = f.read()
         f.close()
         m.update(Content)
+
         # Add Module's source files
         if self.SourceFileList:
             for File in sorted(self.SourceFileList, key=lambda x: str(x)):
@@ -4126,27 +4134,45 @@ class ModuleAutoGen(AutoGen):
                 f.close()
                 m.update(Content)
 
-        ModuleHashFile = path.join(self.BuildDir, self.Name + ".hash")
-        if self.Name not in GlobalData.gModuleHash[self.Arch]:
-            GlobalData.gModuleHash[self.Arch][self.Name] = m.hexdigest()
-        if GlobalData.gBinCacheSource and self.AttemptModuleCacheCopy():
-            return False
-        return SaveFileOnChange(ModuleHashFile, m.hexdigest(), False)
+        GlobalData.gModuleHash[self.Arch][self.Name] = m.hexdigest()
+
+        return 
+ GlobalData.gModuleHash[self.Arch][self.Name].encode('utf-8')
 
     ## Decide whether we can skip the ModuleAutoGen process
     def CanSkipbyHash(self):
+        # Hashing feature is off
+        if not GlobalData.gUseHashCache:
+            return False
+
+        # Initialize a dictionary for each arch type
+        if self.Arch not in GlobalData.gBuildHashSkipTracking:
+            GlobalData.gBuildHashSkipTracking[self.Arch] = dict()
+
         # If library or Module is binary do not skip by hash
         if self.IsBinaryModule:
             return False
+
         # .inc is contains binary information so do not skip by hash as well
         for f_ext in self.SourceFileList:
             if '.inc' in str(f_ext):
                 return False
-        if GlobalData.gUseHashCache:
-            # If there is a valid hash or function generated a valid hash; function will return False
-            # and the statement below will return True
-            return not self.GenModuleHash()
-        return False
+
+        # Use Cache, if exists and if Module has a copy in cache
+        if GlobalData.gBinCacheSource and self.AttemptModuleCacheCopy():
+            return True
+
+        # Early exit for libraries that haven't yet finished building
+        HashFile = path.join(self.BuildDir, self.Name + ".hash")
+        if self.IsLibrary and not os.path.exists(HashFile):
+            return False
+
+        # Return a Boolean based on if can skip by hash, either from memory or from IO.
+        if self.Name not in GlobalData.gBuildHashSkipTracking[self.Arch]:
+            # If hashes are the same, SaveFileOnChange() will return False.
+            GlobalData.gBuildHashSkipTracking[self.Arch][self.Name] = not SaveFileOnChange(HashFile, self.GenModuleHash(), False)
+            return GlobalData.gBuildHashSkipTracking[self.Arch][self.Name]
+        else:
+            return 
+ GlobalData.gBuildHashSkipTracking[self.Arch][self.Name]
 
     ## Decide whether we can skip the ModuleAutoGen process
     #  If any source file is newer than the module than we cannot skip diff --git a/BaseTools/Source/Python/Common/GlobalData.py b/BaseTools/Source/Python/Common/GlobalData.py
index 79f23c892d..95e28a988f 100644
--- a/BaseTools/Source/Python/Common/GlobalData.py
+++ b/BaseTools/Source/Python/Common/GlobalData.py
@@ -112,3 +112,9 @@ gSikpAutoGenCache = set()  # Dictionary for tracking Module build status as success or failure  # False -> Fail : True -> Success  gModuleBuildTracking = dict()
+
+# Dictionary of booleans that dictate whether a module or # library can 
+be skiped
+# Top Dict:     Key: Arch Type              Value: Dictionary
+# Second Dict:  Key: Module\Library Name    Value: True\False
+gBuildHashSkipTracking = dict()
diff --git a/BaseTools/Source/Python/build/build.py b/BaseTools/Source/Python/build/build.py
index 7271570d29..97a4dc8ee2 100644
--- a/BaseTools/Source/Python/build/build.py
+++ b/BaseTools/Source/Python/build/build.py
@@ -593,7 +593,7 @@ class BuildTask:
     #
     def AddDependency(self, Dependency):
         for Dep in Dependency:
-            if not Dep.BuildObject.IsBinaryModule:
+            if not Dep.BuildObject.IsBinaryModule and not Dep.BuildObject.CanSkipbyHash():
                 self.DependencyList.append(BuildTask.New(Dep))    # BuildTask list
 
     ## The thread wrapper of LaunchCommand function @@ -605,6 +605,11 @@ class BuildTask:
         try:
             self.BuildItem.BuildObject.BuildTime = LaunchCommand(Command, WorkingDir)
             self.CompleteFlag = True
+
+            # Run hash operation post dependency, to account for libs
+            if GlobalData.gUseHashCache and self.BuildItem.BuildObject.IsLibrary:
+                HashFile = path.join(self.BuildItem.BuildObject.BuildDir, self.BuildItem.BuildObject.Name + ".hash")
+                SaveFileOnChange(HashFile, 
+ self.BuildItem.BuildObject.GenModuleHash(), False)
         except:
             #
             # TRICK: hide the output of threads left running, so that the user can
--
2.19.1.windows.1


-=-=-=-=-=-=-=-=-=-=-=-
Groups.io Links: You receive all messages sent to this group.

View/Reply Online (#40783): https://edk2.groups.io/g/devel/message/40783
Mute This Topic: https://groups.io/mt/31621303/1787277
Group Owner: devel+owner@edk2.groups.io
Unsubscribe: https://edk2.groups.io/g/devel/unsub  [importer@patchew.org]
-=-=-=-=-=-=-=-=-=-=-=-