
// Textures:
// 	A diffuse map at tex_0
// 	A tangent space normal map at tex_1.rgb
// 	A bump map as tex_1.a

// Vertex data:
//	A.xyz is the U Axis without normalization
// 	A.w is the maximum depth for where tex_1's alpha channel is black
//	B.xyz is the V Axis without normalization

// You may modify this to move more data from the vertex shader to the pixel shader.
// It needs at least the position to work.
// Interpolated data handed from the vertex shader VS to the pixel shader PS.
struct VS_to_PS {
	// Projected position (object -> world -> camera -> image)
	float4 Pos : SV_POSITION;
	// Texture coordinates copied unchanged from the vertex input
	float4 Tex : TEXCOORD0;
	// Vertex color multiplied with the instance color, faded towards blue by the vertex's Selected value
	float4 Color : COLOR;
	// Vertex normal transformed by the upper 3x3 part of ObjectToWorld (not normalized)
	float3 Normal_WorldSpace : NORMAL;
	// Position in world space
	float4 Pos_WorldSpace : TEXCOORD1;
	// Position in camera space, its length is used for the fog
	float4 Pos_CameraSpace : TEXCOORD2;
	// U axis (A.xyz) transformed by the upper 3x3 part of ObjectToWorld (not normalized)
	float3 UAxis_WorldSpace : TEXCOORD3;
	// V axis (B.xyz) transformed by the upper 3x3 part of ObjectToWorld (not normalized)
	float3 VAxis_WorldSpace : TEXCOORD4;
	// Normalized cross(VAxis,Normal), flipped if needed so that it points along the U axis
	float3 CUAxis_WorldSpace : TEXCOORD5;
	// Normalized cross(Normal,UAxis), flipped if needed so that it points along the V axis
	float3 CVAxis_WorldSpace : TEXCOORD6;
	// Lengths of the object space U axis (x) and V axis (y)
	float2 TangentLengths : TEXCOORD7;
	// A.w from the vertex, the maximum depth for where tex_1's alpha channel is black
	float Depth : TEXCOORD8;
};

//--------------------------------------------------------------------------------------
// Vertex Shader
//--------------------------------------------------------------------------------------
VS_to_PS VS( VS_structure input ) {
	VS_to_PS output = (VS_to_PS)0;
	
	// Walk the position through object -> world -> camera -> image space,
	// keeping the intermediate results for the pixel shader
	float4 WorldPosition = mul( input.Pos, ObjectToWorld );
	float4 CameraPosition = mul( WorldPosition, WorldToCamera );
	output.Pos_WorldSpace = WorldPosition;
	output.Pos_CameraSpace = CameraPosition;
	output.Pos = mul( CameraPosition, CameraToImage );
	
	// Bring the normal and both tangent axes to world space using only the upper 3x3 part of the matrix
	float3x3 ObjectToWorld3x3 = (float3x3)ObjectToWorld;
	float3 WorldNormal = mul(input.Normal,ObjectToWorld3x3);
	float3 WorldU = mul(input.A.xyz,ObjectToWorld3x3);
	float3 WorldV = mul(input.B.xyz,ObjectToWorld3x3);
	output.Normal_WorldSpace = WorldNormal;
	output.UAxis_WorldSpace = WorldU;
	output.VAxis_WorldSpace = WorldV;
	
	// The axis lengths are measured in object space
	output.TangentLengths = float2(length(input.A.xyz),length(input.B.xyz));
	
	// Unit vector perpendicular to the V axis and the normal, chosen to point along the U axis
	float3 PerpendicularU = normalize(cross(WorldV,WorldNormal));
	output.CUAxis_WorldSpace = (dot(PerpendicularU,WorldU) < 0) ? -PerpendicularU : PerpendicularU;
	
	// Unit vector perpendicular to the normal and the U axis, chosen to point along the V axis
	float3 PerpendicularV = normalize(cross(WorldNormal,WorldU));
	output.CVAxis_WorldSpace = (dot(PerpendicularV,WorldV) < 0) ? -PerpendicularV : PerpendicularV;
	
	// The maximum depth is stored in the fourth component of A
	output.Depth = input.A.w;
	
	// Texture coordinates are passed on unchanged
	output.Tex = input.Tex;
	
	// Vertex color times instance color, blended towards opaque blue by the selection amount
	float4 TintedColor = input.Color * InstanceColor;
	float4 SelectionColor = float4(0.0f,0.0f,1.0f,1.0f);
	output.Color = lerp(TintedColor, SelectionColor, input.Selected);
	
	return output;
}

#define BisectionSteps 8 // How many iterations the pixel shader spends on the bisection method

#define MaxRealOffset 0.4f // Offset lengths below this are used unchanged, above it the offset starts to flatten out
#define MaxOffset 0.5f // The maximum length of the offset after limiting

#define MaxSteps 128 // Step budget for each edge sampling loop. This must be small enough to not trigger the operating system's timeout

// Loop var over the inclusive range min..max counting up
#define LoopForward(min,var,max) for(var=min;var<=max;var++)
// Loop var over the inclusive range min..max counting down
#define LoopBackward(min,var,max) for(var=max;var>=min;var--)

// Returns X modulo Y as a value in 0..Y-1, also when X is negative.
// Used for wrapping signed texel indices around the texture dimensions.
uint SafeModulo(in int X, in uint Y) {
	bool IsNegative = X < 0;
	// Take the remainder of the magnitude first
	uint Magnitude = IsNegative ? (uint)(-X) : (uint)X;
	uint Remainder = Magnitude % Y;
	// For negative values, count the remainder backwards from Y and wrap Y itself to zero
	return IsNegative ? ((Y - Remainder) % Y) : Remainder;
}

// Convert a texel space X or Y coordinate on the line from StartF to EndF into a depth,
// where StartF gives 0 and EndF gives 1.
// Both macros expect float2 variables named StartF and EndF to be in scope where they are used
// and divide by zero if the line has no extent along the chosen axis.
#define DepthFromX(X) (((float)(X) - StartF.x) / (EndF.x - StartF.x))
#define DepthFromY(Y) (((float)(Y) - StartF.y) / (EndF.y - StartF.y))

//--------------------------------------------------------------------------------------
// Pixel Shader
//--------------------------------------------------------------------------------------
// Parallax mapped pixel shader.
//	1. Derive a texture space offset from the view direction and limit its length.
//	2. Walk along the offset in texel space and test the bump map (tex_1.a) at every
//	   vertical and horizontal texel edge that the line crosses until the first hit.
//	3. Combine both edge tests into a depth interval inside one bilinear square.
//	4. Refine the hit depth with the bisection method.
//	5. Sample the diffuse and normal maps at the displaced coordinate and apply light and fog.
// Returns the lit and fogged color in R,G,B and the unfogged alpha in A.
float4 PS( VS_to_PS input) : SV_Target {
	int RemainingSteps;
	
	// If the camera has a world space cutting plane, clip on the negative side.
	UseCuttingPlane(input.Pos_WorldSpace)
	
	float3 RelativeCameraPosition = (CameraPos_WorldSpace - input.Pos_WorldSpace.xyz);
	
	// Distance to the camera along the normal, kept positive to avoid dividing by zero at grazing angles
	float FrontOfNormal = max(0.001f,dot(normalize(input.Normal_WorldSpace),RelativeCameraPosition));
	
	float2 ParallaxTextureOffset = float2(
		dot(input.CUAxis_WorldSpace,RelativeCameraPosition) / input.TangentLengths.x,
		dot(input.CVAxis_WorldSpace,RelativeCameraPosition) / input.TangentLengths.y
		) * max(0.000001f,input.Depth) / -FrontOfNormal;
	
	// Get the length of the offset
	float OffsetLength = length(ParallaxTextureOffset);
	
	// Limit the length of the offset using a linear, quadratic and constant section
	float LimitedLength = OffsetLength;
	if (OffsetLength < MaxRealOffset) {
		// Let the real length of the offset remain
	} else if (OffsetLength > (MaxOffset * 2) - MaxRealOffset){
		// Limit to MaxOffset
		LimitedLength = MaxOffset;
	} else {
		// Fade from MaxRealOffset to MaxOffset using a quadratic curve
		float Excess = OffsetLength - MaxRealOffset;
		LimitedLength = Excess - (Excess * Excess) / ((MaxOffset - MaxRealOffset) * 4) + MaxRealOffset;
	}
	
	// Assign the limited length.
	// A zero length offset has no direction, so it is kept at zero instead of being normalized into NaN.
	float2 LimitedOffset = float2(0.0f,0.0f);
	if (OffsetLength > 0.0f) {
		LimitedOffset = normalize(ParallaxTextureOffset) * LimitedLength;
	}
	
	// Get the dimensions of the bump map
	uint Width; uint Height; uint Levels;
	tex_1.GetDimensions(0,Width,Height,Levels);
	
	// Convert the line to texel space where whole numbers are texel centers
	float2 StartUVCoordinate = input.Tex.xy;
	float2 GoalUVCoordinate = input.Tex.xy + LimitedOffset;
	float2 StartF = float2((StartUVCoordinate.x * (float)Width),(StartUVCoordinate.y * (float)Height)) - 0.5f;
	float2 EndF = float2((GoalUVCoordinate.x * (float)Width),(GoalUVCoordinate.y * (float)Height)) - 0.5f;
	
	int StartX; // The X index of the first vertical edge to pass
	int EndX; // The X index of the last vertical edge to pass
	int CX; // Iterator
	int EdgeX = 0; // The Y index of the horizontal edge behind the first hit on a vertical edge
	
	int StartY; // The Y index of the first horizontal edge to pass
	int EndY; // The Y index of the last horizontal edge to pass
	int CY; // Iterator
	int EdgeY = 0; // The X index of the vertical edge behind the first hit on a horizontal edge
	
	// The execution time of each loop below is limited by RemainingSteps
	
	float Depth;
	
	// Detection of vertical edges
	float FinalXDepth_Min = 0.0f; // What depth came before the first hit in the vertical edge sampling
	float FinalXDepth_Max = 1.0f; // At what depth did the vertical edge sampling hit the first edge
	if (LimitedOffset.x > 0) {
		// From left to right
		StartX = floor(StartF.x) + 1;
		EndX = floor(EndF.x);
		
		// Limit execution time
		RemainingSteps = MaxSteps;
		
		// Loop from StartX to EndX counting up
		LoopForward(StartX,CX,EndX) {
			// Make a linear intersection with the vertical line at CX and the depth line from StartUVCoordinate to GoalUVCoordinate
			Depth = DepthFromX(CX);
			// Get the Y coordinate
			float FY = lerp(StartF.y,EndF.y,Depth);
			int FFY = floor(FY);
			// Wrap the texel indices around the texture, CX may be negative
			uint WX = SafeModulo(CX,Width);
			uint LY = SafeModulo(FFY,Height);
			uint HY = (LY + 1) % Height;
			if (1.0f - lerp(tex_1.Load(int3(WX,LY,0)).a,tex_1.Load(int3(WX,HY,0)).a,frac(FY)) < Depth) {
				FinalXDepth_Max = Depth;
				if (LimitedOffset.y > 0) {
					EdgeX = FFY;
				} else {
					EdgeX = FFY + 1;
				}
				break;
			} else {
				FinalXDepth_Min = Depth;
			}
			
			// Limit execution time
			RemainingSteps--; if (RemainingSteps < 0) { break; }
		}
	} else {
		// From right to left
		StartX = floor(StartF.x);
		EndX = floor(EndF.x) + 1;
		
		// Limit execution time
		RemainingSteps = MaxSteps;
		
		// Loop from StartX to EndX counting down
		LoopBackward(EndX,CX,StartX) {
			// Make a linear intersection with the vertical line at CX and the depth line from StartUVCoordinate to GoalUVCoordinate
			Depth = DepthFromX(CX);
			// Get the Y coordinate
			float FY = lerp(StartF.y,EndF.y,Depth);
			int FFY = floor(FY);
			// Wrap the texel indices around the texture, CX may be negative
			uint WX = SafeModulo(CX,Width);
			uint LY = SafeModulo(FFY,Height);
			uint HY = (LY + 1) % Height;
			if (1.0f - lerp(tex_1.Load(int3(WX,LY,0)).a,tex_1.Load(int3(WX,HY,0)).a,frac(FY)) < Depth) {
				FinalXDepth_Max = Depth;
				if (LimitedOffset.y > 0) {
					EdgeX = FFY;
				} else {
					EdgeX = FFY + 1;
				}
				break;
			} else {
				FinalXDepth_Min = Depth;
			}
			
			// Limit execution time
			RemainingSteps--; if (RemainingSteps < 0) { break; }
		}
	}
	
	// Detection of horizontal edges
	float FinalYDepth_Min = 0.0f; // What depth came before the first hit in the horizontal edge sampling
	float FinalYDepth_Max = 1.0f; // At what depth did the horizontal edge sampling hit the first edge
	if (LimitedOffset.y > 0) {
		// From top to bottom
		StartY = floor(StartF.y) + 1;
		EndY = floor(EndF.y);
		
		// Limit execution time
		RemainingSteps = MaxSteps;
		
		// Loop from StartY to EndY counting up
		LoopForward(StartY,CY,EndY) {
			// Make a linear intersection with the horizontal line at CY and the depth line from StartUVCoordinate to GoalUVCoordinate
			Depth = DepthFromY(CY);
			// Get the X coordinate
			float FX = lerp(StartF.x,EndF.x,Depth);
			int FFX = floor(FX);
			// Wrap the texel indices around the texture, CY may be negative
			uint WY = SafeModulo(CY,Height);
			uint LX = SafeModulo(FFX,Width);
			uint HX = (LX + 1) % Width;
			if (1.0f - lerp(tex_1.Load(int3(LX,WY,0)).a,tex_1.Load(int3(HX,WY,0)).a,frac(FX)) < Depth) {
				FinalYDepth_Max = Depth;
				if (LimitedOffset.x > 0) {
					EdgeY = FFX;
				} else {
					EdgeY = FFX + 1;
				}
				break;
			} else {
				FinalYDepth_Min = Depth;
			}
			
			// Limit execution time
			RemainingSteps--; if (RemainingSteps < 0) { break; }
		}
	} else {
		// Bottom up
		StartY = floor(StartF.y);
		EndY = floor(EndF.y) + 1;
		
		// Limit execution time
		RemainingSteps = MaxSteps;
		
		// Loop from StartY to EndY counting down
		LoopBackward(EndY,CY,StartY) {
			// Make a linear intersection with the horizontal line at CY and the depth line from StartUVCoordinate to GoalUVCoordinate
			Depth = DepthFromY(CY);
			// Get the X coordinate
			float FX = lerp(StartF.x,EndF.x,Depth);
			int FFX = floor(FX);
			// Wrap the row index by the height of the texture, CY may be negative
			uint WY = SafeModulo(CY,Height);
			uint LX = SafeModulo(FFX,Width);
			uint HX = (LX + 1) % Width;
			if (1.0f - lerp(tex_1.Load(int3(LX,WY,0)).a,tex_1.Load(int3(HX,WY,0)).a,frac(FX)) < Depth) {
				FinalYDepth_Max = Depth;
				if (LimitedOffset.x > 0) {
					EdgeY = FFX;
				} else {
					EdgeY = FFX + 1;
				}
				break;
			} else {
				FinalYDepth_Min = Depth;
			}
			
			// Limit execution time
			RemainingSteps--; if (RemainingSteps < 0) { break; }
		}
	}
	
	// Create a depth interval containing the first hit within a bilinear square
	float MinDepth;
	float MaxDepth;
	MaxDepth = min(FinalXDepth_Max,FinalYDepth_Max);
	if (FinalYDepth_Min < FinalXDepth_Max && FinalXDepth_Min < FinalYDepth_Max) {
		// An intersection can be made because the intervals are overlapping
		MinDepth = max(FinalXDepth_Min,FinalYDepth_Min);
	} else if (FinalXDepth_Min < FinalYDepth_Min) {
		// The first edge intersection was at edge X
		// EdgeX is a Y index, so its depth is measured along Y
		MinDepth = max(FinalXDepth_Min, DepthFromY(EdgeX));
	} else {
		// The first edge intersection was at edge Y
		// EdgeY is an X index, so its depth is measured along X
		MinDepth = max(FinalYDepth_Min, DepthFromX(EdgeY));
	}
	
	// Read the 4 height values surrounding the middle of the interval
	float MiddleDepth = lerp(MaxDepth,MinDepth,0.5f);
	float FX = lerp(StartF.x,EndF.x,MiddleDepth);
	int FFX = floor(FX);
	uint LX = SafeModulo(FFX,Width);
	uint HX = (LX + 1) % Width;
	float FY = lerp(StartF.y,EndF.y,MiddleDepth);
	int FFY = floor(FY);
	uint LY = SafeModulo(FFY,Height);
	uint HY = (LY + 1) % Height;
	float LXLYDepth = 1.0f - (tex_1.Load(int3(LX,LY,0)).a);
	float LXHYDepth = 1.0f - (tex_1.Load(int3(LX,HY,0)).a);
	float HXLYDepth = 1.0f - (tex_1.Load(int3(HX,LY,0)).a);
	float HXHYDepth = 1.0f - (tex_1.Load(int3(HX,HY,0)).a);
	
	// Calculate the square's upper and lower depth bounds
	float SquareMin = min(min(LXLYDepth,LXHYDepth),min(HXLYDepth,HXHYDepth));
	float SquareMax = max(max(LXLYDepth,LXHYDepth),max(HXLYDepth,HXHYDepth));
	
	// Improve quality by starting the bisection inside the square
	MinDepth = clamp(MinDepth,SquareMin,SquareMax);
	MaxDepth = clamp(MaxDepth,SquareMin,SquareMax);
	
	// Bisection
	for (int I = 0; I < BisectionSteps; I++) {
		Depth = lerp(MinDepth,MaxDepth,0.5f);
		float2 Fraction = frac(lerp(StartF,EndF,Depth));
		
		// Calculate the height within the bilinear square
		float CalculatedDepth = lerp(lerp(LXLYDepth,HXLYDepth,Fraction.x),lerp(LXHYDepth,HXHYDepth,Fraction.x),Fraction.y);
		
		if (CalculatedDepth < Depth) {
			// Contact
			MaxDepth = Depth;
		} else {
			// No contact
			MinDepth = Depth;
		}
	}
	Depth = lerp(MinDepth,MaxDepth,0.5f);
	float2 DeepUVCoordinate = input.Tex.xy + (LimitedOffset * Depth);
	
	// Derive UV without depth for the texture sampling to avoid edge artifacts.
	// SampleGrad takes the full 2D derivative of the texture coordinate along each screen axis.
	float4 finalColor = tex_0.SampleGrad( samAnisotropicMipmap, DeepUVCoordinate, ddx(input.Tex.xy), ddy(input.Tex.xy) ) * input.Color;
	
	// NOTE(review): the normal map is sampled using the implicit derivatives of the displaced coordinate.
	float3x3 TangentSpace = float3x3(normalize(input.UAxis_WorldSpace),normalize(input.VAxis_WorldSpace),normalize(input.Normal_WorldSpace));
	float3 FinalNormal = normalize(mul((tex_1.Sample( samAnisotropicMipmap, DeepUVCoordinate ).xyz * 2.0f) - 1.0f,TangentSpace));
	
	float3 DiffuseLight;
	float3 SpecularLight;
	Engine_GetDiffuseAndSpecularLight(input.Pos_WorldSpace.xyz,FinalNormal,30,DiffuseLight,SpecularLight);
	// finalColor = (finalColor * float4(DiffuseLight,1.0f)) + float4(SpecularLight,1.0f);
	finalColor = (finalColor * float4(DiffuseLight + SpecularLight,1.0f));
	
	// Show the final color with fog on R,G,B channels
	return float4((lerp(finalColor,FogColor,saturate(length(input.Pos_CameraSpace) / FogDistance) * MaxFogIntensity)).xyz,finalColor.w);
}
