Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix retries when connectRetryCount is set to 1 or greater #2513

Open
wants to merge 31 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 29 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
38da5ce
Fix for connectRetryCount > 0
Jeffery-Wasty Sep 18, 2024
2da9fa2
Cleanup
Jeffery-Wasty Sep 18, 2024
0c624b9
Added test to SQLServerConnectionTest + cleanup
Jeffery-Wasty Sep 18, 2024
16397df
Fix test
Jeffery-Wasty Sep 18, 2024
e8b93e5
Testing
Jeffery-Wasty Sep 18, 2024
aec6b52
ok..
Jeffery-Wasty Sep 18, 2024
825a284
Grasping at straws
Jeffery-Wasty Sep 18, 2024
4e61b54
more debugs, yum
Jeffery-Wasty Sep 19, 2024
8178088
focus on transient error
Jeffery-Wasty Sep 19, 2024
97c49d3
Fix for azure
Jeffery-Wasty Sep 19, 2024
b5c01ad
Cleanup
Jeffery-Wasty Sep 19, 2024
f8b33e1
More cleanup
Jeffery-Wasty Sep 19, 2024
b0267eb
More cleanup
Jeffery-Wasty Sep 19, 2024
fae191a
Trying to resolve thread interrupt test
Jeffery-Wasty Sep 19, 2024
8e2d846
a different try
Jeffery-Wasty Sep 19, 2024
0ed3eb5
add more time to interrupt
Jeffery-Wasty Sep 19, 2024
04b7641
Removed sleep mistakenly
Jeffery-Wasty Sep 19, 2024
2956fdf
Added more variants
Jeffery-Wasty Sep 19, 2024
9c2caa3
Remove unneeded tests
Jeffery-Wasty Sep 19, 2024
ca23e4e
Remove problem line + add fix for uncaught fail for BatchExecutionTest
Jeffery-Wasty Sep 19, 2024
c9d02de
Fail fixes
Jeffery-Wasty Sep 20, 2024
88e9e40
Added back the timer to the BatchExecutionTest; adjusted timeout time
Jeffery-Wasty Sep 20, 2024
428d3b2
More code cleanup
Jeffery-Wasty Sep 20, 2024
8d295e2
Fixed mistake
Jeffery-Wasty Sep 20, 2024
617b977
Formatting
Jeffery-Wasty Sep 20, 2024
902027c
Merge branch 'connRetryFix' of https://github.com/microsoft/mssql-jdb…
Jeffery-Wasty Sep 20, 2024
1de08c9
Trying to resolve code cov
Jeffery-Wasty Sep 20, 2024
2ca351e
Update timeout value
Jeffery-Wasty Sep 20, 2024
3a763bb
Remove stray fail
Jeffery-Wasty Sep 20, 2024
47830db
Revert changes to login retry interval.
Jeffery-Wasty Sep 24, 2024
ac63871
Remove added line.
Jeffery-Wasty Sep 24, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ public class SQLServerConnection implements ISQLServerConnection, java.io.Serial
/**
* Back off interval in ms for retries
*/
static final int BACKOFF_INTERVAL = 100;
static final int BACKOFF_INTERVAL = 500;
Jeffery-Wasty marked this conversation as resolved.
Show resolved Hide resolved

/** Current limit for this particular connection. */
private Boolean enablePrepareOnFirstPreparedStatementCall = null;
Expand Down Expand Up @@ -1994,7 +1994,7 @@ Connection connect(Properties propsIn, SQLServerPooledConnection pooledConnectio
if (0 == connectRetryCount) {
// connection retry disabled
throw e;
} else if (connectRetryAttempt++ > connectRetryCount) {
} else if (connectRetryAttempt++ >= connectRetryCount) {
// maximum connection retry count reached
if (connectionlogger.isLoggable(Level.FINE)) {
connectionlogger.fine("Connection failed. Maximum connection retry count "
Expand Down Expand Up @@ -2027,7 +2027,10 @@ Connection connect(Properties propsIn, SQLServerPooledConnection pooledConnectio
+ connectRetryInterval + ")s before retry.");
}

sleepForInterval(TimeUnit.SECONDS.toMillis(connectRetryInterval));
if (connectRetryAttempt > 1) {
// We do not sleep for first retry; first retry is immediate
sleepForInterval(TimeUnit.SECONDS.toMillis(connectRetryInterval));
}
}
}
}
Expand Down Expand Up @@ -3525,9 +3528,11 @@ private void login(String primary, String primaryInstanceName, int primaryPortNu

// We only get here when we failed to connect, but are going to re-try
// After trying to connect to both servers fails, sleep for a bit to prevent clogging
// the network with requests, then update sleep interval for next iteration (max 1 second interval)
// We have to sleep for every attempt in case of non-dbMirroring scenarios (including multisubnetfailover),
// Whereas for dbMirroring, we sleep for every two attempts as each attempt is to a different server.
// the network with requests, then update sleep interval for next iteration (to a maximum specified by
// connectRetryInterval [default 10]). We have to sleep for every attempt in case of non-dbMirroring
// scenarios (including multisubnetfailover), whereas for dbMirroring, we sleep for every two attempts
// as each attempt is to a different server.

// Make sure there's enough time to do another retry
if (!isDBMirroring || (isDBMirroring && (0 == attemptNumber % 2))
&& (attemptNumber < connectRetryCount && connectRetryCount != 0) && timerRemaining(
Expand All @@ -3542,7 +3547,12 @@ private void login(String primary, String primaryInstanceName, int primaryPortNu
+ attemptNumber);
}

sleepForInterval(TimeUnit.SECONDS.toMillis(connectRetryInterval));
sleepForInterval(fedauthRetryInterval);
int retryIntervalSecs = (int) TimeUnit.SECONDS.toMillis(connectRetryInterval);

// Double each time to a max of connectRetryInterval
fedauthRetryInterval = (fedauthRetryInterval >= retryIntervalSecs) ? retryIntervalSecs
: fedauthRetryInterval * 2;
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -454,17 +454,26 @@ public void testConnectionPoolGetTwice() throws SQLException {
}
}

/**
* Runs the `testConnectCountInLoginAndCorrectRetryCount` test several times with different values of
* connectRetryCount.
*/
@Test
public void testConnectCountInLoginAndCorrectRetryCountForMultipleValues() {
testConnectCountInLoginAndCorrectRetryCount(0);
testConnectCountInLoginAndCorrectRetryCount(1);
testConnectCountInLoginAndCorrectRetryCount(2);
}

/**
* Tests whether connectRetryCount and connectRetryInterval are properly respected in the login loop. As well, tests
* that connection is retried the proper number of times.
*/
@Test
public void testConnectCountInLoginAndCorrectRetryCount() {
private void testConnectCountInLoginAndCorrectRetryCount(int connectRetryCount) {
long timerStart = 0;

int connectRetryCount = 0;
int connectRetryInterval = 60;
int longLoginTimeout = loginTimeOutInSeconds * 3; // 90 seconds
int longLoginTimeout = loginTimeOutInSeconds * 9; // 90 seconds

try {
SQLServerDataSource ds = new SQLServerDataSource();
Expand All @@ -490,6 +499,15 @@ public void testConnectCountInLoginAndCorrectRetryCount() {

// Maximum is unknown, but is needs to be less than longLoginTimeout or else this is an issue.
assertTrue(totalTime < (longLoginTimeout * 1000L), TestResource.getResource("R_executionTooLong"));

// We should at least take as long as the retry interval between all retries past the first.
// Of the above acceptable errors (R_cannotOpenDatabase, R_loginFailedMI, R_MInotAvailable), only
// R_cannotOpenDatabase is transient, and can be used to measure multiple retries with retry interval. The
// others will exit before they have a chance to wait, and min will be too low.
if (e.getMessage().contains(TestResource.getResource("R_cannotOpenDatabase"))) {
int minTimeInSecs = connectRetryInterval * (connectRetryCount - 1);
assertTrue(totalTime > (minTimeInSecs * 1000L), TestResource.getResource("R_executionNotLong"));
}
}
}

Expand Down Expand Up @@ -800,9 +818,9 @@ public void testIncorrectDatabase() throws SQLException {
} catch (Exception e) {
assertTrue(
e.getMessage().contains(TestResource.getResource("R_cannotOpenDatabase"))
|| (TestUtils.getProperty(connectionString, "msiClientId") != null
|| (TestUtils.getProperty(connectionString, "msiClientId") != null
&& e.getMessage().toLowerCase()
.contains(TestResource.getResource("R_loginFailedMI").toLowerCase())),
.contains(TestResource.getResource("R_loginFailedMI").toLowerCase())),
e.getMessage());
timerEnd = System.currentTimeMillis();
}
Expand Down Expand Up @@ -833,9 +851,9 @@ public void testIncorrectUserName() throws SQLException {
} catch (Exception e) {
assertTrue(
e.getMessage().contains(TestResource.getResource("R_loginFailed"))
|| (TestUtils.getProperty(connectionString, "msiClientId") != null
|| (TestUtils.getProperty(connectionString, "msiClientId") != null
&& e.getMessage().toLowerCase()
.contains(TestResource.getResource("R_loginFailedMI").toLowerCase())),
.contains(TestResource.getResource("R_loginFailedMI").toLowerCase())),
e.getMessage());
timerEnd = System.currentTimeMillis();
}
Expand Down Expand Up @@ -867,8 +885,8 @@ public void testIncorrectPassword() throws SQLException {
assertTrue(
e.getMessage().contains(TestResource.getResource("R_loginFailed"))
|| (TestUtils.getProperty(connectionString, "msiClientId") != null
&& e.getMessage().toLowerCase()
.contains(TestResource.getResource("R_loginFailedMI").toLowerCase())),
&& e.getMessage().toLowerCase()
.contains(TestResource.getResource("R_loginFailedMI").toLowerCase())),
e.getMessage());
timerEnd = System.currentTimeMillis();
}
Expand Down Expand Up @@ -1047,8 +1065,8 @@ public void run() {
ds.setURL(connectionString);
ds.setServerName("invalidServerName" + UUID.randomUUID());
ds.setLoginTimeout(30);
ds.setConnectRetryCount(3);
ds.setConnectRetryInterval(10);
ds.setConnectRetryCount(6);
ds.setConnectRetryInterval(20);
try (Connection con = ds.getConnection()) {} catch (SQLException e) {}
}
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -172,18 +172,20 @@ public void testDMLoginTimeoutNotApplied() {
}
}

// Test connect retry set to 0 (disabled)
/**
* Test connect retry set to 0 (disabled)
*/
@Test
public void testConnectRetryDisable() {
long totalTime = 0;
long timerStart = System.currentTimeMillis();
int interval = defaultTimeout; // long interval so we can tell if there was a retry
long timeout = defaultTimeout * 2; // long loginTimeout to accommodate the long interval

// non existent server with long loginTimeout, should return fast if no retries at all
// non-existent server with long loginTimeout, should return fast if no retries at all
try (Connection con = PrepUtil.getConnection(
"jdbc:sqlserver://" + randomServer + ";transparentNetworkIPResolution=false;loginTimeout=" + timeout
+ ";connectRetryCount=0;connectInterval=" + interval)) {
+ ";connectRetryCount=0;connectRetryInterval=" + interval)) {
fail(TestResource.getResource("R_shouldNotConnect"));
} catch (Exception e) {
totalTime = System.currentTimeMillis() - timerStart;
Expand Down Expand Up @@ -230,7 +232,9 @@ public void testConnectRetryBadServer() {
"total time: " + totalTime + " loginTimeout: " + TimeUnit.SECONDS.toMillis(timeout));
}

// Test connect retry for database error
/**
* Test connect retry, with one retry interval, for database error
*/
@Test
public void testConnectRetryServerError() {
String auth = TestUtils.getProperty(connectionString, "authentication");
Expand All @@ -242,10 +246,10 @@ public void testConnectRetryServerError() {
int interval = defaultTimeout; // long interval so we can tell if there was a retry
long timeout = defaultTimeout * 2; // long loginTimeout to accommodate the long interval

// non existent database with interval < loginTimeout this will generate a 4060 transient error and retry 1 time
// non-existent database with interval < loginTimeout this will generate a 4060 transient error and retry 2 times
try (Connection con = PrepUtil.getConnection(
TestUtils.addOrOverrideProperty(connectionString, "database", RandomUtil.getIdentifier("database"))
+ ";loginTimeout=" + timeout + ";connectRetryCount=" + 1 + ";connectRetryInterval=" + interval
+ ";loginTimeout=" + timeout + ";connectRetryCount=" + 2 + ";connectRetryInterval=" + interval
+ ";transparentNetworkIPResolution=false")) {
fail(TestResource.getResource("R_shouldNotConnect"));
} catch (Exception e) {
Expand All @@ -261,14 +265,16 @@ public void testConnectRetryServerError() {
e.getMessage());
}

// 1 retry should be at least 1 interval long but < 2 intervals
// 2 retries should be at least 1 interval long but < 2 intervals (no interval between initial attempt and retry 1)
assertTrue(TimeUnit.SECONDS.toMillis(interval) < totalTime,
"interval: " + TimeUnit.SECONDS.toMillis(interval) + " total time: " + totalTime);
assertTrue(totalTime < TimeUnit.SECONDS.toMillis(2 * interval),
"total time: " + totalTime + " 2 * interval: " + TimeUnit.SECONDS.toMillis(interval));
}

// Test connect retry for database error using Datasource
/**
* Test connect retry, with one retry interval, for database error using Datasource
*/
@Test
public void testConnectRetryServerErrorDS() {
String auth = TestUtils.getProperty(connectionString, "authentication");
Expand All @@ -280,10 +286,10 @@ public void testConnectRetryServerErrorDS() {
int interval = defaultTimeout; // long interval so we can tell if there was a retry
long loginTimeout = defaultTimeout * 2; // long loginTimeout to accommodate the long interval

// non existent database with interval < loginTimeout this will generate a 4060 transient error and retry 1 time
// non-existent database with interval < loginTimeout this will generate a 4060 transient error and retry 2 times
SQLServerDataSource ds = new SQLServerDataSource();
String connectStr = TestUtils.addOrOverrideProperty(connectionString, "database",
RandomUtil.getIdentifier("database")) + ";logintimeout=" + loginTimeout + ";connectRetryCount=1"
RandomUtil.getIdentifier("database")) + ";loginTimeout=" + loginTimeout + ";connectRetryCount=2"
+ ";connectRetryInterval=" + interval;
updateDataSource(connectStr, ds);

Expand All @@ -301,7 +307,7 @@ public void testConnectRetryServerErrorDS() {
totalTime = System.currentTimeMillis() - timerStart;
}

// 1 retry should be at least 1 interval long but < 2 intervals
// 2 retries should be at least 1 interval long but < 2 intervals (no interval between initial attempt and retry 1)
assertTrue(TimeUnit.SECONDS.toMillis(interval) < totalTime,
"interval: " + TimeUnit.SECONDS.toMillis(interval) + " total time: " + totalTime);
assertTrue(totalTime < TimeUnit.SECONDS.toMillis(2 * interval),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,6 @@ public void testSqlServerBulkCopyCachingConnectionLevelMultiThreaded() throws Ex
TimerTask task = new TimerTask() {
public void run() {
((HashMap<?, ?>) bulkcopyCache).clear();
fail(TestResource.getResource("R_executionTooLong"));
}
};
Timer timer = new Timer("Timer");
Expand Down Expand Up @@ -348,7 +347,7 @@ public void testValidTimezoneForTimestampBatchInsertWithBulkCopy() throws Except
public void testValidTimezonesDstTimestampBatchInsertWithBulkCopy() throws Exception {
Calendar gmtCal = Calendar.getInstance(TimeZone.getTimeZone("GMT"));

for (String tzId: TimeZone.getAvailableIDs()) {
for (String tzId : TimeZone.getAvailableIDs()) {
TimeZone.setDefault(TimeZone.getTimeZone(tzId));

long ms = 1696127400000L; // DST
Expand Down