Skip to content

Commit edb3f3a

Browse files
authored
Merge pull request #645 from internetarchive/deps-3.8
Update dependencies and fix some warnings for 3.8.0 release
2 parents 3f323bb + fb9b286 commit edb3f3a

File tree

29 files changed

+90
-288
lines changed

29 files changed

+90
-288
lines changed

CHANGELOG.md

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,35 @@
44

55
[Full Changelog](https://github.com/internetarchive/heritrix3/compare/3.6.0...HEAD)
66

7+
## 3.8.0
8+
9+
[Download distribution zip](https://repo1.maven.org/maven2/org/archive/heritrix/heritrix/3.8.0/heritrix-3.8.0-dist.zip) (or [tar.gz](https://repo1.maven.org/maven2/org/archive/heritrix/heritrix/3.8.0/heritrix-3.8.0-dist.tar.gz))
10+
11+
[Full Changelog](https://github.com/internetarchive/heritrix3/compare/3.7.0...3.8.0) | [Javadoc](https://www.javadoc.io/doc/org.archive.heritrix/heritrix-engine/3.8.0/index.html) | [Maven Central](https://search.maven.org/artifact/org.archive.heritrix/heritrix/3.8.0/pom)
12+
13+
#### New Features
14+
15+
- **ExtractorYoutubeDL processArguments**: New option for overriding the default `yt-dlp` process arguments. [#644](https://github.com/internetarchive/heritrix3/pull/644)
16+
17+
#### Fixes
18+
19+
- **Slow tests**: Fixed `ObjectIdentityBdbManualCacheTest` so it no longer fails when running tests with `-DrunSlowTests=true`.
20+
- **Test stability**: Disabled `FetchHTTPTest.testHostHeaderDefaultPort` due to sporadic test failures.
21+
- **Code cleanup**: Fixed some compiler and IDE warnings. Removed unused utility classes (`JavaLiterals`, `LogUtils`).
22+
23+
#### Dependency Upgrades
24+
25+
- **amqp-client**: 5.24.0 → 5.25.0
26+
- **beanshell**: 2.0b5 → 2.0b6
27+
- **commons-codec**: 1.17.2 → 1.18.0
28+
- **dnsjava**: 3.6.2 → 3.6.3
29+
- **groovy**: 4.0.24 → 4.0.26
30+
- **gson**: 2.11.0 → 2.12.1
31+
- **jsch**: 0.2.22 → 0.2.24
32+
- **pdfbox**: 3.0.3 → 3.0.4
33+
- **slf4j**: 2.0.16 → 2.0.17
34+
- **spring**: 6.1.16 → 6.2.5
35+
736
## 3.7.0
837

938
[Download distribution zip](https://repo1.maven.org/maven2/org/archive/heritrix/heritrix/3.7.0/heritrix-3.7.0-dist.zip) (or [tar.gz](https://repo1.maven.org/maven2/org/archive/heritrix/heritrix/3.7.0/heritrix-3.7.0-dist.tar.gz))

commons/pom.xml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@
5757
<dependency>
5858
<groupId>commons-codec</groupId>
5959
<artifactId>commons-codec</artifactId>
60-
<version>1.17.2</version>
60+
<version>1.18.0</version>
6161
<scope>compile</scope>
6262
</dependency>
6363
<dependency>
@@ -81,7 +81,7 @@
8181
<dependency>
8282
<groupId>dnsjava</groupId>
8383
<artifactId>dnsjava</artifactId>
84-
<version>3.6.2</version>
84+
<version>3.6.3</version>
8585
<scope>compile</scope>
8686
</dependency>
8787
<!--
@@ -151,7 +151,7 @@
151151
<dependency>
152152
<groupId>com.github.mwiede</groupId>
153153
<artifactId>jsch</artifactId>
154-
<version>0.2.22</version>
154+
<version>0.2.24</version>
155155
</dependency>
156156
<dependency>
157157
<groupId>org.apache.groovy</groupId>
@@ -211,6 +211,6 @@
211211
</build>
212212
<properties>
213213
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
214-
<spring.version>6.1.16</spring.version>
214+
<spring.version>6.2.5</spring.version>
215215
</properties>
216216
</project>

commons/src/main/java/org/archive/util/JavaLiterals.java

Lines changed: 0 additions & 144 deletions
This file was deleted.

commons/src/main/java/org/archive/util/UriUtils.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ public static boolean isPossibleUri(CharSequence candidate) {
108108
* @deprecated produces too many false positives,
109109
* {@link #isVeryLikelyUri(CharSequence)} is preferred
110110
*/
111+
@Deprecated
111112
public static boolean isLikelyUri(CharSequence candidate) {
112113
return isPossibleUri(candidate) && !isLikelyFalsePositive(candidate);
113114
}

commons/src/main/java/st/ata/util/FPGenerator.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ public final class FPGenerator {
5454
array <code>polynomials</code> contains some irreducible
5555
polynomials). */
5656
public static FPGenerator make(long polynomial, int degree) {
57-
Long l = new Long(polynomial);
57+
Long l = polynomial;
5858
FPGenerator fpgen = (FPGenerator) generators.get(l);
5959
if (fpgen == null) {
6060
fpgen = new FPGenerator(polynomial, degree);

contrib/pom.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
<dependency>
2828
<groupId>com.rabbitmq</groupId>
2929
<artifactId>amqp-client</artifactId>
30-
<version>5.24.0</version>
30+
<version>5.25.0</version>
3131
</dependency>
3232
<dependency>
3333
<groupId>org.easymock</groupId>
@@ -48,7 +48,7 @@
4848
<dependency>
4949
<groupId>com.google.code.gson</groupId>
5050
<artifactId>gson</artifactId>
51-
<version>2.11.0</version>
51+
<version>2.12.1</version>
5252
</dependency>
5353
</dependencies>
5454
<build>

engine/src/main/java/org/archive/crawler/framework/CrawlJob.java

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -176,25 +176,25 @@ protected void scanJobLog() {
176176
isLaunchInfoPartial = true;
177177
startPosition = jobLog.length()-(FileUtils.ONE_KB * 100);
178178
}
179-
FileInputStream jobLogIn = new FileInputStream(jobLog);
180-
jobLogIn.getChannel().position(startPosition);
181-
BufferedReader jobLogReader = new BufferedReader(
182-
new InputStreamReader(jobLogIn));
183-
String line;
184-
// If we sliced into the file, make sure we skip to the next line:
185-
// (See https://github.com/internetarchive/heritrix3/issues/239)
186-
if (startPosition != 0) {
187-
line = jobLogReader.readLine();
188-
}
189-
// Parse lines looking for launch details:
190-
while ((line = jobLogReader.readLine()) != null) {
191-
Matcher m = launchLine.matcher(line);
192-
if (m.matches()) {
193-
launchCount++;
194-
lastLaunch = Instant.parse(m.group(1));
179+
try (FileInputStream jobLogIn = new FileInputStream(jobLog)) {
180+
jobLogIn.getChannel().position(startPosition);
181+
BufferedReader jobLogReader = new BufferedReader(
182+
new InputStreamReader(jobLogIn));
183+
String line;
184+
// If we sliced into the file, make sure we skip to the next line:
185+
// (See https://github.com/internetarchive/heritrix3/issues/239)
186+
if (startPosition != 0) {
187+
line = jobLogReader.readLine();
188+
}
189+
// Parse lines looking for launch details:
190+
while ((line = jobLogReader.readLine()) != null) {
191+
Matcher m = launchLine.matcher(line);
192+
if (m.matches()) {
193+
launchCount++;
194+
lastLaunch = Instant.parse(m.group(1));
195+
}
195196
}
196197
}
197-
jobLogReader.close();
198198
} catch (IOException e) {
199199
// TODO Auto-generated catch block
200200
e.printStackTrace();

engine/src/main/java/org/archive/crawler/framework/ToeThread.java

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,10 +72,8 @@ public enum Step {
7272
private int serialNumber;
7373

7474
/**
75-
* Each ToeThead has an instance of HttpRecord that gets used
75+
* Each ToeThread has an instance of Recorder that gets used
7676
* over and over by each request.
77-
*
78-
* @see org.archive.util.RecorderMarker
7977
*/
8078
private Recorder httpRecorder = null;
8179

engine/src/main/java/org/archive/crawler/frontier/FrontierJournal.java

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -166,8 +166,6 @@ public void run() {
166166
* recovery log into the frontier as considered included.
167167
*
168168
* @param source recovery log file to use
169-
* @param controller CrawlController of crawl to update
170-
* @param retainFailures whether failure ('Ff') URIs should count as done
171169
* @return number of lines in recovery log (for reference)
172170
* @throws IOException
173171
*/
@@ -235,7 +233,6 @@ private static int importCompletionInfoFromLog(File source,
235233
* (excepting those the frontier drops as already having been included)
236234
*
237235
* @param source recovery log file to use
238-
* @param controller CrawlController of crawl to update
239236
* @param params Map of options to apply
240237
* @param enough latch signalling 'enough' URIs queued to begin crawling
241238
*/

engine/src/main/java/org/archive/crawler/frontier/WorkQueueFrontier.java

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -940,7 +940,7 @@ protected void processFinish(CrawlURI curi) {
940940
wq.setSessionBudget(getBalanceReplenishAmount());
941941
wq.setTotalBudget(getQueueTotalBudget());
942942

943-
assert (wq.peek(this) == curi) : "unexpected peek " + wq;
943+
if ((wq.peek(this) != curi)) throw new AssertionError("unexpected peek " + wq);
944944

945945
int holderCost = curi.getHolderCost();
946946

@@ -1224,10 +1224,6 @@ protected int getTotalIneligibleInactiveQueues() {
12241224
getInactiveQueuesByPrecedence().tailMap(getPrecedenceFloor()));
12251225
}
12261226

1227-
/**
1228-
* @param iqueue
1229-
* @return
1230-
*/
12311227
private int tallyInactiveTotals(SortedMap<Integer,Queue<String>> iqueues) {
12321228
int inactiveCount = 0;
12331229
for(Queue<String> q : iqueues.values()) {

0 commit comments

Comments
 (0)