Amazon S3 origin: java.lang.NumberFormatException
I am using Amazon S3 origin to read compressed (zip) files on S3. It was working fine, until file sizes increased, now we get an exception: java.util.concurrent.ExecutionException: java.lang.NumberFormatException: For input string: "2165501968"
Datacollector version: 3.10.0
File size: 15,788,365 lines (969 mb)
Stacktrace:
java.util.concurrent.ExecutionException: java.lang.NumberFormatException: For input string: "2165501968"
at java.util.concurrent.FutureTask.report(FutureTask.java:122)
at java.util.concurrent.FutureTask.get(FutureTask.java:192)
at com.streamsets.pipeline.stage.origin.s3.AbstractAmazonS3Source.produce(AbstractAmazonS3Source.java:110)
at com.streamsets.pipeline.api.base.configurablestage.DPushSource.produce(DPushSource.java:44)
at com.streamsets.datacollector.runner.StageRuntime.lambda$execute$1(StageRuntime.java:270)
at com.streamsets.pipeline.api.impl.CreateByRef.call(CreateByRef.java:40)
at com.streamsets.datacollector.runner.StageRuntime.execute(StageRuntime.java:244)
at com.streamsets.datacollector.runner.StageRuntime.execute(StageRuntime.java:279)
at com.streamsets.datacollector.runner.SourcePipe.process(SourcePipe.java:79)
at com.streamsets.datacollector.execution.runner.common.ProductionPipelineRunner.runPushSource(ProductionPipelineRunner.java:426)
at com.streamsets.datacollector.execution.runner.common.ProductionPipelineRunner.run(ProductionPipelineRunner.java:388)
at com.streamsets.datacollector.runner.Pipeline.run(Pipeline.java:533)
at com.streamsets.datacollector.execution.runner.common.ProductionPipeline.run(ProductionPipeline.java:110)
at com.streamsets.datacollector.execution.runner.common.ProductionPipelineRunnable.run(ProductionPipelineRunnable.java:75)
at com.streamsets.datacollector.execution.runner.standalone.StandaloneRunner.start(StandaloneRunner.java:720)
at com.streamsets.datacollector.execution.runner.common.AsyncRunner.lambda$start$3(AsyncRunner.java:151)
at com.streamsets.pipeline.lib.executor.SafeScheduledExecutorService$SafeCallable.lambda$call$0(SafeScheduledExecutorService.java:226)
at com.streamsets.datacollector.security.GroupsInScope.execute(GroupsInScope.java:33)
at com.streamsets.pipeline.lib.executor.SafeScheduledExecutorService$SafeCallable.call(SafeScheduledExecutorService.java:222)
at com.streamsets.pipeline.lib.executor.SafeScheduledExecutorService$SafeCallable.lambda$call$0(SafeScheduledExecutorService.java:226)
at com.streamsets.datacollector.security.GroupsInScope.execute(GroupsInScope.java:33)
at com.streamsets.pipeline.lib.executor.SafeScheduledExecutorService$SafeCallable.call(SafeScheduledExecutorService.java:222)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180)
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293)
at com.streamsets.datacollector.metrics.MetricSafeScheduledExecutorService$MetricsTask.run(MetricSafeScheduledExecutorService.java:100)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.NumberFormatException: For input string: "2165501968"
at java.lang.NumberFormatException.forInputString(NumberFormatException.java:65)
at java.lang.Integer.parseInt(Integer.java:583)
at java.lang.Integer.valueOf(Integer.java:766)
at com.streamsets.pipeline.stage.origin.s3.AmazonS3Util.getFileOffset(AmazonS3Util.java:326)
at com.streamsets.pipeline.stage.origin.s3.AmazonS3Util.parseOffset(AmazonS3Util.java:304)
at com.streamsets.pipeline.stage.origin.s3.AmazonS3SourceImpl.updateOffset(AmazonS3SourceImpl.java:127)
at com.streamsets.pipeline.stage.origin.s3.AmazonS3Runnable.run(AmazonS3Runnable.java:109)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266 ...
Also reproduced the same error with uncompressed CSV file with 43969237 rows.