From e59bf9301339e4983853e544ee4759d68a968106 Mon Sep 17 00:00:00 2001 From: Mario Molina Date: Fri, 21 Feb 2020 18:48:57 -0600 Subject: [PATCH 01/51] Prepare for next version --- pom.xml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 8c2c90e..88ae6e6 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ com.github.mmolimar.kafka.connect kafka-connect-fs - 0.3-SNAPSHOT + 1.0.0-SNAPSHOT jar kafka-connect-fs @@ -123,6 +123,7 @@ + org.apache.maven.plugins maven-assembly-plugin ${maven-assembly-plugin.version} From 29ce9273eb60dc3efac24e6282c2dcf3fc4eec57 Mon Sep 17 00:00:00 2001 From: Mario Molina Date: Fri, 21 Feb 2020 18:49:22 -0600 Subject: [PATCH 02/51] Upgrade dependencies --- pom.xml | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/pom.xml b/pom.xml index 88ae6e6..52c6b89 100644 --- a/pom.xml +++ b/pom.xml @@ -11,18 +11,18 @@ UTF-8 - 0.10.1.0 - 3.1.1 - 2.9.0 - 1.8.1 - 1.9.0 - 4.12 - 3.4 - 1.6.6 - 3.0.2 - 3.6.1 - 3.0.0 - 0.7.9 + 2.4.0 + 5.4.0 + 3.2.1 + 1.9.2 + 1.11.0 + 4.13 + 4.2 + 2.0.5 + 3.2.0 + 3.8.1 + 3.2.0 + 0.8.5 4.3.0 From 9af48436e879dd7e626c6f5e12fd972927a0d91a Mon Sep 17 00:00:00 2001 From: Mario Molina Date: Sat, 22 Feb 2020 11:37:04 -0600 Subject: [PATCH 03/51] Compatibility with new version in deps --- .../kafka/connect/fs/FsSourceConnector.java | 5 +---- .../kafka/connect/fs/FsSourceTask.java | 19 +++++++------------ .../kafka/connect/fs/file/FileMetadata.java | 9 +++------ .../fs/file/reader/AbstractFileReader.java | 5 ++--- .../fs/file/reader/AgnosticFileReader.java | 13 +++++++------ .../fs/file/reader/AvroFileReader.java | 8 ++++++-- .../file/reader/DelimitedTextFileReader.java | 15 ++++++--------- .../fs/file/reader/ParquetFileReader.java | 11 ++++++----- .../fs/file/reader/SequenceFileReader.java | 4 ++-- .../fs/file/reader/TextFileReader.java | 2 +- .../connect/fs/policy/AbstractPolicy.java | 17 ++++++++--------- .../fs/policy/HdfsFileWatcherPolicy.java | 11 +++++------ .../kafka/connect/fs/policy/SleepyPolicy.java | 6 +++--- .../connect/fs/util/ReflectionUtils.java | 8 ++++---- .../kafka/connect/fs/util/Version.java | 2 +- 15 files changed, 62 insertions(+), 73 deletions(-) diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceConnector.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceConnector.java index e6aab15..0d4ad3e 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceConnector.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceConnector.java @@ -1,7 +1,5 @@ package com.github.mmolimar.kafka.connect.fs; -import com.github.mmolimar.kafka.connect.fs.file.reader.FileReader; -import com.github.mmolimar.kafka.connect.fs.policy.Policy; import com.github.mmolimar.kafka.connect.fs.util.Version; import org.apache.kafka.common.config.ConfigDef; import org.apache.kafka.common.config.ConfigException; @@ -16,7 +14,6 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.stream.Collectors; public class FsSourceConnector extends SourceConnector { @@ -60,7 +57,7 @@ public List> taskConfigs(int maxTasks) { ConnectorUtils.groupPartitions(config.getFsUris(), groups) .forEach(dirs -> { Map taskProps = new HashMap<>(config.originalsStrings()); - taskProps.put(FsSourceConnectorConfig.FS_URIS, dirs.stream().collect(Collectors.joining(","))); + taskProps.put(FsSourceConnectorConfig.FS_URIS, String.join(",", dirs)); taskConfigs.add(taskProps); }); diff --git 
a/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceTask.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceTask.java index b7c97eb..971f6ee 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceTask.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceTask.java @@ -24,7 +24,7 @@ public class FsSourceTask extends SourceTask { private static final Logger log = LoggerFactory.getLogger(FsSourceTask.class); - private AtomicBoolean stop; + private final AtomicBoolean stop = new AtomicBoolean(false); private FsSourceTaskConfig config; private Policy policy; @@ -37,7 +37,6 @@ public String version() { public void start(Map properties) { try { config = new FsSourceTaskConfig(properties); - if (config.getClass(FsSourceTaskConfig.POLICY_CLASS).isAssignableFrom(Policy.class)) { throw new ConfigException("Policy class " + config.getClass(FsSourceTaskConfig.POLICY_CLASS) + "is not a sublass of " + Policy.class); @@ -57,13 +56,11 @@ public void start(Map properties) { log.error("Couldn't start FsSourceConnector:", t); throw new ConnectException("A problem has occurred reading configuration:" + t.getMessage()); } - - stop = new AtomicBoolean(false); } @Override - public List poll() throws InterruptedException { - while (stop != null && !stop.get() && !policy.hasEnded()) { + public List poll() { + while (!stop.get() && policy != null && !policy.hasEnded()) { log.trace("Polling for new data"); final List results = new ArrayList<>(); @@ -92,8 +89,8 @@ private List filesToProcess() { .collect(Collectors.toList()); } catch (IOException | ConnectException e) { //when an exception happens executing the policy, the connector continues - log.error("Cannot retrive files to process from FS: " + policy.getURIs() + ". Keep going...", e); - return Collections.EMPTY_LIST; + log.error("Cannot retrieve files to process from FS: " + policy.getURIs() + ". 
Keep going...", e); + return Collections.emptyList(); } } @@ -120,11 +117,9 @@ private SourceRecord convert(FileMetadata metadata, Offset offset, Struct struct @Override public void stop() { - if (stop != null) { - stop.set(true); - } + stop.set(true); if (policy != null) { policy.interrupt(); } } -} \ No newline at end of file +} diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/FileMetadata.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/FileMetadata.java index 45902e9..669b681 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/FileMetadata.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/FileMetadata.java @@ -36,12 +36,9 @@ public boolean equals(Object object) { if (!(object instanceof FileMetadata)) return false; FileMetadata metadata = (FileMetadata) object; - if (this.path.equals(metadata.getPath()) && + return this.path.equals(metadata.getPath()) && this.length == metadata.length && - this.blocks.equals(metadata.getBlocks())) { - return true; - } - return false; + this.blocks.equals(metadata.getBlocks()); } public int hashCode() { @@ -65,4 +62,4 @@ public String toString() { return String.format("[offset = %s, length = %s, corrupt = %s]", offset, length, corrupt); } } -} \ No newline at end of file +} diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AbstractFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AbstractFileReader.java index 4e1b474..1fbdddb 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AbstractFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AbstractFileReader.java @@ -15,7 +15,7 @@ public abstract class AbstractFileReader implements FileReader { private final Path filePath; private ReaderAdapter adapter; - public AbstractFileReader(FileSystem fs, Path filePath, ReaderAdapter adapter, Map config) { + public AbstractFileReader(FileSystem fs, Path filePath, ReaderAdapter adapter, Map config) { if (fs == null || filePath == null) { throw new IllegalArgumentException("fileSystem and filePath are required"); } @@ -25,7 +25,7 @@ public AbstractFileReader(FileSystem fs, Path filePath, ReaderAdapter adapter, M Map readerConf = config.entrySet().stream() .filter(entry -> entry.getKey().startsWith(FILE_READER_PREFIX)) - .collect(Collectors.toMap(entry -> entry.getKey(), entry -> entry.getValue())); + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); configure(readerConf); } @@ -49,5 +49,4 @@ public final Struct next() { protected ReaderAdapter getAdapter() { return adapter; } - } diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AgnosticFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AgnosticFileReader.java index 5e025da..34f9670 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AgnosticFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AgnosticFileReader.java @@ -8,6 +8,7 @@ import java.io.IOException; import java.util.Arrays; +import java.util.Collections; import java.util.List; import java.util.Map; @@ -33,7 +34,7 @@ public AgnosticFileReader(FileSystem fs, Path filePath, Map conf } catch (RuntimeException | IOException e) { throw e; } catch (Throwable t) { - throw new IOException("An error has ocurred when creating a concrete reader", t); + throw new IOException("An error has occurred when creating a concrete reader", t); } } @@ -62,13 +63,13 @@ 
private FileReader readerByExtension(FileSystem fs, Path filePath, Map config) { this.parquetExtensions = config.get(FILE_READER_AGNOSTIC_EXTENSIONS_PARQUET) == null ? - Arrays.asList("parquet") : + Collections.singletonList("parquet") : Arrays.asList(config.get(FILE_READER_AGNOSTIC_EXTENSIONS_PARQUET).toString().toLowerCase().split(",")); this.avroExtensions = config.get(FILE_READER_AGNOSTIC_EXTENSIONS_AVRO) == null ? - Arrays.asList("avro") : + Collections.singletonList("avro") : Arrays.asList(config.get(FILE_READER_AGNOSTIC_EXTENSIONS_AVRO).toString().toLowerCase().split(",")); this.sequenceExtensions = config.get(FILE_READER_AGNOSTIC_EXTENSIONS_SEQUENCE) == null ? - Arrays.asList("seq") : + Collections.singletonList("seq") : Arrays.asList(config.get(FILE_READER_AGNOSTIC_EXTENSIONS_SEQUENCE).toString().toLowerCase().split(",")); this.delimitedExtensions = config.get(FILE_READER_AGNOSTIC_EXTENSIONS_DELIMITED) == null ? Arrays.asList("tsv", "csv") : @@ -102,7 +103,7 @@ protected AgnosticRecord nextRecord() { static class AgnosticAdapter implements ReaderAdapter { - public AgnosticAdapter() { + AgnosticAdapter() { } @Override @@ -115,7 +116,7 @@ static class AgnosticRecord { private final ReaderAdapter adapter; private final Object record; - public AgnosticRecord(ReaderAdapter adapter, Object record) { + AgnosticRecord(ReaderAdapter adapter, Object record) { this.adapter = adapter; this.record = record; } diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReader.java index 46e5e9f..44ec3df 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReader.java @@ -25,14 +25,18 @@ public class AvroFileReader extends AbstractFileReader { public static final String FILE_READER_AVRO_SCHEMA = FILE_READER_AVRO + "schema"; private final AvroOffset offset; - private DataFileReader reader; + private final DataFileReader reader; private Schema schema; public AvroFileReader(FileSystem fs, Path filePath, Map config) throws IOException { super(fs, filePath, new GenericRecordToStruct(), config); AvroFSInput input = new AvroFSInput(FileContext.getFileContext(filePath.toUri()), filePath); - this.reader = new DataFileReader<>(input, new SpecificDatumReader<>(this.schema)); + if (this.schema == null) { + this.reader = new DataFileReader<>(input, new SpecificDatumReader<>()); + } else { + this.reader = new DataFileReader<>(input, new SpecificDatumReader<>(this.schema)); + } this.offset = new AvroOffset(0); } diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/DelimitedTextFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/DelimitedTextFileReader.java index 542d3c0..b5f59ee 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/DelimitedTextFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/DelimitedTextFileReader.java @@ -9,7 +9,6 @@ import java.io.IOException; import java.util.Map; -import java.util.stream.Collectors; import java.util.stream.IntStream; import static com.github.mmolimar.kafka.connect.fs.FsSourceTaskConfig.FILE_READER_PREFIX; @@ -43,7 +42,7 @@ public DelimitedTextFileReader(FileSystem fs, Path filePath, Map SchemaBuilder schemaBuilder = SchemaBuilder.struct(); if (hasNext()) { String firstLine = inner.nextRecord().getValue(); - String columns[] = 
firstLine.split(token); + String[] columns = firstLine.split(token); IntStream.range(0, columns.length).forEach(index -> { String columnName = hasHeader ? columns[index] : DEFAULT_COLUMN_NAME + "_" + ++index; schemaBuilder.field(columnName, SchemaBuilder.STRING_SCHEMA); @@ -66,13 +65,13 @@ protected void configure(Map config) { this.token = config.get(FILE_READER_DELIMITED_TOKEN).toString(); this.defaultValue = config.get(FILE_READER_DELIMITED_DEFAULT_VALUE) == null ? null : config.get(FILE_READER_DELIMITED_DEFAULT_VALUE).toString(); - this.hasHeader = Boolean.valueOf((String) config.get(FILE_READER_DELIMITED_HEADER)); + this.hasHeader = Boolean.parseBoolean((String) config.get(FILE_READER_DELIMITED_HEADER)); } @Override protected DelimitedRecord nextRecord() { offset.inc(); - String values[] = inner.nextRecord().getValue().split(token); + String[] values = inner.nextRecord().getValue().split(token); return new DelimitedRecord(schema, defaultValue != null ? fillNullValues(values) : values); } @@ -84,9 +83,7 @@ private String[] fillNullValues(final String[] values) { } else { return defaultValue; } - }) - .collect(Collectors.toList()) - .toArray(new String[0]); + }).toArray(String[]::new); } @Override @@ -123,7 +120,7 @@ public void setOffset(long offset) { this.offset = hasHeader && offset > 0 ? offset - 1 : offset; } - protected void inc() { + void inc() { this.offset++; } @@ -151,7 +148,7 @@ static class DelimitedRecord { private final Schema schema; private final String[] values; - public DelimitedRecord(Schema schema, String[] values) { + DelimitedRecord(Schema schema, String[] values) { this.schema = schema; this.values = values; } diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/ParquetFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/ParquetFileReader.java index 76b71da..f6537f3 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/ParquetFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/ParquetFileReader.java @@ -13,6 +13,7 @@ import org.apache.parquet.avro.AvroParquetReader; import org.apache.parquet.avro.AvroReadSupport; import org.apache.parquet.hadoop.ParquetReader; +import org.apache.parquet.hadoop.util.HadoopInputFile; import java.io.IOException; import java.util.Map; @@ -52,9 +53,9 @@ private ParquetReader initReader() throws IOException { if (this.projection != null) { AvroReadSupport.setRequestedProjection(configuration, this.projection); } - ParquetReader reader = AvroParquetReader.builder(getFilePath()) - .withConf(configuration).build(); - return reader; + return AvroParquetReader + .builder(HadoopInputFile.fromPath(getFilePath(), configuration)) + .build(); } protected void configure(Map config) { @@ -144,7 +145,7 @@ public void setOffset(long offset) { this.offset = offset; } - protected void inc() { + void inc() { this.offset++; } @@ -158,7 +159,7 @@ static class GenericRecordToStruct implements ReaderAdapter { private static final int CACHE_SIZE = 100; private final AvroData avroData; - public GenericRecordToStruct() { + GenericRecordToStruct() { this.avroData = new AvroData(CACHE_SIZE); } diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReader.java index 013a680..3432da7 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReader.java +++ 
b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReader.java @@ -154,7 +154,7 @@ public void setOffset(long offset) { this.offset = offset; } - protected void inc() { + void inc() { this.offset++; } @@ -202,7 +202,7 @@ static class SequenceRecord { private final String valueFieldName; private final U value; - public SequenceRecord(Schema schema, String keyFieldName, T key, String valueFieldName, U value) { + SequenceRecord(Schema schema, String keyFieldName, T key, String valueFieldName, U value) { this.schema = schema; this.keyFieldName = keyFieldName; this.key = key; diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReader.java index a5781af..7ed0b80 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReader.java @@ -158,7 +158,7 @@ static class TextRecord { private final Schema schema; private final String value; - public TextRecord(Schema schema, String value) { + TextRecord(Schema schema, String value) { this.schema = schema; this.value = value; } diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/AbstractPolicy.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/AbstractPolicy.java index 2a6dbce..251987e 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/AbstractPolicy.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/AbstractPolicy.java @@ -54,7 +54,7 @@ public AbstractPolicy(FsSourceTaskConfig conf) throws IOException { private Map customConfigs() { return conf.originals().entrySet().stream() .filter(entry -> entry.getKey().startsWith(FsSourceTaskConfig.POLICY_PREFIX)) - .collect(Collectors.toMap(entry -> entry.getKey(), entry -> entry.getValue())); + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); } private void configFs(Map customConfigs) throws IOException { @@ -126,7 +126,7 @@ public void interrupt() { protected void preCheck() { } - protected void postCheck() { + private void postCheck() { } public Iterator listFiles(FileSystem fs) throws IOException { @@ -173,17 +173,16 @@ public final boolean hasEnded() { protected abstract boolean isPolicyCompleted(); - public final int getExecutions() { + final int getExecutions() { return executions.get(); } - protected FileMetadata toMetadata(LocatedFileStatus fileStatus) { - List blocks = new ArrayList<>(); + FileMetadata toMetadata(LocatedFileStatus fileStatus) { - blocks.addAll(Arrays.stream(fileStatus.getBlockLocations()) + List blocks = Arrays.stream(fileStatus.getBlockLocations()) .map(block -> new FileMetadata.BlockInfo(block.getOffset(), block.getLength(), block.isCorrupt())) - .collect(Collectors.toList())); + .collect(Collectors.toList()); return new FileMetadata(fileStatus.getPath().toString(), fileStatus.getLen(), blocks); } @@ -215,8 +214,8 @@ public FileReader offer(FileMetadata metadata, OffsetStorageReader offsetStorage return reader; } - Iterator concat(final Iterator it1, - final Iterator it2) { + private Iterator concat(final Iterator it1, + final Iterator it2) { return new Iterator() { @Override diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/HdfsFileWatcherPolicy.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/HdfsFileWatcherPolicy.java index e928d13..dd558d6 100644 --- 
a/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/HdfsFileWatcherPolicy.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/HdfsFileWatcherPolicy.java @@ -30,7 +30,7 @@ public class HdfsFileWatcherPolicy extends AbstractPolicy { public HdfsFileWatcherPolicy(FsSourceTaskConfig conf) throws IOException { super(conf); - this.fileQueue = new ConcurrentLinkedQueue(); + this.fileQueue = new ConcurrentLinkedQueue<>(); startWatchers(); } @@ -50,15 +50,15 @@ protected void configPolicy(Map customConfigs) { } private void startWatchers() { - fsEvenStream.values().forEach(stream -> stream.start()); + fsEvenStream.values().forEach(Thread::start); } private void stopWatchers() { - fsEvenStream.values().forEach(stream -> stream.interrupt()); + fsEvenStream.values().forEach(Thread::interrupt); } @Override - public Iterator listFiles(FileSystem fs) throws IOException { + public Iterator listFiles(FileSystem fs) { Set files = new HashSet<>(); FileMetadata metadata; while ((metadata = fileQueue.poll()) != null) { @@ -95,7 +95,7 @@ private class EventStreamThread extends Thread { private final FileSystem fs; private final HdfsAdmin admin; - protected EventStreamThread(FileSystem fs, HdfsAdmin admin) { + EventStreamThread(FileSystem fs, HdfsAdmin admin) { this.fs = fs; this.admin = admin; } @@ -151,4 +151,3 @@ private void enqueue(String path) throws IOException { } } } - diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/SleepyPolicy.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/SleepyPolicy.java index 4919c34..2a02884 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/SleepyPolicy.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/SleepyPolicy.java @@ -31,14 +31,14 @@ public SleepyPolicy(FsSourceTaskConfig conf) throws IOException { @Override protected void configPolicy(Map customConfigs) { try { - this.sleep = Long.valueOf((String) customConfigs.get(SLEEPY_POLICY_SLEEP_MS)); + this.sleep = Long.parseLong((String) customConfigs.get(SLEEPY_POLICY_SLEEP_MS)); } catch (NumberFormatException nfe) { throw new ConfigException(SLEEPY_POLICY_SLEEP_MS + " property is required and must be a number(long). Got: " + customConfigs.get(SLEEPY_POLICY_SLEEP_MS)); } if (customConfigs.get(SLEEPY_POLICY_MAX_EXECS) != null) { try { - this.maxExecs = Long.valueOf((String) customConfigs.get(SLEEPY_POLICY_MAX_EXECS)); + this.maxExecs = Long.parseLong((String) customConfigs.get(SLEEPY_POLICY_MAX_EXECS)); } catch (NumberFormatException nfe) { throw new ConfigException(SLEEPY_POLICY_MAX_EXECS + " property must be a number(long). Got: " + customConfigs.get(SLEEPY_POLICY_MAX_EXECS)); @@ -48,7 +48,7 @@ protected void configPolicy(Map customConfigs) { } if (customConfigs.get(SLEEPY_POLICY_SLEEP_FRACTION) != null) { try { - this.sleepFraction = Long.valueOf((String) customConfigs.get(SLEEPY_POLICY_SLEEP_FRACTION)); + this.sleepFraction = Long.parseLong((String) customConfigs.get(SLEEPY_POLICY_SLEEP_FRACTION)); } catch (NumberFormatException nfe) { throw new ConfigException(SLEEPY_POLICY_SLEEP_FRACTION + " property must be a number(long). 
Got: " + customConfigs.get(SLEEPY_POLICY_SLEEP_FRACTION)); diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/util/ReflectionUtils.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/util/ReflectionUtils.java index babe70c..6b84ca3 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/util/ReflectionUtils.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/util/ReflectionUtils.java @@ -3,7 +3,7 @@ import com.github.mmolimar.kafka.connect.fs.FsSourceTaskConfig; import com.github.mmolimar.kafka.connect.fs.file.reader.FileReader; import com.github.mmolimar.kafka.connect.fs.policy.Policy; -import org.apache.commons.lang.reflect.ConstructorUtils; +import org.apache.commons.lang3.reflect.ConstructorUtils; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -25,10 +25,10 @@ public static Policy makePolicy(Class clazz, FsSourceTaskConfi private static T make(Class clazz, Object... args) throws Throwable { try { - Class[] constClasses = Arrays.stream(args).map(arg -> arg.getClass()).toArray(Class[]::new); + Class[] constClasses = Arrays.stream(args).map(Object::getClass).toArray(Class[]::new); - Constructor constructor = ConstructorUtils.getMatchingAccessibleConstructor(clazz, constClasses); - return (T) constructor.newInstance(args); + Constructor constructor = ConstructorUtils.getMatchingAccessibleConstructor(clazz, constClasses); + return constructor.newInstance(args); } catch (IllegalAccessException | InstantiationException | InvocationTargetException e) { diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/util/Version.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/util/Version.java index 23d2312..7e94e04 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/util/Version.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/util/Version.java @@ -22,4 +22,4 @@ public class Version { public static String getVersion() { return version; } -} \ No newline at end of file +} From 896b44378794f7d1c3ddf06a2eb29ccee085e24a Mon Sep 17 00:00:00 2001 From: Mario Molina Date: Sat, 22 Feb 2020 11:37:52 -0600 Subject: [PATCH 04/51] Fix tests --- .../fs/file/reader/FileReaderTestBase.java | 8 ++++---- .../fs/file/reader/hdfs/AvroFileReaderTest.java | 3 ++- .../hdfs/DelimitedTextFileReaderTest.java | 16 ++++++++-------- .../reader/hdfs/HdfsFileReaderTestBase.java | 6 ++---- .../file/reader/hdfs/ParquetFileReaderTest.java | 17 +++++++++++++++-- .../reader/hdfs/SequenceFileReaderTest.java | 6 ++---- .../fs/file/reader/hdfs/TextFileReaderTest.java | 3 +-- .../file/reader/local/AvroFileReaderTest.java | 4 ++-- .../local/DelimitedTextFileReaderTest.java | 16 ++++++++-------- .../reader/local/ParquetFileReaderTest.java | 17 +++++++++++++++-- .../reader/local/SequenceFileReaderTest.java | 5 ++--- .../file/reader/local/TextFileReaderTest.java | 3 +-- .../policy/hdfs/HdfsFileWatcherPolicyTest.java | 5 ++--- .../fs/policy/hdfs/HdfsPolicyTestBase.java | 6 ++---- .../fs/policy/hdfs/SimplePolicyTest.java | 4 ++-- .../fs/policy/hdfs/SleepyPolicyTest.java | 5 ++--- .../fs/policy/local/SimplePolicyTest.java | 4 ++-- .../fs/policy/local/SleepyPolicyTest.java | 4 ++-- 18 files changed, 74 insertions(+), 58 deletions(-) diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FileReaderTestBase.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FileReaderTestBase.java index e4aa2b4..238db17 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FileReaderTestBase.java 
+++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FileReaderTestBase.java @@ -39,7 +39,7 @@ public static void tearDown() throws IOException { @Before public void openReader() throws Throwable { reader = getReader(fs, dataFile, readerConfig); - assertTrue(reader.getFilePath().equals(dataFile)); + assertEquals(reader.getFilePath(), dataFile); } @After @@ -103,19 +103,19 @@ public void seekFile() { int recordIndex = NUM_RECORDS / 2; reader.seek(getOffset(OFFSETS_BY_INDEX.get(recordIndex))); assertTrue(reader.hasNext()); - assertEquals(OFFSETS_BY_INDEX.get(recordIndex).longValue() + 1, reader.currentOffset().getRecordOffset()); + assertEquals(OFFSETS_BY_INDEX.get(recordIndex) + 1, reader.currentOffset().getRecordOffset()); checkData(reader.next(), recordIndex); recordIndex = 0; reader.seek(getOffset(OFFSETS_BY_INDEX.get(recordIndex))); assertTrue(reader.hasNext()); - assertEquals(OFFSETS_BY_INDEX.get(recordIndex).longValue() + 1, reader.currentOffset().getRecordOffset()); + assertEquals(OFFSETS_BY_INDEX.get(recordIndex) + 1, reader.currentOffset().getRecordOffset()); checkData(reader.next(), recordIndex); recordIndex = NUM_RECORDS - 3; reader.seek(getOffset(OFFSETS_BY_INDEX.get(recordIndex))); assertTrue(reader.hasNext()); - assertEquals(OFFSETS_BY_INDEX.get(recordIndex).longValue() + 1, reader.currentOffset().getRecordOffset()); + assertEquals(OFFSETS_BY_INDEX.get(recordIndex) + 1, reader.currentOffset().getRecordOffset()); checkData(reader.next(), recordIndex); reader.seek(getOffset(OFFSETS_BY_INDEX.get(NUM_RECORDS - 1) + 1)); diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/AvroFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/AvroFileReaderTest.java index 67a772e..f829ff1 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/AvroFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/AvroFileReaderTest.java @@ -23,6 +23,7 @@ import java.util.UUID; import java.util.stream.IntStream; +import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; public class AvroFileReaderTest extends HdfsFileReaderTestBase { @@ -101,7 +102,7 @@ protected Offset getOffset(long offset) { @Override protected void checkData(Struct record, long index) { - assertTrue((Integer) record.get(FIELD_INDEX) == index); + assertEquals((int) (Integer) record.get(FIELD_INDEX), index); assertTrue(record.get(FIELD_NAME).toString().startsWith(index + "_")); assertTrue(record.get(FIELD_SURNAME).toString().startsWith(index + "_")); } diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/DelimitedTextFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/DelimitedTextFileReaderTest.java index da5304d..137eee1 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/DelimitedTextFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/DelimitedTextFileReaderTest.java @@ -50,7 +50,7 @@ private static Path createDataFile(boolean header) throws IOException { String value = String.format("%d_%s", index, UUID.randomUUID()); try { writer.append(value + "," + value + "," + value + "," + value + "\n"); - if (header) OFFSETS_BY_INDEX.put(index, Long.valueOf(index++)); + if (header) OFFSETS_BY_INDEX.put(index, (long) index); } catch (IOException ioe) { throw new RuntimeException(ioe); } @@ -124,10 +124,10 @@ public void 
readAllDataWithMalformedRows() throws Throwable { int recordCount = 0; while (reader.hasNext()) { Struct record = reader.next(); - assertTrue(record.get(FIELD_COLUMN1).equals("dummy")); - assertTrue(record.get(FIELD_COLUMN2).equals("custom_value")); - assertTrue(record.get(FIELD_COLUMN3).equals("custom_value")); - assertTrue(record.get(FIELD_COLUMN4).equals("custom_value")); + assertEquals("dummy", record.get(FIELD_COLUMN1)); + assertEquals("custom_value", record.get(FIELD_COLUMN2)); + assertEquals("custom_value", record.get(FIELD_COLUMN3)); + assertEquals("custom_value", record.get(FIELD_COLUMN4)); recordCount++; } assertEquals("The number of records in the file does not match", 2, recordCount); @@ -146,19 +146,19 @@ public void seekFileWithoutHeader() throws Throwable { int recordIndex = NUM_RECORDS / 2; reader.seek(getOffset(OFFSETS_BY_INDEX.get(recordIndex), false)); assertTrue(reader.hasNext()); - assertEquals(OFFSETS_BY_INDEX.get(recordIndex).longValue() + 1, reader.currentOffset().getRecordOffset()); + assertEquals(OFFSETS_BY_INDEX.get(recordIndex) + 1, reader.currentOffset().getRecordOffset()); checkData(reader.next(), recordIndex); recordIndex = 0; reader.seek(getOffset(OFFSETS_BY_INDEX.get(recordIndex), false)); assertTrue(reader.hasNext()); - assertEquals(OFFSETS_BY_INDEX.get(recordIndex).longValue() + 1, reader.currentOffset().getRecordOffset()); + assertEquals(OFFSETS_BY_INDEX.get(recordIndex) + 1, reader.currentOffset().getRecordOffset()); checkData(reader.next(), recordIndex); recordIndex = NUM_RECORDS - 3; reader.seek(getOffset(OFFSETS_BY_INDEX.get(recordIndex), false)); assertTrue(reader.hasNext()); - assertEquals(OFFSETS_BY_INDEX.get(recordIndex).longValue() + 1, reader.currentOffset().getRecordOffset()); + assertEquals(OFFSETS_BY_INDEX.get(recordIndex) + 1, reader.currentOffset().getRecordOffset()); checkData(reader.next(), recordIndex); reader.seek(getOffset(OFFSETS_BY_INDEX.get(NUM_RECORDS - 1) + 1, false)); diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/HdfsFileReaderTestBase.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/HdfsFileReaderTestBase.java index 5a7c1ba..f4f5183 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/HdfsFileReaderTestBase.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/HdfsFileReaderTestBase.java @@ -15,13 +15,11 @@ public abstract class HdfsFileReaderTestBase extends FileReaderTestBase { private static MiniDFSCluster cluster; - private static Configuration clusterConfig; - private static Path hdfsDir; @BeforeClass public static void initFs() throws IOException { - clusterConfig = new Configuration(); - hdfsDir = Files.createTempDirectory("test-"); + Configuration clusterConfig = new Configuration(); + Path hdfsDir = Files.createTempDirectory("test-"); clusterConfig.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, hdfsDir.toAbsolutePath().toString()); cluster = new MiniDFSCluster.Builder(clusterConfig).build(); fsUri = URI.create("hdfs://localhost:" + cluster.getNameNodePort() + "/"); diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/ParquetFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/ParquetFileReaderTest.java index ae0e82c..5b69bb3 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/ParquetFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/ParquetFileReaderTest.java @@ -19,6 +19,7 @@ 
import org.apache.parquet.hadoop.ParquetWriter; import org.apache.parquet.io.InvalidRecordException; import org.junit.BeforeClass; +import org.junit.Ignore; import org.junit.Test; import java.io.File; @@ -63,7 +64,7 @@ private static Path createDataFile() throws IOException { datum.put(FIELD_NAME, String.format("%d_name_%s", index, UUID.randomUUID())); datum.put(FIELD_SURNAME, String.format("%d_surname_%s", index, UUID.randomUUID())); try { - OFFSETS_BY_INDEX.put(index, Long.valueOf(index)); + OFFSETS_BY_INDEX.put(index, (long) index); writer.write(datum); } catch (IOException ioe) { throw new RuntimeException(ioe); @@ -75,6 +76,18 @@ private static Path createDataFile() throws IOException { return path; } + @Ignore(value = "This test does not apply for parquet files") + @Test(expected = IOException.class) + public void emptyFile() throws Throwable { + super.emptyFile(); + } + + @Ignore(value = "This test does not apply for parquet files") + @Test(expected = IOException.class) + public void invalidFileFormat() throws Throwable { + super.invalidFileFormat(); + } + @Test public void readerWithSchema() throws Throwable { Map cfg = new HashMap() {{ @@ -138,7 +151,7 @@ protected Offset getOffset(long offset) { @Override protected void checkData(Struct record, long index) { - assertTrue((Integer) record.get(FIELD_INDEX) == index); + assertEquals((int) (Integer) record.get(FIELD_INDEX), index); assertTrue(record.get(FIELD_NAME).toString().startsWith(index + "_")); assertTrue(record.get(FIELD_SURNAME).toString().startsWith(index + "_")); } diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/SequenceFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/SequenceFileReaderTest.java index d7e6ba0..23e1f8c 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/SequenceFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/SequenceFileReaderTest.java @@ -75,8 +75,7 @@ private static Path createDataFile() throws IOException { public void defaultFieldNames() throws Throwable { Map customReaderCfg = new HashMap<>(); reader = getReader(fs, dataFile, customReaderCfg); - assertTrue(reader.getFilePath().equals(dataFile)); - + assertEquals(reader.getFilePath(), dataFile); assertTrue(reader.hasNext()); int recordCount = 0; @@ -99,7 +98,7 @@ protected void checkData(Struct record, long index) { } private void checkData(String keyFieldName, String valueFieldName, Struct record, long index) { - assertTrue((Integer) record.get(keyFieldName) == index); + assertEquals((int) (Integer) record.get(keyFieldName), index); assertTrue(record.get(valueFieldName).toString().startsWith(index + "_")); } @@ -107,5 +106,4 @@ private void checkData(String keyFieldName, String valueFieldName, Struct record protected String getFileExtension() { return FILE_EXTENSION; } - } diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/TextFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/TextFileReaderTest.java index 0c37d4d..9a063d3 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/TextFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/TextFileReaderTest.java @@ -42,7 +42,7 @@ private static Path createDataFile() throws IOException { String value = String.format("%d_%s", index, UUID.randomUUID()); try { writer.append(value + "\n"); - OFFSETS_BY_INDEX.put(index, 
Long.valueOf(index++)); + OFFSETS_BY_INDEX.put(index, (long) index); } catch (IOException ioe) { throw new RuntimeException(ioe); } @@ -98,5 +98,4 @@ protected void checkData(Struct record, long index) { protected String getFileExtension() { return FILE_EXTENSION; } - } diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/AvroFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/AvroFileReaderTest.java index de4ed20..2dc0454 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/AvroFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/AvroFileReaderTest.java @@ -23,6 +23,7 @@ import java.util.UUID; import java.util.stream.IntStream; +import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; public class AvroFileReaderTest extends LocalFileReaderTestBase { @@ -106,7 +107,7 @@ protected Offset getOffset(long offset) { @Override protected void checkData(Struct record, long index) { - assertTrue((Integer) record.get(FIELD_INDEX) == index); + assertEquals((int) (Integer) record.get(FIELD_INDEX), index); assertTrue(record.get(FIELD_NAME).toString().startsWith(index + "_")); assertTrue(record.get(FIELD_SURNAME).toString().startsWith(index + "_")); } @@ -115,5 +116,4 @@ protected void checkData(Struct record, long index) { protected String getFileExtension() { return FILE_EXTENSION; } - } diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/DelimitedTextFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/DelimitedTextFileReaderTest.java index 5884240..763ce11 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/DelimitedTextFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/DelimitedTextFileReaderTest.java @@ -51,7 +51,7 @@ private static Path createDataFile(boolean header) throws IOException { String value = String.format("%d_%s", index, UUID.randomUUID()); try { writer.append(value + "," + value + "," + value + "," + value + "\n"); - if (header) OFFSETS_BY_INDEX.put(index, Long.valueOf(index++)); + if (header) OFFSETS_BY_INDEX.put(index, (long) index); } catch (IOException ioe) { throw new RuntimeException(ioe); } @@ -130,10 +130,10 @@ public void readAllDataWithMalformedRows() throws Throwable { int recordCount = 0; while (reader.hasNext()) { Struct record = reader.next(); - assertTrue(record.get(FIELD_COLUMN1).equals("dummy")); - assertTrue(record.get(FIELD_COLUMN2).equals("custom_value")); - assertTrue(record.get(FIELD_COLUMN3).equals("custom_value")); - assertTrue(record.get(FIELD_COLUMN4).equals("custom_value")); + assertEquals("dummy", record.get(FIELD_COLUMN1)); + assertEquals("custom_value", record.get(FIELD_COLUMN2)); + assertEquals("custom_value", record.get(FIELD_COLUMN3)); + assertEquals("custom_value", record.get(FIELD_COLUMN4)); recordCount++; } assertEquals("The number of records in the file does not match", 2, recordCount); @@ -153,19 +153,19 @@ public void seekFileWithoutHeader() throws Throwable { int recordIndex = NUM_RECORDS / 2; reader.seek(getOffset(OFFSETS_BY_INDEX.get(recordIndex), false)); assertTrue(reader.hasNext()); - assertEquals(OFFSETS_BY_INDEX.get(recordIndex).longValue() + 1, reader.currentOffset().getRecordOffset()); + assertEquals(OFFSETS_BY_INDEX.get(recordIndex) + 1, reader.currentOffset().getRecordOffset()); checkData(reader.next(), recordIndex); recordIndex 
= 0; reader.seek(getOffset(OFFSETS_BY_INDEX.get(recordIndex), false)); assertTrue(reader.hasNext()); - assertEquals(OFFSETS_BY_INDEX.get(recordIndex).longValue() + 1, reader.currentOffset().getRecordOffset()); + assertEquals(OFFSETS_BY_INDEX.get(recordIndex) + 1, reader.currentOffset().getRecordOffset()); checkData(reader.next(), recordIndex); recordIndex = NUM_RECORDS - 3; reader.seek(getOffset(OFFSETS_BY_INDEX.get(recordIndex), false)); assertTrue(reader.hasNext()); - assertEquals(OFFSETS_BY_INDEX.get(recordIndex).longValue() + 1, reader.currentOffset().getRecordOffset()); + assertEquals(OFFSETS_BY_INDEX.get(recordIndex) + 1, reader.currentOffset().getRecordOffset()); checkData(reader.next(), recordIndex); reader.seek(getOffset(OFFSETS_BY_INDEX.get(NUM_RECORDS - 1) + 1, false)); diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/ParquetFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/ParquetFileReaderTest.java index 91c1eb6..1cceebb 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/ParquetFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/ParquetFileReaderTest.java @@ -19,6 +19,7 @@ import org.apache.parquet.hadoop.ParquetWriter; import org.apache.parquet.io.InvalidRecordException; import org.junit.BeforeClass; +import org.junit.Ignore; import org.junit.Test; import java.io.File; @@ -65,7 +66,7 @@ private static Path createDataFile() throws IOException { datum.put(FIELD_NAME, String.format("%d_name_%s", index, UUID.randomUUID())); datum.put(FIELD_SURNAME, String.format("%d_surname_%s", index, UUID.randomUUID())); try { - OFFSETS_BY_INDEX.put(index, Long.valueOf(index)); + OFFSETS_BY_INDEX.put(index, (long) index); writer.write(datum); } catch (IOException ioe) { throw new RuntimeException(ioe); @@ -77,6 +78,18 @@ private static Path createDataFile() throws IOException { return path; } + @Ignore(value = "This test does not apply for parquet files") + @Test(expected = IOException.class) + public void emptyFile() throws Throwable { + super.emptyFile(); + } + + @Ignore(value = "This test does not apply for parquet files") + @Test(expected = IOException.class) + public void invalidFileFormat() throws Throwable { + super.invalidFileFormat(); + } + @Test public void readerWithSchema() throws Throwable { Map cfg = new HashMap() {{ @@ -145,7 +158,7 @@ protected Offset getOffset(long offset) { @Override protected void checkData(Struct record, long index) { - assertTrue((Integer) record.get(FIELD_INDEX) == index); + assertEquals((int) (Integer) record.get(FIELD_INDEX), index); assertTrue(record.get(FIELD_NAME).toString().startsWith(index + "_")); assertTrue(record.get(FIELD_SURNAME).toString().startsWith(index + "_")); } diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/SequenceFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/SequenceFileReaderTest.java index 8d53cb8..48c4c4e 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/SequenceFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/SequenceFileReaderTest.java @@ -78,8 +78,7 @@ public void defaultFieldNames() throws Throwable { put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_SEQUENCE, getFileExtension()); }}; reader = getReader(fs, dataFile, customReaderCfg); - assertTrue(reader.getFilePath().equals(dataFile)); - + 
assertEquals(reader.getFilePath(), dataFile); assertTrue(reader.hasNext()); int recordCount = 0; @@ -102,7 +101,7 @@ protected void checkData(Struct record, long index) { } private void checkData(String keyFieldName, String valueFieldName, Struct record, long index) { - assertTrue((Integer) record.get(keyFieldName) == index); + assertEquals((int) (Integer) record.get(keyFieldName), index); assertTrue(record.get(valueFieldName).toString().startsWith(index + "_")); } diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/TextFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/TextFileReaderTest.java index 53d9a98..39ae6e8 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/TextFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/TextFileReaderTest.java @@ -42,7 +42,7 @@ private static Path createDataFile() throws IOException { String value = String.format("%d_%s", index, UUID.randomUUID()); try { writer.append(value + "\n"); - OFFSETS_BY_INDEX.put(index, Long.valueOf(index++)); + OFFSETS_BY_INDEX.put(index, (long) index); } catch (IOException ioe) { throw new RuntimeException(ioe); } @@ -98,5 +98,4 @@ protected void checkData(Struct record, long index) { protected String getFileExtension() { return FILE_EXTENSION; } - } diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/HdfsFileWatcherPolicyTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/HdfsFileWatcherPolicyTest.java index d3e0d9a..0c32830 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/HdfsFileWatcherPolicyTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/HdfsFileWatcherPolicyTest.java @@ -30,8 +30,8 @@ public static void setUp() throws IOException { } Map cfg = new HashMap() {{ - String uris[] = directories.stream().map(dir -> dir.toString()) - .toArray(size -> new String[size]); + String[] uris = directories.stream().map(Path::toString) + .toArray(String[]::new); put(FsSourceTaskConfig.FS_URIS, String.join(",", uris)); put(FsSourceTaskConfig.TOPIC, "topic_test"); put(FsSourceTaskConfig.POLICY_CLASS, HdfsFileWatcherPolicy.class.getName()); @@ -70,5 +70,4 @@ public void execPolicyAlreadyEnded() throws IOException { assertTrue(policy.hasEnded()); policy.execute(); } - } diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/HdfsPolicyTestBase.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/HdfsPolicyTestBase.java index 3cbe9a9..d046d0b 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/HdfsPolicyTestBase.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/HdfsPolicyTestBase.java @@ -15,13 +15,11 @@ public abstract class HdfsPolicyTestBase extends PolicyTestBase { private static MiniDFSCluster cluster; - private static Configuration clusterConfig; - private static Path hdfsDir; @BeforeClass public static void initFs() throws IOException { - clusterConfig = new Configuration(); - hdfsDir = Files.createTempDirectory("test-"); + Configuration clusterConfig = new Configuration(); + Path hdfsDir = Files.createTempDirectory("test-"); clusterConfig.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, hdfsDir.toAbsolutePath().toString()); cluster = new MiniDFSCluster.Builder(clusterConfig).build(); fsUri = URI.create("hdfs://localhost:" + cluster.getNameNodePort() + "/"); diff --git 
a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/SimplePolicyTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/SimplePolicyTest.java index 06f1db7..33ebe28 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/SimplePolicyTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/SimplePolicyTest.java @@ -25,8 +25,8 @@ public static void setUp() throws IOException { } Map cfg = new HashMap() {{ - String uris[] = directories.stream().map(dir -> dir.toString()) - .toArray(size -> new String[size]); + String[] uris = directories.stream().map(Path::toString) + .toArray(String[]::new); put(FsSourceTaskConfig.FS_URIS, String.join(",", uris)); put(FsSourceTaskConfig.TOPIC, "topic_test"); put(FsSourceTaskConfig.POLICY_CLASS, SimplePolicy.class.getName()); diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/SleepyPolicyTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/SleepyPolicyTest.java index edd5533..77d85a6 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/SleepyPolicyTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/SleepyPolicyTest.java @@ -32,8 +32,8 @@ public static void setUp() throws IOException { } Map cfg = new HashMap() {{ - String uris[] = directories.stream().map(dir -> dir.toString()) - .toArray(size -> new String[size]); + String[] uris = directories.stream().map(Path::toString) + .toArray(String[]::new); put(FsSourceTaskConfig.FS_URIS, String.join(",", uris)); put(FsSourceTaskConfig.TOPIC, "topic_test"); put(FsSourceTaskConfig.POLICY_CLASS, SleepyPolicy.class.getName()); @@ -105,5 +105,4 @@ public void defaultExecutions() throws Throwable { policy.interrupt(); assertTrue(policy.hasEnded()); } - } diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/local/SimplePolicyTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/local/SimplePolicyTest.java index 214849b..c8a221a 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/local/SimplePolicyTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/local/SimplePolicyTest.java @@ -25,8 +25,8 @@ public static void setUp() throws IOException { } Map cfg = new HashMap() {{ - String uris[] = directories.stream().map(dir -> dir.toString()) - .toArray(size -> new String[size]); + String[] uris = directories.stream().map(Path::toString) + .toArray(String[]::new); put(FsSourceTaskConfig.FS_URIS, String.join(",", uris)); put(FsSourceTaskConfig.TOPIC, "topic_test"); put(FsSourceTaskConfig.POLICY_CLASS, SimplePolicy.class.getName()); diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/local/SleepyPolicyTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/local/SleepyPolicyTest.java index 2f907ae..be6c58b 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/local/SleepyPolicyTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/local/SleepyPolicyTest.java @@ -32,8 +32,8 @@ public static void setUp() throws IOException { } Map cfg = new HashMap() {{ - String uris[] = directories.stream().map(dir -> dir.toString()) - .toArray(size -> new String[size]); + String[] uris = directories.stream().map(Path::toString) + .toArray(String[]::new); put(FsSourceTaskConfig.FS_URIS, String.join(",", uris)); put(FsSourceTaskConfig.TOPIC, "topic_test"); put(FsSourceTaskConfig.POLICY_CLASS, SleepyPolicy.class.getName()); From 
38e1bf050d9239d69180322d5c726edc426f124d Mon Sep 17 00:00:00 2001 From: Mario Molina Date: Sat, 22 Feb 2020 11:52:49 -0600 Subject: [PATCH 05/51] Updating docs --- docs/Makefile | 2 +- docs/source/conf.py | 4 ++-- docs/source/config_options.rst | 2 +- docs/source/connector.rst | 2 +- docs/source/faq.rst | 1 - docs/source/filereaders.rst | 2 +- docs/source/policies.rst | 2 +- 7 files changed, 7 insertions(+), 8 deletions(-) diff --git a/docs/Makefile b/docs/Makefile index 4dea114..9aeda1f 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -17,4 +17,4 @@ help: # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) \ No newline at end of file + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/source/conf.py b/docs/source/conf.py index d2ffa24..f6edf0c 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -55,9 +55,9 @@ # built documents. # # The short X.Y version. -version = '0.1' +version = '1.0' # The full version, including alpha/beta/rc tags. -release = '0.1' +release = '1.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/docs/source/config_options.rst b/docs/source/config_options.rst index 6eaf081..851419f 100644 --- a/docs/source/config_options.rst +++ b/docs/source/config_options.rst @@ -155,7 +155,7 @@ In order to configure custom properties for this policy, the name you must use i .. _config_options-policies-hdfs: -Hdfs file watcher +HDFS file watcher -------------------------------------------- This policy does not have any additional configuration. diff --git a/docs/source/connector.rst b/docs/source/connector.rst index 48cd0e0..d045f1e 100644 --- a/docs/source/connector.rst +++ b/docs/source/connector.rst @@ -24,7 +24,7 @@ Getting started Prerequisites -------------------------------------------- -- Confluent Platform 3.1.1 +- Confluent Platform 5.4.0 - Java 8 Building from source diff --git a/docs/source/faq.rst b/docs/source/faq.rst index 49e9ef7..a5077dc 100644 --- a/docs/source/faq.rst +++ b/docs/source/faq.rst @@ -53,4 +53,3 @@ until throws an exception. It's a matter of time. But the main thing is that you don't have to worry about removing files from the FS when they are being processed. The connector tolerates errors when reading files and continues with the next file. - diff --git a/docs/source/filereaders.rst b/docs/source/filereaders.rst index 37c76f3..75b349b 100644 --- a/docs/source/filereaders.rst +++ b/docs/source/filereaders.rst @@ -16,7 +16,7 @@ Parquet Reads files with `Parquet `__ format. The reader takes advantage of the Parquet-Avro API and uses the Parquet file -as if it were an Avro file, so the message sent to Kafka is built in the same +as if it was an Avro file, so the message sent to Kafka is built in the same way as the Avro file reader does. .. warning:: Seeking Parquet files is a heavy task because the reader has to diff --git a/docs/source/policies.rst b/docs/source/policies.rst index abed625..b2ceb86 100644 --- a/docs/source/policies.rst +++ b/docs/source/policies.rst @@ -14,7 +14,7 @@ and wait for the next one. Additionally, its custom properties allow to end it. You can learn more about the properties of this policy :ref:`here`. 
-Hdfs file watcher +HDFS file watcher ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ It uses Hadoop notifications events and all create/append/close events will be reported From f2e665313da7dba54e53d33d9eb6c9d651fe64b8 Mon Sep 17 00:00:00 2001 From: Mario Molina Date: Fri, 28 Feb 2020 23:04:28 -0600 Subject: [PATCH 06/51] Enable flag in TextFileReader to read a record per line --- .../fs/file/reader/AbstractFileReader.java | 4 ++ .../fs/file/reader/AvroFileReader.java | 5 +- .../file/reader/DelimitedTextFileReader.java | 8 ++-- .../fs/file/reader/SequenceFileReader.java | 5 +- .../fs/file/reader/TextFileReader.java | 48 ++++++++++++------- .../local/DelimitedTextFileReaderTest.java | 1 - .../reader/local/ParquetFileReaderTest.java | 1 - .../file/reader/local/TextFileReaderTest.java | 22 +++++++++ 8 files changed, 67 insertions(+), 27 deletions(-) diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AbstractFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AbstractFileReader.java index 1fbdddb..533b628 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AbstractFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AbstractFileReader.java @@ -3,6 +3,8 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.kafka.connect.data.Struct; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.util.Map; import java.util.stream.Collectors; @@ -10,6 +12,7 @@ import static com.github.mmolimar.kafka.connect.fs.FsSourceTaskConfig.FILE_READER_PREFIX; public abstract class AbstractFileReader implements FileReader { + protected final Logger log = LoggerFactory.getLogger(getClass()); private final FileSystem fs; private final Path filePath; @@ -25,6 +28,7 @@ public AbstractFileReader(FileSystem fs, Path filePath, ReaderAdapter adapter Map readerConf = config.entrySet().stream() .filter(entry -> entry.getKey().startsWith(FILE_READER_PREFIX)) + .filter(entry -> entry.getValue() != null) .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); configure(readerConf); } diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReader.java index 44ec3df..1db7e01 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReader.java @@ -93,7 +93,7 @@ public void setOffset(long offset) { this.offset = offset; } - protected void inc() { + void inc() { this.offset++; } @@ -104,10 +104,11 @@ public long getRecordOffset() { } static class GenericRecordToStruct implements ReaderAdapter { + private static final int CACHE_SIZE = 100; private final AvroData avroData; - public GenericRecordToStruct() { + GenericRecordToStruct() { this.avroData = new AvroData(CACHE_SIZE); } diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/DelimitedTextFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/DelimitedTextFileReader.java index b5f59ee..ff703aa 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/DelimitedTextFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/DelimitedTextFileReader.java @@ -14,6 +14,7 @@ import static com.github.mmolimar.kafka.connect.fs.FsSourceTaskConfig.FILE_READER_PREFIX; public class DelimitedTextFileReader 
extends AbstractFileReader { + private static final String FILE_READER_DELIMITED = FILE_READER_PREFIX + "delimited."; public static final String FILE_READER_DELIMITED_HEADER = FILE_READER_DELIMITED + "header"; public static final String FILE_READER_DELIMITED_TOKEN = FILE_READER_DELIMITED + "token"; @@ -32,10 +33,9 @@ public class DelimitedTextFileReader extends AbstractFileReader config) throws IOException { super(fs, filePath, new DelimitedTxtToStruct(), config); - //mapping encoding for text file reader - if (config.get(FILE_READER_DELIMITED_ENCODING) != null) { - config.put(TextFileReader.FILE_READER_TEXT_ENCODING, config.get(FILE_READER_DELIMITED_ENCODING)); - } + config.put(TextFileReader.FILE_READER_TEXT_ENCODING, config.get(FILE_READER_DELIMITED_ENCODING)); + config.put(TextFileReader.FILE_READER_TEXT_RECORD_PER_LINE, "true"); + this.inner = new TextFileReader(fs, filePath, config); this.offset = new DelimitedTextOffset(0, hasHeader); diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReader.java index 3432da7..58d1e0e 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReader.java @@ -30,7 +30,6 @@ public class SequenceFileReader extends AbstractFileReader nextRecord() { throw new NoSuchElementException("There are no more records in file: " + getFilePath()); } recordIndex++; - return new SequenceRecord(schema, keyFieldName, key, valueFieldName, value); + return new SequenceRecord<>(schema, keyFieldName, key, valueFieldName, value); } @Override @@ -196,6 +195,7 @@ private Object toSchemaValue(Writable writable) { } static class SequenceRecord { + private final Schema schema; private final String keyFieldName; private final T key; @@ -211,5 +211,4 @@ static class SequenceRecord { } } - } diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReader.java index 7ed0b80..c623275 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReader.java @@ -8,31 +8,35 @@ import org.apache.kafka.connect.data.Struct; import org.apache.kafka.connect.errors.ConnectException; +import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; import java.io.LineNumberReader; import java.nio.charset.Charset; +import java.util.List; import java.util.Map; import java.util.NoSuchElementException; +import java.util.stream.Collectors; import static com.github.mmolimar.kafka.connect.fs.FsSourceTaskConfig.FILE_READER_PREFIX; public class TextFileReader extends AbstractFileReader { - public static final String FIELD_NAME_VALUE_DEFAULT = "value"; - private static final String FILE_READER_TEXT = FILE_READER_PREFIX + "text."; - private static final String FILE_READER_SEQUENCE_FIELD_NAME_PREFIX = FILE_READER_TEXT + "field_name."; + private static final String FILE_READER_FIELD_NAME_PREFIX = FILE_READER_TEXT + "field_name."; - public static final String FILE_READER_TEXT_FIELD_NAME_VALUE = FILE_READER_SEQUENCE_FIELD_NAME_PREFIX + "value"; + public static final String FIELD_NAME_VALUE_DEFAULT = "value"; + public static final String FILE_READER_TEXT_FIELD_NAME_VALUE = FILE_READER_FIELD_NAME_PREFIX + "value"; + 
public static final String FILE_READER_TEXT_RECORD_PER_LINE = FILE_READER_TEXT + "record_per_line"; public static final String FILE_READER_TEXT_ENCODING = FILE_READER_TEXT + "encoding"; private final TextOffset offset; - private String currentLine; + private String current; private boolean finished = false; private LineNumberReader reader; private Schema schema; private Charset charset; + private boolean recordPerLine; public TextFileReader(FileSystem fs, Path filePath, Map config) throws IOException { super(fs, filePath, new TxtToStruct(), config); @@ -49,34 +53,46 @@ protected void configure(Map config) { } else { valueFieldName = config.get(FILE_READER_TEXT_FIELD_NAME_VALUE).toString(); } - this.schema = SchemaBuilder.struct() - .field(valueFieldName, Schema.STRING_SCHEMA) - .build(); - if (config.get(FILE_READER_TEXT_ENCODING) == null || config.get(FILE_READER_TEXT_ENCODING).toString().equals("")) { this.charset = Charset.defaultCharset(); } else { this.charset = Charset.forName(config.get(FILE_READER_TEXT_ENCODING).toString()); } + if (config.get(FILE_READER_TEXT_RECORD_PER_LINE) == null || + config.get(FILE_READER_TEXT_RECORD_PER_LINE).toString().equals("")) { + this.recordPerLine = true; + } else { + this.recordPerLine = Boolean.parseBoolean(config.get(FILE_READER_TEXT_RECORD_PER_LINE).toString()); + } + this.schema = SchemaBuilder.struct() + .field(valueFieldName, Schema.STRING_SCHEMA) + .build(); } @Override public boolean hasNext() { - if (currentLine != null) { + if (current != null) { return true; } else if (finished) { return false; } else { try { - while (true) { + if (!recordPerLine) { + List lines = new BufferedReader(reader).lines().collect(Collectors.toList()); + offset.setOffset(lines.size() - 1); + current = String.join("\n", lines); + finished = true; + return true; + } + for (; ; ) { String line = reader.readLine(); offset.setOffset(reader.getLineNumber()); if (line == null) { finished = true; return false; } - currentLine = line; + current = line; return true; } } catch (IOException ioe) { @@ -90,8 +106,8 @@ protected TextRecord nextRecord() { if (!hasNext()) { throw new NoSuchElementException("There are no more records in file: " + getFilePath()); } - String aux = currentLine; - currentLine = null; + String aux = current; + current = null; return new TextRecord(schema, aux); } @@ -104,9 +120,9 @@ public void seek(Offset offset) { try { if (offset.getRecordOffset() < reader.getLineNumber()) { this.reader = new LineNumberReader(new InputStreamReader(getFs().open(getFilePath()))); - currentLine = null; + current = null; } - while ((currentLine = reader.readLine()) != null) { + while ((current = reader.readLine()) != null) { if (reader.getLineNumber() - 1 == offset.getRecordOffset()) { this.offset.setOffset(reader.getLineNumber()); return; diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/DelimitedTextFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/DelimitedTextFileReaderTest.java index 763ce11..679ef45 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/DelimitedTextFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/DelimitedTextFileReaderTest.java @@ -104,7 +104,6 @@ public void readAllDataWithoutHeader() throws Throwable { recordCount++; } assertEquals("The number of records in the file does not match", NUM_RECORDS, recordCount); - } @Test diff --git 
a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/ParquetFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/ParquetFileReaderTest.java index 1cceebb..da23677 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/ParquetFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/ParquetFileReaderTest.java @@ -59,7 +59,6 @@ private static Path createDataFile() throws IOException { try (ParquetWriter writer = AvroParquetWriter.builder(new Path(parquetFile.toURI())) .withConf(fs.getConf()).withWriteMode(ParquetFileWriter.Mode.OVERWRITE).withSchema(readerSchema).build()) { - IntStream.range(0, NUM_RECORDS).forEach(index -> { GenericRecord datum = new GenericData.Record(readerSchema); datum.put(FIELD_INDEX, index); diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/TextFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/TextFileReaderTest.java index 39ae6e8..7de8414 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/TextFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/TextFileReaderTest.java @@ -2,6 +2,7 @@ import com.github.mmolimar.kafka.connect.fs.file.Offset; import com.github.mmolimar.kafka.connect.fs.file.reader.AgnosticFileReader; +import com.github.mmolimar.kafka.connect.fs.file.reader.FileReader; import com.github.mmolimar.kafka.connect.fs.file.reader.TextFileReader; import org.apache.hadoop.fs.Path; import org.apache.kafka.connect.data.Struct; @@ -18,6 +19,7 @@ import java.util.UUID; import java.util.stream.IntStream; +import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; public class TextFileReaderTest extends LocalFileReaderTestBase { @@ -84,6 +86,26 @@ public void invalidFileEncoding() throws Throwable { getReader(fs, dataFile, cfg); } + @Test + public void readDataWithRecordPerLineDisabled() throws Throwable { + Path file = createDataFile(); + FileReader reader = getReader(fs, file, new HashMap() {{ + put(TextFileReader.FILE_READER_TEXT_FIELD_NAME_VALUE, FIELD_NAME_VALUE); + put(TextFileReader.FILE_READER_TEXT_RECORD_PER_LINE, "false"); + }}); + + assertTrue(reader.hasNext()); + + int recordCount = 0; + while (reader.hasNext()) { + Struct record = reader.next(); + checkData(record, recordCount); + recordCount++; + } + reader.close(); + assertEquals("The number of records in the file does not match", 1, recordCount); + } + @Override protected Offset getOffset(long offset) { return new TextFileReader.TextOffset(offset); From 2cd530e5fda239308571fa4933229f916616ac99 Mon Sep 17 00:00:00 2001 From: Mario Molina Date: Sat, 29 Feb 2020 01:52:46 -0600 Subject: [PATCH 07/51] Fix offsets when seeking in text readers --- .../connect/fs/file/reader/TextFileReader.java | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReader.java index c623275..4d03487 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReader.java @@ -118,17 +118,15 @@ public void seek(Offset offset) { throw new IllegalArgumentException("Record offset must be greater than 0"); } try { + current = null; if 
(offset.getRecordOffset() < reader.getLineNumber()) { - this.reader = new LineNumberReader(new InputStreamReader(getFs().open(getFilePath()))); - current = null; + finished = false; + reader = new LineNumberReader(new InputStreamReader(getFs().open(getFilePath()))); } - while ((current = reader.readLine()) != null) { - if (reader.getLineNumber() - 1 == offset.getRecordOffset()) { - this.offset.setOffset(reader.getLineNumber()); - return; - } + while (reader.getLineNumber() < offset.getRecordOffset()) { + reader.readLine(); } - this.offset.setOffset(reader.getLineNumber()); + this.offset.setOffset(reader.getLineNumber() + 1); } catch (IOException ioe) { throw new ConnectException("Error seeking file " + getFilePath(), ioe); } From 09bf4afca529c6071bb33c3982fb67f3946f8d16 Mon Sep 17 00:00:00 2001 From: Mario Molina Date: Mon, 2 Mar 2020 02:58:49 +0100 Subject: [PATCH 08/51] New JSON file reader --- pom.xml | 16 +- .../fs/file/reader/AgnosticFileReader.java | 18 +- .../fs/file/reader/JsonFileReader.java | 222 ++++++++++++++++++ 3 files changed, 246 insertions(+), 10 deletions(-) create mode 100644 src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/JsonFileReader.java diff --git a/pom.xml b/pom.xml index 52c6b89..c63e4fc 100644 --- a/pom.xml +++ b/pom.xml @@ -16,6 +16,7 @@ 3.2.1 1.9.2 1.11.0 + 2.10.2 4.13 4.2 2.0.5 @@ -51,20 +52,27 @@ org.apache.avro - avro-tools + avro ${avro.version} - nodeps org.apache.avro - avro + avro-tools ${avro.version} + nodeps org.apache.parquet parquet-avro ${parquet.version} + + com.fasterxml.jackson.core + jackson-core + ${fasterxml-jackson.version} + + + junit junit @@ -181,4 +189,4 @@ http://packages.confluent.io/maven/ - \ No newline at end of file + diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AgnosticFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AgnosticFileReader.java index 34f9670..caa89a1 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AgnosticFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AgnosticFileReader.java @@ -20,17 +20,18 @@ public class AgnosticFileReader extends AbstractFileReader parquetExtensions, avroExtensions, sequenceExtensions, delimitedExtensions; + private final AbstractFileReader reader; + private List parquetExtensions, avroExtensions, jsonExtensions, sequenceExtensions, delimitedExtensions; public AgnosticFileReader(FileSystem fs, Path filePath, Map config) throws IOException { super(fs, filePath, new AgnosticAdapter(), config); try { - reader = (AbstractFileReader) readerByExtension(fs, filePath, config); + reader = readerByExtension(fs, filePath, config); } catch (RuntimeException | IOException e) { throw e; } catch (Throwable t) { @@ -38,17 +39,19 @@ public AgnosticFileReader(FileSystem fs, Path filePath, Map conf } } - private FileReader readerByExtension(FileSystem fs, Path filePath, Map config) + private AbstractFileReader readerByExtension(FileSystem fs, Path filePath, Map config) throws Throwable { int index = filePath.getName().lastIndexOf('.'); String extension = index == -1 || index == filePath.getName().length() - 1 ? 
"" : filePath.getName().substring(index + 1).toLowerCase(); - Class clz; + Class clz; if (parquetExtensions.contains(extension)) { clz = ParquetFileReader.class; } else if (avroExtensions.contains(extension)) { clz = AvroFileReader.class; + } else if (jsonExtensions.contains(extension)) { + clz = JsonFileReader.class; } else if (sequenceExtensions.contains(extension)) { clz = SequenceFileReader.class; } else if (delimitedExtensions.contains(extension)) { @@ -57,7 +60,7 @@ private FileReader readerByExtension(FileSystem fs, Path filePath, Map) ReflectionUtils.makeReader(clz, fs, filePath, config); } @Override @@ -68,6 +71,9 @@ protected void configure(Map config) { this.avroExtensions = config.get(FILE_READER_AGNOSTIC_EXTENSIONS_AVRO) == null ? Collections.singletonList("avro") : Arrays.asList(config.get(FILE_READER_AGNOSTIC_EXTENSIONS_AVRO).toString().toLowerCase().split(",")); + this.jsonExtensions = config.get(FILE_READER_AGNOSTIC_EXTENSIONS_JSON) == null ? + Collections.singletonList("json") : + Arrays.asList(config.get(FILE_READER_AGNOSTIC_EXTENSIONS_JSON).toString().toLowerCase().split(",")); this.sequenceExtensions = config.get(FILE_READER_AGNOSTIC_EXTENSIONS_SEQUENCE) == null ? Collections.singletonList("seq") : Arrays.asList(config.get(FILE_READER_AGNOSTIC_EXTENSIONS_SEQUENCE).toString().toLowerCase().split(",")); diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/JsonFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/JsonFileReader.java new file mode 100644 index 0000000..58230f9 --- /dev/null +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/JsonFileReader.java @@ -0,0 +1,222 @@ +package com.github.mmolimar.kafka.connect.fs.file.reader; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.github.mmolimar.kafka.connect.fs.file.Offset; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.kafka.connect.data.Schema; +import org.apache.kafka.connect.data.SchemaBuilder; +import org.apache.kafka.connect.data.Struct; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.StreamSupport; + +import static com.github.mmolimar.kafka.connect.fs.FsSourceTaskConfig.FILE_READER_PREFIX; + +public class JsonFileReader extends AbstractFileReader { + + private static final String FILE_READER_JSON = FILE_READER_PREFIX + "json."; + + public static final String FILE_READER_JSON_DESERIALIZATION_CONFIGS = FILE_READER_JSON + "deserialization."; + public static final String FILE_READER_JSON_RECORD_PER_LINE = FILE_READER_JSON + "record_per_line"; + public static final String FILE_READER_JSON_ENCODING = FILE_READER_JSON + "encoding"; + + private final TextFileReader inner; + private final Schema schema; + private ObjectMapper mapper; + + public JsonFileReader(FileSystem fs, Path filePath, Map config) throws IOException { + super(fs, filePath, new JsonToStruct(), config); + + config.put(TextFileReader.FILE_READER_TEXT_ENCODING, config.get(FILE_READER_JSON_ENCODING)); + config.put(TextFileReader.FILE_READER_TEXT_RECORD_PER_LINE, config.get(FILE_READER_JSON_RECORD_PER_LINE)); + + this.inner = new TextFileReader(fs, filePath, config); + + if (hasNext()) { + String line = inner.nextRecord().getValue(); + 
this.schema = extractSchema(mapper.readTree(line)); + //back to the first line + inner.seek(() -> 0); + } else { + this.schema = SchemaBuilder.struct().build(); + } + } + + @Override + protected void configure(Map config) { + mapper = new ObjectMapper(); + Set deserializationFeatures = Arrays.stream(DeserializationFeature.values()) + .map(Enum::name) + .collect(Collectors.toSet()); + config.entrySet().stream() + .filter(entry -> entry.getValue() != null) + .filter(entry -> entry.getKey().startsWith(FILE_READER_JSON_DESERIALIZATION_CONFIGS)) + .forEach(entry -> { + String feature = entry.getKey().replaceAll(FILE_READER_JSON_DESERIALIZATION_CONFIGS, ""); + if (deserializationFeatures.contains(feature)) { + mapper.configure(DeserializationFeature.valueOf(feature), + Boolean.parseBoolean(entry.getValue().toString())); + } else { + log.warn("Ignoring deserialization configuration '" + feature + "' due to it does not exist."); + } + }); + } + + @Override + protected JsonRecord nextRecord() { + try { + JsonNode value = mapper.readTree(inner.nextRecord().getValue()); + return new JsonRecord(schema, value); + } catch (JsonProcessingException jpe) { + throw new IllegalStateException(jpe); + } + } + + @Override + public boolean hasNext() { + return inner.hasNext(); + } + + @Override + public void seek(Offset offset) { + inner.seek(offset); + } + + @Override + public Offset currentOffset() { + return inner.currentOffset(); + } + + @Override + public void close() throws IOException { + inner.close(); + } + + private static Schema extractSchema(JsonNode jsonNode) { + switch (jsonNode.getNodeType()) { + case BOOLEAN: + return Schema.OPTIONAL_BOOLEAN_SCHEMA; + case NUMBER: + if (jsonNode.isShort()) { + return Schema.OPTIONAL_INT8_SCHEMA; + } else if (jsonNode.isInt()) { + return Schema.OPTIONAL_INT32_SCHEMA; + } else if (jsonNode.isLong()) { + return Schema.OPTIONAL_INT64_SCHEMA; + } else if (jsonNode.isFloat()) { + return Schema.OPTIONAL_FLOAT32_SCHEMA; + } else if (jsonNode.isDouble()) { + return Schema.OPTIONAL_FLOAT64_SCHEMA; + } else if (jsonNode.isBigInteger()) { + return Schema.OPTIONAL_INT64_SCHEMA; + } else if (jsonNode.isBigDecimal()) { + return Schema.OPTIONAL_FLOAT64_SCHEMA; + } else { + return Schema.OPTIONAL_FLOAT64_SCHEMA; + } + case STRING: + return Schema.OPTIONAL_STRING_SCHEMA; + case BINARY: + return Schema.OPTIONAL_BYTES_SCHEMA; + case ARRAY: + Iterable elements = jsonNode::elements; + Schema arraySchema = StreamSupport.stream(elements.spliterator(), false) + .findFirst().map(JsonFileReader::extractSchema) + .orElse(SchemaBuilder.struct().build()); + return SchemaBuilder.array(arraySchema).build(); + case OBJECT: + SchemaBuilder builder = SchemaBuilder.struct(); + jsonNode.fields() + .forEachRemaining(field -> builder.field(field.getKey(), extractSchema(field.getValue()))); + return builder.build(); + default: + return SchemaBuilder.struct().optional().build(); + } + } + + static class JsonToStruct implements ReaderAdapter { + + @Override + public Struct apply(JsonRecord record) { + return toStruct(record.schema, record.value); + } + + private Struct toStruct(Schema schema, JsonNode jsonNode) { + if (jsonNode.isNull()) return null; + Struct struct = new Struct(schema); + jsonNode.fields() + .forEachRemaining(field -> struct.put(field.getKey(), + mapValue(struct.schema().field(field.getKey()).schema(), field.getValue()))); + return struct; + } + + private Object mapValue(Schema schema, JsonNode value) { + if (value == null) return null; + + switch (value.getNodeType()) { + case BOOLEAN: 
+ return value.booleanValue(); + case NUMBER: + if (value.isShort()) { + return value.shortValue(); + } else if (value.isInt()) { + return value.intValue(); + } else if (value.isLong()) { + return value.longValue(); + } else if (value.isFloat()) { + return value.floatValue(); + } else if (value.isDouble()) { + return value.doubleValue(); + } else if (value.isBigInteger()) { + return value.bigIntegerValue(); + } else { + return value.numberValue(); + } + case STRING: + return value.asText(); + case BINARY: + try { + return value.binaryValue(); + } catch (IOException ioe) { + throw new IllegalStateException(ioe); + } + case OBJECT: + Struct struct = new Struct(schema); + Iterable> fields = value::fields; + StreamSupport.stream(fields.spliterator(), false) + .forEach(field -> struct.put(field.getKey(), + mapValue(extractSchema(field.getValue()), field.getValue())) + ); + return struct; + case ARRAY: + Iterable arrayElements = value::elements; + return StreamSupport.stream(arrayElements.spliterator(), false) + .map(elm -> mapValue(schema, elm)) + .collect(Collectors.toList()); + case NULL: + case POJO: + case MISSING: + default: + return null; + } + } + } + + static class JsonRecord { + private final Schema schema; + private final JsonNode value; + + JsonRecord(Schema schema, JsonNode value) { + this.schema = schema; + this.value = value; + } + } +} From 9a35403f425367565814b822d585ac0b3b828057 Mon Sep 17 00:00:00 2001 From: Mario Molina Date: Mon, 2 Mar 2020 02:59:07 +0100 Subject: [PATCH 09/51] Tests for JSON file reader --- .../file/reader/hdfs/JsonFileReaderTest.java | 170 +++++++++++++++++ .../file/reader/local/JsonFileReaderTest.java | 174 ++++++++++++++++++ 2 files changed, 344 insertions(+) create mode 100644 src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/JsonFileReaderTest.java create mode 100644 src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/JsonFileReaderTest.java diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/JsonFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/JsonFileReaderTest.java new file mode 100644 index 0000000..4a82ede --- /dev/null +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/JsonFileReaderTest.java @@ -0,0 +1,170 @@ +package com.github.mmolimar.kafka.connect.fs.file.reader.hdfs; + +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.node.JsonNodeFactory; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.github.mmolimar.kafka.connect.fs.file.Offset; +import com.github.mmolimar.kafka.connect.fs.file.reader.AgnosticFileReader; +import com.github.mmolimar.kafka.connect.fs.file.reader.FileReader; +import com.github.mmolimar.kafka.connect.fs.file.reader.JsonFileReader; +import org.apache.hadoop.fs.Path; +import org.apache.kafka.connect.data.Struct; +import org.junit.BeforeClass; +import org.junit.Ignore; +import org.junit.Test; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.nio.charset.UnsupportedCharsetException; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; +import java.util.UUID; +import java.util.stream.IntStream; + +import static org.junit.Assert.*; + +public class JsonFileReaderTest extends HdfsFileReaderTestBase { + + private static final String FIELD_INTEGER = "integerField"; + private static final String FIELD_LONG = "longField"; + private static final String FIELD_BOOLEAN = 
"booleanField"; + private static final String FIELD_STRING = "stringField"; + private static final String FIELD_DECIMAL = "decimalField"; + private static final String FIELD_ARRAY = "arrayField"; + private static final String FIELD_STRUCT = "structField"; + private static final String FIELD_NULL = "nullField"; + private static final String FILE_EXTENSION = "json"; + + @BeforeClass + public static void setUp() throws IOException { + readerClass = AgnosticFileReader.class; + dataFile = createDataFile(); + readerConfig = new HashMap() {{ + String deserializationConfig = DeserializationFeature.ACCEPT_EMPTY_ARRAY_AS_NULL_OBJECT.name(); + put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_JSON, FILE_EXTENSION); + put(JsonFileReader.FILE_READER_JSON_DESERIALIZATION_CONFIGS + deserializationConfig, "true"); + put(JsonFileReader.FILE_READER_JSON_DESERIALIZATION_CONFIGS + "invalid", "false"); + }}; + } + + private static Path createDataFile() throws IOException { + return createDataFile(NUM_RECORDS, true); + } + + private static Path createDataFile(int numRecords, boolean recordPerLine) throws IOException { + File txtFile = File.createTempFile("test-", "." + FILE_EXTENSION); + try (FileWriter writer = new FileWriter(txtFile)) { + IntStream.range(0, numRecords).forEach(index -> { + ObjectNode json = JsonNodeFactory.instance.objectNode() + .put(FIELD_INTEGER, index) + .put(FIELD_LONG, Long.MAX_VALUE) + .put(FIELD_STRING, String.format("%d_%s", index, UUID.randomUUID())) + .put(FIELD_BOOLEAN, true) + .put(FIELD_DECIMAL, Double.parseDouble(index + "." + index)) + .put(FIELD_NULL, (String) null); + json.putArray(FIELD_ARRAY) + .add("elm[" + index + "]") + .add("elm[" + index + "]"); + json.putObject(FIELD_STRUCT) + .put(FIELD_INTEGER, (short) index) + .put(FIELD_LONG, Long.MAX_VALUE) + .put(FIELD_STRING, String.format("%d_%s", index, UUID.randomUUID())) + .put(FIELD_BOOLEAN, true) + .put(FIELD_DECIMAL, Double.parseDouble(index + "." + index)) + .put(FIELD_NULL, (String) null); + try { + writer.append(recordPerLine ? json.toString() + "\n" : json.toPrettyString()); + OFFSETS_BY_INDEX.put(index, (long) index); + } catch (IOException ioe) { + throw new RuntimeException(ioe); + } + }); + } + Path path = new Path(new Path(fsUri), txtFile.getName()); + fs.moveFromLocalFile(new Path(txtFile.getAbsolutePath()), path); + return path; + } + + @Ignore(value = "This test does not apply for json files") + @Test(expected = IOException.class) + public void emptyFile() throws Throwable { + super.emptyFile(); + } + + @Test + public void readEmptyFile() throws Throwable { + File tmp = File.createTempFile("test-", "." 
+ getFileExtension()); + Path path = new Path(new Path(fsUri), tmp.getName()); + fs.moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); + FileReader reader = getReader(fs, path, readerConfig); + assertFalse(reader.hasNext()); + } + + @Test + public void validFileEncoding() throws Throwable { + Map cfg = new HashMap() {{ + put(JsonFileReader.FILE_READER_JSON_ENCODING, "Cp1252"); + }}; + reader = getReader(fs, dataFile, cfg); + readAllData(); + } + + @Test(expected = UnsupportedCharsetException.class) + public void invalidFileEncoding() throws Throwable { + Map cfg = new HashMap() {{ + put(JsonFileReader.FILE_READER_JSON_ENCODING, "invalid_charset"); + }}; + getReader(fs, dataFile, cfg); + } + + @Test + public void readDataWithRecordPerLineDisabled() throws Throwable { + Path file = createDataFile(1, false); + FileReader reader = getReader(fs, file, new HashMap() {{ + put(JsonFileReader.FILE_READER_JSON_RECORD_PER_LINE, "false"); + }}); + + assertTrue(reader.hasNext()); + + int recordCount = 0; + while (reader.hasNext()) { + Struct record = reader.next(); + checkData(record, recordCount); + recordCount++; + } + reader.close(); + assertEquals("The number of records in the file does not match", 1, recordCount); + } + + @Override + protected Offset getOffset(long offset) { + return () -> offset; + } + + @Override + protected void checkData(Struct record, long index) { + assertEquals((int) (Integer) record.get(FIELD_INTEGER), index); + assertEquals((long) (Long) record.get(FIELD_LONG), Long.MAX_VALUE); + assertTrue(record.get(FIELD_STRING).toString().startsWith(index + "_")); + assertTrue(Boolean.parseBoolean(record.get(FIELD_BOOLEAN).toString())); + assertEquals((Double) record.get(FIELD_DECIMAL), Double.parseDouble(index + "." + index), 0); + assertNull(record.get(FIELD_NULL)); + assertNotNull(record.schema().field(FIELD_NULL)); + assertEquals(record.get(FIELD_ARRAY), Arrays.asList("elm[" + index + "]", "elm[" + index + "]")); + Struct subrecord = record.getStruct(FIELD_STRUCT); + assertEquals((int) (Integer) subrecord.get(FIELD_INTEGER), index); + assertEquals((long) (Long) subrecord.get(FIELD_LONG), Long.MAX_VALUE); + assertTrue(subrecord.get(FIELD_STRING).toString().startsWith(index + "_")); + assertTrue(Boolean.parseBoolean(subrecord.get(FIELD_BOOLEAN).toString())); + assertEquals((Double) subrecord.get(FIELD_DECIMAL), Double.parseDouble(index + "." 
+ index), 0); + assertNull(subrecord.get(FIELD_NULL)); + assertNotNull(subrecord.schema().field(FIELD_NULL)); + } + + @Override + protected String getFileExtension() { + return FILE_EXTENSION; + } +} diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/JsonFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/JsonFileReaderTest.java new file mode 100644 index 0000000..16bf3eb --- /dev/null +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/JsonFileReaderTest.java @@ -0,0 +1,174 @@ +package com.github.mmolimar.kafka.connect.fs.file.reader.local; + +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.node.JsonNodeFactory; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.github.mmolimar.kafka.connect.fs.file.Offset; +import com.github.mmolimar.kafka.connect.fs.file.reader.AgnosticFileReader; +import com.github.mmolimar.kafka.connect.fs.file.reader.FileReader; +import com.github.mmolimar.kafka.connect.fs.file.reader.JsonFileReader; +import org.apache.hadoop.fs.Path; +import org.apache.kafka.connect.data.Struct; +import org.junit.BeforeClass; +import org.junit.Ignore; +import org.junit.Test; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.nio.charset.UnsupportedCharsetException; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; +import java.util.UUID; +import java.util.stream.IntStream; + +import static org.junit.Assert.*; + +public class JsonFileReaderTest extends LocalFileReaderTestBase { + + private static final String FIELD_INTEGER = "integerField"; + private static final String FIELD_LONG = "longField"; + private static final String FIELD_BOOLEAN = "booleanField"; + private static final String FIELD_STRING = "stringField"; + private static final String FIELD_DECIMAL = "decimalField"; + private static final String FIELD_ARRAY = "arrayField"; + private static final String FIELD_STRUCT = "structField"; + private static final String FIELD_NULL = "nullField"; + private static final String FILE_EXTENSION = "jsn"; + + @BeforeClass + public static void setUp() throws IOException { + readerClass = AgnosticFileReader.class; + dataFile = createDataFile(); + readerConfig = new HashMap() {{ + String deserializationConfig = DeserializationFeature.ACCEPT_EMPTY_ARRAY_AS_NULL_OBJECT.name(); + put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_JSON, FILE_EXTENSION); + put(JsonFileReader.FILE_READER_JSON_DESERIALIZATION_CONFIGS + deserializationConfig, "true"); + put(JsonFileReader.FILE_READER_JSON_DESERIALIZATION_CONFIGS + "invalid", "false"); + }}; + } + + private static Path createDataFile() throws IOException { + return createDataFile(NUM_RECORDS, true); + } + + private static Path createDataFile(int numRecords, boolean recordPerLine) throws IOException { + File txtFile = File.createTempFile("test-", "." + FILE_EXTENSION); + try (FileWriter writer = new FileWriter(txtFile)) { + IntStream.range(0, numRecords).forEach(index -> { + ObjectNode json = JsonNodeFactory.instance.objectNode() + .put(FIELD_INTEGER, index) + .put(FIELD_LONG, Long.MAX_VALUE) + .put(FIELD_STRING, String.format("%d_%s", index, UUID.randomUUID())) + .put(FIELD_BOOLEAN, true) + .put(FIELD_DECIMAL, Double.parseDouble(index + "." 
+ index)) + .put(FIELD_NULL, (String) null); + json.putArray(FIELD_ARRAY) + .add("elm[" + index + "]") + .add("elm[" + index + "]"); + json.putObject(FIELD_STRUCT) + .put(FIELD_INTEGER, (short) index) + .put(FIELD_LONG, Long.MAX_VALUE) + .put(FIELD_STRING, String.format("%d_%s", index, UUID.randomUUID())) + .put(FIELD_BOOLEAN, true) + .put(FIELD_DECIMAL, Double.parseDouble(index + "." + index)) + .put(FIELD_NULL, (String) null); + try { + writer.append(recordPerLine ? json.toString() + "\n" : json.toPrettyString()); + OFFSETS_BY_INDEX.put(index, (long) index); + } catch (IOException ioe) { + throw new RuntimeException(ioe); + } + }); + } + Path path = new Path(new Path(fsUri), txtFile.getName()); + fs.moveFromLocalFile(new Path(txtFile.getAbsolutePath()), path); + return path; + } + + @Ignore(value = "This test does not apply for json files") + @Test(expected = IOException.class) + public void emptyFile() throws Throwable { + super.emptyFile(); + } + + @Test + public void readEmptyFile() throws Throwable { + File tmp = File.createTempFile("test-", "." + getFileExtension()); + Path path = new Path(new Path(fsUri), tmp.getName()); + fs.moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); + FileReader reader = getReader(fs, path, readerConfig); + assertFalse(reader.hasNext()); + } + + @Test + public void validFileEncoding() throws Throwable { + Map cfg = new HashMap() {{ + put(JsonFileReader.FILE_READER_JSON_ENCODING, "Cp1252"); + put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_JSON, getFileExtension()); + }}; + reader = getReader(fs, dataFile, cfg); + readAllData(); + } + + @Test(expected = UnsupportedCharsetException.class) + public void invalidFileEncoding() throws Throwable { + Map cfg = new HashMap() {{ + put(JsonFileReader.FILE_READER_JSON_ENCODING, "invalid_charset"); + put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_JSON, getFileExtension()); + }}; + getReader(fs, dataFile, cfg); + } + + @Test + public void readDataWithRecordPerLineDisabled() throws Throwable { + Path file = createDataFile(1, false); + FileReader reader = getReader(fs, file, new HashMap() {{ + put(JsonFileReader.FILE_READER_JSON_RECORD_PER_LINE, "false"); + put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_JSON, getFileExtension()); + }}); + + assertTrue(reader.hasNext()); + + int recordCount = 0; + while (reader.hasNext()) { + Struct record = reader.next(); + checkData(record, recordCount); + recordCount++; + } + reader.close(); + assertEquals("The number of records in the file does not match", 1, recordCount); + } + + @Override + protected Offset getOffset(long offset) { + return () -> offset; + } + + @Override + protected void checkData(Struct record, long index) { + assertEquals((int) (Integer) record.get(FIELD_INTEGER), index); + assertEquals((long) (Long) record.get(FIELD_LONG), Long.MAX_VALUE); + assertTrue(record.get(FIELD_STRING).toString().startsWith(index + "_")); + assertTrue(Boolean.parseBoolean(record.get(FIELD_BOOLEAN).toString())); + assertEquals((Double) record.get(FIELD_DECIMAL), Double.parseDouble(index + "." 
+ index), 0); + assertNull(record.get(FIELD_NULL)); + assertNotNull(record.schema().field(FIELD_NULL)); + assertEquals(record.get(FIELD_ARRAY), Arrays.asList("elm[" + index + "]", "elm[" + index + "]")); + Struct subrecord = record.getStruct(FIELD_STRUCT); + assertEquals((int) (Integer) subrecord.get(FIELD_INTEGER), index); + assertEquals((long) (Long) subrecord.get(FIELD_LONG), Long.MAX_VALUE); + assertTrue(subrecord.get(FIELD_STRING).toString().startsWith(index + "_")); + assertTrue(Boolean.parseBoolean(subrecord.get(FIELD_BOOLEAN).toString())); + assertEquals((Double) subrecord.get(FIELD_DECIMAL), Double.parseDouble(index + "." + index), 0); + assertNull(subrecord.get(FIELD_NULL)); + assertNotNull(subrecord.schema().field(FIELD_NULL)); + } + + @Override + protected String getFileExtension() { + return FILE_EXTENSION; + } + +} From 528b2b044383fef361161e7d236b208c84e26e84 Mon Sep 17 00:00:00 2001 From: Mario Molina Date: Tue, 3 Mar 2020 00:18:35 +0100 Subject: [PATCH 10/51] Throw IllegalStateException in readers when reader is already closed --- .../fs/file/reader/AvroFileReader.java | 20 ++++++++++++++----- .../fs/file/reader/ParquetFileReader.java | 2 +- .../fs/file/reader/SequenceFileReader.java | 4 ++++ 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReader.java index 1db7e01..14b70a2 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReader.java @@ -2,6 +2,7 @@ import com.github.mmolimar.kafka.connect.fs.file.Offset; import io.confluent.connect.avro.AvroData; +import org.apache.avro.AvroRuntimeException; import org.apache.avro.Schema; import org.apache.avro.file.DataFileReader; import org.apache.avro.generic.GenericRecord; @@ -41,7 +42,8 @@ public AvroFileReader(FileSystem fs, Path filePath, Map config) } protected void configure(Map config) { - if (config.get(FILE_READER_AVRO_SCHEMA) != null) { + if (config.get(FILE_READER_AVRO_SCHEMA) != null && + !config.get(FILE_READER_AVRO_SCHEMA).toString().trim().isEmpty()) { this.schema = new Schema.Parser().parse(config.get(FILE_READER_AVRO_SCHEMA).toString()); } else { this.schema = null; @@ -50,15 +52,23 @@ protected void configure(Map config) { @Override public boolean hasNext() { - return reader.hasNext(); + try { + return reader.hasNext(); + } catch (AvroRuntimeException are) { + throw new IllegalStateException(are); + } } @Override protected GenericRecord nextRecord() { - GenericRecord record = reader.next(); - this.offset.inc(); + try { + GenericRecord record = reader.next(); + this.offset.inc(); - return record; + return record; + } catch (AvroRuntimeException are) { + throw new IllegalStateException(are); + } } @Override diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/ParquetFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/ParquetFileReader.java index f6537f3..6afe74f 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/ParquetFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/ParquetFileReader.java @@ -73,7 +73,7 @@ protected void configure(Map config) { @Override public boolean hasNext() { - if (closed) return false; + if (closed) throw new IllegalStateException("Reader already closed."); if (currentRecord == null) { try { 
currentRecord = reader.read(); diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReader.java index 58d1e0e..40a939a 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReader.java @@ -37,6 +37,7 @@ public class SequenceFileReader extends AbstractFileReader config) throws IOException { super(fs, filePath, new SeqToStruct(), config); @@ -53,6 +54,7 @@ public SequenceFileReader(FileSystem fs, Path filePath, Map conf this.offset = new SeqOffset(0); this.recordIndex = this.hasNextIndex = -1; this.hasNext = false; + this.isClosed = false; } @Override @@ -94,6 +96,7 @@ private Schema getSchema(Writable writable) { @Override public boolean hasNext() { + if (isClosed) throw new IllegalStateException("Reader already closed."); try { if (hasNextIndex == -1 || hasNextIndex == recordIndex) { hasNextIndex++; @@ -139,6 +142,7 @@ public Offset currentOffset() { @Override public void close() throws IOException { + isClosed = true; reader.close(); } From 8da9f0ae911abd8f1480ae1e420f6a87553723df Mon Sep 17 00:00:00 2001 From: Mario Molina Date: Tue, 3 Mar 2020 00:57:07 +0100 Subject: [PATCH 11/51] Test migration to JUnit 5 --- pom.xml | 34 +++++---- .../FsSourceConnectorConfigTest.java | 14 ++-- .../fs/connector/FsSourceConnectorTest.java | 47 ++++++------ .../fs/file/reader/FileReaderTestBase.java | 55 +++++++------- .../file/reader/hdfs/AvroFileReaderTest.java | 34 +++++---- .../hdfs/DelimitedTextFileReaderTest.java | 59 ++++++++------- .../reader/hdfs/HdfsFileReaderTestBase.java | 8 +- .../file/reader/hdfs/JsonFileReaderTest.java | 58 ++++++++------- .../reader/hdfs/ParquetFileReaderTest.java | 51 +++++++------ .../reader/hdfs/SequenceFileReaderTest.java | 17 +++-- .../file/reader/hdfs/TextFileReaderTest.java | 56 ++++++++++---- .../file/reader/local/AvroFileReaderTest.java | 34 +++++---- .../local/DelimitedTextFileReaderTest.java | 68 ++++++++++------- .../file/reader/local/JsonFileReaderTest.java | 59 ++++++++------- .../reader/local/LocalFileReaderTestBase.java | 9 +-- .../reader/local/ParquetFileReaderTest.java | 50 +++++++------ .../reader/local/SequenceFileReaderTest.java | 17 +++-- .../file/reader/local/TextFileReaderTest.java | 38 ++++++---- .../connect/fs/policy/PolicyTestBase.java | 74 +++++++++---------- .../hdfs/HdfsFileWatcherPolicyTest.java | 26 ++++--- .../fs/policy/hdfs/HdfsPolicyTestBase.java | 10 +-- .../fs/policy/hdfs/SimplePolicyTest.java | 6 +- .../fs/policy/hdfs/SleepyPolicyTest.java | 40 +++++----- .../fs/policy/local/LocalPolicyTestBase.java | 8 +- .../fs/policy/local/SimplePolicyTest.java | 6 +- .../fs/policy/local/SleepyPolicyTest.java | 40 +++++----- .../fs/task/FsSourceTaskConfigTest.java | 14 ++-- .../connect/fs/task/FsSourceTaskTest.java | 55 +++++++------- .../connect/fs/task/FsSourceTaskTestBase.java | 40 +++++----- .../fs/task/hdfs/HdfsFsSourceTaskTest.java | 10 +-- .../task/hdfs/HdfsFsSourceTaskTestBase.java | 16 ++-- .../fs/task/local/LocalFsSourceTaskTest.java | 11 ++- .../task/local/LocalFsSourceTaskTestBase.java | 8 +- src/test/resources/log4j.properties | 13 ++++ 34 files changed, 597 insertions(+), 488 deletions(-) create mode 100644 src/test/resources/log4j.properties diff --git a/pom.xml b/pom.xml index c63e4fc..606806a 100644 --- a/pom.xml +++ b/pom.xml @@ -17,14 +17,17 @@ 1.9.2 1.11.0 2.10.2 - 4.13 + 5.6.0 4.2 
2.0.5 + 1.8 + ${maven-compiler.source} 3.2.0 3.8.1 3.2.0 - 0.8.5 - 4.3.0 + 0.8.5 + 4.3.0 + 3.0.0-M4 @@ -74,9 +77,9 @@ - junit - junit - ${junit.version} + org.junit.jupiter + junit-jupiter-api + ${junit-jupiter.version} test @@ -85,12 +88,6 @@ ${easymock.version} test - - org.powermock - powermock-module-junit4 - ${powermock.version} - test - org.powermock powermock-api-easymock @@ -126,10 +123,15 @@ ${maven-compiler-plugin.version} true - 1.8 - 1.8 + ${maven-compiler.source} + ${maven-compiler.target} + + org.apache.maven.plugins + maven-surefire-plugin + ${maven-surfire-plugin.version} + org.apache.maven.plugins maven-assembly-plugin @@ -153,7 +155,7 @@ org.jacoco jacoco-maven-plugin - ${jacoco-maven-plugin.version} + ${maven-jacoco-plugin.version} prepare-agent @@ -166,7 +168,7 @@ org.eluder.coveralls coveralls-maven-plugin - ${coveralls-maven-plugin.version} + ${maven-coveralls-plugin.version} diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/connector/FsSourceConnectorConfigTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/connector/FsSourceConnectorConfigTest.java index 2a33262..5f0538e 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/connector/FsSourceConnectorConfigTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/connector/FsSourceConnectorConfigTest.java @@ -2,10 +2,10 @@ import com.github.mmolimar.kafka.connect.fs.FsSourceConnectorConfig; import org.apache.kafka.common.config.ConfigDef; -import org.junit.Test; +import org.junit.jupiter.api.Test; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; public class FsSourceConnectorConfigTest { @@ -13,9 +13,9 @@ public class FsSourceConnectorConfigTest { public void checkDocumentation() { ConfigDef config = FsSourceConnectorConfig.conf(); config.names().forEach(key -> { - assertFalse("Property " + key + " should be documented", - config.configKeys().get(key).documentation == null || - "".equals(config.configKeys().get(key).documentation.trim())); + assertFalse(config.configKeys().get(key).documentation == null || + "".equals(config.configKeys().get(key).documentation.trim()), + () -> "Property " + key + " should be documented"); }); } @@ -23,4 +23,4 @@ public void checkDocumentation() { public void toRst() { assertNotNull(FsSourceConnectorConfig.conf().toRst()); } -} \ No newline at end of file +} diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/connector/FsSourceConnectorTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/connector/FsSourceConnectorTest.java index 5fc9c5e..a67a92e 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/connector/FsSourceConnectorTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/connector/FsSourceConnectorTest.java @@ -4,55 +4,53 @@ import com.github.mmolimar.kafka.connect.fs.FsSourceTask; import com.github.mmolimar.kafka.connect.fs.FsSourceTaskConfig; import org.apache.kafka.connect.errors.ConnectException; -import org.junit.Before; -import org.junit.ClassRule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; import java.io.File; -import java.io.IOException; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.stream.IntStream; -import static org.junit.Assert.*; 
+import static org.junit.jupiter.api.Assertions.*; public class FsSourceConnectorTest { - @ClassRule - public static final TemporaryFolder temporaryFolder = new TemporaryFolder(); + @TempDir + public static File temporaryFolder; private FsSourceConnector connector; private Map connProps; - @Before - public void setup() throws IOException { + @BeforeEach + public void setup() { connector = new FsSourceConnector(); Map cfg = new HashMap() {{ put(FsSourceTaskConfig.FS_URIS, String.join(",", - temporaryFolder.getRoot().toURI() + File.separator + "dir1", - temporaryFolder.getRoot().toURI() + File.separator + "dir2", - temporaryFolder.getRoot().toURI() + File.separator + "dir3")); + temporaryFolder.toURI() + File.separator + "dir1", + temporaryFolder.toURI() + File.separator + "dir2", + temporaryFolder.toURI() + File.separator + "dir3")); put(FsSourceTaskConfig.TOPIC, "topic_test"); }}; connProps = new HashMap<>(cfg); } - @Test(expected = ConnectException.class) + @Test public void nullProperties() { - connector.start(null); + assertThrows(ConnectException.class, () -> connector.start(null)); } - @Test(expected = ConnectException.class) + @Test public void expectedFsUris() { Map testProps = new HashMap<>(connProps); testProps.remove(FsSourceTaskConfig.FS_URIS); - connector.start(testProps); + assertThrows(ConnectException.class, () -> connector.start(testProps)); } @Test - public void minimunConfig() { + public void minimumConfig() { connector.start(connProps); connector.stop(); } @@ -62,15 +60,15 @@ public void checkTaskClass() { assertEquals(FsSourceTask.class, connector.taskClass()); } - @Test(expected = ConnectException.class) + @Test public void configTasksWithoutStart() { - connector.taskConfigs(1); + assertThrows(ConnectException.class, () -> connector.taskConfigs(1)); } - @Test(expected = IllegalArgumentException.class) + @Test public void invalidConfigTaskNumber() { connector.start(connProps); - connector.taskConfigs(0); + assertThrows(IllegalArgumentException.class, () -> connector.taskConfigs(0)); } @Test @@ -80,7 +78,7 @@ public void configTasks() { IntStream.range(1, connProps.get(FsSourceTaskConfig.FS_URIS).split(",").length + 1) .forEach(index -> { List> taskConfigs = connector.taskConfigs(index); - assertTrue(taskConfigs.size() == (index > uris ? 
uris : index)); + assertEquals(taskConfigs.size(), Math.min(index, uris)); }); connector.stop(); } @@ -95,5 +93,4 @@ public void checkVersion() { public void checkDefaultConf() { assertNotNull(connector.config()); } - -} \ No newline at end of file +} diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FileReaderTestBase.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FileReaderTestBase.java index 238db17..c8eec79 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FileReaderTestBase.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FileReaderTestBase.java @@ -5,10 +5,10 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.kafka.connect.data.Struct; -import org.junit.After; -import org.junit.AfterClass; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; import java.io.*; import java.net.URI; @@ -17,7 +17,7 @@ import java.util.NoSuchElementException; import java.util.UUID; -import static org.junit.Assert.*; +import static org.junit.jupiter.api.Assertions.*; public abstract class FileReaderTestBase { @@ -31,18 +31,18 @@ public abstract class FileReaderTestBase { protected static Map readerConfig; protected static FileReader reader; - @AfterClass + @AfterAll public static void tearDown() throws IOException { fs.close(); } - @Before + @BeforeEach public void openReader() throws Throwable { reader = getReader(fs, dataFile, readerConfig); assertEquals(reader.getFilePath(), dataFile); } - @After + @AfterEach public void closeReader() { try { reader.close(); @@ -51,30 +51,32 @@ public void closeReader() { } } - @Test(expected = IllegalArgumentException.class) - public void invalidArgs() throws Throwable { + @Test + public void invalidArgs() { try { readerClass.getConstructor(FileSystem.class, Path.class, Map.class).newInstance(null, null, null); } catch (Exception e) { - throw e.getCause(); + assertThrows(IllegalArgumentException.class, () -> { + throw e.getCause(); + }); } } - @Test(expected = FileNotFoundException.class) - public void fileDoesNotExist() throws Throwable { + @Test + public void fileDoesNotExist() { Path path = new Path(new Path(fsUri), UUID.randomUUID().toString()); - getReader(fs, path, readerConfig); + assertThrows(FileNotFoundException.class, () -> getReader(fs, path, readerConfig)); } - @Test(expected = IOException.class) + @Test public void emptyFile() throws Throwable { File tmp = File.createTempFile("test-", "." + getFileExtension()); Path path = new Path(new Path(fsUri), tmp.getName()); fs.moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); - getReader(fs, path, readerConfig); + assertThrows(IOException.class, () -> getReader(fs, path, readerConfig)); } - @Test(expected = IOException.class) + @Test public void invalidFileFormat() throws Throwable { File tmp = File.createTempFile("test-", "." 
+ getFileExtension()); try (BufferedWriter writer = new BufferedWriter(new FileWriter(tmp))) { @@ -82,7 +84,7 @@ public void invalidFileFormat() throws Throwable { } Path path = new Path(new Path(fsUri), tmp.getName()); fs.moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); - getReader(fs, path, readerConfig); + assertThrows(IOException.class, () -> getReader(fs, path, readerConfig)); } @Test @@ -95,7 +97,7 @@ public void readAllData() { checkData(record, recordCount); recordCount++; } - assertEquals("The number of records in the file does not match", NUM_RECORDS, recordCount); + assertEquals(NUM_RECORDS, recordCount, () -> "The number of records in the file does not match"); } @Test @@ -120,26 +122,25 @@ public void seekFile() { reader.seek(getOffset(OFFSETS_BY_INDEX.get(NUM_RECORDS - 1) + 1)); assertFalse(reader.hasNext()); - } - @Test(expected = RuntimeException.class) + @Test public void negativeSeek() { - reader.seek(getOffset(-1)); + assertThrows(RuntimeException.class, () -> reader.seek(getOffset(-1))); } - @Test(expected = NoSuchElementException.class) + @Test public void exceededSeek() { reader.seek(getOffset(OFFSETS_BY_INDEX.get(NUM_RECORDS - 1) + 1)); assertFalse(reader.hasNext()); - reader.next(); + assertThrows(NoSuchElementException.class, () -> reader.next()); } - @Test(expected = RuntimeException.class) + @Test public void readFileAlreadyClosed() throws IOException { reader.close(); - assertFalse(reader.hasNext()); - reader.seek(getOffset(0)); + assertThrows(IllegalStateException.class, () -> reader.hasNext()); + assertThrows(IllegalStateException.class, () -> reader.next()); } protected final FileReader getReader(FileSystem fs, Path path, Map config) throws Throwable { diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/AvroFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/AvroFileReaderTest.java index f829ff1..b4ae9ae 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/AvroFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/AvroFileReaderTest.java @@ -13,8 +13,8 @@ import org.apache.avro.io.DatumWriter; import org.apache.hadoop.fs.Path; import org.apache.kafka.connect.data.Struct; -import org.junit.BeforeClass; -import org.junit.Test; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; import java.io.File; import java.io.IOException; @@ -23,8 +23,7 @@ import java.util.UUID; import java.util.stream.IntStream; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.*; public class AvroFileReaderTest extends HdfsFileReaderTestBase { @@ -35,7 +34,7 @@ public class AvroFileReaderTest extends HdfsFileReaderTestBase { private static Schema schema; - @BeforeClass + @BeforeAll public static void setUp() throws IOException { schema = new Schema.Parser().parse(AvroFileReaderTest.class.getResourceAsStream("/file/reader/schemas/people.avsc")); readerClass = AgnosticFileReader.class; @@ -78,21 +77,28 @@ public void readerWithSchema() throws Throwable { readAllData(); } - @Test(expected = AvroTypeException.class) + @Test public void readerWithInvalidSchema() throws Throwable { Map cfg = new HashMap() {{ put(AvroFileReader.FILE_READER_AVRO_SCHEMA, Schema.create(Schema.Type.STRING).toString()); }}; reader = getReader(fs, dataFile, cfg); - readAllData(); + assertThrows(IllegalStateException.class, this::readAllData); + 
assertThrows(AvroTypeException.class, () -> { + try { + readAllData(); + } catch (Exception e) { + throw e.getCause(); + } + }); } - @Test(expected = SchemaParseException.class) - public void readerWithUnparseableSchema() throws Throwable { + @Test + public void readerWithUnparseableSchema() { Map cfg = new HashMap() {{ put(AvroFileReader.FILE_READER_AVRO_SCHEMA, "invalid schema"); }}; - getReader(fs, dataFile, cfg); + assertThrows(SchemaParseException.class, () -> getReader(fs, dataFile, cfg)); } @Override @@ -102,9 +108,11 @@ protected Offset getOffset(long offset) { @Override protected void checkData(Struct record, long index) { - assertEquals((int) (Integer) record.get(FIELD_INDEX), index); - assertTrue(record.get(FIELD_NAME).toString().startsWith(index + "_")); - assertTrue(record.get(FIELD_SURNAME).toString().startsWith(index + "_")); + assertAll( + () -> assertEquals((int) (Integer) record.get(FIELD_INDEX), index), + () -> assertTrue(record.get(FIELD_NAME).toString().startsWith(index + "_")), + () -> assertTrue(record.get(FIELD_SURNAME).toString().startsWith(index + "_")) + ); } @Override diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/DelimitedTextFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/DelimitedTextFileReaderTest.java index 137eee1..f4b6c92 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/DelimitedTextFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/DelimitedTextFileReaderTest.java @@ -7,10 +7,10 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.kafka.connect.data.Struct; -import org.junit.BeforeClass; -import org.junit.Ignore; -import org.junit.Test; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import java.io.BufferedWriter; import java.io.File; import java.io.FileWriter; import java.io.IOException; @@ -20,7 +20,7 @@ import java.util.UUID; import java.util.stream.IntStream; -import static org.junit.Assert.*; +import static org.junit.jupiter.api.Assertions.*; public class DelimitedTextFileReaderTest extends HdfsFileReaderTestBase { @@ -30,7 +30,7 @@ public class DelimitedTextFileReaderTest extends HdfsFileReaderTestBase { private static final String FIELD_COLUMN4 = "column_4"; private static final String FILE_EXTENSION = "csv"; - @BeforeClass + @BeforeAll public static void setUp() throws IOException { readerClass = AgnosticFileReader.class; dataFile = createDataFile(true); @@ -61,24 +61,33 @@ private static Path createDataFile(boolean header) throws IOException { return path; } - @Ignore(value = "This test does not apply for txt files") - @Test(expected = IOException.class) + @Test public void emptyFile() throws Throwable { - super.emptyFile(); + File tmp = File.createTempFile("test-", "." + getFileExtension()); + Path path = new Path(new Path(fsUri), tmp.getName()); + fs.moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); + getReader(fs, path, readerConfig); } - @Ignore(value = "This test does not apply for txt files") - @Test(expected = IOException.class) + @Test public void invalidFileFormat() throws Throwable { - super.invalidFileFormat(); + File tmp = File.createTempFile("test-", "." 
+ getFileExtension()); + try (BufferedWriter writer = new BufferedWriter(new FileWriter(tmp))) { + writer.write("test"); + } + Path path = new Path(new Path(fsUri), tmp.getName()); + fs.moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); + getReader(fs, path, readerConfig); } - @Test(expected = IllegalArgumentException.class) - public void invaliConfigArgs() throws Throwable { + @Test + public void invaliConfigArgs() { try { readerClass.getConstructor(FileSystem.class, Path.class, Map.class).newInstance(fs, dataFile, new HashMap<>()); } catch (Exception e) { - throw e.getCause(); + assertThrows(IllegalArgumentException.class, () -> { + throw e.getCause(); + }); } } @@ -98,8 +107,7 @@ public void readAllDataWithoutHeader() throws Throwable { checkData(record, recordCount); recordCount++; } - assertEquals("The number of records in the file does not match", NUM_RECORDS, recordCount); - + assertEquals(NUM_RECORDS, recordCount, () -> "The number of records in the file does not match"); } @Test @@ -130,7 +138,7 @@ public void readAllDataWithMalformedRows() throws Throwable { assertEquals("custom_value", record.get(FIELD_COLUMN4)); recordCount++; } - assertEquals("The number of records in the file does not match", 2, recordCount); + assertEquals(2, recordCount, () -> "The number of records in the file does not match"); } @Test @@ -163,7 +171,6 @@ public void seekFileWithoutHeader() throws Throwable { reader.seek(getOffset(OFFSETS_BY_INDEX.get(NUM_RECORDS - 1) + 1, false)); assertFalse(reader.hasNext()); - } @Test @@ -176,14 +183,14 @@ public void validFileEncoding() throws Throwable { getReader(fs, dataFile, cfg); } - @Test(expected = UnsupportedCharsetException.class) - public void invalidFileEncoding() throws Throwable { + @Test + public void invalidFileEncoding() { Map cfg = new HashMap() {{ put(DelimitedTextFileReader.FILE_READER_DELIMITED_TOKEN, ","); put(DelimitedTextFileReader.FILE_READER_DELIMITED_HEADER, "true"); put(DelimitedTextFileReader.FILE_READER_DELIMITED_ENCODING, "invalid_charset"); }}; - getReader(fs, dataFile, cfg); + assertThrows(UnsupportedCharsetException.class, () -> getReader(fs, dataFile, cfg)); } @Override @@ -197,10 +204,12 @@ private Offset getOffset(long offset, boolean hasHeader) { @Override protected void checkData(Struct record, long index) { - assertTrue(record.get(FIELD_COLUMN1).toString().startsWith(index + "_")); - assertTrue(record.get(FIELD_COLUMN2).toString().startsWith(index + "_")); - assertTrue(record.get(FIELD_COLUMN3).toString().startsWith(index + "_")); - assertTrue(record.get(FIELD_COLUMN4).toString().startsWith(index + "_")); + assertAll( + () -> assertTrue(record.get(FIELD_COLUMN1).toString().startsWith(index + "_")), + () -> assertTrue(record.get(FIELD_COLUMN2).toString().startsWith(index + "_")), + () -> assertTrue(record.get(FIELD_COLUMN3).toString().startsWith(index + "_")), + () -> assertTrue(record.get(FIELD_COLUMN4).toString().startsWith(index + "_")) + ); } @Override diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/HdfsFileReaderTestBase.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/HdfsFileReaderTestBase.java index f4f5183..c60d0c3 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/HdfsFileReaderTestBase.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/HdfsFileReaderTestBase.java @@ -4,8 +4,8 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import 
org.apache.hadoop.hdfs.MiniDFSCluster; -import org.junit.AfterClass; -import org.junit.BeforeClass; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; import java.io.IOException; import java.net.URI; @@ -16,7 +16,7 @@ public abstract class HdfsFileReaderTestBase extends FileReaderTestBase { private static MiniDFSCluster cluster; - @BeforeClass + @BeforeAll public static void initFs() throws IOException { Configuration clusterConfig = new Configuration(); Path hdfsDir = Files.createTempDirectory("test-"); @@ -26,7 +26,7 @@ public static void initFs() throws IOException { fs = FileSystem.newInstance(fsUri, new Configuration()); } - @AfterClass + @AfterAll public static void finishFs() throws Exception { cluster.shutdown(true); } diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/JsonFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/JsonFileReaderTest.java index 4a82ede..188487a 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/JsonFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/JsonFileReaderTest.java @@ -9,9 +9,8 @@ import com.github.mmolimar.kafka.connect.fs.file.reader.JsonFileReader; import org.apache.hadoop.fs.Path; import org.apache.kafka.connect.data.Struct; -import org.junit.BeforeClass; -import org.junit.Ignore; -import org.junit.Test; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; import java.io.File; import java.io.FileWriter; @@ -23,7 +22,7 @@ import java.util.UUID; import java.util.stream.IntStream; -import static org.junit.Assert.*; +import static org.junit.jupiter.api.Assertions.*; public class JsonFileReaderTest extends HdfsFileReaderTestBase { @@ -37,7 +36,7 @@ public class JsonFileReaderTest extends HdfsFileReaderTestBase { private static final String FIELD_NULL = "nullField"; private static final String FILE_EXTENSION = "json"; - @BeforeClass + @BeforeAll public static void setUp() throws IOException { readerClass = AgnosticFileReader.class; dataFile = createDataFile(); @@ -87,10 +86,12 @@ private static Path createDataFile(int numRecords, boolean recordPerLine) throws return path; } - @Ignore(value = "This test does not apply for json files") - @Test(expected = IOException.class) + @Test public void emptyFile() throws Throwable { - super.emptyFile(); + File tmp = File.createTempFile("test-", "." 
+ getFileExtension()); + Path path = new Path(new Path(fsUri), tmp.getName()); + fs.moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); + getReader(fs, path, readerConfig); } @Test @@ -111,12 +112,12 @@ public void validFileEncoding() throws Throwable { readAllData(); } - @Test(expected = UnsupportedCharsetException.class) - public void invalidFileEncoding() throws Throwable { + @Test + public void invalidFileEncoding() { Map cfg = new HashMap() {{ put(JsonFileReader.FILE_READER_JSON_ENCODING, "invalid_charset"); }}; - getReader(fs, dataFile, cfg); + assertThrows(UnsupportedCharsetException.class, () -> getReader(fs, dataFile, cfg)); } @Test @@ -135,7 +136,7 @@ public void readDataWithRecordPerLineDisabled() throws Throwable { recordCount++; } reader.close(); - assertEquals("The number of records in the file does not match", 1, recordCount); + assertEquals(1, recordCount, () -> "The number of records in the file does not match"); } @Override @@ -145,22 +146,25 @@ protected Offset getOffset(long offset) { @Override protected void checkData(Struct record, long index) { - assertEquals((int) (Integer) record.get(FIELD_INTEGER), index); - assertEquals((long) (Long) record.get(FIELD_LONG), Long.MAX_VALUE); - assertTrue(record.get(FIELD_STRING).toString().startsWith(index + "_")); - assertTrue(Boolean.parseBoolean(record.get(FIELD_BOOLEAN).toString())); - assertEquals((Double) record.get(FIELD_DECIMAL), Double.parseDouble(index + "." + index), 0); - assertNull(record.get(FIELD_NULL)); - assertNotNull(record.schema().field(FIELD_NULL)); - assertEquals(record.get(FIELD_ARRAY), Arrays.asList("elm[" + index + "]", "elm[" + index + "]")); Struct subrecord = record.getStruct(FIELD_STRUCT); - assertEquals((int) (Integer) subrecord.get(FIELD_INTEGER), index); - assertEquals((long) (Long) subrecord.get(FIELD_LONG), Long.MAX_VALUE); - assertTrue(subrecord.get(FIELD_STRING).toString().startsWith(index + "_")); - assertTrue(Boolean.parseBoolean(subrecord.get(FIELD_BOOLEAN).toString())); - assertEquals((Double) subrecord.get(FIELD_DECIMAL), Double.parseDouble(index + "." + index), 0); - assertNull(subrecord.get(FIELD_NULL)); - assertNotNull(subrecord.schema().field(FIELD_NULL)); + assertAll( + () -> assertEquals((int) (Integer) record.get(FIELD_INTEGER), index), + () -> assertEquals((long) (Long) record.get(FIELD_LONG), Long.MAX_VALUE), + () -> assertTrue(record.get(FIELD_STRING).toString().startsWith(index + "_")), + () -> assertTrue(Boolean.parseBoolean(record.get(FIELD_BOOLEAN).toString())), + () -> assertEquals((Double) record.get(FIELD_DECIMAL), Double.parseDouble(index + "." + index), 0), + () -> assertNull(record.get(FIELD_NULL)), + () -> assertNotNull(record.schema().field(FIELD_NULL)), + () -> assertEquals(record.get(FIELD_ARRAY), Arrays.asList("elm[" + index + "]", "elm[" + index + "]")), + () -> assertEquals((int) (Integer) subrecord.get(FIELD_INTEGER), index), + () -> assertEquals((long) (Long) subrecord.get(FIELD_LONG), Long.MAX_VALUE), + () -> assertTrue(subrecord.get(FIELD_STRING).toString().startsWith(index + "_")), + () -> assertTrue(Boolean.parseBoolean(subrecord.get(FIELD_BOOLEAN).toString())), + () -> assertEquals((Double) subrecord.get(FIELD_DECIMAL), Double.parseDouble(index + "." 
+ index), 0), + () -> assertNull(subrecord.get(FIELD_NULL)), + () -> assertNotNull(subrecord.schema().field(FIELD_NULL)) + ); + } @Override diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/ParquetFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/ParquetFileReaderTest.java index 5b69bb3..d08395d 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/ParquetFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/ParquetFileReaderTest.java @@ -18,18 +18,19 @@ import org.apache.parquet.hadoop.ParquetFileWriter; import org.apache.parquet.hadoop.ParquetWriter; import org.apache.parquet.io.InvalidRecordException; -import org.junit.BeforeClass; -import org.junit.Ignore; -import org.junit.Test; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import java.io.BufferedWriter; import java.io.File; +import java.io.FileWriter; import java.io.IOException; import java.util.HashMap; import java.util.Map; import java.util.UUID; import java.util.stream.IntStream; -import static org.junit.Assert.*; +import static org.junit.jupiter.api.Assertions.*; public class ParquetFileReaderTest extends HdfsFileReaderTestBase { @@ -41,7 +42,7 @@ public class ParquetFileReaderTest extends HdfsFileReaderTestBase { private static Schema readerSchema; private static Schema projectionSchema; - @BeforeClass + @BeforeAll public static void setUp() throws IOException { readerClass = AgnosticFileReader.class; dataFile = createDataFile(); @@ -57,7 +58,6 @@ private static Path createDataFile() throws IOException { try (ParquetWriter writer = AvroParquetWriter.builder(new Path(parquetFile.toURI())) .withConf(fs.getConf()).withWriteMode(ParquetFileWriter.Mode.OVERWRITE).withSchema(readerSchema).build()) { - IntStream.range(0, NUM_RECORDS).forEach(index -> { GenericRecord datum = new GenericData.Record(readerSchema); datum.put(FIELD_INDEX, index); @@ -76,16 +76,23 @@ private static Path createDataFile() throws IOException { return path; } - @Ignore(value = "This test does not apply for parquet files") - @Test(expected = IOException.class) + @Test public void emptyFile() throws Throwable { - super.emptyFile(); + File tmp = File.createTempFile("test-", "." + getFileExtension()); + Path path = new Path(new Path(fsUri), tmp.getName()); + fs.moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); + getReader(fs, path, readerConfig); } - @Ignore(value = "This test does not apply for parquet files") - @Test(expected = IOException.class) + @Test public void invalidFileFormat() throws Throwable { - super.invalidFileFormat(); + File tmp = File.createTempFile("test-", "." 
+ getFileExtension()); + try (BufferedWriter writer = new BufferedWriter(new FileWriter(tmp))) { + writer.write("test"); + } + Path path = new Path(new Path(fsUri), tmp.getName()); + fs.moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); + getReader(fs, path, readerConfig); } @Test @@ -97,7 +104,7 @@ public void readerWithSchema() throws Throwable { readAllData(); } - @Test(expected = DataException.class) + @Test public void readerWithProjection() throws Throwable { Map cfg = new HashMap() {{ put(ParquetFileReader.FILE_READER_PARQUET_PROJECTION, projectionSchema.toString()); @@ -111,10 +118,10 @@ public void readerWithProjection() throws Throwable { } reader = getReader(FileSystem.newInstance(fsUri, new Configuration()), dataFile, cfg); - readAllData(); + assertThrows(DataException.class, this::readAllData); } - @Test(expected = InvalidRecordException.class) + @Test public void readerWithInvalidProjection() throws Throwable { Schema testSchema = SchemaBuilder.record("test_projection").namespace("test.avro") .fields() @@ -124,24 +131,25 @@ public void readerWithInvalidProjection() throws Throwable { put(ParquetFileReader.FILE_READER_PARQUET_PROJECTION, testSchema.toString()); }}; reader = getReader(FileSystem.newInstance(fsUri, new Configuration()), dataFile, cfg); - readAllData(); + assertThrows(InvalidRecordException.class, this::readAllData); } - @Test(expected = AvroRuntimeException.class) + @Test public void readerWithInvalidSchema() throws Throwable { Map cfg = new HashMap() {{ put(ParquetFileReader.FILE_READER_PARQUET_SCHEMA, Schema.create(Schema.Type.STRING).toString()); }}; reader = getReader(FileSystem.newInstance(fsUri, new Configuration()), dataFile, cfg); - readAllData(); + assertThrows(AvroRuntimeException.class, this::readAllData); } - @Test(expected = SchemaParseException.class) - public void readerWithUnparseableSchema() throws Throwable { + @Test + public void readerWithUnparseableSchema() { Map cfg = new HashMap() {{ put(ParquetFileReader.FILE_READER_PARQUET_SCHEMA, "invalid schema"); }}; - getReader(FileSystem.newInstance(fsUri, new Configuration()), dataFile, cfg); + assertThrows(SchemaParseException.class, () -> + getReader(FileSystem.newInstance(fsUri, new Configuration()), dataFile, cfg)); } @Override @@ -160,5 +168,4 @@ protected void checkData(Struct record, long index) { protected String getFileExtension() { return FILE_EXTENSION; } - } diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/SequenceFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/SequenceFileReaderTest.java index 23e1f8c..a4435bc 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/SequenceFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/SequenceFileReaderTest.java @@ -10,8 +10,8 @@ import org.apache.hadoop.io.Writable; import org.apache.hadoop.util.ReflectionUtils; import org.apache.kafka.connect.data.Struct; -import org.junit.BeforeClass; -import org.junit.Test; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; import java.io.File; import java.io.IOException; @@ -20,8 +20,7 @@ import java.util.UUID; import java.util.stream.IntStream; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.*; public class SequenceFileReaderTest extends HdfsFileReaderTestBase { @@ -29,7 +28,7 @@ public class SequenceFileReaderTest extends HdfsFileReaderTestBase { 
private static final String FIELD_NAME_VALUE = "value"; private static final String FILE_EXTENSION = "seq"; - @BeforeClass + @BeforeAll public static void setUp() throws IOException { readerClass = AgnosticFileReader.class; dataFile = createDataFile(); @@ -84,7 +83,7 @@ public void defaultFieldNames() throws Throwable { checkData(SequenceFileReader.FIELD_NAME_KEY_DEFAULT, SequenceFileReader.FIELD_NAME_VALUE_DEFAULT, record, recordCount); recordCount++; } - assertEquals("The number of records in the file does not match", NUM_RECORDS, recordCount); + assertEquals(NUM_RECORDS, recordCount, () -> "The number of records in the file does not match"); } @Override @@ -98,8 +97,10 @@ protected void checkData(Struct record, long index) { } private void checkData(String keyFieldName, String valueFieldName, Struct record, long index) { - assertEquals((int) (Integer) record.get(keyFieldName), index); - assertTrue(record.get(valueFieldName).toString().startsWith(index + "_")); + assertAll( + () -> assertEquals((int) (Integer) record.get(keyFieldName), index), + () -> assertTrue(record.get(valueFieldName).toString().startsWith(index + "_")) + ); } @Override diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/TextFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/TextFileReaderTest.java index 9a063d3..8e932f2 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/TextFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/TextFileReaderTest.java @@ -2,13 +2,14 @@ import com.github.mmolimar.kafka.connect.fs.file.Offset; import com.github.mmolimar.kafka.connect.fs.file.reader.AgnosticFileReader; +import com.github.mmolimar.kafka.connect.fs.file.reader.FileReader; import com.github.mmolimar.kafka.connect.fs.file.reader.TextFileReader; import org.apache.hadoop.fs.Path; import org.apache.kafka.connect.data.Struct; -import org.junit.BeforeClass; -import org.junit.Ignore; -import org.junit.Test; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import java.io.BufferedWriter; import java.io.File; import java.io.FileWriter; import java.io.IOException; @@ -18,14 +19,14 @@ import java.util.UUID; import java.util.stream.IntStream; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.*; public class TextFileReaderTest extends HdfsFileReaderTestBase { private static final String FIELD_NAME_VALUE = "custom_field_name"; private static final String FILE_EXTENSION = "txt"; - @BeforeClass + @BeforeAll public static void setUp() throws IOException { readerClass = AgnosticFileReader.class; dataFile = createDataFile(); @@ -53,16 +54,23 @@ private static Path createDataFile() throws IOException { return path; } - @Ignore(value = "This test does not apply for txt files") - @Test(expected = IOException.class) + @Test public void emptyFile() throws Throwable { - super.emptyFile(); + File tmp = File.createTempFile("test-", "." + getFileExtension()); + Path path = new Path(new Path(fsUri), tmp.getName()); + fs.moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); + getReader(fs, path, readerConfig); } - @Ignore(value = "This test does not apply for txt files") - @Test(expected = IOException.class) + @Test public void invalidFileFormat() throws Throwable { - super.invalidFileFormat(); + File tmp = File.createTempFile("test-", "." 
+ getFileExtension()); + try (BufferedWriter writer = new BufferedWriter(new FileWriter(tmp))) { + writer.write("test"); + } + Path path = new Path(new Path(fsUri), tmp.getName()); + fs.moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); + getReader(fs, path, readerConfig); } @Test @@ -75,13 +83,33 @@ public void validFileEncoding() throws Throwable { readAllData(); } - @Test(expected = UnsupportedCharsetException.class) - public void invalidFileEncoding() throws Throwable { + @Test + public void invalidFileEncoding() { Map cfg = new HashMap() {{ put(TextFileReader.FILE_READER_TEXT_FIELD_NAME_VALUE, FIELD_NAME_VALUE); put(TextFileReader.FILE_READER_TEXT_ENCODING, "invalid_charset"); }}; - getReader(fs, dataFile, cfg); + assertThrows(UnsupportedCharsetException.class, () -> getReader(fs, dataFile, cfg)); + } + + @Test + public void readDataWithRecordPerLineDisabled() throws Throwable { + Path file = createDataFile(); + FileReader reader = getReader(fs, file, new HashMap() {{ + put(TextFileReader.FILE_READER_TEXT_FIELD_NAME_VALUE, FIELD_NAME_VALUE); + put(TextFileReader.FILE_READER_TEXT_RECORD_PER_LINE, "false"); + }}); + + assertTrue(reader.hasNext()); + + int recordCount = 0; + while (reader.hasNext()) { + Struct record = reader.next(); + checkData(record, recordCount); + recordCount++; + } + reader.close(); + assertEquals(1, recordCount, () -> "The number of records in the file does not match"); } @Override diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/AvroFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/AvroFileReaderTest.java index 2dc0454..5c707e1 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/AvroFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/AvroFileReaderTest.java @@ -13,8 +13,8 @@ import org.apache.avro.io.DatumWriter; import org.apache.hadoop.fs.Path; import org.apache.kafka.connect.data.Struct; -import org.junit.BeforeClass; -import org.junit.Test; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; import java.io.File; import java.io.IOException; @@ -23,8 +23,7 @@ import java.util.UUID; import java.util.stream.IntStream; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.*; public class AvroFileReaderTest extends LocalFileReaderTestBase { @@ -35,7 +34,7 @@ public class AvroFileReaderTest extends LocalFileReaderTestBase { private static Schema schema; - @BeforeClass + @BeforeAll public static void setUp() throws IOException { schema = new Schema.Parser().parse(AvroFileReaderTest.class.getResourceAsStream("/file/reader/schemas/people.avsc")); readerClass = AgnosticFileReader.class; @@ -81,23 +80,30 @@ public void readerWithSchema() throws Throwable { readAllData(); } - @Test(expected = AvroTypeException.class) + @Test public void readerWithInvalidSchema() throws Throwable { Map cfg = new HashMap() {{ put(AvroFileReader.FILE_READER_AVRO_SCHEMA, Schema.create(Schema.Type.STRING).toString()); put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_AVRO, getFileExtension()); }}; reader = getReader(fs, dataFile, cfg); - readAllData(); + assertThrows(IllegalStateException.class, this::readAllData); + assertThrows(AvroTypeException.class, () -> { + try { + readAllData(); + } catch (Exception e) { + throw e.getCause(); + } + }); } - @Test(expected = SchemaParseException.class) - public void 
readerWithUnparseableSchema() throws Throwable { + @Test + public void readerWithUnparseableSchema() { Map cfg = new HashMap() {{ put(AvroFileReader.FILE_READER_AVRO_SCHEMA, "invalid schema"); put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_AVRO, getFileExtension()); }}; - getReader(fs, dataFile, cfg); + assertThrows(SchemaParseException.class, () -> getReader(fs, dataFile, cfg)); } @Override @@ -107,9 +113,11 @@ protected Offset getOffset(long offset) { @Override protected void checkData(Struct record, long index) { - assertEquals((int) (Integer) record.get(FIELD_INDEX), index); - assertTrue(record.get(FIELD_NAME).toString().startsWith(index + "_")); - assertTrue(record.get(FIELD_SURNAME).toString().startsWith(index + "_")); + assertAll( + () -> assertEquals((int) (Integer) record.get(FIELD_INDEX), index), + () -> assertTrue(record.get(FIELD_NAME).toString().startsWith(index + "_")), + () -> assertTrue(record.get(FIELD_SURNAME).toString().startsWith(index + "_")) + ); } @Override diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/DelimitedTextFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/DelimitedTextFileReaderTest.java index 679ef45..91f08e9 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/DelimitedTextFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/DelimitedTextFileReaderTest.java @@ -7,10 +7,10 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.kafka.connect.data.Struct; -import org.junit.BeforeClass; -import org.junit.Ignore; -import org.junit.Test; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import java.io.BufferedWriter; import java.io.File; import java.io.FileWriter; import java.io.IOException; @@ -20,7 +20,7 @@ import java.util.UUID; import java.util.stream.IntStream; -import static org.junit.Assert.*; +import static org.junit.jupiter.api.Assertions.*; public class DelimitedTextFileReaderTest extends LocalFileReaderTestBase { @@ -30,7 +30,7 @@ public class DelimitedTextFileReaderTest extends LocalFileReaderTestBase { private static final String FIELD_COLUMN4 = "column_4"; private static final String FILE_EXTENSION = "tcsv"; - @BeforeClass + @BeforeAll public static void setUp() throws IOException { readerClass = AgnosticFileReader.class; dataFile = createDataFile(true); @@ -62,27 +62,36 @@ private static Path createDataFile(boolean header) throws IOException { return path; } - @Ignore(value = "This test does not apply for txt files") - @Test(expected = IOException.class) + @Test public void emptyFile() throws Throwable { - super.emptyFile(); + File tmp = File.createTempFile("test-", "." + getFileExtension()); + Path path = new Path(new Path(fsUri), tmp.getName()); + fs.moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); + getReader(fs, path, readerConfig); } - @Ignore(value = "This test does not apply for txt files") - @Test(expected = IOException.class) + @Test public void invalidFileFormat() throws Throwable { - super.invalidFileFormat(); + File tmp = File.createTempFile("test-", "." 
+ getFileExtension()); + try (BufferedWriter writer = new BufferedWriter(new FileWriter(tmp))) { + writer.write("test"); + } + Path path = new Path(new Path(fsUri), tmp.getName()); + fs.moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); + getReader(fs, path, readerConfig); } - @Test(expected = IllegalArgumentException.class) - public void invaliConfigArgs() throws Throwable { + @Test + public void invaliConfigArgs() { try { readerClass.getConstructor(FileSystem.class, Path.class, Map.class).newInstance(fs, dataFile, new HashMap() {{ put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_DELIMITED, FILE_EXTENSION); }}); } catch (Exception e) { - throw e.getCause(); + assertThrows(IllegalArgumentException.class, () -> { + throw e.getCause(); + }); } } @@ -103,7 +112,7 @@ public void readAllDataWithoutHeader() throws Throwable { checkData(record, recordCount); recordCount++; } - assertEquals("The number of records in the file does not match", NUM_RECORDS, recordCount); + assertEquals(NUM_RECORDS, recordCount, () -> "The number of records in the file does not match"); } @Test @@ -129,13 +138,15 @@ public void readAllDataWithMalformedRows() throws Throwable { int recordCount = 0; while (reader.hasNext()) { Struct record = reader.next(); - assertEquals("dummy", record.get(FIELD_COLUMN1)); - assertEquals("custom_value", record.get(FIELD_COLUMN2)); - assertEquals("custom_value", record.get(FIELD_COLUMN3)); - assertEquals("custom_value", record.get(FIELD_COLUMN4)); + assertAll( + () -> assertEquals("dummy", record.get(FIELD_COLUMN1)), + () -> assertEquals("custom_value", record.get(FIELD_COLUMN2)), + () -> assertEquals("custom_value", record.get(FIELD_COLUMN3)), + () -> assertEquals("custom_value", record.get(FIELD_COLUMN4)) + ); recordCount++; } - assertEquals("The number of records in the file does not match", 2, recordCount); + assertEquals(2, recordCount, () -> "The number of records in the file does not match"); } @Test @@ -169,7 +180,6 @@ public void seekFileWithoutHeader() throws Throwable { reader.seek(getOffset(OFFSETS_BY_INDEX.get(NUM_RECORDS - 1) + 1, false)); assertFalse(reader.hasNext()); - } @Test @@ -183,15 +193,15 @@ public void validFileEncoding() throws Throwable { getReader(fs, dataFile, cfg); } - @Test(expected = UnsupportedCharsetException.class) - public void invalidFileEncoding() throws Throwable { + @Test + public void invalidFileEncoding() { Map cfg = new HashMap() {{ put(DelimitedTextFileReader.FILE_READER_DELIMITED_TOKEN, ","); put(DelimitedTextFileReader.FILE_READER_DELIMITED_HEADER, "true"); put(DelimitedTextFileReader.FILE_READER_DELIMITED_ENCODING, "invalid_charset"); put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_DELIMITED, getFileExtension()); }}; - getReader(fs, dataFile, cfg); + assertThrows(UnsupportedCharsetException.class, () -> getReader(fs, dataFile, cfg)); } @Override @@ -205,10 +215,12 @@ private Offset getOffset(long offset, boolean hasHeader) { @Override protected void checkData(Struct record, long index) { - assertTrue(record.get(FIELD_COLUMN1).toString().startsWith(index + "_")); - assertTrue(record.get(FIELD_COLUMN2).toString().startsWith(index + "_")); - assertTrue(record.get(FIELD_COLUMN3).toString().startsWith(index + "_")); - assertTrue(record.get(FIELD_COLUMN4).toString().startsWith(index + "_")); + assertAll( + () -> assertTrue(record.get(FIELD_COLUMN1).toString().startsWith(index + "_")), + () -> assertTrue(record.get(FIELD_COLUMN2).toString().startsWith(index + "_")), + () -> 
assertTrue(record.get(FIELD_COLUMN3).toString().startsWith(index + "_")), + () -> assertTrue(record.get(FIELD_COLUMN4).toString().startsWith(index + "_")) + ); } @Override diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/JsonFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/JsonFileReaderTest.java index 16bf3eb..131e427 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/JsonFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/JsonFileReaderTest.java @@ -9,9 +9,8 @@ import com.github.mmolimar.kafka.connect.fs.file.reader.JsonFileReader; import org.apache.hadoop.fs.Path; import org.apache.kafka.connect.data.Struct; -import org.junit.BeforeClass; -import org.junit.Ignore; -import org.junit.Test; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; import java.io.File; import java.io.FileWriter; @@ -23,7 +22,7 @@ import java.util.UUID; import java.util.stream.IntStream; -import static org.junit.Assert.*; +import static org.junit.jupiter.api.Assertions.*; public class JsonFileReaderTest extends LocalFileReaderTestBase { @@ -37,7 +36,7 @@ public class JsonFileReaderTest extends LocalFileReaderTestBase { private static final String FIELD_NULL = "nullField"; private static final String FILE_EXTENSION = "jsn"; - @BeforeClass + @BeforeAll public static void setUp() throws IOException { readerClass = AgnosticFileReader.class; dataFile = createDataFile(); @@ -87,10 +86,12 @@ private static Path createDataFile(int numRecords, boolean recordPerLine) throws return path; } - @Ignore(value = "This test does not apply for json files") - @Test(expected = IOException.class) + @Test public void emptyFile() throws Throwable { - super.emptyFile(); + File tmp = File.createTempFile("test-", "." + getFileExtension()); + Path path = new Path(new Path(fsUri), tmp.getName()); + fs.moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); + getReader(fs, path, readerConfig); } @Test @@ -112,13 +113,13 @@ public void validFileEncoding() throws Throwable { readAllData(); } - @Test(expected = UnsupportedCharsetException.class) - public void invalidFileEncoding() throws Throwable { + @Test + public void invalidFileEncoding() { Map cfg = new HashMap() {{ put(JsonFileReader.FILE_READER_JSON_ENCODING, "invalid_charset"); put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_JSON, getFileExtension()); }}; - getReader(fs, dataFile, cfg); + assertThrows(UnsupportedCharsetException.class, () -> getReader(fs, dataFile, cfg)); } @Test @@ -138,7 +139,7 @@ public void readDataWithRecordPerLineDisabled() throws Throwable { recordCount++; } reader.close(); - assertEquals("The number of records in the file does not match", 1, recordCount); + assertEquals(1, recordCount, () -> "The number of records in the file does not match"); } @Override @@ -148,27 +149,29 @@ protected Offset getOffset(long offset) { @Override protected void checkData(Struct record, long index) { - assertEquals((int) (Integer) record.get(FIELD_INTEGER), index); - assertEquals((long) (Long) record.get(FIELD_LONG), Long.MAX_VALUE); - assertTrue(record.get(FIELD_STRING).toString().startsWith(index + "_")); - assertTrue(Boolean.parseBoolean(record.get(FIELD_BOOLEAN).toString())); - assertEquals((Double) record.get(FIELD_DECIMAL), Double.parseDouble(index + "." 
+ index), 0); - assertNull(record.get(FIELD_NULL)); - assertNotNull(record.schema().field(FIELD_NULL)); - assertEquals(record.get(FIELD_ARRAY), Arrays.asList("elm[" + index + "]", "elm[" + index + "]")); Struct subrecord = record.getStruct(FIELD_STRUCT); - assertEquals((int) (Integer) subrecord.get(FIELD_INTEGER), index); - assertEquals((long) (Long) subrecord.get(FIELD_LONG), Long.MAX_VALUE); - assertTrue(subrecord.get(FIELD_STRING).toString().startsWith(index + "_")); - assertTrue(Boolean.parseBoolean(subrecord.get(FIELD_BOOLEAN).toString())); - assertEquals((Double) subrecord.get(FIELD_DECIMAL), Double.parseDouble(index + "." + index), 0); - assertNull(subrecord.get(FIELD_NULL)); - assertNotNull(subrecord.schema().field(FIELD_NULL)); + assertAll( + () -> assertEquals((int) (Integer) record.get(FIELD_INTEGER), index), + () -> assertEquals((long) (Long) record.get(FIELD_LONG), Long.MAX_VALUE), + () -> assertTrue(record.get(FIELD_STRING).toString().startsWith(index + "_")), + () -> assertTrue(Boolean.parseBoolean(record.get(FIELD_BOOLEAN).toString())), + () -> assertEquals((Double) record.get(FIELD_DECIMAL), Double.parseDouble(index + "." + index), 0), + () -> assertNull(record.get(FIELD_NULL)), + () -> assertNotNull(record.schema().field(FIELD_NULL)), + () -> assertEquals(record.get(FIELD_ARRAY), Arrays.asList("elm[" + index + "]", "elm[" + index + "]")), + () -> assertEquals((int) (Integer) subrecord.get(FIELD_INTEGER), index), + () -> assertEquals((long) (Long) subrecord.get(FIELD_LONG), Long.MAX_VALUE), + () -> assertTrue(subrecord.get(FIELD_STRING).toString().startsWith(index + "_")), + () -> assertTrue(Boolean.parseBoolean(subrecord.get(FIELD_BOOLEAN).toString())), + () -> assertEquals((Double) subrecord.get(FIELD_DECIMAL), Double.parseDouble(index + "." 
+ index), 0), + () -> assertNull(subrecord.get(FIELD_NULL)), + () -> assertNotNull(subrecord.schema().field(FIELD_NULL)) + ); + } @Override protected String getFileExtension() { return FILE_EXTENSION; } - } diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/LocalFileReaderTestBase.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/LocalFileReaderTestBase.java index 6589e92..f08bff7 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/LocalFileReaderTestBase.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/LocalFileReaderTestBase.java @@ -4,8 +4,8 @@ import org.apache.commons.io.FileUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; -import org.junit.AfterClass; -import org.junit.BeforeClass; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; import java.io.IOException; import java.nio.file.Files; @@ -15,16 +15,15 @@ public abstract class LocalFileReaderTestBase extends FileReaderTestBase { private static Path localDir; - @BeforeClass + @BeforeAll public static void initFs() throws IOException { localDir = Files.createTempDirectory("test-"); fsUri = localDir.toUri(); fs = FileSystem.newInstance(fsUri, new Configuration()); } - @AfterClass + @AfterAll public static void finishFs() throws IOException { FileUtils.deleteDirectory(localDir.toFile()); } - } diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/ParquetFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/ParquetFileReaderTest.java index da23677..41060c6 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/ParquetFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/ParquetFileReaderTest.java @@ -18,18 +18,19 @@ import org.apache.parquet.hadoop.ParquetFileWriter; import org.apache.parquet.hadoop.ParquetWriter; import org.apache.parquet.io.InvalidRecordException; -import org.junit.BeforeClass; -import org.junit.Ignore; -import org.junit.Test; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import java.io.BufferedWriter; import java.io.File; +import java.io.FileWriter; import java.io.IOException; import java.util.HashMap; import java.util.Map; import java.util.UUID; import java.util.stream.IntStream; -import static org.junit.Assert.*; +import static org.junit.jupiter.api.Assertions.*; public class ParquetFileReaderTest extends LocalFileReaderTestBase { @@ -41,7 +42,7 @@ public class ParquetFileReaderTest extends LocalFileReaderTestBase { private static Schema readerSchema; private static Schema projectionSchema; - @BeforeClass + @BeforeAll public static void setUp() throws IOException { readerClass = AgnosticFileReader.class; dataFile = createDataFile(); @@ -77,16 +78,23 @@ private static Path createDataFile() throws IOException { return path; } - @Ignore(value = "This test does not apply for parquet files") - @Test(expected = IOException.class) + @Test public void emptyFile() throws Throwable { - super.emptyFile(); + File tmp = File.createTempFile("test-", "." 
+ getFileExtension()); + Path path = new Path(new Path(fsUri), tmp.getName()); + fs.moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); + getReader(fs, path, readerConfig); } - @Ignore(value = "This test does not apply for parquet files") - @Test(expected = IOException.class) + @Test public void invalidFileFormat() throws Throwable { - super.invalidFileFormat(); + File tmp = File.createTempFile("test-", "." + getFileExtension()); + try (BufferedWriter writer = new BufferedWriter(new FileWriter(tmp))) { + writer.write("test"); + } + Path path = new Path(new Path(fsUri), tmp.getName()); + fs.moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); + getReader(fs, path, readerConfig); } @Test @@ -99,7 +107,7 @@ public void readerWithSchema() throws Throwable { readAllData(); } - @Test(expected = DataException.class) + @Test public void readerWithProjection() throws Throwable { Map cfg = new HashMap() {{ put(ParquetFileReader.FILE_READER_PARQUET_PROJECTION, projectionSchema.toString()); @@ -114,10 +122,10 @@ public void readerWithProjection() throws Throwable { } reader = getReader(FileSystem.newInstance(fsUri, new Configuration()), dataFile, cfg); - readAllData(); + assertThrows(DataException.class, this::readAllData); } - @Test(expected = InvalidRecordException.class) + @Test public void readerWithInvalidProjection() throws Throwable { Schema testSchema = SchemaBuilder.record("test_projection").namespace("test.avro") .fields() @@ -128,26 +136,27 @@ public void readerWithInvalidProjection() throws Throwable { put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_PARQUET, getFileExtension()); }}; reader = getReader(FileSystem.newInstance(fsUri, new Configuration()), dataFile, cfg); - readAllData(); + assertThrows(InvalidRecordException.class, this::readAllData); } - @Test(expected = AvroRuntimeException.class) + @Test public void readerWithInvalidSchema() throws Throwable { Map cfg = new HashMap() {{ put(ParquetFileReader.FILE_READER_PARQUET_SCHEMA, Schema.create(Schema.Type.STRING).toString()); put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_PARQUET, getFileExtension()); }}; reader = getReader(FileSystem.newInstance(fsUri, new Configuration()), dataFile, cfg); - readAllData(); + assertThrows(AvroRuntimeException.class, this::readAllData); } - @Test(expected = SchemaParseException.class) - public void readerWithUnparseableSchema() throws Throwable { + @Test + public void readerWithUnparseableSchema() { Map cfg = new HashMap() {{ put(ParquetFileReader.FILE_READER_PARQUET_SCHEMA, "invalid schema"); put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_PARQUET, getFileExtension()); }}; - getReader(FileSystem.newInstance(fsUri, new Configuration()), dataFile, cfg); + assertThrows(SchemaParseException.class, () -> + getReader(FileSystem.newInstance(fsUri, new Configuration()), dataFile, cfg)); } @Override @@ -166,5 +175,4 @@ protected void checkData(Struct record, long index) { protected String getFileExtension() { return FILE_EXTENSION; } - } diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/SequenceFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/SequenceFileReaderTest.java index 48c4c4e..411f647 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/SequenceFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/SequenceFileReaderTest.java @@ -10,8 +10,8 @@ import org.apache.hadoop.io.Writable; import org.apache.hadoop.util.ReflectionUtils; 
import org.apache.kafka.connect.data.Struct; -import org.junit.BeforeClass; -import org.junit.Test; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; import java.io.File; import java.io.IOException; @@ -20,8 +20,7 @@ import java.util.UUID; import java.util.stream.IntStream; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.*; public class SequenceFileReaderTest extends LocalFileReaderTestBase { @@ -29,7 +28,7 @@ public class SequenceFileReaderTest extends LocalFileReaderTestBase { private static final String FIELD_NAME_VALUE = "custom_field_name"; private static final String FILE_EXTENSION = "sq"; - @BeforeClass + @BeforeAll public static void setUp() throws IOException { readerClass = AgnosticFileReader.class; dataFile = createDataFile(); @@ -87,7 +86,7 @@ public void defaultFieldNames() throws Throwable { checkData(SequenceFileReader.FIELD_NAME_KEY_DEFAULT, SequenceFileReader.FIELD_NAME_VALUE_DEFAULT, record, recordCount); recordCount++; } - assertEquals("The number of records in the file does not match", NUM_RECORDS, recordCount); + assertEquals(NUM_RECORDS, recordCount, () -> "The number of records in the file does not match"); } @Override @@ -101,8 +100,10 @@ protected void checkData(Struct record, long index) { } private void checkData(String keyFieldName, String valueFieldName, Struct record, long index) { - assertEquals((int) (Integer) record.get(keyFieldName), index); - assertTrue(record.get(valueFieldName).toString().startsWith(index + "_")); + assertAll( + () -> assertEquals((int) (Integer) record.get(keyFieldName), index), + () -> assertTrue(record.get(valueFieldName).toString().startsWith(index + "_")) + ); } @Override diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/TextFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/TextFileReaderTest.java index 7de8414..a605b9f 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/TextFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/TextFileReaderTest.java @@ -6,10 +6,10 @@ import com.github.mmolimar.kafka.connect.fs.file.reader.TextFileReader; import org.apache.hadoop.fs.Path; import org.apache.kafka.connect.data.Struct; -import org.junit.BeforeClass; -import org.junit.Ignore; -import org.junit.Test; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import java.io.BufferedWriter; import java.io.File; import java.io.FileWriter; import java.io.IOException; @@ -19,15 +19,14 @@ import java.util.UUID; import java.util.stream.IntStream; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.*; public class TextFileReaderTest extends LocalFileReaderTestBase { private static final String FIELD_NAME_VALUE = "custom_field_name"; private static final String FILE_EXTENSION = "txt"; - @BeforeClass + @BeforeAll public static void setUp() throws IOException { readerClass = AgnosticFileReader.class; dataFile = createDataFile(); @@ -55,16 +54,23 @@ private static Path createDataFile() throws IOException { return path; } - @Ignore(value = "This test does not apply for txt files") - @Test(expected = IOException.class) + @Test public void emptyFile() throws Throwable { - super.emptyFile(); + File tmp = File.createTempFile("test-", "." 
+ getFileExtension()); + Path path = new Path(new Path(fsUri), tmp.getName()); + fs.moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); + getReader(fs, path, readerConfig); } - @Ignore(value = "This test does not apply for txt files") - @Test(expected = IOException.class) + @Test public void invalidFileFormat() throws Throwable { - super.invalidFileFormat(); + File tmp = File.createTempFile("test-", "." + getFileExtension()); + try (BufferedWriter writer = new BufferedWriter(new FileWriter(tmp))) { + writer.write("test"); + } + Path path = new Path(new Path(fsUri), tmp.getName()); + fs.moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); + getReader(fs, path, readerConfig); } @Test @@ -77,13 +83,13 @@ public void validFileEncoding() throws Throwable { readAllData(); } - @Test(expected = UnsupportedCharsetException.class) - public void invalidFileEncoding() throws Throwable { + @Test + public void invalidFileEncoding() { Map cfg = new HashMap() {{ put(TextFileReader.FILE_READER_TEXT_FIELD_NAME_VALUE, FIELD_NAME_VALUE); put(TextFileReader.FILE_READER_TEXT_ENCODING, "invalid_charset"); }}; - getReader(fs, dataFile, cfg); + assertThrows(UnsupportedCharsetException.class, () -> getReader(fs, dataFile, cfg)); } @Test @@ -103,7 +109,7 @@ public void readDataWithRecordPerLineDisabled() throws Throwable { recordCount++; } reader.close(); - assertEquals("The number of records in the file does not match", 1, recordCount); + assertEquals(1, recordCount, () -> "The number of records in the file does not match"); } @Override diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/PolicyTestBase.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/PolicyTestBase.java index 4f2bc24..8c9eba9 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/PolicyTestBase.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/PolicyTestBase.java @@ -3,27 +3,23 @@ import com.github.mmolimar.kafka.connect.fs.FsSourceTaskConfig; import com.github.mmolimar.kafka.connect.fs.file.FileMetadata; import com.github.mmolimar.kafka.connect.fs.util.ReflectionUtils; -import org.apache.commons.collections.map.HashedMap; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.kafka.common.config.ConfigException; import org.apache.kafka.connect.errors.IllegalWorkerStateException; -import org.junit.After; -import org.junit.AfterClass; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; import java.io.FileNotFoundException; import java.io.IOException; import java.net.URI; import java.time.LocalDateTime; import java.time.format.DateTimeFormatter; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.NoSuchElementException; +import java.util.*; -import static org.junit.Assert.*; +import static org.junit.jupiter.api.Assertions.*; public abstract class PolicyTestBase { @@ -33,19 +29,19 @@ public abstract class PolicyTestBase { protected static FsSourceTaskConfig taskConfig; protected static URI fsUri; - @AfterClass + @AfterAll public static void tearDown() throws Exception { policy.close(); fs.close(); } - @Before + @BeforeEach public void initPolicy() throws Throwable { - policy = ReflectionUtils.makePolicy((Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), - taskConfig); + policy = ReflectionUtils.makePolicy( + (Class) 
taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), taskConfig); } - @After + @AfterEach public void cleanDirs() throws IOException { for (Path dir : directories) { fs.delete(dir, true); @@ -54,15 +50,17 @@ public void cleanDirs() throws IOException { policy.close(); } - @Test(expected = IllegalArgumentException.class) - public void invalidArgs() throws Exception { - taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS).getConstructor(taskConfig.getClass()).newInstance(null); + @Test + public void invalidArgs() { + assertThrows(IllegalArgumentException.class, () -> taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS) + .getConstructor(taskConfig.getClass()).newInstance(null)); } - @Test(expected = ConfigException.class) - public void invalidConfig() throws Throwable { - ReflectionUtils.makePolicy((Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), - new FsSourceTaskConfig(new HashedMap())); + @Test + public void invalidConfig() { + assertThrows(ConfigException.class, () -> ReflectionUtils.makePolicy( + (Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), + new FsSourceTaskConfig(new HashMap<>()))); } @Test @@ -72,13 +70,13 @@ public void interruptPolicy() throws Throwable { assertTrue(policy.hasEnded()); } - @Test(expected = FileNotFoundException.class) + @Test public void invalidDirectory() throws IOException { for (Path dir : directories) { fs.delete(dir, true); } try { - policy.execute(); + assertThrows(FileNotFoundException.class, () -> policy.execute()); } finally { for (Path dir : directories) { fs.mkdirs(dir); @@ -86,19 +84,19 @@ public void invalidDirectory() throws IOException { } } - @Test(expected = NoSuchElementException.class) + @Test public void listEmptyDirectories() throws IOException { Iterator it = policy.execute(); assertFalse(it.hasNext()); - it.next(); + assertThrows(NoSuchElementException.class, it::next); } @Test public void oneFilePerFs() throws IOException, InterruptedException { for (Path dir : directories) { - fs.createNewFile(new Path(dir, String.valueOf(System.nanoTime() + ".txt"))); + fs.createNewFile(new Path(dir, System.nanoTime() + ".txt")); //this file does not match the regexp - fs.createNewFile(new Path(dir, String.valueOf(System.nanoTime()) + ".invalid")); + fs.createNewFile(new Path(dir, System.nanoTime() + ".invalid")); } //we wait till FS has registered the files Thread.sleep(500); @@ -116,9 +114,9 @@ public void recursiveDirectory() throws IOException, InterruptedException { for (Path dir : directories) { Path tmpDir = new Path(dir, String.valueOf(System.nanoTime())); fs.mkdirs(tmpDir); - fs.createNewFile(new Path(tmpDir, String.valueOf(System.nanoTime() + ".txt"))); + fs.createNewFile(new Path(tmpDir, System.nanoTime() + ".txt")); //this file does not match the regexp - fs.createNewFile(new Path(tmpDir, String.valueOf(System.nanoTime()) + ".invalid")); + fs.createNewFile(new Path(tmpDir, System.nanoTime() + ".invalid")); } //we wait till FS has registered the files Thread.sleep(500); @@ -137,11 +135,11 @@ public void hasEnded() throws IOException { assertTrue(policy.hasEnded()); } - @Test(expected = IllegalWorkerStateException.class) + @Test public void execPolicyAlreadyEnded() throws IOException { policy.execute(); assertTrue(policy.hasEnded()); - policy.execute(); + assertThrows(IllegalWorkerStateException.class, () -> policy.execute()); } @Test @@ -151,8 +149,8 @@ public void dynamicURIs() throws Throwable { Map originals = taskConfig.originalsStrings(); originals.put(FsSourceTaskConfig.FS_URIS, dynamic.toString()); 
FsSourceTaskConfig cfg = new FsSourceTaskConfig(originals); - policy = ReflectionUtils.makePolicy((Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), - cfg); + policy = ReflectionUtils.makePolicy( + (Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), cfg); assertEquals(1, policy.getURIs().size()); LocalDateTime dateTime = LocalDateTime.now(); @@ -168,18 +166,16 @@ public void dynamicURIs() throws Throwable { formatter = DateTimeFormatter.ofPattern("W"); uri.append(dateTime.format(formatter)); assertTrue(policy.getURIs().get(0).endsWith(uri.toString())); - } - @Test(expected = IllegalArgumentException.class) + @Test public void invalidDynamicURIs() throws Throwable { Path dynamic = new Path(fsUri.toString(), "${yyyy}/${MM}/${mmmmmmm}"); fs.create(dynamic); Map originals = taskConfig.originalsStrings(); originals.put(FsSourceTaskConfig.FS_URIS, dynamic.toString()); FsSourceTaskConfig cfg = new FsSourceTaskConfig(originals); - policy = ReflectionUtils.makePolicy((Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), - cfg); + assertThrows(IllegalArgumentException.class, () -> ReflectionUtils.makePolicy( + (Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), cfg)); } - } diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/HdfsFileWatcherPolicyTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/HdfsFileWatcherPolicyTest.java index 0c32830..dca39be 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/HdfsFileWatcherPolicyTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/HdfsFileWatcherPolicyTest.java @@ -5,8 +5,8 @@ import com.github.mmolimar.kafka.connect.fs.policy.HdfsFileWatcherPolicy; import org.apache.hadoop.fs.Path; import org.apache.kafka.connect.errors.IllegalWorkerStateException; -import org.junit.BeforeClass; -import org.junit.Test; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; import java.io.IOException; import java.util.ArrayList; @@ -14,12 +14,11 @@ import java.util.Map; import java.util.UUID; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.*; public class HdfsFileWatcherPolicyTest extends HdfsPolicyTestBase { - @BeforeClass + @BeforeAll public static void setUp() throws IOException { directories = new ArrayList() {{ add(new Path(fsUri.toString(), UUID.randomUUID().toString())); @@ -38,7 +37,7 @@ public static void setUp() throws IOException { put(FsSourceTaskConfig.FILE_READER_CLASS, TextFileReader.class.getName()); put(FsSourceTaskConfig.POLICY_REGEXP, "^[0-9]*\\.txt$"); put(FsSourceTaskConfig.POLICY_PREFIX_FS + "dfs.data.dir", "test"); - put(FsSourceTaskConfig.POLICY_PREFIX_FS + "fs.default.name", "test"); + put(FsSourceTaskConfig.POLICY_PREFIX_FS + "fs.default.name", "hdfs://test"); }}; taskConfig = new FsSourceTaskConfig(cfg); } @@ -47,7 +46,16 @@ public static void setUp() throws IOException { @Test @Override public void invalidDirectory() throws IOException { - super.invalidDirectory(); + for (Path dir : directories) { + fs.delete(dir, true); + } + try { + policy.execute(); + } finally { + for (Path dir : directories) { + fs.mkdirs(dir); + } + } } //This policy never ends at least all watchers die @@ -61,13 +69,13 @@ public void hasEnded() throws IOException { } //This policy never ends. 
We have to interrupt it - @Test(expected = IllegalWorkerStateException.class) + @Test @Override public void execPolicyAlreadyEnded() throws IOException { policy.execute(); assertFalse(policy.hasEnded()); policy.interrupt(); assertTrue(policy.hasEnded()); - policy.execute(); + assertThrows(IllegalWorkerStateException.class, () -> policy.execute()); } } diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/HdfsPolicyTestBase.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/HdfsPolicyTestBase.java index d046d0b..522d1de 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/HdfsPolicyTestBase.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/HdfsPolicyTestBase.java @@ -4,8 +4,8 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.hdfs.MiniDFSCluster; -import org.junit.AfterClass; -import org.junit.BeforeClass; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; import java.io.IOException; import java.net.URI; @@ -16,7 +16,7 @@ public abstract class HdfsPolicyTestBase extends PolicyTestBase { private static MiniDFSCluster cluster; - @BeforeClass + @BeforeAll public static void initFs() throws IOException { Configuration clusterConfig = new Configuration(); Path hdfsDir = Files.createTempDirectory("test-"); @@ -26,8 +26,8 @@ public static void initFs() throws IOException { fs = FileSystem.newInstance(fsUri, new Configuration()); } - @AfterClass - public static void finishFs() throws Exception { + @AfterAll + public static void finishFs() { cluster.shutdown(true); } } diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/SimplePolicyTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/SimplePolicyTest.java index 33ebe28..5e0eb7f 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/SimplePolicyTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/SimplePolicyTest.java @@ -4,7 +4,7 @@ import com.github.mmolimar.kafka.connect.fs.file.reader.TextFileReader; import com.github.mmolimar.kafka.connect.fs.policy.SimplePolicy; import org.apache.hadoop.fs.Path; -import org.junit.BeforeClass; +import org.junit.jupiter.api.BeforeAll; import java.io.IOException; import java.util.ArrayList; @@ -14,7 +14,7 @@ public class SimplePolicyTest extends HdfsPolicyTestBase { - @BeforeClass + @BeforeAll public static void setUp() throws IOException { directories = new ArrayList() {{ add(new Path(fsUri.toString(), UUID.randomUUID().toString())); @@ -33,7 +33,7 @@ public static void setUp() throws IOException { put(FsSourceTaskConfig.FILE_READER_CLASS, TextFileReader.class.getName()); put(FsSourceTaskConfig.POLICY_REGEXP, "^[0-9]*\\.txt$"); put(FsSourceTaskConfig.POLICY_PREFIX_FS + "dfs.data.dir", "test"); - put(FsSourceTaskConfig.POLICY_PREFIX_FS + "fs.default.name", "test"); + put(FsSourceTaskConfig.POLICY_PREFIX_FS + "fs.default.name", "hdfs://test"); }}; taskConfig = new FsSourceTaskConfig(cfg); } diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/SleepyPolicyTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/SleepyPolicyTest.java index 77d85a6..d47faae 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/SleepyPolicyTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/SleepyPolicyTest.java @@ -7,8 +7,8 @@ import 
com.github.mmolimar.kafka.connect.fs.util.ReflectionUtils; import org.apache.hadoop.fs.Path; import org.apache.kafka.common.config.ConfigException; -import org.junit.BeforeClass; -import org.junit.Test; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; import java.io.IOException; import java.util.ArrayList; @@ -16,12 +16,11 @@ import java.util.Map; import java.util.UUID; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.*; public class SleepyPolicyTest extends HdfsPolicyTestBase { - @BeforeClass + @BeforeAll public static void setUp() throws IOException { directories = new ArrayList() {{ add(new Path(fsUri.toString(), UUID.randomUUID().toString())); @@ -40,35 +39,38 @@ public static void setUp() throws IOException { put(FsSourceTaskConfig.FILE_READER_CLASS, TextFileReader.class.getName()); put(FsSourceTaskConfig.POLICY_REGEXP, "^[0-9]*\\.txt$"); put(FsSourceTaskConfig.POLICY_PREFIX_FS + "dfs.data.dir", "test"); - put(FsSourceTaskConfig.POLICY_PREFIX_FS + "fs.default.name", "test"); + put(FsSourceTaskConfig.POLICY_PREFIX_FS + "fs.default.name", "hdfs://test"); put(SleepyPolicy.SLEEPY_POLICY_SLEEP_MS, "100"); put(SleepyPolicy.SLEEPY_POLICY_MAX_EXECS, "1"); }}; taskConfig = new FsSourceTaskConfig(cfg); } - @Test(expected = ConfigException.class) - public void invalidSleepTime() throws Throwable { + @Test + public void invalidSleepTime() { Map originals = taskConfig.originalsStrings(); originals.put(SleepyPolicy.SLEEPY_POLICY_SLEEP_MS, "invalid"); FsSourceTaskConfig cfg = new FsSourceTaskConfig(originals); - ReflectionUtils.makePolicy((Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), cfg); + assertThrows(ConfigException.class, () -> ReflectionUtils.makePolicy( + (Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), cfg)); } - @Test(expected = ConfigException.class) - public void invalidMaxExecs() throws Throwable { + @Test + public void invalidMaxExecs() { Map originals = taskConfig.originalsStrings(); originals.put(SleepyPolicy.SLEEPY_POLICY_MAX_EXECS, "invalid"); FsSourceTaskConfig cfg = new FsSourceTaskConfig(originals); - ReflectionUtils.makePolicy((Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), cfg); + assertThrows(ConfigException.class, () -> ReflectionUtils.makePolicy( + (Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), cfg)); } - @Test(expected = ConfigException.class) - public void invalidSleepFraction() throws Throwable { + @Test + public void invalidSleepFraction() { Map originals = taskConfig.originalsStrings(); originals.put(SleepyPolicy.SLEEPY_POLICY_SLEEP_FRACTION, "invalid"); FsSourceTaskConfig cfg = new FsSourceTaskConfig(originals); - ReflectionUtils.makePolicy((Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), cfg); + assertThrows(ConfigException.class, () -> ReflectionUtils.makePolicy( + (Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), cfg)); } @Test @@ -78,8 +80,8 @@ public void sleepExecution() throws Throwable { tConfig.put(SleepyPolicy.SLEEPY_POLICY_MAX_EXECS, "2"); FsSourceTaskConfig sleepConfig = new FsSourceTaskConfig(tConfig); - policy = ReflectionUtils.makePolicy((Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), - sleepConfig); + policy = ReflectionUtils.makePolicy( + (Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), sleepConfig); assertFalse(policy.hasEnded()); policy.execute(); assertFalse(policy.hasEnded()); @@ -94,8 +96,8 @@ public void defaultExecutions() 
throws Throwable { tConfig.remove(SleepyPolicy.SLEEPY_POLICY_MAX_EXECS); FsSourceTaskConfig sleepConfig = new FsSourceTaskConfig(tConfig); - policy = ReflectionUtils.makePolicy((Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), - sleepConfig); + policy = ReflectionUtils.makePolicy( + (Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), sleepConfig); //it never ends for (int i = 0; i < 100; i++) { diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/local/LocalPolicyTestBase.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/local/LocalPolicyTestBase.java index 6aa4cd5..8c12b3a 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/local/LocalPolicyTestBase.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/local/LocalPolicyTestBase.java @@ -4,8 +4,8 @@ import org.apache.commons.io.FileUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; -import org.junit.AfterClass; -import org.junit.BeforeClass; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; import java.io.IOException; import java.nio.file.Files; @@ -15,14 +15,14 @@ public abstract class LocalPolicyTestBase extends PolicyTestBase { private static Path localDir; - @BeforeClass + @BeforeAll public static void initFs() throws IOException { localDir = Files.createTempDirectory("test-"); fsUri = localDir.toUri(); fs = FileSystem.newInstance(fsUri, new Configuration()); } - @AfterClass + @AfterAll public static void finishFs() throws IOException { FileUtils.deleteDirectory(localDir.toFile()); } diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/local/SimplePolicyTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/local/SimplePolicyTest.java index c8a221a..2de53e6 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/local/SimplePolicyTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/local/SimplePolicyTest.java @@ -4,7 +4,7 @@ import com.github.mmolimar.kafka.connect.fs.file.reader.TextFileReader; import com.github.mmolimar.kafka.connect.fs.policy.SimplePolicy; import org.apache.hadoop.fs.Path; -import org.junit.BeforeClass; +import org.junit.jupiter.api.BeforeAll; import java.io.IOException; import java.util.ArrayList; @@ -14,7 +14,7 @@ public class SimplePolicyTest extends LocalPolicyTestBase { - @BeforeClass + @BeforeAll public static void setUp() throws IOException { directories = new ArrayList() {{ add(new Path(fsUri.toString(), UUID.randomUUID().toString())); @@ -33,7 +33,7 @@ public static void setUp() throws IOException { put(FsSourceTaskConfig.FILE_READER_CLASS, TextFileReader.class.getName()); put(FsSourceTaskConfig.POLICY_REGEXP, "^[0-9]*\\.txt$"); put(FsSourceTaskConfig.POLICY_PREFIX_FS + "dfs.data.dir", "test"); - put(FsSourceTaskConfig.POLICY_PREFIX_FS + "fs.default.name", "test"); + put(FsSourceTaskConfig.POLICY_PREFIX_FS + "fs.default.name", "hdfs://test/"); }}; taskConfig = new FsSourceTaskConfig(cfg); } diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/local/SleepyPolicyTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/local/SleepyPolicyTest.java index be6c58b..93c9f09 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/local/SleepyPolicyTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/local/SleepyPolicyTest.java @@ -7,8 +7,8 @@ import com.github.mmolimar.kafka.connect.fs.util.ReflectionUtils; import 
org.apache.hadoop.fs.Path; import org.apache.kafka.common.config.ConfigException; -import org.junit.BeforeClass; -import org.junit.Test; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; import java.io.IOException; import java.util.ArrayList; @@ -16,12 +16,11 @@ import java.util.Map; import java.util.UUID; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.*; public class SleepyPolicyTest extends LocalPolicyTestBase { - @BeforeClass + @BeforeAll public static void setUp() throws IOException { directories = new ArrayList() {{ add(new Path(fsUri.toString(), UUID.randomUUID().toString())); @@ -40,35 +39,38 @@ public static void setUp() throws IOException { put(FsSourceTaskConfig.FILE_READER_CLASS, TextFileReader.class.getName()); put(FsSourceTaskConfig.POLICY_REGEXP, "^[0-9]*\\.txt$"); put(FsSourceTaskConfig.POLICY_PREFIX_FS + "dfs.data.dir", "test"); - put(FsSourceTaskConfig.POLICY_PREFIX_FS + "fs.default.name", "test"); + put(FsSourceTaskConfig.POLICY_PREFIX_FS + "fs.default.name", "hdfs://test"); put(SleepyPolicy.SLEEPY_POLICY_SLEEP_MS, "100"); put(SleepyPolicy.SLEEPY_POLICY_MAX_EXECS, "1"); }}; taskConfig = new FsSourceTaskConfig(cfg); } - @Test(expected = ConfigException.class) - public void invalidSleepTime() throws Throwable { + @Test + public void invalidSleepTime() { Map originals = taskConfig.originalsStrings(); originals.put(SleepyPolicy.SLEEPY_POLICY_SLEEP_MS, "invalid"); FsSourceTaskConfig cfg = new FsSourceTaskConfig(originals); - ReflectionUtils.makePolicy((Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), cfg); + assertThrows(ConfigException.class, () -> ReflectionUtils.makePolicy( + (Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), cfg)); } - @Test(expected = ConfigException.class) - public void invalidMaxExecs() throws Throwable { + @Test + public void invalidMaxExecs() { Map originals = taskConfig.originalsStrings(); originals.put(SleepyPolicy.SLEEPY_POLICY_MAX_EXECS, "invalid"); FsSourceTaskConfig cfg = new FsSourceTaskConfig(originals); - ReflectionUtils.makePolicy((Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), cfg); + assertThrows(ConfigException.class, () -> ReflectionUtils.makePolicy( + (Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), cfg)); } - @Test(expected = ConfigException.class) - public void invalidSleepFraction() throws Throwable { + @Test + public void invalidSleepFraction() { Map originals = taskConfig.originalsStrings(); originals.put(SleepyPolicy.SLEEPY_POLICY_SLEEP_FRACTION, "invalid"); FsSourceTaskConfig cfg = new FsSourceTaskConfig(originals); - ReflectionUtils.makePolicy((Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), cfg); + assertThrows(ConfigException.class, () -> ReflectionUtils.makePolicy( + (Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), cfg)); } @Test @@ -78,8 +80,8 @@ public void sleepExecution() throws Throwable { tConfig.put(SleepyPolicy.SLEEPY_POLICY_MAX_EXECS, "2"); FsSourceTaskConfig sleepConfig = new FsSourceTaskConfig(tConfig); - policy = ReflectionUtils.makePolicy((Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), - sleepConfig); + policy = ReflectionUtils.makePolicy( + (Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), sleepConfig); assertFalse(policy.hasEnded()); policy.execute(); assertFalse(policy.hasEnded()); @@ -94,8 +96,8 @@ public void defaultExecutions() throws Throwable { 
tConfig.remove(SleepyPolicy.SLEEPY_POLICY_MAX_EXECS); FsSourceTaskConfig sleepConfig = new FsSourceTaskConfig(tConfig); - policy = ReflectionUtils.makePolicy((Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), - sleepConfig); + policy = ReflectionUtils.makePolicy( + (Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), sleepConfig); //it never ends for (int i = 0; i < 100; i++) { diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/task/FsSourceTaskConfigTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/task/FsSourceTaskConfigTest.java index 6b0e619..5506baf 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/task/FsSourceTaskConfigTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/task/FsSourceTaskConfigTest.java @@ -3,10 +3,10 @@ import com.github.mmolimar.kafka.connect.fs.FsSourceConnectorConfig; import com.github.mmolimar.kafka.connect.fs.FsSourceTaskConfig; import org.apache.kafka.common.config.ConfigDef; -import org.junit.Test; +import org.junit.jupiter.api.Test; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; public class FsSourceTaskConfigTest { @@ -14,9 +14,9 @@ public class FsSourceTaskConfigTest { public void checkDocumentation() { ConfigDef config = FsSourceTaskConfig.conf(); config.names().forEach(key -> { - assertFalse("Property " + key + " should be documented", - config.configKeys().get(key).documentation == null || - "".equals(config.configKeys().get(key).documentation.trim())); + assertFalse(config.configKeys().get(key).documentation == null || + "".equals(config.configKeys().get(key).documentation.trim()), + () -> "Property " + key + " should be documented"); }); } @@ -24,4 +24,4 @@ public void checkDocumentation() { public void toRst() { assertNotNull(FsSourceConnectorConfig.conf().toRst()); } -} \ No newline at end of file +} diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/task/FsSourceTaskTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/task/FsSourceTaskTest.java index f0fbacc..6d4a823 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/task/FsSourceTaskTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/task/FsSourceTaskTest.java @@ -5,88 +5,86 @@ import com.github.mmolimar.kafka.connect.fs.file.reader.TextFileReader; import com.github.mmolimar.kafka.connect.fs.policy.SimplePolicy; import org.apache.kafka.connect.errors.ConnectException; -import org.junit.Before; -import org.junit.ClassRule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; import java.io.File; -import java.io.IOException; import java.util.HashMap; import java.util.Map; -import static org.junit.Assert.*; +import static org.junit.jupiter.api.Assertions.*; public class FsSourceTaskTest { - @ClassRule - public static final TemporaryFolder temporaryFolder = new TemporaryFolder(); + @TempDir + public static File temporaryFolder; private FsSourceTask task; private Map taskConfig; - @Before - public void setup() throws IOException { + @BeforeEach + public void setup() { task = new FsSourceTask(); taskConfig = new HashMap() {{ put(FsSourceTaskConfig.FS_URIS, String.join(",", - temporaryFolder.getRoot().toURI() + File.separator + "dir1", - temporaryFolder.getRoot().toURI() + 
File.separator + "dir2", - temporaryFolder.getRoot().toURI() + File.separator + "dir3")); + temporaryFolder.toURI() + File.separator + "dir1", + temporaryFolder.toURI() + File.separator + "dir2", + temporaryFolder.toURI() + File.separator + "dir3")); put(FsSourceTaskConfig.TOPIC, "topic_test"); put(FsSourceTaskConfig.POLICY_CLASS, SimplePolicy.class.getName()); put(FsSourceTaskConfig.FILE_READER_CLASS, TextFileReader.class.getName()); }}; } - @Test(expected = ConnectException.class) + @Test public void nullProperties() { - task.start(null); + assertThrows(ConnectException.class, () -> task.start(null)); } - @Test(expected = ConnectException.class) + @Test public void expectedFsUris() { Map testProps = new HashMap<>(taskConfig); testProps.remove(FsSourceTaskConfig.FS_URIS); - task.start(testProps); + assertThrows(ConnectException.class, () -> task.start(testProps)); } - @Test(expected = ConnectException.class) + @Test public void expectedPolicyClass() { Map testProps = new HashMap<>(taskConfig); testProps.remove(FsSourceTaskConfig.POLICY_CLASS); - task.start(testProps); + assertThrows(ConnectException.class, () -> task.start(testProps)); } - @Test(expected = ConnectException.class) + @Test public void invalidPolicyClass() { Map testProps = new HashMap<>(taskConfig); testProps.put(FsSourceTaskConfig.POLICY_CLASS, Object.class.getName()); - task.start(testProps); + assertThrows(ConnectException.class, () -> task.start(testProps)); } - @Test(expected = ConnectException.class) + @Test public void expectedReaderClass() { Map testProps = new HashMap<>(taskConfig); testProps.remove(FsSourceTaskConfig.FILE_READER_CLASS); - task.start(testProps); + assertThrows(ConnectException.class, () -> task.start(testProps)); } - @Test(expected = ConnectException.class) + @Test public void invalidReaderClass() { Map testProps = new HashMap<>(taskConfig); testProps.put(FsSourceTaskConfig.FILE_READER_CLASS, Object.class.getName()); - task.start(testProps); + assertThrows(ConnectException.class, () -> task.start(testProps)); } @Test - public void minimunConfig() { + public void minimumConfig() { task.start(taskConfig); task.stop(); } @Test - public void pollWithoutStart() throws InterruptedException { + public void pollWithoutStart() { assertNull(task.poll()); task.stop(); } @@ -96,5 +94,4 @@ public void checkVersion() { assertNotNull(task.version()); assertFalse("unknown".equalsIgnoreCase(task.version())); } - -} \ No newline at end of file +} diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/task/FsSourceTaskTestBase.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/task/FsSourceTaskTestBase.java index 192b756..22d388c 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/task/FsSourceTaskTestBase.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/task/FsSourceTaskTestBase.java @@ -13,10 +13,10 @@ import org.apache.kafka.connect.source.SourceTaskContext; import org.apache.kafka.connect.storage.OffsetStorageReader; import org.easymock.EasyMock; -import org.junit.After; -import org.junit.AfterClass; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; import org.powermock.api.easymock.PowerMock; import org.powermock.api.support.membermodification.MemberModifier; @@ -29,8 +29,8 @@ import java.util.Map; import java.util.UUID; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNull; 
+import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; public abstract class FsSourceTaskTestBase { @@ -45,17 +45,17 @@ public abstract class FsSourceTaskTestBase { protected SourceTaskContext taskContext; protected OffsetStorageReader offsetStorageReader; - @AfterClass + @AfterAll public static void tearDown() throws Exception { fs.close(); } - @Before + @BeforeEach public void initTask() { task = new FsSourceTask(); taskConfig = new HashMap() {{ - String uris[] = directories.stream().map(dir -> dir.toString()) - .toArray(size -> new String[size]); + String[] uris = directories.stream().map(Path::toString) + .toArray(String[]::new); put(FsSourceTaskConfig.FS_URIS, String.join(",", uris)); put(FsSourceTaskConfig.TOPIC, "topic_test"); put(FsSourceTaskConfig.POLICY_CLASS, SimplePolicy.class.getName()); @@ -92,7 +92,7 @@ public void initTask() { } - @After + @AfterEach public void cleanDirsAndStop() throws IOException { for (Path dir : directories) { fs.delete(dir, true); @@ -102,7 +102,7 @@ public void cleanDirsAndStop() throws IOException { } @Test - public void pollNoData() throws InterruptedException { + public void pollNoData() { task.start(taskConfig); assertEquals(0, task.poll().size()); //policy has ended @@ -110,9 +110,9 @@ public void pollNoData() throws InterruptedException { } @Test - public void emptyFilesToProcess() throws IOException, InterruptedException { + public void emptyFilesToProcess() throws IOException { for (Path dir : directories) { - fs.createNewFile(new Path(dir, String.valueOf(System.nanoTime() + ".txt"))); + fs.createNewFile(new Path(dir, System.nanoTime() + ".txt")); //this file does not match the regexp fs.createNewFile(new Path(dir, String.valueOf(System.nanoTime()))); } @@ -123,9 +123,9 @@ public void emptyFilesToProcess() throws IOException, InterruptedException { } @Test - public void oneFilePerFs() throws IOException, InterruptedException { + public void oneFilePerFs() throws IOException { for (Path dir : directories) { - Path dataFile = new Path(dir, String.valueOf(System.nanoTime() + ".txt")); + Path dataFile = new Path(dir, System.nanoTime() + ".txt"); createDataFile(dataFile); //this file does not match the regexp fs.createNewFile(new Path(dir, String.valueOf(System.nanoTime()))); @@ -140,7 +140,7 @@ public void oneFilePerFs() throws IOException, InterruptedException { } @Test - public void nonExistentUri() throws InterruptedException { + public void nonExistentUri() { Map props = new HashMap<>(taskConfig); props.put(FsSourceTaskConfig.FS_URIS, new Path(fs.getWorkingDirectory(), UUID.randomUUID().toString()).toString()); task.start(props); @@ -148,7 +148,7 @@ public void nonExistentUri() throws InterruptedException { } @Test - public void exceptionExecutingPolicy() throws InterruptedException, IOException, IllegalAccessException { + public void exceptionExecutingPolicy() throws IOException, IllegalAccessException { Map props = new HashMap<>(taskConfig); task.start(props); @@ -164,7 +164,7 @@ public void exceptionExecutingPolicy() throws InterruptedException, IOException, } @Test - public void exceptionReadingFile() throws InterruptedException, IOException { + public void exceptionReadingFile() throws IOException { Map props = new HashMap<>(taskConfig); File tmp = File.createTempFile("test-", ".txt"); try (PrintWriter writer = new PrintWriter(tmp)) { @@ -184,4 +184,4 @@ public void exceptionReadingFile() throws InterruptedException, IOException { protected abstract void 
createDataFile(Path path) throws IOException; -} \ No newline at end of file +} diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/task/hdfs/HdfsFsSourceTaskTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/task/hdfs/HdfsFsSourceTaskTest.java index 629a0f8..1e8b303 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/task/hdfs/HdfsFsSourceTaskTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/task/hdfs/HdfsFsSourceTaskTest.java @@ -4,7 +4,7 @@ import org.apache.hadoop.fs.Path; import org.apache.kafka.connect.data.Struct; import org.apache.kafka.connect.source.SourceRecord; -import org.junit.BeforeClass; +import org.junit.jupiter.api.BeforeAll; import java.io.File; import java.io.FileWriter; @@ -14,12 +14,12 @@ import java.util.UUID; import java.util.stream.IntStream; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; public class HdfsFsSourceTaskTest extends HdfsFsSourceTaskTestBase { - @BeforeClass + @BeforeAll public static void setUp() throws IOException { directories = new ArrayList() {{ add(new Path(fsUri.toString(), UUID.randomUUID().toString())); @@ -33,7 +33,7 @@ public static void setUp() throws IOException { @Override protected void checkRecords(List records) { records.forEach(record -> { - assertTrue(record.topic().equals("topic_test")); + assertEquals("topic_test", record.topic()); assertNotNull(record.sourcePartition()); assertNotNull(record.sourceOffset()); assertNotNull(record.value()); diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/task/hdfs/HdfsFsSourceTaskTestBase.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/task/hdfs/HdfsFsSourceTaskTestBase.java index fd8c3bd..1132bc6 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/task/hdfs/HdfsFsSourceTaskTestBase.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/task/hdfs/HdfsFsSourceTaskTestBase.java @@ -4,8 +4,8 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.hdfs.MiniDFSCluster; -import org.junit.AfterClass; -import org.junit.BeforeClass; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; import java.io.IOException; import java.net.URI; @@ -15,21 +15,19 @@ public abstract class HdfsFsSourceTaskTestBase extends FsSourceTaskTestBase { private static MiniDFSCluster cluster; - private static Configuration clusterConfig; - private static Path hdfsDir; - @BeforeClass + @BeforeAll public static void initFs() throws IOException { - clusterConfig = new Configuration(); - hdfsDir = Files.createTempDirectory("test-"); + Configuration clusterConfig = new Configuration(); + Path hdfsDir = Files.createTempDirectory("test-"); clusterConfig.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, hdfsDir.toAbsolutePath().toString()); cluster = new MiniDFSCluster.Builder(clusterConfig).build(); fsUri = URI.create("hdfs://localhost:" + cluster.getNameNodePort() + "/"); fs = FileSystem.newInstance(fsUri, clusterConfig); } - @AfterClass - public static void finishFs() throws Exception { + @AfterAll + public static void finishFs() { cluster.shutdown(true); } } diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/task/local/LocalFsSourceTaskTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/task/local/LocalFsSourceTaskTest.java index bbacd9e..8623e05 100644 --- 
a/src/test/java/com/github/mmolimar/kafka/connect/fs/task/local/LocalFsSourceTaskTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/task/local/LocalFsSourceTaskTest.java @@ -4,7 +4,7 @@ import org.apache.hadoop.fs.Path; import org.apache.kafka.connect.data.Struct; import org.apache.kafka.connect.source.SourceRecord; -import org.junit.BeforeClass; +import org.junit.jupiter.api.BeforeAll; import java.io.File; import java.io.FileWriter; @@ -14,12 +14,12 @@ import java.util.UUID; import java.util.stream.IntStream; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; public class LocalFsSourceTaskTest extends LocalFsSourceTaskTestBase { - @BeforeClass + @BeforeAll public static void setUp() throws IOException { directories = new ArrayList() {{ add(new Path(fsUri.toString(), UUID.randomUUID().toString())); @@ -33,11 +33,10 @@ public static void setUp() throws IOException { @Override protected void checkRecords(List records) { records.forEach(record -> { - assertTrue(record.topic().equals("topic_test")); + assertEquals("topic_test", record.topic()); assertNotNull(record.sourcePartition()); assertNotNull(record.sourceOffset()); assertNotNull(record.value()); - assertNotNull(((Struct) record.value()).get(TextFileReader.FIELD_NAME_VALUE_DEFAULT)); }); } diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/task/local/LocalFsSourceTaskTestBase.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/task/local/LocalFsSourceTaskTestBase.java index 569b623..4cf1074 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/task/local/LocalFsSourceTaskTestBase.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/task/local/LocalFsSourceTaskTestBase.java @@ -4,8 +4,8 @@ import org.apache.commons.io.FileUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; -import org.junit.AfterClass; -import org.junit.BeforeClass; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; import java.io.IOException; import java.nio.file.Files; @@ -15,14 +15,14 @@ public abstract class LocalFsSourceTaskTestBase extends FsSourceTaskTestBase { private static Path localDir; - @BeforeClass + @BeforeAll public static void initFs() throws IOException { localDir = Files.createTempDirectory("test-"); fsUri = localDir.toUri(); fs = FileSystem.newInstance(fsUri, new Configuration()); } - @AfterClass + @AfterAll public static void finishFs() throws IOException { FileUtils.deleteDirectory(localDir.toFile()); } diff --git a/src/test/resources/log4j.properties b/src/test/resources/log4j.properties new file mode 100644 index 0000000..18e87a8 --- /dev/null +++ b/src/test/resources/log4j.properties @@ -0,0 +1,13 @@ +# Root logger option +log4j.rootLogger=INFO, stdout + +# Direct log messages to stdout +log4j.appender.stdout=org.apache.log4j.ConsoleAppender +log4j.appender.stdout.Target=System.out +log4j.appender.stdout.layout=org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c:%L - %m%n + +log4j.logger.org.apache.hadoop=WARN +log4j.logger.org.apache.parquet=WARN +log4j.logger.org.eclipse.jetty=WARN +log4j.logger.io.confluent.connect.avro=WARN From 7eef49f8b0c40a3bfd568ec097a5dc8ed878595d Mon Sep 17 00:00:00 2001 From: Mario Molina Date: Fri, 6 Mar 2020 17:20:09 +0100 Subject: [PATCH 12/51] Compression type and JSON 
reader configs in the docs --- docs/source/config_options.rst | 136 ++++++++++++++++++++++++++++----- docs/source/connector.rst | 2 +- docs/source/faq.rst | 4 +- docs/source/filereaders.rst | 12 ++- docs/source/policies.rst | 2 +- 5 files changed, 133 insertions(+), 23 deletions(-) diff --git a/docs/source/config_options.rst b/docs/source/config_options.rst index 851419f..962f572 100644 --- a/docs/source/config_options.rst +++ b/docs/source/config_options.rst @@ -176,7 +176,7 @@ Avro In order to configure custom properties for this reader, the name you must use is ``avro``. ``file_reader.avro.schema`` - AVRO schema in JSON format to use when reading a file. + Avro schema in JSON format to use when reading a file. If not specified, the reader will use the schema defined in the file. * Type: string @@ -190,13 +190,13 @@ Parquet In order to configure custom properties for this reader, the name you must use is ``parquet``. ``file_reader.parquet.schema`` - AVRO schema in JSON format to use when reading a file. + Avro schema in JSON format to use when reading a file. * Type: string * Importance: medium ``file_reader.parquet.projection`` - AVRO schema in JSON format to use for projecting fields from records in a file. + Avro schema in JSON format to use for projecting fields from records in a file. * Type: string * Importance: medium @@ -208,33 +208,99 @@ SequenceFile In order to configure custom properties for this reader, the name you must use is ``sequence``. -``file_reader.sequence.buffer_size`` - Custom buffer size to read data from the Sequence file. - - * Type: int - * Default: 4096 - * Importance: medium - ``file_reader.sequence.field_name.key`` Custom field name for the output key to include in the Kafka message. * Type: string * Default: key - * Importance: low + * Importance: medium ``file_reader.sequence.field_name.value`` Custom field name for the output value to include in the Kafka message. * Type: string * Default: value + * Importance: medium + +``file_reader.sequence.buffer_size`` + Custom buffer size to read data from the Sequence file. + + * Type: int + * Default: 4096 * Importance: low +.. _config_options-filereaders-json: + +JSON +-------------------------------------------- + +To configure custom properties for this reader, the name you must use is ``json``. + +``file_reader.json.record_per_line`` + If enabled, the reader will read each line as a record. Otherwise, the reader will read the full + content of the file as a record. + + * Type: boolean + * Default: true + * Importance: medium + +``file_reader.json.deserialization.`` + Deserialization feature to use when reading a JSON file. You can add as much as you like + based on the ones defined `here. `__ + + * Type: boolean + * Importance: medium + +``file_reader.json.compression.type`` + Compression type to use when reading a file. + + * Type: enum (available values ``bzip2``, ``gzip`` and ``none``) + * Default: none + * Importance: medium + +``file_reader.json.compression.concatenated`` + Flag to specify if the decompression of the reader will finish at the end of the file or after + the first compressed stream. + + * Type: boolean + * Default: true + * Importance: low + +``file_reader.json.encoding`` + Encoding to use for reading a file. If not specified, the reader will use the default encoding. + + * Type: string + * Importance: medium + .. _config_options-filereaders-text: Text -------------------------------------------- -In order to configure custom properties for this reader, the name you must use is ``text``. 
+To configure custom properties for this reader, the name you must use is ``text``. + +``file_reader.json.record_per_line`` + If enabled, the reader will read each line as a record. Otherwise, the reader will read the full + content of the file as a record. + + * Type: boolean + * Default: true + * Importance: medium + +``file_reader.json.compression.type`` + Compression type to use when reading a file. + + * Type: enum (available values ``bzip2``, ``gzip`` and ``none``) + * Default: none + * Importance: medium + +``file_reader.json.compression.concatenated`` + Flag to specify if the decompression of the reader will finish at the end of the file or after + the first compressed stream. + + * Type: boolean + * Default: true + * Importance: low ``file_reader.text.field_name.value`` Custom field name for the output value to include in the Kafka message. @@ -254,7 +320,7 @@ In order to configure custom properties for this reader, the name you must use i Delimited text -------------------------------------------- -In order to configure custom properties for this reader, the name you must use is ``delimited``. +To configure custom properties for this reader, the name you must use is ``delimited``. ``file_reader.delimited.token`` The token delimiter for columns. @@ -269,10 +335,12 @@ In order to configure custom properties for this reader, the name you must use i * Default: false * Importance: medium -``file_reader.delimited.encoding`` - Encoding to use for reading a file. If not specified, the reader will use the default encoding. +``file_reader.json.record_per_line`` + If enabled, the reader will read each line as a record. Otherwise, the reader will read the full + content of the file as a record. - * Type: string + * Type: boolean + * Default: true * Importance: medium ``file_reader.delimited.default_value`` @@ -280,13 +348,34 @@ In order to configure custom properties for this reader, the name you must use i all expected columns). * Type: string - * Default: null + * Default: ``null`` + * Importance: medium + +``file_reader.json.compression.type`` + Compression type to use when reading a file. + + * Type: enum (available values ``bzip2``, ``gzip`` and ``none``) + * Default: none + * Importance: medium + +``file_reader.json.compression.concatenated`` + Flag to specify if the decompression of the reader will finish at the end of the file or after + the first compressed stream. + + * Type: boolean + * Default: true * Importance: low +``file_reader.delimited.encoding`` + Encoding to use for reading a file. If not specified, the reader will use the default encoding. + + * Type: string + * Importance: medium + Agnostic -------------------------------------------- -In order to configure custom properties for this reader, the name you must use is ``agnostic``. +To configure custom properties for this reader, the name you must use is ``agnostic``. ``file_reader.agnostic.extensions.parquet`` A comma-separated string list with the accepted extensions for Parquet files. @@ -309,9 +398,20 @@ In order to configure custom properties for this reader, the name you must use i * Default: seq * Importance: medium +``file_reader.agnostic.extensions.json`` + A comma-separated string list with the accepted extensions for JSON files. + + * Type: string + * Default: json + * Importance: medium + ``file_reader.agnostic.extensions.delimited`` A comma-separated string list with the accepted extensions for Delimited text files. * Type: string * Default: tsv,csv * Importance: medium + +.. 
note:: The Agnostic reader uses the previous ones as inner readers. So, in case of using this + reader, you'll probably need to include also the specified properties for those + readers in the connector configuration as well. diff --git a/docs/source/connector.rst b/docs/source/connector.rst index d045f1e..8d2e305 100644 --- a/docs/source/connector.rst +++ b/docs/source/connector.rst @@ -91,7 +91,7 @@ Policies In order to ingest data from the FS(s), the connector needs a **policy** to define the rules to do it. -Basically, the policy tries to connect to each FS included in ``fs.uris`` connector property, list files +Basically, the policy tries to connect to each FS included in ``fs.uris`` connector property, lists files (and filter them using the regular expression provided in the ``policy.regexp`` property) and enables a file reader to read records from them. diff --git a/docs/source/faq.rst b/docs/source/faq.rst index a5077dc..1041bc4 100644 --- a/docs/source/faq.rst +++ b/docs/source/faq.rst @@ -4,7 +4,7 @@ FAQs ******************************************** -**My file was already processed and the connector, when it is executed again, +**My file was already processed and the connector, when it's executed again, processes the same records again.** If during the previous executions the records were sent successfully to Kafka, @@ -33,7 +33,7 @@ the connector everyday.** Don't do this! Take advantage of the dynamic URIs using expressions. -For instance, if you have this URI ``hdfs://host:9000/data/2017``, you can +For instance, if you have this URI ``hdfs://host:9000/data/2020``, you can use this URI ``hdfs://host:9000/data/${yyyy}`` instead. **The connector is too slow to process all URIs I have.** diff --git a/docs/source/filereaders.rst b/docs/source/filereaders.rst index 75b349b..0ea1560 100644 --- a/docs/source/filereaders.rst +++ b/docs/source/filereaders.rst @@ -38,12 +38,21 @@ by default but you can customize these field names. More information about properties of this file reader :ref:`here`. +JSON +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Reads JSON files which might contain multiple number of fields with their specified +data types. The schema for this sort of records is inferred reading the first record +and marked as optional in the schema all the fields contained. + +More information about properties of this file reader :ref:`here`. + Text ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Reads plain text files. -Each line represents one record which will be in a field +Each line represents one record (by default) which will be in a field named ``value`` in the message sent to Kafka by default but you can customize these field names. @@ -74,6 +83,7 @@ Default extensions for each format: * Parquet: .parquet * Avro: .avro * SequenceFile: .seq +* JSON: .json * Delimited text: .tsv, .csv * Text: any other sort of file extension. 
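As an illustrative aside to the file reader options documented above (this sketch is not part of the patch series itself): a minimal example of how the ``file_reader.*`` properties reach a reader instance, modeled on the test setup added in these patches. The input path is a placeholder, and passing the compression type as the ``CompressionType`` enum simply mirrors what the tests do.

    import com.github.mmolimar.kafka.connect.fs.file.reader.FileReader;
    import com.github.mmolimar.kafka.connect.fs.file.reader.TextFileReader;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.kafka.connect.data.Struct;

    import java.net.URI;
    import java.util.HashMap;
    import java.util.Map;

    public class TextReaderConfigSketch {
        public static void main(String[] args) throws Exception {
            FileSystem fs = FileSystem.newInstance(URI.create("file:///"), new Configuration());
            Path file = new Path("file:///tmp/sample.txt.gz");  // placeholder input file

            Map<String, Object> readerCfg = new HashMap<>();
            // Keys match the reader options documented in config_options.rst above.
            readerCfg.put(TextFileReader.FILE_READER_TEXT_FIELD_NAME_VALUE, "value");
            readerCfg.put(TextFileReader.FILE_READER_TEXT_RECORD_PER_LINE, "true");
            readerCfg.put(TextFileReader.FILE_READER_TEXT_COMPRESSION_TYPE, TextFileReader.CompressionType.GZIP);
            readerCfg.put(TextFileReader.FILE_READER_TEXT_COMPRESSION_CONCATENATED, "true");

            FileReader reader = new TextFileReader(fs, file, readerCfg);
            while (reader.hasNext()) {
                Struct record = reader.next();  // one Struct per line by default
                System.out.println(record.getString("value"));
            }
            reader.close();
        }
    }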
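In the same spirit, a hedged sketch of a task-level configuration that ties together a policy, the regexp filter and the agnostic reader described above. It is modeled on the test configurations in these patches; the URI, the topic name and the extra ``jsonl`` extension are invented placeholder values.

    import com.github.mmolimar.kafka.connect.fs.FsSourceTask;
    import com.github.mmolimar.kafka.connect.fs.FsSourceTaskConfig;
    import com.github.mmolimar.kafka.connect.fs.file.reader.AgnosticFileReader;
    import com.github.mmolimar.kafka.connect.fs.policy.SimplePolicy;
    import org.apache.kafka.connect.source.SourceRecord;

    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    public class TaskConfigSketch {
        public static void main(String[] args) {
            Map<String, String> props = new HashMap<>();
            props.put(FsSourceTaskConfig.FS_URIS, "file:///tmp/data");  // placeholder URI
            props.put(FsSourceTaskConfig.TOPIC, "topic_test");
            props.put(FsSourceTaskConfig.POLICY_CLASS, SimplePolicy.class.getName());
            props.put(FsSourceTaskConfig.POLICY_REGEXP, "^.*\\.(txt|csv|json)$");
            props.put(FsSourceTaskConfig.FILE_READER_CLASS, AgnosticFileReader.class.getName());
            // Extension mapping for the agnostic reader, as documented above ("jsonl" is hypothetical).
            props.put("file_reader.agnostic.extensions.json", "json,jsonl");

            FsSourceTask task = new FsSourceTask();
            task.start(props);
            List<SourceRecord> records = task.poll();
            if (records != null) {
                records.forEach(record -> System.out.println(record.value()));
            }
            task.stop();
        }
    }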
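Finally, since the FAQ above recommends dynamic URIs such as ``hdfs://host:9000/data/${yyyy}``, here is a rough sketch of how such ``java.time`` placeholders can be expanded. The connector's own resolution code is not shown in these patches, so this is only an approximation consistent with the ``DateTimeFormatter`` patterns exercised by the ``dynamicURIs`` test earlier in the series.

    import java.time.LocalDateTime;
    import java.time.format.DateTimeFormatter;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    public class DynamicUriSketch {
        // Expands ${<date-pattern>} placeholders using the given date/time.
        static String resolve(String uriTemplate, LocalDateTime now) {
            Matcher m = Pattern.compile("\\$\\{([^}]+)}").matcher(uriTemplate);
            StringBuffer resolved = new StringBuffer();
            while (m.find()) {
                String formatted = now.format(DateTimeFormatter.ofPattern(m.group(1)));
                m.appendReplacement(resolved, formatted);
            }
            m.appendTail(resolved);
            return resolved.toString();
        }

        public static void main(String[] args) {
            // Prints e.g. "hdfs://host:9000/data/2020/03/07" when run on 2020-03-07.
            System.out.println(resolve("hdfs://host:9000/data/${yyyy}/${MM}/${dd}", LocalDateTime.now()));
        }
    }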
diff --git a/docs/source/policies.rst b/docs/source/policies.rst index b2ceb86..2e496be 100644 --- a/docs/source/policies.rst +++ b/docs/source/policies.rst @@ -17,7 +17,7 @@ You can learn more about the properties of this policy :ref:`here Date: Fri, 6 Mar 2020 17:41:39 +0100 Subject: [PATCH 13/51] Enable different input streams in text readers --- .../file/reader/DelimitedTextFileReader.java | 1 + .../fs/file/reader/TextFileReader.java | 69 ++++++++++++-- .../file/reader/hdfs/TextFileReaderTest.java | 92 ++++++++++++------- .../file/reader/local/TextFileReaderTest.java | 92 ++++++++++++------- 4 files changed, 180 insertions(+), 74 deletions(-) diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/DelimitedTextFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/DelimitedTextFileReader.java index ff703aa..d6a8834 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/DelimitedTextFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/DelimitedTextFileReader.java @@ -16,6 +16,7 @@ public class DelimitedTextFileReader extends AbstractFileReader { private static final String FILE_READER_DELIMITED = FILE_READER_PREFIX + "delimited."; + public static final String FILE_READER_DELIMITED_HEADER = FILE_READER_DELIMITED + "header"; public static final String FILE_READER_DELIMITED_TOKEN = FILE_READER_DELIMITED + "token"; public static final String FILE_READER_DELIMITED_ENCODING = FILE_READER_DELIMITED + "encoding"; diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReader.java index 4d03487..fb5db46 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReader.java @@ -1,6 +1,8 @@ package com.github.mmolimar.kafka.connect.fs.file.reader; import com.github.mmolimar.kafka.connect.fs.file.Offset; +import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.kafka.connect.data.Schema; @@ -8,10 +10,7 @@ import org.apache.kafka.connect.data.Struct; import org.apache.kafka.connect.errors.ConnectException; -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStreamReader; -import java.io.LineNumberReader; +import java.io.*; import java.nio.charset.Charset; import java.util.List; import java.util.Map; @@ -24,10 +23,14 @@ public class TextFileReader extends AbstractFileReader config) throws IOException { super(fs, filePath, new TxtToStruct(), config); - this.reader = new LineNumberReader(new InputStreamReader(fs.open(filePath), this.charset)); + this.reader = new LineNumberReader(getFileReader(fs.open(filePath))); this.offset = new TextOffset(0); } @@ -53,6 +79,18 @@ protected void configure(Map config) { } else { valueFieldName = config.get(FILE_READER_TEXT_FIELD_NAME_VALUE).toString(); } + if (config.get(FILE_READER_TEXT_COMPRESSION_TYPE) == null || + config.get(FILE_READER_TEXT_COMPRESSION_TYPE).toString().equals("")) { + this.compression = CompressionType.NONE; + } else { + boolean concatenated = true; + if (config.get(FILE_READER_TEXT_COMPRESSION_CONCATENATED) != null && + !config.get(FILE_READER_TEXT_COMPRESSION_CONCATENATED).toString().equals("")) { + 
concatenated = Boolean.parseBoolean(config.get(FILE_READER_TEXT_COMPRESSION_CONCATENATED) + .toString().trim()); + } + this.compression = CompressionType.fromName(config.get(FILE_READER_TEXT_COMPRESSION_TYPE).toString(), concatenated); + } if (config.get(FILE_READER_TEXT_ENCODING) == null || config.get(FILE_READER_TEXT_ENCODING).toString().equals("")) { this.charset = Charset.defaultCharset(); @@ -70,6 +108,24 @@ protected void configure(Map config) { .build(); } + private Reader getFileReader(InputStream inputStream) throws IOException { + final InputStreamReader isr; + switch (this.compression) { + case BZIP2: + isr = new InputStreamReader(new BZip2CompressorInputStream(inputStream, + this.compression.isConcatenated()), this.charset); + break; + case GZIP: + isr = new InputStreamReader(new GzipCompressorInputStream(inputStream, + this.compression.isConcatenated()), this.charset); + break; + default: + isr = new InputStreamReader(inputStream, this.charset); + break; + } + return isr; + } + @Override public boolean hasNext() { if (current != null) { @@ -121,7 +177,8 @@ public void seek(Offset offset) { current = null; if (offset.getRecordOffset() < reader.getLineNumber()) { finished = false; - reader = new LineNumberReader(new InputStreamReader(getFs().open(getFilePath()))); + reader.close(); + reader = new LineNumberReader(getFileReader(getFs().open(getFilePath()))); } while (reader.getLineNumber() < offset.getRecordOffset()) { reader.readLine(); diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/TextFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/TextFileReaderTest.java index 8e932f2..79e8b88 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/TextFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/TextFileReaderTest.java @@ -4,16 +4,16 @@ import com.github.mmolimar.kafka.connect.fs.file.reader.AgnosticFileReader; import com.github.mmolimar.kafka.connect.fs.file.reader.FileReader; import com.github.mmolimar.kafka.connect.fs.file.reader.TextFileReader; +import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream; +import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream; import org.apache.hadoop.fs.Path; import org.apache.kafka.connect.data.Struct; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; +import java.io.*; import java.nio.charset.UnsupportedCharsetException; +import java.util.Arrays; import java.util.HashMap; import java.util.Map; import java.util.UUID; @@ -25,28 +25,42 @@ public class TextFileReaderTest extends HdfsFileReaderTestBase { private static final String FIELD_NAME_VALUE = "custom_field_name"; private static final String FILE_EXTENSION = "txt"; + private static final TextFileReader.CompressionType COMPRESSION_TYPE = TextFileReader.CompressionType.GZIP; @BeforeAll public static void setUp() throws IOException { readerClass = AgnosticFileReader.class; - dataFile = createDataFile(); + dataFile = createDataFile(COMPRESSION_TYPE); readerConfig = new HashMap() {{ put(TextFileReader.FILE_READER_TEXT_FIELD_NAME_VALUE, FIELD_NAME_VALUE); + put(TextFileReader.FILE_READER_TEXT_COMPRESSION_TYPE, COMPRESSION_TYPE); + put(TextFileReader.FILE_READER_TEXT_COMPRESSION_CONCATENATED, "true"); }}; } - private static Path createDataFile() throws IOException { - File txtFile 
= File.createTempFile("test-", "." + FILE_EXTENSION); - try (FileWriter writer = new FileWriter(txtFile)) { + private static OutputStream getOutputStream(File file, TextFileReader.CompressionType compression) throws IOException { + final OutputStream os; + switch (compression) { + case BZIP2: + os = new BZip2CompressorOutputStream(new FileOutputStream(file)); + break; + case GZIP: + os = new GzipCompressorOutputStream(new FileOutputStream(file)); + break; + default: + os = new FileOutputStream(file); + break; + } + return os; + } + private static Path createDataFile(TextFileReader.CompressionType compression) throws IOException { + File txtFile = File.createTempFile("test-", "." + FILE_EXTENSION); + try (PrintWriter writer = new PrintWriter(getOutputStream(txtFile, compression))) { IntStream.range(0, NUM_RECORDS).forEach(index -> { String value = String.format("%d_%s", index, UUID.randomUUID()); - try { - writer.append(value + "\n"); - OFFSETS_BY_INDEX.put(index, (long) index); - } catch (IOException ioe) { - throw new RuntimeException(ioe); - } + writer.append(value + "\n"); + OFFSETS_BY_INDEX.put(index, (long) index); }); } Path path = new Path(new Path(fsUri), txtFile.getName()); @@ -54,30 +68,12 @@ private static Path createDataFile() throws IOException { return path; } - @Test - public void emptyFile() throws Throwable { - File tmp = File.createTempFile("test-", "." + getFileExtension()); - Path path = new Path(new Path(fsUri), tmp.getName()); - fs.moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); - getReader(fs, path, readerConfig); - } - - @Test - public void invalidFileFormat() throws Throwable { - File tmp = File.createTempFile("test-", "." + getFileExtension()); - try (BufferedWriter writer = new BufferedWriter(new FileWriter(tmp))) { - writer.write("test"); - } - Path path = new Path(new Path(fsUri), tmp.getName()); - fs.moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); - getReader(fs, path, readerConfig); - } - @Test public void validFileEncoding() throws Throwable { Map cfg = new HashMap() {{ put(TextFileReader.FILE_READER_TEXT_FIELD_NAME_VALUE, FIELD_NAME_VALUE); put(TextFileReader.FILE_READER_TEXT_ENCODING, "Cp1252"); + put(TextFileReader.FILE_READER_TEXT_COMPRESSION_TYPE, COMPRESSION_TYPE); }}; reader = getReader(fs, dataFile, cfg); readAllData(); @@ -88,16 +84,18 @@ public void invalidFileEncoding() { Map cfg = new HashMap() {{ put(TextFileReader.FILE_READER_TEXT_FIELD_NAME_VALUE, FIELD_NAME_VALUE); put(TextFileReader.FILE_READER_TEXT_ENCODING, "invalid_charset"); + put(TextFileReader.FILE_READER_TEXT_COMPRESSION_TYPE, COMPRESSION_TYPE); }}; assertThrows(UnsupportedCharsetException.class, () -> getReader(fs, dataFile, cfg)); } @Test public void readDataWithRecordPerLineDisabled() throws Throwable { - Path file = createDataFile(); + Path file = createDataFile(COMPRESSION_TYPE); FileReader reader = getReader(fs, file, new HashMap() {{ put(TextFileReader.FILE_READER_TEXT_FIELD_NAME_VALUE, FIELD_NAME_VALUE); put(TextFileReader.FILE_READER_TEXT_RECORD_PER_LINE, "false"); + put(TextFileReader.FILE_READER_TEXT_COMPRESSION_TYPE, COMPRESSION_TYPE); }}); assertTrue(reader.hasNext()); @@ -112,6 +110,32 @@ public void readDataWithRecordPerLineDisabled() throws Throwable { assertEquals(1, recordCount, () -> "The number of records in the file does not match"); } + @Test + public void readDifferentCompressionTypes() { + Arrays.stream(TextFileReader.CompressionType.values()).forEach(compressionType -> { + try { + Path file = createDataFile(compressionType); + FileReader 
reader = getReader(fs, file, new HashMap() {{ + put(TextFileReader.FILE_READER_TEXT_FIELD_NAME_VALUE, FIELD_NAME_VALUE); + put(TextFileReader.FILE_READER_TEXT_COMPRESSION_TYPE, compressionType); + }}); + + assertTrue(reader.hasNext()); + + int recordCount = 0; + while (reader.hasNext()) { + Struct record = reader.next(); + checkData(record, recordCount); + recordCount++; + } + reader.close(); + assertEquals(NUM_RECORDS, recordCount, () -> "The number of records in the file does not match"); + } catch (Throwable e) { + throw new RuntimeException(e); + } + }); + } + @Override protected Offset getOffset(long offset) { return new TextFileReader.TextOffset(offset); diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/TextFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/TextFileReaderTest.java index a605b9f..edb26e4 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/TextFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/TextFileReaderTest.java @@ -4,16 +4,16 @@ import com.github.mmolimar.kafka.connect.fs.file.reader.AgnosticFileReader; import com.github.mmolimar.kafka.connect.fs.file.reader.FileReader; import com.github.mmolimar.kafka.connect.fs.file.reader.TextFileReader; +import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream; +import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream; import org.apache.hadoop.fs.Path; import org.apache.kafka.connect.data.Struct; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; +import java.io.*; import java.nio.charset.UnsupportedCharsetException; +import java.util.Arrays; import java.util.HashMap; import java.util.Map; import java.util.UUID; @@ -25,28 +25,42 @@ public class TextFileReaderTest extends LocalFileReaderTestBase { private static final String FIELD_NAME_VALUE = "custom_field_name"; private static final String FILE_EXTENSION = "txt"; + private static final TextFileReader.CompressionType COMPRESSION_TYPE = TextFileReader.CompressionType.GZIP; @BeforeAll public static void setUp() throws IOException { readerClass = AgnosticFileReader.class; - dataFile = createDataFile(); + dataFile = createDataFile(COMPRESSION_TYPE); readerConfig = new HashMap() {{ put(TextFileReader.FILE_READER_TEXT_FIELD_NAME_VALUE, FIELD_NAME_VALUE); + put(TextFileReader.FILE_READER_TEXT_COMPRESSION_TYPE, COMPRESSION_TYPE); + put(TextFileReader.FILE_READER_TEXT_COMPRESSION_CONCATENATED, "true"); }}; } - private static Path createDataFile() throws IOException { - File txtFile = File.createTempFile("test-", "." + FILE_EXTENSION); - try (FileWriter writer = new FileWriter(txtFile)) { + private static OutputStream getOutputStream(File file, TextFileReader.CompressionType compression) throws IOException { + final OutputStream os; + switch (compression) { + case BZIP2: + os = new BZip2CompressorOutputStream(new FileOutputStream(file)); + break; + case GZIP: + os = new GzipCompressorOutputStream(new FileOutputStream(file)); + break; + default: + os = new FileOutputStream(file); + break; + } + return os; + } + private static Path createDataFile(TextFileReader.CompressionType compression) throws IOException { + File txtFile = File.createTempFile("test-", "." 
+ FILE_EXTENSION); + try (PrintWriter writer = new PrintWriter(getOutputStream(txtFile, compression))) { IntStream.range(0, NUM_RECORDS).forEach(index -> { String value = String.format("%d_%s", index, UUID.randomUUID()); - try { - writer.append(value + "\n"); - OFFSETS_BY_INDEX.put(index, (long) index); - } catch (IOException ioe) { - throw new RuntimeException(ioe); - } + writer.append(value + "\n"); + OFFSETS_BY_INDEX.put(index, (long) index); }); } Path path = new Path(new Path(fsUri), txtFile.getName()); @@ -54,30 +68,12 @@ private static Path createDataFile() throws IOException { return path; } - @Test - public void emptyFile() throws Throwable { - File tmp = File.createTempFile("test-", "." + getFileExtension()); - Path path = new Path(new Path(fsUri), tmp.getName()); - fs.moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); - getReader(fs, path, readerConfig); - } - - @Test - public void invalidFileFormat() throws Throwable { - File tmp = File.createTempFile("test-", "." + getFileExtension()); - try (BufferedWriter writer = new BufferedWriter(new FileWriter(tmp))) { - writer.write("test"); - } - Path path = new Path(new Path(fsUri), tmp.getName()); - fs.moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); - getReader(fs, path, readerConfig); - } - @Test public void validFileEncoding() throws Throwable { Map cfg = new HashMap() {{ put(TextFileReader.FILE_READER_TEXT_FIELD_NAME_VALUE, FIELD_NAME_VALUE); put(TextFileReader.FILE_READER_TEXT_ENCODING, "Cp1252"); + put(TextFileReader.FILE_READER_TEXT_COMPRESSION_TYPE, COMPRESSION_TYPE); }}; reader = getReader(fs, dataFile, cfg); readAllData(); @@ -88,16 +84,18 @@ public void invalidFileEncoding() { Map cfg = new HashMap() {{ put(TextFileReader.FILE_READER_TEXT_FIELD_NAME_VALUE, FIELD_NAME_VALUE); put(TextFileReader.FILE_READER_TEXT_ENCODING, "invalid_charset"); + put(TextFileReader.FILE_READER_TEXT_COMPRESSION_TYPE, COMPRESSION_TYPE); }}; assertThrows(UnsupportedCharsetException.class, () -> getReader(fs, dataFile, cfg)); } @Test public void readDataWithRecordPerLineDisabled() throws Throwable { - Path file = createDataFile(); + Path file = createDataFile(COMPRESSION_TYPE); FileReader reader = getReader(fs, file, new HashMap() {{ put(TextFileReader.FILE_READER_TEXT_FIELD_NAME_VALUE, FIELD_NAME_VALUE); put(TextFileReader.FILE_READER_TEXT_RECORD_PER_LINE, "false"); + put(TextFileReader.FILE_READER_TEXT_COMPRESSION_TYPE, COMPRESSION_TYPE); }}); assertTrue(reader.hasNext()); @@ -112,6 +110,32 @@ public void readDataWithRecordPerLineDisabled() throws Throwable { assertEquals(1, recordCount, () -> "The number of records in the file does not match"); } + @Test + public void readDifferentCompressionTypes() { + Arrays.stream(TextFileReader.CompressionType.values()).forEach(compressionType -> { + try { + Path file = createDataFile(compressionType); + FileReader reader = getReader(fs, file, new HashMap() {{ + put(TextFileReader.FILE_READER_TEXT_FIELD_NAME_VALUE, FIELD_NAME_VALUE); + put(TextFileReader.FILE_READER_TEXT_COMPRESSION_TYPE, compressionType); + }}); + + assertTrue(reader.hasNext()); + + int recordCount = 0; + while (reader.hasNext()) { + Struct record = reader.next(); + checkData(record, recordCount); + recordCount++; + } + reader.close(); + assertEquals(NUM_RECORDS, recordCount, () -> "The number of records in the file does not match"); + } catch (Throwable e) { + throw new RuntimeException(e); + } + }); + } + @Override protected Offset getOffset(long offset) { return new TextFileReader.TextOffset(offset); From 
1d439223f29d6104b968a24452dba2a024e6c3a3 Mon Sep 17 00:00:00 2001 From: Mario Molina Date: Sat, 7 Mar 2020 13:18:50 +0100 Subject: [PATCH 14/51] File readers configuration with string --- .../fs/file/reader/AbstractFileReader.java | 6 +-- .../fs/file/reader/AgnosticFileReader.java | 34 +++++++-------- .../fs/file/reader/AvroFileReader.java | 12 +++--- .../file/reader/DelimitedTextFileReader.java | 18 ++++---- .../fs/file/reader/JsonFileReader.java | 7 ++-- .../fs/file/reader/SequenceFileReader.java | 16 ++----- .../fs/file/reader/TextFileReader.java | 42 ++++--------------- 7 files changed, 46 insertions(+), 89 deletions(-) diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AbstractFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AbstractFileReader.java index 533b628..180b0e2 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AbstractFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AbstractFileReader.java @@ -26,14 +26,14 @@ public AbstractFileReader(FileSystem fs, Path filePath, ReaderAdapter adapter this.filePath = filePath; this.adapter = adapter; - Map readerConf = config.entrySet().stream() + Map readerConf = config.entrySet().stream() .filter(entry -> entry.getKey().startsWith(FILE_READER_PREFIX)) .filter(entry -> entry.getValue() != null) - .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + .collect(Collectors.toMap(Map.Entry::getKey, entry -> entry.getValue().toString())); configure(readerConf); } - protected abstract void configure(Map config); + protected abstract void configure(Map config); protected FileSystem getFs() { return fs; diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AgnosticFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AgnosticFileReader.java index caa89a1..30a6371 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AgnosticFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AgnosticFileReader.java @@ -8,7 +8,6 @@ import java.io.IOException; import java.util.Arrays; -import java.util.Collections; import java.util.List; import java.util.Map; @@ -25,7 +24,7 @@ public class AgnosticFileReader extends AbstractFileReader reader; - private List parquetExtensions, avroExtensions, jsonExtensions, sequenceExtensions, delimitedExtensions; + private List parquetExtensions, avroExtensions, sequenceExtensions, jsonExtensions, delimitedExtensions; public AgnosticFileReader(FileSystem fs, Path filePath, Map config) throws IOException { super(fs, filePath, new AgnosticAdapter(), config); @@ -50,10 +49,10 @@ private AbstractFileReader readerByExtension(FileSystem fs, Path filePat clz = ParquetFileReader.class; } else if (avroExtensions.contains(extension)) { clz = AvroFileReader.class; - } else if (jsonExtensions.contains(extension)) { - clz = JsonFileReader.class; } else if (sequenceExtensions.contains(extension)) { clz = SequenceFileReader.class; + } else if (jsonExtensions.contains(extension)) { + clz = JsonFileReader.class; } else if (delimitedExtensions.contains(extension)) { clz = DelimitedTextFileReader.class; } else { @@ -64,22 +63,17 @@ private AbstractFileReader readerByExtension(FileSystem fs, Path filePat } @Override - protected void configure(Map config) { - this.parquetExtensions = config.get(FILE_READER_AGNOSTIC_EXTENSIONS_PARQUET) == null ? 
- Collections.singletonList("parquet") : - Arrays.asList(config.get(FILE_READER_AGNOSTIC_EXTENSIONS_PARQUET).toString().toLowerCase().split(",")); - this.avroExtensions = config.get(FILE_READER_AGNOSTIC_EXTENSIONS_AVRO) == null ? - Collections.singletonList("avro") : - Arrays.asList(config.get(FILE_READER_AGNOSTIC_EXTENSIONS_AVRO).toString().toLowerCase().split(",")); - this.jsonExtensions = config.get(FILE_READER_AGNOSTIC_EXTENSIONS_JSON) == null ? - Collections.singletonList("json") : - Arrays.asList(config.get(FILE_READER_AGNOSTIC_EXTENSIONS_JSON).toString().toLowerCase().split(",")); - this.sequenceExtensions = config.get(FILE_READER_AGNOSTIC_EXTENSIONS_SEQUENCE) == null ? - Collections.singletonList("seq") : - Arrays.asList(config.get(FILE_READER_AGNOSTIC_EXTENSIONS_SEQUENCE).toString().toLowerCase().split(",")); - this.delimitedExtensions = config.get(FILE_READER_AGNOSTIC_EXTENSIONS_DELIMITED) == null ? - Arrays.asList("tsv", "csv") : - Arrays.asList(config.get(FILE_READER_AGNOSTIC_EXTENSIONS_DELIMITED).toString().toLowerCase().split(",")); + protected void configure(Map config) { + this.parquetExtensions = Arrays.asList(config.getOrDefault(FILE_READER_AGNOSTIC_EXTENSIONS_PARQUET, "parquet") + .toLowerCase().split(",")); + this.avroExtensions = Arrays.asList(config.getOrDefault(FILE_READER_AGNOSTIC_EXTENSIONS_AVRO, "avro") + .toLowerCase().split(",")); + this.sequenceExtensions = Arrays.asList(config.getOrDefault(FILE_READER_AGNOSTIC_EXTENSIONS_SEQUENCE, "seq") + .toLowerCase().split(",")); + this.jsonExtensions = Arrays.asList(config.getOrDefault(FILE_READER_AGNOSTIC_EXTENSIONS_JSON, "json") + .toLowerCase().split(",")); + this.delimitedExtensions = Arrays.asList(config.getOrDefault(FILE_READER_AGNOSTIC_EXTENSIONS_DELIMITED, "tsv,csv") + .toLowerCase().split(",")); } @Override diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReader.java index 14b70a2..040d837 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReader.java @@ -16,6 +16,7 @@ import java.io.IOException; import java.util.Map; +import java.util.Optional; import static com.github.mmolimar.kafka.connect.fs.FsSourceTaskConfig.FILE_READER_PREFIX; @@ -41,13 +42,10 @@ public AvroFileReader(FileSystem fs, Path filePath, Map config) this.offset = new AvroOffset(0); } - protected void configure(Map config) { - if (config.get(FILE_READER_AVRO_SCHEMA) != null && - !config.get(FILE_READER_AVRO_SCHEMA).toString().trim().isEmpty()) { - this.schema = new Schema.Parser().parse(config.get(FILE_READER_AVRO_SCHEMA).toString()); - } else { - this.schema = null; - } + protected void configure(Map config) { + this.schema = Optional.ofNullable(config.get(FILE_READER_AVRO_SCHEMA)) + .map(c -> new Schema.Parser().parse(c)) + .orElse(null); } @Override diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/DelimitedTextFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/DelimitedTextFileReader.java index d6a8834..0cae141 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/DelimitedTextFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/DelimitedTextFileReader.java @@ -9,6 +9,7 @@ import java.io.IOException; import java.util.Map; +import java.util.Optional; import java.util.stream.IntStream; import static 
com.github.mmolimar.kafka.connect.fs.FsSourceTaskConfig.FILE_READER_PREFIX; @@ -58,15 +59,14 @@ public DelimitedTextFileReader(FileSystem fs, Path filePath, Map } @Override - protected void configure(Map config) { - if (config.get(FILE_READER_DELIMITED_TOKEN) == null || - config.get(FILE_READER_DELIMITED_TOKEN).toString().equals("")) { - throw new IllegalArgumentException(FILE_READER_DELIMITED_TOKEN + " property cannot be empty for DelimitedTextFileReader"); - } - this.token = config.get(FILE_READER_DELIMITED_TOKEN).toString(); - this.defaultValue = config.get(FILE_READER_DELIMITED_DEFAULT_VALUE) == null ? - null : config.get(FILE_READER_DELIMITED_DEFAULT_VALUE).toString(); - this.hasHeader = Boolean.parseBoolean((String) config.get(FILE_READER_DELIMITED_HEADER)); + protected void configure(Map config) { + this.token = Optional.ofNullable(config.get(FILE_READER_DELIMITED_TOKEN)) + .filter(t -> !t.isEmpty()) + .orElseThrow(() -> new IllegalArgumentException( + FILE_READER_DELIMITED_TOKEN + " property cannot be empty for DelimitedTextFileReader") + ); + this.defaultValue = config.get(FILE_READER_DELIMITED_DEFAULT_VALUE); + this.hasHeader = Boolean.parseBoolean(config.getOrDefault(FILE_READER_DELIMITED_HEADER, "false")); } @Override diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/JsonFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/JsonFileReader.java index 58230f9..3e8f411 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/JsonFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/JsonFileReader.java @@ -51,19 +51,18 @@ public JsonFileReader(FileSystem fs, Path filePath, Map config) } @Override - protected void configure(Map config) { + protected void configure(Map config) { mapper = new ObjectMapper(); Set deserializationFeatures = Arrays.stream(DeserializationFeature.values()) .map(Enum::name) .collect(Collectors.toSet()); config.entrySet().stream() - .filter(entry -> entry.getValue() != null) .filter(entry -> entry.getKey().startsWith(FILE_READER_JSON_DESERIALIZATION_CONFIGS)) .forEach(entry -> { String feature = entry.getKey().replaceAll(FILE_READER_JSON_DESERIALIZATION_CONFIGS, ""); if (deserializationFeatures.contains(feature)) { mapper.configure(DeserializationFeature.valueOf(feature), - Boolean.parseBoolean(entry.getValue().toString())); + Boolean.parseBoolean(entry.getValue())); } else { log.warn("Ignoring deserialization configuration '" + feature + "' due to it does not exist."); } @@ -189,6 +188,7 @@ private Object mapValue(Schema schema, JsonNode value) { throw new IllegalStateException(ioe); } case OBJECT: + case POJO: Struct struct = new Struct(schema); Iterable> fields = value::fields; StreamSupport.stream(fields.spliterator(), false) @@ -202,7 +202,6 @@ private Object mapValue(Schema schema, JsonNode value) { .map(elm -> mapValue(schema, elm)) .collect(Collectors.toList()); case NULL: - case POJO: case MISSING: default: return null; diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReader.java index 40a939a..595c340 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReader.java @@ -58,19 +58,9 @@ public SequenceFileReader(FileSystem fs, Path filePath, Map conf } @Override - protected void configure(Map config) 
{ - if (config.get(FILE_READER_SEQUENCE_FIELD_NAME_KEY) == null || - config.get(FILE_READER_SEQUENCE_FIELD_NAME_KEY).toString().equals("")) { - this.keyFieldName = FIELD_NAME_KEY_DEFAULT; - } else { - this.keyFieldName = config.get(FILE_READER_SEQUENCE_FIELD_NAME_KEY).toString(); - } - if (config.get(FILE_READER_SEQUENCE_FIELD_NAME_VALUE) == null || - config.get(FILE_READER_SEQUENCE_FIELD_NAME_VALUE).toString().equals("")) { - this.valueFieldName = FIELD_NAME_VALUE_DEFAULT; - } else { - this.valueFieldName = config.get(FILE_READER_SEQUENCE_FIELD_NAME_VALUE).toString(); - } + protected void configure(Map config) { + this.keyFieldName = config.getOrDefault(FILE_READER_SEQUENCE_FIELD_NAME_KEY, FIELD_NAME_KEY_DEFAULT); + this.valueFieldName = config.getOrDefault(FILE_READER_SEQUENCE_FIELD_NAME_VALUE, FIELD_NAME_VALUE_DEFAULT); } private Schema getSchema(Writable writable) { diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReader.java index fb5db46..28f5c48 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReader.java @@ -71,41 +71,17 @@ public TextFileReader(FileSystem fs, Path filePath, Map config) } @Override - protected void configure(Map config) { - String valueFieldName; - if (config.get(FILE_READER_TEXT_FIELD_NAME_VALUE) == null || - config.get(FILE_READER_TEXT_FIELD_NAME_VALUE).toString().equals("")) { - valueFieldName = FIELD_NAME_VALUE_DEFAULT; - } else { - valueFieldName = config.get(FILE_READER_TEXT_FIELD_NAME_VALUE).toString(); - } - if (config.get(FILE_READER_TEXT_COMPRESSION_TYPE) == null || - config.get(FILE_READER_TEXT_COMPRESSION_TYPE).toString().equals("")) { - this.compression = CompressionType.NONE; - } else { - boolean concatenated = true; - if (config.get(FILE_READER_TEXT_COMPRESSION_CONCATENATED) != null && - !config.get(FILE_READER_TEXT_COMPRESSION_CONCATENATED).toString().equals("")) { - concatenated = Boolean.parseBoolean(config.get(FILE_READER_TEXT_COMPRESSION_CONCATENATED) - .toString().trim()); - } - this.compression = CompressionType.fromName(config.get(FILE_READER_TEXT_COMPRESSION_TYPE).toString(), concatenated); - } - if (config.get(FILE_READER_TEXT_ENCODING) == null || - config.get(FILE_READER_TEXT_ENCODING).toString().equals("")) { - this.charset = Charset.defaultCharset(); - } else { - this.charset = Charset.forName(config.get(FILE_READER_TEXT_ENCODING).toString()); - } - if (config.get(FILE_READER_TEXT_RECORD_PER_LINE) == null || - config.get(FILE_READER_TEXT_RECORD_PER_LINE).toString().equals("")) { - this.recordPerLine = true; - } else { - this.recordPerLine = Boolean.parseBoolean(config.get(FILE_READER_TEXT_RECORD_PER_LINE).toString()); - } + protected void configure(Map config) { this.schema = SchemaBuilder.struct() - .field(valueFieldName, Schema.STRING_SCHEMA) + .field(config.getOrDefault(FILE_READER_TEXT_FIELD_NAME_VALUE, FIELD_NAME_VALUE_DEFAULT), + Schema.STRING_SCHEMA) .build(); + this.recordPerLine = Boolean.parseBoolean(config.getOrDefault(FILE_READER_TEXT_RECORD_PER_LINE, "true")); + String cType = config.getOrDefault(FILE_READER_TEXT_COMPRESSION_TYPE, CompressionType.NONE.toString()); + boolean concatenated = Boolean.parseBoolean(config.getOrDefault(FILE_READER_TEXT_COMPRESSION_CONCATENATED, + "true")); + this.compression = CompressionType.fromName(cType, concatenated); + this.charset = 
Charset.forName(config.getOrDefault(FILE_READER_TEXT_ENCODING, Charset.defaultCharset().name())); } private Reader getFileReader(InputStream inputStream) throws IOException { From 6246f21bcd91ad404be63a343a5634861193150a Mon Sep 17 00:00:00 2001 From: Mario Molina Date: Sat, 7 Mar 2020 13:57:42 +0100 Subject: [PATCH 15/51] New policy based on cron expressions --- pom.xml | 18 ++-- .../fs/file/reader/ParquetFileReader.java | 19 ++-- .../connect/fs/policy/AbstractPolicy.java | 8 +- .../kafka/connect/fs/policy/CronPolicy.java | 75 ++++++++++++++++ .../connect/fs/policy/PolicyTestBase.java | 6 -- .../fs/policy/hdfs/CronPolicyTest.java | 90 +++++++++++++++++++ .../fs/policy/local/CronPolicyTest.java | 90 +++++++++++++++++++ 7 files changed, 279 insertions(+), 27 deletions(-) create mode 100644 src/main/java/com/github/mmolimar/kafka/connect/fs/policy/CronPolicy.java create mode 100644 src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/CronPolicyTest.java create mode 100644 src/test/java/com/github/mmolimar/kafka/connect/fs/policy/local/CronPolicyTest.java diff --git a/pom.xml b/pom.xml index 606806a..11a210e 100644 --- a/pom.xml +++ b/pom.xml @@ -14,9 +14,10 @@ 2.4.0 5.4.0 3.2.1 - 1.9.2 1.11.0 + 1.9.2 2.10.2 + 9.0.2 5.6.0 4.2 2.0.5 @@ -53,6 +54,11 @@ hadoop-aws ${hadoop.version} + + org.apache.parquet + parquet-avro + ${parquet.version} + org.apache.avro avro @@ -64,16 +70,16 @@ ${avro.version} nodeps - - org.apache.parquet - parquet-avro - ${parquet.version} - com.fasterxml.jackson.core jackson-core ${fasterxml-jackson.version} + + com.cronutils + cron-utils + ${cron-utils.version} + diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/ParquetFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/ParquetFileReader.java index 6afe74f..8e5fd33 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/ParquetFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/ParquetFileReader.java @@ -18,6 +18,7 @@ import java.io.IOException; import java.util.Map; import java.util.NoSuchElementException; +import java.util.Optional; import static com.github.mmolimar.kafka.connect.fs.FsSourceTaskConfig.FILE_READER_PREFIX; @@ -58,17 +59,13 @@ private ParquetReader initReader() throws IOException { .build(); } - protected void configure(Map config) { - if (config.get(FILE_READER_PARQUET_SCHEMA) != null) { - this.schema = new Schema.Parser().parse(config.get(FILE_READER_PARQUET_SCHEMA).toString()); - } else { - this.schema = null; - } - if (config.get(FILE_READER_PARQUET_PROJECTION) != null) { - this.projection = new Schema.Parser().parse(config.get(FILE_READER_PARQUET_PROJECTION).toString()); - } else { - this.projection = null; - } + protected void configure(Map config) { + this.schema = Optional.ofNullable(config.get(FILE_READER_PARQUET_SCHEMA)) + .map(c -> new Schema.Parser().parse(c)) + .orElse(null); + this.projection = Optional.ofNullable(config.get(FILE_READER_PARQUET_PROJECTION)) + .map(c -> new Schema.Parser().parse(c)) + .orElse(null); } @Override diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/AbstractPolicy.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/AbstractPolicy.java index 251987e..849692a 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/AbstractPolicy.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/AbstractPolicy.java @@ -126,14 +126,13 @@ public void interrupt() { protected void preCheck() { } - private 
void postCheck() { + protected void postCheck() { } public Iterator listFiles(FileSystem fs) throws IOException { return new Iterator() { RemoteIterator it = fs.listFiles(fs.getWorkingDirectory(), recursive); LocatedFileStatus current = null; - boolean previous = false; @Override public boolean hasNext() { @@ -188,7 +187,7 @@ FileMetadata toMetadata(LocatedFileStatus fileStatus) { } @Override - public FileReader offer(FileMetadata metadata, OffsetStorageReader offsetStorageReader) throws IOException { + public FileReader offer(FileMetadata metadata, OffsetStorageReader offsetStorageReader) { Map partition = new HashMap() {{ put("path", metadata.getPath()); //TODO manage blocks @@ -201,7 +200,8 @@ public FileReader offer(FileMetadata metadata, OffsetStorageReader offsetStorage FileReader reader; try { - reader = ReflectionUtils.makeReader((Class) conf.getClass(FsSourceTaskConfig.FILE_READER_CLASS), + reader = ReflectionUtils.makeReader( + (Class) conf.getClass(FsSourceTaskConfig.FILE_READER_CLASS), current, new Path(metadata.getPath()), conf.originals()); } catch (Throwable t) { throw new ConnectException("An error has occurred when creating reader for file: " + metadata.getPath(), t); diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/CronPolicy.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/CronPolicy.java new file mode 100644 index 0000000..0774789 --- /dev/null +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/CronPolicy.java @@ -0,0 +1,75 @@ +package com.github.mmolimar.kafka.connect.fs.policy; + +import com.cronutils.model.CronType; +import com.cronutils.model.definition.CronDefinitionBuilder; +import com.cronutils.model.time.ExecutionTime; +import com.cronutils.parser.CronParser; +import com.github.mmolimar.kafka.connect.fs.FsSourceTaskConfig; +import org.apache.kafka.common.config.ConfigException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.time.DateTimeException; +import java.time.LocalDateTime; +import java.time.ZoneId; +import java.time.ZonedDateTime; +import java.util.Date; +import java.util.Map; + +public class CronPolicy extends AbstractPolicy { + + private static final Logger log = LoggerFactory.getLogger(CronPolicy.class); + + private static final String CRON_POLICY_PREFIX = FsSourceTaskConfig.POLICY_PREFIX + "cron."; + + public static final String CRON_POLICY_EXPRESSION = CRON_POLICY_PREFIX + "expression"; + public static final String CRON_POLICY_END_DATE = CRON_POLICY_PREFIX + "end_date"; + + private ExecutionTime executionTime; + private Date endDate; + + public CronPolicy(FsSourceTaskConfig conf) throws IOException { + super(conf); + } + + @Override + protected void configPolicy(Map customConfigs) { + try { + if (customConfigs.get(CRON_POLICY_END_DATE) != null && + !customConfigs.get(CRON_POLICY_END_DATE).toString().equals("")) { + endDate = Date.from(LocalDateTime.parse(customConfigs.get(CRON_POLICY_END_DATE).toString().trim()) + .atZone(ZoneId.systemDefault()).toInstant()); + } + executionTime = ExecutionTime.forCron( + new CronParser(CronDefinitionBuilder.instanceDefinitionFor(CronType.QUARTZ)) + .parse(customConfigs.get(CRON_POLICY_EXPRESSION).toString()) + ); + } catch (DateTimeException dte) { + throw new ConfigException(CRON_POLICY_END_DATE + " property must have a proper value. 
Got: '" + + customConfigs.get(CRON_POLICY_END_DATE) + "'."); + } catch (IllegalArgumentException iae) { + throw new ConfigException(CRON_POLICY_EXPRESSION + " property must have a proper value. Got: '" + + customConfigs.get(CRON_POLICY_EXPRESSION) + "'."); + } + } + + @Override + protected void preCheck() { + executionTime.timeToNextExecution(ZonedDateTime.now()) + .ifPresent(next -> { + try { + Thread.sleep(next.toMillis()); + } catch (InterruptedException ie) { + log.warn("An interrupted exception has occurred.", ie); + } + }); + } + + @Override + protected boolean isPolicyCompleted() { + return (endDate != null && + endDate.before(Date.from(LocalDateTime.now().atZone(ZoneId.systemDefault()).toInstant()))) || + !executionTime.timeToNextExecution(ZonedDateTime.now()).isPresent(); + } +} diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/PolicyTestBase.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/PolicyTestBase.java index 8c9eba9..4a1aa42 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/PolicyTestBase.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/PolicyTestBase.java @@ -129,12 +129,6 @@ public void recursiveDirectory() throws IOException, InterruptedException { assertFalse(it.hasNext()); } - @Test - public void hasEnded() throws IOException { - policy.execute(); - assertTrue(policy.hasEnded()); - } - @Test public void execPolicyAlreadyEnded() throws IOException { policy.execute(); diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/CronPolicyTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/CronPolicyTest.java new file mode 100644 index 0000000..26d20d1 --- /dev/null +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/CronPolicyTest.java @@ -0,0 +1,90 @@ +package com.github.mmolimar.kafka.connect.fs.policy.hdfs; + +import com.github.mmolimar.kafka.connect.fs.FsSourceTaskConfig; +import com.github.mmolimar.kafka.connect.fs.file.reader.TextFileReader; +import com.github.mmolimar.kafka.connect.fs.policy.CronPolicy; +import com.github.mmolimar.kafka.connect.fs.policy.Policy; +import com.github.mmolimar.kafka.connect.fs.util.ReflectionUtils; +import org.apache.hadoop.fs.Path; +import org.apache.kafka.common.config.ConfigException; +import org.apache.kafka.connect.errors.IllegalWorkerStateException; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.time.LocalDateTime; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map; +import java.util.UUID; + +import static org.junit.jupiter.api.Assertions.*; + +public class CronPolicyTest extends HdfsPolicyTestBase { + + @BeforeAll + public static void setUp() throws IOException { + directories = new ArrayList() {{ + add(new Path(fsUri.toString(), UUID.randomUUID().toString())); + add(new Path(fsUri.toString(), UUID.randomUUID().toString())); + }}; + for (Path dir : directories) { + fs.mkdirs(dir); + } + + Map cfg = new HashMap() {{ + String[] uris = directories.stream().map(Path::toString) + .toArray(String[]::new); + put(FsSourceTaskConfig.FS_URIS, String.join(",", uris)); + put(FsSourceTaskConfig.TOPIC, "topic_test"); + put(FsSourceTaskConfig.POLICY_CLASS, CronPolicy.class.getName()); + put(FsSourceTaskConfig.FILE_READER_CLASS, TextFileReader.class.getName()); + put(FsSourceTaskConfig.POLICY_REGEXP, "^[0-9]*\\.txt$"); + put(FsSourceTaskConfig.POLICY_PREFIX_FS + "dfs.data.dir", "test"); + 
put(FsSourceTaskConfig.POLICY_PREFIX_FS + "fs.default.name", "hdfs://test"); + put(CronPolicy.CRON_POLICY_EXPRESSION, "0/2 * * * * ?"); + put(CronPolicy.CRON_POLICY_END_DATE, LocalDateTime.now().plusDays(1).toString()); + }}; + taskConfig = new FsSourceTaskConfig(cfg); + } + + @Test + @Override + public void execPolicyAlreadyEnded() throws IOException { + policy.execute(); + policy.interrupt(); + assertTrue(policy.hasEnded()); + assertThrows(IllegalWorkerStateException.class, () -> policy.execute()); + } + + @Test + public void invalidCronExpression() { + Map originals = taskConfig.originalsStrings(); + originals.put(CronPolicy.CRON_POLICY_EXPRESSION, "invalid"); + FsSourceTaskConfig cfg = new FsSourceTaskConfig(originals); + assertThrows(ConfigException.class, () -> ReflectionUtils.makePolicy( + (Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), cfg)); + } + + @Test + public void invalidEndDate() { + Map originals = taskConfig.originalsStrings(); + originals.put(CronPolicy.CRON_POLICY_END_DATE, "invalid"); + FsSourceTaskConfig cfg = new FsSourceTaskConfig(originals); + assertThrows(ConfigException.class, () -> ReflectionUtils.makePolicy( + (Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), cfg)); + } + + @Test + public void canBeInterrupted() throws Throwable { + policy = ReflectionUtils.makePolicy( + (Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), taskConfig); + + for (int i = 0; i < 5; i++) { + assertFalse(policy.hasEnded()); + policy.execute(); + } + policy.interrupt(); + assertTrue(policy.hasEnded()); + } +} diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/local/CronPolicyTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/local/CronPolicyTest.java new file mode 100644 index 0000000..f054371 --- /dev/null +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/local/CronPolicyTest.java @@ -0,0 +1,90 @@ +package com.github.mmolimar.kafka.connect.fs.policy.local; + +import com.github.mmolimar.kafka.connect.fs.FsSourceTaskConfig; +import com.github.mmolimar.kafka.connect.fs.file.reader.TextFileReader; +import com.github.mmolimar.kafka.connect.fs.policy.CronPolicy; +import com.github.mmolimar.kafka.connect.fs.policy.Policy; +import com.github.mmolimar.kafka.connect.fs.util.ReflectionUtils; +import org.apache.hadoop.fs.Path; +import org.apache.kafka.common.config.ConfigException; +import org.apache.kafka.connect.errors.IllegalWorkerStateException; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.time.LocalDateTime; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map; +import java.util.UUID; + +import static org.junit.jupiter.api.Assertions.*; + +public class CronPolicyTest extends LocalPolicyTestBase { + + @BeforeAll + public static void setUp() throws IOException { + directories = new ArrayList() {{ + add(new Path(fsUri.toString(), UUID.randomUUID().toString())); + add(new Path(fsUri.toString(), UUID.randomUUID().toString())); + }}; + for (Path dir : directories) { + fs.mkdirs(dir); + } + + Map cfg = new HashMap() {{ + String[] uris = directories.stream().map(Path::toString) + .toArray(String[]::new); + put(FsSourceTaskConfig.FS_URIS, String.join(",", uris)); + put(FsSourceTaskConfig.TOPIC, "topic_test"); + put(FsSourceTaskConfig.POLICY_CLASS, CronPolicy.class.getName()); + put(FsSourceTaskConfig.FILE_READER_CLASS, TextFileReader.class.getName()); + put(FsSourceTaskConfig.POLICY_REGEXP, "^[0-9]*\\.txt$"); + 
put(FsSourceTaskConfig.POLICY_PREFIX_FS + "dfs.data.dir", "test"); + put(FsSourceTaskConfig.POLICY_PREFIX_FS + "fs.default.name", "hdfs://test"); + put(CronPolicy.CRON_POLICY_EXPRESSION, "0/2 * * * * ?"); + put(CronPolicy.CRON_POLICY_END_DATE, LocalDateTime.now().plusDays(1).toString()); + }}; + taskConfig = new FsSourceTaskConfig(cfg); + } + + @Test + @Override + public void execPolicyAlreadyEnded() throws IOException { + policy.execute(); + policy.interrupt(); + assertTrue(policy.hasEnded()); + assertThrows(IllegalWorkerStateException.class, () -> policy.execute()); + } + + @Test + public void invalidCronExpression() { + Map originals = taskConfig.originalsStrings(); + originals.put(CronPolicy.CRON_POLICY_EXPRESSION, "invalid"); + FsSourceTaskConfig cfg = new FsSourceTaskConfig(originals); + assertThrows(ConfigException.class, () -> ReflectionUtils.makePolicy( + (Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), cfg)); + } + + @Test + public void invalidEndDate() { + Map originals = taskConfig.originalsStrings(); + originals.put(CronPolicy.CRON_POLICY_END_DATE, "invalid"); + FsSourceTaskConfig cfg = new FsSourceTaskConfig(originals); + assertThrows(ConfigException.class, () -> ReflectionUtils.makePolicy( + (Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), cfg)); + } + + @Test + public void canBeInterrupted() throws Throwable { + policy = ReflectionUtils.makePolicy( + (Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), taskConfig); + + for (int i = 0; i < 5; i++) { + assertFalse(policy.hasEnded()); + policy.execute(); + } + policy.interrupt(); + assertTrue(policy.hasEnded()); + } +} From 21bd7616145fa831158070dc80916fa0ae047a4c Mon Sep 17 00:00:00 2001 From: Mario Molina Date: Sun, 8 Mar 2020 14:39:35 +0100 Subject: [PATCH 16/51] Fix in HDFS file watcher policy to avoid events for files which are being copied --- .../fs/policy/HdfsFileWatcherPolicy.java | 17 ++++++++++++++--- .../policy/hdfs/HdfsFileWatcherPolicyTest.java | 10 ---------- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/HdfsFileWatcherPolicy.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/HdfsFileWatcherPolicy.java index dd558d6..a6505a3 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/HdfsFileWatcherPolicy.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/HdfsFileWatcherPolicy.java @@ -112,13 +112,24 @@ public void run() { for (Event event : batch.getEvents()) { switch (event.getEventType()) { case CREATE: - enqueue(((Event.CreateEvent) event).getPath()); + if (!((Event.CreateEvent) event).getPath().endsWith("._COPYING_")) { + enqueue(((Event.CreateEvent) event).getPath()); + } break; case APPEND: - enqueue(((Event.AppendEvent) event).getPath()); + if (!((Event.AppendEvent) event).getPath().endsWith("._COPYING_")) { + enqueue(((Event.AppendEvent) event).getPath()); + } + break; + case RENAME: + if (((Event.RenameEvent) event).getSrcPath().endsWith("._COPYING_")) { + enqueue(((Event.RenameEvent) event).getDstPath()); + } break; case CLOSE: - enqueue(((Event.CloseEvent) event).getPath()); + if (!((Event.CloseEvent) event).getPath().endsWith("._COPYING_")) { + enqueue(((Event.CloseEvent) event).getPath()); + } break; default: break; diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/HdfsFileWatcherPolicyTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/HdfsFileWatcherPolicyTest.java index dca39be..ec68d68 100644 
--- a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/HdfsFileWatcherPolicyTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/HdfsFileWatcherPolicyTest.java @@ -58,16 +58,6 @@ public void invalidDirectory() throws IOException { } } - //This policy never ends at least all watchers die - @Test - @Override - public void hasEnded() throws IOException { - policy.execute(); - assertFalse(policy.hasEnded()); - policy.interrupt(); - assertTrue(policy.hasEnded()); - } - //This policy never ends. We have to interrupt it @Test @Override From e5102e5fe803c8b518619359f5bbfe66dcd7f213 Mon Sep 17 00:00:00 2001 From: Mario Molina Date: Sun, 8 Mar 2020 14:41:33 +0100 Subject: [PATCH 17/51] Adding Cron policy to docs --- docs/source/config_options.rst | 65 ++++++++++++++++++++++------------ docs/source/policies.rst | 10 ++++++ 2 files changed, 53 insertions(+), 22 deletions(-) diff --git a/docs/source/config_options.rst b/docs/source/config_options.rst index 962f572..bc89498 100644 --- a/docs/source/config_options.rst +++ b/docs/source/config_options.rst @@ -75,7 +75,7 @@ General config properties for this connector. Flag to activate traversed recursion in subdirectories when listing files. * Type: boolean - * Default: false + * Default: ``false`` * Importance: medium ``policy.regexp`` @@ -142,7 +142,7 @@ In order to configure custom properties for this policy, the name you must use i Sleep fraction to divide the sleep time to allow interrupting the policy faster. * Type: long - * Default: 10 + * Default: ``10`` * Importance: medium ``policy.sleepy.max_execs`` @@ -150,7 +150,28 @@ In order to configure custom properties for this policy, the name you must use i An execution represents: listing files from the FS and its corresponding sleep time. * Type: long - * Default: -1 + * Default: ``-1`` + * Importance: medium + +.. _config_options-policies-cron: + +Cron +-------------------------------------------- + +In order to configure custom properties for this policy, the name you must use is ``cron``. + +``policy.cron.expression`` + Cron expression to schedule the policy. + + * Type: string + * Importance: high + +``policy.cron.end_date`` + End date to finish the policy with `ISO date-time `__ + format. + + * Type: date + * Default: ``null`` * Importance: medium .. _config_options-policies-hdfs: @@ -212,21 +233,21 @@ In order to configure custom properties for this reader, the name you must use i Custom field name for the output key to include in the Kafka message. * Type: string - * Default: key + * Default: ``key`` * Importance: medium ``file_reader.sequence.field_name.value`` Custom field name for the output value to include in the Kafka message. * Type: string - * Default: value + * Default: ``value`` * Importance: medium ``file_reader.sequence.buffer_size`` Custom buffer size to read data from the Sequence file. * Type: int - * Default: 4096 + * Default: ``4096`` * Importance: low .. _config_options-filereaders-json: @@ -241,7 +262,7 @@ To configure custom properties for this reader, the name you must use is ``json` content of the file as a record. * Type: boolean - * Default: true + * Default: ``true`` * Importance: medium ``file_reader.json.deserialization.`` @@ -255,7 +276,7 @@ To configure custom properties for this reader, the name you must use is ``json` Compression type to use when reading a file. 
* Type: enum (available values ``bzip2``, ``gzip`` and ``none``) - * Default: none + * Default: ``none`` * Importance: medium ``file_reader.json.compression.concatenated`` @@ -263,7 +284,7 @@ To configure custom properties for this reader, the name you must use is ``json` the first compressed stream. * Type: boolean - * Default: true + * Default: ``true`` * Importance: low ``file_reader.json.encoding`` @@ -284,14 +305,14 @@ To configure custom properties for this reader, the name you must use is ``text` content of the file as a record. * Type: boolean - * Default: true + * Default: ``true`` * Importance: medium ``file_reader.json.compression.type`` Compression type to use when reading a file. * Type: enum (available values ``bzip2``, ``gzip`` and ``none``) - * Default: none + * Default: ``none`` * Importance: medium ``file_reader.json.compression.concatenated`` @@ -299,14 +320,14 @@ To configure custom properties for this reader, the name you must use is ``text` the first compressed stream. * Type: boolean - * Default: true + * Default: ``true`` * Importance: low ``file_reader.text.field_name.value`` Custom field name for the output value to include in the Kafka message. * Type: string - * Default: value + * Default: ``value`` * Importance: low ``file_reader.text.encoding`` @@ -332,7 +353,7 @@ To configure custom properties for this reader, the name you must use is ``delim If the file contains header or not. * Type: boolean - * Default: false + * Default: ``false`` * Importance: medium ``file_reader.json.record_per_line`` @@ -340,7 +361,7 @@ To configure custom properties for this reader, the name you must use is ``delim content of the file as a record. * Type: boolean - * Default: true + * Default: ``true`` * Importance: medium ``file_reader.delimited.default_value`` @@ -355,7 +376,7 @@ To configure custom properties for this reader, the name you must use is ``delim Compression type to use when reading a file. * Type: enum (available values ``bzip2``, ``gzip`` and ``none``) - * Default: none + * Default: ``none`` * Importance: medium ``file_reader.json.compression.concatenated`` @@ -363,7 +384,7 @@ To configure custom properties for this reader, the name you must use is ``delim the first compressed stream. * Type: boolean - * Default: true + * Default: ``true`` * Importance: low ``file_reader.delimited.encoding`` @@ -381,35 +402,35 @@ To configure custom properties for this reader, the name you must use is ``agnos A comma-separated string list with the accepted extensions for Parquet files. * Type: string - * Default: parquet + * Default: ``parquet`` * Importance: medium ``file_reader.agnostic.extensions.avro`` A comma-separated string list with the accepted extensions for Avro files. * Type: string - * Default: avro + * Default: ``avro`` * Importance: medium ``file_reader.agnostic.extensions.sequence`` A comma-separated string list with the accepted extensions for Sequence files. * Type: string - * Default: seq + * Default: ``seq`` * Importance: medium ``file_reader.agnostic.extensions.json`` A comma-separated string list with the accepted extensions for JSON files. * Type: string - * Default: json + * Default: ``json`` * Importance: medium ``file_reader.agnostic.extensions.delimited`` A comma-separated string list with the accepted extensions for Delimited text files. * Type: string - * Default: tsv,csv + * Default: ``tsv,csv`` * Importance: medium .. note:: The Agnostic reader uses the previous ones as inner readers. 
So, in case of using this diff --git a/docs/source/policies.rst b/docs/source/policies.rst index 2e496be..dc0f607 100644 --- a/docs/source/policies.rst +++ b/docs/source/policies.rst @@ -14,6 +14,16 @@ and wait for the next one. Additionally, its custom properties allow to end it. You can learn more about the properties of this policy :ref:`here`. +Cron +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This policy is scheduled based on cron expressions; the expression format used in the configuration +follows the `Quartz Scheduler `__ library. + +After finishing each execution, the policy sleeps until the next scheduled execution, if applicable. + +You can learn more about the properties of this policy :ref:`here`. + HDFS file watcher ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ From f0f105a9439903fd65b3b35c70daf1867995fdb0 Mon Sep 17 00:00:00 2001 From: Mario Molina Date: Mon, 9 Mar 2020 05:17:29 +0100 Subject: [PATCH 18/51] Fix offset indexes in readers --- .../connect/fs/file/reader/AvroFileReader.java | 2 +- .../file/reader/DelimitedTextFileReader.java | 6 +----- .../fs/file/reader/ParquetFileReader.java | 4 ++-- .../fs/file/reader/SequenceFileReader.java | 4 ++-- .../connect/fs/file/reader/TextFileReader.java | 10 ++++++---- .../fs/file/reader/FileReaderTestBase.java | 6 +++--- .../hdfs/DelimitedTextFileReaderTest.java | 18 ++++++++++-------- .../local/DelimitedTextFileReaderTest.java | 8 ++++---- 8 files changed, 29 insertions(+), 29 deletions(-) diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReader.java index 040d837..6dd8d8a 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReader.java @@ -73,7 +73,7 @@ protected GenericRecord nextRecord() { public void seek(Offset offset) { try { reader.sync(offset.getRecordOffset()); - this.offset.setOffset(reader.previousSync() - 15); + this.offset.setOffset(reader.previousSync() - 16); } catch (IOException ioe) { throw new ConnectException("Error seeking file " + getFilePath(), ioe); } } diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/DelimitedTextFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/DelimitedTextFileReader.java index 0cae141..19fd83f 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/DelimitedTextFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/DelimitedTextFileReader.java @@ -49,11 +49,7 @@ public DelimitedTextFileReader(FileSystem fs, Path filePath, Map String columnName = hasHeader ? 
columns[index] : DEFAULT_COLUMN_NAME + "_" + ++index; schemaBuilder.field(columnName, SchemaBuilder.STRING_SCHEMA); }); - - if (!hasHeader) { - //back to the first line - inner.seek(this.offset); - } + inner.seek(this.offset); } this.schema = schemaBuilder.build(); } diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/ParquetFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/ParquetFileReader.java index 8e5fd33..cf12483 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/ParquetFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/ParquetFileReader.java @@ -74,7 +74,6 @@ public boolean hasNext() { if (currentRecord == null) { try { currentRecord = reader.read(); - if (currentRecord != null) offset.inc(); } catch (IOException ioe) { throw new ConnectException("Error reading parquet record", ioe); } @@ -95,6 +94,7 @@ record = new GenericData.Record(this.projection); record = currentRecord; } currentRecord = null; + offset.inc(); return record; } @@ -115,7 +115,7 @@ public void seek(Offset offset) { throw new ConnectException("Error initializing parquet reader", ioe); } } - while (hasNext() && this.offset.getRecordOffset() <= offset.getRecordOffset()) { + while (hasNext() && this.offset.getRecordOffset() < offset.getRecordOffset()) { nextRecord(); } } diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReader.java index 595c340..15a9d8f 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReader.java @@ -91,7 +91,7 @@ public boolean hasNext() { if (hasNextIndex == -1 || hasNextIndex == recordIndex) { hasNextIndex++; offset.inc(); - return hasNext = reader.next(key, value); + hasNext = reader.next(key, value); } return hasNext; } catch (EOFException eofe) { @@ -119,7 +119,7 @@ public void seek(Offset offset) { reader.sync(offset.getRecordOffset()); hasNextIndex = recordIndex = offset.getRecordOffset(); hasNext = false; - this.offset.setOffset(offset.getRecordOffset()); + this.offset.setOffset(offset.getRecordOffset() - 1); } catch (IOException ioe) { throw new ConnectException("Error seeking file " + getFilePath(), ioe); } diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReader.java index 28f5c48..bb3d634 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReader.java @@ -112,14 +112,12 @@ public boolean hasNext() { try { if (!recordPerLine) { List lines = new BufferedReader(reader).lines().collect(Collectors.toList()); - offset.setOffset(lines.size() - 1); current = String.join("\n", lines); finished = true; return true; } for (; ; ) { String line = reader.readLine(); - offset.setOffset(reader.getLineNumber()); if (line == null) { finished = true; return false; @@ -140,7 +138,7 @@ protected TextRecord nextRecord() { } String aux = current; current = null; - + offset.inc(); return new TextRecord(schema, aux); } @@ -159,7 +157,7 @@ public void seek(Offset offset) { while (reader.getLineNumber() < offset.getRecordOffset()) { reader.readLine(); } - this.offset.setOffset(reader.getLineNumber() + 1); + 
this.offset.setOffset(reader.getLineNumber()); } catch (IOException ioe) { throw new ConnectException("Error seeking file " + getFilePath(), ioe); } @@ -186,6 +184,10 @@ public void setOffset(long offset) { this.offset = offset; } + void inc() { + this.offset++; + } + @Override public long getRecordOffset() { return offset; diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FileReaderTestBase.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FileReaderTestBase.java index c8eec79..d9cc9f4 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FileReaderTestBase.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FileReaderTestBase.java @@ -105,19 +105,19 @@ public void seekFile() { int recordIndex = NUM_RECORDS / 2; reader.seek(getOffset(OFFSETS_BY_INDEX.get(recordIndex))); assertTrue(reader.hasNext()); - assertEquals(OFFSETS_BY_INDEX.get(recordIndex) + 1, reader.currentOffset().getRecordOffset()); + assertEquals(OFFSETS_BY_INDEX.get(recordIndex), reader.currentOffset().getRecordOffset()); checkData(reader.next(), recordIndex); recordIndex = 0; reader.seek(getOffset(OFFSETS_BY_INDEX.get(recordIndex))); assertTrue(reader.hasNext()); - assertEquals(OFFSETS_BY_INDEX.get(recordIndex) + 1, reader.currentOffset().getRecordOffset()); + assertEquals(OFFSETS_BY_INDEX.get(recordIndex), reader.currentOffset().getRecordOffset()); checkData(reader.next(), recordIndex); recordIndex = NUM_RECORDS - 3; reader.seek(getOffset(OFFSETS_BY_INDEX.get(recordIndex))); assertTrue(reader.hasNext()); - assertEquals(OFFSETS_BY_INDEX.get(recordIndex) + 1, reader.currentOffset().getRecordOffset()); + assertEquals(OFFSETS_BY_INDEX.get(recordIndex), reader.currentOffset().getRecordOffset()); checkData(reader.next(), recordIndex); reader.seek(getOffset(OFFSETS_BY_INDEX.get(NUM_RECORDS - 1) + 1)); diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/DelimitedTextFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/DelimitedTextFileReaderTest.java index f4b6c92..7914f12 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/DelimitedTextFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/DelimitedTextFileReaderTest.java @@ -50,7 +50,7 @@ private static Path createDataFile(boolean header) throws IOException { String value = String.format("%d_%s", index, UUID.randomUUID()); try { writer.append(value + "," + value + "," + value + "," + value + "\n"); - if (header) OFFSETS_BY_INDEX.put(index, (long) index); + OFFSETS_BY_INDEX.put(index, (long) index); } catch (IOException ioe) { throw new RuntimeException(ioe); } @@ -132,10 +132,12 @@ public void readAllDataWithMalformedRows() throws Throwable { int recordCount = 0; while (reader.hasNext()) { Struct record = reader.next(); - assertEquals("dummy", record.get(FIELD_COLUMN1)); - assertEquals("custom_value", record.get(FIELD_COLUMN2)); - assertEquals("custom_value", record.get(FIELD_COLUMN3)); - assertEquals("custom_value", record.get(FIELD_COLUMN4)); + assertAll( + () -> assertEquals("dummy", record.get(FIELD_COLUMN1)), + () -> assertEquals("custom_value", record.get(FIELD_COLUMN2)), + () -> assertEquals("custom_value", record.get(FIELD_COLUMN3)), + () -> assertEquals("custom_value", record.get(FIELD_COLUMN4)) + ); recordCount++; } assertEquals(2, recordCount, () -> "The number of records in the file does not match"); @@ -154,19 +156,19 @@ public void 
seekFileWithoutHeader() throws Throwable { int recordIndex = NUM_RECORDS / 2; reader.seek(getOffset(OFFSETS_BY_INDEX.get(recordIndex), false)); assertTrue(reader.hasNext()); - assertEquals(OFFSETS_BY_INDEX.get(recordIndex) + 1, reader.currentOffset().getRecordOffset()); + assertEquals(OFFSETS_BY_INDEX.get(recordIndex), reader.currentOffset().getRecordOffset()); checkData(reader.next(), recordIndex); recordIndex = 0; reader.seek(getOffset(OFFSETS_BY_INDEX.get(recordIndex), false)); assertTrue(reader.hasNext()); - assertEquals(OFFSETS_BY_INDEX.get(recordIndex) + 1, reader.currentOffset().getRecordOffset()); + assertEquals(OFFSETS_BY_INDEX.get(recordIndex), reader.currentOffset().getRecordOffset()); checkData(reader.next(), recordIndex); recordIndex = NUM_RECORDS - 3; reader.seek(getOffset(OFFSETS_BY_INDEX.get(recordIndex), false)); assertTrue(reader.hasNext()); - assertEquals(OFFSETS_BY_INDEX.get(recordIndex) + 1, reader.currentOffset().getRecordOffset()); + assertEquals(OFFSETS_BY_INDEX.get(recordIndex), reader.currentOffset().getRecordOffset()); checkData(reader.next(), recordIndex); reader.seek(getOffset(OFFSETS_BY_INDEX.get(NUM_RECORDS - 1) + 1, false)); diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/DelimitedTextFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/DelimitedTextFileReaderTest.java index 91f08e9..e8413ad 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/DelimitedTextFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/DelimitedTextFileReaderTest.java @@ -51,7 +51,7 @@ private static Path createDataFile(boolean header) throws IOException { String value = String.format("%d_%s", index, UUID.randomUUID()); try { writer.append(value + "," + value + "," + value + "," + value + "\n"); - if (header) OFFSETS_BY_INDEX.put(index, (long) index); + OFFSETS_BY_INDEX.put(index, (long) index); } catch (IOException ioe) { throw new RuntimeException(ioe); } @@ -163,19 +163,19 @@ public void seekFileWithoutHeader() throws Throwable { int recordIndex = NUM_RECORDS / 2; reader.seek(getOffset(OFFSETS_BY_INDEX.get(recordIndex), false)); assertTrue(reader.hasNext()); - assertEquals(OFFSETS_BY_INDEX.get(recordIndex) + 1, reader.currentOffset().getRecordOffset()); + assertEquals(OFFSETS_BY_INDEX.get(recordIndex), reader.currentOffset().getRecordOffset()); checkData(reader.next(), recordIndex); recordIndex = 0; reader.seek(getOffset(OFFSETS_BY_INDEX.get(recordIndex), false)); assertTrue(reader.hasNext()); - assertEquals(OFFSETS_BY_INDEX.get(recordIndex) + 1, reader.currentOffset().getRecordOffset()); + assertEquals(OFFSETS_BY_INDEX.get(recordIndex), reader.currentOffset().getRecordOffset()); checkData(reader.next(), recordIndex); recordIndex = NUM_RECORDS - 3; reader.seek(getOffset(OFFSETS_BY_INDEX.get(recordIndex), false)); assertTrue(reader.hasNext()); - assertEquals(OFFSETS_BY_INDEX.get(recordIndex) + 1, reader.currentOffset().getRecordOffset()); + assertEquals(OFFSETS_BY_INDEX.get(recordIndex), reader.currentOffset().getRecordOffset()); checkData(reader.next(), recordIndex); reader.seek(getOffset(OFFSETS_BY_INDEX.get(NUM_RECORDS - 1) + 1, false)); From 1a11a893b02f72b7c440e2adf5a345c4dad8f680 Mon Sep 17 00:00:00 2001 From: Mario Molina Date: Thu, 12 Mar 2020 20:15:40 -0600 Subject: [PATCH 19/51] Compression support for text-based file readers --- .../kafka/connect/fs/FsSourceConnector.java | 1 - .../fs/file/reader/AbstractFileReader.java | 9 +++++--- 
.../fs/file/reader/AvroFileReader.java | 1 + .../fs/file/reader/CompressionType.java | 23 +++++++++++++++++++ .../fs/file/reader/JsonFileReader.java | 5 ++++ .../fs/file/reader/TextFileReader.java | 22 ------------------ .../file/reader/hdfs/TextFileReaderTest.java | 9 ++++---- .../file/reader/local/TextFileReaderTest.java | 10 ++++---- 8 files changed, 46 insertions(+), 34 deletions(-) create mode 100644 src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/CompressionType.java diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceConnector.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceConnector.java index 0d4ad3e..0316acd 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceConnector.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceConnector.java @@ -31,7 +31,6 @@ public void start(Map properties) { log.info("Starting FsSourceConnector..."); try { config = new FsSourceConnectorConfig(properties); - } catch (ConfigException ce) { log.error("Couldn't start FsSourceConnector:", ce); throw new ConnectException("Couldn't start FsSourceConnector due to configuration error.", ce); diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AbstractFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AbstractFileReader.java index 180b0e2..dae25af 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AbstractFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AbstractFileReader.java @@ -16,7 +16,7 @@ public abstract class AbstractFileReader implements FileReader { private final FileSystem fs; private final Path filePath; - private ReaderAdapter adapter; + private final ReaderAdapter adapter; public AbstractFileReader(FileSystem fs, Path filePath, ReaderAdapter adapter, Map config) { if (fs == null || filePath == null) { @@ -26,11 +26,14 @@ public AbstractFileReader(FileSystem fs, Path filePath, ReaderAdapter adapter this.filePath = filePath; this.adapter = adapter; - Map readerConf = config.entrySet().stream() + configure(readerConfig(config)); + } + + protected final Map readerConfig(Map config) { + return config.entrySet().stream() .filter(entry -> entry.getKey().startsWith(FILE_READER_PREFIX)) .filter(entry -> entry.getValue() != null) .collect(Collectors.toMap(Map.Entry::getKey, entry -> entry.getValue().toString())); - configure(readerConf); } protected abstract void configure(Map config); diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReader.java index 6dd8d8a..2438f51 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReader.java @@ -42,6 +42,7 @@ public AvroFileReader(FileSystem fs, Path filePath, Map config) this.offset = new AvroOffset(0); } + @Override protected void configure(Map config) { this.schema = Optional.ofNullable(config.get(FILE_READER_AVRO_SCHEMA)) .map(c -> new Schema.Parser().parse(c)) diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/CompressionType.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/CompressionType.java new file mode 100644 index 0000000..9dade35 --- /dev/null +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/CompressionType.java @@ -0,0 +1,23 @@ +package 
com.github.mmolimar.kafka.connect.fs.file.reader; + +public enum CompressionType { + BZIP2, + GZIP, + NONE; + + private boolean concatenated; + + CompressionType() { + this.concatenated = true; + } + + public boolean isConcatenated() { + return concatenated; + } + + public static CompressionType fromName(String compression, boolean concatenated) { + CompressionType ct = CompressionType.valueOf(compression.trim().toUpperCase()); + ct.concatenated = concatenated; + return ct; + } +} diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/JsonFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/JsonFileReader.java index 3e8f411..cf26a34 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/JsonFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/JsonFileReader.java @@ -23,10 +23,13 @@ public class JsonFileReader extends AbstractFileReader { private static final String FILE_READER_JSON = FILE_READER_PREFIX + "json."; + private static final String FILE_READER_JSON_COMPRESSION = FILE_READER_JSON + "compression."; public static final String FILE_READER_JSON_DESERIALIZATION_CONFIGS = FILE_READER_JSON + "deserialization."; public static final String FILE_READER_JSON_RECORD_PER_LINE = FILE_READER_JSON + "record_per_line"; public static final String FILE_READER_JSON_ENCODING = FILE_READER_JSON + "encoding"; + public static final String FILE_READER_JSON_COMPRESSION_TYPE = FILE_READER_JSON_COMPRESSION + "type"; + public static final String FILE_READER_JSON_COMPRESSION_CONCATENATED = FILE_READER_JSON_COMPRESSION + "concatenated"; private final TextFileReader inner; private final Schema schema; @@ -37,6 +40,8 @@ public JsonFileReader(FileSystem fs, Path filePath, Map config) config.put(TextFileReader.FILE_READER_TEXT_ENCODING, config.get(FILE_READER_JSON_ENCODING)); config.put(TextFileReader.FILE_READER_TEXT_RECORD_PER_LINE, config.get(FILE_READER_JSON_RECORD_PER_LINE)); + config.put(TextFileReader.FILE_READER_TEXT_COMPRESSION_TYPE, config.get(FILE_READER_JSON_COMPRESSION_TYPE)); + config.put(TextFileReader.FILE_READER_TEXT_COMPRESSION_CONCATENATED, config.get(FILE_READER_JSON_COMPRESSION_CONCATENATED)); this.inner = new TextFileReader(fs, filePath, config); diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReader.java index bb3d634..a12323e 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReader.java @@ -42,28 +42,6 @@ public class TextFileReader extends AbstractFileReader config) throws IOException { super(fs, filePath, new TxtToStruct(), config); this.reader = new LineNumberReader(getFileReader(fs.open(filePath))); diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/TextFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/TextFileReaderTest.java index 79e8b88..fdb3004 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/TextFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/TextFileReaderTest.java @@ -2,6 +2,7 @@ import com.github.mmolimar.kafka.connect.fs.file.Offset; import com.github.mmolimar.kafka.connect.fs.file.reader.AgnosticFileReader; +import com.github.mmolimar.kafka.connect.fs.file.reader.CompressionType; import 
com.github.mmolimar.kafka.connect.fs.file.reader.FileReader; import com.github.mmolimar.kafka.connect.fs.file.reader.TextFileReader; import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream; @@ -25,7 +26,7 @@ public class TextFileReaderTest extends HdfsFileReaderTestBase { private static final String FIELD_NAME_VALUE = "custom_field_name"; private static final String FILE_EXTENSION = "txt"; - private static final TextFileReader.CompressionType COMPRESSION_TYPE = TextFileReader.CompressionType.GZIP; + private static final CompressionType COMPRESSION_TYPE = CompressionType.GZIP; @BeforeAll public static void setUp() throws IOException { @@ -38,7 +39,7 @@ public static void setUp() throws IOException { }}; } - private static OutputStream getOutputStream(File file, TextFileReader.CompressionType compression) throws IOException { + private static OutputStream getOutputStream(File file, CompressionType compression) throws IOException { final OutputStream os; switch (compression) { case BZIP2: @@ -54,7 +55,7 @@ private static OutputStream getOutputStream(File file, TextFileReader.Compressio return os; } - private static Path createDataFile(TextFileReader.CompressionType compression) throws IOException { + private static Path createDataFile(CompressionType compression) throws IOException { File txtFile = File.createTempFile("test-", "." + FILE_EXTENSION); try (PrintWriter writer = new PrintWriter(getOutputStream(txtFile, compression))) { IntStream.range(0, NUM_RECORDS).forEach(index -> { @@ -112,7 +113,7 @@ public void readDataWithRecordPerLineDisabled() throws Throwable { @Test public void readDifferentCompressionTypes() { - Arrays.stream(TextFileReader.CompressionType.values()).forEach(compressionType -> { + Arrays.stream(CompressionType.values()).forEach(compressionType -> { try { Path file = createDataFile(compressionType); FileReader reader = getReader(fs, file, new HashMap() {{ diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/TextFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/TextFileReaderTest.java index edb26e4..fd80931 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/TextFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/TextFileReaderTest.java @@ -2,6 +2,7 @@ import com.github.mmolimar.kafka.connect.fs.file.Offset; import com.github.mmolimar.kafka.connect.fs.file.reader.AgnosticFileReader; +import com.github.mmolimar.kafka.connect.fs.file.reader.CompressionType; import com.github.mmolimar.kafka.connect.fs.file.reader.FileReader; import com.github.mmolimar.kafka.connect.fs.file.reader.TextFileReader; import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream; @@ -25,7 +26,7 @@ public class TextFileReaderTest extends LocalFileReaderTestBase { private static final String FIELD_NAME_VALUE = "custom_field_name"; private static final String FILE_EXTENSION = "txt"; - private static final TextFileReader.CompressionType COMPRESSION_TYPE = TextFileReader.CompressionType.GZIP; + private static final CompressionType COMPRESSION_TYPE = CompressionType.GZIP; @BeforeAll public static void setUp() throws IOException { @@ -38,7 +39,7 @@ public static void setUp() throws IOException { }}; } - private static OutputStream getOutputStream(File file, TextFileReader.CompressionType compression) throws IOException { + private static OutputStream getOutputStream(File file, CompressionType compression) throws 
IOException { final OutputStream os; switch (compression) { case BZIP2: @@ -54,7 +55,8 @@ private static OutputStream getOutputStream(File file, TextFileReader.Compressio return os; } - private static Path createDataFile(TextFileReader.CompressionType compression) throws IOException { + + private static Path createDataFile(CompressionType compression) throws IOException { File txtFile = File.createTempFile("test-", "." + FILE_EXTENSION); try (PrintWriter writer = new PrintWriter(getOutputStream(txtFile, compression))) { IntStream.range(0, NUM_RECORDS).forEach(index -> { @@ -112,7 +114,7 @@ public void readDataWithRecordPerLineDisabled() throws Throwable { @Test public void readDifferentCompressionTypes() { - Arrays.stream(TextFileReader.CompressionType.values()).forEach(compressionType -> { + Arrays.stream(CompressionType.values()).forEach(compressionType -> { try { Path file = createDataFile(compressionType); FileReader reader = getReader(fs, file, new HashMap() {{ From 5ee8eb4c9b928a3f96e6e611a096effc8cd75645 Mon Sep 17 00:00:00 2001 From: Mario Molina Date: Thu, 12 Mar 2020 20:17:24 -0600 Subject: [PATCH 20/51] Avoid verbosity in logs from HDFS for tests --- src/test/resources/log4j.properties | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/test/resources/log4j.properties b/src/test/resources/log4j.properties index 18e87a8..493f160 100644 --- a/src/test/resources/log4j.properties +++ b/src/test/resources/log4j.properties @@ -7,7 +7,8 @@ log4j.appender.stdout.Target=System.out log4j.appender.stdout.layout=org.apache.log4j.PatternLayout log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c:%L - %m%n -log4j.logger.org.apache.hadoop=WARN +log4j.logger.org.apache.hadoop=ERROR +log4j.logger.BlockStateChange=WARN log4j.logger.org.apache.parquet=WARN log4j.logger.org.eclipse.jetty=WARN log4j.logger.io.confluent.connect.avro=WARN From 22de7392996c18a6b587ddec2fd3093167f06b17 Mon Sep 17 00:00:00 2001 From: Mario Molina Date: Fri, 13 Mar 2020 18:07:29 -0600 Subject: [PATCH 21/51] Removed delimited file reader --- .../file/reader/DelimitedTextFileReader.java | 153 ------------ .../fs/file/reader/SequenceFileReader.java | 8 +- .../hdfs/DelimitedTextFileReaderTest.java | 221 ----------------- .../local/DelimitedTextFileReaderTest.java | 230 ------------------ 4 files changed, 4 insertions(+), 608 deletions(-) delete mode 100644 src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/DelimitedTextFileReader.java delete mode 100644 src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/DelimitedTextFileReaderTest.java delete mode 100644 src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/DelimitedTextFileReaderTest.java diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/DelimitedTextFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/DelimitedTextFileReader.java deleted file mode 100644 index 19fd83f..0000000 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/DelimitedTextFileReader.java +++ /dev/null @@ -1,153 +0,0 @@ -package com.github.mmolimar.kafka.connect.fs.file.reader; - -import com.github.mmolimar.kafka.connect.fs.file.Offset; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.kafka.connect.data.Schema; -import org.apache.kafka.connect.data.SchemaBuilder; -import org.apache.kafka.connect.data.Struct; - -import java.io.IOException; -import java.util.Map; -import java.util.Optional; -import 
java.util.stream.IntStream; - -import static com.github.mmolimar.kafka.connect.fs.FsSourceTaskConfig.FILE_READER_PREFIX; - -public class DelimitedTextFileReader extends AbstractFileReader { - - private static final String FILE_READER_DELIMITED = FILE_READER_PREFIX + "delimited."; - - public static final String FILE_READER_DELIMITED_HEADER = FILE_READER_DELIMITED + "header"; - public static final String FILE_READER_DELIMITED_TOKEN = FILE_READER_DELIMITED + "token"; - public static final String FILE_READER_DELIMITED_ENCODING = FILE_READER_DELIMITED + "encoding"; - public static final String FILE_READER_DELIMITED_DEFAULT_VALUE = FILE_READER_DELIMITED + "default_value"; - - private static final String DEFAULT_COLUMN_NAME = "column"; - - private final TextFileReader inner; - private final Schema schema; - private DelimitedTextOffset offset; - private String token; - private String defaultValue; - private boolean hasHeader; - - public DelimitedTextFileReader(FileSystem fs, Path filePath, Map config) throws IOException { - super(fs, filePath, new DelimitedTxtToStruct(), config); - - config.put(TextFileReader.FILE_READER_TEXT_ENCODING, config.get(FILE_READER_DELIMITED_ENCODING)); - config.put(TextFileReader.FILE_READER_TEXT_RECORD_PER_LINE, "true"); - - this.inner = new TextFileReader(fs, filePath, config); - this.offset = new DelimitedTextOffset(0, hasHeader); - - SchemaBuilder schemaBuilder = SchemaBuilder.struct(); - if (hasNext()) { - String firstLine = inner.nextRecord().getValue(); - String[] columns = firstLine.split(token); - IntStream.range(0, columns.length).forEach(index -> { - String columnName = hasHeader ? columns[index] : DEFAULT_COLUMN_NAME + "_" + ++index; - schemaBuilder.field(columnName, SchemaBuilder.STRING_SCHEMA); - }); - inner.seek(this.offset); - } - this.schema = schemaBuilder.build(); - } - - @Override - protected void configure(Map config) { - this.token = Optional.ofNullable(config.get(FILE_READER_DELIMITED_TOKEN)) - .filter(t -> !t.isEmpty()) - .orElseThrow(() -> new IllegalArgumentException( - FILE_READER_DELIMITED_TOKEN + " property cannot be empty for DelimitedTextFileReader") - ); - this.defaultValue = config.get(FILE_READER_DELIMITED_DEFAULT_VALUE); - this.hasHeader = Boolean.parseBoolean(config.getOrDefault(FILE_READER_DELIMITED_HEADER, "false")); - } - - @Override - protected DelimitedRecord nextRecord() { - offset.inc(); - String[] values = inner.nextRecord().getValue().split(token); - return new DelimitedRecord(schema, defaultValue != null ? fillNullValues(values) : values); - } - - private String[] fillNullValues(final String[] values) { - return IntStream.range(0, schema.fields().size()) - .mapToObj(index -> { - if (index < values.length) { - return values[index]; - } else { - return defaultValue; - } - }).toArray(String[]::new); - } - - @Override - public boolean hasNext() { - return inner.hasNext(); - } - - @Override - public void seek(Offset offset) { - inner.seek(offset); - this.offset.setOffset(inner.currentOffset().getRecordOffset()); - } - - @Override - public Offset currentOffset() { - return offset; - } - - @Override - public void close() throws IOException { - inner.close(); - } - - public static class DelimitedTextOffset implements Offset { - private long offset; - private boolean hasHeader; - - public DelimitedTextOffset(long offset, boolean hasHeader) { - this.hasHeader = hasHeader; - this.offset = hasHeader && offset >= 0 ? offset + 1 : offset; - } - - public void setOffset(long offset) { - this.offset = hasHeader && offset > 0 ? 
offset - 1 : offset; - } - - void inc() { - this.offset++; - } - - @Override - public long getRecordOffset() { - return offset; - } - } - - static class DelimitedTxtToStruct implements ReaderAdapter { - - @Override - public Struct apply(DelimitedRecord record) { - Struct struct = new Struct(record.schema); - IntStream.range(0, record.schema.fields().size()).forEach(index -> { - if (index < record.values.length) { - struct.put(record.schema.fields().get(index).name(), record.values[index]); - } - }); - return struct; - } - } - - static class DelimitedRecord { - private final Schema schema; - private final String[] values; - - DelimitedRecord(Schema schema, String[] values) { - this.schema = schema; - this.values = values; - } - } -} diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReader.java index 15a9d8f..bdde95b 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReader.java @@ -37,7 +37,7 @@ public class SequenceFileReader extends AbstractFileReader config) throws IOException { super(fs, filePath, new SeqToStruct(), config); @@ -54,7 +54,7 @@ public SequenceFileReader(FileSystem fs, Path filePath, Map conf this.offset = new SeqOffset(0); this.recordIndex = this.hasNextIndex = -1; this.hasNext = false; - this.isClosed = false; + this.closed = false; } @Override @@ -86,7 +86,7 @@ private Schema getSchema(Writable writable) { @Override public boolean hasNext() { - if (isClosed) throw new IllegalStateException("Reader already closed."); + if (closed) throw new IllegalStateException("Reader already closed."); try { if (hasNextIndex == -1 || hasNextIndex == recordIndex) { hasNextIndex++; @@ -132,7 +132,7 @@ public Offset currentOffset() { @Override public void close() throws IOException { - isClosed = true; + closed = true; reader.close(); } diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/DelimitedTextFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/DelimitedTextFileReaderTest.java deleted file mode 100644 index 7914f12..0000000 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/DelimitedTextFileReaderTest.java +++ /dev/null @@ -1,221 +0,0 @@ -package com.github.mmolimar.kafka.connect.fs.file.reader.hdfs; - -import com.github.mmolimar.kafka.connect.fs.file.Offset; -import com.github.mmolimar.kafka.connect.fs.file.reader.AgnosticFileReader; -import com.github.mmolimar.kafka.connect.fs.file.reader.DelimitedTextFileReader; -import com.github.mmolimar.kafka.connect.fs.file.reader.FileReader; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.kafka.connect.data.Struct; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; - -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.nio.charset.UnsupportedCharsetException; -import java.util.HashMap; -import java.util.Map; -import java.util.UUID; -import java.util.stream.IntStream; - -import static org.junit.jupiter.api.Assertions.*; - -public class DelimitedTextFileReaderTest extends HdfsFileReaderTestBase { - - private static final String FIELD_COLUMN1 = "column_1"; - private static final String FIELD_COLUMN2 = "column_2"; - private static final String FIELD_COLUMN3 = 
"column_3"; - private static final String FIELD_COLUMN4 = "column_4"; - private static final String FILE_EXTENSION = "csv"; - - @BeforeAll - public static void setUp() throws IOException { - readerClass = AgnosticFileReader.class; - dataFile = createDataFile(true); - readerConfig = new HashMap() {{ - put(DelimitedTextFileReader.FILE_READER_DELIMITED_TOKEN, ","); - put(DelimitedTextFileReader.FILE_READER_DELIMITED_HEADER, "true"); - }}; - } - - private static Path createDataFile(boolean header) throws IOException { - File txtFile = File.createTempFile("test-", "." + FILE_EXTENSION); - try (FileWriter writer = new FileWriter(txtFile)) { - - if (header) - writer.append(FIELD_COLUMN1 + "," + FIELD_COLUMN2 + "," + FIELD_COLUMN3 + "," + FIELD_COLUMN4 + "\n"); - IntStream.range(0, NUM_RECORDS).forEach(index -> { - String value = String.format("%d_%s", index, UUID.randomUUID()); - try { - writer.append(value + "," + value + "," + value + "," + value + "\n"); - OFFSETS_BY_INDEX.put(index, (long) index); - } catch (IOException ioe) { - throw new RuntimeException(ioe); - } - }); - } - Path path = new Path(new Path(fsUri), txtFile.getName()); - fs.moveFromLocalFile(new Path(txtFile.getAbsolutePath()), path); - return path; - } - - @Test - public void emptyFile() throws Throwable { - File tmp = File.createTempFile("test-", "." + getFileExtension()); - Path path = new Path(new Path(fsUri), tmp.getName()); - fs.moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); - getReader(fs, path, readerConfig); - } - - @Test - public void invalidFileFormat() throws Throwable { - File tmp = File.createTempFile("test-", "." + getFileExtension()); - try (BufferedWriter writer = new BufferedWriter(new FileWriter(tmp))) { - writer.write("test"); - } - Path path = new Path(new Path(fsUri), tmp.getName()); - fs.moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); - getReader(fs, path, readerConfig); - } - - @Test - public void invaliConfigArgs() { - try { - readerClass.getConstructor(FileSystem.class, Path.class, Map.class).newInstance(fs, dataFile, new HashMap<>()); - } catch (Exception e) { - assertThrows(IllegalArgumentException.class, () -> { - throw e.getCause(); - }); - } - } - - @Test - public void readAllDataWithoutHeader() throws Throwable { - Path file = createDataFile(false); - FileReader reader = getReader(fs, file, new HashMap() {{ - put(DelimitedTextFileReader.FILE_READER_DELIMITED_TOKEN, ","); - put(DelimitedTextFileReader.FILE_READER_DELIMITED_HEADER, "false"); - }}); - - assertTrue(reader.hasNext()); - - int recordCount = 0; - while (reader.hasNext()) { - Struct record = reader.next(); - checkData(record, recordCount); - recordCount++; - } - assertEquals(NUM_RECORDS, recordCount, () -> "The number of records in the file does not match"); - } - - @Test - public void readAllDataWithMalformedRows() throws Throwable { - File tmp = File.createTempFile("test-", "." 
+ getFileExtension()); - try (FileWriter writer = new FileWriter(tmp)) { - writer.append(FIELD_COLUMN1 + "," + FIELD_COLUMN2 + "," + FIELD_COLUMN3 + "," + FIELD_COLUMN4 + "\n"); - writer.append("dummy\n"); - writer.append("dummy\n"); - } - Map cfg = new HashMap() {{ - put(DelimitedTextFileReader.FILE_READER_DELIMITED_TOKEN, ","); - put(DelimitedTextFileReader.FILE_READER_DELIMITED_HEADER, "true"); - put(DelimitedTextFileReader.FILE_READER_DELIMITED_DEFAULT_VALUE, "custom_value"); - }}; - Path path = new Path(new Path(fsUri), tmp.getName()); - fs.moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); - reader = getReader(fs, path, cfg); - - assertTrue(reader.hasNext()); - - int recordCount = 0; - while (reader.hasNext()) { - Struct record = reader.next(); - assertAll( - () -> assertEquals("dummy", record.get(FIELD_COLUMN1)), - () -> assertEquals("custom_value", record.get(FIELD_COLUMN2)), - () -> assertEquals("custom_value", record.get(FIELD_COLUMN3)), - () -> assertEquals("custom_value", record.get(FIELD_COLUMN4)) - ); - recordCount++; - } - assertEquals(2, recordCount, () -> "The number of records in the file does not match"); - } - - @Test - public void seekFileWithoutHeader() throws Throwable { - Path file = createDataFile(false); - FileReader reader = getReader(fs, file, new HashMap() {{ - put(DelimitedTextFileReader.FILE_READER_DELIMITED_TOKEN, ","); - put(DelimitedTextFileReader.FILE_READER_DELIMITED_HEADER, "false"); - }}); - - assertTrue(reader.hasNext()); - - int recordIndex = NUM_RECORDS / 2; - reader.seek(getOffset(OFFSETS_BY_INDEX.get(recordIndex), false)); - assertTrue(reader.hasNext()); - assertEquals(OFFSETS_BY_INDEX.get(recordIndex), reader.currentOffset().getRecordOffset()); - checkData(reader.next(), recordIndex); - - recordIndex = 0; - reader.seek(getOffset(OFFSETS_BY_INDEX.get(recordIndex), false)); - assertTrue(reader.hasNext()); - assertEquals(OFFSETS_BY_INDEX.get(recordIndex), reader.currentOffset().getRecordOffset()); - checkData(reader.next(), recordIndex); - - recordIndex = NUM_RECORDS - 3; - reader.seek(getOffset(OFFSETS_BY_INDEX.get(recordIndex), false)); - assertTrue(reader.hasNext()); - assertEquals(OFFSETS_BY_INDEX.get(recordIndex), reader.currentOffset().getRecordOffset()); - checkData(reader.next(), recordIndex); - - reader.seek(getOffset(OFFSETS_BY_INDEX.get(NUM_RECORDS - 1) + 1, false)); - assertFalse(reader.hasNext()); - } - - @Test - public void validFileEncoding() throws Throwable { - Map cfg = new HashMap() {{ - put(DelimitedTextFileReader.FILE_READER_DELIMITED_TOKEN, ","); - put(DelimitedTextFileReader.FILE_READER_DELIMITED_HEADER, "true"); - put(DelimitedTextFileReader.FILE_READER_DELIMITED_ENCODING, "Cp1252"); - }}; - getReader(fs, dataFile, cfg); - } - - @Test - public void invalidFileEncoding() { - Map cfg = new HashMap() {{ - put(DelimitedTextFileReader.FILE_READER_DELIMITED_TOKEN, ","); - put(DelimitedTextFileReader.FILE_READER_DELIMITED_HEADER, "true"); - put(DelimitedTextFileReader.FILE_READER_DELIMITED_ENCODING, "invalid_charset"); - }}; - assertThrows(UnsupportedCharsetException.class, () -> getReader(fs, dataFile, cfg)); - } - - @Override - protected Offset getOffset(long offset) { - return getOffset(offset, true); - } - - private Offset getOffset(long offset, boolean hasHeader) { - return new DelimitedTextFileReader.DelimitedTextOffset(offset, hasHeader); - } - - @Override - protected void checkData(Struct record, long index) { - assertAll( - () -> assertTrue(record.get(FIELD_COLUMN1).toString().startsWith(index + "_")), - () -> 
assertTrue(record.get(FIELD_COLUMN2).toString().startsWith(index + "_")), - () -> assertTrue(record.get(FIELD_COLUMN3).toString().startsWith(index + "_")), - () -> assertTrue(record.get(FIELD_COLUMN4).toString().startsWith(index + "_")) - ); - } - - @Override - protected String getFileExtension() { - return FILE_EXTENSION; - } -} diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/DelimitedTextFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/DelimitedTextFileReaderTest.java deleted file mode 100644 index e8413ad..0000000 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/DelimitedTextFileReaderTest.java +++ /dev/null @@ -1,230 +0,0 @@ -package com.github.mmolimar.kafka.connect.fs.file.reader.local; - -import com.github.mmolimar.kafka.connect.fs.file.Offset; -import com.github.mmolimar.kafka.connect.fs.file.reader.AgnosticFileReader; -import com.github.mmolimar.kafka.connect.fs.file.reader.DelimitedTextFileReader; -import com.github.mmolimar.kafka.connect.fs.file.reader.FileReader; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.kafka.connect.data.Struct; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; - -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.nio.charset.UnsupportedCharsetException; -import java.util.HashMap; -import java.util.Map; -import java.util.UUID; -import java.util.stream.IntStream; - -import static org.junit.jupiter.api.Assertions.*; - -public class DelimitedTextFileReaderTest extends LocalFileReaderTestBase { - - private static final String FIELD_COLUMN1 = "column_1"; - private static final String FIELD_COLUMN2 = "column_2"; - private static final String FIELD_COLUMN3 = "column_3"; - private static final String FIELD_COLUMN4 = "column_4"; - private static final String FILE_EXTENSION = "tcsv"; - - @BeforeAll - public static void setUp() throws IOException { - readerClass = AgnosticFileReader.class; - dataFile = createDataFile(true); - readerConfig = new HashMap() {{ - put(DelimitedTextFileReader.FILE_READER_DELIMITED_TOKEN, ","); - put(DelimitedTextFileReader.FILE_READER_DELIMITED_HEADER, "true"); - put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_DELIMITED, FILE_EXTENSION); - }}; - } - - private static Path createDataFile(boolean header) throws IOException { - File txtFile = File.createTempFile("test-", "." + FILE_EXTENSION); - try (FileWriter writer = new FileWriter(txtFile)) { - - if (header) - writer.append(FIELD_COLUMN1 + "," + FIELD_COLUMN2 + "," + FIELD_COLUMN3 + "," + FIELD_COLUMN4 + "\n"); - IntStream.range(0, NUM_RECORDS).forEach(index -> { - String value = String.format("%d_%s", index, UUID.randomUUID()); - try { - writer.append(value + "," + value + "," + value + "," + value + "\n"); - OFFSETS_BY_INDEX.put(index, (long) index); - } catch (IOException ioe) { - throw new RuntimeException(ioe); - } - }); - } - Path path = new Path(new Path(fsUri), txtFile.getName()); - fs.moveFromLocalFile(new Path(txtFile.getAbsolutePath()), path); - return path; - } - - @Test - public void emptyFile() throws Throwable { - File tmp = File.createTempFile("test-", "." 
+ getFileExtension()); - Path path = new Path(new Path(fsUri), tmp.getName()); - fs.moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); - getReader(fs, path, readerConfig); - } - - @Test - public void invalidFileFormat() throws Throwable { - File tmp = File.createTempFile("test-", "." + getFileExtension()); - try (BufferedWriter writer = new BufferedWriter(new FileWriter(tmp))) { - writer.write("test"); - } - Path path = new Path(new Path(fsUri), tmp.getName()); - fs.moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); - getReader(fs, path, readerConfig); - } - - @Test - public void invaliConfigArgs() { - try { - readerClass.getConstructor(FileSystem.class, Path.class, Map.class).newInstance(fs, dataFile, - new HashMap() {{ - put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_DELIMITED, FILE_EXTENSION); - }}); - } catch (Exception e) { - assertThrows(IllegalArgumentException.class, () -> { - throw e.getCause(); - }); - } - } - - @Test - public void readAllDataWithoutHeader() throws Throwable { - Path file = createDataFile(false); - FileReader reader = getReader(fs, file, new HashMap() {{ - put(DelimitedTextFileReader.FILE_READER_DELIMITED_TOKEN, ","); - put(DelimitedTextFileReader.FILE_READER_DELIMITED_HEADER, "false"); - put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_DELIMITED, getFileExtension()); - }}); - - assertTrue(reader.hasNext()); - - int recordCount = 0; - while (reader.hasNext()) { - Struct record = reader.next(); - checkData(record, recordCount); - recordCount++; - } - assertEquals(NUM_RECORDS, recordCount, () -> "The number of records in the file does not match"); - } - - @Test - public void readAllDataWithMalformedRows() throws Throwable { - File tmp = File.createTempFile("test-", "." + getFileExtension()); - try (FileWriter writer = new FileWriter(tmp)) { - writer.append(FIELD_COLUMN1 + "," + FIELD_COLUMN2 + "," + FIELD_COLUMN3 + "," + FIELD_COLUMN4 + "\n"); - writer.append("dummy\n"); - writer.append("dummy\n"); - } - Map cfg = new HashMap() {{ - put(DelimitedTextFileReader.FILE_READER_DELIMITED_TOKEN, ","); - put(DelimitedTextFileReader.FILE_READER_DELIMITED_HEADER, "true"); - put(DelimitedTextFileReader.FILE_READER_DELIMITED_DEFAULT_VALUE, "custom_value"); - put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_DELIMITED, getFileExtension()); - }}; - Path path = new Path(new Path(fsUri), tmp.getName()); - fs.moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); - reader = getReader(fs, path, cfg); - - assertTrue(reader.hasNext()); - - int recordCount = 0; - while (reader.hasNext()) { - Struct record = reader.next(); - assertAll( - () -> assertEquals("dummy", record.get(FIELD_COLUMN1)), - () -> assertEquals("custom_value", record.get(FIELD_COLUMN2)), - () -> assertEquals("custom_value", record.get(FIELD_COLUMN3)), - () -> assertEquals("custom_value", record.get(FIELD_COLUMN4)) - ); - recordCount++; - } - assertEquals(2, recordCount, () -> "The number of records in the file does not match"); - } - - @Test - public void seekFileWithoutHeader() throws Throwable { - Path file = createDataFile(false); - FileReader reader = getReader(fs, file, new HashMap() {{ - put(DelimitedTextFileReader.FILE_READER_DELIMITED_TOKEN, ","); - put(DelimitedTextFileReader.FILE_READER_DELIMITED_HEADER, "false"); - put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_DELIMITED, getFileExtension()); - }}); - - assertTrue(reader.hasNext()); - - int recordIndex = NUM_RECORDS / 2; - reader.seek(getOffset(OFFSETS_BY_INDEX.get(recordIndex), false)); - 
assertTrue(reader.hasNext()); - assertEquals(OFFSETS_BY_INDEX.get(recordIndex), reader.currentOffset().getRecordOffset()); - checkData(reader.next(), recordIndex); - - recordIndex = 0; - reader.seek(getOffset(OFFSETS_BY_INDEX.get(recordIndex), false)); - assertTrue(reader.hasNext()); - assertEquals(OFFSETS_BY_INDEX.get(recordIndex), reader.currentOffset().getRecordOffset()); - checkData(reader.next(), recordIndex); - - recordIndex = NUM_RECORDS - 3; - reader.seek(getOffset(OFFSETS_BY_INDEX.get(recordIndex), false)); - assertTrue(reader.hasNext()); - assertEquals(OFFSETS_BY_INDEX.get(recordIndex), reader.currentOffset().getRecordOffset()); - checkData(reader.next(), recordIndex); - - reader.seek(getOffset(OFFSETS_BY_INDEX.get(NUM_RECORDS - 1) + 1, false)); - assertFalse(reader.hasNext()); - } - - @Test - public void validFileEncoding() throws Throwable { - Map cfg = new HashMap() {{ - put(DelimitedTextFileReader.FILE_READER_DELIMITED_TOKEN, ","); - put(DelimitedTextFileReader.FILE_READER_DELIMITED_HEADER, "true"); - put(DelimitedTextFileReader.FILE_READER_DELIMITED_ENCODING, "Cp1252"); - put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_DELIMITED, getFileExtension()); - }}; - getReader(fs, dataFile, cfg); - } - - @Test - public void invalidFileEncoding() { - Map cfg = new HashMap() {{ - put(DelimitedTextFileReader.FILE_READER_DELIMITED_TOKEN, ","); - put(DelimitedTextFileReader.FILE_READER_DELIMITED_HEADER, "true"); - put(DelimitedTextFileReader.FILE_READER_DELIMITED_ENCODING, "invalid_charset"); - put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_DELIMITED, getFileExtension()); - }}; - assertThrows(UnsupportedCharsetException.class, () -> getReader(fs, dataFile, cfg)); - } - - @Override - protected Offset getOffset(long offset) { - return getOffset(offset, true); - } - - private Offset getOffset(long offset, boolean hasHeader) { - return new DelimitedTextFileReader.DelimitedTextOffset(offset, hasHeader); - } - - @Override - protected void checkData(Struct record, long index) { - assertAll( - () -> assertTrue(record.get(FIELD_COLUMN1).toString().startsWith(index + "_")), - () -> assertTrue(record.get(FIELD_COLUMN2).toString().startsWith(index + "_")), - () -> assertTrue(record.get(FIELD_COLUMN3).toString().startsWith(index + "_")), - () -> assertTrue(record.get(FIELD_COLUMN4).toString().startsWith(index + "_")) - ); - } - - @Override - protected String getFileExtension() { - return FILE_EXTENSION; - } -} From b7f15f30c77d03865c1cd04de4d3c08cfd29fec4 Mon Sep 17 00:00:00 2001 From: Mario Molina Date: Fri, 13 Mar 2020 18:16:42 -0600 Subject: [PATCH 22/51] Refactor tests for file readers --- .../{local => }/AvroFileReaderTest.java | 85 ++++---- .../fs/file/reader/FileReaderTestBase.java | 185 ++++++++++------ .../fs/file/reader/FileSystemConfig.java | 153 ++++++++++++++ .../fs/file/reader/JsonFileReaderTest.java | 199 ++++++++++++++++++ .../fs/file/reader/ParquetFileReaderTest.java | 192 +++++++++++++++++ .../{local => }/SequenceFileReaderTest.java | 69 +++--- .../fs/file/reader/TextFileReaderTest.java | 143 +++++++++++++ .../file/reader/hdfs/AvroFileReaderTest.java | 122 ----------- .../reader/hdfs/HdfsFileReaderTestBase.java | 33 --- .../file/reader/hdfs/JsonFileReaderTest.java | 174 --------------- .../reader/hdfs/ParquetFileReaderTest.java | 171 --------------- .../reader/hdfs/SequenceFileReaderTest.java | 110 ---------- .../file/reader/hdfs/TextFileReaderTest.java | 154 -------------- .../file/reader/local/JsonFileReaderTest.java | 177 ---------------- 
.../reader/local/LocalFileReaderTestBase.java | 29 --- .../reader/local/ParquetFileReaderTest.java | 178 ---------------- .../file/reader/local/TextFileReaderTest.java | 155 -------------- 17 files changed, 891 insertions(+), 1438 deletions(-) rename src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/{local => }/AvroFileReaderTest.java (53%) create mode 100644 src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FileSystemConfig.java create mode 100644 src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/JsonFileReaderTest.java create mode 100644 src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/ParquetFileReaderTest.java rename src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/{local => }/SequenceFileReaderTest.java (64%) create mode 100644 src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReaderTest.java delete mode 100644 src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/AvroFileReaderTest.java delete mode 100644 src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/HdfsFileReaderTestBase.java delete mode 100644 src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/JsonFileReaderTest.java delete mode 100644 src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/ParquetFileReaderTest.java delete mode 100644 src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/SequenceFileReaderTest.java delete mode 100644 src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/TextFileReaderTest.java delete mode 100644 src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/JsonFileReaderTest.java delete mode 100644 src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/LocalFileReaderTestBase.java delete mode 100644 src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/ParquetFileReaderTest.java delete mode 100644 src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/TextFileReaderTest.java diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/AvroFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReaderTest.java similarity index 53% rename from src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/AvroFileReaderTest.java rename to src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReaderTest.java index 5c707e1..176b6dd 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/AvroFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReaderTest.java @@ -1,8 +1,6 @@ -package com.github.mmolimar.kafka.connect.fs.file.reader.local; +package com.github.mmolimar.kafka.connect.fs.file.reader; import com.github.mmolimar.kafka.connect.fs.file.Offset; -import com.github.mmolimar.kafka.connect.fs.file.reader.AgnosticFileReader; -import com.github.mmolimar.kafka.connect.fs.file.reader.AvroFileReader; import org.apache.avro.AvroTypeException; import org.apache.avro.Schema; import org.apache.avro.SchemaParseException; @@ -11,10 +9,13 @@ import org.apache.avro.generic.GenericDatumWriter; import org.apache.avro.generic.GenericRecord; import org.apache.avro.io.DatumWriter; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.kafka.connect.data.Struct; import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; +import 
org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; import java.io.File; import java.io.IOException; @@ -25,7 +26,7 @@ import static org.junit.jupiter.api.Assertions.*; -public class AvroFileReaderTest extends LocalFileReaderTestBase { +public class AvroFileReaderTest extends FileReaderTestBase { private static final String FIELD_INDEX = "index"; private static final String FIELD_NAME = "name"; @@ -37,15 +38,11 @@ public class AvroFileReaderTest extends LocalFileReaderTestBase { @BeforeAll public static void setUp() throws IOException { schema = new Schema.Parser().parse(AvroFileReaderTest.class.getResourceAsStream("/file/reader/schemas/people.avsc")); - readerClass = AgnosticFileReader.class; - dataFile = createDataFile(); - readerConfig = new HashMap() {{ - put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_AVRO, FILE_EXTENSION); - }}; } - private static Path createDataFile() throws IOException { - File avroFile = File.createTempFile("test-", "." + FILE_EXTENSION); + @Override + protected Path createDataFile(FileSystemConfig fsConfig, Object... args) throws IOException { + File avroFile = File.createTempFile("test-", "." + getFileExtension()); DatumWriter writer = new GenericDatumWriter<>(schema); try (DataFileWriter dataFileWriter = new DataFileWriter<>(writer)) { dataFileWriter.setFlushOnEveryBlock(true); @@ -58,52 +55,52 @@ private static Path createDataFile() throws IOException { datum.put(FIELD_NAME, String.format("%d_name_%s", index, UUID.randomUUID())); datum.put(FIELD_SURNAME, String.format("%d_surname_%s", index, UUID.randomUUID())); try { - OFFSETS_BY_INDEX.put(index, dataFileWriter.sync() - 16L); + fsConfig.getOffsetsByIndex().put(index, dataFileWriter.sync() - 16L); dataFileWriter.append(datum); } catch (IOException ioe) { throw new RuntimeException(ioe); } }); } - Path path = new Path(new Path(fsUri), avroFile.getName()); - fs.moveFromLocalFile(new Path(avroFile.getAbsolutePath()), path); + Path path = new Path(new Path(fsConfig.getFsUri()), avroFile.getName()); + fsConfig.getFs().moveFromLocalFile(new Path(avroFile.getAbsolutePath()), path); return path; } - @Test - public void readerWithSchema() throws Throwable { - Map cfg = new HashMap() {{ - put(AvroFileReader.FILE_READER_AVRO_SCHEMA, schema.toString()); - put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_AVRO, getFileExtension()); - }}; - reader = getReader(fs, dataFile, cfg); - readAllData(); + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void readerWithSchema(FileSystemConfig fsConfig) throws Throwable { + Map readerConfig = getReaderConfig(); + readerConfig.put(AvroFileReader.FILE_READER_AVRO_SCHEMA, schema.toString()); + FileSystem testFs = FileSystem.newInstance(fsConfig.getFsUri(), new Configuration()); + fsConfig.setReader(getReader(testFs, fsConfig.getDataFile(), readerConfig)); + readAllData(fsConfig); } - @Test - public void readerWithInvalidSchema() throws Throwable { - Map cfg = new HashMap() {{ - put(AvroFileReader.FILE_READER_AVRO_SCHEMA, Schema.create(Schema.Type.STRING).toString()); - put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_AVRO, getFileExtension()); - }}; - reader = getReader(fs, dataFile, cfg); - assertThrows(IllegalStateException.class, this::readAllData); + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void readerWithInvalidSchema(FileSystemConfig fsConfig) throws Throwable { + Map readerConfig = getReaderConfig(); + readerConfig.put(AvroFileReader.FILE_READER_AVRO_SCHEMA, 
Schema.create(Schema.Type.STRING).toString()); + FileSystem testFs = FileSystem.newInstance(fsConfig.getFsUri(), new Configuration()); + fsConfig.setReader(getReader(testFs, fsConfig.getDataFile(), readerConfig)); + assertThrows(IllegalStateException.class, () -> readAllData(fsConfig)); assertThrows(AvroTypeException.class, () -> { try { - readAllData(); + readAllData(fsConfig); } catch (Exception e) { throw e.getCause(); } }); } - @Test - public void readerWithUnparseableSchema() { - Map cfg = new HashMap() {{ - put(AvroFileReader.FILE_READER_AVRO_SCHEMA, "invalid schema"); - put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_AVRO, getFileExtension()); - }}; - assertThrows(SchemaParseException.class, () -> getReader(fs, dataFile, cfg)); + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void readerWithUnparseableSchema(FileSystemConfig fsConfig) throws IOException { + Map readerConfig = getReaderConfig(); + readerConfig.put(AvroFileReader.FILE_READER_AVRO_SCHEMA, "invalid schema"); + FileSystem testFs = FileSystem.newInstance(fsConfig.getFsUri(), new Configuration()); + assertThrows(SchemaParseException.class, () -> getReader(testFs, fsConfig.getDataFile(), readerConfig)); } @Override @@ -111,6 +108,16 @@ protected Offset getOffset(long offset) { return new AvroFileReader.AvroOffset(offset); } + @Override + protected Class getReaderClass() { + return AvroFileReader.class; + } + + @Override + protected Map getReaderConfig() { + return new HashMap<>(); + } + @Override protected void checkData(Struct record, long index) { assertAll( diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FileReaderTestBase.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FileReaderTestBase.java index d9cc9f4..188960e 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FileReaderTestBase.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FileReaderTestBase.java @@ -2,59 +2,79 @@ import com.github.mmolimar.kafka.connect.fs.file.Offset; import com.github.mmolimar.kafka.connect.fs.util.ReflectionUtils; +import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream; +import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.kafka.connect.data.Struct; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; import java.io.*; -import java.net.URI; -import java.util.HashMap; -import java.util.Map; -import java.util.NoSuchElementException; -import java.util.UUID; +import java.util.*; +import java.util.stream.Stream; import static org.junit.jupiter.api.Assertions.*; -public abstract class FileReaderTestBase { +abstract class FileReaderTestBase { + private static final List TEST_FILE_SYSTEMS = Arrays.asList( + new LocalFsConfig(), + new HdfsFsConfig() + ); protected static final int NUM_RECORDS = 100; - protected static final Map OFFSETS_BY_INDEX = new HashMap<>(); - protected static Class readerClass; - protected static FileSystem fs; - protected static URI fsUri; - protected static Path dataFile; - protected static Map readerConfig; - protected static FileReader reader; + 
@BeforeAll + public static void initFs() throws IOException { + for (FileSystemConfig fsConfig : TEST_FILE_SYSTEMS) { + fsConfig.initFs(); + } + } @AfterAll - public static void tearDown() throws IOException { - fs.close(); + public static void finishFs() throws IOException { + for (FileSystemConfig fsConfig : TEST_FILE_SYSTEMS) { + fsConfig.close(); + } } @BeforeEach public void openReader() throws Throwable { - reader = getReader(fs, dataFile, readerConfig); - assertEquals(reader.getFilePath(), dataFile); + for (FileSystemConfig fsConfig : TEST_FILE_SYSTEMS) { + fsConfig.setDataFile(createDataFile(fsConfig)); + FileReader reader = ReflectionUtils.makeReader(getReaderClass(), fsConfig.getFs(), + fsConfig.getDataFile(), getReaderConfig()); + assertEquals(reader.getFilePath(), fsConfig.getDataFile()); + fsConfig.setReader(reader); + } } @AfterEach public void closeReader() { - try { - reader.close(); - } catch (Exception e) { - //ignoring + for (FileSystemConfig fsConfig : TEST_FILE_SYSTEMS) { + try { + fsConfig.getReader().close(); + } catch (Exception e) { + //ignoring + } } } - @Test - public void invalidArgs() { + private static Stream fileSystemConfigProvider() { + return TEST_FILE_SYSTEMS.stream().map(Arguments::of); + } + + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void invalidArgs(FileSystemConfig fsConfig) { try { - readerClass.getConstructor(FileSystem.class, Path.class, Map.class).newInstance(null, null, null); + fsConfig.getReader().getClass().getConstructor(FileSystem.class, Path.class, Map.class) + .newInstance(null, null, null); } catch (Exception e) { assertThrows(IllegalArgumentException.class, () -> { throw e.getCause(); @@ -62,33 +82,38 @@ public void invalidArgs() { } } - @Test - public void fileDoesNotExist() { - Path path = new Path(new Path(fsUri), UUID.randomUUID().toString()); - assertThrows(FileNotFoundException.class, () -> getReader(fs, path, readerConfig)); + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void fileDoesNotExist(FileSystemConfig fsConfig) { + Path path = new Path(new Path(fsConfig.getFsUri()), UUID.randomUUID().toString()); + assertThrows(FileNotFoundException.class, () -> getReader(fsConfig.getFs(), path, getReaderConfig())); } - @Test - public void emptyFile() throws Throwable { + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void emptyFile(FileSystemConfig fsConfig) throws Throwable { File tmp = File.createTempFile("test-", "." + getFileExtension()); - Path path = new Path(new Path(fsUri), tmp.getName()); - fs.moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); - assertThrows(IOException.class, () -> getReader(fs, path, readerConfig)); + Path path = new Path(new Path(fsConfig.getFsUri()), tmp.getName()); + fsConfig.getFs().moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); + assertThrows(IOException.class, () -> getReader(fsConfig.getFs(), path, getReaderConfig())); } - @Test - public void invalidFileFormat() throws Throwable { + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void invalidFileFormat(FileSystemConfig fsConfig) throws Throwable { File tmp = File.createTempFile("test-", "." 
+ getFileExtension()); try (BufferedWriter writer = new BufferedWriter(new FileWriter(tmp))) { writer.write("test"); } - Path path = new Path(new Path(fsUri), tmp.getName()); - fs.moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); - assertThrows(IOException.class, () -> getReader(fs, path, readerConfig)); + Path path = new Path(new Path(fsConfig.getFsUri()), tmp.getName()); + fsConfig.getFs().moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); + assertThrows(IOException.class, () -> getReader(fsConfig.getFs(), path, getReaderConfig())); } - @Test - public void readAllData() { + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void readAllData(FileSystemConfig fsConfig) { + FileReader reader = fsConfig.getReader(); assertTrue(reader.hasNext()); int recordCount = 0; @@ -97,60 +122,92 @@ public void readAllData() { checkData(record, recordCount); recordCount++; } - assertEquals(NUM_RECORDS, recordCount, () -> "The number of records in the file does not match"); + assertEquals(NUM_RECORDS, recordCount, "The number of records in the file does not match"); } - @Test - public void seekFile() { + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void seekFile(FileSystemConfig fsConfig) { + FileReader reader = fsConfig.getReader(); int recordIndex = NUM_RECORDS / 2; - reader.seek(getOffset(OFFSETS_BY_INDEX.get(recordIndex))); + reader.seek(getOffset(fsConfig.getOffsetsByIndex().get(recordIndex))); assertTrue(reader.hasNext()); - assertEquals(OFFSETS_BY_INDEX.get(recordIndex), reader.currentOffset().getRecordOffset()); + assertEquals(fsConfig.getOffsetsByIndex().get(recordIndex), reader.currentOffset().getRecordOffset()); checkData(reader.next(), recordIndex); recordIndex = 0; - reader.seek(getOffset(OFFSETS_BY_INDEX.get(recordIndex))); + reader.seek(getOffset(fsConfig.getOffsetsByIndex().get(recordIndex))); assertTrue(reader.hasNext()); - assertEquals(OFFSETS_BY_INDEX.get(recordIndex), reader.currentOffset().getRecordOffset()); + assertEquals(fsConfig.getOffsetsByIndex().get(recordIndex), reader.currentOffset().getRecordOffset()); checkData(reader.next(), recordIndex); recordIndex = NUM_RECORDS - 3; - reader.seek(getOffset(OFFSETS_BY_INDEX.get(recordIndex))); + reader.seek(getOffset(fsConfig.getOffsetsByIndex().get(recordIndex))); assertTrue(reader.hasNext()); - assertEquals(OFFSETS_BY_INDEX.get(recordIndex), reader.currentOffset().getRecordOffset()); + assertEquals(fsConfig.getOffsetsByIndex().get(recordIndex), reader.currentOffset().getRecordOffset()); checkData(reader.next(), recordIndex); - reader.seek(getOffset(OFFSETS_BY_INDEX.get(NUM_RECORDS - 1) + 1)); + reader.seek(getOffset(fsConfig.getOffsetsByIndex().get(NUM_RECORDS - 1) + 1)); assertFalse(reader.hasNext()); } - @Test - public void negativeSeek() { + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void negativeSeek(FileSystemConfig fsConfig) { + FileReader reader = fsConfig.getReader(); assertThrows(RuntimeException.class, () -> reader.seek(getOffset(-1))); } - @Test - public void exceededSeek() { - reader.seek(getOffset(OFFSETS_BY_INDEX.get(NUM_RECORDS - 1) + 1)); + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void exceededSeek(FileSystemConfig fsConfig) { + FileReader reader = fsConfig.getReader(); + reader.seek(getOffset(fsConfig.getOffsetsByIndex().get(NUM_RECORDS - 1) + 1)); assertFalse(reader.hasNext()); - assertThrows(NoSuchElementException.class, () -> reader.next()); + assertThrows(NoSuchElementException.class, 
reader::next); } - @Test - public void readFileAlreadyClosed() throws IOException { + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void readFileAlreadyClosed(FileSystemConfig fsConfig) throws IOException { + FileReader reader = fsConfig.getReader(); reader.close(); - assertThrows(IllegalStateException.class, () -> reader.hasNext()); - assertThrows(IllegalStateException.class, () -> reader.next()); + assertThrows(IllegalStateException.class, reader::hasNext); + assertThrows(IllegalStateException.class, reader::next); + } + + protected Offset getOffset(long offset) { + return () -> offset; } protected final FileReader getReader(FileSystem fs, Path path, Map config) throws Throwable { - return ReflectionUtils.makeReader(readerClass, fs, path, config); + return ReflectionUtils.makeReader(getReaderClass(), fs, path, config); } - protected abstract Offset getOffset(long offset); + protected OutputStream getOutputStream(File file, CompressionType compression) throws IOException { + final OutputStream os; + switch (compression) { + case BZIP2: + os = new BZip2CompressorOutputStream(new FileOutputStream(file)); + break; + case GZIP: + os = new GzipCompressorOutputStream(new FileOutputStream(file)); + break; + default: + os = new FileOutputStream(file); + break; + } + return os; + } - protected abstract void checkData(Struct record, long index); + protected abstract Class getReaderClass(); + + protected abstract Path createDataFile(FileSystemConfig fsConfig, Object... args) throws IOException; + + protected abstract Map getReaderConfig(); protected abstract String getFileExtension(); + protected abstract void checkData(Struct record, long index); + } diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FileSystemConfig.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FileSystemConfig.java new file mode 100644 index 0000000..c670e5f --- /dev/null +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FileSystemConfig.java @@ -0,0 +1,153 @@ +package com.github.mmolimar.kafka.connect.fs.file.reader; + +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.MiniDFSCluster; + +import java.io.Closeable; +import java.io.IOException; +import java.net.URI; +import java.nio.file.Files; +import java.util.HashMap; +import java.util.Map; + +interface FileSystemConfig extends Closeable { + + void initFs() throws IOException; + + FileSystem getFs(); + + URI getFsUri(); + + void setDataFile(Path dataFile); + + Path getDataFile(); + + void setReader(FileReader reader); + + FileReader getReader(); + + Map getOffsetsByIndex(); + +} + +class LocalFsConfig implements FileSystemConfig { + private java.nio.file.Path localDir; + private FileSystem fs; + private URI fsUri; + private Path dataFile; + private FileReader reader; + private Map offsetsByIndex; + + @Override + public void initFs() throws IOException { + localDir = Files.createTempDirectory("test-"); + fsUri = localDir.toUri(); + fs = FileSystem.newInstance(fsUri, new Configuration()); + offsetsByIndex = new HashMap<>(); + } + + @Override + public FileSystem getFs() { + return fs; + } + + @Override + public URI getFsUri() { + return fsUri; + } + + @Override + public void setDataFile(Path dataFile) { + this.dataFile = dataFile; + } + + @Override + public Path getDataFile() { + return dataFile; + } + + @Override + public void setReader(FileReader 
reader) { + this.reader = reader; + } + + @Override + public FileReader getReader() { + return reader; + } + + @Override + public Map getOffsetsByIndex() { + return offsetsByIndex; + } + + @Override + public void close() throws IOException { + fs.close(); + FileUtils.deleteDirectory(localDir.toFile()); + } +} + +class HdfsFsConfig implements FileSystemConfig { + private MiniDFSCluster cluster; + private FileSystem fs; + private URI fsUri; + private Path dataFile; + private FileReader reader; + private Map offsetsByIndex; + + @Override + public void initFs() throws IOException { + Configuration clusterConfig = new Configuration(); + java.nio.file.Path hdfsDir = Files.createTempDirectory("test-"); + clusterConfig.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, hdfsDir.toAbsolutePath().toString()); + cluster = new MiniDFSCluster.Builder(clusterConfig).build(); + fsUri = URI.create("hdfs://localhost:" + cluster.getNameNodePort() + "/"); + fs = FileSystem.newInstance(fsUri, new Configuration()); + offsetsByIndex = new HashMap<>(); + } + + @Override + public FileSystem getFs() { + return fs; + } + + @Override + public URI getFsUri() { + return fsUri; + } + + @Override + public Path getDataFile() { + return dataFile; + } + + @Override + public void setDataFile(Path dataFile) { + this.dataFile = dataFile; + } + + @Override + public void setReader(FileReader reader) { + this.reader = reader; + } + + @Override + public FileReader getReader() { + return reader; + } + + @Override + public Map getOffsetsByIndex() { + return offsetsByIndex; + } + + @Override + public void close() throws IOException { + fs.close(); + cluster.shutdown(true); + } +} \ No newline at end of file diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/JsonFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/JsonFileReaderTest.java new file mode 100644 index 0000000..9d05edf --- /dev/null +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/JsonFileReaderTest.java @@ -0,0 +1,199 @@ +package com.github.mmolimar.kafka.connect.fs.file.reader; + +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.node.JsonNodeFactory; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.github.mmolimar.kafka.connect.fs.file.Offset; +import org.apache.hadoop.fs.Path; +import org.apache.kafka.connect.data.Struct; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; + +import java.io.File; +import java.io.IOException; +import java.io.PrintWriter; +import java.nio.charset.UnsupportedCharsetException; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; +import java.util.UUID; +import java.util.stream.IntStream; + +import static org.junit.jupiter.api.Assertions.*; + +public class JsonFileReaderTest extends FileReaderTestBase { + + private static final String FIELD_INTEGER = "integerField"; + private static final String FIELD_LONG = "longField"; + private static final String FIELD_BOOLEAN = "booleanField"; + private static final String FIELD_STRING = "stringField"; + private static final String FIELD_DECIMAL = "decimalField"; + private static final String FIELD_ARRAY = "arrayField"; + private static final String FIELD_STRUCT = "structField"; + private static final String FIELD_NULL = "nullField"; + private static final String FILE_EXTENSION = "jsn"; + private static final CompressionType COMPRESSION_TYPE_DEFAULT = CompressionType.NONE; + + @Override + 
protected Path createDataFile(FileSystemConfig fsConfig, Object... args) throws IOException { + int numRecords = args.length < 1 ? NUM_RECORDS : (int) args[0]; + boolean recordPerLine = args.length < 2 || (boolean) args[1]; + CompressionType compression = args.length < 3 ? COMPRESSION_TYPE_DEFAULT : (CompressionType) args[2]; + File txtFile = File.createTempFile("test-", "." + getFileExtension()); + try (PrintWriter writer = new PrintWriter(getOutputStream(txtFile, compression))) { + IntStream.range(0, numRecords).forEach(index -> { + ObjectNode json = JsonNodeFactory.instance.objectNode() + .put(FIELD_INTEGER, index) + .put(FIELD_LONG, Long.MAX_VALUE) + .put(FIELD_STRING, String.format("%d_%s", index, UUID.randomUUID())) + .put(FIELD_BOOLEAN, true) + .put(FIELD_DECIMAL, Double.parseDouble(index + "." + index)) + .put(FIELD_NULL, (String) null); + json.putArray(FIELD_ARRAY) + .add("elm[" + index + "]") + .add("elm[" + index + "]"); + json.putObject(FIELD_STRUCT) + .put(FIELD_INTEGER, (short) index) + .put(FIELD_LONG, Long.MAX_VALUE) + .put(FIELD_STRING, String.format("%d_%s", index, UUID.randomUUID())) + .put(FIELD_BOOLEAN, true) + .put(FIELD_DECIMAL, Double.parseDouble(index + "." + index)) + .put(FIELD_NULL, (String) null); + writer.append(recordPerLine ? json.toString() + "\n" : json.toPrettyString()); + fsConfig.getOffsetsByIndex().put(index, (long) index); + }); + } + Path path = new Path(new Path(fsConfig.getFsUri()), txtFile.getName()); + fsConfig.getFs().moveFromLocalFile(new Path(txtFile.getAbsolutePath()), path); + return path; + } + + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void emptyFile(FileSystemConfig fsConfig) throws Throwable { + File tmp = File.createTempFile("test-", "." + getFileExtension()); + Path path = new Path(new Path(fsConfig.getFsUri()), tmp.getName()); + fsConfig.getFs().moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); + FileReader reader = getReader(fsConfig.getFs(), path, getReaderConfig()); + assertFalse(reader.hasNext()); + } + + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void validFileEncoding(FileSystemConfig fsConfig) throws Throwable { + Map readerConfig = getReaderConfig(); + readerConfig.put(JsonFileReader.FILE_READER_JSON_ENCODING, "Cp1252"); + fsConfig.setReader(getReader(fsConfig.getFs(), fsConfig.getDataFile(), readerConfig)); + readAllData(fsConfig); + } + + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void invalidDeserializationConfig(FileSystemConfig fsConfig) throws Throwable { + Map readerConfig = getReaderConfig(); + readerConfig.put(JsonFileReader.FILE_READER_JSON_DESERIALIZATION_CONFIGS + "invalid", "false"); + fsConfig.setReader(getReader(fsConfig.getFs(), fsConfig.getDataFile(), readerConfig)); + readAllData(fsConfig); + } + + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void invalidFileEncoding(FileSystemConfig fsConfig) { + Map readerConfig = getReaderConfig(); + readerConfig.put(JsonFileReader.FILE_READER_JSON_ENCODING, "invalid_charset"); + assertThrows(UnsupportedCharsetException.class, () -> getReader(fsConfig.getFs(), + fsConfig.getDataFile(), readerConfig)); + } + + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void readDataWithRecordPerLineDisabled(FileSystemConfig fsConfig) throws Throwable { + Path file = createDataFile(fsConfig, 1, false); + Map readerConfig = getReaderConfig(); + readerConfig.put(JsonFileReader.FILE_READER_JSON_RECORD_PER_LINE, "false"); + FileReader reader = 
getReader(fsConfig.getFs(), file, readerConfig); + + assertTrue(reader.hasNext()); + + int recordCount = 0; + while (reader.hasNext()) { + Struct record = reader.next(); + checkData(record, recordCount); + recordCount++; + } + reader.close(); + assertEquals(1, recordCount, "The number of records in the file does not match"); + } + + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void readDifferentCompressionTypes(FileSystemConfig fsConfig) { + Arrays.stream(CompressionType.values()).forEach(compressionType -> { + try { + Path file = createDataFile(fsConfig, NUM_RECORDS, true, compressionType); + Map readerConfig = getReaderConfig(); + readerConfig.put(JsonFileReader.FILE_READER_JSON_COMPRESSION_TYPE, compressionType.toString()); + readerConfig.put(JsonFileReader.FILE_READER_JSON_COMPRESSION_CONCATENATED, "true"); + FileReader reader = getReader(fsConfig.getFs(), file, readerConfig); + + assertTrue(reader.hasNext()); + + int recordCount = 0; + while (reader.hasNext()) { + Struct record = reader.next(); + checkData(record, recordCount); + recordCount++; + } + reader.close(); + assertEquals(NUM_RECORDS, recordCount, "The number of records in the file does not match"); + } catch (Throwable e) { + throw new RuntimeException(e); + } + }); + } + + @Override + protected Offset getOffset(long offset) { + return () -> offset; + } + + @Override + protected Class getReaderClass() { + return JsonFileReader.class; + } + + @Override + protected Map getReaderConfig() { + return new HashMap() {{ + String deserializationConfig = DeserializationFeature.ACCEPT_EMPTY_ARRAY_AS_NULL_OBJECT.name(); + put(JsonFileReader.FILE_READER_JSON_DESERIALIZATION_CONFIGS + deserializationConfig, "true"); + }}; + } + + @Override + protected void checkData(Struct record, long index) { + Struct subrecord = record.getStruct(FIELD_STRUCT); + assertAll( + () -> assertEquals((int) (Integer) record.get(FIELD_INTEGER), index), + () -> assertEquals((long) (Long) record.get(FIELD_LONG), Long.MAX_VALUE), + () -> assertTrue(record.get(FIELD_STRING).toString().startsWith(index + "_")), + () -> assertTrue(Boolean.parseBoolean(record.get(FIELD_BOOLEAN).toString())), + () -> assertEquals((Double) record.get(FIELD_DECIMAL), Double.parseDouble(index + "." + index), 0), + () -> assertNull(record.get(FIELD_NULL)), + () -> assertNotNull(record.schema().field(FIELD_NULL)), + () -> assertEquals(record.get(FIELD_ARRAY), Arrays.asList("elm[" + index + "]", "elm[" + index + "]")), + () -> assertEquals((int) (Integer) subrecord.get(FIELD_INTEGER), index), + () -> assertEquals((long) (Long) subrecord.get(FIELD_LONG), Long.MAX_VALUE), + () -> assertTrue(subrecord.get(FIELD_STRING).toString().startsWith(index + "_")), + () -> assertTrue(Boolean.parseBoolean(subrecord.get(FIELD_BOOLEAN).toString())), + () -> assertEquals((Double) subrecord.get(FIELD_DECIMAL), Double.parseDouble(index + "." 
+ index), 0), + () -> assertNull(subrecord.get(FIELD_NULL)), + () -> assertNotNull(subrecord.schema().field(FIELD_NULL)) + ); + } + + @Override + protected String getFileExtension() { + return FILE_EXTENSION; + } +} diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/ParquetFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/ParquetFileReaderTest.java new file mode 100644 index 0000000..672872e --- /dev/null +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/ParquetFileReaderTest.java @@ -0,0 +1,192 @@ +package com.github.mmolimar.kafka.connect.fs.file.reader; + +import com.github.mmolimar.kafka.connect.fs.file.Offset; +import org.apache.avro.AvroRuntimeException; +import org.apache.avro.Schema; +import org.apache.avro.SchemaBuilder; +import org.apache.avro.SchemaParseException; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericRecord; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.kafka.connect.data.Struct; +import org.apache.kafka.connect.errors.DataException; +import org.apache.parquet.avro.AvroParquetWriter; +import org.apache.parquet.hadoop.ParquetFileWriter; +import org.apache.parquet.hadoop.ParquetWriter; +import org.apache.parquet.io.InvalidRecordException; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.UUID; +import java.util.stream.IntStream; + +import static org.junit.jupiter.api.Assertions.*; + +public class ParquetFileReaderTest extends FileReaderTestBase { + + private static final String FIELD_INDEX = "index"; + private static final String FIELD_NAME = "name"; + private static final String FIELD_SURNAME = "surname"; + private static final String FILE_EXTENSION = "parquet"; + + private static Schema readerSchema; + private static Schema projectionSchema; + + @BeforeAll + public static void setUp() throws IOException { + readerSchema = new Schema.Parser().parse( + ParquetFileReaderTest.class.getResourceAsStream("/file/reader/schemas/people.avsc")); + projectionSchema = new Schema.Parser().parse( + ParquetFileReaderTest.class.getResourceAsStream("/file/reader/schemas/people_projection.avsc")); + } + + @Override + protected Path createDataFile(FileSystemConfig fsConfig, Object... args) throws IOException { + FileSystem fs = fsConfig.getFs(); + File parquetFile = File.createTempFile("test-", "." 
+ getFileExtension()); + + try (ParquetWriter writer = AvroParquetWriter.builder(new Path(parquetFile.toURI())) + .withConf(fs.getConf()).withWriteMode(ParquetFileWriter.Mode.OVERWRITE).withSchema(readerSchema).build()) { + IntStream.range(0, NUM_RECORDS).forEach(index -> { + GenericRecord datum = new GenericData.Record(readerSchema); + datum.put(FIELD_INDEX, index); + String uuid = UUID.randomUUID().toString(); + datum.put(FIELD_NAME, String.format("%d_name_%s", index, uuid)); + datum.put(FIELD_SURNAME, String.format("%d_surname_%s", index, uuid)); + try { + fsConfig.getOffsetsByIndex().put(index, (long) index); + writer.write(datum); + } catch (IOException ioe) { + throw new RuntimeException(ioe); + } + }); + } + Path path = new Path(new Path(fsConfig.getFsUri()), parquetFile.getName()); + fs.moveFromLocalFile(new Path(parquetFile.getAbsolutePath()), path); + return path; + } + + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void emptyFile(FileSystemConfig fsConfig) throws Throwable { + File tmp = File.createTempFile("test-", "." + getFileExtension()); + Path path = new Path(new Path(fsConfig.getFsUri()), tmp.getName()); + fsConfig.getFs().moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); + getReader(fsConfig.getFs(), path, getReaderConfig()); + } + + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void invalidFileFormat(FileSystemConfig fsConfig) throws Throwable { + File tmp = File.createTempFile("test-", "." + getFileExtension()); + try (BufferedWriter writer = new BufferedWriter(new FileWriter(tmp))) { + writer.write("test"); + } + Path path = new Path(new Path(fsConfig.getFsUri()), tmp.getName()); + fsConfig.getFs().moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); + getReader(fsConfig.getFs(), path, getReaderConfig()); + } + + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void readerWithSchema(FileSystemConfig fsConfig) throws Throwable { + Map readerConfig = getReaderConfig(); + readerConfig.put(ParquetFileReader.FILE_READER_PARQUET_SCHEMA, readerSchema.toString()); + readerConfig.put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_PARQUET, getFileExtension()); + FileSystem testFs = FileSystem.newInstance(fsConfig.getFsUri(), new Configuration()); + fsConfig.setReader(getReader(testFs, fsConfig.getDataFile(), readerConfig)); + readAllData(fsConfig); + } + + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void readerWithProjection(FileSystemConfig fsConfig) throws Throwable { + Map readerConfig = getReaderConfig(); + readerConfig.put(ParquetFileReader.FILE_READER_PARQUET_PROJECTION, projectionSchema.toString()); + readerConfig.put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_PARQUET, getFileExtension()); + fsConfig.setReader(getReader(fsConfig.getFs(), fsConfig.getDataFile(), readerConfig)); + while (fsConfig.getReader().hasNext()) { + Struct record = fsConfig.getReader().next(); + assertNotNull(record.schema().field(FIELD_INDEX)); + assertNotNull(record.schema().field(FIELD_NAME)); + assertNull(record.schema().field(FIELD_SURNAME)); + } + FileSystem testFs = FileSystem.newInstance(fsConfig.getFsUri(), new Configuration()); + fsConfig.setReader(getReader(testFs, fsConfig.getDataFile(), readerConfig)); + assertThrows(DataException.class, () -> readAllData(fsConfig)); + } + + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void readerWithInvalidProjection(FileSystemConfig fsConfig) throws Throwable { + Schema testSchema = 
SchemaBuilder.record("test_projection").namespace("test.avro") + .fields() + .name("field1").type("string").noDefault() + .endRecord(); + Map readerConfig = getReaderConfig(); + readerConfig.put(ParquetFileReader.FILE_READER_PARQUET_PROJECTION, testSchema.toString()); + readerConfig.put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_PARQUET, getFileExtension()); + FileSystem testFs = FileSystem.newInstance(fsConfig.getFsUri(), new Configuration()); + fsConfig.setReader(getReader(testFs, fsConfig.getDataFile(), readerConfig)); + assertThrows(InvalidRecordException.class, () -> readAllData(fsConfig)); + } + + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void readerWithInvalidSchema(FileSystemConfig fsConfig) throws Throwable { + Map readerConfig = getReaderConfig(); + readerConfig.put(ParquetFileReader.FILE_READER_PARQUET_SCHEMA, Schema.create(Schema.Type.STRING).toString()); + readerConfig.put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_PARQUET, getFileExtension()); + FileSystem testFs = FileSystem.newInstance(fsConfig.getFsUri(), new Configuration()); + fsConfig.setReader(getReader(testFs, fsConfig.getDataFile(), readerConfig)); + assertThrows(AvroRuntimeException.class, () -> readAllData(fsConfig)); + } + + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void readerWithUnparseableSchema(FileSystemConfig fsConfig) { + Map readerConfig = getReaderConfig(); + readerConfig.put(ParquetFileReader.FILE_READER_PARQUET_SCHEMA, "invalid schema"); + readerConfig.put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_PARQUET, getFileExtension()); + assertThrows(SchemaParseException.class, () -> + getReader(FileSystem.newInstance(fsConfig.getFsUri(), new Configuration()), + fsConfig.getDataFile(), readerConfig)); + } + + @Override + protected Map getReaderConfig() { + return new HashMap<>(); + } + + @Override + protected Offset getOffset(long offset) { + return new ParquetFileReader.ParquetOffset(offset); + } + + @Override + protected Class getReaderClass() { + return ParquetFileReader.class; + } + + @Override + protected void checkData(Struct record, long index) { + assertEquals((int) (Integer) record.get(FIELD_INDEX), index); + assertTrue(record.get(FIELD_NAME).toString().startsWith(index + "_")); + assertTrue(record.get(FIELD_SURNAME).toString().startsWith(index + "_")); + } + + @Override + protected String getFileExtension() { + return FILE_EXTENSION; + } + +} diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/SequenceFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReaderTest.java similarity index 64% rename from src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/SequenceFileReaderTest.java rename to src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReaderTest.java index 411f647..18377fa 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/SequenceFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReaderTest.java @@ -1,8 +1,7 @@ -package com.github.mmolimar.kafka.connect.fs.file.reader.local; +package com.github.mmolimar.kafka.connect.fs.file.reader; import com.github.mmolimar.kafka.connect.fs.file.Offset; -import com.github.mmolimar.kafka.connect.fs.file.reader.AgnosticFileReader; -import com.github.mmolimar.kafka.connect.fs.file.reader.SequenceFileReader; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import 
org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.SequenceFile; @@ -10,8 +9,8 @@ import org.apache.hadoop.io.Writable; import org.apache.hadoop.util.ReflectionUtils; import org.apache.kafka.connect.data.Struct; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; import java.io.File; import java.io.IOException; @@ -22,28 +21,19 @@ import static org.junit.jupiter.api.Assertions.*; -public class SequenceFileReaderTest extends LocalFileReaderTestBase { +public class SequenceFileReaderTest extends FileReaderTestBase { private static final String FIELD_NAME_KEY = "custom_field_key"; private static final String FIELD_NAME_VALUE = "custom_field_name"; private static final String FILE_EXTENSION = "sq"; - @BeforeAll - public static void setUp() throws IOException { - readerClass = AgnosticFileReader.class; - dataFile = createDataFile(); - readerConfig = new HashMap() {{ - put(SequenceFileReader.FILE_READER_SEQUENCE_FIELD_NAME_KEY, FIELD_NAME_KEY); - put(SequenceFileReader.FILE_READER_SEQUENCE_FIELD_NAME_VALUE, FIELD_NAME_VALUE); - put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_SEQUENCE, FILE_EXTENSION); - }}; - } - - private static Path createDataFile() throws IOException { - File seqFile = File.createTempFile("test-", "." + FILE_EXTENSION); - try (SequenceFile.Writer writer = SequenceFile.createWriter(fs.getConf(), SequenceFile.Writer.file(new Path(seqFile.getAbsolutePath())), + @Override + protected Path createDataFile(FileSystemConfig fsConfig, Object... args) throws IOException { + FileSystem fs = fsConfig.getFs(); + File seqFile = File.createTempFile("test-", "." + getFileExtension()); + try (SequenceFile.Writer writer = SequenceFile.createWriter(fs.getConf(), + SequenceFile.Writer.file(new Path(seqFile.getAbsolutePath())), SequenceFile.Writer.keyClass(IntWritable.class), SequenceFile.Writer.valueClass(Text.class))) { - IntStream.range(0, NUM_RECORDS).forEach(index -> { Writable key = new IntWritable(index); Writable value = new Text(String.format("%d_%s", index, UUID.randomUUID())); @@ -62,31 +52,33 @@ private static Path createDataFile() throws IOException { int index = 0; long pos = reader.getPosition() - 1; while (reader.next(key, value)) { - OFFSETS_BY_INDEX.put(index++, pos); + fsConfig.getOffsetsByIndex().put(index++, pos); pos = reader.getPosition(); } } - Path path = new Path(new Path(fsUri), seqFile.getName()); + Path path = new Path(new Path(fsConfig.getFsUri()), seqFile.getName()); fs.moveFromLocalFile(new Path(seqFile.getAbsolutePath()), path); return path; } - @Test - public void defaultFieldNames() throws Throwable { - Map customReaderCfg = new HashMap() {{ - put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_SEQUENCE, getFileExtension()); - }}; - reader = getReader(fs, dataFile, customReaderCfg); - assertEquals(reader.getFilePath(), dataFile); + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void defaultFieldNames(FileSystemConfig fsConfig) throws Throwable { + Map readerConfig = getReaderConfig(); + readerConfig.put(SequenceFileReader.FILE_READER_SEQUENCE_FIELD_NAME_KEY, null); + readerConfig.put(SequenceFileReader.FILE_READER_SEQUENCE_FIELD_NAME_VALUE, null); + FileReader reader = getReader(fsConfig.getFs(), fsConfig.getDataFile(), readerConfig); + assertEquals(reader.getFilePath(), fsConfig.getDataFile()); assertTrue(reader.hasNext()); int recordCount = 0; while (reader.hasNext()) { Struct record 
= reader.next(); - checkData(SequenceFileReader.FIELD_NAME_KEY_DEFAULT, SequenceFileReader.FIELD_NAME_VALUE_DEFAULT, record, recordCount); + checkData(SequenceFileReader.FIELD_NAME_KEY_DEFAULT, SequenceFileReader.FIELD_NAME_VALUE_DEFAULT, + record, recordCount); recordCount++; } - assertEquals(NUM_RECORDS, recordCount, () -> "The number of records in the file does not match"); + assertEquals(NUM_RECORDS, recordCount, "The number of records in the file does not match"); } @Override @@ -94,6 +86,19 @@ protected Offset getOffset(long offset) { return new SequenceFileReader.SeqOffset(offset); } + @Override + protected Class getReaderClass() { + return SequenceFileReader.class; + } + + @Override + protected Map getReaderConfig() { + return new HashMap() {{ + put(SequenceFileReader.FILE_READER_SEQUENCE_FIELD_NAME_KEY, FIELD_NAME_KEY); + put(SequenceFileReader.FILE_READER_SEQUENCE_FIELD_NAME_VALUE, FIELD_NAME_VALUE); + }}; + } + @Override protected void checkData(Struct record, long index) { checkData(FIELD_NAME_KEY, FIELD_NAME_VALUE, record, index); diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReaderTest.java new file mode 100644 index 0000000..9220772 --- /dev/null +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReaderTest.java @@ -0,0 +1,143 @@ +package com.github.mmolimar.kafka.connect.fs.file.reader; + +import com.github.mmolimar.kafka.connect.fs.file.Offset; +import org.apache.hadoop.fs.Path; +import org.apache.kafka.connect.data.Struct; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; + +import java.io.File; +import java.io.IOException; +import java.io.PrintWriter; +import java.nio.charset.UnsupportedCharsetException; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; +import java.util.UUID; +import java.util.stream.IntStream; + +import static org.junit.jupiter.api.Assertions.*; + +public class TextFileReaderTest extends FileReaderTestBase { + + private static final String FIELD_NAME_VALUE = "custom_field_name"; + private static final String FILE_EXTENSION = "txt"; + private static final CompressionType COMPRESSION_TYPE_DEFAULT = CompressionType.GZIP; + + @Override + protected Path createDataFile(FileSystemConfig fsConfig, Object... args) throws IOException { + CompressionType compression = args.length < 1 ? COMPRESSION_TYPE_DEFAULT : (CompressionType) args[0]; + File txtFile = File.createTempFile("test-", "." 
+ FILE_EXTENSION); + try (PrintWriter writer = new PrintWriter(getOutputStream(txtFile, compression))) { + IntStream.range(0, NUM_RECORDS).forEach(index -> { + String value = String.format("%d_%s", index, UUID.randomUUID()); + writer.append(value + "\n"); + fsConfig.getOffsetsByIndex().put(index, (long) index); + }); + } + Path path = new Path(new Path(fsConfig.getFsUri()), txtFile.getName()); + fsConfig.getFs().moveFromLocalFile(new Path(txtFile.getAbsolutePath()), path); + return path; + } + + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void validFileEncoding(FileSystemConfig fsConfig) throws Throwable { + Map readerConfig = getReaderConfig(); + readerConfig.put(TextFileReader.FILE_READER_TEXT_FIELD_NAME_VALUE, FIELD_NAME_VALUE); + readerConfig.put(TextFileReader.FILE_READER_TEXT_ENCODING, "Cp1252"); + readerConfig.put(TextFileReader.FILE_READER_TEXT_COMPRESSION_TYPE, COMPRESSION_TYPE_DEFAULT); + FileReader reader = getReader(fsConfig.getFs(), fsConfig.getDataFile(), readerConfig); + fsConfig.setReader(reader); + readAllData(fsConfig); + } + + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void invalidFileEncoding(FileSystemConfig fsConfig) { + Map readerConfig = getReaderConfig(); + readerConfig.put(TextFileReader.FILE_READER_TEXT_FIELD_NAME_VALUE, FIELD_NAME_VALUE); + readerConfig.put(TextFileReader.FILE_READER_TEXT_ENCODING, "invalid_charset"); + readerConfig.put(TextFileReader.FILE_READER_TEXT_COMPRESSION_TYPE, COMPRESSION_TYPE_DEFAULT); + assertThrows(UnsupportedCharsetException.class, () -> getReader(fsConfig.getFs(), + fsConfig.getDataFile(), readerConfig)); + } + + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void readDataWithRecordPerLineDisabled(FileSystemConfig fsConfig) throws Throwable { + Path file = createDataFile(fsConfig, COMPRESSION_TYPE_DEFAULT); + Map readerConfig = getReaderConfig(); + readerConfig.put(TextFileReader.FILE_READER_TEXT_FIELD_NAME_VALUE, FIELD_NAME_VALUE); + readerConfig.put(TextFileReader.FILE_READER_TEXT_RECORD_PER_LINE, "false"); + readerConfig.put(TextFileReader.FILE_READER_TEXT_COMPRESSION_TYPE, COMPRESSION_TYPE_DEFAULT); + FileReader reader = getReader(fsConfig.getFs(), file, readerConfig); + + assertTrue(reader.hasNext()); + + int recordCount = 0; + while (reader.hasNext()) { + Struct record = reader.next(); + checkData(record, recordCount); + recordCount++; + } + reader.close(); + assertEquals(1, recordCount, () -> "The number of records in the file does not match"); + } + + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void readDifferentCompressionTypes(FileSystemConfig fsConfig) { + Arrays.stream(CompressionType.values()).forEach(compressionType -> { + try { + Path file = createDataFile(fsConfig, compressionType); + Map readerConfig = getReaderConfig(); + readerConfig.put(TextFileReader.FILE_READER_TEXT_FIELD_NAME_VALUE, FIELD_NAME_VALUE); + readerConfig.put(TextFileReader.FILE_READER_TEXT_COMPRESSION_TYPE, compressionType); + FileReader reader = getReader(fsConfig.getFs(), file, readerConfig); + + assertTrue(reader.hasNext()); + + int recordCount = 0; + while (reader.hasNext()) { + Struct record = reader.next(); + checkData(record, recordCount); + recordCount++; + } + reader.close(); + assertEquals(NUM_RECORDS, recordCount, "The number of records in the file does not match"); + } catch (Throwable e) { + throw new RuntimeException(e); + } + }); + } + + @Override + protected Offset getOffset(long offset) { + return new 
TextFileReader.TextOffset(offset); + } + + @Override + protected Class getReaderClass() { + return TextFileReader.class; + } + + @Override + protected Map getReaderConfig() { + return new HashMap() {{ + put(TextFileReader.FILE_READER_TEXT_FIELD_NAME_VALUE, FIELD_NAME_VALUE); + put(TextFileReader.FILE_READER_TEXT_COMPRESSION_TYPE, COMPRESSION_TYPE_DEFAULT); + put(TextFileReader.FILE_READER_TEXT_COMPRESSION_CONCATENATED, "true"); + }}; + } + + @Override + protected void checkData(Struct record, long index) { + assertTrue(record.get(FIELD_NAME_VALUE).toString().startsWith(index + "_")); + } + + @Override + protected String getFileExtension() { + return FILE_EXTENSION; + } +} diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/AvroFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/AvroFileReaderTest.java deleted file mode 100644 index b4ae9ae..0000000 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/AvroFileReaderTest.java +++ /dev/null @@ -1,122 +0,0 @@ -package com.github.mmolimar.kafka.connect.fs.file.reader.hdfs; - -import com.github.mmolimar.kafka.connect.fs.file.Offset; -import com.github.mmolimar.kafka.connect.fs.file.reader.AgnosticFileReader; -import com.github.mmolimar.kafka.connect.fs.file.reader.AvroFileReader; -import org.apache.avro.AvroTypeException; -import org.apache.avro.Schema; -import org.apache.avro.SchemaParseException; -import org.apache.avro.file.DataFileWriter; -import org.apache.avro.generic.GenericData; -import org.apache.avro.generic.GenericDatumWriter; -import org.apache.avro.generic.GenericRecord; -import org.apache.avro.io.DatumWriter; -import org.apache.hadoop.fs.Path; -import org.apache.kafka.connect.data.Struct; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; - -import java.io.File; -import java.io.IOException; -import java.util.HashMap; -import java.util.Map; -import java.util.UUID; -import java.util.stream.IntStream; - -import static org.junit.jupiter.api.Assertions.*; - -public class AvroFileReaderTest extends HdfsFileReaderTestBase { - - private static final String FIELD_INDEX = "index"; - private static final String FIELD_NAME = "name"; - private static final String FIELD_SURNAME = "surname"; - private static final String FILE_EXTENSION = "avro"; - - private static Schema schema; - - @BeforeAll - public static void setUp() throws IOException { - schema = new Schema.Parser().parse(AvroFileReaderTest.class.getResourceAsStream("/file/reader/schemas/people.avsc")); - readerClass = AgnosticFileReader.class; - dataFile = createDataFile(); - readerConfig = new HashMap<>(); - } - - private static Path createDataFile() throws IOException { - File avroFile = File.createTempFile("test-", "." 
+ FILE_EXTENSION); - DatumWriter writer = new GenericDatumWriter<>(schema); - try (DataFileWriter dataFileWriter = new DataFileWriter<>(writer)) { - dataFileWriter.setFlushOnEveryBlock(true); - dataFileWriter.setSyncInterval(32); - dataFileWriter.create(schema, avroFile); - - IntStream.range(0, NUM_RECORDS).forEach(index -> { - GenericRecord datum = new GenericData.Record(schema); - datum.put(FIELD_INDEX, index); - datum.put(FIELD_NAME, String.format("%d_name_%s", index, UUID.randomUUID())); - datum.put(FIELD_SURNAME, String.format("%d_surname_%s", index, UUID.randomUUID())); - try { - OFFSETS_BY_INDEX.put(index, dataFileWriter.sync() - 16L); - dataFileWriter.append(datum); - } catch (IOException ioe) { - throw new RuntimeException(ioe); - } - }); - } - Path path = new Path(new Path(fsUri), avroFile.getName()); - fs.moveFromLocalFile(new Path(avroFile.getAbsolutePath()), path); - return path; - } - - @Test - public void readerWithSchema() throws Throwable { - Map cfg = new HashMap() {{ - put(AvroFileReader.FILE_READER_AVRO_SCHEMA, schema.toString()); - }}; - reader = getReader(fs, dataFile, cfg); - readAllData(); - } - - @Test - public void readerWithInvalidSchema() throws Throwable { - Map cfg = new HashMap() {{ - put(AvroFileReader.FILE_READER_AVRO_SCHEMA, Schema.create(Schema.Type.STRING).toString()); - }}; - reader = getReader(fs, dataFile, cfg); - assertThrows(IllegalStateException.class, this::readAllData); - assertThrows(AvroTypeException.class, () -> { - try { - readAllData(); - } catch (Exception e) { - throw e.getCause(); - } - }); - } - - @Test - public void readerWithUnparseableSchema() { - Map cfg = new HashMap() {{ - put(AvroFileReader.FILE_READER_AVRO_SCHEMA, "invalid schema"); - }}; - assertThrows(SchemaParseException.class, () -> getReader(fs, dataFile, cfg)); - } - - @Override - protected Offset getOffset(long offset) { - return new AvroFileReader.AvroOffset(offset); - } - - @Override - protected void checkData(Struct record, long index) { - assertAll( - () -> assertEquals((int) (Integer) record.get(FIELD_INDEX), index), - () -> assertTrue(record.get(FIELD_NAME).toString().startsWith(index + "_")), - () -> assertTrue(record.get(FIELD_SURNAME).toString().startsWith(index + "_")) - ); - } - - @Override - protected String getFileExtension() { - return FILE_EXTENSION; - } -} diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/HdfsFileReaderTestBase.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/HdfsFileReaderTestBase.java deleted file mode 100644 index c60d0c3..0000000 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/HdfsFileReaderTestBase.java +++ /dev/null @@ -1,33 +0,0 @@ -package com.github.mmolimar.kafka.connect.fs.file.reader.hdfs; - -import com.github.mmolimar.kafka.connect.fs.file.reader.FileReaderTestBase; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.hdfs.MiniDFSCluster; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.BeforeAll; - -import java.io.IOException; -import java.net.URI; -import java.nio.file.Files; -import java.nio.file.Path; - -public abstract class HdfsFileReaderTestBase extends FileReaderTestBase { - - private static MiniDFSCluster cluster; - - @BeforeAll - public static void initFs() throws IOException { - Configuration clusterConfig = new Configuration(); - Path hdfsDir = Files.createTempDirectory("test-"); - clusterConfig.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, 
hdfsDir.toAbsolutePath().toString()); - cluster = new MiniDFSCluster.Builder(clusterConfig).build(); - fsUri = URI.create("hdfs://localhost:" + cluster.getNameNodePort() + "/"); - fs = FileSystem.newInstance(fsUri, new Configuration()); - } - - @AfterAll - public static void finishFs() throws Exception { - cluster.shutdown(true); - } -} diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/JsonFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/JsonFileReaderTest.java deleted file mode 100644 index 188487a..0000000 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/JsonFileReaderTest.java +++ /dev/null @@ -1,174 +0,0 @@ -package com.github.mmolimar.kafka.connect.fs.file.reader.hdfs; - -import com.fasterxml.jackson.databind.DeserializationFeature; -import com.fasterxml.jackson.databind.node.JsonNodeFactory; -import com.fasterxml.jackson.databind.node.ObjectNode; -import com.github.mmolimar.kafka.connect.fs.file.Offset; -import com.github.mmolimar.kafka.connect.fs.file.reader.AgnosticFileReader; -import com.github.mmolimar.kafka.connect.fs.file.reader.FileReader; -import com.github.mmolimar.kafka.connect.fs.file.reader.JsonFileReader; -import org.apache.hadoop.fs.Path; -import org.apache.kafka.connect.data.Struct; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.nio.charset.UnsupportedCharsetException; -import java.util.Arrays; -import java.util.HashMap; -import java.util.Map; -import java.util.UUID; -import java.util.stream.IntStream; - -import static org.junit.jupiter.api.Assertions.*; - -public class JsonFileReaderTest extends HdfsFileReaderTestBase { - - private static final String FIELD_INTEGER = "integerField"; - private static final String FIELD_LONG = "longField"; - private static final String FIELD_BOOLEAN = "booleanField"; - private static final String FIELD_STRING = "stringField"; - private static final String FIELD_DECIMAL = "decimalField"; - private static final String FIELD_ARRAY = "arrayField"; - private static final String FIELD_STRUCT = "structField"; - private static final String FIELD_NULL = "nullField"; - private static final String FILE_EXTENSION = "json"; - - @BeforeAll - public static void setUp() throws IOException { - readerClass = AgnosticFileReader.class; - dataFile = createDataFile(); - readerConfig = new HashMap() {{ - String deserializationConfig = DeserializationFeature.ACCEPT_EMPTY_ARRAY_AS_NULL_OBJECT.name(); - put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_JSON, FILE_EXTENSION); - put(JsonFileReader.FILE_READER_JSON_DESERIALIZATION_CONFIGS + deserializationConfig, "true"); - put(JsonFileReader.FILE_READER_JSON_DESERIALIZATION_CONFIGS + "invalid", "false"); - }}; - } - - private static Path createDataFile() throws IOException { - return createDataFile(NUM_RECORDS, true); - } - - private static Path createDataFile(int numRecords, boolean recordPerLine) throws IOException { - File txtFile = File.createTempFile("test-", "." + FILE_EXTENSION); - try (FileWriter writer = new FileWriter(txtFile)) { - IntStream.range(0, numRecords).forEach(index -> { - ObjectNode json = JsonNodeFactory.instance.objectNode() - .put(FIELD_INTEGER, index) - .put(FIELD_LONG, Long.MAX_VALUE) - .put(FIELD_STRING, String.format("%d_%s", index, UUID.randomUUID())) - .put(FIELD_BOOLEAN, true) - .put(FIELD_DECIMAL, Double.parseDouble(index + "." 
+ index)) - .put(FIELD_NULL, (String) null); - json.putArray(FIELD_ARRAY) - .add("elm[" + index + "]") - .add("elm[" + index + "]"); - json.putObject(FIELD_STRUCT) - .put(FIELD_INTEGER, (short) index) - .put(FIELD_LONG, Long.MAX_VALUE) - .put(FIELD_STRING, String.format("%d_%s", index, UUID.randomUUID())) - .put(FIELD_BOOLEAN, true) - .put(FIELD_DECIMAL, Double.parseDouble(index + "." + index)) - .put(FIELD_NULL, (String) null); - try { - writer.append(recordPerLine ? json.toString() + "\n" : json.toPrettyString()); - OFFSETS_BY_INDEX.put(index, (long) index); - } catch (IOException ioe) { - throw new RuntimeException(ioe); - } - }); - } - Path path = new Path(new Path(fsUri), txtFile.getName()); - fs.moveFromLocalFile(new Path(txtFile.getAbsolutePath()), path); - return path; - } - - @Test - public void emptyFile() throws Throwable { - File tmp = File.createTempFile("test-", "." + getFileExtension()); - Path path = new Path(new Path(fsUri), tmp.getName()); - fs.moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); - getReader(fs, path, readerConfig); - } - - @Test - public void readEmptyFile() throws Throwable { - File tmp = File.createTempFile("test-", "." + getFileExtension()); - Path path = new Path(new Path(fsUri), tmp.getName()); - fs.moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); - FileReader reader = getReader(fs, path, readerConfig); - assertFalse(reader.hasNext()); - } - - @Test - public void validFileEncoding() throws Throwable { - Map cfg = new HashMap() {{ - put(JsonFileReader.FILE_READER_JSON_ENCODING, "Cp1252"); - }}; - reader = getReader(fs, dataFile, cfg); - readAllData(); - } - - @Test - public void invalidFileEncoding() { - Map cfg = new HashMap() {{ - put(JsonFileReader.FILE_READER_JSON_ENCODING, "invalid_charset"); - }}; - assertThrows(UnsupportedCharsetException.class, () -> getReader(fs, dataFile, cfg)); - } - - @Test - public void readDataWithRecordPerLineDisabled() throws Throwable { - Path file = createDataFile(1, false); - FileReader reader = getReader(fs, file, new HashMap() {{ - put(JsonFileReader.FILE_READER_JSON_RECORD_PER_LINE, "false"); - }}); - - assertTrue(reader.hasNext()); - - int recordCount = 0; - while (reader.hasNext()) { - Struct record = reader.next(); - checkData(record, recordCount); - recordCount++; - } - reader.close(); - assertEquals(1, recordCount, () -> "The number of records in the file does not match"); - } - - @Override - protected Offset getOffset(long offset) { - return () -> offset; - } - - @Override - protected void checkData(Struct record, long index) { - Struct subrecord = record.getStruct(FIELD_STRUCT); - assertAll( - () -> assertEquals((int) (Integer) record.get(FIELD_INTEGER), index), - () -> assertEquals((long) (Long) record.get(FIELD_LONG), Long.MAX_VALUE), - () -> assertTrue(record.get(FIELD_STRING).toString().startsWith(index + "_")), - () -> assertTrue(Boolean.parseBoolean(record.get(FIELD_BOOLEAN).toString())), - () -> assertEquals((Double) record.get(FIELD_DECIMAL), Double.parseDouble(index + "." 
+ index), 0), - () -> assertNull(record.get(FIELD_NULL)), - () -> assertNotNull(record.schema().field(FIELD_NULL)), - () -> assertEquals(record.get(FIELD_ARRAY), Arrays.asList("elm[" + index + "]", "elm[" + index + "]")), - () -> assertEquals((int) (Integer) subrecord.get(FIELD_INTEGER), index), - () -> assertEquals((long) (Long) subrecord.get(FIELD_LONG), Long.MAX_VALUE), - () -> assertTrue(subrecord.get(FIELD_STRING).toString().startsWith(index + "_")), - () -> assertTrue(Boolean.parseBoolean(subrecord.get(FIELD_BOOLEAN).toString())), - () -> assertEquals((Double) subrecord.get(FIELD_DECIMAL), Double.parseDouble(index + "." + index), 0), - () -> assertNull(subrecord.get(FIELD_NULL)), - () -> assertNotNull(subrecord.schema().field(FIELD_NULL)) - ); - - } - - @Override - protected String getFileExtension() { - return FILE_EXTENSION; - } -} diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/ParquetFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/ParquetFileReaderTest.java deleted file mode 100644 index d08395d..0000000 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/ParquetFileReaderTest.java +++ /dev/null @@ -1,171 +0,0 @@ -package com.github.mmolimar.kafka.connect.fs.file.reader.hdfs; - -import com.github.mmolimar.kafka.connect.fs.file.Offset; -import com.github.mmolimar.kafka.connect.fs.file.reader.AgnosticFileReader; -import com.github.mmolimar.kafka.connect.fs.file.reader.ParquetFileReader; -import org.apache.avro.AvroRuntimeException; -import org.apache.avro.Schema; -import org.apache.avro.SchemaBuilder; -import org.apache.avro.SchemaParseException; -import org.apache.avro.generic.GenericData; -import org.apache.avro.generic.GenericRecord; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.kafka.connect.data.Struct; -import org.apache.kafka.connect.errors.DataException; -import org.apache.parquet.avro.AvroParquetWriter; -import org.apache.parquet.hadoop.ParquetFileWriter; -import org.apache.parquet.hadoop.ParquetWriter; -import org.apache.parquet.io.InvalidRecordException; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; - -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.util.HashMap; -import java.util.Map; -import java.util.UUID; -import java.util.stream.IntStream; - -import static org.junit.jupiter.api.Assertions.*; - -public class ParquetFileReaderTest extends HdfsFileReaderTestBase { - - private static final String FIELD_INDEX = "index"; - private static final String FIELD_NAME = "name"; - private static final String FIELD_SURNAME = "surname"; - private static final String FILE_EXTENSION = "parquet"; - - private static Schema readerSchema; - private static Schema projectionSchema; - - @BeforeAll - public static void setUp() throws IOException { - readerClass = AgnosticFileReader.class; - dataFile = createDataFile(); - readerConfig = new HashMap<>(); - } - - private static Path createDataFile() throws IOException { - File parquetFile = File.createTempFile("test-", "." 
+ FILE_EXTENSION); - readerSchema = new Schema.Parser().parse( - ParquetFileReaderTest.class.getResourceAsStream("/file/reader/schemas/people.avsc")); - projectionSchema = new Schema.Parser().parse( - ParquetFileReaderTest.class.getResourceAsStream("/file/reader/schemas/people_projection.avsc")); - - try (ParquetWriter writer = AvroParquetWriter.builder(new Path(parquetFile.toURI())) - .withConf(fs.getConf()).withWriteMode(ParquetFileWriter.Mode.OVERWRITE).withSchema(readerSchema).build()) { - IntStream.range(0, NUM_RECORDS).forEach(index -> { - GenericRecord datum = new GenericData.Record(readerSchema); - datum.put(FIELD_INDEX, index); - datum.put(FIELD_NAME, String.format("%d_name_%s", index, UUID.randomUUID())); - datum.put(FIELD_SURNAME, String.format("%d_surname_%s", index, UUID.randomUUID())); - try { - OFFSETS_BY_INDEX.put(index, (long) index); - writer.write(datum); - } catch (IOException ioe) { - throw new RuntimeException(ioe); - } - }); - } - Path path = new Path(new Path(fsUri), parquetFile.getName()); - fs.moveFromLocalFile(new Path(parquetFile.getAbsolutePath()), path); - return path; - } - - @Test - public void emptyFile() throws Throwable { - File tmp = File.createTempFile("test-", "." + getFileExtension()); - Path path = new Path(new Path(fsUri), tmp.getName()); - fs.moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); - getReader(fs, path, readerConfig); - } - - @Test - public void invalidFileFormat() throws Throwable { - File tmp = File.createTempFile("test-", "." + getFileExtension()); - try (BufferedWriter writer = new BufferedWriter(new FileWriter(tmp))) { - writer.write("test"); - } - Path path = new Path(new Path(fsUri), tmp.getName()); - fs.moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); - getReader(fs, path, readerConfig); - } - - @Test - public void readerWithSchema() throws Throwable { - Map cfg = new HashMap() {{ - put(ParquetFileReader.FILE_READER_PARQUET_SCHEMA, readerSchema.toString()); - }}; - reader = getReader(FileSystem.newInstance(fsUri, new Configuration()), dataFile, cfg); - readAllData(); - } - - @Test - public void readerWithProjection() throws Throwable { - Map cfg = new HashMap() {{ - put(ParquetFileReader.FILE_READER_PARQUET_PROJECTION, projectionSchema.toString()); - }}; - reader = getReader(FileSystem.newInstance(fsUri, new Configuration()), dataFile, cfg); - while (reader.hasNext()) { - Struct record = reader.next(); - assertNotNull(record.schema().field(FIELD_INDEX)); - assertNotNull(record.schema().field(FIELD_NAME)); - assertNull(record.schema().field(FIELD_SURNAME)); - } - - reader = getReader(FileSystem.newInstance(fsUri, new Configuration()), dataFile, cfg); - assertThrows(DataException.class, this::readAllData); - } - - @Test - public void readerWithInvalidProjection() throws Throwable { - Schema testSchema = SchemaBuilder.record("test_projection").namespace("test.avro") - .fields() - .name("field1").type("string").noDefault() - .endRecord(); - Map cfg = new HashMap() {{ - put(ParquetFileReader.FILE_READER_PARQUET_PROJECTION, testSchema.toString()); - }}; - reader = getReader(FileSystem.newInstance(fsUri, new Configuration()), dataFile, cfg); - assertThrows(InvalidRecordException.class, this::readAllData); - } - - @Test - public void readerWithInvalidSchema() throws Throwable { - Map cfg = new HashMap() {{ - put(ParquetFileReader.FILE_READER_PARQUET_SCHEMA, Schema.create(Schema.Type.STRING).toString()); - }}; - reader = getReader(FileSystem.newInstance(fsUri, new Configuration()), dataFile, cfg); - 
assertThrows(AvroRuntimeException.class, this::readAllData); - } - - @Test - public void readerWithUnparseableSchema() { - Map cfg = new HashMap() {{ - put(ParquetFileReader.FILE_READER_PARQUET_SCHEMA, "invalid schema"); - }}; - assertThrows(SchemaParseException.class, () -> - getReader(FileSystem.newInstance(fsUri, new Configuration()), dataFile, cfg)); - } - - @Override - protected Offset getOffset(long offset) { - return new ParquetFileReader.ParquetOffset(offset); - } - - @Override - protected void checkData(Struct record, long index) { - assertEquals((int) (Integer) record.get(FIELD_INDEX), index); - assertTrue(record.get(FIELD_NAME).toString().startsWith(index + "_")); - assertTrue(record.get(FIELD_SURNAME).toString().startsWith(index + "_")); - } - - @Override - protected String getFileExtension() { - return FILE_EXTENSION; - } -} diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/SequenceFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/SequenceFileReaderTest.java deleted file mode 100644 index a4435bc..0000000 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/SequenceFileReaderTest.java +++ /dev/null @@ -1,110 +0,0 @@ -package com.github.mmolimar.kafka.connect.fs.file.reader.hdfs; - -import com.github.mmolimar.kafka.connect.fs.file.Offset; -import com.github.mmolimar.kafka.connect.fs.file.reader.AgnosticFileReader; -import com.github.mmolimar.kafka.connect.fs.file.reader.SequenceFileReader; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.IntWritable; -import org.apache.hadoop.io.SequenceFile; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.util.ReflectionUtils; -import org.apache.kafka.connect.data.Struct; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; - -import java.io.File; -import java.io.IOException; -import java.util.HashMap; -import java.util.Map; -import java.util.UUID; -import java.util.stream.IntStream; - -import static org.junit.jupiter.api.Assertions.*; - -public class SequenceFileReaderTest extends HdfsFileReaderTestBase { - - private static final String FIELD_NAME_KEY = "key"; - private static final String FIELD_NAME_VALUE = "value"; - private static final String FILE_EXTENSION = "seq"; - - @BeforeAll - public static void setUp() throws IOException { - readerClass = AgnosticFileReader.class; - dataFile = createDataFile(); - readerConfig = new HashMap() {{ - put(SequenceFileReader.FILE_READER_SEQUENCE_FIELD_NAME_KEY, FIELD_NAME_KEY); - put(SequenceFileReader.FILE_READER_SEQUENCE_FIELD_NAME_VALUE, FIELD_NAME_VALUE); - }}; - } - - private static Path createDataFile() throws IOException { - File seqFile = File.createTempFile("test-", "." 
+ FILE_EXTENSION); - try (SequenceFile.Writer writer = SequenceFile.createWriter(fs.getConf(), SequenceFile.Writer.file(new Path(seqFile.getAbsolutePath())), - SequenceFile.Writer.keyClass(IntWritable.class), SequenceFile.Writer.valueClass(Text.class))) { - - IntStream.range(0, NUM_RECORDS).forEach(index -> { - Writable key = new IntWritable(index); - Writable value = new Text(String.format("%d_%s", index, UUID.randomUUID())); - try { - writer.append(key, value); - writer.sync(); - } catch (IOException ioe) { - throw new RuntimeException(ioe); - } - }); - } - try (SequenceFile.Reader reader = new SequenceFile.Reader(fs.getConf(), - SequenceFile.Reader.file(new Path(seqFile.getAbsolutePath())))) { - Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), fs.getConf()); - Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), fs.getConf()); - int index = 0; - long pos = reader.getPosition() - 1; - while (reader.next(key, value)) { - OFFSETS_BY_INDEX.put(index++, pos); - pos = reader.getPosition(); - } - } - Path path = new Path(new Path(fsUri), seqFile.getName()); - fs.moveFromLocalFile(new Path(seqFile.getAbsolutePath()), path); - return path; - } - - @Test - public void defaultFieldNames() throws Throwable { - Map customReaderCfg = new HashMap<>(); - reader = getReader(fs, dataFile, customReaderCfg); - assertEquals(reader.getFilePath(), dataFile); - assertTrue(reader.hasNext()); - - int recordCount = 0; - while (reader.hasNext()) { - Struct record = reader.next(); - checkData(SequenceFileReader.FIELD_NAME_KEY_DEFAULT, SequenceFileReader.FIELD_NAME_VALUE_DEFAULT, record, recordCount); - recordCount++; - } - assertEquals(NUM_RECORDS, recordCount, () -> "The number of records in the file does not match"); - } - - @Override - protected Offset getOffset(long offset) { - return new SequenceFileReader.SeqOffset(offset); - } - - @Override - protected void checkData(Struct record, long index) { - checkData(FIELD_NAME_KEY, FIELD_NAME_VALUE, record, index); - } - - private void checkData(String keyFieldName, String valueFieldName, Struct record, long index) { - assertAll( - () -> assertEquals((int) (Integer) record.get(keyFieldName), index), - () -> assertTrue(record.get(valueFieldName).toString().startsWith(index + "_")) - ); - } - - @Override - protected String getFileExtension() { - return FILE_EXTENSION; - } -} diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/TextFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/TextFileReaderTest.java deleted file mode 100644 index fdb3004..0000000 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/hdfs/TextFileReaderTest.java +++ /dev/null @@ -1,154 +0,0 @@ -package com.github.mmolimar.kafka.connect.fs.file.reader.hdfs; - -import com.github.mmolimar.kafka.connect.fs.file.Offset; -import com.github.mmolimar.kafka.connect.fs.file.reader.AgnosticFileReader; -import com.github.mmolimar.kafka.connect.fs.file.reader.CompressionType; -import com.github.mmolimar.kafka.connect.fs.file.reader.FileReader; -import com.github.mmolimar.kafka.connect.fs.file.reader.TextFileReader; -import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream; -import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream; -import org.apache.hadoop.fs.Path; -import org.apache.kafka.connect.data.Struct; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; - -import java.io.*; -import 
java.nio.charset.UnsupportedCharsetException; -import java.util.Arrays; -import java.util.HashMap; -import java.util.Map; -import java.util.UUID; -import java.util.stream.IntStream; - -import static org.junit.jupiter.api.Assertions.*; - -public class TextFileReaderTest extends HdfsFileReaderTestBase { - - private static final String FIELD_NAME_VALUE = "custom_field_name"; - private static final String FILE_EXTENSION = "txt"; - private static final CompressionType COMPRESSION_TYPE = CompressionType.GZIP; - - @BeforeAll - public static void setUp() throws IOException { - readerClass = AgnosticFileReader.class; - dataFile = createDataFile(COMPRESSION_TYPE); - readerConfig = new HashMap() {{ - put(TextFileReader.FILE_READER_TEXT_FIELD_NAME_VALUE, FIELD_NAME_VALUE); - put(TextFileReader.FILE_READER_TEXT_COMPRESSION_TYPE, COMPRESSION_TYPE); - put(TextFileReader.FILE_READER_TEXT_COMPRESSION_CONCATENATED, "true"); - }}; - } - - private static OutputStream getOutputStream(File file, CompressionType compression) throws IOException { - final OutputStream os; - switch (compression) { - case BZIP2: - os = new BZip2CompressorOutputStream(new FileOutputStream(file)); - break; - case GZIP: - os = new GzipCompressorOutputStream(new FileOutputStream(file)); - break; - default: - os = new FileOutputStream(file); - break; - } - return os; - } - - private static Path createDataFile(CompressionType compression) throws IOException { - File txtFile = File.createTempFile("test-", "." + FILE_EXTENSION); - try (PrintWriter writer = new PrintWriter(getOutputStream(txtFile, compression))) { - IntStream.range(0, NUM_RECORDS).forEach(index -> { - String value = String.format("%d_%s", index, UUID.randomUUID()); - writer.append(value + "\n"); - OFFSETS_BY_INDEX.put(index, (long) index); - }); - } - Path path = new Path(new Path(fsUri), txtFile.getName()); - fs.moveFromLocalFile(new Path(txtFile.getAbsolutePath()), path); - return path; - } - - @Test - public void validFileEncoding() throws Throwable { - Map cfg = new HashMap() {{ - put(TextFileReader.FILE_READER_TEXT_FIELD_NAME_VALUE, FIELD_NAME_VALUE); - put(TextFileReader.FILE_READER_TEXT_ENCODING, "Cp1252"); - put(TextFileReader.FILE_READER_TEXT_COMPRESSION_TYPE, COMPRESSION_TYPE); - }}; - reader = getReader(fs, dataFile, cfg); - readAllData(); - } - - @Test - public void invalidFileEncoding() { - Map cfg = new HashMap() {{ - put(TextFileReader.FILE_READER_TEXT_FIELD_NAME_VALUE, FIELD_NAME_VALUE); - put(TextFileReader.FILE_READER_TEXT_ENCODING, "invalid_charset"); - put(TextFileReader.FILE_READER_TEXT_COMPRESSION_TYPE, COMPRESSION_TYPE); - }}; - assertThrows(UnsupportedCharsetException.class, () -> getReader(fs, dataFile, cfg)); - } - - @Test - public void readDataWithRecordPerLineDisabled() throws Throwable { - Path file = createDataFile(COMPRESSION_TYPE); - FileReader reader = getReader(fs, file, new HashMap() {{ - put(TextFileReader.FILE_READER_TEXT_FIELD_NAME_VALUE, FIELD_NAME_VALUE); - put(TextFileReader.FILE_READER_TEXT_RECORD_PER_LINE, "false"); - put(TextFileReader.FILE_READER_TEXT_COMPRESSION_TYPE, COMPRESSION_TYPE); - }}); - - assertTrue(reader.hasNext()); - - int recordCount = 0; - while (reader.hasNext()) { - Struct record = reader.next(); - checkData(record, recordCount); - recordCount++; - } - reader.close(); - assertEquals(1, recordCount, () -> "The number of records in the file does not match"); - } - - @Test - public void readDifferentCompressionTypes() { - Arrays.stream(CompressionType.values()).forEach(compressionType -> { - try { - Path file = 
createDataFile(compressionType); - FileReader reader = getReader(fs, file, new HashMap() {{ - put(TextFileReader.FILE_READER_TEXT_FIELD_NAME_VALUE, FIELD_NAME_VALUE); - put(TextFileReader.FILE_READER_TEXT_COMPRESSION_TYPE, compressionType); - }}); - - assertTrue(reader.hasNext()); - - int recordCount = 0; - while (reader.hasNext()) { - Struct record = reader.next(); - checkData(record, recordCount); - recordCount++; - } - reader.close(); - assertEquals(NUM_RECORDS, recordCount, () -> "The number of records in the file does not match"); - } catch (Throwable e) { - throw new RuntimeException(e); - } - }); - } - - @Override - protected Offset getOffset(long offset) { - return new TextFileReader.TextOffset(offset); - } - - @Override - protected void checkData(Struct record, long index) { - assertTrue(record.get(FIELD_NAME_VALUE).toString().startsWith(index + "_")); - } - - @Override - protected String getFileExtension() { - return FILE_EXTENSION; - } -} diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/JsonFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/JsonFileReaderTest.java deleted file mode 100644 index 131e427..0000000 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/JsonFileReaderTest.java +++ /dev/null @@ -1,177 +0,0 @@ -package com.github.mmolimar.kafka.connect.fs.file.reader.local; - -import com.fasterxml.jackson.databind.DeserializationFeature; -import com.fasterxml.jackson.databind.node.JsonNodeFactory; -import com.fasterxml.jackson.databind.node.ObjectNode; -import com.github.mmolimar.kafka.connect.fs.file.Offset; -import com.github.mmolimar.kafka.connect.fs.file.reader.AgnosticFileReader; -import com.github.mmolimar.kafka.connect.fs.file.reader.FileReader; -import com.github.mmolimar.kafka.connect.fs.file.reader.JsonFileReader; -import org.apache.hadoop.fs.Path; -import org.apache.kafka.connect.data.Struct; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.nio.charset.UnsupportedCharsetException; -import java.util.Arrays; -import java.util.HashMap; -import java.util.Map; -import java.util.UUID; -import java.util.stream.IntStream; - -import static org.junit.jupiter.api.Assertions.*; - -public class JsonFileReaderTest extends LocalFileReaderTestBase { - - private static final String FIELD_INTEGER = "integerField"; - private static final String FIELD_LONG = "longField"; - private static final String FIELD_BOOLEAN = "booleanField"; - private static final String FIELD_STRING = "stringField"; - private static final String FIELD_DECIMAL = "decimalField"; - private static final String FIELD_ARRAY = "arrayField"; - private static final String FIELD_STRUCT = "structField"; - private static final String FIELD_NULL = "nullField"; - private static final String FILE_EXTENSION = "jsn"; - - @BeforeAll - public static void setUp() throws IOException { - readerClass = AgnosticFileReader.class; - dataFile = createDataFile(); - readerConfig = new HashMap() {{ - String deserializationConfig = DeserializationFeature.ACCEPT_EMPTY_ARRAY_AS_NULL_OBJECT.name(); - put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_JSON, FILE_EXTENSION); - put(JsonFileReader.FILE_READER_JSON_DESERIALIZATION_CONFIGS + deserializationConfig, "true"); - put(JsonFileReader.FILE_READER_JSON_DESERIALIZATION_CONFIGS + "invalid", "false"); - }}; - } - - private static Path createDataFile() throws IOException { 
- return createDataFile(NUM_RECORDS, true); - } - - private static Path createDataFile(int numRecords, boolean recordPerLine) throws IOException { - File txtFile = File.createTempFile("test-", "." + FILE_EXTENSION); - try (FileWriter writer = new FileWriter(txtFile)) { - IntStream.range(0, numRecords).forEach(index -> { - ObjectNode json = JsonNodeFactory.instance.objectNode() - .put(FIELD_INTEGER, index) - .put(FIELD_LONG, Long.MAX_VALUE) - .put(FIELD_STRING, String.format("%d_%s", index, UUID.randomUUID())) - .put(FIELD_BOOLEAN, true) - .put(FIELD_DECIMAL, Double.parseDouble(index + "." + index)) - .put(FIELD_NULL, (String) null); - json.putArray(FIELD_ARRAY) - .add("elm[" + index + "]") - .add("elm[" + index + "]"); - json.putObject(FIELD_STRUCT) - .put(FIELD_INTEGER, (short) index) - .put(FIELD_LONG, Long.MAX_VALUE) - .put(FIELD_STRING, String.format("%d_%s", index, UUID.randomUUID())) - .put(FIELD_BOOLEAN, true) - .put(FIELD_DECIMAL, Double.parseDouble(index + "." + index)) - .put(FIELD_NULL, (String) null); - try { - writer.append(recordPerLine ? json.toString() + "\n" : json.toPrettyString()); - OFFSETS_BY_INDEX.put(index, (long) index); - } catch (IOException ioe) { - throw new RuntimeException(ioe); - } - }); - } - Path path = new Path(new Path(fsUri), txtFile.getName()); - fs.moveFromLocalFile(new Path(txtFile.getAbsolutePath()), path); - return path; - } - - @Test - public void emptyFile() throws Throwable { - File tmp = File.createTempFile("test-", "." + getFileExtension()); - Path path = new Path(new Path(fsUri), tmp.getName()); - fs.moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); - getReader(fs, path, readerConfig); - } - - @Test - public void readEmptyFile() throws Throwable { - File tmp = File.createTempFile("test-", "." + getFileExtension()); - Path path = new Path(new Path(fsUri), tmp.getName()); - fs.moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); - FileReader reader = getReader(fs, path, readerConfig); - assertFalse(reader.hasNext()); - } - - @Test - public void validFileEncoding() throws Throwable { - Map cfg = new HashMap() {{ - put(JsonFileReader.FILE_READER_JSON_ENCODING, "Cp1252"); - put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_JSON, getFileExtension()); - }}; - reader = getReader(fs, dataFile, cfg); - readAllData(); - } - - @Test - public void invalidFileEncoding() { - Map cfg = new HashMap() {{ - put(JsonFileReader.FILE_READER_JSON_ENCODING, "invalid_charset"); - put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_JSON, getFileExtension()); - }}; - assertThrows(UnsupportedCharsetException.class, () -> getReader(fs, dataFile, cfg)); - } - - @Test - public void readDataWithRecordPerLineDisabled() throws Throwable { - Path file = createDataFile(1, false); - FileReader reader = getReader(fs, file, new HashMap() {{ - put(JsonFileReader.FILE_READER_JSON_RECORD_PER_LINE, "false"); - put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_JSON, getFileExtension()); - }}); - - assertTrue(reader.hasNext()); - - int recordCount = 0; - while (reader.hasNext()) { - Struct record = reader.next(); - checkData(record, recordCount); - recordCount++; - } - reader.close(); - assertEquals(1, recordCount, () -> "The number of records in the file does not match"); - } - - @Override - protected Offset getOffset(long offset) { - return () -> offset; - } - - @Override - protected void checkData(Struct record, long index) { - Struct subrecord = record.getStruct(FIELD_STRUCT); - assertAll( - () -> assertEquals((int) (Integer) record.get(FIELD_INTEGER), 
index), - () -> assertEquals((long) (Long) record.get(FIELD_LONG), Long.MAX_VALUE), - () -> assertTrue(record.get(FIELD_STRING).toString().startsWith(index + "_")), - () -> assertTrue(Boolean.parseBoolean(record.get(FIELD_BOOLEAN).toString())), - () -> assertEquals((Double) record.get(FIELD_DECIMAL), Double.parseDouble(index + "." + index), 0), - () -> assertNull(record.get(FIELD_NULL)), - () -> assertNotNull(record.schema().field(FIELD_NULL)), - () -> assertEquals(record.get(FIELD_ARRAY), Arrays.asList("elm[" + index + "]", "elm[" + index + "]")), - () -> assertEquals((int) (Integer) subrecord.get(FIELD_INTEGER), index), - () -> assertEquals((long) (Long) subrecord.get(FIELD_LONG), Long.MAX_VALUE), - () -> assertTrue(subrecord.get(FIELD_STRING).toString().startsWith(index + "_")), - () -> assertTrue(Boolean.parseBoolean(subrecord.get(FIELD_BOOLEAN).toString())), - () -> assertEquals((Double) subrecord.get(FIELD_DECIMAL), Double.parseDouble(index + "." + index), 0), - () -> assertNull(subrecord.get(FIELD_NULL)), - () -> assertNotNull(subrecord.schema().field(FIELD_NULL)) - ); - - } - - @Override - protected String getFileExtension() { - return FILE_EXTENSION; - } -} diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/LocalFileReaderTestBase.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/LocalFileReaderTestBase.java deleted file mode 100644 index f08bff7..0000000 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/LocalFileReaderTestBase.java +++ /dev/null @@ -1,29 +0,0 @@ -package com.github.mmolimar.kafka.connect.fs.file.reader.local; - -import com.github.mmolimar.kafka.connect.fs.file.reader.FileReaderTestBase; -import org.apache.commons.io.FileUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.BeforeAll; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; - -public abstract class LocalFileReaderTestBase extends FileReaderTestBase { - - private static Path localDir; - - @BeforeAll - public static void initFs() throws IOException { - localDir = Files.createTempDirectory("test-"); - fsUri = localDir.toUri(); - fs = FileSystem.newInstance(fsUri, new Configuration()); - } - - @AfterAll - public static void finishFs() throws IOException { - FileUtils.deleteDirectory(localDir.toFile()); - } -} diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/ParquetFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/ParquetFileReaderTest.java deleted file mode 100644 index 41060c6..0000000 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/ParquetFileReaderTest.java +++ /dev/null @@ -1,178 +0,0 @@ -package com.github.mmolimar.kafka.connect.fs.file.reader.local; - -import com.github.mmolimar.kafka.connect.fs.file.Offset; -import com.github.mmolimar.kafka.connect.fs.file.reader.AgnosticFileReader; -import com.github.mmolimar.kafka.connect.fs.file.reader.ParquetFileReader; -import org.apache.avro.AvroRuntimeException; -import org.apache.avro.Schema; -import org.apache.avro.SchemaBuilder; -import org.apache.avro.SchemaParseException; -import org.apache.avro.generic.GenericData; -import org.apache.avro.generic.GenericRecord; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.kafka.connect.data.Struct; 
-import org.apache.kafka.connect.errors.DataException; -import org.apache.parquet.avro.AvroParquetWriter; -import org.apache.parquet.hadoop.ParquetFileWriter; -import org.apache.parquet.hadoop.ParquetWriter; -import org.apache.parquet.io.InvalidRecordException; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; - -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.util.HashMap; -import java.util.Map; -import java.util.UUID; -import java.util.stream.IntStream; - -import static org.junit.jupiter.api.Assertions.*; - -public class ParquetFileReaderTest extends LocalFileReaderTestBase { - - private static final String FIELD_INDEX = "index"; - private static final String FIELD_NAME = "name"; - private static final String FIELD_SURNAME = "surname"; - private static final String FILE_EXTENSION = "prqt"; - - private static Schema readerSchema; - private static Schema projectionSchema; - - @BeforeAll - public static void setUp() throws IOException { - readerClass = AgnosticFileReader.class; - dataFile = createDataFile(); - readerConfig = new HashMap() {{ - put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_PARQUET, FILE_EXTENSION); - }}; - } - - private static Path createDataFile() throws IOException { - File parquetFile = File.createTempFile("test-", "." + FILE_EXTENSION); - readerSchema = new Schema.Parser().parse( - ParquetFileReaderTest.class.getResourceAsStream("/file/reader/schemas/people.avsc")); - projectionSchema = new Schema.Parser().parse( - ParquetFileReaderTest.class.getResourceAsStream("/file/reader/schemas/people_projection.avsc")); - - try (ParquetWriter writer = AvroParquetWriter.builder(new Path(parquetFile.toURI())) - .withConf(fs.getConf()).withWriteMode(ParquetFileWriter.Mode.OVERWRITE).withSchema(readerSchema).build()) { - IntStream.range(0, NUM_RECORDS).forEach(index -> { - GenericRecord datum = new GenericData.Record(readerSchema); - datum.put(FIELD_INDEX, index); - datum.put(FIELD_NAME, String.format("%d_name_%s", index, UUID.randomUUID())); - datum.put(FIELD_SURNAME, String.format("%d_surname_%s", index, UUID.randomUUID())); - try { - OFFSETS_BY_INDEX.put(index, (long) index); - writer.write(datum); - } catch (IOException ioe) { - throw new RuntimeException(ioe); - } - }); - } - Path path = new Path(new Path(fsUri), parquetFile.getName()); - fs.moveFromLocalFile(new Path(parquetFile.getAbsolutePath()), path); - return path; - } - - @Test - public void emptyFile() throws Throwable { - File tmp = File.createTempFile("test-", "." + getFileExtension()); - Path path = new Path(new Path(fsUri), tmp.getName()); - fs.moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); - getReader(fs, path, readerConfig); - } - - @Test - public void invalidFileFormat() throws Throwable { - File tmp = File.createTempFile("test-", "." 
+ getFileExtension()); - try (BufferedWriter writer = new BufferedWriter(new FileWriter(tmp))) { - writer.write("test"); - } - Path path = new Path(new Path(fsUri), tmp.getName()); - fs.moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); - getReader(fs, path, readerConfig); - } - - @Test - public void readerWithSchema() throws Throwable { - Map cfg = new HashMap() {{ - put(ParquetFileReader.FILE_READER_PARQUET_SCHEMA, readerSchema.toString()); - put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_PARQUET, getFileExtension()); - }}; - reader = getReader(FileSystem.newInstance(fsUri, new Configuration()), dataFile, cfg); - readAllData(); - } - - @Test - public void readerWithProjection() throws Throwable { - Map cfg = new HashMap() {{ - put(ParquetFileReader.FILE_READER_PARQUET_PROJECTION, projectionSchema.toString()); - put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_PARQUET, getFileExtension()); - }}; - reader = getReader(FileSystem.newInstance(fsUri, new Configuration()), dataFile, cfg); - while (reader.hasNext()) { - Struct record = reader.next(); - assertNotNull(record.schema().field(FIELD_INDEX)); - assertNotNull(record.schema().field(FIELD_NAME)); - assertNull(record.schema().field(FIELD_SURNAME)); - } - - reader = getReader(FileSystem.newInstance(fsUri, new Configuration()), dataFile, cfg); - assertThrows(DataException.class, this::readAllData); - } - - @Test - public void readerWithInvalidProjection() throws Throwable { - Schema testSchema = SchemaBuilder.record("test_projection").namespace("test.avro") - .fields() - .name("field1").type("string").noDefault() - .endRecord(); - Map cfg = new HashMap() {{ - put(ParquetFileReader.FILE_READER_PARQUET_PROJECTION, testSchema.toString()); - put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_PARQUET, getFileExtension()); - }}; - reader = getReader(FileSystem.newInstance(fsUri, new Configuration()), dataFile, cfg); - assertThrows(InvalidRecordException.class, this::readAllData); - } - - @Test - public void readerWithInvalidSchema() throws Throwable { - Map cfg = new HashMap() {{ - put(ParquetFileReader.FILE_READER_PARQUET_SCHEMA, Schema.create(Schema.Type.STRING).toString()); - put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_PARQUET, getFileExtension()); - }}; - reader = getReader(FileSystem.newInstance(fsUri, new Configuration()), dataFile, cfg); - assertThrows(AvroRuntimeException.class, this::readAllData); - } - - @Test - public void readerWithUnparseableSchema() { - Map cfg = new HashMap() {{ - put(ParquetFileReader.FILE_READER_PARQUET_SCHEMA, "invalid schema"); - put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_PARQUET, getFileExtension()); - }}; - assertThrows(SchemaParseException.class, () -> - getReader(FileSystem.newInstance(fsUri, new Configuration()), dataFile, cfg)); - } - - @Override - protected Offset getOffset(long offset) { - return new ParquetFileReader.ParquetOffset(offset); - } - - @Override - protected void checkData(Struct record, long index) { - assertEquals((int) (Integer) record.get(FIELD_INDEX), index); - assertTrue(record.get(FIELD_NAME).toString().startsWith(index + "_")); - assertTrue(record.get(FIELD_SURNAME).toString().startsWith(index + "_")); - } - - @Override - protected String getFileExtension() { - return FILE_EXTENSION; - } -} diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/TextFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/TextFileReaderTest.java deleted file mode 100644 index fd80931..0000000 
--- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/local/TextFileReaderTest.java +++ /dev/null @@ -1,155 +0,0 @@ -package com.github.mmolimar.kafka.connect.fs.file.reader.local; - -import com.github.mmolimar.kafka.connect.fs.file.Offset; -import com.github.mmolimar.kafka.connect.fs.file.reader.AgnosticFileReader; -import com.github.mmolimar.kafka.connect.fs.file.reader.CompressionType; -import com.github.mmolimar.kafka.connect.fs.file.reader.FileReader; -import com.github.mmolimar.kafka.connect.fs.file.reader.TextFileReader; -import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream; -import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream; -import org.apache.hadoop.fs.Path; -import org.apache.kafka.connect.data.Struct; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; - -import java.io.*; -import java.nio.charset.UnsupportedCharsetException; -import java.util.Arrays; -import java.util.HashMap; -import java.util.Map; -import java.util.UUID; -import java.util.stream.IntStream; - -import static org.junit.jupiter.api.Assertions.*; - -public class TextFileReaderTest extends LocalFileReaderTestBase { - - private static final String FIELD_NAME_VALUE = "custom_field_name"; - private static final String FILE_EXTENSION = "txt"; - private static final CompressionType COMPRESSION_TYPE = CompressionType.GZIP; - - @BeforeAll - public static void setUp() throws IOException { - readerClass = AgnosticFileReader.class; - dataFile = createDataFile(COMPRESSION_TYPE); - readerConfig = new HashMap() {{ - put(TextFileReader.FILE_READER_TEXT_FIELD_NAME_VALUE, FIELD_NAME_VALUE); - put(TextFileReader.FILE_READER_TEXT_COMPRESSION_TYPE, COMPRESSION_TYPE); - put(TextFileReader.FILE_READER_TEXT_COMPRESSION_CONCATENATED, "true"); - }}; - } - - private static OutputStream getOutputStream(File file, CompressionType compression) throws IOException { - final OutputStream os; - switch (compression) { - case BZIP2: - os = new BZip2CompressorOutputStream(new FileOutputStream(file)); - break; - case GZIP: - os = new GzipCompressorOutputStream(new FileOutputStream(file)); - break; - default: - os = new FileOutputStream(file); - break; - } - return os; - } - - - private static Path createDataFile(CompressionType compression) throws IOException { - File txtFile = File.createTempFile("test-", "." 
+ FILE_EXTENSION); - try (PrintWriter writer = new PrintWriter(getOutputStream(txtFile, compression))) { - IntStream.range(0, NUM_RECORDS).forEach(index -> { - String value = String.format("%d_%s", index, UUID.randomUUID()); - writer.append(value + "\n"); - OFFSETS_BY_INDEX.put(index, (long) index); - }); - } - Path path = new Path(new Path(fsUri), txtFile.getName()); - fs.moveFromLocalFile(new Path(txtFile.getAbsolutePath()), path); - return path; - } - - @Test - public void validFileEncoding() throws Throwable { - Map cfg = new HashMap() {{ - put(TextFileReader.FILE_READER_TEXT_FIELD_NAME_VALUE, FIELD_NAME_VALUE); - put(TextFileReader.FILE_READER_TEXT_ENCODING, "Cp1252"); - put(TextFileReader.FILE_READER_TEXT_COMPRESSION_TYPE, COMPRESSION_TYPE); - }}; - reader = getReader(fs, dataFile, cfg); - readAllData(); - } - - @Test - public void invalidFileEncoding() { - Map cfg = new HashMap() {{ - put(TextFileReader.FILE_READER_TEXT_FIELD_NAME_VALUE, FIELD_NAME_VALUE); - put(TextFileReader.FILE_READER_TEXT_ENCODING, "invalid_charset"); - put(TextFileReader.FILE_READER_TEXT_COMPRESSION_TYPE, COMPRESSION_TYPE); - }}; - assertThrows(UnsupportedCharsetException.class, () -> getReader(fs, dataFile, cfg)); - } - - @Test - public void readDataWithRecordPerLineDisabled() throws Throwable { - Path file = createDataFile(COMPRESSION_TYPE); - FileReader reader = getReader(fs, file, new HashMap() {{ - put(TextFileReader.FILE_READER_TEXT_FIELD_NAME_VALUE, FIELD_NAME_VALUE); - put(TextFileReader.FILE_READER_TEXT_RECORD_PER_LINE, "false"); - put(TextFileReader.FILE_READER_TEXT_COMPRESSION_TYPE, COMPRESSION_TYPE); - }}); - - assertTrue(reader.hasNext()); - - int recordCount = 0; - while (reader.hasNext()) { - Struct record = reader.next(); - checkData(record, recordCount); - recordCount++; - } - reader.close(); - assertEquals(1, recordCount, () -> "The number of records in the file does not match"); - } - - @Test - public void readDifferentCompressionTypes() { - Arrays.stream(CompressionType.values()).forEach(compressionType -> { - try { - Path file = createDataFile(compressionType); - FileReader reader = getReader(fs, file, new HashMap() {{ - put(TextFileReader.FILE_READER_TEXT_FIELD_NAME_VALUE, FIELD_NAME_VALUE); - put(TextFileReader.FILE_READER_TEXT_COMPRESSION_TYPE, compressionType); - }}); - - assertTrue(reader.hasNext()); - - int recordCount = 0; - while (reader.hasNext()) { - Struct record = reader.next(); - checkData(record, recordCount); - recordCount++; - } - reader.close(); - assertEquals(NUM_RECORDS, recordCount, () -> "The number of records in the file does not match"); - } catch (Throwable e) { - throw new RuntimeException(e); - } - }); - } - - @Override - protected Offset getOffset(long offset) { - return new TextFileReader.TextOffset(offset); - } - - @Override - protected void checkData(Struct record, long index) { - assertTrue(record.get(FIELD_NAME_VALUE).toString().startsWith(index + "_")); - } - - @Override - protected String getFileExtension() { - return FILE_EXTENSION; - } -} From e4427c5836217374452b3f7d67c05adfdf37a43f Mon Sep 17 00:00:00 2001 From: Mario Molina Date: Fri, 20 Mar 2020 17:48:57 -0600 Subject: [PATCH 23/51] New Univocity readers for TSV and CSV text files --- pom.xml | 8 +++- .../fs/file/reader/AgnosticFileReader.java | 21 +++++---- .../connect/fs/file/reader/CsvFileReader.java | 43 +++++++++++++++++++ .../connect/fs/file/reader/TsvFileReader.java | 37 ++++++++++++++++ 4 files changed, 100 insertions(+), 9 deletions(-) create mode 100644 
src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/CsvFileReader.java create mode 100644 src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/TsvFileReader.java diff --git a/pom.xml b/pom.xml index 11a210e..b575f0e 100644 --- a/pom.xml +++ b/pom.xml @@ -16,6 +16,7 @@ 3.2.1 1.11.0 1.9.2 + 2.8.4 2.10.2 9.0.2 5.6.0 @@ -75,6 +76,11 @@ jackson-core ${fasterxml-jackson.version} + + com.univocity + univocity-parsers + ${univocity.version} + com.cronutils cron-utils @@ -84,7 +90,7 @@ org.junit.jupiter - junit-jupiter-api + junit-jupiter ${junit-jupiter.version} test diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AgnosticFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AgnosticFileReader.java index 30a6371..a096f5c 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AgnosticFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AgnosticFileReader.java @@ -17,14 +17,18 @@ public class AgnosticFileReader extends AbstractFileReader reader; - private List parquetExtensions, avroExtensions, sequenceExtensions, jsonExtensions, delimitedExtensions; + private List parquetExtensions, avroExtensions, sequenceExtensions, + jsonExtensions, csvExtensions, tsvExtensions; public AgnosticFileReader(FileSystem fs, Path filePath, Map config) throws IOException { super(fs, filePath, new AgnosticAdapter(), config); @@ -53,8 +57,10 @@ private AbstractFileReader readerByExtension(FileSystem fs, Path filePat clz = SequenceFileReader.class; } else if (jsonExtensions.contains(extension)) { clz = JsonFileReader.class; - } else if (delimitedExtensions.contains(extension)) { - clz = DelimitedTextFileReader.class; + } else if (csvExtensions.contains(extension)) { + clz = CsvFileReader.class; + } else if (tsvExtensions.contains(extension)) { + clz = TsvFileReader.class; } else { clz = TextFileReader.class; } @@ -72,7 +78,9 @@ protected void configure(Map config) { .toLowerCase().split(",")); this.jsonExtensions = Arrays.asList(config.getOrDefault(FILE_READER_AGNOSTIC_EXTENSIONS_JSON, "json") .toLowerCase().split(",")); - this.delimitedExtensions = Arrays.asList(config.getOrDefault(FILE_READER_AGNOSTIC_EXTENSIONS_DELIMITED, "tsv,csv") + this.csvExtensions = Arrays.asList(config.getOrDefault(FILE_READER_AGNOSTIC_EXTENSIONS_CSV, "csv") + .toLowerCase().split(",")); + this.tsvExtensions = Arrays.asList(config.getOrDefault(FILE_READER_AGNOSTIC_EXTENSIONS_TSV, "tsv") .toLowerCase().split(",")); } @@ -103,9 +111,6 @@ protected AgnosticRecord nextRecord() { static class AgnosticAdapter implements ReaderAdapter { - AgnosticAdapter() { - } - @Override public Struct apply(AgnosticRecord ag) { return ag.adapter.apply(ag.record); diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/CsvFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/CsvFileReader.java new file mode 100644 index 0000000..9442e54 --- /dev/null +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/CsvFileReader.java @@ -0,0 +1,43 @@ +package com.github.mmolimar.kafka.connect.fs.file.reader; + +import com.univocity.parsers.common.AbstractParser; +import com.univocity.parsers.csv.CsvParser; +import com.univocity.parsers.csv.CsvParserSettings; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +import java.io.IOException; +import java.util.Map; + +public class CsvFileReader extends UnivocityFileReader { + + public static final String 
FILE_READER_DELIMITED_SETTINGS_DELIMITER_DETECTION = FILE_READER_DELIMITED_SETTINGS + "delimiter_detection"; + public static final String FILE_READER_DELIMITED_SETTINGS_EMPTY_VALUE = FILE_READER_DELIMITED_SETTINGS + "empty_value"; + public static final String FILE_READER_DELIMITED_SETTINGS_ESCAPE_UNQUOTED = FILE_READER_DELIMITED_SETTINGS + "escape_unquoted"; + public static final String FILE_READER_DELIMITED_SETTINGS_FORMAT_DELIMITER = FILE_READER_DELIMITED_SETTINGS_FORMAT + "delimiter"; + + public static final String FILE_READER_DELIMITED_SETTINGS_FORMAT_QUOTE = FILE_READER_DELIMITED_SETTINGS_FORMAT + "quote"; + public static final String FILE_READER_DELIMITED_SETTINGS_FORMAT_QUOTE_ESCAPE = FILE_READER_DELIMITED_SETTINGS_FORMAT + "quote_scape"; + + public CsvFileReader(FileSystem fs, Path filePath, Map config) throws IOException { + super(fs, filePath, config); + } + + @Override + protected CsvParserSettings parserSettings(Map config) { + CsvParserSettings settings = new CsvParserSettings(); + settings.setDelimiterDetectionEnabled(getBoolean(config, FILE_READER_DELIMITED_SETTINGS_DELIMITER_DETECTION, false)); + settings.setEmptyValue(config.get(FILE_READER_DELIMITED_SETTINGS_EMPTY_VALUE)); + settings.setEscapeUnquotedValues(getBoolean(config, FILE_READER_DELIMITED_SETTINGS_ESCAPE_UNQUOTED, false)); + settings.getFormat().setDelimiter(config.getOrDefault(FILE_READER_DELIMITED_SETTINGS_FORMAT_DELIMITER, ",")); + settings.getFormat().setQuote(config.getOrDefault(FILE_READER_DELIMITED_SETTINGS_FORMAT_QUOTE, "\"").charAt(0)); + settings.getFormat().setQuoteEscape(config.getOrDefault(FILE_READER_DELIMITED_SETTINGS_FORMAT_QUOTE_ESCAPE, "\"").charAt(0)); + + return settings; + } + + @Override + protected AbstractParser createParser(CsvParserSettings settings) { + return new CsvParser(settings); + } +} diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/TsvFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/TsvFileReader.java new file mode 100644 index 0000000..f626a8e --- /dev/null +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/TsvFileReader.java @@ -0,0 +1,37 @@ +package com.github.mmolimar.kafka.connect.fs.file.reader; + +import com.univocity.parsers.common.AbstractParser; +import com.univocity.parsers.tsv.TsvParser; +import com.univocity.parsers.tsv.TsvParserSettings; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +import java.io.IOException; +import java.util.Map; + +public class TsvFileReader extends UnivocityFileReader { + + public static final String FILE_READER_DELIMITED_SETTINGS_LINE_JOINING = FILE_READER_DELIMITED_SETTINGS + "line_joining"; + + public static final String FILE_READER_DELIMITED_SETTINGS_FORMAT_ESCAPE = FILE_READER_DELIMITED_SETTINGS_FORMAT + "escape"; + public static final String FILE_READER_DELIMITED_SETTINGS_FORMAT_ESCAPED_CHAR = FILE_READER_DELIMITED_SETTINGS_FORMAT + "escaped_char"; + + public TsvFileReader(FileSystem fs, Path filePath, Map config) throws IOException { + super(fs, filePath, config); + } + + @Override + protected TsvParserSettings parserSettings(Map config) { + TsvParserSettings settings = new TsvParserSettings(); + settings.setLineJoiningEnabled(getBoolean(config, FILE_READER_DELIMITED_SETTINGS_LINE_JOINING, false)); + settings.getFormat().setEscapeChar(config.getOrDefault(FILE_READER_DELIMITED_SETTINGS_FORMAT_ESCAPE, "\"").charAt(0)); + settings.getFormat().setEscapedTabChar(config.getOrDefault(FILE_READER_DELIMITED_SETTINGS_FORMAT_ESCAPED_CHAR, 
"\"").charAt(0)); + + return settings; + } + + @Override + protected AbstractParser createParser(TsvParserSettings settings) { + return new TsvParser(settings); + } +} From 5007c745c96f7cb507ae7cfc68b8e4b54a6f684a Mon Sep 17 00:00:00 2001 From: Mario Molina Date: Fri, 20 Mar 2020 18:26:57 -0600 Subject: [PATCH 24/51] Upgrade Kafka and Confluent versions --- pom.xml | 23 ++--------------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/pom.xml b/pom.xml index b575f0e..fcdad09 100644 --- a/pom.xml +++ b/pom.xml @@ -11,13 +11,11 @@ UTF-8 - 2.4.0 - 5.4.0 + 2.4.1 + 5.4.1 3.2.1 1.11.0 - 1.9.2 2.8.4 - 2.10.2 9.0.2 5.6.0 4.2 @@ -43,7 +41,6 @@ io.confluent kafka-connect-avro-converter ${confluent.version} - provided org.apache.hadoop @@ -60,22 +57,6 @@ parquet-avro ${parquet.version} - - org.apache.avro - avro - ${avro.version} - - - org.apache.avro - avro-tools - ${avro.version} - nodeps - - - com.fasterxml.jackson.core - jackson-core - ${fasterxml-jackson.version} - com.univocity univocity-parsers From 55b7a7308d1e7665dc10398ca26cac77548ec60b Mon Sep 17 00:00:00 2001 From: Mario Molina Date: Fri, 20 Mar 2020 20:16:30 -0600 Subject: [PATCH 25/51] Tests for CSV and TSV file readers --- .../fs/file/reader/AgnosticFileReader.java | 4 +- .../connect/fs/file/reader/CsvFileReader.java | 8 +- .../fs/file/reader/JsonFileReader.java | 9 +- .../fs/file/reader/UnivocityFileReader.java | 236 ++++++++++++++++++ .../file/reader/AgnosticFileReaderTest.java | 158 ++++++++++++ .../fs/file/reader/AvroFileReaderTest.java | 2 +- .../fs/file/reader/CsvFileReaderTest.java | 84 +++++++ .../fs/file/reader/FileReaderTestBase.java | 16 +- .../fs/file/reader/FileSystemConfig.java | 8 +- .../fs/file/reader/JsonFileReaderTest.java | 16 +- .../fs/file/reader/ParquetFileReaderTest.java | 2 +- .../file/reader/SequenceFileReaderTest.java | 2 +- .../fs/file/reader/TextFileReaderTest.java | 2 +- .../fs/file/reader/TsvFileReaderTest.java | 41 +++ .../file/reader/UnivocityFileReaderTest.java | 187 ++++++++++++++ 15 files changed, 745 insertions(+), 30 deletions(-) create mode 100644 src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/UnivocityFileReader.java create mode 100644 src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/AgnosticFileReaderTest.java create mode 100644 src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/CsvFileReaderTest.java create mode 100644 src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/TsvFileReaderTest.java create mode 100644 src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/UnivocityFileReaderTest.java diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AgnosticFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AgnosticFileReader.java index a096f5c..9ee8665 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AgnosticFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AgnosticFileReader.java @@ -20,11 +20,11 @@ public class AgnosticFileReader extends AbstractFileReader reader; private List parquetExtensions, avroExtensions, sequenceExtensions, diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/CsvFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/CsvFileReader.java index 9442e54..70388dc 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/CsvFileReader.java +++ 
b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/CsvFileReader.java @@ -11,13 +11,13 @@ public class CsvFileReader extends UnivocityFileReader { - public static final String FILE_READER_DELIMITED_SETTINGS_DELIMITER_DETECTION = FILE_READER_DELIMITED_SETTINGS + "delimiter_detection"; public static final String FILE_READER_DELIMITED_SETTINGS_EMPTY_VALUE = FILE_READER_DELIMITED_SETTINGS + "empty_value"; + public static final String FILE_READER_DELIMITED_SETTINGS_DELIMITER_DETECTION = FILE_READER_DELIMITED_SETTINGS + "delimiter_detection"; public static final String FILE_READER_DELIMITED_SETTINGS_ESCAPE_UNQUOTED = FILE_READER_DELIMITED_SETTINGS + "escape_unquoted"; - public static final String FILE_READER_DELIMITED_SETTINGS_FORMAT_DELIMITER = FILE_READER_DELIMITED_SETTINGS_FORMAT + "delimiter"; + public static final String FILE_READER_DELIMITED_SETTINGS_FORMAT_DELIMITER = FILE_READER_DELIMITED_SETTINGS_FORMAT + "delimiter"; public static final String FILE_READER_DELIMITED_SETTINGS_FORMAT_QUOTE = FILE_READER_DELIMITED_SETTINGS_FORMAT + "quote"; - public static final String FILE_READER_DELIMITED_SETTINGS_FORMAT_QUOTE_ESCAPE = FILE_READER_DELIMITED_SETTINGS_FORMAT + "quote_scape"; + public static final String FILE_READER_DELIMITED_SETTINGS_FORMAT_QUOTE_ESCAPE = FILE_READER_DELIMITED_SETTINGS_FORMAT + "quote_escape"; public CsvFileReader(FileSystem fs, Path filePath, Map config) throws IOException { super(fs, filePath, config); @@ -26,8 +26,8 @@ public CsvFileReader(FileSystem fs, Path filePath, Map config) t @Override protected CsvParserSettings parserSettings(Map config) { CsvParserSettings settings = new CsvParserSettings(); - settings.setDelimiterDetectionEnabled(getBoolean(config, FILE_READER_DELIMITED_SETTINGS_DELIMITER_DETECTION, false)); settings.setEmptyValue(config.get(FILE_READER_DELIMITED_SETTINGS_EMPTY_VALUE)); + settings.setDelimiterDetectionEnabled(getBoolean(config, FILE_READER_DELIMITED_SETTINGS_DELIMITER_DETECTION, false)); settings.setEscapeUnquotedValues(getBoolean(config, FILE_READER_DELIMITED_SETTINGS_ESCAPE_UNQUOTED, false)); settings.getFormat().setDelimiter(config.getOrDefault(FILE_READER_DELIMITED_SETTINGS_FORMAT_DELIMITER, ",")); settings.getFormat().setQuote(config.getOrDefault(FILE_READER_DELIMITED_SETTINGS_FORMAT_QUOTE, "\"").charAt(0)); diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/JsonFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/JsonFileReader.java index cf26a34..76db116 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/JsonFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/JsonFileReader.java @@ -25,11 +25,12 @@ public class JsonFileReader extends AbstractFileReader> + extends AbstractFileReader { + + private static final String FILE_READER_DELIMITED = FILE_READER_PREFIX + "delimited."; + private static final String FILE_READER_COMPRESSION = FILE_READER_DELIMITED + "compression."; + + protected static final String FILE_READER_DELIMITED_SETTINGS = FILE_READER_DELIMITED + "settings."; + protected static final String FILE_READER_DELIMITED_SETTINGS_FORMAT = FILE_READER_DELIMITED_SETTINGS + "format."; + + public static final String FILE_READER_DELIMITED_SETTINGS_HEADER = FILE_READER_DELIMITED_SETTINGS + "header"; + public static final String FILE_READER_DELIMITED_SETTINGS_LINE_SEPARATOR_DETECTION = FILE_READER_DELIMITED_SETTINGS + "line_separator_detection"; + public static final String FILE_READER_DELIMITED_SETTINGS_NULL_VALUE = 
FILE_READER_DELIMITED_SETTINGS + "null_value"; + public static final String FILE_READER_DELIMITED_SETTINGS_MAX_COLUMNS = FILE_READER_DELIMITED_SETTINGS + "max_columns"; + public static final String FILE_READER_DELIMITED_SETTINGS_MAX_CHARS_PER_COLUMN = FILE_READER_DELIMITED_SETTINGS + "max_chars_per_column"; + public static final String FILE_READER_DELIMITED_SETTINGS_ROWS_TO_SKIP = FILE_READER_DELIMITED_SETTINGS + "rows_to_skip"; + public static final String FILE_READER_DELIMITED_SETTINGS_ILW = FILE_READER_DELIMITED_SETTINGS + "ignore_leading_whitespaces"; + public static final String FILE_READER_DELIMITED_SETTINGS_ITW = FILE_READER_DELIMITED_SETTINGS + "ignore_trailing_whitespaces"; + + public static final String FILE_READER_DELIMITED_SETTINGS_FORMAT_LINE_SEP = FILE_READER_DELIMITED_SETTINGS_FORMAT + "line_separator"; + public static final String FILE_READER_DELIMITED_SETTINGS_FORMAT_COMMENT = FILE_READER_DELIMITED_SETTINGS_FORMAT + "comment"; + + public static final String FILE_READER_DELIMITED_COMPRESSION_TYPE = FILE_READER_COMPRESSION + "type"; + public static final String FILE_READER_DELIMITED_COMPRESSION_CONCATENATED = FILE_READER_COMPRESSION + "concatenated"; + public static final String FILE_READER_DELIMITED_ENCODING = FILE_READER_DELIMITED + "encoding"; + + private static final String DEFAULT_COLUMN_NAME = "column_"; + + private final UnivocityOffset offset; + private T settings; + private Schema schema; + private Charset charset; + private CompressionType compression; + private boolean closed; + + private ResultIterator iterator; + + public UnivocityFileReader(FileSystem fs, Path filePath, Map config) throws IOException { + super(fs, filePath, new UnivocityToStruct(), config); + + this.offset = new UnivocityOffset(0); + this.iterator = iterateRecords(); + this.schema = buildSchema(this.iterator, settings.isHeaderExtractionEnabled()); + } + + private Schema buildSchema(ResultIterator it, boolean hasHeader) { + SchemaBuilder builder = SchemaBuilder.struct(); + if (it.hasNext() && !hasHeader) { + Record first = it.next(); + IntStream.range(0, first.getValues().length) + .forEach(index -> builder.field(DEFAULT_COLUMN_NAME + ++index, SchemaBuilder.STRING_SCHEMA)); + seek(new UnivocityOffset(0)); + } else if (hasHeader) { + Optional.ofNullable(it.getContext().headers()).ifPresent(headers -> { + IntStream.range(0, headers.length) + .forEach(index -> builder.field(headers[index], SchemaBuilder.STRING_SCHEMA)); + }); + } + return builder.build(); + } + + @Override + protected void configure(Map config) { + String cType = config.getOrDefault(FILE_READER_DELIMITED_COMPRESSION_TYPE, CompressionType.NONE.toString()); + boolean concatenated = Boolean.parseBoolean(config.getOrDefault(FILE_READER_DELIMITED_COMPRESSION_CONCATENATED, + "true")); + this.compression = CompressionType.fromName(cType, concatenated); + this.charset = Charset.forName(config.getOrDefault(FILE_READER_DELIMITED_ENCODING, Charset.defaultCharset().name())); + this.settings = allSettings(config); + } + + private T allSettings(Map config) { + T settings = parserSettings(config); + settings.setHeaderExtractionEnabled(getBoolean(config, FILE_READER_DELIMITED_SETTINGS_HEADER, false)); + settings.setLineSeparatorDetectionEnabled(getBoolean(config, FILE_READER_DELIMITED_SETTINGS_LINE_SEPARATOR_DETECTION, false)); + settings.setNullValue(config.get(FILE_READER_DELIMITED_SETTINGS_NULL_VALUE)); + settings.setMaxColumns(Integer.parseInt(config.getOrDefault(FILE_READER_DELIMITED_SETTINGS_MAX_COLUMNS, "512"))); + 
settings.setMaxCharsPerColumn(Integer.parseInt(config.getOrDefault(FILE_READER_DELIMITED_SETTINGS_MAX_CHARS_PER_COLUMN, "4096"))); + settings.setNumberOfRowsToSkip(Long.parseLong(config.getOrDefault(FILE_READER_DELIMITED_SETTINGS_ROWS_TO_SKIP, "0"))); + settings.setIgnoreLeadingWhitespaces(getBoolean(config, FILE_READER_DELIMITED_SETTINGS_ILW, true)); + settings.setIgnoreTrailingWhitespaces(getBoolean(config, FILE_READER_DELIMITED_SETTINGS_ITW, true)); + settings.getFormat().setLineSeparator(config.getOrDefault(FILE_READER_DELIMITED_SETTINGS_FORMAT_LINE_SEP, "\n")); + settings.getFormat().setComment(config.getOrDefault(FILE_READER_DELIMITED_SETTINGS_FORMAT_COMMENT, "#").charAt(0)); + + return settings; + } + + protected boolean getBoolean(Map config, String property, boolean defaultValue) { + return Boolean.parseBoolean(config.getOrDefault(property, String.valueOf(defaultValue))); + } + + protected abstract T parserSettings(Map config); + + protected abstract AbstractParser createParser(T settings); + + private Reader getFileReader(InputStream is, CompressionType compression, Charset charset) throws IOException { + final InputStreamReader isr; + switch (compression) { + case BZIP2: + isr = new InputStreamReader(new BZip2CompressorInputStream(is, compression.isConcatenated()), charset); + break; + case GZIP: + isr = new InputStreamReader(new GzipCompressorInputStream(is, compression.isConcatenated()), charset); + break; + default: + isr = new InputStreamReader(is, charset); + break; + } + return isr; + } + + private ResultIterator iterateRecords() throws IOException { + return createParser(settings) + .iterateRecords(getFileReader(getFs().open(getFilePath()), this.compression, this.charset)) + .iterator(); + } + + @Override + protected final UnivocityRecord nextRecord() { + if (!hasNext()) throw new NoSuchElementException("There are no more records in file: " + getFilePath()); + + offset.inc(); + Record record = iterator.next(); + return new UnivocityRecord(schema, record.getValues()); + } + + @Override + public final boolean hasNext() { + if (closed) throw new IllegalStateException("Reader already closed."); + + return iterator.hasNext(); + } + + @Override + public final void seek(Offset offset) { + if (offset.getRecordOffset() < 0) { + throw new IllegalArgumentException("Record offset must be greater than 0"); + } + try { + if (offset.getRecordOffset() > this.offset.getRecordOffset()) { + iterator.hasNext(); + iterator.getContext().skipLines(offset.getRecordOffset() - this.offset.getRecordOffset() - 1); + iterator.next(); + } else { + iterator = iterateRecords(); + iterator.hasNext(); + iterator.getContext().skipLines(offset.getRecordOffset()); + } + this.offset.setOffset(offset.getRecordOffset()); + } catch (IOException ioe) { + throw new ConnectException("Error seeking file " + getFilePath(), ioe); + } + } + + @Override + public final Offset currentOffset() { + return offset; + } + + @Override + public final void close() { + iterator.getContext().stop(); + closed = true; + } + + public static class UnivocityOffset implements Offset { + private long offset; + + public UnivocityOffset(long offset) { + this.offset = offset; + } + + public void setOffset(long offset) { + this.offset = offset; + } + + void inc() { + this.offset++; + } + + @Override + public long getRecordOffset() { + return offset; + } + } + + static class UnivocityToStruct implements ReaderAdapter { + + @Override + public Struct apply(UnivocityRecord record) { + Struct struct = new Struct(record.schema); + 
IntStream.range(0, record.schema.fields().size()) + .filter(index -> index < record.values.length) + .forEach(index -> struct.put(record.schema.fields().get(index).name(), record.values[index])); + return struct; + } + } + + static class UnivocityRecord { + private final Schema schema; + private final String[] values; + + UnivocityRecord(Schema schema, String[] values) { + this.schema = schema; + this.values = values; + } + } +} diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/AgnosticFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/AgnosticFileReaderTest.java new file mode 100644 index 0000000..7f25e66 --- /dev/null +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/AgnosticFileReaderTest.java @@ -0,0 +1,158 @@ +package com.github.mmolimar.kafka.connect.fs.file.reader; + +import org.junit.jupiter.api.Nested; + +import java.util.Map; + +public class AgnosticFileReaderTest { + + private static final String FILE_EXTENSION = "test"; + + @Nested + class AgnosticTextFileReaderTest extends TextFileReaderTest { + + @Override + protected Map getReaderConfig() { + Map config = super.getReaderConfig(); + config.put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_TEXT, getFileExtension()); + return config; + } + + @Override + public Class getReaderClass() { + return AgnosticFileReader.class; + } + + @Override + public String getFileExtension() { + return FILE_EXTENSION; + } + } + + @Nested + class AgnosticCsvFileReaderTest extends CsvFileReaderTest { + + @Override + protected Map getReaderConfig() { + Map config = super.getReaderConfig(); + config.put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_CSV, getFileExtension()); + return config; + } + + @Override + public Class getReaderClass() { + return AgnosticFileReader.class; + } + + @Override + public String getFileExtension() { + return FILE_EXTENSION; + } + } + + @Nested + class AgnosticTsvFileReaderTest extends TsvFileReaderTest { + + @Override + protected Map getReaderConfig() { + Map config = super.getReaderConfig(); + config.put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_TSV, getFileExtension()); + return config; + } + + @Override + public Class getReaderClass() { + return AgnosticFileReader.class; + } + + @Override + public String getFileExtension() { + return FILE_EXTENSION; + } + } + + @Nested + class AgnosticJsonFileReaderTest extends JsonFileReaderTest { + + @Override + protected Map getReaderConfig() { + Map config = super.getReaderConfig(); + config.put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_JSON, getFileExtension()); + return config; + } + + @Override + public Class getReaderClass() { + return AgnosticFileReader.class; + } + + @Override + public String getFileExtension() { + return FILE_EXTENSION; + } + } + + @Nested + class AgnosticAvroFileReaderTest extends AvroFileReaderTest { + + @Override + protected Map getReaderConfig() { + Map config = super.getReaderConfig(); + config.put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_AVRO, getFileExtension()); + return config; + } + + @Override + public Class getReaderClass() { + return AgnosticFileReader.class; + } + + @Override + public String getFileExtension() { + return FILE_EXTENSION; + } + } + + @Nested + class AgnosticParquetFileReaderTest extends ParquetFileReaderTest { + + @Override + protected Map getReaderConfig() { + Map config = super.getReaderConfig(); + config.put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_PARQUET, getFileExtension()); + return config; + } + + 
@Override + public Class getReaderClass() { + return AgnosticFileReader.class; + } + + @Override + public String getFileExtension() { + return FILE_EXTENSION; + } + } + + @Nested + class AgnosticSequenceFileReaderTest extends SequenceFileReaderTest { + + @Override + protected Map getReaderConfig() { + Map config = super.getReaderConfig(); + config.put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_SEQUENCE, getFileExtension()); + return config; + } + + @Override + public Class getReaderClass() { + return AgnosticFileReader.class; + } + + @Override + public String getFileExtension() { + return FILE_EXTENSION; + } + } + +} diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReaderTest.java index 176b6dd..75fc8a2 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReaderTest.java @@ -55,7 +55,7 @@ protected Path createDataFile(FileSystemConfig fsConfig, Object... args) throws datum.put(FIELD_NAME, String.format("%d_name_%s", index, UUID.randomUUID())); datum.put(FIELD_SURNAME, String.format("%d_surname_%s", index, UUID.randomUUID())); try { - fsConfig.getOffsetsByIndex().put(index, dataFileWriter.sync() - 16L); + fsConfig.offsetsByIndex().put(index, dataFileWriter.sync() - 16L); dataFileWriter.append(datum); } catch (IOException ioe) { throw new RuntimeException(ioe); diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/CsvFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/CsvFileReaderTest.java new file mode 100644 index 0000000..f4a0809 --- /dev/null +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/CsvFileReaderTest.java @@ -0,0 +1,84 @@ +package com.github.mmolimar.kafka.connect.fs.file.reader; + +import org.apache.hadoop.fs.Path; +import org.apache.kafka.connect.data.Struct; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.io.PrintWriter; +import java.util.HashMap; +import java.util.Map; +import java.util.UUID; +import java.util.stream.IntStream; + +import static org.junit.jupiter.api.Assertions.*; + +public class CsvFileReaderTest extends UnivocityFileReaderTest { + + @Override + protected Path createDataFile(FileSystemConfig fsConfig, Object... args) throws IOException { + boolean header = args.length < 1 || (boolean) args[0]; + CompressionType compression = args.length < 2 ? COMPRESSION_TYPE_DEFAULT : (CompressionType) args[1]; + File txtFile = File.createTempFile("test-", "." 
+ getFileExtension()); + try (PrintWriter writer = new PrintWriter(getOutputStream(txtFile, compression))) { + if (header) { + writer.append(FIELD_COLUMN1 + "#" + FIELD_COLUMN2 + "#" + FIELD_COLUMN3 + "#" + FIELD_COLUMN4 + "\n"); + } + IntStream.range(0, NUM_RECORDS).forEach(index -> { + String value = String.format("%d_%s", index, UUID.randomUUID()); + writer.append(value + "#" + value + "#" + value + "#" + value + "\n"); + fsConfig.offsetsByIndex().put(index, (long) index); + }); + } + Path path = new Path(new Path(fsConfig.getFsUri()), txtFile.getName()); + fsConfig.getFs().moveFromLocalFile(new Path(txtFile.getAbsolutePath()), path); + return path; + } + + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void readAllDataWithMalformedRows(FileSystemConfig fsConfig) throws Throwable { + File tmp = File.createTempFile("test-", "." + getFileExtension()); + try (FileWriter writer = new FileWriter(tmp)) { + writer.append(FIELD_COLUMN1 + "," + FIELD_COLUMN2 + "," + FIELD_COLUMN3 + "," + FIELD_COLUMN4 + "\n"); + writer.append("dummy,\"\",,dummy\n"); + writer.append("#comment\n"); + writer.append("dummy,\"\",,dummy\n"); + } + Map readerConfig = getReaderConfig(); + readerConfig.put(CsvFileReader.FILE_READER_DELIMITED_SETTINGS_FORMAT_DELIMITER, ","); + readerConfig.put(CsvFileReader.FILE_READER_DELIMITED_SETTINGS_HEADER, "true"); + readerConfig.put(CsvFileReader.FILE_READER_DELIMITED_SETTINGS_EMPTY_VALUE, "empty_value"); + readerConfig.put(CsvFileReader.FILE_READER_DELIMITED_SETTINGS_NULL_VALUE, "null_value"); + + Path path = new Path(new Path(fsConfig.getFsUri()), tmp.getName()); + fsConfig.getFs().moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); + FileReader reader = getReader(fsConfig.getFs(), path, readerConfig); + + assertTrue(reader.hasNext()); + + int recordCount = 0; + while (reader.hasNext()) { + Struct record = reader.next(); + assertAll( + () -> assertEquals("dummy", record.get(FIELD_COLUMN1)), + () -> assertEquals("empty_value", record.get(FIELD_COLUMN2)), + () -> assertEquals("null_value", record.get(FIELD_COLUMN3)), + () -> assertEquals("dummy", record.get(FIELD_COLUMN4)) + ); + recordCount++; + } + assertEquals(2, recordCount, () -> "The number of records in the file does not match"); + } + + @Override + protected Map getReaderConfig() { + return new HashMap() {{ + put(CsvFileReader.FILE_READER_DELIMITED_SETTINGS_FORMAT_DELIMITER, "#"); + put(CsvFileReader.FILE_READER_DELIMITED_SETTINGS_HEADER, "true"); + }}; + } +} diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FileReaderTestBase.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FileReaderTestBase.java index 188960e..4e0d38b 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FileReaderTestBase.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FileReaderTestBase.java @@ -130,24 +130,24 @@ public void readAllData(FileSystemConfig fsConfig) { public void seekFile(FileSystemConfig fsConfig) { FileReader reader = fsConfig.getReader(); int recordIndex = NUM_RECORDS / 2; - reader.seek(getOffset(fsConfig.getOffsetsByIndex().get(recordIndex))); + reader.seek(getOffset(fsConfig.offsetsByIndex().get(recordIndex))); assertTrue(reader.hasNext()); - assertEquals(fsConfig.getOffsetsByIndex().get(recordIndex), reader.currentOffset().getRecordOffset()); + assertEquals(fsConfig.offsetsByIndex().get(recordIndex), reader.currentOffset().getRecordOffset()); checkData(reader.next(), recordIndex); recordIndex = 0; - 
reader.seek(getOffset(fsConfig.getOffsetsByIndex().get(recordIndex))); + reader.seek(getOffset(fsConfig.offsetsByIndex().get(recordIndex))); assertTrue(reader.hasNext()); - assertEquals(fsConfig.getOffsetsByIndex().get(recordIndex), reader.currentOffset().getRecordOffset()); + assertEquals(fsConfig.offsetsByIndex().get(recordIndex), reader.currentOffset().getRecordOffset()); checkData(reader.next(), recordIndex); recordIndex = NUM_RECORDS - 3; - reader.seek(getOffset(fsConfig.getOffsetsByIndex().get(recordIndex))); + reader.seek(getOffset(fsConfig.offsetsByIndex().get(recordIndex))); assertTrue(reader.hasNext()); - assertEquals(fsConfig.getOffsetsByIndex().get(recordIndex), reader.currentOffset().getRecordOffset()); + assertEquals(fsConfig.offsetsByIndex().get(recordIndex), reader.currentOffset().getRecordOffset()); checkData(reader.next(), recordIndex); - reader.seek(getOffset(fsConfig.getOffsetsByIndex().get(NUM_RECORDS - 1) + 1)); + reader.seek(getOffset(fsConfig.offsetsByIndex().get(NUM_RECORDS - 1) + 1)); assertFalse(reader.hasNext()); } @@ -162,7 +162,7 @@ public void negativeSeek(FileSystemConfig fsConfig) { @MethodSource("fileSystemConfigProvider") public void exceededSeek(FileSystemConfig fsConfig) { FileReader reader = fsConfig.getReader(); - reader.seek(getOffset(fsConfig.getOffsetsByIndex().get(NUM_RECORDS - 1) + 1)); + reader.seek(getOffset(fsConfig.offsetsByIndex().get(NUM_RECORDS - 1) + 1)); assertFalse(reader.hasNext()); assertThrows(NoSuchElementException.class, reader::next); } diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FileSystemConfig.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FileSystemConfig.java index c670e5f..a838251 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FileSystemConfig.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FileSystemConfig.java @@ -29,7 +29,7 @@ interface FileSystemConfig extends Closeable { FileReader getReader(); - Map getOffsetsByIndex(); + Map offsetsByIndex(); } @@ -80,7 +80,7 @@ public FileReader getReader() { } @Override - public Map getOffsetsByIndex() { + public Map offsetsByIndex() { return offsetsByIndex; } @@ -141,7 +141,7 @@ public FileReader getReader() { } @Override - public Map getOffsetsByIndex() { + public Map offsetsByIndex() { return offsetsByIndex; } @@ -150,4 +150,4 @@ public void close() throws IOException { fs.close(); cluster.shutdown(true); } -} \ No newline at end of file +} diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/JsonFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/JsonFileReaderTest.java index 9d05edf..da8994b 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/JsonFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/JsonFileReaderTest.java @@ -1,6 +1,9 @@ package com.github.mmolimar.kafka.connect.fs.file.reader; +import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.ObjectWriter; import com.fasterxml.jackson.databind.node.JsonNodeFactory; import com.fasterxml.jackson.databind.node.ObjectNode; import com.github.mmolimar.kafka.connect.fs.file.Offset; @@ -41,6 +44,7 @@ protected Path createDataFile(FileSystemConfig fsConfig, Object... args) throws CompressionType compression = args.length < 3 ? 
COMPRESSION_TYPE_DEFAULT : (CompressionType) args[2]; File txtFile = File.createTempFile("test-", "." + getFileExtension()); try (PrintWriter writer = new PrintWriter(getOutputStream(txtFile, compression))) { + ObjectWriter jsonWriter = new ObjectMapper().writerWithDefaultPrettyPrinter(); IntStream.range(0, numRecords).forEach(index -> { ObjectNode json = JsonNodeFactory.instance.objectNode() .put(FIELD_INTEGER, index) @@ -51,7 +55,7 @@ protected Path createDataFile(FileSystemConfig fsConfig, Object... args) throws .put(FIELD_NULL, (String) null); json.putArray(FIELD_ARRAY) .add("elm[" + index + "]") - .add("elm[" + index + "]"); + .add("elm[" + (index + 1) + "]"); json.putObject(FIELD_STRUCT) .put(FIELD_INTEGER, (short) index) .put(FIELD_LONG, Long.MAX_VALUE) @@ -59,8 +63,12 @@ protected Path createDataFile(FileSystemConfig fsConfig, Object... args) throws .put(FIELD_BOOLEAN, true) .put(FIELD_DECIMAL, Double.parseDouble(index + "." + index)) .put(FIELD_NULL, (String) null); - writer.append(recordPerLine ? json.toString() + "\n" : json.toPrettyString()); - fsConfig.getOffsetsByIndex().put(index, (long) index); + try { + writer.append(recordPerLine ? json.toString() + "\n" : jsonWriter.writeValueAsString(json)); + } catch (JsonProcessingException jpe) { + throw new RuntimeException(jpe); + } + fsConfig.offsetsByIndex().put(index, (long) index); }); } Path path = new Path(new Path(fsConfig.getFsUri()), txtFile.getName()); @@ -181,7 +189,7 @@ protected void checkData(Struct record, long index) { () -> assertEquals((Double) record.get(FIELD_DECIMAL), Double.parseDouble(index + "." + index), 0), () -> assertNull(record.get(FIELD_NULL)), () -> assertNotNull(record.schema().field(FIELD_NULL)), - () -> assertEquals(record.get(FIELD_ARRAY), Arrays.asList("elm[" + index + "]", "elm[" + index + "]")), + () -> assertEquals(record.get(FIELD_ARRAY), Arrays.asList("elm[" + index + "]", "elm[" + (index + 1) + "]")), () -> assertEquals((int) (Integer) subrecord.get(FIELD_INTEGER), index), () -> assertEquals((long) (Long) subrecord.get(FIELD_LONG), Long.MAX_VALUE), () -> assertTrue(subrecord.get(FIELD_STRING).toString().startsWith(index + "_")), diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/ParquetFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/ParquetFileReaderTest.java index 672872e..ae21b88 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/ParquetFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/ParquetFileReaderTest.java @@ -63,7 +63,7 @@ protected Path createDataFile(FileSystemConfig fsConfig, Object... args) throws datum.put(FIELD_NAME, String.format("%d_name_%s", index, uuid)); datum.put(FIELD_SURNAME, String.format("%d_surname_%s", index, uuid)); try { - fsConfig.getOffsetsByIndex().put(index, (long) index); + fsConfig.offsetsByIndex().put(index, (long) index); writer.write(datum); } catch (IOException ioe) { throw new RuntimeException(ioe); diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReaderTest.java index 18377fa..ae87901 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReaderTest.java @@ -52,7 +52,7 @@ protected Path createDataFile(FileSystemConfig fsConfig, Object... 
args) throws int index = 0; long pos = reader.getPosition() - 1; while (reader.next(key, value)) { - fsConfig.getOffsetsByIndex().put(index++, pos); + fsConfig.offsetsByIndex().put(index++, pos); pos = reader.getPosition(); } } diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReaderTest.java index 9220772..53ac900 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReaderTest.java @@ -32,7 +32,7 @@ protected Path createDataFile(FileSystemConfig fsConfig, Object... args) throws IntStream.range(0, NUM_RECORDS).forEach(index -> { String value = String.format("%d_%s", index, UUID.randomUUID()); writer.append(value + "\n"); - fsConfig.getOffsetsByIndex().put(index, (long) index); + fsConfig.offsetsByIndex().put(index, (long) index); }); } Path path = new Path(new Path(fsConfig.getFsUri()), txtFile.getName()); diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/TsvFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/TsvFileReaderTest.java new file mode 100644 index 0000000..20c0dc0 --- /dev/null +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/TsvFileReaderTest.java @@ -0,0 +1,41 @@ +package com.github.mmolimar.kafka.connect.fs.file.reader; + +import org.apache.hadoop.fs.Path; + +import java.io.File; +import java.io.IOException; +import java.io.PrintWriter; +import java.util.HashMap; +import java.util.Map; +import java.util.UUID; +import java.util.stream.IntStream; + +public class TsvFileReaderTest extends UnivocityFileReaderTest { + + @Override + protected Path createDataFile(FileSystemConfig fsConfig, Object... args) throws IOException { + boolean header = args.length < 1 || (boolean) args[0]; + CompressionType compression = args.length < 2 ? COMPRESSION_TYPE_DEFAULT : (CompressionType) args[1]; + File txtFile = File.createTempFile("test-", "." 
+ getFileExtension()); + try (PrintWriter writer = new PrintWriter(getOutputStream(txtFile, compression))) { + if (header) { + writer.append(FIELD_COLUMN1 + "\t" + FIELD_COLUMN2 + "\t" + FIELD_COLUMN3 + "\t" + FIELD_COLUMN4 + "\n"); + } + IntStream.range(0, NUM_RECORDS).forEach(index -> { + String value = String.format("%d_%s", index, UUID.randomUUID()); + writer.append(value + "\t" + value + "\t" + value + "\t" + value + "\n"); + fsConfig.offsetsByIndex().put(index, (long) index); + }); + } + Path path = new Path(new Path(fsConfig.getFsUri()), txtFile.getName()); + fsConfig.getFs().moveFromLocalFile(new Path(txtFile.getAbsolutePath()), path); + return path; + } + + @Override + protected Map getReaderConfig() { + return new HashMap() {{ + put(TsvFileReader.FILE_READER_DELIMITED_SETTINGS_HEADER, "true"); + }}; + } +} diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/UnivocityFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/UnivocityFileReaderTest.java new file mode 100644 index 0000000..da18e0e --- /dev/null +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/UnivocityFileReaderTest.java @@ -0,0 +1,187 @@ +package com.github.mmolimar.kafka.connect.fs.file.reader; + +import com.github.mmolimar.kafka.connect.fs.file.Offset; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.kafka.connect.data.Struct; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; +import java.lang.reflect.ParameterizedType; +import java.nio.charset.UnsupportedCharsetException; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.*; + +abstract class UnivocityFileReaderTest extends FileReaderTestBase { + + protected static final String FIELD_COLUMN1 = "column_1"; + protected static final String FIELD_COLUMN2 = "column_2"; + protected static final String FIELD_COLUMN3 = "column_3"; + protected static final String FIELD_COLUMN4 = "column_4"; + protected static final String FILE_EXTENSION = "tcsv"; + protected static final CompressionType COMPRESSION_TYPE_DEFAULT = CompressionType.NONE; + + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void emptyFile(FileSystemConfig fsConfig) throws Throwable { + File tmp = File.createTempFile("test-", "." + getFileExtension()); + Path path = new Path(new Path(fsConfig.getFsUri()), tmp.getName()); + fsConfig.getFs().moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); + getReader(fsConfig.getFs(), path, getReaderConfig()); + } + + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void invalidFileFormat(FileSystemConfig fsConfig) throws Throwable { + File tmp = File.createTempFile("test-", "." 
+ getFileExtension()); + try (BufferedWriter writer = new BufferedWriter(new FileWriter(tmp))) { + writer.write("test"); + } + Path path = new Path(new Path(fsConfig.getFsUri()), tmp.getName()); + fsConfig.getFs().moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); + getReader(fsConfig.getFs(), path, getReaderConfig()); + } + + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void invaliConfigArgs(FileSystemConfig fsConfig) { + try { + getReaderClass().getConstructor(FileSystem.class, Path.class, Map.class) + .newInstance(fsConfig.getFs(), fsConfig.getDataFile(), new HashMap()); + } catch (Exception e) { + assertThrows(IllegalArgumentException.class, () -> { + throw e.getCause(); + }); + } + } + + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void readAllDataWithoutHeader(FileSystemConfig fsConfig) throws Throwable { + Path file = createDataFile(fsConfig, false); + Map readerConfig = getReaderConfig(); + readerConfig.put(T.FILE_READER_DELIMITED_SETTINGS_HEADER, "false"); + FileReader reader = getReader(fsConfig.getFs(), file, readerConfig); + + assertTrue(reader.hasNext()); + + int recordCount = 0; + while (reader.hasNext()) { + Struct record = reader.next(); + checkData(record, recordCount); + recordCount++; + } + assertEquals(NUM_RECORDS, recordCount, "The number of records in the file does not match"); + } + + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void readDifferentCompressionTypes(FileSystemConfig fsConfig) { + Arrays.stream(CompressionType.values()).forEach(compressionType -> { + try { + Path file = createDataFile(fsConfig, true, compressionType); + Map readerConfig = getReaderConfig(); + readerConfig.put(T.FILE_READER_DELIMITED_COMPRESSION_TYPE, compressionType.toString()); + readerConfig.put(T.FILE_READER_DELIMITED_COMPRESSION_CONCATENATED, "true"); + readerConfig.put(T.FILE_READER_DELIMITED_SETTINGS_HEADER, "true"); + FileReader reader = getReader(fsConfig.getFs(), file, readerConfig); + + assertTrue(reader.hasNext()); + + int recordCount = 0; + while (reader.hasNext()) { + Struct record = reader.next(); + checkData(record, recordCount); + recordCount++; + } + reader.close(); + assertEquals(NUM_RECORDS, recordCount, "The number of records in the file does not match"); + } catch (Throwable e) { + throw new RuntimeException(e); + } + }); + } + + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void seekFileWithoutHeader(FileSystemConfig fsConfig) throws Throwable { + Path file = createDataFile(fsConfig, false); + Map readerConfig = getReaderConfig(); + readerConfig.put(T.FILE_READER_DELIMITED_SETTINGS_HEADER, "false"); + FileReader reader = getReader(fsConfig.getFs(), file, readerConfig); + + assertTrue(reader.hasNext()); + + int recordIndex = NUM_RECORDS / 2; + reader.seek(getOffset(fsConfig.offsetsByIndex().get(recordIndex))); + assertTrue(reader.hasNext()); + assertEquals(fsConfig.offsetsByIndex().get(recordIndex), reader.currentOffset().getRecordOffset()); + checkData(reader.next(), recordIndex); + + recordIndex = 0; + reader.seek(getOffset(fsConfig.offsetsByIndex().get(recordIndex))); + assertTrue(reader.hasNext()); + assertEquals(fsConfig.offsetsByIndex().get(recordIndex), reader.currentOffset().getRecordOffset()); + checkData(reader.next(), recordIndex); + + recordIndex = NUM_RECORDS - 3; + reader.seek(getOffset(fsConfig.offsetsByIndex().get(recordIndex))); + assertTrue(reader.hasNext()); + assertEquals(fsConfig.offsetsByIndex().get(recordIndex), 
reader.currentOffset().getRecordOffset()); + checkData(reader.next(), recordIndex); + + reader.seek(getOffset(fsConfig.offsetsByIndex().get(NUM_RECORDS - 1) + 1)); + assertFalse(reader.hasNext()); + } + + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void validFileEncoding(FileSystemConfig fsConfig) throws Throwable { + Map readerConfig = getReaderConfig(); + readerConfig.put(T.FILE_READER_DELIMITED_SETTINGS_HEADER, "true"); + readerConfig.put(T.FILE_READER_DELIMITED_ENCODING, "Cp1252"); + getReader(fsConfig.getFs(), fsConfig.getDataFile(), readerConfig); + } + + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void invalidFileEncoding(FileSystemConfig fsConfig) { + Map readerConfig = getReaderConfig(); + readerConfig.put(T.FILE_READER_DELIMITED_SETTINGS_HEADER, "true"); + readerConfig.put(T.FILE_READER_DELIMITED_ENCODING, "invalid_charset"); + assertThrows(UnsupportedCharsetException.class, () -> getReader(fsConfig.getFs(), + fsConfig.getDataFile(), readerConfig)); + } + + @Override + protected Offset getOffset(long offset) { + return new T.UnivocityOffset(offset); + } + + @Override + protected Class getReaderClass() { + return (Class) ((ParameterizedType) this.getClass().getGenericSuperclass()) + .getActualTypeArguments()[0]; + } + + @Override + protected void checkData(Struct record, long index) { + assertAll( + () -> assertTrue(record.get(FIELD_COLUMN1).toString().startsWith(index + "_")), + () -> assertTrue(record.get(FIELD_COLUMN2).toString().startsWith(index + "_")), + () -> assertTrue(record.get(FIELD_COLUMN3).toString().startsWith(index + "_")), + () -> assertTrue(record.get(FIELD_COLUMN4).toString().startsWith(index + "_")) + ); + } + + @Override + protected String getFileExtension() { + return FILE_EXTENSION; + } +} From f47a1d42aac877920f7883b3b1fd6af8891a0531 Mon Sep 17 00:00:00 2001 From: Mario Molina Date: Sat, 21 Mar 2020 12:05:21 -0600 Subject: [PATCH 26/51] Updating docs --- README.md | 6 +- docs/source/config_options.rst | 322 +++++++++++++++++++++++++++------ docs/source/connector.rst | 2 +- docs/source/filereaders.rst | 48 +++-- 4 files changed, 300 insertions(+), 78 deletions(-) diff --git a/README.md b/README.md index 76d3961..40d1b27 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,9 @@ # Kafka Connect FileSystem Connector [![Build Status](https://travis-ci.org/mmolimar/kafka-connect-fs.svg?branch=master)](https://travis-ci.org/mmolimar/kafka-connect-fs)[![Coverage Status](https://coveralls.io/repos/github/mmolimar/kafka-connect-fs/badge.svg?branch=master)](https://coveralls.io/github/mmolimar/kafka-connect-fs?branch=master) -**kafka-connect-fs** is a [Kafka Connector](http://kafka.apache.org/documentation.html#connect) +**kafka-connect-fs** is a [Kafka Connector](https://kafka.apache.org/documentation.html#connect) for reading records from files in the file systems specified and load them into Kafka. -Documentation for this connector can be found [here](http://kafka-connect-fs.readthedocs.io/). +Documentation for this connector can be found [here](https://kafka-connect-fs.readthedocs.io/). ## Development @@ -13,7 +13,7 @@ kafka-connect-fs with Maven using the standard lifecycle phases. 
## FAQ Some frequently asked questions on Kafka Connect FileSystem Connector can be found here - -http://kafka-connect-fs.readthedocs.io/en/latest/faq.html +https://kafka-connect-fs.readthedocs.io/en/latest/faq.html ## Contribute diff --git a/docs/source/config_options.rst b/docs/source/config_options.rst index bc89498..5b25ed1 100644 --- a/docs/source/config_options.rst +++ b/docs/source/config_options.rst @@ -48,7 +48,7 @@ General config properties for this connector. If you want to ingest data from dynamic directories, this is, directories created every day and avoiding to add new URIs or look for files from a parent directory, you can include expressions in the URIs to do that. For example, for this URI ``file:///data/${yyyy}``, it will be - converted to ``file:///data/2017`` (when executing whe policy). + converted to ``file:///data/2020`` (when executing whe policy). You can use as many as you like in the URIs, for instance: ``file:///data/${yyyy}/${MM}/${dd}/${HH}${mm}`` @@ -60,7 +60,7 @@ General config properties for this connector. ``policy.fs.fs.s3a.secret.key=``   ``topic`` - Topic in which copy data. + Topic in which copy data to. * Type: string * Importance: high @@ -71,6 +71,12 @@ General config properties for this connector. * Type: string * Importance: high +``policy.regexp`` + Regular expression to filter files from the FS. + + * Type: string + * Importance: high + ``policy.recursive`` Flag to activate traversed recursion in subdirectories when listing files. @@ -78,23 +84,17 @@ General config properties for this connector. * Default: ``false`` * Importance: medium -``policy.regexp`` - Regular expression to filter files from the FS. - - * Type: string - * Importance: high - ``policy..`` - This represents the custom properties you can include based on the policy class specified. + This represents custom properties you can include based on the policy class specified. - * Type: depending on the policy. - * Importance: depending on the policy. + * Type: based on the policy. + * Importance: based on the policy. ``policy.fs.`` Custom properties to use for the FS. - * Type: depending on the FS. - * Importance: depending on the FS. + * Type: based on the FS. + * Importance: based on the FS. ``file_reader.class`` File reader class to read files from the FS (must implement @@ -104,10 +104,10 @@ General config properties for this connector. * Importance: high ``file_reader..`` - This represents the custom properties you can include based on the file reader class specified. + This represents custom properties you can include based on the file reader class specified. - * Type: depending on the file reader. - * Importance: depending on the file reader. + * Type: based on the file reader. + * Importance: based on the file reader. .. _config_options-policies: @@ -272,6 +272,12 @@ To configure custom properties for this reader, the name you must use is ``json` * Type: boolean * Importance: medium +``file_reader.json.encoding`` + Encoding to use for reading a file. If not specified, the reader will use the default encoding. + + * Type: string + * Importance: medium + ``file_reader.json.compression.type`` Compression type to use when reading a file. @@ -287,35 +293,139 @@ To configure custom properties for this reader, the name you must use is ``json` * Default: ``true`` * Importance: low -``file_reader.json.encoding`` - Encoding to use for reading a file. If not specified, the reader will use the default encoding. +.. 
_config_options-filereaders-csv: + +CSV +-------------------------------------------- + +To configure custom properties for this reader, the name you must use is ``delimited`` (even though it's for CSV). + +``file_reader.delimited.settings.header`` + If the file contains header or not. + + * Type: boolean + * Default: ``false`` + * Importance: high + +``file_reader.delimited.settings.format.delimiter`` + Field delimiter. * Type: string + * Default: ``,`` + * Importance: high + +``file_reader.delimited.settings.null_value`` + Default value for ``null`` values. + + * Type: string + * Default: ``null`` * Importance: medium -.. _config_options-filereaders-text: +``file_reader.delimited.settings.empty_value`` + Default value for empty values (empty values within quotes). -Text --------------------------------------------- + * Type: string + * Default: ``null`` + * Importance: medium -To configure custom properties for this reader, the name you must use is ``text``. +``file_reader.delimited.settings.format.line_separator`` + Line separator to be used. -``file_reader.json.record_per_line`` - If enabled, the reader will read each line as a record. Otherwise, the reader will read the full - content of the file as a record. + * Type: string + * Default: ``\n`` + * Importance: medium + +``file_reader.delimited.settings.max_columns`` + Default value for ``null`` values. + + * Type: int + * Default: ``512`` + * Importance: low + +``file_reader.delimited.settings.max_chars_per_column`` + Default value for ``null`` values. + + * Type: int + * Default: ``4096`` + * Importance: low + +``file_reader.delimited.settings.rows_to_skip`` + Number of rows to skip. + + * Type: long + * Default: ``0`` + * Importance: low + +``file_reader.delimited.settings.line_separator_detection`` + If the reader should detect the line separator automatically. + + * Type: boolean + * Default: ``false`` + * Importance: medium + +``file_reader.delimited.settings.delimiter_detection`` + If the reader should detect the delimiter automatically. + + * Type: boolean + * Default: ``false`` + * Importance: medium + +``file_reader.delimited.settings.ignore_leading_whitespaces`` + Flag to enable/disable skipping leading whitespaces from values. * Type: boolean * Default: ``true`` + * Importance: low + +``file_reader.delimited.settings.ignore_trailing_whitespaces`` + Flag to enable/disable skipping trailing whitespaces from values. + + * Type: boolean + * Default: ``true`` + * Importance: low + +``file_reader.delimited.settings.format.comment`` + Character that represents a line comment at the beginning of a line. + + * Type: char + * Default: ``#`` + * Importance: low + +``file_reader.delimited.settings.escape_unquoted`` + Flag to enable/disable processing escape sequences in unquoted values. + + * Type: boolean + * Default: ``false`` + * Importance: low + +``file_reader.delimited.settings.format.quote`` + Character used for escaping values where the field delimiter is part of the value. + + * Type: char + * Default: ``"`` + * Importance: low + +``file_reader.delimited.settings.format.quote_escape`` + Character used for escaping quotes inside an already quoted value. + + * Type: char + * Default: ``"`` + * Importance: low + +``file_reader.delimited.encoding`` + Encoding to use for reading a file. If not specified, the reader will use the default encoding. + + * Type: string * Importance: medium -``file_reader.json.compression.type`` +``file_reader.delimited.compression.type`` Compression type to use when reading a file. 
* Type: enum (available values ``bzip2``, ``gzip`` and ``none``) * Default: ``none`` * Importance: medium -``file_reader.json.compression.concatenated`` +``file_reader.delimited.compression.concatenated`` Flag to specify if the decompression of the reader will finish at the end of the file or after the first compressed stream. @@ -323,39 +433,133 @@ To configure custom properties for this reader, the name you must use is ``text` * Default: ``true`` * Importance: low -``file_reader.text.field_name.value`` - Custom field name for the output value to include in the Kafka message. +.. _config_options-filereaders-tsv: + +TSV +-------------------------------------------- + +To configure custom properties for this reader, the name you must use is ``delimited`` (even though it's for TSV). + +``file_reader.delimited.settings.header`` + If the file contains header or not. + + * Type: boolean + * Default: ``false`` + * Importance: high + +``file_reader.delimited.settings.null_value`` + Default value for ``null`` values. * Type: string - * Default: ``value`` - * Importance: low + * Default: ``null`` + * Importance: medium -``file_reader.text.encoding`` - Encoding to use for reading a file. If not specified, the reader will use the default encoding. +``file_reader.delimited.settings.format.line_separator`` + Line separator to be used. * Type: string + * Default: ``\n`` * Importance: medium -.. _config_options-filereaders-delimited: +``file_reader.delimited.settings.max_columns`` + Default value for ``null`` values. -Delimited text --------------------------------------------- + * Type: int + * Default: ``512`` + * Importance: low -To configure custom properties for this reader, the name you must use is ``delimited``. +``file_reader.delimited.settings.max_chars_per_column`` + Default value for ``null`` values. -``file_reader.delimited.token`` - The token delimiter for columns. + * Type: int + * Default: ``4096`` + * Importance: low - * Type: string - * Importance: high +``file_reader.delimited.settings.rows_to_skip`` + Number of rows to skip. -``file_reader.delimited.header`` - If the file contains header or not. + * Type: long + * Default: ``0`` + * Importance: low + +``file_reader.delimited.settings.line_separator_detection`` + If the reader should detect the line separator automatically. + + * Type: boolean + * Default: ``false`` + * Importance: medium + +``file_reader.delimited.settings.line_separator_detection`` + If the reader should detect the line separator automatically. * Type: boolean * Default: ``false`` + * Importance: low + +``file_reader.delimited.settings.line_joining`` + Identifies whether or lines ending with the escape character and followed by a line + separator character should be joined with the following line. + + * Type: boolean + * Default: ``true`` + * Importance: low + +``file_reader.delimited.settings.ignore_trailing_whitespaces`` + Flag to enable/disable skipping trailing whitespaces from values. + + * Type: boolean + * Default: ``true`` + * Importance: low + +``file_reader.delimited.settings.format.comment`` + Character that represents a line comment at the beginning of a line. + + * Type: char + * Default: ``#`` + * Importance: low + +``file_reader.delimited.settings.format.escape`` + Character used for escaping special characters. + + * Type: char + * Default: ``\`` + * Importance: low + +``file_reader.delimited.settings.format.escaped_char`` + Character used to represent an escaped tab. 
+ + * Type: char + * Default: ``t`` + * Importance: low + +``file_reader.delimited.encoding`` + Encoding to use for reading a file. If not specified, the reader will use the default encoding. + + * Type: string * Importance: medium +``file_reader.delimited.compression.type`` + Compression type to use when reading a file. + + * Type: enum (available values ``bzip2``, ``gzip`` and ``none``) + * Default: ``none`` + * Importance: medium + +``file_reader.delimited.compression.concatenated`` + Flag to specify if the decompression of the reader will finish at the end of the file or after + the first compressed stream. + + * Type: boolean + * Default: ``true`` + * Importance: low + +.. _config_options-filereaders-text: + +Text +-------------------------------------------- + +To configure custom properties for this reader, the name you must use is ``text``. + ``file_reader.json.record_per_line`` If enabled, the reader will read each line as a record. Otherwise, the reader will read the full content of the file as a record. @@ -364,12 +568,17 @@ To configure custom properties for this reader, the name you must use is ``delim * Default: ``true`` * Importance: medium -``file_reader.delimited.default_value`` - Sets a default value in a column when its value is null. This is due to the record is malformed (it does not contain - all expected columns). +``file_reader.text.field_name.value`` + Custom field name for the output value to include in the Kafka message. + + * Type: string + * Default: ``value`` + * Importance: medium + +``file_reader.text.encoding`` + Encoding to use for reading a file. If not specified, the reader will use the default encoding. * Type: string - * Default: ``null`` * Importance: medium ``file_reader.json.compression.type`` @@ -387,11 +596,7 @@ To configure custom properties for this reader, the name you must use is ``delim * Default: ``true`` * Importance: low -``file_reader.delimited.encoding`` - Encoding to use for reading a file. If not specified, the reader will use the default encoding. - - * Type: string - * Importance: medium +.. _config_options-filereaders-agnostic: Agnostic -------------------------------------------- @@ -426,11 +631,18 @@ To configure custom properties for this reader, the name you must use is ``agnos * Default: ``json`` * Importance: medium -``file_reader.agnostic.extensions.delimited`` - A comma-separated string list with the accepted extensions for Delimited text files. +``file_reader.agnostic.extensions.csv`` + A comma-separated string list with the accepted extensions for CSV files. + + * Type: string + * Default: ``csv`` + * Importance: medium + +``file_reader.agnostic.extensions.tsv`` + A comma-separated string list with the accepted extensions for TSV files. * Type: string - * Default: ``tsv,csv`` + * Default: ``tsv`` * Importance: medium .. note:: The Agnostic reader uses the previous ones as inner readers. So, in case of using this diff --git a/docs/source/connector.rst b/docs/source/connector.rst index 8d2e305..6c79317 100644 --- a/docs/source/connector.rst +++ b/docs/source/connector.rst @@ -24,7 +24,7 @@ Getting started Prerequisites -------------------------------------------- -- Confluent Platform 5.4.0 +- Confluent Platform 5.4.1 - Java 8 Building from source diff --git a/docs/source/filereaders.rst b/docs/source/filereaders.rst index 0ea1560..8e52634 100644 --- a/docs/source/filereaders.rst +++ b/docs/source/filereaders.rst @@ -47,6 +47,27 @@ and marked as optional in the schema all the fields contained. 
More information about properties of this file reader :ref:`here`.

+CSV
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+CSV file reader using a custom token to distinguish different columns on each line.
+
+It allows distinguishing a header in the files and setting the names of their columns
+in the message sent to Kafka. If there is no header, the value of each column will be in
+the field named ``column_N`` (**N** represents the column index) in the message.
+Also, the token delimiter for columns is configurable.
+
+More information about properties of this file reader :ref:`here`.
+
+TSV
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+TSV file reader using a tab (``\t``) to distinguish different columns on each line.
+
+Its behaviour is the same as the CSV file reader's regarding the header and the column names.
+
+More information about properties of this file reader :ref:`here`.
+
 Text
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

@@ -58,33 +79,22 @@ customize these field names.

 More information about properties of this file reader :ref:`here`.

-Delimited text
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-Text file reader using a custom token to distinguish different columns on each line.
-
-It allows to distinguish a header in the files and set the name of their columns
-in the message sent to Kafka. If there is no header, the value of each column will be in
-the field named ``column_N`` (**N** represents the column index) in the message.
-Also, the token delimiter for columns is configurable.
-
-More information about properties of this file reader :ref:`here`.
-
 Agnostic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

 Actually, this reader is a wrapper of the readers listed above.

 It tries to read any kind of file format using an internal reader based on the file extension,
-applying the proper one (Parquet, Avro, SecuenceFile, Text or Delimited text). In case of no
+applying the proper one (Parquet, Avro, SequenceFile, CSV, TSV or Text). In case no
 extension has been matched, the Text file reader will be applied.

-Default extensions for each format:
-* Parquet: .parquet
-* Avro: .avro
-* SequenceFile: .seq
-* JSON: .json
-* Delimited text: .tsv, .csv
+Default extensions for each format (configurable):
+* Parquet: ``.parquet``
+* Avro: ``.avro``
+* SequenceFile: ``.seq``
+* JSON: ``.json``
+* CSV: ``.csv``
+* TSV: ``.tsv``
 * Text: any other sort of file extension.

 More information about properties of this file reader :ref:`here`.
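The options documented in this patch combine into an ordinary Kafka Connect configuration. The sketch below is illustrative only and not part of the patch series: the property keys are the ones documented above in ``config_options.rst``, while the connector, policy and file reader class names are assumptions inferred from the classes these commits touch (``FsSourceConnector``, ``SimplePolicy``, ``CsvFileReader``), and ``fs.uris`` is assumed to be the key behind the ``FS_URIS`` constant.

    # Hypothetical connector config assembled from the documented options;
    # class names and the fs.uris key are assumptions, not taken from a shipped example.
    name=local-csv-source
    connector.class=com.github.mmolimar.kafka.connect.fs.FsSourceConnector
    tasks.max=1
    fs.uris=file:///data/${yyyy}/${MM}/${dd}
    topic=csv_topic
    # SimplePolicy is assumed from SimplePolicyTest in this series; any Policy implementation fits here.
    policy.class=com.github.mmolimar.kafka.connect.fs.policy.SimplePolicy
    policy.recursive=true
    policy.regexp=^.*\.csv$
    # CsvFileReader is assumed from CsvFileReaderTest; it maps to the documented file_reader.delimited.* settings.
    file_reader.class=com.github.mmolimar.kafka.connect.fs.file.reader.CsvFileReader
    file_reader.delimited.settings.header=true
    file_reader.delimited.settings.format.delimiter=,
    file_reader.delimited.compression.type=none

With the Agnostic reader instead, ``file_reader.class`` would point at that wrapper and the ``file_reader.agnostic.extensions.csv``/``file_reader.agnostic.extensions.tsv`` mappings documented above decide which inner reader handles each file by extension.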
From 372ecf6071275d18320a589e8cce0293440565dc Mon Sep 17 00:00:00 2001 From: Mario Molina Date: Sat, 21 Mar 2020 19:13:52 -0600 Subject: [PATCH 27/51] Test refactor for tasks and policies --- .../kafka/connect/fs/FsSourceTask.java | 4 +- ...estBase.java => AbstractHdfsFsConfig.java} | 37 +- .../connect/fs/AbstractLocalFsConfig.java | 41 +++ .../kafka/connect/fs/FsTestConfig.java | 17 + .../fs/file/reader/AvroFileReaderTest.java | 8 +- .../fs/file/reader/CsvFileReaderTest.java | 4 +- .../fs/file/reader/FileReaderTestBase.java | 30 +- .../fs/file/reader/FileSystemConfig.java | 153 --------- .../fs/file/reader/JsonFileReaderTest.java | 14 +- .../fs/file/reader/ParquetFileReaderTest.java | 16 +- .../fs/file/reader/ReaderFsTestConfig.java | 97 ++++++ .../file/reader/SequenceFileReaderTest.java | 4 +- .../fs/file/reader/TextFileReaderTest.java | 10 +- .../fs/file/reader/TsvFileReaderTest.java | 2 +- .../file/reader/UnivocityFileReaderTest.java | 16 +- .../fs/policy/{local => }/CronPolicyTest.java | 68 ++-- .../fs/policy/HdfsFileWatcherPolicyTest.java | 79 +++++ .../connect/fs/policy/PolicyFsTestConfig.java | 112 +++++++ .../connect/fs/policy/PolicyTestBase.java | 172 ++++++---- .../policy/{local => }/SimplePolicyTest.java | 25 +- .../policy/{local => }/SleepyPolicyTest.java | 78 ++--- .../fs/policy/hdfs/CronPolicyTest.java | 90 ----- .../hdfs/HdfsFileWatcherPolicyTest.java | 71 ---- .../fs/policy/hdfs/SimplePolicyTest.java | 40 --- .../fs/policy/hdfs/SleepyPolicyTest.java | 110 ------ .../fs/policy/local/LocalPolicyTestBase.java | 29 -- .../connect/fs/task/FsSourceTaskTest.java | 316 +++++++++++++++--- .../connect/fs/task/FsSourceTaskTestBase.java | 187 ----------- .../connect/fs/task/TaskFsTestConfig.java | 113 +++++++ .../fs/task/hdfs/HdfsFsSourceTaskTest.java | 66 ---- .../task/hdfs/HdfsFsSourceTaskTestBase.java | 33 -- .../fs/task/local/LocalFsSourceTaskTest.java | 65 ---- .../task/local/LocalFsSourceTaskTestBase.java | 29 -- 33 files changed, 980 insertions(+), 1156 deletions(-) rename src/test/java/com/github/mmolimar/kafka/connect/fs/{policy/hdfs/HdfsPolicyTestBase.java => AbstractHdfsFsConfig.java} (50%) create mode 100644 src/test/java/com/github/mmolimar/kafka/connect/fs/AbstractLocalFsConfig.java create mode 100644 src/test/java/com/github/mmolimar/kafka/connect/fs/FsTestConfig.java delete mode 100644 src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FileSystemConfig.java create mode 100644 src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/ReaderFsTestConfig.java rename src/test/java/com/github/mmolimar/kafka/connect/fs/policy/{local => }/CronPolicyTest.java (53%) create mode 100644 src/test/java/com/github/mmolimar/kafka/connect/fs/policy/HdfsFileWatcherPolicyTest.java create mode 100644 src/test/java/com/github/mmolimar/kafka/connect/fs/policy/PolicyFsTestConfig.java rename src/test/java/com/github/mmolimar/kafka/connect/fs/policy/{local => }/SimplePolicyTest.java (57%) rename src/test/java/com/github/mmolimar/kafka/connect/fs/policy/{local => }/SleepyPolicyTest.java (54%) delete mode 100644 src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/CronPolicyTest.java delete mode 100644 src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/HdfsFileWatcherPolicyTest.java delete mode 100644 src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/SimplePolicyTest.java delete mode 100644 src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/SleepyPolicyTest.java delete mode 100644 
src/test/java/com/github/mmolimar/kafka/connect/fs/policy/local/LocalPolicyTestBase.java delete mode 100644 src/test/java/com/github/mmolimar/kafka/connect/fs/task/FsSourceTaskTestBase.java create mode 100644 src/test/java/com/github/mmolimar/kafka/connect/fs/task/TaskFsTestConfig.java delete mode 100644 src/test/java/com/github/mmolimar/kafka/connect/fs/task/hdfs/HdfsFsSourceTaskTest.java delete mode 100644 src/test/java/com/github/mmolimar/kafka/connect/fs/task/hdfs/HdfsFsSourceTaskTestBase.java delete mode 100644 src/test/java/com/github/mmolimar/kafka/connect/fs/task/local/LocalFsSourceTaskTest.java delete mode 100644 src/test/java/com/github/mmolimar/kafka/connect/fs/task/local/LocalFsSourceTaskTestBase.java diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceTask.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceTask.java index 971f6ee..1fdc9b5 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceTask.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceTask.java @@ -89,7 +89,9 @@ private List filesToProcess() { .collect(Collectors.toList()); } catch (IOException | ConnectException e) { //when an exception happens executing the policy, the connector continues - log.error("Cannot retrieve files to process from FS: " + policy.getURIs() + ". Keep going...", e); + log.error("Cannot retrieve files to process from the FS: " + policy.getURIs() + ". " + + "There was an error executing the policy but the task tolerates this and continues. " + + "Error message: " + e.getMessage()); return Collections.emptyList(); } } diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/HdfsPolicyTestBase.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/AbstractHdfsFsConfig.java similarity index 50% rename from src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/HdfsPolicyTestBase.java rename to src/test/java/com/github/mmolimar/kafka/connect/fs/AbstractHdfsFsConfig.java index 522d1de..f3fef89 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/HdfsPolicyTestBase.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/AbstractHdfsFsConfig.java @@ -1,33 +1,44 @@ -package com.github.mmolimar.kafka.connect.fs.policy.hdfs; +package com.github.mmolimar.kafka.connect.fs; -import com.github.mmolimar.kafka.connect.fs.policy.PolicyTestBase; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.hdfs.MiniDFSCluster; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.BeforeAll; import java.io.IOException; import java.net.URI; import java.nio.file.Files; -import java.nio.file.Path; -public abstract class HdfsPolicyTestBase extends PolicyTestBase { +public abstract class AbstractHdfsFsConfig implements FsTestConfig { + private MiniDFSCluster cluster; + private FileSystem fs; + private URI fsUri; - private static MiniDFSCluster cluster; - - @BeforeAll - public static void initFs() throws IOException { + @Override + public final void initFs() throws IOException { Configuration clusterConfig = new Configuration(); - Path hdfsDir = Files.createTempDirectory("test-"); + java.nio.file.Path hdfsDir = Files.createTempDirectory("test-"); clusterConfig.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, hdfsDir.toAbsolutePath().toString()); cluster = new MiniDFSCluster.Builder(clusterConfig).build(); fsUri = URI.create("hdfs://localhost:" + cluster.getNameNodePort() + "/"); fs = FileSystem.newInstance(fsUri, new Configuration()); 
+ init(); + } + + protected abstract void init() throws IOException; + + @Override + public FileSystem getFs() { + return fs; + } + + @Override + public URI getFsUri() { + return fsUri; } - @AfterAll - public static void finishFs() { + @Override + public void close() throws IOException { + fs.close(); cluster.shutdown(true); } } diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/AbstractLocalFsConfig.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/AbstractLocalFsConfig.java new file mode 100644 index 0000000..dab5736 --- /dev/null +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/AbstractLocalFsConfig.java @@ -0,0 +1,41 @@ +package com.github.mmolimar.kafka.connect.fs; + +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; + +import java.io.IOException; +import java.net.URI; +import java.nio.file.Files; + +public abstract class AbstractLocalFsConfig implements FsTestConfig { + private java.nio.file.Path localDir; + private FileSystem fs; + private URI fsUri; + + @Override + public final void initFs() throws IOException { + localDir = Files.createTempDirectory("test-"); + fsUri = localDir.toUri(); + fs = FileSystem.newInstance(fsUri, new Configuration()); + init(); + } + + protected abstract void init() throws IOException; + + @Override + public FileSystem getFs() { + return fs; + } + + @Override + public URI getFsUri() { + return fsUri; + } + + @Override + public void close() throws IOException { + fs.close(); + FileUtils.deleteDirectory(localDir.toFile()); + } +} diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/FsTestConfig.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/FsTestConfig.java new file mode 100644 index 0000000..64b9c4c --- /dev/null +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/FsTestConfig.java @@ -0,0 +1,17 @@ +package com.github.mmolimar.kafka.connect.fs; + +import org.apache.hadoop.fs.FileSystem; + +import java.io.Closeable; +import java.io.IOException; +import java.net.URI; + +public interface FsTestConfig extends Closeable { + + void initFs() throws IOException; + + FileSystem getFs(); + + URI getFsUri(); + +} diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReaderTest.java index 75fc8a2..841c951 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReaderTest.java @@ -41,7 +41,7 @@ public static void setUp() throws IOException { } @Override - protected Path createDataFile(FileSystemConfig fsConfig, Object... args) throws IOException { + protected Path createDataFile(ReaderFsTestConfig fsConfig, Object... args) throws IOException { File avroFile = File.createTempFile("test-", "." + getFileExtension()); DatumWriter writer = new GenericDatumWriter<>(schema); try (DataFileWriter dataFileWriter = new DataFileWriter<>(writer)) { @@ -69,7 +69,7 @@ protected Path createDataFile(FileSystemConfig fsConfig, Object... 
args) throws @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void readerWithSchema(FileSystemConfig fsConfig) throws Throwable { + public void readerWithSchema(ReaderFsTestConfig fsConfig) throws Throwable { Map readerConfig = getReaderConfig(); readerConfig.put(AvroFileReader.FILE_READER_AVRO_SCHEMA, schema.toString()); FileSystem testFs = FileSystem.newInstance(fsConfig.getFsUri(), new Configuration()); @@ -79,7 +79,7 @@ public void readerWithSchema(FileSystemConfig fsConfig) throws Throwable { @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void readerWithInvalidSchema(FileSystemConfig fsConfig) throws Throwable { + public void readerWithInvalidSchema(ReaderFsTestConfig fsConfig) throws Throwable { Map readerConfig = getReaderConfig(); readerConfig.put(AvroFileReader.FILE_READER_AVRO_SCHEMA, Schema.create(Schema.Type.STRING).toString()); FileSystem testFs = FileSystem.newInstance(fsConfig.getFsUri(), new Configuration()); @@ -96,7 +96,7 @@ public void readerWithInvalidSchema(FileSystemConfig fsConfig) throws Throwable @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void readerWithUnparseableSchema(FileSystemConfig fsConfig) throws IOException { + public void readerWithUnparseableSchema(ReaderFsTestConfig fsConfig) throws IOException { Map readerConfig = getReaderConfig(); readerConfig.put(AvroFileReader.FILE_READER_AVRO_SCHEMA, "invalid schema"); FileSystem testFs = FileSystem.newInstance(fsConfig.getFsUri(), new Configuration()); diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/CsvFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/CsvFileReaderTest.java index f4a0809..a6b9fbf 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/CsvFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/CsvFileReaderTest.java @@ -19,7 +19,7 @@ public class CsvFileReaderTest extends UnivocityFileReaderTest { @Override - protected Path createDataFile(FileSystemConfig fsConfig, Object... args) throws IOException { + protected Path createDataFile(ReaderFsTestConfig fsConfig, Object... args) throws IOException { boolean header = args.length < 1 || (boolean) args[0]; CompressionType compression = args.length < 2 ? COMPRESSION_TYPE_DEFAULT : (CompressionType) args[1]; File txtFile = File.createTempFile("test-", "." + getFileExtension()); @@ -40,7 +40,7 @@ protected Path createDataFile(FileSystemConfig fsConfig, Object... args) throws @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void readAllDataWithMalformedRows(FileSystemConfig fsConfig) throws Throwable { + public void readAllDataWithMalformedRows(ReaderFsTestConfig fsConfig) throws Throwable { File tmp = File.createTempFile("test-", "." 
+ getFileExtension()); try (FileWriter writer = new FileWriter(tmp)) { writer.append(FIELD_COLUMN1 + "," + FIELD_COLUMN2 + "," + FIELD_COLUMN3 + "," + FIELD_COLUMN4 + "\n"); diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FileReaderTestBase.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FileReaderTestBase.java index 4e0d38b..e691d87 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FileReaderTestBase.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FileReaderTestBase.java @@ -23,7 +23,7 @@ abstract class FileReaderTestBase { - private static final List TEST_FILE_SYSTEMS = Arrays.asList( + private static final List TEST_FILE_SYSTEMS = Arrays.asList( new LocalFsConfig(), new HdfsFsConfig() ); @@ -31,21 +31,21 @@ abstract class FileReaderTestBase { @BeforeAll public static void initFs() throws IOException { - for (FileSystemConfig fsConfig : TEST_FILE_SYSTEMS) { + for (ReaderFsTestConfig fsConfig : TEST_FILE_SYSTEMS) { fsConfig.initFs(); } } @AfterAll public static void finishFs() throws IOException { - for (FileSystemConfig fsConfig : TEST_FILE_SYSTEMS) { + for (ReaderFsTestConfig fsConfig : TEST_FILE_SYSTEMS) { fsConfig.close(); } } @BeforeEach public void openReader() throws Throwable { - for (FileSystemConfig fsConfig : TEST_FILE_SYSTEMS) { + for (ReaderFsTestConfig fsConfig : TEST_FILE_SYSTEMS) { fsConfig.setDataFile(createDataFile(fsConfig)); FileReader reader = ReflectionUtils.makeReader(getReaderClass(), fsConfig.getFs(), fsConfig.getDataFile(), getReaderConfig()); @@ -56,7 +56,7 @@ public void openReader() throws Throwable { @AfterEach public void closeReader() { - for (FileSystemConfig fsConfig : TEST_FILE_SYSTEMS) { + for (ReaderFsTestConfig fsConfig : TEST_FILE_SYSTEMS) { try { fsConfig.getReader().close(); } catch (Exception e) { @@ -71,7 +71,7 @@ private static Stream fileSystemConfigProvider() { @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void invalidArgs(FileSystemConfig fsConfig) { + public void invalidArgs(ReaderFsTestConfig fsConfig) { try { fsConfig.getReader().getClass().getConstructor(FileSystem.class, Path.class, Map.class) .newInstance(null, null, null); @@ -84,14 +84,14 @@ public void invalidArgs(FileSystemConfig fsConfig) { @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void fileDoesNotExist(FileSystemConfig fsConfig) { + public void fileDoesNotExist(ReaderFsTestConfig fsConfig) { Path path = new Path(new Path(fsConfig.getFsUri()), UUID.randomUUID().toString()); assertThrows(FileNotFoundException.class, () -> getReader(fsConfig.getFs(), path, getReaderConfig())); } @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void emptyFile(FileSystemConfig fsConfig) throws Throwable { + public void emptyFile(ReaderFsTestConfig fsConfig) throws Throwable { File tmp = File.createTempFile("test-", "." + getFileExtension()); Path path = new Path(new Path(fsConfig.getFsUri()), tmp.getName()); fsConfig.getFs().moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); @@ -100,7 +100,7 @@ public void emptyFile(FileSystemConfig fsConfig) throws Throwable { @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void invalidFileFormat(FileSystemConfig fsConfig) throws Throwable { + public void invalidFileFormat(ReaderFsTestConfig fsConfig) throws Throwable { File tmp = File.createTempFile("test-", "." 
+ getFileExtension()); try (BufferedWriter writer = new BufferedWriter(new FileWriter(tmp))) { writer.write("test"); @@ -112,7 +112,7 @@ public void invalidFileFormat(FileSystemConfig fsConfig) throws Throwable { @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void readAllData(FileSystemConfig fsConfig) { + public void readAllData(ReaderFsTestConfig fsConfig) { FileReader reader = fsConfig.getReader(); assertTrue(reader.hasNext()); @@ -127,7 +127,7 @@ public void readAllData(FileSystemConfig fsConfig) { @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void seekFile(FileSystemConfig fsConfig) { + public void seekFile(ReaderFsTestConfig fsConfig) { FileReader reader = fsConfig.getReader(); int recordIndex = NUM_RECORDS / 2; reader.seek(getOffset(fsConfig.offsetsByIndex().get(recordIndex))); @@ -153,14 +153,14 @@ public void seekFile(FileSystemConfig fsConfig) { @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void negativeSeek(FileSystemConfig fsConfig) { + public void negativeSeek(ReaderFsTestConfig fsConfig) { FileReader reader = fsConfig.getReader(); assertThrows(RuntimeException.class, () -> reader.seek(getOffset(-1))); } @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void exceededSeek(FileSystemConfig fsConfig) { + public void exceededSeek(ReaderFsTestConfig fsConfig) { FileReader reader = fsConfig.getReader(); reader.seek(getOffset(fsConfig.offsetsByIndex().get(NUM_RECORDS - 1) + 1)); assertFalse(reader.hasNext()); @@ -169,7 +169,7 @@ public void exceededSeek(FileSystemConfig fsConfig) { @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void readFileAlreadyClosed(FileSystemConfig fsConfig) throws IOException { + public void readFileAlreadyClosed(ReaderFsTestConfig fsConfig) throws IOException { FileReader reader = fsConfig.getReader(); reader.close(); assertThrows(IllegalStateException.class, reader::hasNext); @@ -202,7 +202,7 @@ protected OutputStream getOutputStream(File file, CompressionType compression) t protected abstract Class getReaderClass(); - protected abstract Path createDataFile(FileSystemConfig fsConfig, Object... args) throws IOException; + protected abstract Path createDataFile(ReaderFsTestConfig fsConfig, Object... 
args) throws IOException; protected abstract Map getReaderConfig(); diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FileSystemConfig.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FileSystemConfig.java deleted file mode 100644 index a838251..0000000 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FileSystemConfig.java +++ /dev/null @@ -1,153 +0,0 @@ -package com.github.mmolimar.kafka.connect.fs.file.reader; - -import org.apache.commons.io.FileUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hdfs.MiniDFSCluster; - -import java.io.Closeable; -import java.io.IOException; -import java.net.URI; -import java.nio.file.Files; -import java.util.HashMap; -import java.util.Map; - -interface FileSystemConfig extends Closeable { - - void initFs() throws IOException; - - FileSystem getFs(); - - URI getFsUri(); - - void setDataFile(Path dataFile); - - Path getDataFile(); - - void setReader(FileReader reader); - - FileReader getReader(); - - Map offsetsByIndex(); - -} - -class LocalFsConfig implements FileSystemConfig { - private java.nio.file.Path localDir; - private FileSystem fs; - private URI fsUri; - private Path dataFile; - private FileReader reader; - private Map offsetsByIndex; - - @Override - public void initFs() throws IOException { - localDir = Files.createTempDirectory("test-"); - fsUri = localDir.toUri(); - fs = FileSystem.newInstance(fsUri, new Configuration()); - offsetsByIndex = new HashMap<>(); - } - - @Override - public FileSystem getFs() { - return fs; - } - - @Override - public URI getFsUri() { - return fsUri; - } - - @Override - public void setDataFile(Path dataFile) { - this.dataFile = dataFile; - } - - @Override - public Path getDataFile() { - return dataFile; - } - - @Override - public void setReader(FileReader reader) { - this.reader = reader; - } - - @Override - public FileReader getReader() { - return reader; - } - - @Override - public Map offsetsByIndex() { - return offsetsByIndex; - } - - @Override - public void close() throws IOException { - fs.close(); - FileUtils.deleteDirectory(localDir.toFile()); - } -} - -class HdfsFsConfig implements FileSystemConfig { - private MiniDFSCluster cluster; - private FileSystem fs; - private URI fsUri; - private Path dataFile; - private FileReader reader; - private Map offsetsByIndex; - - @Override - public void initFs() throws IOException { - Configuration clusterConfig = new Configuration(); - java.nio.file.Path hdfsDir = Files.createTempDirectory("test-"); - clusterConfig.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, hdfsDir.toAbsolutePath().toString()); - cluster = new MiniDFSCluster.Builder(clusterConfig).build(); - fsUri = URI.create("hdfs://localhost:" + cluster.getNameNodePort() + "/"); - fs = FileSystem.newInstance(fsUri, new Configuration()); - offsetsByIndex = new HashMap<>(); - } - - @Override - public FileSystem getFs() { - return fs; - } - - @Override - public URI getFsUri() { - return fsUri; - } - - @Override - public Path getDataFile() { - return dataFile; - } - - @Override - public void setDataFile(Path dataFile) { - this.dataFile = dataFile; - } - - @Override - public void setReader(FileReader reader) { - this.reader = reader; - } - - @Override - public FileReader getReader() { - return reader; - } - - @Override - public Map offsetsByIndex() { - return offsetsByIndex; - } - - @Override - public void close() throws IOException { - fs.close(); - 
cluster.shutdown(true); - } -} diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/JsonFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/JsonFileReaderTest.java index da8994b..1df1cd0 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/JsonFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/JsonFileReaderTest.java @@ -38,7 +38,7 @@ public class JsonFileReaderTest extends FileReaderTestBase { private static final CompressionType COMPRESSION_TYPE_DEFAULT = CompressionType.NONE; @Override - protected Path createDataFile(FileSystemConfig fsConfig, Object... args) throws IOException { + protected Path createDataFile(ReaderFsTestConfig fsConfig, Object... args) throws IOException { int numRecords = args.length < 1 ? NUM_RECORDS : (int) args[0]; boolean recordPerLine = args.length < 2 || (boolean) args[1]; CompressionType compression = args.length < 3 ? COMPRESSION_TYPE_DEFAULT : (CompressionType) args[2]; @@ -78,7 +78,7 @@ protected Path createDataFile(FileSystemConfig fsConfig, Object... args) throws @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void emptyFile(FileSystemConfig fsConfig) throws Throwable { + public void emptyFile(ReaderFsTestConfig fsConfig) throws Throwable { File tmp = File.createTempFile("test-", "." + getFileExtension()); Path path = new Path(new Path(fsConfig.getFsUri()), tmp.getName()); fsConfig.getFs().moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); @@ -88,7 +88,7 @@ public void emptyFile(FileSystemConfig fsConfig) throws Throwable { @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void validFileEncoding(FileSystemConfig fsConfig) throws Throwable { + public void validFileEncoding(ReaderFsTestConfig fsConfig) throws Throwable { Map readerConfig = getReaderConfig(); readerConfig.put(JsonFileReader.FILE_READER_JSON_ENCODING, "Cp1252"); fsConfig.setReader(getReader(fsConfig.getFs(), fsConfig.getDataFile(), readerConfig)); @@ -97,7 +97,7 @@ public void validFileEncoding(FileSystemConfig fsConfig) throws Throwable { @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void invalidDeserializationConfig(FileSystemConfig fsConfig) throws Throwable { + public void invalidDeserializationConfig(ReaderFsTestConfig fsConfig) throws Throwable { Map readerConfig = getReaderConfig(); readerConfig.put(JsonFileReader.FILE_READER_JSON_DESERIALIZATION_CONFIGS + "invalid", "false"); fsConfig.setReader(getReader(fsConfig.getFs(), fsConfig.getDataFile(), readerConfig)); @@ -106,7 +106,7 @@ public void invalidDeserializationConfig(FileSystemConfig fsConfig) throws Throw @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void invalidFileEncoding(FileSystemConfig fsConfig) { + public void invalidFileEncoding(ReaderFsTestConfig fsConfig) { Map readerConfig = getReaderConfig(); readerConfig.put(JsonFileReader.FILE_READER_JSON_ENCODING, "invalid_charset"); assertThrows(UnsupportedCharsetException.class, () -> getReader(fsConfig.getFs(), @@ -115,7 +115,7 @@ public void invalidFileEncoding(FileSystemConfig fsConfig) { @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void readDataWithRecordPerLineDisabled(FileSystemConfig fsConfig) throws Throwable { + public void readDataWithRecordPerLineDisabled(ReaderFsTestConfig fsConfig) throws Throwable { Path file = createDataFile(fsConfig, 1, false); Map readerConfig = getReaderConfig(); 
readerConfig.put(JsonFileReader.FILE_READER_JSON_RECORD_PER_LINE, "false"); @@ -135,7 +135,7 @@ public void readDataWithRecordPerLineDisabled(FileSystemConfig fsConfig) throws @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void readDifferentCompressionTypes(FileSystemConfig fsConfig) { + public void readDifferentCompressionTypes(ReaderFsTestConfig fsConfig) { Arrays.stream(CompressionType.values()).forEach(compressionType -> { try { Path file = createDataFile(fsConfig, NUM_RECORDS, true, compressionType); diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/ParquetFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/ParquetFileReaderTest.java index ae21b88..891eeec 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/ParquetFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/ParquetFileReaderTest.java @@ -50,7 +50,7 @@ public static void setUp() throws IOException { } @Override - protected Path createDataFile(FileSystemConfig fsConfig, Object... args) throws IOException { + protected Path createDataFile(ReaderFsTestConfig fsConfig, Object... args) throws IOException { FileSystem fs = fsConfig.getFs(); File parquetFile = File.createTempFile("test-", "." + getFileExtension()); @@ -77,7 +77,7 @@ protected Path createDataFile(FileSystemConfig fsConfig, Object... args) throws @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void emptyFile(FileSystemConfig fsConfig) throws Throwable { + public void emptyFile(ReaderFsTestConfig fsConfig) throws Throwable { File tmp = File.createTempFile("test-", "." + getFileExtension()); Path path = new Path(new Path(fsConfig.getFsUri()), tmp.getName()); fsConfig.getFs().moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); @@ -86,7 +86,7 @@ public void emptyFile(FileSystemConfig fsConfig) throws Throwable { @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void invalidFileFormat(FileSystemConfig fsConfig) throws Throwable { + public void invalidFileFormat(ReaderFsTestConfig fsConfig) throws Throwable { File tmp = File.createTempFile("test-", "." 
+ getFileExtension()); try (BufferedWriter writer = new BufferedWriter(new FileWriter(tmp))) { writer.write("test"); @@ -98,7 +98,7 @@ public void invalidFileFormat(FileSystemConfig fsConfig) throws Throwable { @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void readerWithSchema(FileSystemConfig fsConfig) throws Throwable { + public void readerWithSchema(ReaderFsTestConfig fsConfig) throws Throwable { Map readerConfig = getReaderConfig(); readerConfig.put(ParquetFileReader.FILE_READER_PARQUET_SCHEMA, readerSchema.toString()); readerConfig.put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_PARQUET, getFileExtension()); @@ -109,7 +109,7 @@ public void readerWithSchema(FileSystemConfig fsConfig) throws Throwable { @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void readerWithProjection(FileSystemConfig fsConfig) throws Throwable { + public void readerWithProjection(ReaderFsTestConfig fsConfig) throws Throwable { Map readerConfig = getReaderConfig(); readerConfig.put(ParquetFileReader.FILE_READER_PARQUET_PROJECTION, projectionSchema.toString()); readerConfig.put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_PARQUET, getFileExtension()); @@ -127,7 +127,7 @@ public void readerWithProjection(FileSystemConfig fsConfig) throws Throwable { @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void readerWithInvalidProjection(FileSystemConfig fsConfig) throws Throwable { + public void readerWithInvalidProjection(ReaderFsTestConfig fsConfig) throws Throwable { Schema testSchema = SchemaBuilder.record("test_projection").namespace("test.avro") .fields() .name("field1").type("string").noDefault() @@ -142,7 +142,7 @@ public void readerWithInvalidProjection(FileSystemConfig fsConfig) throws Throwa @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void readerWithInvalidSchema(FileSystemConfig fsConfig) throws Throwable { + public void readerWithInvalidSchema(ReaderFsTestConfig fsConfig) throws Throwable { Map readerConfig = getReaderConfig(); readerConfig.put(ParquetFileReader.FILE_READER_PARQUET_SCHEMA, Schema.create(Schema.Type.STRING).toString()); readerConfig.put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_PARQUET, getFileExtension()); @@ -153,7 +153,7 @@ public void readerWithInvalidSchema(FileSystemConfig fsConfig) throws Throwable @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void readerWithUnparseableSchema(FileSystemConfig fsConfig) { + public void readerWithUnparseableSchema(ReaderFsTestConfig fsConfig) { Map readerConfig = getReaderConfig(); readerConfig.put(ParquetFileReader.FILE_READER_PARQUET_SCHEMA, "invalid schema"); readerConfig.put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_PARQUET, getFileExtension()); diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/ReaderFsTestConfig.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/ReaderFsTestConfig.java new file mode 100644 index 0000000..7fde007 --- /dev/null +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/ReaderFsTestConfig.java @@ -0,0 +1,97 @@ +package com.github.mmolimar.kafka.connect.fs.file.reader; + +import com.github.mmolimar.kafka.connect.fs.AbstractHdfsFsConfig; +import com.github.mmolimar.kafka.connect.fs.AbstractLocalFsConfig; +import com.github.mmolimar.kafka.connect.fs.FsTestConfig; +import org.apache.hadoop.fs.Path; + +import java.util.HashMap; +import java.util.Map; + +interface ReaderFsTestConfig extends FsTestConfig { + + void 
setDataFile(Path dataFile); + + Path getDataFile(); + + void setReader(FileReader reader); + + FileReader getReader(); + + Map offsetsByIndex(); + +} + +class LocalFsConfig extends AbstractLocalFsConfig implements ReaderFsTestConfig { + private Path dataFile; + private FileReader reader; + private Map offsetsByIndex; + + @Override + public void init() { + offsetsByIndex = new HashMap<>(); + } + + @Override + public void setDataFile(Path dataFile) { + this.dataFile = dataFile; + } + + @Override + public Path getDataFile() { + return dataFile; + } + + @Override + public void setReader(FileReader reader) { + this.reader = reader; + } + + @Override + public FileReader getReader() { + return reader; + } + + @Override + public Map offsetsByIndex() { + return offsetsByIndex; + } + +} + +class HdfsFsConfig extends AbstractHdfsFsConfig implements ReaderFsTestConfig { + private Path dataFile; + private FileReader reader; + private Map offsetsByIndex; + + @Override + public void init() { + offsetsByIndex = new HashMap<>(); + } + + @Override + public Path getDataFile() { + return dataFile; + } + + @Override + public void setDataFile(Path dataFile) { + this.dataFile = dataFile; + } + + @Override + public void setReader(FileReader reader) { + this.reader = reader; + } + + @Override + public FileReader getReader() { + return reader; + } + + @Override + public Map offsetsByIndex() { + return offsetsByIndex; + } + +} diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReaderTest.java index ae87901..ee5bdd8 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReaderTest.java @@ -28,7 +28,7 @@ public class SequenceFileReaderTest extends FileReaderTestBase { private static final String FILE_EXTENSION = "sq"; @Override - protected Path createDataFile(FileSystemConfig fsConfig, Object... args) throws IOException { + protected Path createDataFile(ReaderFsTestConfig fsConfig, Object... args) throws IOException { FileSystem fs = fsConfig.getFs(); File seqFile = File.createTempFile("test-", "." + getFileExtension()); try (SequenceFile.Writer writer = SequenceFile.createWriter(fs.getConf(), @@ -63,7 +63,7 @@ protected Path createDataFile(FileSystemConfig fsConfig, Object... args) throws @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void defaultFieldNames(FileSystemConfig fsConfig) throws Throwable { + public void defaultFieldNames(ReaderFsTestConfig fsConfig) throws Throwable { Map readerConfig = getReaderConfig(); readerConfig.put(SequenceFileReader.FILE_READER_SEQUENCE_FIELD_NAME_KEY, null); readerConfig.put(SequenceFileReader.FILE_READER_SEQUENCE_FIELD_NAME_VALUE, null); diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReaderTest.java index 53ac900..281bb24 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReaderTest.java @@ -25,7 +25,7 @@ public class TextFileReaderTest extends FileReaderTestBase { private static final CompressionType COMPRESSION_TYPE_DEFAULT = CompressionType.GZIP; @Override - protected Path createDataFile(FileSystemConfig fsConfig, Object... 
args) throws IOException { + protected Path createDataFile(ReaderFsTestConfig fsConfig, Object... args) throws IOException { CompressionType compression = args.length < 1 ? COMPRESSION_TYPE_DEFAULT : (CompressionType) args[0]; File txtFile = File.createTempFile("test-", "." + FILE_EXTENSION); try (PrintWriter writer = new PrintWriter(getOutputStream(txtFile, compression))) { @@ -42,7 +42,7 @@ protected Path createDataFile(FileSystemConfig fsConfig, Object... args) throws @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void validFileEncoding(FileSystemConfig fsConfig) throws Throwable { + public void validFileEncoding(ReaderFsTestConfig fsConfig) throws Throwable { Map readerConfig = getReaderConfig(); readerConfig.put(TextFileReader.FILE_READER_TEXT_FIELD_NAME_VALUE, FIELD_NAME_VALUE); readerConfig.put(TextFileReader.FILE_READER_TEXT_ENCODING, "Cp1252"); @@ -54,7 +54,7 @@ public void validFileEncoding(FileSystemConfig fsConfig) throws Throwable { @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void invalidFileEncoding(FileSystemConfig fsConfig) { + public void invalidFileEncoding(ReaderFsTestConfig fsConfig) { Map readerConfig = getReaderConfig(); readerConfig.put(TextFileReader.FILE_READER_TEXT_FIELD_NAME_VALUE, FIELD_NAME_VALUE); readerConfig.put(TextFileReader.FILE_READER_TEXT_ENCODING, "invalid_charset"); @@ -65,7 +65,7 @@ public void invalidFileEncoding(FileSystemConfig fsConfig) { @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void readDataWithRecordPerLineDisabled(FileSystemConfig fsConfig) throws Throwable { + public void readDataWithRecordPerLineDisabled(ReaderFsTestConfig fsConfig) throws Throwable { Path file = createDataFile(fsConfig, COMPRESSION_TYPE_DEFAULT); Map readerConfig = getReaderConfig(); readerConfig.put(TextFileReader.FILE_READER_TEXT_FIELD_NAME_VALUE, FIELD_NAME_VALUE); @@ -87,7 +87,7 @@ public void readDataWithRecordPerLineDisabled(FileSystemConfig fsConfig) throws @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void readDifferentCompressionTypes(FileSystemConfig fsConfig) { + public void readDifferentCompressionTypes(ReaderFsTestConfig fsConfig) { Arrays.stream(CompressionType.values()).forEach(compressionType -> { try { Path file = createDataFile(fsConfig, compressionType); diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/TsvFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/TsvFileReaderTest.java index 20c0dc0..2f94f28 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/TsvFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/TsvFileReaderTest.java @@ -13,7 +13,7 @@ public class TsvFileReaderTest extends UnivocityFileReaderTest { @Override - protected Path createDataFile(FileSystemConfig fsConfig, Object... args) throws IOException { + protected Path createDataFile(ReaderFsTestConfig fsConfig, Object... args) throws IOException { boolean header = args.length < 1 || (boolean) args[0]; CompressionType compression = args.length < 2 ? COMPRESSION_TYPE_DEFAULT : (CompressionType) args[1]; File txtFile = File.createTempFile("test-", "." 
+ getFileExtension()); diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/UnivocityFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/UnivocityFileReaderTest.java index da18e0e..d224027 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/UnivocityFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/UnivocityFileReaderTest.java @@ -29,7 +29,7 @@ abstract class UnivocityFileReaderTest extends Fi @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void emptyFile(FileSystemConfig fsConfig) throws Throwable { + public void emptyFile(ReaderFsTestConfig fsConfig) throws Throwable { File tmp = File.createTempFile("test-", "." + getFileExtension()); Path path = new Path(new Path(fsConfig.getFsUri()), tmp.getName()); fsConfig.getFs().moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); @@ -38,7 +38,7 @@ public void emptyFile(FileSystemConfig fsConfig) throws Throwable { @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void invalidFileFormat(FileSystemConfig fsConfig) throws Throwable { + public void invalidFileFormat(ReaderFsTestConfig fsConfig) throws Throwable { File tmp = File.createTempFile("test-", "." + getFileExtension()); try (BufferedWriter writer = new BufferedWriter(new FileWriter(tmp))) { writer.write("test"); @@ -50,7 +50,7 @@ public void invalidFileFormat(FileSystemConfig fsConfig) throws Throwable { @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void invaliConfigArgs(FileSystemConfig fsConfig) { + public void invalidConfigArgs(ReaderFsTestConfig fsConfig) { try { getReaderClass().getConstructor(FileSystem.class, Path.class, Map.class) .newInstance(fsConfig.getFs(), fsConfig.getDataFile(), new HashMap()); @@ -63,7 +63,7 @@ public void invaliConfigArgs(FileSystemConfig fsConfig) { @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void readAllDataWithoutHeader(FileSystemConfig fsConfig) throws Throwable { + public void readAllDataWithoutHeader(ReaderFsTestConfig fsConfig) throws Throwable { Path file = createDataFile(fsConfig, false); Map readerConfig = getReaderConfig(); readerConfig.put(T.FILE_READER_DELIMITED_SETTINGS_HEADER, "false"); @@ -82,7 +82,7 @@ public void readAllDataWithoutHeader(FileSystemConfig fsConfig) throws Throwable @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void readDifferentCompressionTypes(FileSystemConfig fsConfig) { + public void readDifferentCompressionTypes(ReaderFsTestConfig fsConfig) { Arrays.stream(CompressionType.values()).forEach(compressionType -> { try { Path file = createDataFile(fsConfig, true, compressionType); @@ -110,7 +110,7 @@ public void readDifferentCompressionTypes(FileSystemConfig fsConfig) { @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void seekFileWithoutHeader(FileSystemConfig fsConfig) throws Throwable { + public void seekFileWithoutHeader(ReaderFsTestConfig fsConfig) throws Throwable { Path file = createDataFile(fsConfig, false); Map readerConfig = getReaderConfig(); readerConfig.put(T.FILE_READER_DELIMITED_SETTINGS_HEADER, "false"); @@ -142,7 +142,7 @@ public void seekFileWithoutHeader(FileSystemConfig fsConfig) throws Throwable { @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void validFileEncoding(FileSystemConfig fsConfig) throws Throwable { + public void validFileEncoding(ReaderFsTestConfig fsConfig) throws Throwable { Map readerConfig = getReaderConfig();
readerConfig.put(T.FILE_READER_DELIMITED_SETTINGS_HEADER, "true"); readerConfig.put(T.FILE_READER_DELIMITED_ENCODING, "Cp1252"); @@ -151,7 +151,7 @@ public void validFileEncoding(FileSystemConfig fsConfig) throws Throwable { @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void invalidFileEncoding(FileSystemConfig fsConfig) { + public void invalidFileEncoding(ReaderFsTestConfig fsConfig) { Map readerConfig = getReaderConfig(); readerConfig.put(T.FILE_READER_DELIMITED_SETTINGS_HEADER, "true"); readerConfig.put(T.FILE_READER_DELIMITED_ENCODING, "invalid_charset"); diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/local/CronPolicyTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/CronPolicyTest.java similarity index 53% rename from src/test/java/com/github/mmolimar/kafka/connect/fs/policy/local/CronPolicyTest.java rename to src/test/java/com/github/mmolimar/kafka/connect/fs/policy/CronPolicyTest.java index f054371..5de4e95 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/local/CronPolicyTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/CronPolicyTest.java @@ -1,37 +1,26 @@ -package com.github.mmolimar.kafka.connect.fs.policy.local; +package com.github.mmolimar.kafka.connect.fs.policy; import com.github.mmolimar.kafka.connect.fs.FsSourceTaskConfig; import com.github.mmolimar.kafka.connect.fs.file.reader.TextFileReader; -import com.github.mmolimar.kafka.connect.fs.policy.CronPolicy; -import com.github.mmolimar.kafka.connect.fs.policy.Policy; import com.github.mmolimar.kafka.connect.fs.util.ReflectionUtils; import org.apache.hadoop.fs.Path; import org.apache.kafka.common.config.ConfigException; import org.apache.kafka.connect.errors.IllegalWorkerStateException; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; import java.io.IOException; import java.time.LocalDateTime; -import java.util.ArrayList; import java.util.HashMap; +import java.util.List; import java.util.Map; -import java.util.UUID; import static org.junit.jupiter.api.Assertions.*; -public class CronPolicyTest extends LocalPolicyTestBase { - - @BeforeAll - public static void setUp() throws IOException { - directories = new ArrayList() {{ - add(new Path(fsUri.toString(), UUID.randomUUID().toString())); - add(new Path(fsUri.toString(), UUID.randomUUID().toString())); - }}; - for (Path dir : directories) { - fs.mkdirs(dir); - } +public class CronPolicyTest extends PolicyTestBase { + @Override + protected FsSourceTaskConfig buildSourceTaskConfig(List directories) { Map cfg = new HashMap() {{ String[] uris = directories.stream().map(Path::toString) .toArray(String[]::new); @@ -45,40 +34,45 @@ public static void setUp() throws IOException { put(CronPolicy.CRON_POLICY_EXPRESSION, "0/2 * * * * ?"); put(CronPolicy.CRON_POLICY_END_DATE, LocalDateTime.now().plusDays(1).toString()); }}; - taskConfig = new FsSourceTaskConfig(cfg); + return new FsSourceTaskConfig(cfg); } - @Test + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") @Override - public void execPolicyAlreadyEnded() throws IOException { - policy.execute(); - policy.interrupt(); - assertTrue(policy.hasEnded()); - assertThrows(IllegalWorkerStateException.class, () -> policy.execute()); + public void execPolicyAlreadyEnded(PolicyFsTestConfig fsConfig) throws IOException { + fsConfig.getPolicy().execute(); + fsConfig.getPolicy().interrupt(); 
+ assertTrue(fsConfig.getPolicy().hasEnded()); + assertThrows(IllegalWorkerStateException.class, () -> fsConfig.getPolicy().execute()); } - @Test - public void invalidCronExpression() { - Map originals = taskConfig.originalsStrings(); + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void invalidCronExpression(PolicyFsTestConfig fsConfig) { + Map originals = fsConfig.getSourceTaskConfig().originalsStrings(); originals.put(CronPolicy.CRON_POLICY_EXPRESSION, "invalid"); FsSourceTaskConfig cfg = new FsSourceTaskConfig(originals); assertThrows(ConfigException.class, () -> ReflectionUtils.makePolicy( - (Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), cfg)); + (Class) fsConfig.getSourceTaskConfig().getClass(FsSourceTaskConfig.POLICY_CLASS), cfg)); } - @Test - public void invalidEndDate() { - Map originals = taskConfig.originalsStrings(); + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void invalidEndDate(PolicyFsTestConfig fsConfig) { + Map originals = fsConfig.getSourceTaskConfig().originalsStrings(); originals.put(CronPolicy.CRON_POLICY_END_DATE, "invalid"); FsSourceTaskConfig cfg = new FsSourceTaskConfig(originals); assertThrows(ConfigException.class, () -> ReflectionUtils.makePolicy( - (Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), cfg)); + (Class) fsConfig.getSourceTaskConfig().getClass(FsSourceTaskConfig.POLICY_CLASS), cfg)); } - @Test - public void canBeInterrupted() throws Throwable { - policy = ReflectionUtils.makePolicy( - (Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), taskConfig); + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void canBeInterrupted(PolicyFsTestConfig fsConfig) throws Throwable { + Policy policy = ReflectionUtils.makePolicy( + (Class) fsConfig.getSourceTaskConfig().getClass(FsSourceTaskConfig.POLICY_CLASS), + fsConfig.getSourceTaskConfig()); for (int i = 0; i < 5; i++) { assertFalse(policy.hasEnded()); diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/HdfsFileWatcherPolicyTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/HdfsFileWatcherPolicyTest.java new file mode 100644 index 0000000..ddf69b7 --- /dev/null +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/HdfsFileWatcherPolicyTest.java @@ -0,0 +1,79 @@ +package com.github.mmolimar.kafka.connect.fs.policy; + +import com.github.mmolimar.kafka.connect.fs.FsSourceTaskConfig; +import com.github.mmolimar.kafka.connect.fs.file.reader.TextFileReader; +import org.apache.hadoop.fs.Path; +import org.apache.kafka.connect.errors.IllegalWorkerStateException; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; + +import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.*; + +public class HdfsFileWatcherPolicyTest extends PolicyTestBase { + + static { + TEST_FILE_SYSTEMS = Collections.singletonList( + new HdfsFsConfig() + ); + } + + @BeforeAll + public static void initFs() throws IOException { + for (PolicyFsTestConfig fsConfig : TEST_FILE_SYSTEMS) { + fsConfig.initFs(); + } + } + + @Override + protected FsSourceTaskConfig buildSourceTaskConfig(List directories) { + Map cfg = new HashMap() {{ + String[] uris = directories.stream().map(Path::toString) + .toArray(String[]::new); + put(FsSourceTaskConfig.FS_URIS, String.join(",", uris)); + 
put(FsSourceTaskConfig.TOPIC, "topic_test"); + put(FsSourceTaskConfig.POLICY_CLASS, HdfsFileWatcherPolicy.class.getName()); + put(FsSourceTaskConfig.FILE_READER_CLASS, TextFileReader.class.getName()); + put(FsSourceTaskConfig.POLICY_REGEXP, "^[0-9]*\\.txt$"); + put(FsSourceTaskConfig.POLICY_PREFIX_FS + "dfs.data.dir", "test"); + put(FsSourceTaskConfig.POLICY_PREFIX_FS + "fs.default.name", "hdfs://test"); + }}; + return new FsSourceTaskConfig(cfg); + } + + // This policy does not throw any exception; it just stops watching those nonexistent dirs. + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + @Override + public void invalidDirectory(PolicyFsTestConfig fsConfig) throws IOException { + for (Path dir : fsConfig.getDirectories()) { + fsConfig.getFs().delete(dir, true); + } + try { + fsConfig.getPolicy().execute(); + } finally { + for (Path dir : fsConfig.getDirectories()) { + fsConfig.getFs().mkdirs(dir); + } + } + } + + // This policy never ends, so we have to interrupt it. + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + @Override + public void execPolicyAlreadyEnded(PolicyFsTestConfig fsConfig) throws IOException { + fsConfig.getPolicy().execute(); + assertFalse(fsConfig.getPolicy().hasEnded()); + fsConfig.getPolicy().interrupt(); + assertTrue(fsConfig.getPolicy().hasEnded()); + assertThrows(IllegalWorkerStateException.class, () -> fsConfig.getPolicy().execute()); + } + +} diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/PolicyFsTestConfig.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/PolicyFsTestConfig.java new file mode 100644 index 0000000..60382c9 --- /dev/null +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/PolicyFsTestConfig.java @@ -0,0 +1,112 @@ +package com.github.mmolimar.kafka.connect.fs.policy; + +import com.github.mmolimar.kafka.connect.fs.AbstractHdfsFsConfig; +import com.github.mmolimar.kafka.connect.fs.AbstractLocalFsConfig; +import com.github.mmolimar.kafka.connect.fs.FsSourceTaskConfig; +import com.github.mmolimar.kafka.connect.fs.FsTestConfig; +import org.apache.hadoop.fs.Path; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.UUID; + +interface PolicyFsTestConfig extends FsTestConfig { + + Policy getPolicy(); + + void setPolicy(Policy policy); + + FsSourceTaskConfig getSourceTaskConfig(); + + void setSourceTaskConfig(FsSourceTaskConfig sourceTaskConfig); + + List getDirectories(); + +} + +class LocalFsConfig extends AbstractLocalFsConfig implements PolicyFsTestConfig { + private Policy policy; + private FsSourceTaskConfig sourceTaskConfig; + private List directories; + + @Override + public void init() throws IOException { + directories = new ArrayList() {{ + add(new Path(getFsUri().toString(), UUID.randomUUID().toString())); + add(new Path(getFsUri().toString(), UUID.randomUUID().toString())); + }}; + for (Path dir : directories) { + getFs().mkdirs(dir); + } + } + + @Override + public Policy getPolicy() { + return policy; + } + + @Override + public void setPolicy(Policy policy) { + this.policy = policy; + } + + @Override + public FsSourceTaskConfig getSourceTaskConfig() { + return sourceTaskConfig; + } + + @Override + public void setSourceTaskConfig(FsSourceTaskConfig sourceTaskConfig) { + this.sourceTaskConfig = sourceTaskConfig; + } + + @Override + public List getDirectories() { + return directories; + } + +} + +class HdfsFsConfig extends AbstractHdfsFsConfig implements PolicyFsTestConfig { + private Policy policy; + private
FsSourceTaskConfig sourceTaskConfig; + private List directories; + + @Override + public void init() throws IOException { + directories = new ArrayList() {{ + add(new Path(getFsUri().toString(), UUID.randomUUID().toString())); + add(new Path(getFsUri().toString(), UUID.randomUUID().toString())); + }}; + for (Path dir : directories) { + getFs().mkdirs(dir); + } + } + + @Override + public Policy getPolicy() { + return policy; + } + + @Override + public void setPolicy(Policy policy) { + this.policy = policy; + } + + @Override + public FsSourceTaskConfig getSourceTaskConfig() { + return sourceTaskConfig; + } + + @Override + public void setSourceTaskConfig(FsSourceTaskConfig sourceTaskConfig) { + this.sourceTaskConfig = sourceTaskConfig; + } + + @Override + public List getDirectories() { + return directories; + } + +} diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/PolicyTestBase.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/PolicyTestBase.java index 4a1aa42..6aa77b1 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/PolicyTestBase.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/PolicyTestBase.java @@ -9,91 +9,122 @@ import org.apache.kafka.connect.errors.IllegalWorkerStateException; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; import java.io.FileNotFoundException; import java.io.IOException; -import java.net.URI; import java.time.LocalDateTime; import java.time.format.DateTimeFormatter; import java.util.*; +import java.util.stream.Stream; import static org.junit.jupiter.api.Assertions.*; -public abstract class PolicyTestBase { +abstract class PolicyTestBase { - protected static FileSystem fs; - protected static Policy policy; - protected static List directories; - protected static FsSourceTaskConfig taskConfig; - protected static URI fsUri; + protected static List TEST_FILE_SYSTEMS = Arrays.asList( + new LocalFsConfig(), + new HdfsFsConfig() + ); + + @BeforeAll + public static void initFs() throws IOException { + for (PolicyFsTestConfig fsConfig : TEST_FILE_SYSTEMS) { + fsConfig.initFs(); + } + } @AfterAll - public static void tearDown() throws Exception { - policy.close(); - fs.close(); + public static void finishFs() throws IOException { + for (PolicyFsTestConfig fsConfig : TEST_FILE_SYSTEMS) { + fsConfig.getPolicy().close(); + fsConfig.close(); + } } @BeforeEach public void initPolicy() throws Throwable { - policy = ReflectionUtils.makePolicy( - (Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), taskConfig); + for (PolicyFsTestConfig fsConfig : TEST_FILE_SYSTEMS) { + FsSourceTaskConfig sourceTaskConfig = buildSourceTaskConfig(fsConfig.getDirectories()); + Policy policy = ReflectionUtils.makePolicy( + (Class) sourceTaskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), sourceTaskConfig); + fsConfig.setSourceTaskConfig(sourceTaskConfig); + fsConfig.setPolicy(policy); + } } @AfterEach - public void cleanDirs() throws IOException { - for (Path dir : directories) { - fs.delete(dir, true); - fs.mkdirs(dir); + public void cleanDirsAndClose() throws IOException { + for (PolicyFsTestConfig fsConfig : TEST_FILE_SYSTEMS) { + for (Path dir : fsConfig.getDirectories()) { + fsConfig.getFs().delete(dir, 
true); + fsConfig.getFs().mkdirs(dir); + } + fsConfig.getPolicy().close(); } - policy.close(); } - @Test - public void invalidArgs() { - assertThrows(IllegalArgumentException.class, () -> taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS) - .getConstructor(taskConfig.getClass()).newInstance(null)); + private static Stream fileSystemConfigProvider() { + return TEST_FILE_SYSTEMS.stream().map(Arguments::of); + } + + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void invalidArgs(PolicyFsTestConfig fsConfig) { + assertThrows(IllegalArgumentException.class, () -> fsConfig.getSourceTaskConfig() + .getClass(FsSourceTaskConfig.POLICY_CLASS) + .getConstructor(fsConfig.getSourceTaskConfig().getClass()).newInstance(null)); } - @Test - public void invalidConfig() { + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void invalidConfig(PolicyFsTestConfig fsConfig) { assertThrows(ConfigException.class, () -> ReflectionUtils.makePolicy( - (Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), + (Class) fsConfig.getSourceTaskConfig().getClass(FsSourceTaskConfig.POLICY_CLASS), new FsSourceTaskConfig(new HashMap<>()))); } - @Test - public void interruptPolicy() throws Throwable { - policy.execute(); - policy.interrupt(); - assertTrue(policy.hasEnded()); + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void interruptPolicy(PolicyFsTestConfig fsConfig) throws Throwable { + fsConfig.getPolicy().execute(); + fsConfig.getPolicy().interrupt(); + assertTrue(fsConfig.getPolicy().hasEnded()); } - @Test - public void invalidDirectory() throws IOException { - for (Path dir : directories) { + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void invalidDirectory(PolicyFsTestConfig fsConfig) throws IOException { + FileSystem fs = fsConfig.getFs(); + for (Path dir : fsConfig.getDirectories()) { fs.delete(dir, true); } try { - assertThrows(FileNotFoundException.class, () -> policy.execute()); + assertThrows(FileNotFoundException.class, () -> fsConfig.getPolicy().execute()); } finally { - for (Path dir : directories) { + for (Path dir : fsConfig.getDirectories()) { fs.mkdirs(dir); } } } - @Test - public void listEmptyDirectories() throws IOException { - Iterator it = policy.execute(); + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void listEmptyDirectories(PolicyFsTestConfig fsConfig) throws IOException { + Iterator it = fsConfig.getPolicy().execute(); assertFalse(it.hasNext()); assertThrows(NoSuchElementException.class, it::next); } - @Test - public void oneFilePerFs() throws IOException, InterruptedException { - for (Path dir : directories) { + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void oneFilePerFs(PolicyFsTestConfig fsConfig) throws IOException, InterruptedException { + FileSystem fs = fsConfig.getFs(); + for (Path dir : fsConfig.getDirectories()) { fs.createNewFile(new Path(dir, System.nanoTime() + ".txt")); //this file does not match the regexp fs.createNewFile(new Path(dir, System.nanoTime() + ".invalid")); @@ -101,7 +132,7 @@ public void oneFilePerFs() throws IOException, InterruptedException { //we wait till FS has registered the files Thread.sleep(500); - Iterator it = policy.execute(); + Iterator it = fsConfig.getPolicy().execute(); assertTrue(it.hasNext()); it.next(); assertTrue(it.hasNext()); @@ -109,9 +140,11 @@ public void oneFilePerFs() throws IOException, InterruptedException { assertFalse(it.hasNext()); } - @Test - public void 
recursiveDirectory() throws IOException, InterruptedException { - for (Path dir : directories) { + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void recursiveDirectory(PolicyFsTestConfig fsConfig) throws IOException, InterruptedException { + FileSystem fs = fsConfig.getFs(); + for (Path dir : fsConfig.getDirectories()) { Path tmpDir = new Path(dir, String.valueOf(System.nanoTime())); fs.mkdirs(tmpDir); fs.createNewFile(new Path(tmpDir, System.nanoTime() + ".txt")); @@ -121,7 +154,7 @@ public void recursiveDirectory() throws IOException, InterruptedException { //we wait till FS has registered the files Thread.sleep(500); - Iterator it = policy.execute(); + Iterator it = fsConfig.getPolicy().execute(); assertTrue(it.hasNext()); it.next(); assertTrue(it.hasNext()); @@ -129,23 +162,26 @@ public void recursiveDirectory() throws IOException, InterruptedException { assertFalse(it.hasNext()); } - @Test - public void execPolicyAlreadyEnded() throws IOException { - policy.execute(); - assertTrue(policy.hasEnded()); - assertThrows(IllegalWorkerStateException.class, () -> policy.execute()); + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void execPolicyAlreadyEnded(PolicyFsTestConfig fsConfig) throws IOException { + fsConfig.getPolicy().execute(); + assertTrue(fsConfig.getPolicy().hasEnded()); + assertThrows(IllegalWorkerStateException.class, () -> fsConfig.getPolicy().execute()); } - @Test - public void dynamicURIs() throws Throwable { - Path dynamic = new Path(fsUri.toString(), "${G}/${yyyy}/${MM}/${W}"); - fs.create(dynamic); - Map originals = taskConfig.originalsStrings(); + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void dynamicURIs(PolicyFsTestConfig fsConfig) throws Throwable { + Path dynamic = new Path(fsConfig.getFsUri().toString(), "${G}/${yyyy}/${MM}/${W}"); + fsConfig.getFs().create(dynamic); + Map originals = fsConfig.getSourceTaskConfig().originalsStrings(); originals.put(FsSourceTaskConfig.FS_URIS, dynamic.toString()); FsSourceTaskConfig cfg = new FsSourceTaskConfig(originals); - policy = ReflectionUtils.makePolicy( - (Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), cfg); - assertEquals(1, policy.getURIs().size()); + Policy policy = ReflectionUtils.makePolicy( + (Class) fsConfig.getSourceTaskConfig().getClass(FsSourceTaskConfig.POLICY_CLASS), cfg); + fsConfig.setPolicy(policy); + assertEquals(1, fsConfig.getPolicy().getURIs().size()); LocalDateTime dateTime = LocalDateTime.now(); DateTimeFormatter formatter = DateTimeFormatter.ofPattern("G"); @@ -159,17 +195,21 @@ public void dynamicURIs() throws Throwable { uri.append("/"); formatter = DateTimeFormatter.ofPattern("W"); uri.append(dateTime.format(formatter)); - assertTrue(policy.getURIs().get(0).endsWith(uri.toString())); + assertTrue(fsConfig.getPolicy().getURIs().get(0).endsWith(uri.toString())); } - @Test - public void invalidDynamicURIs() throws Throwable { - Path dynamic = new Path(fsUri.toString(), "${yyyy}/${MM}/${mmmmmmm}"); - fs.create(dynamic); - Map originals = taskConfig.originalsStrings(); + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void invalidDynamicURIs(PolicyFsTestConfig fsConfig) throws Throwable { + Path dynamic = new Path(fsConfig.getFsUri().toString(), "${yyyy}/${MM}/${mmmmmmm}"); + fsConfig.getFs().create(dynamic); + Map originals = fsConfig.getSourceTaskConfig().originalsStrings(); originals.put(FsSourceTaskConfig.FS_URIS, dynamic.toString()); FsSourceTaskConfig cfg = new 
FsSourceTaskConfig(originals); assertThrows(IllegalArgumentException.class, () -> ReflectionUtils.makePolicy( - (Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), cfg)); + (Class) fsConfig.getSourceTaskConfig().getClass(FsSourceTaskConfig.POLICY_CLASS), cfg)); } + + protected abstract FsSourceTaskConfig buildSourceTaskConfig(List directories); + } diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/local/SimplePolicyTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/SimplePolicyTest.java similarity index 57% rename from src/test/java/com/github/mmolimar/kafka/connect/fs/policy/local/SimplePolicyTest.java rename to src/test/java/com/github/mmolimar/kafka/connect/fs/policy/SimplePolicyTest.java index 2de53e6..279a775 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/local/SimplePolicyTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/SimplePolicyTest.java @@ -1,29 +1,17 @@ -package com.github.mmolimar.kafka.connect.fs.policy.local; +package com.github.mmolimar.kafka.connect.fs.policy; import com.github.mmolimar.kafka.connect.fs.FsSourceTaskConfig; import com.github.mmolimar.kafka.connect.fs.file.reader.TextFileReader; -import com.github.mmolimar.kafka.connect.fs.policy.SimplePolicy; import org.apache.hadoop.fs.Path; -import org.junit.jupiter.api.BeforeAll; -import java.io.IOException; -import java.util.ArrayList; import java.util.HashMap; +import java.util.List; import java.util.Map; -import java.util.UUID; -public class SimplePolicyTest extends LocalPolicyTestBase { - - @BeforeAll - public static void setUp() throws IOException { - directories = new ArrayList() {{ - add(new Path(fsUri.toString(), UUID.randomUUID().toString())); - add(new Path(fsUri.toString(), UUID.randomUUID().toString())); - }}; - for (Path dir : directories) { - fs.mkdirs(dir); - } +public class SimplePolicyTest extends PolicyTestBase { + @Override + protected FsSourceTaskConfig buildSourceTaskConfig(List directories) { Map cfg = new HashMap() {{ String[] uris = directories.stream().map(Path::toString) .toArray(String[]::new); @@ -35,6 +23,7 @@ public static void setUp() throws IOException { put(FsSourceTaskConfig.POLICY_PREFIX_FS + "dfs.data.dir", "test"); put(FsSourceTaskConfig.POLICY_PREFIX_FS + "fs.default.name", "hdfs://test/"); }}; - taskConfig = new FsSourceTaskConfig(cfg); + return new FsSourceTaskConfig(cfg); } + } diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/local/SleepyPolicyTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/SleepyPolicyTest.java similarity index 54% rename from src/test/java/com/github/mmolimar/kafka/connect/fs/policy/local/SleepyPolicyTest.java rename to src/test/java/com/github/mmolimar/kafka/connect/fs/policy/SleepyPolicyTest.java index 93c9f09..9748d15 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/local/SleepyPolicyTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/SleepyPolicyTest.java @@ -1,35 +1,23 @@ -package com.github.mmolimar.kafka.connect.fs.policy.local; +package com.github.mmolimar.kafka.connect.fs.policy; import com.github.mmolimar.kafka.connect.fs.FsSourceTaskConfig; import com.github.mmolimar.kafka.connect.fs.file.reader.TextFileReader; -import com.github.mmolimar.kafka.connect.fs.policy.Policy; -import com.github.mmolimar.kafka.connect.fs.policy.SleepyPolicy; import com.github.mmolimar.kafka.connect.fs.util.ReflectionUtils; import org.apache.hadoop.fs.Path; import 
org.apache.kafka.common.config.ConfigException; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; -import java.io.IOException; -import java.util.ArrayList; import java.util.HashMap; +import java.util.List; import java.util.Map; -import java.util.UUID; import static org.junit.jupiter.api.Assertions.*; -public class SleepyPolicyTest extends LocalPolicyTestBase { - - @BeforeAll - public static void setUp() throws IOException { - directories = new ArrayList() {{ - add(new Path(fsUri.toString(), UUID.randomUUID().toString())); - add(new Path(fsUri.toString(), UUID.randomUUID().toString())); - }}; - for (Path dir : directories) { - fs.mkdirs(dir); - } +public class SleepyPolicyTest extends PolicyTestBase { + @Override + protected FsSourceTaskConfig buildSourceTaskConfig(List directories) { Map cfg = new HashMap() {{ String[] uris = directories.stream().map(Path::toString) .toArray(String[]::new); @@ -43,45 +31,49 @@ public static void setUp() throws IOException { put(SleepyPolicy.SLEEPY_POLICY_SLEEP_MS, "100"); put(SleepyPolicy.SLEEPY_POLICY_MAX_EXECS, "1"); }}; - taskConfig = new FsSourceTaskConfig(cfg); + return new FsSourceTaskConfig(cfg); } - @Test - public void invalidSleepTime() { - Map originals = taskConfig.originalsStrings(); + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void invalidSleepTime(PolicyFsTestConfig fsConfig) { + Map originals = fsConfig.getSourceTaskConfig().originalsStrings(); originals.put(SleepyPolicy.SLEEPY_POLICY_SLEEP_MS, "invalid"); FsSourceTaskConfig cfg = new FsSourceTaskConfig(originals); assertThrows(ConfigException.class, () -> ReflectionUtils.makePolicy( - (Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), cfg)); + (Class) fsConfig.getSourceTaskConfig().getClass(FsSourceTaskConfig.POLICY_CLASS), cfg)); } - @Test - public void invalidMaxExecs() { - Map originals = taskConfig.originalsStrings(); + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void invalidMaxExecs(PolicyFsTestConfig fsConfig) { + Map originals = fsConfig.getSourceTaskConfig().originalsStrings(); originals.put(SleepyPolicy.SLEEPY_POLICY_MAX_EXECS, "invalid"); FsSourceTaskConfig cfg = new FsSourceTaskConfig(originals); assertThrows(ConfigException.class, () -> ReflectionUtils.makePolicy( - (Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), cfg)); + (Class) fsConfig.getSourceTaskConfig().getClass(FsSourceTaskConfig.POLICY_CLASS), cfg)); } - @Test - public void invalidSleepFraction() { - Map originals = taskConfig.originalsStrings(); + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void invalidSleepFraction(PolicyFsTestConfig fsConfig) { + Map originals = fsConfig.getSourceTaskConfig().originalsStrings(); originals.put(SleepyPolicy.SLEEPY_POLICY_SLEEP_FRACTION, "invalid"); FsSourceTaskConfig cfg = new FsSourceTaskConfig(originals); assertThrows(ConfigException.class, () -> ReflectionUtils.makePolicy( - (Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), cfg)); + (Class) fsConfig.getSourceTaskConfig().getClass(FsSourceTaskConfig.POLICY_CLASS), cfg)); } - @Test - public void sleepExecution() throws Throwable { - Map tConfig = taskConfig.originalsStrings(); + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void sleepExecution(PolicyFsTestConfig fsConfig) throws Throwable { + Map tConfig = fsConfig.getSourceTaskConfig().originalsStrings(); 
tConfig.put(SleepyPolicy.SLEEPY_POLICY_SLEEP_MS, "1000"); tConfig.put(SleepyPolicy.SLEEPY_POLICY_MAX_EXECS, "2"); FsSourceTaskConfig sleepConfig = new FsSourceTaskConfig(tConfig); - policy = ReflectionUtils.makePolicy( - (Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), sleepConfig); + Policy policy = ReflectionUtils.makePolicy( + (Class) fsConfig.getSourceTaskConfig().getClass(FsSourceTaskConfig.POLICY_CLASS), sleepConfig); assertFalse(policy.hasEnded()); policy.execute(); assertFalse(policy.hasEnded()); @@ -89,15 +81,16 @@ public void sleepExecution() throws Throwable { assertTrue(policy.hasEnded()); } - @Test - public void defaultExecutions() throws Throwable { - Map tConfig = taskConfig.originalsStrings(); + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void defaultExecutions(PolicyFsTestConfig fsConfig) throws Throwable { + Map tConfig = fsConfig.getSourceTaskConfig().originalsStrings(); tConfig.put(SleepyPolicy.SLEEPY_POLICY_SLEEP_MS, "1"); tConfig.remove(SleepyPolicy.SLEEPY_POLICY_MAX_EXECS); FsSourceTaskConfig sleepConfig = new FsSourceTaskConfig(tConfig); - policy = ReflectionUtils.makePolicy( - (Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), sleepConfig); + Policy policy = ReflectionUtils.makePolicy( + (Class) fsConfig.getSourceTaskConfig().getClass(FsSourceTaskConfig.POLICY_CLASS), sleepConfig); //it never ends for (int i = 0; i < 100; i++) { @@ -107,4 +100,5 @@ public void defaultExecutions() throws Throwable { policy.interrupt(); assertTrue(policy.hasEnded()); } + } diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/CronPolicyTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/CronPolicyTest.java deleted file mode 100644 index 26d20d1..0000000 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/CronPolicyTest.java +++ /dev/null @@ -1,90 +0,0 @@ -package com.github.mmolimar.kafka.connect.fs.policy.hdfs; - -import com.github.mmolimar.kafka.connect.fs.FsSourceTaskConfig; -import com.github.mmolimar.kafka.connect.fs.file.reader.TextFileReader; -import com.github.mmolimar.kafka.connect.fs.policy.CronPolicy; -import com.github.mmolimar.kafka.connect.fs.policy.Policy; -import com.github.mmolimar.kafka.connect.fs.util.ReflectionUtils; -import org.apache.hadoop.fs.Path; -import org.apache.kafka.common.config.ConfigException; -import org.apache.kafka.connect.errors.IllegalWorkerStateException; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; - -import java.io.IOException; -import java.time.LocalDateTime; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Map; -import java.util.UUID; - -import static org.junit.jupiter.api.Assertions.*; - -public class CronPolicyTest extends HdfsPolicyTestBase { - - @BeforeAll - public static void setUp() throws IOException { - directories = new ArrayList() {{ - add(new Path(fsUri.toString(), UUID.randomUUID().toString())); - add(new Path(fsUri.toString(), UUID.randomUUID().toString())); - }}; - for (Path dir : directories) { - fs.mkdirs(dir); - } - - Map cfg = new HashMap() {{ - String[] uris = directories.stream().map(Path::toString) - .toArray(String[]::new); - put(FsSourceTaskConfig.FS_URIS, String.join(",", uris)); - put(FsSourceTaskConfig.TOPIC, "topic_test"); - put(FsSourceTaskConfig.POLICY_CLASS, CronPolicy.class.getName()); - put(FsSourceTaskConfig.FILE_READER_CLASS, TextFileReader.class.getName()); - put(FsSourceTaskConfig.POLICY_REGEXP, "^[0-9]*\\.txt$"); - 
put(FsSourceTaskConfig.POLICY_PREFIX_FS + "dfs.data.dir", "test"); - put(FsSourceTaskConfig.POLICY_PREFIX_FS + "fs.default.name", "hdfs://test"); - put(CronPolicy.CRON_POLICY_EXPRESSION, "0/2 * * * * ?"); - put(CronPolicy.CRON_POLICY_END_DATE, LocalDateTime.now().plusDays(1).toString()); - }}; - taskConfig = new FsSourceTaskConfig(cfg); - } - - @Test - @Override - public void execPolicyAlreadyEnded() throws IOException { - policy.execute(); - policy.interrupt(); - assertTrue(policy.hasEnded()); - assertThrows(IllegalWorkerStateException.class, () -> policy.execute()); - } - - @Test - public void invalidCronExpression() { - Map originals = taskConfig.originalsStrings(); - originals.put(CronPolicy.CRON_POLICY_EXPRESSION, "invalid"); - FsSourceTaskConfig cfg = new FsSourceTaskConfig(originals); - assertThrows(ConfigException.class, () -> ReflectionUtils.makePolicy( - (Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), cfg)); - } - - @Test - public void invalidEndDate() { - Map originals = taskConfig.originalsStrings(); - originals.put(CronPolicy.CRON_POLICY_END_DATE, "invalid"); - FsSourceTaskConfig cfg = new FsSourceTaskConfig(originals); - assertThrows(ConfigException.class, () -> ReflectionUtils.makePolicy( - (Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), cfg)); - } - - @Test - public void canBeInterrupted() throws Throwable { - policy = ReflectionUtils.makePolicy( - (Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), taskConfig); - - for (int i = 0; i < 5; i++) { - assertFalse(policy.hasEnded()); - policy.execute(); - } - policy.interrupt(); - assertTrue(policy.hasEnded()); - } -} diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/HdfsFileWatcherPolicyTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/HdfsFileWatcherPolicyTest.java deleted file mode 100644 index ec68d68..0000000 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/HdfsFileWatcherPolicyTest.java +++ /dev/null @@ -1,71 +0,0 @@ -package com.github.mmolimar.kafka.connect.fs.policy.hdfs; - -import com.github.mmolimar.kafka.connect.fs.FsSourceTaskConfig; -import com.github.mmolimar.kafka.connect.fs.file.reader.TextFileReader; -import com.github.mmolimar.kafka.connect.fs.policy.HdfsFileWatcherPolicy; -import org.apache.hadoop.fs.Path; -import org.apache.kafka.connect.errors.IllegalWorkerStateException; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Map; -import java.util.UUID; - -import static org.junit.jupiter.api.Assertions.*; - -public class HdfsFileWatcherPolicyTest extends HdfsPolicyTestBase { - - @BeforeAll - public static void setUp() throws IOException { - directories = new ArrayList() {{ - add(new Path(fsUri.toString(), UUID.randomUUID().toString())); - add(new Path(fsUri.toString(), UUID.randomUUID().toString())); - }}; - for (Path dir : directories) { - fs.mkdirs(dir); - } - - Map cfg = new HashMap() {{ - String[] uris = directories.stream().map(Path::toString) - .toArray(String[]::new); - put(FsSourceTaskConfig.FS_URIS, String.join(",", uris)); - put(FsSourceTaskConfig.TOPIC, "topic_test"); - put(FsSourceTaskConfig.POLICY_CLASS, HdfsFileWatcherPolicy.class.getName()); - put(FsSourceTaskConfig.FILE_READER_CLASS, TextFileReader.class.getName()); - put(FsSourceTaskConfig.POLICY_REGEXP, "^[0-9]*\\.txt$"); - put(FsSourceTaskConfig.POLICY_PREFIX_FS + "dfs.data.dir", "test"); - 
put(FsSourceTaskConfig.POLICY_PREFIX_FS + "fs.default.name", "hdfs://test"); - }}; - taskConfig = new FsSourceTaskConfig(cfg); - } - - //This policy does not throw any exception. Just stop watching those nonexistent dirs - @Test - @Override - public void invalidDirectory() throws IOException { - for (Path dir : directories) { - fs.delete(dir, true); - } - try { - policy.execute(); - } finally { - for (Path dir : directories) { - fs.mkdirs(dir); - } - } - } - - //This policy never ends. We have to interrupt it - @Test - @Override - public void execPolicyAlreadyEnded() throws IOException { - policy.execute(); - assertFalse(policy.hasEnded()); - policy.interrupt(); - assertTrue(policy.hasEnded()); - assertThrows(IllegalWorkerStateException.class, () -> policy.execute()); - } -} diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/SimplePolicyTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/SimplePolicyTest.java deleted file mode 100644 index 5e0eb7f..0000000 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/SimplePolicyTest.java +++ /dev/null @@ -1,40 +0,0 @@ -package com.github.mmolimar.kafka.connect.fs.policy.hdfs; - -import com.github.mmolimar.kafka.connect.fs.FsSourceTaskConfig; -import com.github.mmolimar.kafka.connect.fs.file.reader.TextFileReader; -import com.github.mmolimar.kafka.connect.fs.policy.SimplePolicy; -import org.apache.hadoop.fs.Path; -import org.junit.jupiter.api.BeforeAll; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Map; -import java.util.UUID; - -public class SimplePolicyTest extends HdfsPolicyTestBase { - - @BeforeAll - public static void setUp() throws IOException { - directories = new ArrayList() {{ - add(new Path(fsUri.toString(), UUID.randomUUID().toString())); - add(new Path(fsUri.toString(), UUID.randomUUID().toString())); - }}; - for (Path dir : directories) { - fs.mkdirs(dir); - } - - Map cfg = new HashMap() {{ - String[] uris = directories.stream().map(Path::toString) - .toArray(String[]::new); - put(FsSourceTaskConfig.FS_URIS, String.join(",", uris)); - put(FsSourceTaskConfig.TOPIC, "topic_test"); - put(FsSourceTaskConfig.POLICY_CLASS, SimplePolicy.class.getName()); - put(FsSourceTaskConfig.FILE_READER_CLASS, TextFileReader.class.getName()); - put(FsSourceTaskConfig.POLICY_REGEXP, "^[0-9]*\\.txt$"); - put(FsSourceTaskConfig.POLICY_PREFIX_FS + "dfs.data.dir", "test"); - put(FsSourceTaskConfig.POLICY_PREFIX_FS + "fs.default.name", "hdfs://test"); - }}; - taskConfig = new FsSourceTaskConfig(cfg); - } -} diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/SleepyPolicyTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/SleepyPolicyTest.java deleted file mode 100644 index d47faae..0000000 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/hdfs/SleepyPolicyTest.java +++ /dev/null @@ -1,110 +0,0 @@ -package com.github.mmolimar.kafka.connect.fs.policy.hdfs; - -import com.github.mmolimar.kafka.connect.fs.FsSourceTaskConfig; -import com.github.mmolimar.kafka.connect.fs.file.reader.TextFileReader; -import com.github.mmolimar.kafka.connect.fs.policy.Policy; -import com.github.mmolimar.kafka.connect.fs.policy.SleepyPolicy; -import com.github.mmolimar.kafka.connect.fs.util.ReflectionUtils; -import org.apache.hadoop.fs.Path; -import org.apache.kafka.common.config.ConfigException; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; - -import java.io.IOException; 
-import java.util.ArrayList; -import java.util.HashMap; -import java.util.Map; -import java.util.UUID; - -import static org.junit.jupiter.api.Assertions.*; - -public class SleepyPolicyTest extends HdfsPolicyTestBase { - - @BeforeAll - public static void setUp() throws IOException { - directories = new ArrayList() {{ - add(new Path(fsUri.toString(), UUID.randomUUID().toString())); - add(new Path(fsUri.toString(), UUID.randomUUID().toString())); - }}; - for (Path dir : directories) { - fs.mkdirs(dir); - } - - Map cfg = new HashMap() {{ - String[] uris = directories.stream().map(Path::toString) - .toArray(String[]::new); - put(FsSourceTaskConfig.FS_URIS, String.join(",", uris)); - put(FsSourceTaskConfig.TOPIC, "topic_test"); - put(FsSourceTaskConfig.POLICY_CLASS, SleepyPolicy.class.getName()); - put(FsSourceTaskConfig.FILE_READER_CLASS, TextFileReader.class.getName()); - put(FsSourceTaskConfig.POLICY_REGEXP, "^[0-9]*\\.txt$"); - put(FsSourceTaskConfig.POLICY_PREFIX_FS + "dfs.data.dir", "test"); - put(FsSourceTaskConfig.POLICY_PREFIX_FS + "fs.default.name", "hdfs://test"); - put(SleepyPolicy.SLEEPY_POLICY_SLEEP_MS, "100"); - put(SleepyPolicy.SLEEPY_POLICY_MAX_EXECS, "1"); - }}; - taskConfig = new FsSourceTaskConfig(cfg); - } - - @Test - public void invalidSleepTime() { - Map originals = taskConfig.originalsStrings(); - originals.put(SleepyPolicy.SLEEPY_POLICY_SLEEP_MS, "invalid"); - FsSourceTaskConfig cfg = new FsSourceTaskConfig(originals); - assertThrows(ConfigException.class, () -> ReflectionUtils.makePolicy( - (Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), cfg)); - } - - @Test - public void invalidMaxExecs() { - Map originals = taskConfig.originalsStrings(); - originals.put(SleepyPolicy.SLEEPY_POLICY_MAX_EXECS, "invalid"); - FsSourceTaskConfig cfg = new FsSourceTaskConfig(originals); - assertThrows(ConfigException.class, () -> ReflectionUtils.makePolicy( - (Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), cfg)); - } - - @Test - public void invalidSleepFraction() { - Map originals = taskConfig.originalsStrings(); - originals.put(SleepyPolicy.SLEEPY_POLICY_SLEEP_FRACTION, "invalid"); - FsSourceTaskConfig cfg = new FsSourceTaskConfig(originals); - assertThrows(ConfigException.class, () -> ReflectionUtils.makePolicy( - (Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), cfg)); - } - - @Test - public void sleepExecution() throws Throwable { - Map tConfig = taskConfig.originalsStrings(); - tConfig.put(SleepyPolicy.SLEEPY_POLICY_SLEEP_MS, "1000"); - tConfig.put(SleepyPolicy.SLEEPY_POLICY_MAX_EXECS, "2"); - FsSourceTaskConfig sleepConfig = new FsSourceTaskConfig(tConfig); - - policy = ReflectionUtils.makePolicy( - (Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), sleepConfig); - assertFalse(policy.hasEnded()); - policy.execute(); - assertFalse(policy.hasEnded()); - policy.execute(); - assertTrue(policy.hasEnded()); - } - - @Test - public void defaultExecutions() throws Throwable { - Map tConfig = taskConfig.originalsStrings(); - tConfig.put(SleepyPolicy.SLEEPY_POLICY_SLEEP_MS, "1"); - tConfig.remove(SleepyPolicy.SLEEPY_POLICY_MAX_EXECS); - FsSourceTaskConfig sleepConfig = new FsSourceTaskConfig(tConfig); - - policy = ReflectionUtils.makePolicy( - (Class) taskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), sleepConfig); - - //it never ends - for (int i = 0; i < 100; i++) { - assertFalse(policy.hasEnded()); - policy.execute(); - } - policy.interrupt(); - assertTrue(policy.hasEnded()); - } -} diff --git 
a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/local/LocalPolicyTestBase.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/local/LocalPolicyTestBase.java deleted file mode 100644 index 8c12b3a..0000000 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/local/LocalPolicyTestBase.java +++ /dev/null @@ -1,29 +0,0 @@ -package com.github.mmolimar.kafka.connect.fs.policy.local; - -import com.github.mmolimar.kafka.connect.fs.policy.PolicyTestBase; -import org.apache.commons.io.FileUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.BeforeAll; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; - -public abstract class LocalPolicyTestBase extends PolicyTestBase { - - private static Path localDir; - - @BeforeAll - public static void initFs() throws IOException { - localDir = Files.createTempDirectory("test-"); - fsUri = localDir.toUri(); - fs = FileSystem.newInstance(fsUri, new Configuration()); - } - - @AfterAll - public static void finishFs() throws IOException { - FileUtils.deleteDirectory(localDir.toFile()); - } -} diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/task/FsSourceTaskTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/task/FsSourceTaskTest.java index 6d4a823..8dd610a 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/task/FsSourceTaskTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/task/FsSourceTaskTest.java @@ -2,96 +2,304 @@ import com.github.mmolimar.kafka.connect.fs.FsSourceTask; import com.github.mmolimar.kafka.connect.fs.FsSourceTaskConfig; +import com.github.mmolimar.kafka.connect.fs.file.reader.AvroFileReader; import com.github.mmolimar.kafka.connect.fs.file.reader.TextFileReader; +import com.github.mmolimar.kafka.connect.fs.policy.Policy; import com.github.mmolimar.kafka.connect.fs.policy.SimplePolicy; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.kafka.connect.data.Struct; import org.apache.kafka.connect.errors.ConnectException; +import org.apache.kafka.connect.source.SourceRecord; +import org.apache.kafka.connect.source.SourceTaskContext; +import org.apache.kafka.connect.storage.OffsetStorageReader; +import org.easymock.EasyMock; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.io.TempDir; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; +import org.powermock.api.easymock.PowerMock; +import org.powermock.api.support.membermodification.MemberModifier; import java.io.File; -import java.util.HashMap; -import java.util.Map; +import java.io.FileWriter; +import java.io.IOException; +import java.io.PrintWriter; +import java.util.*; +import java.util.stream.IntStream; +import java.util.stream.Stream; import static org.junit.jupiter.api.Assertions.*; public class FsSourceTaskTest { - @TempDir - public static File temporaryFolder; - private FsSourceTask task; - private Map taskConfig; + private static final List TEST_FILE_SYSTEMS = Arrays.asList( + new LocalFsConfig(), + new HdfsFsConfig() + ); + private static final int NUM_RECORDS = 10; + + @BeforeAll + public static void initFs() throws IOException { + 
for (TaskFsTestConfig fsConfig : TEST_FILE_SYSTEMS) { + fsConfig.initFs(); + } + } + + @AfterAll + public static void finishFs() throws IOException { + for (TaskFsTestConfig fsConfig : TEST_FILE_SYSTEMS) { + fsConfig.close(); + } + } @BeforeEach - public void setup() { - task = new FsSourceTask(); + public void initTask() { + for (TaskFsTestConfig fsConfig : TEST_FILE_SYSTEMS) { + Map taskConfig = new HashMap() {{ + String[] uris = fsConfig.getDirectories().stream().map(Path::toString) + .toArray(String[]::new); + put(FsSourceTaskConfig.FS_URIS, String.join(",", uris)); + put(FsSourceTaskConfig.TOPIC, "topic_test"); + put(FsSourceTaskConfig.POLICY_CLASS, SimplePolicy.class.getName()); + put(FsSourceTaskConfig.FILE_READER_CLASS, TextFileReader.class.getName()); + put(FsSourceTaskConfig.POLICY_REGEXP, "^[0-9]*\\.txt$"); + }}; + + //Mock initialization + SourceTaskContext taskContext = PowerMock.createMock(SourceTaskContext.class); + OffsetStorageReader offsetStorageReader = PowerMock.createMock(OffsetStorageReader.class); + + EasyMock.expect(taskContext.offsetStorageReader()) + .andReturn(offsetStorageReader); + + EasyMock.expect(taskContext.offsetStorageReader()) + .andReturn(offsetStorageReader); + + EasyMock.expect(offsetStorageReader.offset(EasyMock.anyObject())) + .andReturn(new HashMap() {{ + put("offset", 5L); + }}); + EasyMock.expect(offsetStorageReader.offset(EasyMock.anyObject())) + .andReturn(new HashMap() {{ + put("offset", 5L); + }}); + + EasyMock.checkOrder(taskContext, false); + EasyMock.replay(taskContext); + + EasyMock.checkOrder(offsetStorageReader, false); + EasyMock.replay(offsetStorageReader); + + FsSourceTask task = new FsSourceTask(); + task.initialize(taskContext); + + fsConfig.setTaskConfig(taskConfig); + fsConfig.setTask(task); + } + } + + @AfterEach + public void cleanDirsAndStop() throws IOException { + for (TaskFsTestConfig fsConfig : TEST_FILE_SYSTEMS) { + for (Path dir : fsConfig.getDirectories()) { + fsConfig.getFs().delete(dir, true); + fsConfig.getFs().mkdirs(dir); + } + fsConfig.getTask().stop(); + } + } + + private static Stream fileSystemConfigProvider() { + return TEST_FILE_SYSTEMS.stream().map(Arguments::of); + } + + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void pollNoData(TaskFsTestConfig fsConfig) { + fsConfig.getTask().start(fsConfig.getTaskConfig()); + assertEquals(0, fsConfig.getTask().poll().size()); + //policy has ended + assertNull(fsConfig.getTask().poll()); + } + + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void emptyFilesToProcess(TaskFsTestConfig fsConfig) throws IOException { + for (Path dir : fsConfig.getDirectories()) { + fsConfig.getFs().createNewFile(new Path(dir, System.nanoTime() + ".txt")); + //this file does not match the regexp + fsConfig.getFs().createNewFile(new Path(dir, String.valueOf(System.nanoTime()))); + } + fsConfig.getTask().start(fsConfig.getTaskConfig()); + assertEquals(0, fsConfig.getTask().poll().size()); + //policy has ended + assertNull(fsConfig.getTask().poll()); + } + + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void oneFilePerFs(TaskFsTestConfig fsConfig) throws IOException { + for (Path dir : fsConfig.getDirectories()) { + Path dataFile = new Path(dir, System.nanoTime() + ".txt"); + createDataFile(fsConfig.getFs(), dataFile); + //this file does not match the regexp + fsConfig.getFs().createNewFile(new Path(dir, String.valueOf(System.nanoTime()))); + } + + fsConfig.getTask().start(fsConfig.getTaskConfig()); + List records = 
fsConfig.getTask().poll(); + assertEquals(10, records.size()); + checkRecords(records); + //policy has ended + assertNull(fsConfig.getTask().poll()); + } + + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void nonExistentUri(TaskFsTestConfig fsConfig) { + Map props = new HashMap<>(fsConfig.getTaskConfig()); + props.put(FsSourceTaskConfig.FS_URIS, + new Path(fsConfig.getFs().getWorkingDirectory(), UUID.randomUUID().toString()).toString()); + fsConfig.getTask().start(props); + fsConfig.getTask().poll(); + } + + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void exceptionExecutingPolicy(TaskFsTestConfig fsConfig) throws IOException, IllegalAccessException { + Map props = new HashMap<>(fsConfig.getTaskConfig()); + fsConfig.getTask().start(props); + + Policy policy = EasyMock.createNiceMock(Policy.class); + EasyMock.expect(policy.hasEnded()).andReturn(Boolean.FALSE); + EasyMock.expect(policy.execute()).andThrow(new ConnectException("Exception from mock")); + EasyMock.expect(policy.getURIs()).andReturn(null); + EasyMock.checkOrder(policy, false); + EasyMock.replay(policy); + MemberModifier.field(FsSourceTask.class, "policy").set(fsConfig.getTask(), policy); + + assertEquals(0, fsConfig.getTask().poll().size()); + } - taskConfig = new HashMap() {{ - put(FsSourceTaskConfig.FS_URIS, String.join(",", - temporaryFolder.toURI() + File.separator + "dir1", - temporaryFolder.toURI() + File.separator + "dir2", - temporaryFolder.toURI() + File.separator + "dir3")); - put(FsSourceTaskConfig.TOPIC, "topic_test"); - put(FsSourceTaskConfig.POLICY_CLASS, SimplePolicy.class.getName()); - put(FsSourceTaskConfig.FILE_READER_CLASS, TextFileReader.class.getName()); - }}; + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void exceptionReadingFile(TaskFsTestConfig fsConfig) throws IOException { + Map props = new HashMap<>(fsConfig.getTaskConfig()); + File tmp = File.createTempFile("test-", ".txt"); + try (PrintWriter writer = new PrintWriter(tmp)) { + writer.append("txt"); + } + Path dest = new Path(fsConfig.getDirectories().get(0).toString(), System.nanoTime() + ".txt"); + fsConfig.getFs().moveFromLocalFile(new Path(tmp.getAbsolutePath()), dest); + props.put(FsSourceTaskConfig.FILE_READER_CLASS, AvroFileReader.class.getName()); + fsConfig.getTask().start(props); + assertEquals(0, fsConfig.getTask().poll().size()); + fsConfig.getTask().stop(); + + fsConfig.getFs().delete(dest, false); } - @Test - public void nullProperties() { - assertThrows(ConnectException.class, () -> task.start(null)); + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void nullProperties(TaskFsTestConfig fsConfig) { + assertThrows(ConnectException.class, () -> fsConfig.getTask().start(null)); } - @Test - public void expectedFsUris() { - Map testProps = new HashMap<>(taskConfig); + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void expectedFsUris(TaskFsTestConfig fsConfig) { + Map testProps = new HashMap<>(fsConfig.getTaskConfig()); testProps.remove(FsSourceTaskConfig.FS_URIS); - assertThrows(ConnectException.class, () -> task.start(testProps)); + assertThrows(ConnectException.class, () -> fsConfig.getTask().start(testProps)); } - @Test - public void expectedPolicyClass() { - Map testProps = new HashMap<>(taskConfig); + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void expectedPolicyClass(TaskFsTestConfig fsConfig) { + Map testProps = new HashMap<>(fsConfig.getTaskConfig()); 
testProps.remove(FsSourceTaskConfig.POLICY_CLASS); - assertThrows(ConnectException.class, () -> task.start(testProps)); + assertThrows(ConnectException.class, () -> fsConfig.getTask().start(testProps)); } - @Test - public void invalidPolicyClass() { - Map testProps = new HashMap<>(taskConfig); + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void invalidPolicyClass(TaskFsTestConfig fsConfig) { + Map testProps = new HashMap<>(fsConfig.getTaskConfig()); testProps.put(FsSourceTaskConfig.POLICY_CLASS, Object.class.getName()); - assertThrows(ConnectException.class, () -> task.start(testProps)); + assertThrows(ConnectException.class, () -> fsConfig.getTask().start(testProps)); } - @Test - public void expectedReaderClass() { - Map testProps = new HashMap<>(taskConfig); + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void expectedReaderClass(TaskFsTestConfig fsConfig) { + Map testProps = new HashMap<>(fsConfig.getTaskConfig()); testProps.remove(FsSourceTaskConfig.FILE_READER_CLASS); - assertThrows(ConnectException.class, () -> task.start(testProps)); + assertThrows(ConnectException.class, () -> fsConfig.getTask().start(testProps)); } - @Test - public void invalidReaderClass() { - Map testProps = new HashMap<>(taskConfig); + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void invalidReaderClass(TaskFsTestConfig fsConfig) { + Map testProps = new HashMap<>(fsConfig.getTaskConfig()); testProps.put(FsSourceTaskConfig.FILE_READER_CLASS, Object.class.getName()); - assertThrows(ConnectException.class, () -> task.start(testProps)); + assertThrows(ConnectException.class, () -> fsConfig.getTask().start(testProps)); + } + + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void minimumConfig(TaskFsTestConfig fsConfig) { + fsConfig.getTask().start(fsConfig.getTaskConfig()); + fsConfig.getTask().stop(); + } + + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void pollWithoutStart(TaskFsTestConfig fsConfig) { + assertNull(fsConfig.getTask().poll()); + fsConfig.getTask().stop(); } - @Test - public void minimumConfig() { - task.start(taskConfig); - task.stop(); + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void checkVersion(TaskFsTestConfig fsConfig) { + assertNotNull(fsConfig.getTask().version()); + assertFalse("unknown".equalsIgnoreCase(fsConfig.getTask().version())); } - @Test - public void pollWithoutStart() { - assertNull(task.poll()); - task.stop(); + protected void checkRecords(List records) { + records.forEach(record -> { + assertEquals("topic_test", record.topic()); + assertNotNull(record.sourcePartition()); + assertNotNull(record.sourceOffset()); + assertNotNull(record.value()); + + assertNotNull(((Struct) record.value()).get(TextFileReader.FIELD_NAME_VALUE_DEFAULT)); + }); } - @Test - public void checkVersion() { - assertNotNull(task.version()); - assertFalse("unknown".equalsIgnoreCase(task.version())); + protected void createDataFile(FileSystem fs, Path path) throws IOException { + File file = fillDataFile(); + fs.moveFromLocalFile(new Path(file.getAbsolutePath()), path); } + + private File fillDataFile() throws IOException { + File txtFile = File.createTempFile("test-", ".txt"); + try (FileWriter writer = new FileWriter(txtFile)) { + + IntStream.range(0, NUM_RECORDS).forEach(index -> { + String value = String.format("%d_%s", index, UUID.randomUUID()); + try { + writer.append(value + "\n"); + } catch (IOException ioe) { + throw new RuntimeException(ioe); 
+ } + }); + } + return txtFile; + } + } diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/task/FsSourceTaskTestBase.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/task/FsSourceTaskTestBase.java deleted file mode 100644 index 22d388c..0000000 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/task/FsSourceTaskTestBase.java +++ /dev/null @@ -1,187 +0,0 @@ -package com.github.mmolimar.kafka.connect.fs.task; - -import com.github.mmolimar.kafka.connect.fs.FsSourceTask; -import com.github.mmolimar.kafka.connect.fs.FsSourceTaskConfig; -import com.github.mmolimar.kafka.connect.fs.file.reader.AvroFileReader; -import com.github.mmolimar.kafka.connect.fs.file.reader.TextFileReader; -import com.github.mmolimar.kafka.connect.fs.policy.Policy; -import com.github.mmolimar.kafka.connect.fs.policy.SimplePolicy; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.kafka.connect.errors.ConnectException; -import org.apache.kafka.connect.source.SourceRecord; -import org.apache.kafka.connect.source.SourceTaskContext; -import org.apache.kafka.connect.storage.OffsetStorageReader; -import org.easymock.EasyMock; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.powermock.api.easymock.PowerMock; -import org.powermock.api.support.membermodification.MemberModifier; - -import java.io.File; -import java.io.IOException; -import java.io.PrintWriter; -import java.net.URI; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.UUID; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNull; - -public abstract class FsSourceTaskTestBase { - - protected static final int NUM_RECORDS = 10; - - protected static FileSystem fs; - protected static List directories; - protected static URI fsUri; - - protected FsSourceTask task; - protected Map taskConfig; - protected SourceTaskContext taskContext; - protected OffsetStorageReader offsetStorageReader; - - @AfterAll - public static void tearDown() throws Exception { - fs.close(); - } - - @BeforeEach - public void initTask() { - task = new FsSourceTask(); - taskConfig = new HashMap() {{ - String[] uris = directories.stream().map(Path::toString) - .toArray(String[]::new); - put(FsSourceTaskConfig.FS_URIS, String.join(",", uris)); - put(FsSourceTaskConfig.TOPIC, "topic_test"); - put(FsSourceTaskConfig.POLICY_CLASS, SimplePolicy.class.getName()); - put(FsSourceTaskConfig.FILE_READER_CLASS, TextFileReader.class.getName()); - put(FsSourceTaskConfig.POLICY_REGEXP, "^[0-9]*\\.txt$"); - }}; - - //Mock initialization - taskContext = PowerMock.createMock(SourceTaskContext.class); - offsetStorageReader = PowerMock.createMock(OffsetStorageReader.class); - - EasyMock.expect(taskContext.offsetStorageReader()) - .andReturn(offsetStorageReader); - - EasyMock.expect(taskContext.offsetStorageReader()) - .andReturn(offsetStorageReader); - - EasyMock.expect(offsetStorageReader.offset(EasyMock.anyObject())) - .andReturn(new HashMap() {{ - put("offset", 5L); - }}); - EasyMock.expect(offsetStorageReader.offset(EasyMock.anyObject())) - .andReturn(new HashMap() {{ - put("offset", 5L); - }}); - - EasyMock.checkOrder(taskContext, false); - EasyMock.replay(taskContext); - - EasyMock.checkOrder(offsetStorageReader, false); - EasyMock.replay(offsetStorageReader); - - task.initialize(taskContext); - - } - - @AfterEach - public 
void cleanDirsAndStop() throws IOException { - for (Path dir : directories) { - fs.delete(dir, true); - fs.mkdirs(dir); - } - task.stop(); - } - - @Test - public void pollNoData() { - task.start(taskConfig); - assertEquals(0, task.poll().size()); - //policy has ended - assertNull(task.poll()); - } - - @Test - public void emptyFilesToProcess() throws IOException { - for (Path dir : directories) { - fs.createNewFile(new Path(dir, System.nanoTime() + ".txt")); - //this file does not match the regexp - fs.createNewFile(new Path(dir, String.valueOf(System.nanoTime()))); - } - task.start(taskConfig); - assertEquals(0, task.poll().size()); - //policy has ended - assertNull(task.poll()); - } - - @Test - public void oneFilePerFs() throws IOException { - for (Path dir : directories) { - Path dataFile = new Path(dir, System.nanoTime() + ".txt"); - createDataFile(dataFile); - //this file does not match the regexp - fs.createNewFile(new Path(dir, String.valueOf(System.nanoTime()))); - } - - task.start(taskConfig); - List records = task.poll(); - assertEquals(10, records.size()); - checkRecords(records); - //policy has ended - assertNull(task.poll()); - } - - @Test - public void nonExistentUri() { - Map props = new HashMap<>(taskConfig); - props.put(FsSourceTaskConfig.FS_URIS, new Path(fs.getWorkingDirectory(), UUID.randomUUID().toString()).toString()); - task.start(props); - task.poll(); - } - - @Test - public void exceptionExecutingPolicy() throws IOException, IllegalAccessException { - Map props = new HashMap<>(taskConfig); - task.start(props); - - Policy policy = EasyMock.createNiceMock(Policy.class); - EasyMock.expect(policy.hasEnded()).andReturn(Boolean.FALSE); - EasyMock.expect(policy.execute()).andThrow(new ConnectException("Exception from mock")); - EasyMock.expect(policy.getURIs()).andReturn(null); - EasyMock.checkOrder(policy, false); - EasyMock.replay(policy); - MemberModifier.field(FsSourceTask.class, "policy").set(task, policy); - - assertEquals(0, task.poll().size()); - } - - @Test - public void exceptionReadingFile() throws IOException { - Map props = new HashMap<>(taskConfig); - File tmp = File.createTempFile("test-", ".txt"); - try (PrintWriter writer = new PrintWriter(tmp)) { - writer.append("txt"); - } - Path dest = new Path(directories.get(0).toString(), System.nanoTime() + ".txt"); - fs.moveFromLocalFile(new Path(tmp.getAbsolutePath()), dest); - props.put(FsSourceTaskConfig.FILE_READER_CLASS, AvroFileReader.class.getName()); - task.start(props); - assertEquals(0, task.poll().size()); - task.stop(); - - fs.delete(dest, false); - } - - protected abstract void checkRecords(List records); - - protected abstract void createDataFile(Path path) throws IOException; - -} diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/task/TaskFsTestConfig.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/task/TaskFsTestConfig.java new file mode 100644 index 0000000..1efe3b4 --- /dev/null +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/task/TaskFsTestConfig.java @@ -0,0 +1,113 @@ +package com.github.mmolimar.kafka.connect.fs.task; + +import com.github.mmolimar.kafka.connect.fs.AbstractHdfsFsConfig; +import com.github.mmolimar.kafka.connect.fs.AbstractLocalFsConfig; +import com.github.mmolimar.kafka.connect.fs.FsSourceTask; +import com.github.mmolimar.kafka.connect.fs.FsTestConfig; +import org.apache.hadoop.fs.Path; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.UUID; + +interface TaskFsTestConfig 
extends FsTestConfig { + + FsSourceTask getTask(); + + void setTask(FsSourceTask task); + + Map getTaskConfig(); + + void setTaskConfig(Map taskConfig); + + List getDirectories(); + +} + +class LocalFsConfig extends AbstractLocalFsConfig implements TaskFsTestConfig { + private FsSourceTask task; + private Map taskConfig; + private List directories; + + @Override + public void init() throws IOException { + directories = new ArrayList() {{ + add(new Path(getFsUri().toString(), UUID.randomUUID().toString())); + add(new Path(getFsUri().toString(), UUID.randomUUID().toString())); + }}; + for (Path dir : directories) { + getFs().mkdirs(dir); + } + } + + @Override + public FsSourceTask getTask() { + return task; + } + + @Override + public void setTask(FsSourceTask task) { + this.task = task; + } + + @Override + public Map getTaskConfig() { + return taskConfig; + } + + @Override + public void setTaskConfig(Map taskConfig) { + this.taskConfig = taskConfig; + } + + @Override + public List getDirectories() { + return directories; + } + +} + +class HdfsFsConfig extends AbstractHdfsFsConfig implements TaskFsTestConfig { + private FsSourceTask task; + private Map taskConfig; + private List directories; + + @Override + public void init() throws IOException { + directories = new ArrayList() {{ + add(new Path(getFsUri().toString(), UUID.randomUUID().toString())); + add(new Path(getFsUri().toString(), UUID.randomUUID().toString())); + }}; + for (Path dir : directories) { + getFs().mkdirs(dir); + } + } + + @Override + public FsSourceTask getTask() { + return task; + } + + @Override + public void setTask(FsSourceTask task) { + this.task = task; + } + + @Override + public Map getTaskConfig() { + return taskConfig; + } + + @Override + public void setTaskConfig(Map taskConfig) { + this.taskConfig = taskConfig; + } + + @Override + public List getDirectories() { + return directories; + } + +} diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/task/hdfs/HdfsFsSourceTaskTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/task/hdfs/HdfsFsSourceTaskTest.java deleted file mode 100644 index 1e8b303..0000000 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/task/hdfs/HdfsFsSourceTaskTest.java +++ /dev/null @@ -1,66 +0,0 @@ -package com.github.mmolimar.kafka.connect.fs.task.hdfs; - -import com.github.mmolimar.kafka.connect.fs.file.reader.TextFileReader; -import org.apache.hadoop.fs.Path; -import org.apache.kafka.connect.data.Struct; -import org.apache.kafka.connect.source.SourceRecord; -import org.junit.jupiter.api.BeforeAll; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.UUID; -import java.util.stream.IntStream; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; - -public class HdfsFsSourceTaskTest extends HdfsFsSourceTaskTestBase { - - @BeforeAll - public static void setUp() throws IOException { - directories = new ArrayList() {{ - add(new Path(fsUri.toString(), UUID.randomUUID().toString())); - add(new Path(fsUri.toString(), UUID.randomUUID().toString())); - }}; - for (Path dir : directories) { - fs.mkdirs(dir); - } - } - - @Override - protected void checkRecords(List records) { - records.forEach(record -> { - assertEquals("topic_test", record.topic()); - assertNotNull(record.sourcePartition()); - assertNotNull(record.sourceOffset()); - assertNotNull(record.value()); - - assertNotNull(((Struct) 
record.value()).get(TextFileReader.FIELD_NAME_VALUE_DEFAULT)); - }); - } - - @Override - protected void createDataFile(Path path) throws IOException { - File file = fillDataFile(); - fs.moveFromLocalFile(new Path(file.getAbsolutePath()), path); - } - - private File fillDataFile() throws IOException { - File txtFile = File.createTempFile("test-", ".txt"); - try (FileWriter writer = new FileWriter(txtFile)) { - - IntStream.range(0, NUM_RECORDS).forEach(index -> { - String value = String.format("%d_%s", index, UUID.randomUUID()); - try { - writer.append(value + "\n"); - } catch (IOException ioe) { - throw new RuntimeException(ioe); - } - }); - } - return txtFile; - } -} diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/task/hdfs/HdfsFsSourceTaskTestBase.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/task/hdfs/HdfsFsSourceTaskTestBase.java deleted file mode 100644 index 1132bc6..0000000 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/task/hdfs/HdfsFsSourceTaskTestBase.java +++ /dev/null @@ -1,33 +0,0 @@ -package com.github.mmolimar.kafka.connect.fs.task.hdfs; - -import com.github.mmolimar.kafka.connect.fs.task.FsSourceTaskTestBase; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.hdfs.MiniDFSCluster; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.BeforeAll; - -import java.io.IOException; -import java.net.URI; -import java.nio.file.Files; -import java.nio.file.Path; - -public abstract class HdfsFsSourceTaskTestBase extends FsSourceTaskTestBase { - - private static MiniDFSCluster cluster; - - @BeforeAll - public static void initFs() throws IOException { - Configuration clusterConfig = new Configuration(); - Path hdfsDir = Files.createTempDirectory("test-"); - clusterConfig.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, hdfsDir.toAbsolutePath().toString()); - cluster = new MiniDFSCluster.Builder(clusterConfig).build(); - fsUri = URI.create("hdfs://localhost:" + cluster.getNameNodePort() + "/"); - fs = FileSystem.newInstance(fsUri, clusterConfig); - } - - @AfterAll - public static void finishFs() { - cluster.shutdown(true); - } -} diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/task/local/LocalFsSourceTaskTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/task/local/LocalFsSourceTaskTest.java deleted file mode 100644 index 8623e05..0000000 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/task/local/LocalFsSourceTaskTest.java +++ /dev/null @@ -1,65 +0,0 @@ -package com.github.mmolimar.kafka.connect.fs.task.local; - -import com.github.mmolimar.kafka.connect.fs.file.reader.TextFileReader; -import org.apache.hadoop.fs.Path; -import org.apache.kafka.connect.data.Struct; -import org.apache.kafka.connect.source.SourceRecord; -import org.junit.jupiter.api.BeforeAll; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.UUID; -import java.util.stream.IntStream; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; - -public class LocalFsSourceTaskTest extends LocalFsSourceTaskTestBase { - - @BeforeAll - public static void setUp() throws IOException { - directories = new ArrayList() {{ - add(new Path(fsUri.toString(), UUID.randomUUID().toString())); - add(new Path(fsUri.toString(), UUID.randomUUID().toString())); - }}; - for (Path dir : directories) { - fs.mkdirs(dir); - } - } - - @Override 
- protected void checkRecords(List records) { - records.forEach(record -> { - assertEquals("topic_test", record.topic()); - assertNotNull(record.sourcePartition()); - assertNotNull(record.sourceOffset()); - assertNotNull(record.value()); - assertNotNull(((Struct) record.value()).get(TextFileReader.FIELD_NAME_VALUE_DEFAULT)); - }); - } - - @Override - protected void createDataFile(Path path) throws IOException { - File file = fillDataFile(); - fs.moveFromLocalFile(new Path(file.getAbsolutePath()), path); - } - - private File fillDataFile() throws IOException { - File txtFile = File.createTempFile("test-", ".txt"); - try (FileWriter writer = new FileWriter(txtFile)) { - - IntStream.range(0, NUM_RECORDS).forEach(index -> { - String value = String.format("%d_%s", index, UUID.randomUUID()); - try { - writer.append(value + "\n"); - } catch (IOException ioe) { - throw new RuntimeException(ioe); - } - }); - } - return txtFile; - } -} diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/task/local/LocalFsSourceTaskTestBase.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/task/local/LocalFsSourceTaskTestBase.java deleted file mode 100644 index 4cf1074..0000000 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/task/local/LocalFsSourceTaskTestBase.java +++ /dev/null @@ -1,29 +0,0 @@ -package com.github.mmolimar.kafka.connect.fs.task.local; - -import com.github.mmolimar.kafka.connect.fs.task.FsSourceTaskTestBase; -import org.apache.commons.io.FileUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.BeforeAll; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; - -public abstract class LocalFsSourceTaskTestBase extends FsSourceTaskTestBase { - - private static Path localDir; - - @BeforeAll - public static void initFs() throws IOException { - localDir = Files.createTempDirectory("test-"); - fsUri = localDir.toUri(); - fs = FileSystem.newInstance(fsUri, new Configuration()); - } - - @AfterAll - public static void finishFs() throws IOException { - FileUtils.deleteDirectory(localDir.toFile()); - } -} From 3655b714dcd9055db3ff488f10d6f0e2883149a3 Mon Sep 17 00:00:00 2001 From: Mario Molina Date: Sat, 21 Mar 2020 19:17:54 -0600 Subject: [PATCH 28/51] Change to Open JDK --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index d97b432..2d90a0c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,6 @@ language: java jdk: - - oraclejdk8 + - openjdk8 sudo: false install: - mvn test-compile -DskipTests=true -Dmaven.javadoc.skip=true -B -V From e2429c54c6aac79bd0cdb35c04e590fdaa550306 Mon Sep 17 00:00:00 2001 From: Mario Molina Date: Sat, 21 Mar 2020 19:33:58 -0600 Subject: [PATCH 29/51] Fix in doc --- docs/source/config_options.rst | 4 ++++ docs/source/filereaders.rst | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/docs/source/config_options.rst b/docs/source/config_options.rst index 5b25ed1..df4a946 100644 --- a/docs/source/config_options.rst +++ b/docs/source/config_options.rst @@ -276,6 +276,7 @@ To configure custom properties for this reader, the name you must use is ``json` Encoding to use for reading a file. If not specified, the reader will use the default encoding. * Type: string + * Default: based on the locale and charset of the underlying operating system. 
* Importance: medium ``file_reader.json.compression.type`` @@ -416,6 +417,7 @@ To configure custom properties for this reader, the name you must use is ``delim Encoding to use for reading a file. If not specified, the reader will use the default encoding. * Type: string + * Default: based on the locale and charset of the underlying operating system. * Importance: medium ``file_reader.delimited.compression.type`` @@ -536,6 +538,7 @@ To configure custom properties for this reader, the name you must use is ``delim Encoding to use for reading a file. If not specified, the reader will use the default encoding. * Type: string + * Default: based on the locale and charset of the underlying operating system. * Importance: medium ``file_reader.delimited.compression.type`` @@ -579,6 +582,7 @@ To configure custom properties for this reader, the name you must use is ``text` Encoding to use for reading a file. If not specified, the reader will use the default encoding. * Type: string + * Default: based on the locale and charset of the underlying operating system. * Importance: medium ``file_reader.json.compression.type`` diff --git a/docs/source/filereaders.rst b/docs/source/filereaders.rst index 8e52634..2b029c5 100644 --- a/docs/source/filereaders.rst +++ b/docs/source/filereaders.rst @@ -85,10 +85,11 @@ Agnostic Actually, this reader is a wrapper of the readers listing above. It tries to read any kind of file format using an internal reader based on the file extension, -applying the proper one (Parquet, Avro, SecuenceFile, CSV, TSV or Text). In case of no +applying the proper one (Parquet, Avro, SequenceFile, CSV, TSV or Text). In case of no extension has been matched, the Text file reader will be applied. Default extensions for each format (configurable): + * Parquet: ``.parquet`` * Avro: ``.avro`` * SequenceFile: ``.seq`` From cc2fe9fa9b4536bfebd231f44f10b24379cd4db4 Mon Sep 17 00:00:00 2001 From: Mario Molina Date: Mon, 23 Mar 2020 21:02:37 -0600 Subject: [PATCH 30/51] Enabled set header names in Univocity file readers --- .../kafka/connect/fs/file/reader/UnivocityFileReader.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/UnivocityFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/UnivocityFileReader.java index caab986..dac8740 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/UnivocityFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/UnivocityFileReader.java @@ -37,6 +37,7 @@ abstract class UnivocityFileReader> protected static final String FILE_READER_DELIMITED_SETTINGS_FORMAT = FILE_READER_DELIMITED_SETTINGS + "format."; public static final String FILE_READER_DELIMITED_SETTINGS_HEADER = FILE_READER_DELIMITED_SETTINGS + "header"; + public static final String FILE_READER_DELIMITED_SETTINGS_HEADER_NAMES = FILE_READER_DELIMITED_SETTINGS + "header_names"; public static final String FILE_READER_DELIMITED_SETTINGS_LINE_SEPARATOR_DETECTION = FILE_READER_DELIMITED_SETTINGS + "line_separator_detection"; public static final String FILE_READER_DELIMITED_SETTINGS_NULL_VALUE = FILE_READER_DELIMITED_SETTINGS + "null_value"; public static final String FILE_READER_DELIMITED_SETTINGS_MAX_COLUMNS = FILE_READER_DELIMITED_SETTINGS + "max_columns"; @@ -100,6 +101,8 @@ protected void configure(Map config) { private T allSettings(Map config) { T settings = parserSettings(config); settings.setHeaderExtractionEnabled(getBoolean(config, 
FILE_READER_DELIMITED_SETTINGS_HEADER, false)); + settings.setHeaders(Optional.ofNullable(config.get(FILE_READER_DELIMITED_SETTINGS_HEADER_NAMES)) + .map(headers -> headers.split(",")).orElse(null)); settings.setLineSeparatorDetectionEnabled(getBoolean(config, FILE_READER_DELIMITED_SETTINGS_LINE_SEPARATOR_DETECTION, false)); settings.setNullValue(config.get(FILE_READER_DELIMITED_SETTINGS_NULL_VALUE)); settings.setMaxColumns(Integer.parseInt(config.getOrDefault(FILE_READER_DELIMITED_SETTINGS_MAX_COLUMNS, "512"))); From 0bb547bdf8eaffa8ab28a26c7b444f62e8a2f3d8 Mon Sep 17 00:00:00 2001 From: Mario Molina Date: Mon, 23 Mar 2020 21:02:58 -0600 Subject: [PATCH 31/51] New fixed-width file reader --- .../fs/file/reader/AgnosticFileReader.java | 36 +++++++------ .../fs/file/reader/FixedWidthFileReader.java | 50 +++++++++++++++++ .../file/reader/AgnosticFileReaderTest.java | 21 ++++++++ .../file/reader/FixedWidthFileReaderTest.java | 54 +++++++++++++++++++ 4 files changed, 146 insertions(+), 15 deletions(-) create mode 100644 src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/FixedWidthFileReader.java create mode 100644 src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FixedWidthFileReaderTest.java diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AgnosticFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AgnosticFileReader.java index 9ee8665..9f5930f 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AgnosticFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AgnosticFileReader.java @@ -8,8 +8,9 @@ import java.io.IOException; import java.util.Arrays; -import java.util.List; import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; import static com.github.mmolimar.kafka.connect.fs.FsSourceTaskConfig.FILE_READER_PREFIX; @@ -24,11 +25,12 @@ public class AgnosticFileReader extends AbstractFileReader reader; - private List parquetExtensions, avroExtensions, sequenceExtensions, - jsonExtensions, csvExtensions, tsvExtensions; + private Set parquetExtensions, avroExtensions, sequenceExtensions, + jsonExtensions, csvExtensions, tsvExtensions, fixedExtensions; public AgnosticFileReader(FileSystem fs, Path filePath, Map config) throws IOException { super(fs, filePath, new AgnosticAdapter(), config); @@ -61,6 +63,8 @@ private AbstractFileReader readerByExtension(FileSystem fs, Path filePat clz = CsvFileReader.class; } else if (tsvExtensions.contains(extension)) { clz = TsvFileReader.class; + } else if (fixedExtensions.contains(extension)) { + clz = FixedWidthFileReader.class; } else { clz = TextFileReader.class; } @@ -70,18 +74,20 @@ private AbstractFileReader readerByExtension(FileSystem fs, Path filePat @Override protected void configure(Map config) { - this.parquetExtensions = Arrays.asList(config.getOrDefault(FILE_READER_AGNOSTIC_EXTENSIONS_PARQUET, "parquet") - .toLowerCase().split(",")); - this.avroExtensions = Arrays.asList(config.getOrDefault(FILE_READER_AGNOSTIC_EXTENSIONS_AVRO, "avro") - .toLowerCase().split(",")); - this.sequenceExtensions = Arrays.asList(config.getOrDefault(FILE_READER_AGNOSTIC_EXTENSIONS_SEQUENCE, "seq") - .toLowerCase().split(",")); - this.jsonExtensions = Arrays.asList(config.getOrDefault(FILE_READER_AGNOSTIC_EXTENSIONS_JSON, "json") - .toLowerCase().split(",")); - this.csvExtensions = Arrays.asList(config.getOrDefault(FILE_READER_AGNOSTIC_EXTENSIONS_CSV, "csv") - .toLowerCase().split(",")); - this.tsvExtensions = 
Arrays.asList(config.getOrDefault(FILE_READER_AGNOSTIC_EXTENSIONS_TSV, "tsv") - .toLowerCase().split(",")); + this.parquetExtensions = Arrays.stream(config.getOrDefault(FILE_READER_AGNOSTIC_EXTENSIONS_PARQUET, "parquet") + .toLowerCase().split(",")).collect(Collectors.toSet()); + this.avroExtensions = Arrays.stream(config.getOrDefault(FILE_READER_AGNOSTIC_EXTENSIONS_AVRO, "avro") + .toLowerCase().split(",")).collect(Collectors.toSet()); + this.sequenceExtensions = Arrays.stream(config.getOrDefault(FILE_READER_AGNOSTIC_EXTENSIONS_SEQUENCE, "seq") + .toLowerCase().split(",")).collect(Collectors.toSet()); + this.jsonExtensions = Arrays.stream(config.getOrDefault(FILE_READER_AGNOSTIC_EXTENSIONS_JSON, "json") + .toLowerCase().split(",")).collect(Collectors.toSet()); + this.csvExtensions = Arrays.stream(config.getOrDefault(FILE_READER_AGNOSTIC_EXTENSIONS_CSV, "csv") + .toLowerCase().split(",")).collect(Collectors.toSet()); + this.tsvExtensions = Arrays.stream(config.getOrDefault(FILE_READER_AGNOSTIC_EXTENSIONS_TSV, "tsv") + .toLowerCase().split(",")).collect(Collectors.toSet()); + this.fixedExtensions = Arrays.stream(config.getOrDefault(FILE_READER_AGNOSTIC_EXTENSIONS_FIXED, "fixed") + .toLowerCase().split(",")).collect(Collectors.toSet()); } @Override diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/FixedWidthFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/FixedWidthFileReader.java new file mode 100644 index 0000000..52f4a95 --- /dev/null +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/FixedWidthFileReader.java @@ -0,0 +1,50 @@ +package com.github.mmolimar.kafka.connect.fs.file.reader; + +import com.univocity.parsers.common.AbstractParser; +import com.univocity.parsers.fixed.FixedWidthFields; +import com.univocity.parsers.fixed.FixedWidthParser; +import com.univocity.parsers.fixed.FixedWidthParserSettings; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Map; +import java.util.Optional; + +public class FixedWidthFileReader extends UnivocityFileReader { + + public static final String FILE_READER_DELIMITED_SETTINGS_FIELD_LENGTHS = FILE_READER_DELIMITED_SETTINGS + "field_lengths"; + public static final String FILE_READER_DELIMITED_SETTINGS_KEEP_PADDING = FILE_READER_DELIMITED_SETTINGS + "keep_padding"; + public static final String FILE_READER_DELIMITED_SETTINGS_PADDING_FOR_HEADERS = FILE_READER_DELIMITED_SETTINGS + "padding_for_headers"; + public static final String FILE_READER_DELIMITED_SETTINGS_ENDS_ON_NEW_LINE = FILE_READER_DELIMITED_SETTINGS + "ends_on_new_line"; + public static final String FILE_READER_DELIMITED_SETTINGS_SKIP_TRAILING_CHARS = FILE_READER_DELIMITED_SETTINGS + "skip_trailing_chars"; + + public static final String FILE_READER_DELIMITED_SETTINGS_FORMAT_PADDING = FILE_READER_DELIMITED_SETTINGS_FORMAT + "padding"; + + public FixedWidthFileReader(FileSystem fs, Path filePath, Map config) throws IOException { + super(fs, filePath, config); + } + + @Override + protected FixedWidthParserSettings parserSettings(Map config) { + FixedWidthFields fieldLengths = new FixedWidthFields(); + Optional.ofNullable(config.get(FILE_READER_DELIMITED_SETTINGS_FIELD_LENGTHS)) + .map(fl -> Arrays.stream(fl.split(","))) + .ifPresent(fl -> fl.forEach(field -> fieldLengths.addField(Integer.parseInt(field)))); + + FixedWidthParserSettings settings = new FixedWidthParserSettings(fieldLengths); + 
settings.setKeepPadding(getBoolean(config, FILE_READER_DELIMITED_SETTINGS_KEEP_PADDING, false)); + settings.setUseDefaultPaddingForHeaders(getBoolean(config, FILE_READER_DELIMITED_SETTINGS_PADDING_FOR_HEADERS, true)); + settings.setRecordEndsOnNewline(getBoolean(config, FILE_READER_DELIMITED_SETTINGS_ENDS_ON_NEW_LINE, true)); + settings.setSkipTrailingCharsUntilNewline(getBoolean(config, FILE_READER_DELIMITED_SETTINGS_SKIP_TRAILING_CHARS, false)); + settings.getFormat().setPadding(config.getOrDefault(FILE_READER_DELIMITED_SETTINGS_FORMAT_PADDING, " ").charAt(0)); + + return settings; + } + + @Override + protected AbstractParser createParser(FixedWidthParserSettings settings) { + return new FixedWidthParser(settings); + } +} diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/AgnosticFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/AgnosticFileReaderTest.java index 7f25e66..632b13b 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/AgnosticFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/AgnosticFileReaderTest.java @@ -71,6 +71,27 @@ public String getFileExtension() { } } + @Nested + class AgnosticFixedWidthFileReaderTest extends FixedWidthFileReaderTest { + + @Override + protected Map getReaderConfig() { + Map config = super.getReaderConfig(); + config.put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_FIXED, getFileExtension()); + return config; + } + + @Override + public Class getReaderClass() { + return AgnosticFileReader.class; + } + + @Override + public String getFileExtension() { + return FILE_EXTENSION; + } + } + @Nested class AgnosticJsonFileReaderTest extends JsonFileReaderTest { diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FixedWidthFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FixedWidthFileReaderTest.java new file mode 100644 index 0000000..6f0ff01 --- /dev/null +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FixedWidthFileReaderTest.java @@ -0,0 +1,54 @@ +package com.github.mmolimar.kafka.connect.fs.file.reader; + +import org.apache.hadoop.fs.Path; + +import java.io.File; +import java.io.IOException; +import java.io.PrintWriter; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; +import java.util.UUID; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +public class FixedWidthFileReaderTest extends UnivocityFileReaderTest { + + private static final int[] fieldLengths = new int[]{45, 53, 71, 89}; + + @Override + protected Path createDataFile(ReaderFsTestConfig fsConfig, Object... args) throws IOException { + boolean header = args.length < 1 || (boolean) args[0]; + CompressionType compression = args.length < 2 ? COMPRESSION_TYPE_DEFAULT : (CompressionType) args[1]; + File txtFile = File.createTempFile("test-", "." 
+ getFileExtension()); + try (PrintWriter writer = new PrintWriter(getOutputStream(txtFile, compression))) { + if (header) { + writer.append(String.format("%-" + fieldLengths[0] + "s", FIELD_COLUMN1) + + String.format("%-" + fieldLengths[1] + "s", FIELD_COLUMN2) + + String.format("%-" + fieldLengths[2] + "s", FIELD_COLUMN3) + + String.format("%-" + fieldLengths[3] + "s", FIELD_COLUMN4) + "\n"); + } + IntStream.range(0, NUM_RECORDS).forEach(index -> { + String value = String.format("%d_%s", index, UUID.randomUUID()); + writer.append(String.format("%-" + fieldLengths[0] + "s", value) + + String.format("%-" + fieldLengths[1] + "s", value) + + String.format("%-" + fieldLengths[2] + "s", value) + + String.format("%-" + fieldLengths[3] + "s", value) + "\n"); + fsConfig.offsetsByIndex().put(index, (long) index); + }); + } + Path path = new Path(new Path(fsConfig.getFsUri()), txtFile.getName()); + fsConfig.getFs().moveFromLocalFile(new Path(txtFile.getAbsolutePath()), path); + return path; + } + + @Override + protected Map getReaderConfig() { + return new HashMap() {{ + put(FixedWidthFileReader.FILE_READER_DELIMITED_SETTINGS_HEADER, "true"); + put(FixedWidthFileReader.FILE_READER_DELIMITED_SETTINGS_FIELD_LENGTHS, + Arrays.stream(fieldLengths).mapToObj(String::valueOf).collect(Collectors.joining(","))); + }}; + } + +} From a797971d777f41e13c3cce8ebab68a273f899a1c Mon Sep 17 00:00:00 2001 From: Mario Molina Date: Mon, 23 Mar 2020 21:19:10 -0600 Subject: [PATCH 32/51] Added new file reader to the documentation --- config/kafka-connect-fs.properties | 2 +- docs/source/config_options.rst | 196 ++++++++++++++++-- docs/source/filereaders.rst | 16 ++ .../kafka-connect-fs-version.properties | 2 +- 4 files changed, 200 insertions(+), 16 deletions(-) diff --git a/config/kafka-connect-fs.properties b/config/kafka-connect-fs.properties index 28ab531..67435af 100644 --- a/config/kafka-connect-fs.properties +++ b/config/kafka-connect-fs.properties @@ -5,5 +5,5 @@ fs.uris=file:///data,hdfs://localhost:9000/ topic=mytopic policy.class=com.github.mmolimar.kafka.connect.fs.policy.SimplePolicy policy.recursive=true -policy.regexp=^[0-9]*\.txt$ +policy.regexp=^.*\.txt$ file_reader.class=com.github.mmolimar.kafka.connect.fs.file.reader.TextFileReader diff --git a/docs/source/config_options.rst b/docs/source/config_options.rst index df4a946..70c8b6c 100644 --- a/docs/source/config_options.rst +++ b/docs/source/config_options.rst @@ -301,6 +301,13 @@ CSV To configure custom properties for this reader, the name you must use is ``delimited`` (even though it's for CSV). +``file_reader.delimited.settings.format.delimiter`` + Field delimiter. + + * Type: string + * Default: ``,`` + * Importance: high + ``file_reader.delimited.settings.header`` If the file contains header or not. @@ -308,12 +315,12 @@ To configure custom properties for this reader, the name you must use is ``delim * Default: ``false`` * Importance: high -``file_reader.delimited.settings.format.delimiter`` - Field delimiter. +``file_reader.delimited.settings.header_names`` + A comma-separated list of ordered field names to set when reading a file. - * Type: string - * Default: ``,`` - * Importance: high + * Type: string[] + * Default: ``null`` + * Importance: medium ``file_reader.delimited.settings.null_value`` Default value for ``null`` values. 
@@ -362,14 +369,14 @@ To configure custom properties for this reader, the name you must use is ``delim * Type: boolean * Default: ``false`` - * Importance: medium + * Importance: low ``file_reader.delimited.settings.delimiter_detection`` If the reader should detect the delimiter automatically. * Type: boolean * Default: ``false`` - * Importance: medium + * Importance: low ``file_reader.delimited.settings.ignore_leading_whitespaces`` Flag to enable/disable skipping leading whitespaces from values. @@ -449,6 +456,13 @@ To configure custom properties for this reader, the name you must use is ``delim * Default: ``false`` * Importance: high +``file_reader.delimited.settings.header_names`` + A comma-separated list of ordered field names to set when reading a file. + + * Type: string[] + * Default: ``null`` + * Importance: medium + ``file_reader.delimited.settings.null_value`` Default value for ``null`` values. @@ -484,13 +498,6 @@ To configure custom properties for this reader, the name you must use is ``delim * Default: ``0`` * Importance: low -``file_reader.delimited.settings.line_separator_detection`` - If the reader should detect the line separator automatically. - - * Type: boolean - * Default: ``false`` - * Importance: medium - ``file_reader.delimited.settings.line_separator_detection`` If the reader should detect the line separator automatically. @@ -506,6 +513,13 @@ To configure custom properties for this reader, the name you must use is ``delim * Default: ``true`` * Importance: low +``file_reader.delimited.settings.ignore_leading_whitespaces`` + Flag to enable/disable skipping leading whitespaces from values. + + * Type: boolean + * Default: ``true`` + * Importance: low + ``file_reader.delimited.settings.ignore_trailing_whitespaces`` Flag to enable/disable skipping trailing whitespaces from values. @@ -556,6 +570,153 @@ To configure custom properties for this reader, the name you must use is ``delim * Default: ``true`` * Importance: low +.. _config_options-filereaders-fixedwidth: + +FixedWidth +-------------------------------------------- + +To configure custom properties for this reader, the name you must use is ``delimited`` (even though it's for FixedWidth). + +``file_reader.delimited.settings.field_lengths`` + A comma-separated ordered list of integers with the lengths of each field. + + * Type: int[] + * Importance: high + +``file_reader.delimited.settings.header`` + If the file contains header or not. + + * Type: boolean + * Default: ``false`` + * Importance: high + +``file_reader.delimited.settings.header_names`` + A comma-separated list of ordered field names to set when reading a file. + + * Type: string[] + * Default: ``null`` + * Importance: medium + +``file_reader.delimited.settings.keep_padding`` + If the padding character should be kept in each value. + + * Type: boolean + * Default: ``false`` + * Importance: medium + +``file_reader.delimited.settings.padding_for_headers`` + If headers have the default padding specified. + + * Type: boolean + * Default: ``true`` + * Importance: medium + +``file_reader.delimited.settings.null_value`` + Default value for ``null`` values. + + * Type: string + * Default: ``null`` + * Importance: medium + +``file_reader.delimited.settings.format.ends_on_new_line`` + If each record ends when a new line is found. + + * Type: boolean + * Default: ``true`` + * Importance: medium + +``file_reader.delimited.settings.format.line_separator`` + Line separator to be used.
+ + * Type: string + * Default: ``\n`` + * Importance: medium + +``file_reader.delimited.settings.format.padding`` + The padding character used to represent unwritten spaces. + + * Type: char + * Default: `` `` + * Importance: medium + +``file_reader.delimited.settings.max_columns`` + Maximum number of columns allowed per record. + + * Type: int + * Default: ``512`` + * Importance: low + +``file_reader.delimited.settings.max_chars_per_column`` + Maximum number of characters allowed per column. + + * Type: int + * Default: ``4096`` + * Importance: low + +``file_reader.delimited.settings.skip_trailing_chars`` + If the trailing characters beyond the record's length should be skipped. + + * Type: boolean + * Default: ``false`` + * Importance: low + +``file_reader.delimited.settings.rows_to_skip`` + Number of rows to skip. + + * Type: long + * Default: ``0`` + * Importance: low + +``file_reader.delimited.settings.line_separator_detection`` + If the reader should detect the line separator automatically. + + * Type: boolean + * Default: ``false`` + * Importance: low + +``file_reader.delimited.settings.ignore_leading_whitespaces`` + Flag to enable/disable skipping leading whitespaces from values. + + * Type: boolean + * Default: ``true`` + * Importance: low + +``file_reader.delimited.settings.ignore_trailing_whitespaces`` + Flag to enable/disable skipping trailing whitespaces from values. + + * Type: boolean + * Default: ``true`` + * Importance: low + +``file_reader.delimited.settings.format.comment`` + Character that represents a line comment at the beginning of a line. + + * Type: char + * Default: ``#`` + * Importance: low + +``file_reader.delimited.encoding`` + Encoding to use for reading a file. If not specified, the reader will use the default encoding. + + * Type: string + * Default: based on the locale and charset of the underlying operating system. + * Importance: medium + +``file_reader.delimited.compression.type`` + Compression type to use when reading a file. + + * Type: enum (available values ``bzip2``, ``gzip`` and ``none``) + * Default: ``none`` + * Importance: medium + +``file_reader.delimited.compression.concatenated`` + Flag to specify if the decompression of the reader will finish at the end of the file or after + the first compressed stream. + + * Type: boolean + * Default: ``true`` + * Importance: low + .. _config_options-filereaders-text: Text @@ -649,6 +810,13 @@ To configure custom properties for this reader, the name you must use is ``agnos * Default: ``tsv`` * Importance: medium +``file_reader.agnostic.extensions.fixed`` + A comma-separated string list with the accepted extensions for fixed-width files. + + * Type: string + * Default: ``fixed`` + * Importance: medium + .. note:: The Agnostic reader uses the previous ones as inner readers. So, in case of using this reader, you'll probably need to include also the specified properties for those readers in the connector configuration as well. diff --git a/docs/source/filereaders.rst b/docs/source/filereaders.rst index 2b029c5..f887499 100644 --- a/docs/source/filereaders.rst +++ b/docs/source/filereaders.rst @@ -57,6 +57,8 @@ in the message sent to Kafka. If there is no header, the value of each column wi the field named ``column_N`` (**N** represents the column index) in the message. Also, the token delimiter for columns is configurable. +This reader is based on the `Univocity CSV parser `__. + More information about properties of this file reader :ref:`here`.
TSV @@ -66,8 +68,21 @@ TSV file reader using a tab (``\t``) to distinguish different columns on each li Its behaviour is the same one for the CSV file reader regarding the header and the column names. +This reader is based on the `Univocity TSV parser `__. + More information about properties of this file reader :ref:`here`. +FixedWidth +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +FixedWidth is a plain text file reader which distinguishes each column based on the length of each field. + +Its behaviour is the same one for the CSV/TSV file readers regarding the header and the column names. + +This reader is based on the `Univocity Fixed-Width parser `__. + +More information about properties of this file reader :ref:`here`. + Text ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -96,6 +111,7 @@ Default extensions for each format (configurable): * JSON: ``.json`` * CSV: ``.csv`` * TSV: ``.tsv`` +* FixedWidth: ``.fixed`` * Text: any other sort of file extension. More information about properties of this file reader :ref:`here`. diff --git a/src/main/resources/kafka-connect-fs-version.properties b/src/main/resources/kafka-connect-fs-version.properties index e5683df..defbd48 100644 --- a/src/main/resources/kafka-connect-fs-version.properties +++ b/src/main/resources/kafka-connect-fs-version.properties @@ -1 +1 @@ -version=${project.version} \ No newline at end of file +version=${project.version} From 7e15fb90c441f5a43d5ed7fa1f2c0890cdae5af3 Mon Sep 17 00:00:00 2001 From: Mario Molina Date: Sun, 5 Apr 2020 20:21:58 -0500 Subject: [PATCH 33/51] Prevent stack overflow error when listing files in the FS --- .../connect/fs/policy/AbstractPolicy.java | 21 ++++++--- .../kafka/connect/fs/util/TailCall.java | 44 +++++++++++++++++++ 2 files changed, 58 insertions(+), 7 deletions(-) create mode 100644 src/main/java/com/github/mmolimar/kafka/connect/fs/util/TailCall.java diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/AbstractPolicy.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/AbstractPolicy.java index 849692a..37da859 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/AbstractPolicy.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/AbstractPolicy.java @@ -4,6 +4,7 @@ import com.github.mmolimar.kafka.connect.fs.file.FileMetadata; import com.github.mmolimar.kafka.connect.fs.file.reader.FileReader; import com.github.mmolimar.kafka.connect.fs.util.ReflectionUtils; +import com.github.mmolimar.kafka.connect.fs.util.TailCall; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocatedFileStatus; @@ -134,25 +135,31 @@ public Iterator listFiles(FileSystem fs) throws IOException { RemoteIterator it = fs.listFiles(fs.getWorkingDirectory(), recursive); LocatedFileStatus current = null; - @Override - public boolean hasNext() { + private TailCall hasNextRec() { try { if (current == null) { - if (!it.hasNext()) return false; + if (!it.hasNext()) { + return TailCall.done(false); + } current = it.next(); - return hasNext(); + return this::hasNextRec; } - if (current.isFile() && + if (current.isFile() & fileRegexp.matcher(current.getPath().getName()).find()) { - return true; + return TailCall.done(true); } current = null; - return hasNext(); + return this::hasNextRec; } catch (IOException ioe) { throw new ConnectException(ioe); } } + @Override + public boolean hasNext() { + return hasNextRec().invoke(); + } + @Override public FileMetadata next() { if (!hasNext() && current == null) 
{ diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/util/TailCall.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/util/TailCall.java new file mode 100644 index 0000000..5b82099 --- /dev/null +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/util/TailCall.java @@ -0,0 +1,44 @@ +package com.github.mmolimar.kafka.connect.fs.util; + +import java.util.stream.Stream; + +@FunctionalInterface +public interface TailCall { + + TailCall apply(); + + default boolean completed() { + return false; + } + + default T result() { + throw new IllegalStateException("Call does not have a value."); + } + + default T invoke() { + return Stream.iterate(this, TailCall::apply) + .filter(TailCall::completed) + .findFirst() + .get() + .result(); + } + + static TailCall done(final T value) { + return new TailCall() { + @Override + public boolean completed() { + return true; + } + + @Override + public T result() { + return value; + } + + @Override + public TailCall apply() { + throw new IllegalStateException("Done cannot be applied."); + } + }; + } +} From 709b3808904948352c0dccc5646f13945a333c2c Mon Sep 17 00:00:00 2001 From: Mario Molina Date: Sun, 12 Apr 2020 15:07:01 -0500 Subject: [PATCH 34/51] Updating Maven deps --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index fcdad09..130728a 100644 --- a/pom.xml +++ b/pom.xml @@ -17,9 +17,9 @@ 1.11.0 2.8.4 9.0.2 - 5.6.0 + 5.6.2 4.2 - 2.0.5 + 2.0.7 1.8 ${maven-compiler.source} 3.2.0 From b7ff3e7cdbf6ed8008623c159764e71590db706c Mon Sep 17 00:00:00 2001 From: Mario Molina Date: Sun, 12 Apr 2020 17:00:49 -0500 Subject: [PATCH 35/51] Refactor offset management --- .../kafka/connect/fs/FsSourceTask.java | 13 +--- .../kafka/connect/fs/file/Offset.java | 7 --- .../fs/file/reader/AbstractFileReader.java | 16 +++++ .../fs/file/reader/AgnosticFileReader.java | 5 +- .../fs/file/reader/AvroFileReader.java | 37 ++---------- .../connect/fs/file/reader/FileReader.java | 7 +-- .../fs/file/reader/JsonFileReader.java | 8 +-- .../fs/file/reader/ParquetFileReader.java | 43 ++------------ .../fs/file/reader/SequenceFileReader.java | 47 +++------------ .../fs/file/reader/TextFileReader.java | 41 ++----------- .../fs/file/reader/UnivocityFileReader.java | 45 +++----------- .../connect/fs/policy/AbstractPolicy.java | 27 ++++----- .../file/reader/AgnosticFileReaderTest.java | 5 ++ .../fs/file/reader/AvroFileReaderTest.java | 6 -- .../fs/file/reader/FileReaderTestBase.java | 23 +++----- .../fs/file/reader/JsonFileReaderTest.java | 6 -- .../fs/file/reader/ParquetFileReaderTest.java | 6 -- .../file/reader/SequenceFileReaderTest.java | 59 ++++++++++++++++--- .../fs/file/reader/TextFileReaderTest.java | 6 -- .../file/reader/UnivocityFileReaderTest.java | 20 +++---- 20 files changed, 142 insertions(+), 285 deletions(-) delete mode 100644 src/main/java/com/github/mmolimar/kafka/connect/fs/file/Offset.java diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceTask.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceTask.java index 1fdc9b5..db8188f 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceTask.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceTask.java @@ -1,7 +1,6 @@ package com.github.mmolimar.kafka.connect.fs; import com.github.mmolimar.kafka.connect.fs.file.FileMetadata; -import com.github.mmolimar.kafka.connect.fs.file.Offset; import com.github.mmolimar.kafka.connect.fs.file.reader.FileReader; import 
com.github.mmolimar.kafka.connect.fs.policy.Policy; import com.github.mmolimar.kafka.connect.fs.util.ReflectionUtils; @@ -101,16 +100,10 @@ private Stream asStream(Iterator src) { return StreamSupport.stream(iterable.spliterator(), false); } - private SourceRecord convert(FileMetadata metadata, Offset offset, Struct struct) { + private SourceRecord convert(FileMetadata metadata, long offset, Struct struct) { return new SourceRecord( - new HashMap() { - { - put("path", metadata.getPath()); - //TODO manage blocks - //put("blocks", metadata.getBlocks().toString()); - } - }, - Collections.singletonMap("offset", offset.getRecordOffset()), + Collections.singletonMap("path", metadata.getPath()), + Collections.singletonMap("offset", offset), config.getTopic(), struct.schema(), struct diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/Offset.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/Offset.java deleted file mode 100644 index ca1d530..0000000 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/Offset.java +++ /dev/null @@ -1,7 +0,0 @@ -package com.github.mmolimar.kafka.connect.fs.file; - -public interface Offset { - - long getRecordOffset(); - -} diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AbstractFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AbstractFileReader.java index dae25af..a54814b 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AbstractFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AbstractFileReader.java @@ -17,6 +17,7 @@ public abstract class AbstractFileReader implements FileReader { private final FileSystem fs; private final Path filePath; private final ReaderAdapter adapter; + private long offset; public AbstractFileReader(FileSystem fs, Path filePath, ReaderAdapter adapter, Map config) { if (fs == null || filePath == null) { @@ -25,6 +26,7 @@ public AbstractFileReader(FileSystem fs, Path filePath, ReaderAdapter adapter this.fs = fs; this.filePath = filePath; this.adapter = adapter; + this.offset = 0; configure(readerConfig(config)); } @@ -47,10 +49,24 @@ public Path getFilePath() { return filePath; } + @Override public final Struct next() { return adapter.apply(nextRecord()); } + @Override + public long currentOffset() { + return offset; + } + + protected void incrementOffset() { + this.offset++; + } + + protected void setOffset(long offset) { + this.offset = offset; + } + protected abstract T nextRecord(); protected ReaderAdapter getAdapter() { diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AgnosticFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AgnosticFileReader.java index 9f5930f..478dacb 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AgnosticFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AgnosticFileReader.java @@ -1,6 +1,5 @@ package com.github.mmolimar.kafka.connect.fs.file.reader; -import com.github.mmolimar.kafka.connect.fs.file.Offset; import com.github.mmolimar.kafka.connect.fs.util.ReflectionUtils; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -96,12 +95,12 @@ public boolean hasNext() { } @Override - public void seek(Offset offset) { + public void seek(long offset) { reader.seek(offset); } @Override - public Offset currentOffset() { + public long currentOffset() { return reader.currentOffset(); } diff --git 
a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReader.java index 2438f51..589ded7 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReader.java @@ -1,6 +1,5 @@ package com.github.mmolimar.kafka.connect.fs.file.reader; -import com.github.mmolimar.kafka.connect.fs.file.Offset; import io.confluent.connect.avro.AvroData; import org.apache.avro.AvroRuntimeException; import org.apache.avro.Schema; @@ -26,7 +25,6 @@ public class AvroFileReader extends AbstractFileReader { public static final String FILE_READER_AVRO_SCHEMA = FILE_READER_AVRO + "schema"; - private final AvroOffset offset; private final DataFileReader reader; private Schema schema; @@ -39,7 +37,6 @@ public AvroFileReader(FileSystem fs, Path filePath, Map config) } else { this.reader = new DataFileReader<>(input, new SpecificDatumReader<>(this.schema)); } - this.offset = new AvroOffset(0); } @Override @@ -62,7 +59,7 @@ public boolean hasNext() { protected GenericRecord nextRecord() { try { GenericRecord record = reader.next(); - this.offset.inc(); + incrementOffset(); return record; } catch (AvroRuntimeException are) { @@ -71,47 +68,21 @@ protected GenericRecord nextRecord() { } @Override - public void seek(Offset offset) { + public void seek(long offset) { try { - reader.sync(offset.getRecordOffset()); - this.offset.setOffset(reader.previousSync() - 16); + reader.sync(offset); + setOffset(reader.previousSync() - 16L); } catch (IOException ioe) { throw new ConnectException("Error seeking file " + getFilePath(), ioe); } } - @Override - public Offset currentOffset() { - return offset; - } - @Override public void close() throws IOException { reader.sync(0); reader.close(); } - public static class AvroOffset implements Offset { - private long offset; - - public AvroOffset(long offset) { - this.offset = offset; - } - - public void setOffset(long offset) { - this.offset = offset; - } - - void inc() { - this.offset++; - } - - @Override - public long getRecordOffset() { - return offset; - } - } - static class GenericRecordToStruct implements ReaderAdapter { private static final int CACHE_SIZE = 100; diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/FileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/FileReader.java index 521ddbb..518e9f8 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/FileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/FileReader.java @@ -1,6 +1,5 @@ package com.github.mmolimar.kafka.connect.fs.file.reader; -import com.github.mmolimar.kafka.connect.fs.file.Offset; import org.apache.hadoop.fs.Path; import org.apache.kafka.connect.data.Struct; @@ -16,12 +15,12 @@ public interface FileReader extends Iterator, Closeable { Struct next(); - void seek(Offset offset); + void seek(long offset); - Offset currentOffset(); + long currentOffset(); } @FunctionalInterface interface ReaderAdapter extends Function { -} \ No newline at end of file +} diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/JsonFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/JsonFileReader.java index 76db116..700d38b 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/JsonFileReader.java +++ 
b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/JsonFileReader.java @@ -1,10 +1,8 @@ package com.github.mmolimar.kafka.connect.fs.file.reader; -import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; -import com.github.mmolimar.kafka.connect.fs.file.Offset; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.kafka.connect.data.Schema; @@ -50,7 +48,7 @@ public JsonFileReader(FileSystem fs, Path filePath, Map config) String line = inner.nextRecord().getValue(); this.schema = extractSchema(mapper.readTree(line)); //back to the first line - inner.seek(() -> 0); + inner.seek(0); } else { this.schema = SchemaBuilder.struct().build(); } @@ -91,12 +89,12 @@ public boolean hasNext() { } @Override - public void seek(Offset offset) { + public void seek(long offset) { inner.seek(offset); } @Override - public Offset currentOffset() { + public long currentOffset() { return inner.currentOffset(); } diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/ParquetFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/ParquetFileReader.java index cf12483..7253a4b 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/ParquetFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/ParquetFileReader.java @@ -1,6 +1,5 @@ package com.github.mmolimar.kafka.connect.fs.file.reader; -import com.github.mmolimar.kafka.connect.fs.file.Offset; import io.confluent.connect.avro.AvroData; import org.apache.avro.Schema; import org.apache.avro.generic.GenericData; @@ -29,19 +28,15 @@ public class ParquetFileReader extends AbstractFileReader { public static final String FILE_READER_PARQUET_SCHEMA = FILE_READER_PARQUET + "schema"; public static final String FILE_READER_PARQUET_PROJECTION = FILE_READER_PARQUET + "projection"; - private final ParquetOffset offset; - private ParquetReader reader; private GenericRecord currentRecord; private Schema schema; private Schema projection; private boolean closed; - public ParquetFileReader(FileSystem fs, Path filePath, Map config) throws IOException { super(fs, filePath, new GenericRecordToStruct(), config); - this.offset = new ParquetOffset(0); this.reader = initReader(); this.closed = false; } @@ -94,64 +89,38 @@ record = new GenericData.Record(this.projection); record = currentRecord; } currentRecord = null; - offset.inc(); + incrementOffset(); return record; } @Override - public void seek(Offset offset) { + public void seek(long offset) { if (closed) { throw new ConnectException("Stream is closed!"); } - if (offset.getRecordOffset() < 0) { + if (offset < 0) { throw new IllegalArgumentException("Record offset must be greater than 0"); } - if (this.offset.getRecordOffset() > offset.getRecordOffset()) { + if (currentOffset() > offset) { try { this.reader = initReader(); - this.offset.setOffset(0); + setOffset(0); this.closed = false; } catch (IOException ioe) { throw new ConnectException("Error initializing parquet reader", ioe); } } - while (hasNext() && this.offset.getRecordOffset() < offset.getRecordOffset()) { + while (hasNext() && currentOffset() < offset) { nextRecord(); } } - @Override - public Offset currentOffset() { - return offset; - } - @Override public void close() throws IOException { this.closed = true; reader.close(); } - public static class ParquetOffset 
implements Offset { - private long offset; - - public ParquetOffset(long offset) { - this.offset = offset; - } - - public void setOffset(long offset) { - this.offset = offset; - } - - void inc() { - this.offset++; - } - - @Override - public long getRecordOffset() { - return offset; - } - } - static class GenericRecordToStruct implements ReaderAdapter { private static final int CACHE_SIZE = 100; private final AvroData avroData; diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReader.java index bdde95b..e21bdf2 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReader.java @@ -1,6 +1,5 @@ package com.github.mmolimar.kafka.connect.fs.file.reader; -import com.github.mmolimar.kafka.connect.fs.file.Offset; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.*; @@ -32,7 +31,6 @@ public class SequenceFileReader extends AbstractFileReader conf .field(keyFieldName, getSchema(this.key)) .field(valueFieldName, getSchema(this.value)) .build(); - this.offset = new SeqOffset(0); this.recordIndex = this.hasNextIndex = -1; this.hasNext = false; this.closed = false; @@ -63,7 +60,7 @@ protected void configure(Map config) { this.valueFieldName = config.getOrDefault(FILE_READER_SEQUENCE_FIELD_NAME_VALUE, FIELD_NAME_VALUE_DEFAULT); } - private Schema getSchema(Writable writable) { + Schema getSchema(Writable writable) { if (writable instanceof ByteWritable) { return SchemaBuilder.INT8_SCHEMA; } else if (writable instanceof ShortWritable) { @@ -75,7 +72,7 @@ private Schema getSchema(Writable writable) { } else if (writable instanceof FloatWritable) { return SchemaBuilder.FLOAT32_SCHEMA; } else if (writable instanceof DoubleWritable) { - return SchemaBuilder.INT64_SCHEMA; + return SchemaBuilder.FLOAT64_SCHEMA; } else if (writable instanceof BytesWritable) { return SchemaBuilder.BYTES_SCHEMA; } else if (writable instanceof BooleanWritable) { @@ -90,7 +87,7 @@ public boolean hasNext() { try { if (hasNextIndex == -1 || hasNextIndex == recordIndex) { hasNextIndex++; - offset.inc(); + incrementOffset(); hasNext = reader.next(key, value); } return hasNext; @@ -111,52 +108,26 @@ protected SequenceRecord nextRecord() { } @Override - public void seek(Offset offset) { - if (offset.getRecordOffset() < 0) { + public void seek(long offset) { + if (offset < 0) { throw new IllegalArgumentException("Record offset must be greater than 0"); } try { - reader.sync(offset.getRecordOffset()); - hasNextIndex = recordIndex = offset.getRecordOffset(); + reader.sync(offset); + hasNextIndex = recordIndex = offset; hasNext = false; - this.offset.setOffset(offset.getRecordOffset() - 1); + setOffset(offset - 1); } catch (IOException ioe) { throw new ConnectException("Error seeking file " + getFilePath(), ioe); } } - @Override - public Offset currentOffset() { - return offset; - } - @Override public void close() throws IOException { closed = true; reader.close(); } - public static class SeqOffset implements Offset { - private long offset; - - public SeqOffset(long offset) { - this.offset = offset; - } - - public void setOffset(long offset) { - this.offset = offset; - } - - void inc() { - this.offset++; - } - - @Override - public long getRecordOffset() { - return offset; - } - } - static class SeqToStruct implements ReaderAdapter> { @Override 
@@ -166,7 +137,7 @@ public Struct apply(SequenceRecord record) { .put(record.valueFieldName, toSchemaValue(record.value)); } - private Object toSchemaValue(Writable writable) { + Object toSchemaValue(Writable writable) { if (writable instanceof ByteWritable) { return ((ByteWritable) writable).get(); } else if (writable instanceof ShortWritable) { diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReader.java index a12323e..060de36 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReader.java @@ -1,6 +1,5 @@ package com.github.mmolimar.kafka.connect.fs.file.reader; -import com.github.mmolimar.kafka.connect.fs.file.Offset; import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; import org.apache.hadoop.fs.FileSystem; @@ -33,7 +32,6 @@ public class TextFileReader extends AbstractFileReader config) throws IOException { super(fs, filePath, new TxtToStruct(), config); this.reader = new LineNumberReader(getFileReader(fs.open(filePath))); - this.offset = new TextOffset(0); } @Override @@ -116,62 +113,36 @@ protected TextRecord nextRecord() { } String aux = current; current = null; - offset.inc(); + incrementOffset(); return new TextRecord(schema, aux); } @Override - public void seek(Offset offset) { - if (offset.getRecordOffset() < 0) { + public void seek(long offset) { + if (offset < 0) { throw new IllegalArgumentException("Record offset must be greater than 0"); } try { current = null; - if (offset.getRecordOffset() < reader.getLineNumber()) { + if (offset < reader.getLineNumber()) { finished = false; reader.close(); reader = new LineNumberReader(getFileReader(getFs().open(getFilePath()))); } - while (reader.getLineNumber() < offset.getRecordOffset()) { + while (reader.getLineNumber() < offset) { reader.readLine(); } - this.offset.setOffset(reader.getLineNumber()); + setOffset(reader.getLineNumber()); } catch (IOException ioe) { throw new ConnectException("Error seeking file " + getFilePath(), ioe); } } - @Override - public Offset currentOffset() { - return offset; - } - @Override public void close() throws IOException { reader.close(); } - public static class TextOffset implements Offset { - private long offset; - - public TextOffset(long offset) { - this.offset = offset; - } - - public void setOffset(long offset) { - this.offset = offset; - } - - void inc() { - this.offset++; - } - - @Override - public long getRecordOffset() { - return offset; - } - } - static class TxtToStruct implements ReaderAdapter { @Override diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/UnivocityFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/UnivocityFileReader.java index dac8740..050ba4c 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/UnivocityFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/UnivocityFileReader.java @@ -1,6 +1,5 @@ package com.github.mmolimar.kafka.connect.fs.file.reader; -import com.github.mmolimar.kafka.connect.fs.file.Offset; import com.univocity.parsers.common.AbstractParser; import com.univocity.parsers.common.CommonParserSettings; import com.univocity.parsers.common.ParsingContext; @@ -55,7 +54,6 @@ abstract class 
UnivocityFileReader> private static final String DEFAULT_COLUMN_NAME = "column_"; - private final UnivocityOffset offset; private T settings; private Schema schema; private Charset charset; @@ -67,7 +65,6 @@ abstract class UnivocityFileReader> public UnivocityFileReader(FileSystem fs, Path filePath, Map config) throws IOException { super(fs, filePath, new UnivocityToStruct(), config); - this.offset = new UnivocityOffset(0); this.iterator = iterateRecords(); this.schema = buildSchema(this.iterator, settings.isHeaderExtractionEnabled()); } @@ -78,7 +75,7 @@ private Schema buildSchema(ResultIterator it, boolean ha Record first = it.next(); IntStream.range(0, first.getValues().length) .forEach(index -> builder.field(DEFAULT_COLUMN_NAME + ++index, SchemaBuilder.STRING_SCHEMA)); - seek(new UnivocityOffset(0)); + seek(0); } else if (hasHeader) { Optional.ofNullable(it.getContext().headers()).ifPresent(headers -> { IntStream.range(0, headers.length) @@ -150,7 +147,7 @@ private ResultIterator iterateRecords() throws IOExcepti protected final UnivocityRecord nextRecord() { if (!hasNext()) throw new NoSuchElementException("There are no more records in file: " + getFilePath()); - offset.inc(); + incrementOffset(); Record record = iterator.next(); return new UnivocityRecord(schema, record.getValues()); } @@ -163,58 +160,32 @@ public final boolean hasNext() { } @Override - public final void seek(Offset offset) { - if (offset.getRecordOffset() < 0) { + public final void seek(long offset) { + if (offset < 0) { throw new IllegalArgumentException("Record offset must be greater than 0"); } try { - if (offset.getRecordOffset() > this.offset.getRecordOffset()) { + if (offset > currentOffset()) { iterator.hasNext(); - iterator.getContext().skipLines(offset.getRecordOffset() - this.offset.getRecordOffset() - 1); + iterator.getContext().skipLines(offset - currentOffset() - 1); iterator.next(); } else { iterator = iterateRecords(); iterator.hasNext(); - iterator.getContext().skipLines(offset.getRecordOffset()); + iterator.getContext().skipLines(offset); } - this.offset.setOffset(offset.getRecordOffset()); + setOffset(offset); } catch (IOException ioe) { throw new ConnectException("Error seeking file " + getFilePath(), ioe); } } - @Override - public final Offset currentOffset() { - return offset; - } - @Override public final void close() { iterator.getContext().stop(); closed = true; } - public static class UnivocityOffset implements Offset { - private long offset; - - public UnivocityOffset(long offset) { - this.offset = offset; - } - - public void setOffset(long offset) { - this.offset = offset; - } - - void inc() { - this.offset++; - } - - @Override - public long getRecordOffset() { - return offset; - } - } - static class UnivocityToStruct implements ReaderAdapter { @Override diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/AbstractPolicy.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/AbstractPolicy.java index 37da859..b57d0c2 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/AbstractPolicy.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/AbstractPolicy.java @@ -186,8 +186,7 @@ final int getExecutions() { FileMetadata toMetadata(LocatedFileStatus fileStatus) { List blocks = Arrays.stream(fileStatus.getBlockLocations()) - .map(block -> - new FileMetadata.BlockInfo(block.getOffset(), block.getLength(), block.isCorrupt())) + .map(block -> new FileMetadata.BlockInfo(block.getOffset(), block.getLength(), block.isCorrupt())) 
.collect(Collectors.toList()); return new FileMetadata(fileStatus.getPath().toString(), fileStatus.getLen(), blocks); @@ -195,30 +194,24 @@ FileMetadata toMetadata(LocatedFileStatus fileStatus) { @Override public FileReader offer(FileMetadata metadata, OffsetStorageReader offsetStorageReader) { - Map partition = new HashMap() {{ - put("path", metadata.getPath()); - //TODO manage blocks - //put("blocks", metadata.getBlocks().toString()); - }}; - FileSystem current = fileSystems.stream() .filter(fs -> metadata.getPath().startsWith(fs.getWorkingDirectory().toString())) - .findFirst().orElse(null); + .findFirst() + .orElse(null); - FileReader reader; try { - reader = ReflectionUtils.makeReader( + FileReader reader = ReflectionUtils.makeReader( (Class) conf.getClass(FsSourceTaskConfig.FILE_READER_CLASS), current, new Path(metadata.getPath()), conf.originals()); + Map partition = Collections.singletonMap("path", metadata.getPath()); + Map offset = offsetStorageReader.offset(partition); + if (offset != null && offset.get("offset") != null) { + reader.seek((Long) offset.get("offset")); + } + return reader; } catch (Throwable t) { throw new ConnectException("An error has occurred when creating reader for file: " + metadata.getPath(), t); } - - Map offset = offsetStorageReader.offset(partition); - if (offset != null && offset.get("offset") != null) { - reader.seek(() -> (Long) offset.get("offset")); - } - return reader; } private Iterator concat(final Iterator it1, diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/AgnosticFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/AgnosticFileReaderTest.java index 632b13b..ab44e27 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/AgnosticFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/AgnosticFileReaderTest.java @@ -165,6 +165,11 @@ protected Map getReaderConfig() { return config; } + @Override + public void schemaMapper(ReaderFsTestConfig fsConfig) { + + } + @Override public Class getReaderClass() { return AgnosticFileReader.class; diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReaderTest.java index 841c951..bebeff7 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReaderTest.java @@ -1,6 +1,5 @@ package com.github.mmolimar.kafka.connect.fs.file.reader; -import com.github.mmolimar.kafka.connect.fs.file.Offset; import org.apache.avro.AvroTypeException; import org.apache.avro.Schema; import org.apache.avro.SchemaParseException; @@ -103,11 +102,6 @@ public void readerWithUnparseableSchema(ReaderFsTestConfig fsConfig) throws IOEx assertThrows(SchemaParseException.class, () -> getReader(testFs, fsConfig.getDataFile(), readerConfig)); } - @Override - protected Offset getOffset(long offset) { - return new AvroFileReader.AvroOffset(offset); - } - @Override protected Class getReaderClass() { return AvroFileReader.class; diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FileReaderTestBase.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FileReaderTestBase.java index e691d87..f91e9af 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FileReaderTestBase.java +++ 
b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FileReaderTestBase.java @@ -1,6 +1,5 @@ package com.github.mmolimar.kafka.connect.fs.file.reader; -import com.github.mmolimar.kafka.connect.fs.file.Offset; import com.github.mmolimar.kafka.connect.fs.util.ReflectionUtils; import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream; import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream; @@ -130,24 +129,24 @@ public void readAllData(ReaderFsTestConfig fsConfig) { public void seekFile(ReaderFsTestConfig fsConfig) { FileReader reader = fsConfig.getReader(); int recordIndex = NUM_RECORDS / 2; - reader.seek(getOffset(fsConfig.offsetsByIndex().get(recordIndex))); + reader.seek(fsConfig.offsetsByIndex().get(recordIndex)); assertTrue(reader.hasNext()); - assertEquals(fsConfig.offsetsByIndex().get(recordIndex), reader.currentOffset().getRecordOffset()); + assertEquals(fsConfig.offsetsByIndex().get(recordIndex), reader.currentOffset()); checkData(reader.next(), recordIndex); recordIndex = 0; - reader.seek(getOffset(fsConfig.offsetsByIndex().get(recordIndex))); + reader.seek(fsConfig.offsetsByIndex().get(recordIndex)); assertTrue(reader.hasNext()); - assertEquals(fsConfig.offsetsByIndex().get(recordIndex), reader.currentOffset().getRecordOffset()); + assertEquals(fsConfig.offsetsByIndex().get(recordIndex), reader.currentOffset()); checkData(reader.next(), recordIndex); recordIndex = NUM_RECORDS - 3; - reader.seek(getOffset(fsConfig.offsetsByIndex().get(recordIndex))); + reader.seek(fsConfig.offsetsByIndex().get(recordIndex)); assertTrue(reader.hasNext()); - assertEquals(fsConfig.offsetsByIndex().get(recordIndex), reader.currentOffset().getRecordOffset()); + assertEquals(fsConfig.offsetsByIndex().get(recordIndex), reader.currentOffset()); checkData(reader.next(), recordIndex); - reader.seek(getOffset(fsConfig.offsetsByIndex().get(NUM_RECORDS - 1) + 1)); + reader.seek(fsConfig.offsetsByIndex().get(NUM_RECORDS - 1) + 1); assertFalse(reader.hasNext()); } @@ -155,14 +154,14 @@ public void seekFile(ReaderFsTestConfig fsConfig) { @MethodSource("fileSystemConfigProvider") public void negativeSeek(ReaderFsTestConfig fsConfig) { FileReader reader = fsConfig.getReader(); - assertThrows(RuntimeException.class, () -> reader.seek(getOffset(-1))); + assertThrows(RuntimeException.class, () -> reader.seek(-1)); } @ParameterizedTest @MethodSource("fileSystemConfigProvider") public void exceededSeek(ReaderFsTestConfig fsConfig) { FileReader reader = fsConfig.getReader(); - reader.seek(getOffset(fsConfig.offsetsByIndex().get(NUM_RECORDS - 1) + 1)); + reader.seek(fsConfig.offsetsByIndex().get(NUM_RECORDS - 1) + 1); assertFalse(reader.hasNext()); assertThrows(NoSuchElementException.class, reader::next); } @@ -176,10 +175,6 @@ public void readFileAlreadyClosed(ReaderFsTestConfig fsConfig) throws IOExceptio assertThrows(IllegalStateException.class, reader::next); } - protected Offset getOffset(long offset) { - return () -> offset; - } - protected final FileReader getReader(FileSystem fs, Path path, Map config) throws Throwable { return ReflectionUtils.makeReader(getReaderClass(), fs, path, config); } diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/JsonFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/JsonFileReaderTest.java index 1df1cd0..fdc2422 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/JsonFileReaderTest.java +++ 
b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/JsonFileReaderTest.java @@ -6,7 +6,6 @@ import com.fasterxml.jackson.databind.ObjectWriter; import com.fasterxml.jackson.databind.node.JsonNodeFactory; import com.fasterxml.jackson.databind.node.ObjectNode; -import com.github.mmolimar.kafka.connect.fs.file.Offset; import org.apache.hadoop.fs.Path; import org.apache.kafka.connect.data.Struct; import org.junit.jupiter.params.ParameterizedTest; @@ -160,11 +159,6 @@ public void readDifferentCompressionTypes(ReaderFsTestConfig fsConfig) { }); } - @Override - protected Offset getOffset(long offset) { - return () -> offset; - } - @Override protected Class getReaderClass() { return JsonFileReader.class; diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/ParquetFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/ParquetFileReaderTest.java index 891eeec..be5e831 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/ParquetFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/ParquetFileReaderTest.java @@ -1,6 +1,5 @@ package com.github.mmolimar.kafka.connect.fs.file.reader; -import com.github.mmolimar.kafka.connect.fs.file.Offset; import org.apache.avro.AvroRuntimeException; import org.apache.avro.Schema; import org.apache.avro.SchemaBuilder; @@ -167,11 +166,6 @@ protected Map getReaderConfig() { return new HashMap<>(); } - @Override - protected Offset getOffset(long offset) { - return new ParquetFileReader.ParquetOffset(offset); - } - @Override protected Class getReaderClass() { return ParquetFileReader.class; diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReaderTest.java index ee5bdd8..cc62c0b 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReaderTest.java @@ -1,17 +1,16 @@ package com.github.mmolimar.kafka.connect.fs.file.reader; -import com.github.mmolimar.kafka.connect.fs.file.Offset; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.IntWritable; -import org.apache.hadoop.io.SequenceFile; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.*; import org.apache.hadoop.util.ReflectionUtils; +import org.apache.kafka.connect.data.SchemaBuilder; import org.apache.kafka.connect.data.Struct; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.MethodSource; +import java.io.DataInput; +import java.io.DataOutput; import java.io.File; import java.io.IOException; import java.util.HashMap; @@ -81,9 +80,53 @@ public void defaultFieldNames(ReaderFsTestConfig fsConfig) throws Throwable { assertEquals(NUM_RECORDS, recordCount, "The number of records in the file does not match"); } - @Override - protected Offset getOffset(long offset) { - return new SequenceFileReader.SeqOffset(offset); + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void schemaMapper(ReaderFsTestConfig fsConfig) { + SequenceFileReader reader = (SequenceFileReader) fsConfig.getReader(); + + ByteWritable byteWritable = new ByteWritable((byte) 1); + ShortWritable shortWritable = new ShortWritable((short) 123); + IntWritable intWritable = new IntWritable(123); + LongWritable longWritable = 
new LongWritable(123L); + FloatWritable floatWritable = new FloatWritable(0.123F); + DoubleWritable doubleWritable = new DoubleWritable(0.123D); + BytesWritable bytesWritable = new BytesWritable(new byte[]{1, 2, 3}); + BooleanWritable booleanWritable = new BooleanWritable(true); + Text textWritable = new Text("123"); + + assertEquals(SchemaBuilder.INT8_SCHEMA, reader.getSchema(byteWritable)); + assertEquals(SchemaBuilder.INT16_SCHEMA, reader.getSchema(shortWritable)); + assertEquals(SchemaBuilder.INT32_SCHEMA, reader.getSchema(intWritable)); + assertEquals(SchemaBuilder.INT64_SCHEMA, reader.getSchema(longWritable)); + assertEquals(SchemaBuilder.FLOAT32_SCHEMA, reader.getSchema(floatWritable)); + assertEquals(SchemaBuilder.FLOAT64_SCHEMA, reader.getSchema(doubleWritable)); + assertEquals(SchemaBuilder.BYTES_SCHEMA, reader.getSchema(bytesWritable)); + assertEquals(SchemaBuilder.BOOLEAN_SCHEMA, reader.getSchema(booleanWritable)); + assertEquals(SchemaBuilder.STRING_SCHEMA, reader.getSchema(textWritable)); + assertEquals(SchemaBuilder.STRING_SCHEMA, reader.getSchema(new Writable() { + + @Override + public void write(DataOutput out) { + + } + + @Override + public void readFields(DataInput in) { + + } + })); + + SequenceFileReader.SeqToStruct seqToStruct = new SequenceFileReader.SeqToStruct(); + assertEquals(seqToStruct.toSchemaValue(byteWritable), byteWritable.get()); + assertEquals(seqToStruct.toSchemaValue(shortWritable), shortWritable.get()); + assertEquals(seqToStruct.toSchemaValue(intWritable), intWritable.get()); + assertEquals(seqToStruct.toSchemaValue(longWritable), longWritable.get()); + assertEquals(seqToStruct.toSchemaValue(floatWritable), floatWritable.get()); + assertEquals(seqToStruct.toSchemaValue(doubleWritable), doubleWritable.get()); + assertEquals(seqToStruct.toSchemaValue(bytesWritable), bytesWritable.getBytes()); + assertEquals(seqToStruct.toSchemaValue(booleanWritable), booleanWritable.get()); + assertEquals(seqToStruct.toSchemaValue(textWritable), textWritable.toString()); } @Override diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReaderTest.java index 281bb24..5078d24 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReaderTest.java @@ -1,6 +1,5 @@ package com.github.mmolimar.kafka.connect.fs.file.reader; -import com.github.mmolimar.kafka.connect.fs.file.Offset; import org.apache.hadoop.fs.Path; import org.apache.kafka.connect.data.Struct; import org.junit.jupiter.params.ParameterizedTest; @@ -112,11 +111,6 @@ public void readDifferentCompressionTypes(ReaderFsTestConfig fsConfig) { }); } - @Override - protected Offset getOffset(long offset) { - return new TextFileReader.TextOffset(offset); - } - @Override protected Class getReaderClass() { return TextFileReader.class; diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/UnivocityFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/UnivocityFileReaderTest.java index d224027..438bb1e 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/UnivocityFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/UnivocityFileReaderTest.java @@ -1,6 +1,5 @@ package com.github.mmolimar.kafka.connect.fs.file.reader; -import com.github.mmolimar.kafka.connect.fs.file.Offset; import 
org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.kafka.connect.data.Struct;
@@ -119,24 +118,24 @@ public void seekFileWithoutHeader(ReaderFsTestConfig fsConfig) throws Throwable
assertTrue(reader.hasNext());
int recordIndex = NUM_RECORDS / 2;
- reader.seek(getOffset(fsConfig.offsetsByIndex().get(recordIndex)));
+ reader.seek(fsConfig.offsetsByIndex().get(recordIndex));
assertTrue(reader.hasNext());
- assertEquals(fsConfig.offsetsByIndex().get(recordIndex), reader.currentOffset().getRecordOffset());
+ assertEquals(fsConfig.offsetsByIndex().get(recordIndex), reader.currentOffset());
checkData(reader.next(), recordIndex);
recordIndex = 0;
- reader.seek(getOffset(fsConfig.offsetsByIndex().get(recordIndex)));
+ reader.seek(fsConfig.offsetsByIndex().get(recordIndex));
assertTrue(reader.hasNext());
- assertEquals(fsConfig.offsetsByIndex().get(recordIndex), reader.currentOffset().getRecordOffset());
+ assertEquals(fsConfig.offsetsByIndex().get(recordIndex), reader.currentOffset());
checkData(reader.next(), recordIndex);
recordIndex = NUM_RECORDS - 3;
- reader.seek(getOffset(fsConfig.offsetsByIndex().get(recordIndex)));
+ reader.seek(fsConfig.offsetsByIndex().get(recordIndex));
assertTrue(reader.hasNext());
- assertEquals(fsConfig.offsetsByIndex().get(recordIndex), reader.currentOffset().getRecordOffset());
+ assertEquals(fsConfig.offsetsByIndex().get(recordIndex), reader.currentOffset());
checkData(reader.next(), recordIndex);
- reader.seek(getOffset(fsConfig.offsetsByIndex().get(NUM_RECORDS - 1) + 1));
+ reader.seek(fsConfig.offsetsByIndex().get(NUM_RECORDS - 1) + 1);
assertFalse(reader.hasNext());
}
@@ -159,11 +158,6 @@ public void invalidFileEncoding(ReaderFsTestConfig fsConfig) {
fsConfig.getDataFile(), readerConfig));
}
- @Override
- protected Offset getOffset(long offset) {
- return new T.UnivocityOffset(offset);
- }
-
@Override
protected Class getReaderClass() {
return (Class) ((ParameterizedType) this.getClass().getGenericSuperclass())
From 1d9b75c2257d729d23ae880fb3cc246a327c8841 Mon Sep 17 00:00:00 2001
From: Mario Molina
Date: Tue, 14 Apr 2020 20:27:08 -0500
Subject: [PATCH 36/51] Logging improvements
---
.../kafka/connect/fs/FsSourceConnector.java | 9 ++-
.../kafka/connect/fs/FsSourceTask.java | 37 +++++-----
.../fs/file/reader/AbstractFileReader.java | 61 ++++++++++++++++-
.../fs/file/reader/AgnosticFileReader.java | 23 ++++---
.../fs/file/reader/AvroFileReader.java | 38 +++++------
.../fs/file/reader/JsonFileReader.java | 27 ++++----
.../fs/file/reader/ParquetFileReader.java | 39 ++++-------
.../fs/file/reader/SequenceFileReader.java | 32 +++------
.../fs/file/reader/TextFileReader.java | 68 ++++++++-----------
.../fs/file/reader/UnivocityFileReader.java | 40 +++++------
.../connect/fs/policy/AbstractPolicy.java | 10 ++-
.../fs/policy/HdfsFileWatcherPolicy.java | 3 +-
.../connect/fs/util/ReflectionUtils.java | 9 +--
.../fs/file/reader/AvroFileReaderTest.java | 16 +++--
.../fs/file/reader/CsvFileReaderTest.java | 2 +-
.../fs/file/reader/FileReaderTestBase.java | 43 +++++++++---
.../fs/file/reader/JsonFileReaderTest.java | 21 ++++--
.../fs/file/reader/ParquetFileReaderTest.java | 37 +++++++---
.../file/reader/SequenceFileReaderTest.java | 2 +-
.../fs/file/reader/TextFileReaderTest.java | 17 +++--
.../file/reader/UnivocityFileReaderTest.java | 27 +++++---
.../connect/fs/policy/CronPolicyTest.java | 33 +++++++--
.../connect/fs/policy/PolicyTestBase.java | 37 ++++++----
.../connect/fs/policy/SleepyPolicyTest.java | 53 +++++++++++----
.../connect/fs/task/FsSourceTaskTest.java | 6 +-
src/test/resources/log4j.properties | 1 +
26 files changed, 418 insertions(+), 273 deletions(-)
diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceConnector.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceConnector.java
index 0316acd..3689452 100644
--- a/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceConnector.java
+++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceConnector.java
@@ -32,11 +32,9 @@ public void start(Map properties) {
try {
config = new FsSourceConnectorConfig(properties);
} catch (ConfigException ce) {
- log.error("Couldn't start FsSourceConnector:", ce);
throw new ConnectException("Couldn't start FsSourceConnector due to configuration error.", ce);
} catch (Exception ce) {
- log.error("Couldn't start FsSourceConnector:", ce);
- throw new ConnectException("An error has occurred when starting FsSourceConnector" + ce);
+ throw new ConnectException("An error has occurred when starting FsSourceConnector." + ce);
}
}
@@ -48,9 +46,9 @@ public Class taskClass() {
@Override
public List> taskConfigs(int maxTasks) {
if (config == null) {
- throw new ConnectException("Connector config has not been initialized");
+ throw new ConnectException("Connector config has not been initialized.");
}
- List> taskConfigs = new ArrayList<>();
+ final List> taskConfigs = new ArrayList<>();
int groups = Math.min(config.getFsUris().size(), maxTasks);
ConnectorUtils.groupPartitions(config.getFsUris(), groups)
@@ -67,6 +65,7 @@ public List> taskConfigs(int maxTasks) {
@Override
public void stop() {
+ log.info("Stopping FsSourceConnector.");
//Nothing to do
}
diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceTask.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceTask.java
index db8188f..51a9e3d 100644
--- a/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceTask.java
+++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceTask.java
@@ -21,6 +21,7 @@ import java.util.stream.StreamSupport;
public class FsSourceTask extends SourceTask {
+
private static final Logger log = LoggerFactory.getLogger(FsSourceTask.class);
private final AtomicBoolean stop = new AtomicBoolean(false);
@@ -34,15 +35,16 @@ public String version() {
@Override
public void start(Map properties) {
+ log.info("Starting FS source task...");
try {
config = new FsSourceTaskConfig(properties);
if (config.getClass(FsSourceTaskConfig.POLICY_CLASS).isAssignableFrom(Policy.class)) {
throw new ConfigException("Policy class " +
- config.getClass(FsSourceTaskConfig.POLICY_CLASS) + "is not a sublass of " + Policy.class);
+ config.getClass(FsSourceTaskConfig.POLICY_CLASS) + "is not a subclass of " + Policy.class);
}
if (config.getClass(FsSourceTaskConfig.FILE_READER_CLASS).isAssignableFrom(FileReader.class)) {
throw new ConfigException("FileReader class " +
- config.getClass(FsSourceTaskConfig.FILE_READER_CLASS) + "is not a sublass of " + FileReader.class);
+ config.getClass(FsSourceTaskConfig.FILE_READER_CLASS) + "is not a subclass of " + FileReader.class);
}
Class policyClass = (Class) Class.forName(properties.get(FsSourceTaskConfig.POLICY_CLASS));
@@ -51,10 +53,11 @@ public void start(Map properties) {
} catch (ConfigException ce) {
log.error("Couldn't start FsSourceTask:", ce);
throw new ConnectException("Couldn't start FsSourceTask due to configuration error", ce);
- } catch (Throwable t) {
- log.error("Couldn't start FsSourceConnector:", t);
- throw new ConnectException("A 
problem has occurred reading configuration:" + t.getMessage()); + } catch (Exception e) { + log.error("Couldn't start FsSourceConnector:", e); + throw new ConnectException("A problem has occurred reading configuration: " + e.getMessage()); } + log.info("FS source task started with policy {}", policy.getClass().getName()); } @Override @@ -62,36 +65,35 @@ public List poll() { while (!stop.get() && policy != null && !policy.hasEnded()) { log.trace("Polling for new data"); - final List results = new ArrayList<>(); - List files = filesToProcess(); - files.forEach(metadata -> { + return filesToProcess().map(metadata -> { + List records = new ArrayList<>(); try (FileReader reader = policy.offer(metadata, context.offsetStorageReader())) { log.info("Processing records for file {}", metadata); while (reader.hasNext()) { - results.add(convert(metadata, reader.currentOffset(), reader.next())); + records.add(convert(metadata, reader.currentOffset(), reader.next())); } } catch (ConnectException | IOException e) { //when an exception happens reading a file, the connector continues log.error("Error reading file from FS: " + metadata.getPath() + ". Keep going...", e); } - }); - return results; + return records; + }).flatMap(Collection::stream).collect(Collectors.toList()); } - return null; } - private List filesToProcess() { + private Stream filesToProcess() { try { return asStream(policy.execute()) .filter(metadata -> metadata.getLen() > 0) - .collect(Collectors.toList()); + .collect(Collectors.toList()) + .stream(); } catch (IOException | ConnectException e) { //when an exception happens executing the policy, the connector continues - log.error("Cannot retrieve files to process from the FS: " + policy.getURIs() + ". " + + log.error("Cannot retrieve files to process from the FS: {}. " + "There was an error executing the policy but the task tolerates this and continues. 
" + - "Error message: " + e.getMessage()); - return Collections.emptyList(); + e.getMessage(), policy.getURIs(), e); + return Stream.empty(); } } @@ -112,6 +114,7 @@ private SourceRecord convert(FileMetadata metadata, long offset, Struct struct) @Override public void stop() { + log.info("Stopping FS source task."); stop.set(true); if (policy != null) { policy.interrupt(); diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AbstractFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AbstractFileReader.java index a54814b..fec6b73 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AbstractFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AbstractFileReader.java @@ -3,15 +3,19 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.kafka.connect.data.Struct; +import org.apache.kafka.connect.errors.ConnectException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.IOException; import java.util.Map; +import java.util.NoSuchElementException; import java.util.stream.Collectors; import static com.github.mmolimar.kafka.connect.fs.FsSourceTaskConfig.FILE_READER_PREFIX; public abstract class AbstractFileReader implements FileReader { + protected final Logger log = LoggerFactory.getLogger(getClass()); private final FileSystem fs; @@ -21,7 +25,7 @@ public abstract class AbstractFileReader implements FileReader { public AbstractFileReader(FileSystem fs, Path filePath, ReaderAdapter adapter, Map config) { if (fs == null || filePath == null) { - throw new IllegalArgumentException("fileSystem and filePath are required"); + throw new IllegalArgumentException("File system and file path are required."); } this.fs = fs; this.filePath = filePath; @@ -29,6 +33,7 @@ public AbstractFileReader(FileSystem fs, Path filePath, ReaderAdapter adapter this.offset = 0; configure(readerConfig(config)); + log.trace("Initialized file reader {} for file {}", getClass(), filePath); } protected final Map readerConfig(Map config) { @@ -51,7 +56,16 @@ public Path getFilePath() { @Override public final Struct next() { - return adapter.apply(nextRecord()); + if (!hasNext()) { + throw new NoSuchElementException("There are no more records in file: " + getFilePath()); + } + try { + return adapter.apply(nextRecord()); + } catch (ConnectException ce) { + throw ce; + } catch (Exception e) { + throw new ConnectException("Error processing next record in file: " + getFilePath(), e); + } } @Override @@ -67,9 +81,50 @@ protected void setOffset(long offset) { this.offset = offset; } - protected abstract T nextRecord(); + @Override + public final void seek(long offset) { + if (offset < 0) { + throw new IllegalArgumentException("Record offset must be greater than 0."); + } + checkClosed(); + try { + log.debug("Seeking file {} to offset {}.", filePath, offset); + seekFile(offset); + } catch (ConnectException ce) { + throw ce; + } catch (IOException ioe) { + throw new ConnectException("Error seeking file: " + getFilePath(), ioe); + } + } + + @Override + public final boolean hasNext() { + checkClosed(); + try { + return hasNextRecord(); + } catch (ConnectException ce) { + throw ce; + } catch (Exception e) { + throw new ConnectException("Error when checking if the reader has more records.", e); + } + } protected ReaderAdapter getAdapter() { return adapter; } + + private void checkClosed() { + if (isClosed()) { + throw new ConnectException("File stream is closed!"); + } + } + + 
protected abstract T nextRecord() throws IOException; + + protected abstract boolean hasNextRecord() throws IOException; + + protected abstract void seekFile(long offset) throws IOException; + + protected abstract boolean isClosed(); + } diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AgnosticFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AgnosticFileReader.java index 478dacb..2630762 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AgnosticFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AgnosticFileReader.java @@ -4,6 +4,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.kafka.connect.data.Struct; +import org.apache.kafka.connect.errors.ConnectException; import java.io.IOException; import java.util.Arrays; @@ -31,20 +32,17 @@ public class AgnosticFileReader extends AbstractFileReader parquetExtensions, avroExtensions, sequenceExtensions, jsonExtensions, csvExtensions, tsvExtensions, fixedExtensions; - public AgnosticFileReader(FileSystem fs, Path filePath, Map config) throws IOException { + public AgnosticFileReader(FileSystem fs, Path filePath, Map config) throws Exception { super(fs, filePath, new AgnosticAdapter(), config); try { reader = readerByExtension(fs, filePath, config); - } catch (RuntimeException | IOException e) { - throw e; - } catch (Throwable t) { - throw new IOException("An error has occurred when creating a concrete reader", t); + } catch (ConnectException ce) { + throw (Exception) ce.getCause(); } } - private AbstractFileReader readerByExtension(FileSystem fs, Path filePath, Map config) - throws Throwable { + private AbstractFileReader readerByExtension(FileSystem fs, Path filePath, Map config) { int index = filePath.getName().lastIndexOf('.'); String extension = index == -1 || index == filePath.getName().length() - 1 ? 
"" : filePath.getName().substring(index + 1).toLowerCase(); @@ -90,12 +88,12 @@ protected void configure(Map config) { } @Override - public boolean hasNext() { + public boolean hasNextRecord() { return reader.hasNext(); } @Override - public void seek(long offset) { + public void seekFile(long offset) { reader.seek(offset); } @@ -110,7 +108,12 @@ public void close() throws IOException { } @Override - protected AgnosticRecord nextRecord() { + public boolean isClosed() { + return reader.isClosed(); + } + + @Override + protected AgnosticRecord nextRecord() throws IOException { return new AgnosticRecord(reader.getAdapter(), reader.nextRecord()); } diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReader.java index 589ded7..3db8e3c 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReader.java @@ -1,7 +1,6 @@ package com.github.mmolimar.kafka.connect.fs.file.reader; import io.confluent.connect.avro.AvroData; -import org.apache.avro.AvroRuntimeException; import org.apache.avro.Schema; import org.apache.avro.file.DataFileReader; import org.apache.avro.generic.GenericRecord; @@ -11,7 +10,6 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.kafka.connect.data.Struct; -import org.apache.kafka.connect.errors.ConnectException; import java.io.IOException; import java.util.Map; @@ -27,6 +25,7 @@ public class AvroFileReader extends AbstractFileReader { private final DataFileReader reader; private Schema schema; + private boolean closed; public AvroFileReader(FileSystem fs, Path filePath, Map config) throws IOException { super(fs, filePath, new GenericRecordToStruct(), config); @@ -37,6 +36,7 @@ public AvroFileReader(FileSystem fs, Path filePath, Map config) } else { this.reader = new DataFileReader<>(input, new SpecificDatumReader<>(this.schema)); } + this.closed = false; } @Override @@ -47,42 +47,36 @@ protected void configure(Map config) { } @Override - public boolean hasNext() { - try { - return reader.hasNext(); - } catch (AvroRuntimeException are) { - throw new IllegalStateException(are); - } + public boolean hasNextRecord() { + return reader.hasNext(); } @Override protected GenericRecord nextRecord() { - try { - GenericRecord record = reader.next(); - incrementOffset(); + GenericRecord record = reader.next(); + incrementOffset(); - return record; - } catch (AvroRuntimeException are) { - throw new IllegalStateException(are); - } + return record; } @Override - public void seek(long offset) { - try { - reader.sync(offset); - setOffset(reader.previousSync() - 16L); - } catch (IOException ioe) { - throw new ConnectException("Error seeking file " + getFilePath(), ioe); - } + public void seekFile(long offset) throws IOException { + reader.sync(offset); + setOffset(reader.previousSync() - 16L); } @Override public void close() throws IOException { + closed = true; reader.sync(0); reader.close(); } + @Override + public boolean isClosed() { + return closed; + } + static class GenericRecordToStruct implements ReaderAdapter { private static final int CACHE_SIZE = 100; diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/JsonFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/JsonFileReader.java index 700d38b..3fabc01 100644 --- 
a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/JsonFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/JsonFileReader.java @@ -68,29 +68,25 @@ protected void configure(Map config) { mapper.configure(DeserializationFeature.valueOf(feature), Boolean.parseBoolean(entry.getValue())); } else { - log.warn("Ignoring deserialization configuration '" + feature + "' due to it does not exist."); + log.warn("Ignoring deserialization configuration '{}' due to it does not exist.", feature); } }); } @Override - protected JsonRecord nextRecord() { - try { - JsonNode value = mapper.readTree(inner.nextRecord().getValue()); - return new JsonRecord(schema, value); - } catch (IOException ioe) { - throw new IllegalStateException(ioe); - } + protected JsonRecord nextRecord() throws IOException { + JsonNode value = mapper.readTree(inner.nextRecord().getValue()); + return new JsonRecord(schema, value); } @Override - public boolean hasNext() { - return inner.hasNext(); + public boolean hasNextRecord() throws IOException { + return inner.hasNextRecord(); } @Override - public void seek(long offset) { - inner.seek(offset); + public void seekFile(long offset) throws IOException { + inner.seekFile(offset); } @Override @@ -103,6 +99,11 @@ public void close() throws IOException { inner.close(); } + @Override + public boolean isClosed() { + return inner.isClosed(); + } + private static Schema extractSchema(JsonNode jsonNode) { switch (jsonNode.getNodeType()) { case BOOLEAN: @@ -189,7 +190,7 @@ private Object mapValue(Schema schema, JsonNode value) { try { return value.binaryValue(); } catch (IOException ioe) { - throw new IllegalStateException(ioe); + throw new RuntimeException(ioe); } case OBJECT: case POJO: diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/ParquetFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/ParquetFileReader.java index 7253a4b..0657d0b 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/ParquetFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/ParquetFileReader.java @@ -8,7 +8,6 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.kafka.connect.data.Struct; -import org.apache.kafka.connect.errors.ConnectException; import org.apache.parquet.avro.AvroParquetReader; import org.apache.parquet.avro.AvroReadSupport; import org.apache.parquet.hadoop.ParquetReader; @@ -16,7 +15,6 @@ import java.io.IOException; import java.util.Map; -import java.util.NoSuchElementException; import java.util.Optional; import static com.github.mmolimar.kafka.connect.fs.FsSourceTaskConfig.FILE_READER_PREFIX; @@ -64,23 +62,15 @@ protected void configure(Map config) { } @Override - public boolean hasNext() { - if (closed) throw new IllegalStateException("Reader already closed."); + public boolean hasNextRecord() throws IOException { if (currentRecord == null) { - try { - currentRecord = reader.read(); - } catch (IOException ioe) { - throw new ConnectException("Error reading parquet record", ioe); - } + currentRecord = reader.read(); } return currentRecord != null; } @Override protected GenericRecord nextRecord() { - if (!hasNext()) { - throw new NoSuchElementException("There are no more records in file: " + getFilePath()); - } GenericRecord record; if (this.projection != null) { record = new GenericData.Record(this.projection); @@ -94,21 +84,11 @@ record = currentRecord; } @Override - public void seek(long offset) { - if 
(closed) { - throw new ConnectException("Stream is closed!"); - } - if (offset < 0) { - throw new IllegalArgumentException("Record offset must be greater than 0"); - } + public void seekFile(long offset) throws IOException { if (currentOffset() > offset) { - try { - this.reader = initReader(); - setOffset(0); - this.closed = false; - } catch (IOException ioe) { - throw new ConnectException("Error initializing parquet reader", ioe); - } + this.reader = initReader(); + this.closed = false; + setOffset(0); } while (hasNext() && currentOffset() < offset) { nextRecord(); @@ -117,10 +97,15 @@ public void seek(long offset) { @Override public void close() throws IOException { - this.closed = true; + closed = true; reader.close(); } + @Override + public boolean isClosed() { + return closed; + } + static class GenericRecordToStruct implements ReaderAdapter { private static final int CACHE_SIZE = 100; private final AvroData avroData; diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReader.java index e21bdf2..3740db9 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReader.java @@ -7,12 +7,10 @@ import org.apache.kafka.connect.data.Schema; import org.apache.kafka.connect.data.SchemaBuilder; import org.apache.kafka.connect.data.Struct; -import org.apache.kafka.connect.errors.ConnectException; import java.io.EOFException; import java.io.IOException; import java.util.Map; -import java.util.NoSuchElementException; import static com.github.mmolimar.kafka.connect.fs.FsSourceTaskConfig.FILE_READER_PREFIX; @@ -82,8 +80,7 @@ Schema getSchema(Writable writable) { } @Override - public boolean hasNext() { - if (closed) throw new IllegalStateException("Reader already closed."); + public boolean hasNextRecord() throws IOException { try { if (hasNextIndex == -1 || hasNextIndex == recordIndex) { hasNextIndex++; @@ -93,33 +90,21 @@ public boolean hasNext() { return hasNext; } catch (EOFException eofe) { return false; - } catch (IOException ioe) { - throw new ConnectException(ioe); } } @Override protected SequenceRecord nextRecord() { - if (!hasNext()) { - throw new NoSuchElementException("There are no more records in file: " + getFilePath()); - } recordIndex++; return new SequenceRecord<>(schema, keyFieldName, key, valueFieldName, value); } @Override - public void seek(long offset) { - if (offset < 0) { - throw new IllegalArgumentException("Record offset must be greater than 0"); - } - try { - reader.sync(offset); - hasNextIndex = recordIndex = offset; - hasNext = false; - setOffset(offset - 1); - } catch (IOException ioe) { - throw new ConnectException("Error seeking file " + getFilePath(), ioe); - } + public void seekFile(long offset) throws IOException { + reader.sync(offset); + hasNextIndex = recordIndex = offset; + hasNext = false; + setOffset(offset - 1); } @Override @@ -128,6 +113,11 @@ public void close() throws IOException { reader.close(); } + @Override + public boolean isClosed() { + return closed; + } + static class SeqToStruct implements ReaderAdapter> { @Override diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReader.java index 060de36..56f5581 100644 --- 
a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReader.java @@ -7,13 +7,11 @@ import org.apache.kafka.connect.data.Schema; import org.apache.kafka.connect.data.SchemaBuilder; import org.apache.kafka.connect.data.Struct; -import org.apache.kafka.connect.errors.ConnectException; import java.io.*; import java.nio.charset.Charset; import java.util.List; import java.util.Map; -import java.util.NoSuchElementException; import java.util.stream.Collectors; import static com.github.mmolimar.kafka.connect.fs.FsSourceTaskConfig.FILE_READER_PREFIX; @@ -39,10 +37,12 @@ public class TextFileReader extends AbstractFileReader config) throws IOException { super(fs, filePath, new TxtToStruct(), config); this.reader = new LineNumberReader(getFileReader(fs.open(filePath))); + this.closed = false; } @Override @@ -78,39 +78,32 @@ private Reader getFileReader(InputStream inputStream) throws IOException { } @Override - public boolean hasNext() { + public boolean hasNextRecord() throws IOException { if (current != null) { return true; } else if (finished) { return false; } else { - try { - if (!recordPerLine) { - List lines = new BufferedReader(reader).lines().collect(Collectors.toList()); - current = String.join("\n", lines); + if (!recordPerLine) { + List lines = new BufferedReader(reader).lines().collect(Collectors.toList()); + current = String.join("\n", lines); + finished = true; + return true; + } + for (; ; ) { + String line = reader.readLine(); + if (line == null) { finished = true; - return true; - } - for (; ; ) { - String line = reader.readLine(); - if (line == null) { - finished = true; - return false; - } - current = line; - return true; + return false; } - } catch (IOException ioe) { - throw new IllegalStateException(ioe); + current = line; + return true; } } } @Override protected TextRecord nextRecord() { - if (!hasNext()) { - throw new NoSuchElementException("There are no more records in file: " + getFilePath()); - } String aux = current; current = null; incrementOffset(); @@ -118,31 +111,30 @@ protected TextRecord nextRecord() { } @Override - public void seek(long offset) { - if (offset < 0) { - throw new IllegalArgumentException("Record offset must be greater than 0"); + public void seekFile(long offset) throws IOException { + current = null; + if (offset < reader.getLineNumber()) { + finished = false; + reader.close(); + reader = new LineNumberReader(getFileReader(getFs().open(getFilePath()))); } - try { - current = null; - if (offset < reader.getLineNumber()) { - finished = false; - reader.close(); - reader = new LineNumberReader(getFileReader(getFs().open(getFilePath()))); - } - while (reader.getLineNumber() < offset) { - reader.readLine(); - } - setOffset(reader.getLineNumber()); - } catch (IOException ioe) { - throw new ConnectException("Error seeking file " + getFilePath(), ioe); + while (reader.getLineNumber() < offset) { + reader.readLine(); } + setOffset(reader.getLineNumber()); } @Override public void close() throws IOException { + closed = true; reader.close(); } + @Override + public boolean isClosed() { + return closed; + } + static class TxtToStruct implements ReaderAdapter { @Override diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/UnivocityFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/UnivocityFileReader.java index 050ba4c..490af2d 100644 --- 
a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/UnivocityFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/UnivocityFileReader.java @@ -12,7 +12,6 @@ import org.apache.kafka.connect.data.Schema; import org.apache.kafka.connect.data.SchemaBuilder; import org.apache.kafka.connect.data.Struct; -import org.apache.kafka.connect.errors.ConnectException; import java.io.IOException; import java.io.InputStream; @@ -20,7 +19,6 @@ import java.io.Reader; import java.nio.charset.Charset; import java.util.Map; -import java.util.NoSuchElementException; import java.util.Optional; import java.util.stream.IntStream; @@ -145,39 +143,28 @@ private ResultIterator iterateRecords() throws IOExcepti @Override protected final UnivocityRecord nextRecord() { - if (!hasNext()) throw new NoSuchElementException("There are no more records in file: " + getFilePath()); - incrementOffset(); Record record = iterator.next(); return new UnivocityRecord(schema, record.getValues()); } @Override - public final boolean hasNext() { - if (closed) throw new IllegalStateException("Reader already closed."); - + public final boolean hasNextRecord() { return iterator.hasNext(); } @Override - public final void seek(long offset) { - if (offset < 0) { - throw new IllegalArgumentException("Record offset must be greater than 0"); - } - try { - if (offset > currentOffset()) { - iterator.hasNext(); - iterator.getContext().skipLines(offset - currentOffset() - 1); - iterator.next(); - } else { - iterator = iterateRecords(); - iterator.hasNext(); - iterator.getContext().skipLines(offset); - } - setOffset(offset); - } catch (IOException ioe) { - throw new ConnectException("Error seeking file " + getFilePath(), ioe); + public final void seekFile(long offset) throws IOException { + if (offset > currentOffset()) { + iterator.hasNext(); + iterator.getContext().skipLines(offset - currentOffset() - 1); + iterator.next(); + } else { + iterator = iterateRecords(); + iterator.hasNext(); + iterator.getContext().skipLines(offset); } + setOffset(offset); } @Override @@ -186,6 +173,11 @@ public final void close() { closed = true; } + @Override + public final boolean isClosed() { + return closed; + } + static class UnivocityToStruct implements ReaderAdapter { @Override diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/AbstractPolicy.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/AbstractPolicy.java index b57d0c2..5908380 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/AbstractPolicy.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/AbstractPolicy.java @@ -88,7 +88,6 @@ private String convert(String uri) { throw new IllegalArgumentException("Cannot convert dynamic URI: " + matcher.group(1), e); } } - return converted; } @@ -104,7 +103,7 @@ public List getURIs() { @Override public final Iterator execute() throws IOException { if (hasEnded()) { - throw new IllegalWorkerStateException("Policy has ended. Cannot be retried"); + throw new IllegalWorkerStateException("Policy has ended. 
Cannot be retried."); } preCheck(); @@ -163,7 +162,7 @@ public boolean hasNext() { @Override public FileMetadata next() { if (!hasNext() && current == null) { - throw new NoSuchElementException("There are no more items"); + throw new NoSuchElementException("There are no more items."); } FileMetadata metadata = toMetadata(current); current = null; @@ -198,7 +197,6 @@ public FileReader offer(FileMetadata metadata, OffsetStorageReader offsetStorage .filter(fs -> metadata.getPath().startsWith(fs.getWorkingDirectory().toString())) .findFirst() .orElse(null); - try { FileReader reader = ReflectionUtils.makeReader( (Class) conf.getClass(FsSourceTaskConfig.FILE_READER_CLASS), @@ -209,8 +207,8 @@ public FileReader offer(FileMetadata metadata, OffsetStorageReader offsetStorage reader.seek((Long) offset.get("offset")); } return reader; - } catch (Throwable t) { - throw new ConnectException("An error has occurred when creating reader for file: " + metadata.getPath(), t); + } catch (Exception e) { + throw new ConnectException("An error has occurred when creating reader for file: " + metadata.getPath(), e); } } diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/HdfsFileWatcherPolicy.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/HdfsFileWatcherPolicy.java index a6505a3..996d868 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/HdfsFileWatcherPolicy.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/HdfsFileWatcherPolicy.java @@ -139,7 +139,7 @@ public void run() { } catch (FileNotFoundException fnfe) { log.warn("Cannot find file in this FS {}. Stopping watcher...", fs.getWorkingDirectory(), fnfe); } catch (IOException ioe) { - log.info("An interrupted exception has occurred. Path {} is not watched any more", fs.getWorkingDirectory()); + log.warn("An interrupted exception has occurred. Path {} is not watched any more", fs.getWorkingDirectory()); } catch (Exception ioe) { log.warn("Exception watching path {}", fs.getWorkingDirectory(), ioe); throw new IllegalWorkerStateException(ioe); @@ -153,6 +153,7 @@ private void enqueue(String path) throws IOException { return; } + log.debug("Enqueuing file to process {}", filePath); RemoteIterator it = fs.listFiles(filePath, false); while (it.hasNext()) { LocatedFileStatus status = it.next(); diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/util/ReflectionUtils.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/util/ReflectionUtils.java index 6b84ca3..04fa75c 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/util/ReflectionUtils.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/util/ReflectionUtils.java @@ -6,6 +6,7 @@ import org.apache.commons.lang3.reflect.ConstructorUtils; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.kafka.connect.errors.ConnectException; import java.lang.reflect.Constructor; import java.lang.reflect.InvocationTargetException; @@ -15,15 +16,15 @@ public class ReflectionUtils { public static FileReader makeReader(Class clazz, FileSystem fs, - Path path, Map config) throws Throwable { + Path path, Map config) { return make(clazz, fs, path, config); } - public static Policy makePolicy(Class clazz, FsSourceTaskConfig conf) throws Throwable { + public static Policy makePolicy(Class clazz, FsSourceTaskConfig conf) { return make(clazz, conf); } - private static T make(Class clazz, Object... args) throws Throwable { + private static T make(Class clazz, Object... 
args) { try { Class[] constClasses = Arrays.stream(args).map(Object::getClass).toArray(Class[]::new); @@ -32,7 +33,7 @@ private static T make(Class clazz, Object... args) throws Throwable { } catch (IllegalAccessException | InstantiationException | InvocationTargetException e) { - throw e.getCause(); + throw new ConnectException(e.getCause()); } } } diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReaderTest.java index bebeff7..5e9d59e 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/AvroFileReaderTest.java @@ -12,6 +12,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.kafka.connect.data.Struct; +import org.apache.kafka.connect.errors.ConnectException; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.MethodSource; @@ -68,7 +69,7 @@ protected Path createDataFile(ReaderFsTestConfig fsConfig, Object... args) throw @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void readerWithSchema(ReaderFsTestConfig fsConfig) throws Throwable { + public void readerWithSchema(ReaderFsTestConfig fsConfig) throws IOException { Map readerConfig = getReaderConfig(); readerConfig.put(AvroFileReader.FILE_READER_AVRO_SCHEMA, schema.toString()); FileSystem testFs = FileSystem.newInstance(fsConfig.getFsUri(), new Configuration()); @@ -78,12 +79,12 @@ public void readerWithSchema(ReaderFsTestConfig fsConfig) throws Throwable { @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void readerWithInvalidSchema(ReaderFsTestConfig fsConfig) throws Throwable { + public void readerWithInvalidSchema(ReaderFsTestConfig fsConfig) throws IOException { Map readerConfig = getReaderConfig(); readerConfig.put(AvroFileReader.FILE_READER_AVRO_SCHEMA, Schema.create(Schema.Type.STRING).toString()); FileSystem testFs = FileSystem.newInstance(fsConfig.getFsUri(), new Configuration()); fsConfig.setReader(getReader(testFs, fsConfig.getDataFile(), readerConfig)); - assertThrows(IllegalStateException.class, () -> readAllData(fsConfig)); + assertThrows(ConnectException.class, () -> readAllData(fsConfig)); assertThrows(AvroTypeException.class, () -> { try { readAllData(fsConfig); @@ -99,7 +100,14 @@ public void readerWithUnparseableSchema(ReaderFsTestConfig fsConfig) throws IOEx Map readerConfig = getReaderConfig(); readerConfig.put(AvroFileReader.FILE_READER_AVRO_SCHEMA, "invalid schema"); FileSystem testFs = FileSystem.newInstance(fsConfig.getFsUri(), new Configuration()); - assertThrows(SchemaParseException.class, () -> getReader(testFs, fsConfig.getDataFile(), readerConfig)); + assertThrows(ConnectException.class, () -> getReader(testFs, fsConfig.getDataFile(), readerConfig)); + assertThrows(SchemaParseException.class, () -> { + try { + getReader(testFs, fsConfig.getDataFile(), readerConfig); + } catch (Exception e) { + throw e.getCause(); + } + }); } @Override diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/CsvFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/CsvFileReaderTest.java index a6b9fbf..a1247d5 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/CsvFileReaderTest.java +++ 
b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/CsvFileReaderTest.java @@ -40,7 +40,7 @@ protected Path createDataFile(ReaderFsTestConfig fsConfig, Object... args) throw @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void readAllDataWithMalformedRows(ReaderFsTestConfig fsConfig) throws Throwable { + public void readAllDataWithMalformedRows(ReaderFsTestConfig fsConfig) throws IOException { File tmp = File.createTempFile("test-", "." + getFileExtension()); try (FileWriter writer = new FileWriter(tmp)) { writer.append(FIELD_COLUMN1 + "," + FIELD_COLUMN2 + "," + FIELD_COLUMN3 + "," + FIELD_COLUMN4 + "\n"); diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FileReaderTestBase.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FileReaderTestBase.java index f91e9af..f21cf49 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FileReaderTestBase.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FileReaderTestBase.java @@ -6,6 +6,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.kafka.connect.data.Struct; +import org.apache.kafka.connect.errors.ConnectException; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeAll; @@ -43,7 +44,7 @@ public static void finishFs() throws IOException { } @BeforeEach - public void openReader() throws Throwable { + public void openReader() throws IOException { for (ReaderFsTestConfig fsConfig : TEST_FILE_SYSTEMS) { fsConfig.setDataFile(createDataFile(fsConfig)); FileReader reader = ReflectionUtils.makeReader(getReaderClass(), fsConfig.getFs(), @@ -85,28 +86,49 @@ public void invalidArgs(ReaderFsTestConfig fsConfig) { @MethodSource("fileSystemConfigProvider") public void fileDoesNotExist(ReaderFsTestConfig fsConfig) { Path path = new Path(new Path(fsConfig.getFsUri()), UUID.randomUUID().toString()); - assertThrows(FileNotFoundException.class, () -> getReader(fsConfig.getFs(), path, getReaderConfig())); + assertThrows(ConnectException.class, () -> getReader(fsConfig.getFs(), path, getReaderConfig())); + assertThrows(FileNotFoundException.class, () -> { + try { + getReader(fsConfig.getFs(), path, getReaderConfig()); + } catch (Exception e) { + throw e.getCause(); + } + }); } @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void emptyFile(ReaderFsTestConfig fsConfig) throws Throwable { + public void emptyFile(ReaderFsTestConfig fsConfig) throws IOException { File tmp = File.createTempFile("test-", "." + getFileExtension()); Path path = new Path(new Path(fsConfig.getFsUri()), tmp.getName()); fsConfig.getFs().moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); - assertThrows(IOException.class, () -> getReader(fsConfig.getFs(), path, getReaderConfig())); + assertThrows(ConnectException.class, () -> getReader(fsConfig.getFs(), path, getReaderConfig())); + assertThrows(IOException.class, () -> { + try { + getReader(fsConfig.getFs(), path, getReaderConfig()); + } catch (Exception e) { + throw e.getCause(); + } + }); } @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void invalidFileFormat(ReaderFsTestConfig fsConfig) throws Throwable { + public void invalidFileFormat(ReaderFsTestConfig fsConfig) throws IOException { File tmp = File.createTempFile("test-", "." 
+ getFileExtension()); try (BufferedWriter writer = new BufferedWriter(new FileWriter(tmp))) { writer.write("test"); } Path path = new Path(new Path(fsConfig.getFsUri()), tmp.getName()); fsConfig.getFs().moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); - assertThrows(IOException.class, () -> getReader(fsConfig.getFs(), path, getReaderConfig())); + assertThrows(ConnectException.class, () -> getReader(fsConfig.getFs(), path, getReaderConfig())); + assertThrows(IOException.class, () -> { + try { + getReader(fsConfig.getFs(), path, getReaderConfig()); + } catch (Exception e) { + throw e.getCause(); + } + }); } @ParameterizedTest @@ -154,7 +176,7 @@ public void seekFile(ReaderFsTestConfig fsConfig) { @MethodSource("fileSystemConfigProvider") public void negativeSeek(ReaderFsTestConfig fsConfig) { FileReader reader = fsConfig.getReader(); - assertThrows(RuntimeException.class, () -> reader.seek(-1)); + assertThrows(IllegalArgumentException.class, () -> reader.seek(-1)); } @ParameterizedTest @@ -171,11 +193,12 @@ public void exceededSeek(ReaderFsTestConfig fsConfig) { public void readFileAlreadyClosed(ReaderFsTestConfig fsConfig) throws IOException { FileReader reader = fsConfig.getReader(); reader.close(); - assertThrows(IllegalStateException.class, reader::hasNext); - assertThrows(IllegalStateException.class, reader::next); + assertThrows(ConnectException.class, reader::hasNext); + assertThrows(ConnectException.class, reader::next); + assertThrows(ConnectException.class, () -> reader.seek(1)); } - protected final FileReader getReader(FileSystem fs, Path path, Map config) throws Throwable { + protected final FileReader getReader(FileSystem fs, Path path, Map config) { return ReflectionUtils.makeReader(getReaderClass(), fs, path, config); } diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/JsonFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/JsonFileReaderTest.java index fdc2422..98e7e5b 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/JsonFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/JsonFileReaderTest.java @@ -8,6 +8,7 @@ import com.fasterxml.jackson.databind.node.ObjectNode; import org.apache.hadoop.fs.Path; import org.apache.kafka.connect.data.Struct; +import org.apache.kafka.connect.errors.ConnectException; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.MethodSource; @@ -77,7 +78,7 @@ protected Path createDataFile(ReaderFsTestConfig fsConfig, Object... args) throw @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void emptyFile(ReaderFsTestConfig fsConfig) throws Throwable { + public void emptyFile(ReaderFsTestConfig fsConfig) throws IOException { File tmp = File.createTempFile("test-", "." 
+ getFileExtension()); Path path = new Path(new Path(fsConfig.getFsUri()), tmp.getName()); fsConfig.getFs().moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); @@ -87,7 +88,7 @@ public void emptyFile(ReaderFsTestConfig fsConfig) throws Throwable { @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void validFileEncoding(ReaderFsTestConfig fsConfig) throws Throwable { + public void validFileEncoding(ReaderFsTestConfig fsConfig) { Map readerConfig = getReaderConfig(); readerConfig.put(JsonFileReader.FILE_READER_JSON_ENCODING, "Cp1252"); fsConfig.setReader(getReader(fsConfig.getFs(), fsConfig.getDataFile(), readerConfig)); @@ -96,7 +97,7 @@ public void validFileEncoding(ReaderFsTestConfig fsConfig) throws Throwable { @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void invalidDeserializationConfig(ReaderFsTestConfig fsConfig) throws Throwable { + public void invalidDeserializationConfig(ReaderFsTestConfig fsConfig) { Map readerConfig = getReaderConfig(); readerConfig.put(JsonFileReader.FILE_READER_JSON_DESERIALIZATION_CONFIGS + "invalid", "false"); fsConfig.setReader(getReader(fsConfig.getFs(), fsConfig.getDataFile(), readerConfig)); @@ -108,13 +109,19 @@ public void invalidDeserializationConfig(ReaderFsTestConfig fsConfig) throws Thr public void invalidFileEncoding(ReaderFsTestConfig fsConfig) { Map readerConfig = getReaderConfig(); readerConfig.put(JsonFileReader.FILE_READER_JSON_ENCODING, "invalid_charset"); - assertThrows(UnsupportedCharsetException.class, () -> getReader(fsConfig.getFs(), - fsConfig.getDataFile(), readerConfig)); + assertThrows(ConnectException.class, () -> getReader(fsConfig.getFs(), fsConfig.getDataFile(), readerConfig)); + assertThrows(UnsupportedCharsetException.class, () -> { + try { + getReader(fsConfig.getFs(), fsConfig.getDataFile(), readerConfig); + } catch (Exception e) { + throw e.getCause(); + } + }); } @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void readDataWithRecordPerLineDisabled(ReaderFsTestConfig fsConfig) throws Throwable { + public void readDataWithRecordPerLineDisabled(ReaderFsTestConfig fsConfig) throws IOException { Path file = createDataFile(fsConfig, 1, false); Map readerConfig = getReaderConfig(); readerConfig.put(JsonFileReader.FILE_READER_JSON_RECORD_PER_LINE, "false"); @@ -153,7 +160,7 @@ public void readDifferentCompressionTypes(ReaderFsTestConfig fsConfig) { } reader.close(); assertEquals(NUM_RECORDS, recordCount, "The number of records in the file does not match"); - } catch (Throwable e) { + } catch (Exception e) { throw new RuntimeException(e); } }); diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/ParquetFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/ParquetFileReaderTest.java index be5e831..30dd425 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/ParquetFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/ParquetFileReaderTest.java @@ -10,6 +10,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.kafka.connect.data.Struct; +import org.apache.kafka.connect.errors.ConnectException; import org.apache.kafka.connect.errors.DataException; import org.apache.parquet.avro.AvroParquetWriter; import org.apache.parquet.hadoop.ParquetFileWriter; @@ -76,7 +77,7 @@ protected Path createDataFile(ReaderFsTestConfig fsConfig, Object... 
args) throw @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void emptyFile(ReaderFsTestConfig fsConfig) throws Throwable { + public void emptyFile(ReaderFsTestConfig fsConfig) throws IOException { File tmp = File.createTempFile("test-", "." + getFileExtension()); Path path = new Path(new Path(fsConfig.getFsUri()), tmp.getName()); fsConfig.getFs().moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); @@ -85,7 +86,7 @@ public void emptyFile(ReaderFsTestConfig fsConfig) throws Throwable { @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void invalidFileFormat(ReaderFsTestConfig fsConfig) throws Throwable { + public void invalidFileFormat(ReaderFsTestConfig fsConfig) throws IOException { File tmp = File.createTempFile("test-", "." + getFileExtension()); try (BufferedWriter writer = new BufferedWriter(new FileWriter(tmp))) { writer.write("test"); @@ -97,7 +98,7 @@ public void invalidFileFormat(ReaderFsTestConfig fsConfig) throws Throwable { @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void readerWithSchema(ReaderFsTestConfig fsConfig) throws Throwable { + public void readerWithSchema(ReaderFsTestConfig fsConfig) throws IOException { Map readerConfig = getReaderConfig(); readerConfig.put(ParquetFileReader.FILE_READER_PARQUET_SCHEMA, readerSchema.toString()); readerConfig.put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_PARQUET, getFileExtension()); @@ -108,7 +109,7 @@ public void readerWithSchema(ReaderFsTestConfig fsConfig) throws Throwable { @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void readerWithProjection(ReaderFsTestConfig fsConfig) throws Throwable { + public void readerWithProjection(ReaderFsTestConfig fsConfig) throws IOException { Map readerConfig = getReaderConfig(); readerConfig.put(ParquetFileReader.FILE_READER_PARQUET_PROJECTION, projectionSchema.toString()); readerConfig.put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_PARQUET, getFileExtension()); @@ -126,7 +127,7 @@ public void readerWithProjection(ReaderFsTestConfig fsConfig) throws Throwable { @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void readerWithInvalidProjection(ReaderFsTestConfig fsConfig) throws Throwable { + public void readerWithInvalidProjection(ReaderFsTestConfig fsConfig) throws IOException { Schema testSchema = SchemaBuilder.record("test_projection").namespace("test.avro") .fields() .name("field1").type("string").noDefault() @@ -136,18 +137,28 @@ public void readerWithInvalidProjection(ReaderFsTestConfig fsConfig) throws Thro readerConfig.put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_PARQUET, getFileExtension()); FileSystem testFs = FileSystem.newInstance(fsConfig.getFsUri(), new Configuration()); fsConfig.setReader(getReader(testFs, fsConfig.getDataFile(), readerConfig)); - assertThrows(InvalidRecordException.class, () -> readAllData(fsConfig)); + try { + readAllData(fsConfig); + } catch (Exception e) { + assertEquals(ConnectException.class, e.getClass()); + assertEquals(InvalidRecordException.class, e.getCause().getClass()); + } } @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void readerWithInvalidSchema(ReaderFsTestConfig fsConfig) throws Throwable { + public void readerWithInvalidSchema(ReaderFsTestConfig fsConfig) throws IOException { Map readerConfig = getReaderConfig(); readerConfig.put(ParquetFileReader.FILE_READER_PARQUET_SCHEMA, Schema.create(Schema.Type.STRING).toString()); 
readerConfig.put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_PARQUET, getFileExtension()); FileSystem testFs = FileSystem.newInstance(fsConfig.getFsUri(), new Configuration()); fsConfig.setReader(getReader(testFs, fsConfig.getDataFile(), readerConfig)); - assertThrows(AvroRuntimeException.class, () -> readAllData(fsConfig)); + try { + readAllData(fsConfig); + } catch (Exception e) { + assertEquals(ConnectException.class, e.getClass()); + assertEquals(AvroRuntimeException.class, e.getCause().getClass()); + } } @ParameterizedTest @@ -156,9 +167,17 @@ public void readerWithUnparseableSchema(ReaderFsTestConfig fsConfig) { Map readerConfig = getReaderConfig(); readerConfig.put(ParquetFileReader.FILE_READER_PARQUET_SCHEMA, "invalid schema"); readerConfig.put(AgnosticFileReader.FILE_READER_AGNOSTIC_EXTENSIONS_PARQUET, getFileExtension()); - assertThrows(SchemaParseException.class, () -> + assertThrows(ConnectException.class, () -> getReader(FileSystem.newInstance(fsConfig.getFsUri(), new Configuration()), fsConfig.getDataFile(), readerConfig)); + assertThrows(SchemaParseException.class, () -> { + try { + getReader(FileSystem.newInstance(fsConfig.getFsUri(), new Configuration()), + fsConfig.getDataFile(), readerConfig); + } catch (Exception e) { + throw e.getCause(); + } + }); } @Override diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReaderTest.java index cc62c0b..e70d3dd 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/SequenceFileReaderTest.java @@ -62,7 +62,7 @@ protected Path createDataFile(ReaderFsTestConfig fsConfig, Object... args) throw @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void defaultFieldNames(ReaderFsTestConfig fsConfig) throws Throwable { + public void defaultFieldNames(ReaderFsTestConfig fsConfig) { Map readerConfig = getReaderConfig(); readerConfig.put(SequenceFileReader.FILE_READER_SEQUENCE_FIELD_NAME_KEY, null); readerConfig.put(SequenceFileReader.FILE_READER_SEQUENCE_FIELD_NAME_VALUE, null); diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReaderTest.java index 5078d24..5e56ac6 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/TextFileReaderTest.java @@ -2,6 +2,7 @@ import org.apache.hadoop.fs.Path; import org.apache.kafka.connect.data.Struct; +import org.apache.kafka.connect.errors.ConnectException; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.MethodSource; @@ -41,7 +42,7 @@ protected Path createDataFile(ReaderFsTestConfig fsConfig, Object... 
args) throw @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void validFileEncoding(ReaderFsTestConfig fsConfig) throws Throwable { + public void validFileEncoding(ReaderFsTestConfig fsConfig) { Map readerConfig = getReaderConfig(); readerConfig.put(TextFileReader.FILE_READER_TEXT_FIELD_NAME_VALUE, FIELD_NAME_VALUE); readerConfig.put(TextFileReader.FILE_READER_TEXT_ENCODING, "Cp1252"); @@ -58,13 +59,19 @@ public void invalidFileEncoding(ReaderFsTestConfig fsConfig) { readerConfig.put(TextFileReader.FILE_READER_TEXT_FIELD_NAME_VALUE, FIELD_NAME_VALUE); readerConfig.put(TextFileReader.FILE_READER_TEXT_ENCODING, "invalid_charset"); readerConfig.put(TextFileReader.FILE_READER_TEXT_COMPRESSION_TYPE, COMPRESSION_TYPE_DEFAULT); - assertThrows(UnsupportedCharsetException.class, () -> getReader(fsConfig.getFs(), - fsConfig.getDataFile(), readerConfig)); + assertThrows(ConnectException.class, () -> getReader(fsConfig.getFs(), fsConfig.getDataFile(), readerConfig)); + assertThrows(UnsupportedCharsetException.class, () -> { + try { + getReader(fsConfig.getFs(), fsConfig.getDataFile(), readerConfig); + } catch (Exception e) { + throw e.getCause(); + } + }); } @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void readDataWithRecordPerLineDisabled(ReaderFsTestConfig fsConfig) throws Throwable { + public void readDataWithRecordPerLineDisabled(ReaderFsTestConfig fsConfig) throws IOException { Path file = createDataFile(fsConfig, COMPRESSION_TYPE_DEFAULT); Map readerConfig = getReaderConfig(); readerConfig.put(TextFileReader.FILE_READER_TEXT_FIELD_NAME_VALUE, FIELD_NAME_VALUE); @@ -105,7 +112,7 @@ public void readDifferentCompressionTypes(ReaderFsTestConfig fsConfig) { } reader.close(); assertEquals(NUM_RECORDS, recordCount, "The number of records in the file does not match"); - } catch (Throwable e) { + } catch (Exception e) { throw new RuntimeException(e); } }); diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/UnivocityFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/UnivocityFileReaderTest.java index 438bb1e..a5e8d9e 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/UnivocityFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/UnivocityFileReaderTest.java @@ -3,12 +3,14 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.kafka.connect.data.Struct; +import org.apache.kafka.connect.errors.ConnectException; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.MethodSource; import java.io.BufferedWriter; import java.io.File; import java.io.FileWriter; +import java.io.IOException; import java.lang.reflect.ParameterizedType; import java.nio.charset.UnsupportedCharsetException; import java.util.Arrays; @@ -28,7 +30,7 @@ abstract class UnivocityFileReaderTest extends Fi @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void emptyFile(ReaderFsTestConfig fsConfig) throws Throwable { + public void emptyFile(ReaderFsTestConfig fsConfig) throws IOException { File tmp = File.createTempFile("test-", "." 
+ getFileExtension()); Path path = new Path(new Path(fsConfig.getFsUri()), tmp.getName()); fsConfig.getFs().moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); @@ -37,7 +39,7 @@ public void emptyFile(ReaderFsTestConfig fsConfig) throws Throwable { @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void invalidFileFormat(ReaderFsTestConfig fsConfig) throws Throwable { + public void invalidFileFormat(ReaderFsTestConfig fsConfig) throws IOException { File tmp = File.createTempFile("test-", "." + getFileExtension()); try (BufferedWriter writer = new BufferedWriter(new FileWriter(tmp))) { writer.write("test"); @@ -62,7 +64,7 @@ public void invaliConfigArgs(ReaderFsTestConfig fsConfig) { @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void readAllDataWithoutHeader(ReaderFsTestConfig fsConfig) throws Throwable { + public void readAllDataWithoutHeader(ReaderFsTestConfig fsConfig) throws IOException { Path file = createDataFile(fsConfig, false); Map readerConfig = getReaderConfig(); readerConfig.put(T.FILE_READER_DELIMITED_SETTINGS_HEADER, "false"); @@ -101,7 +103,7 @@ public void readDifferentCompressionTypes(ReaderFsTestConfig fsConfig) { } reader.close(); assertEquals(NUM_RECORDS, recordCount, "The number of records in the file does not match"); - } catch (Throwable e) { + } catch (Exception e) { throw new RuntimeException(e); } }); @@ -109,7 +111,7 @@ public void readDifferentCompressionTypes(ReaderFsTestConfig fsConfig) { @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void seekFileWithoutHeader(ReaderFsTestConfig fsConfig) throws Throwable { + public void seekFileWithoutHeader(ReaderFsTestConfig fsConfig) throws IOException { Path file = createDataFile(fsConfig, false); Map readerConfig = getReaderConfig(); readerConfig.put(T.FILE_READER_DELIMITED_SETTINGS_HEADER, "false"); @@ -141,7 +143,7 @@ public void seekFileWithoutHeader(ReaderFsTestConfig fsConfig) throws Throwable @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void validFileEncoding(ReaderFsTestConfig fsConfig) throws Throwable { + public void validFileEncoding(ReaderFsTestConfig fsConfig) { Map readerConfig = getReaderConfig(); readerConfig.put(T.FILE_READER_DELIMITED_SETTINGS_HEADER, "true"); readerConfig.put(T.FILE_READER_DELIMITED_ENCODING, "Cp1252"); @@ -154,14 +156,19 @@ public void invalidFileEncoding(ReaderFsTestConfig fsConfig) { Map readerConfig = getReaderConfig(); readerConfig.put(T.FILE_READER_DELIMITED_SETTINGS_HEADER, "true"); readerConfig.put(T.FILE_READER_DELIMITED_ENCODING, "invalid_charset"); - assertThrows(UnsupportedCharsetException.class, () -> getReader(fsConfig.getFs(), - fsConfig.getDataFile(), readerConfig)); + assertThrows(ConnectException.class, () -> getReader(fsConfig.getFs(), fsConfig.getDataFile(), readerConfig)); + assertThrows(UnsupportedCharsetException.class, () -> { + try { + getReader(fsConfig.getFs(), fsConfig.getDataFile(), readerConfig); + } catch (Exception e) { + throw e.getCause(); + } + }); } @Override protected Class getReaderClass() { - return (Class) ((ParameterizedType) this.getClass().getGenericSuperclass()) - .getActualTypeArguments()[0]; + return (Class) ((ParameterizedType) this.getClass().getGenericSuperclass()).getActualTypeArguments()[0]; } @Override diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/CronPolicyTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/CronPolicyTest.java index 5de4e95..72bac98 100644 --- 
a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/CronPolicyTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/CronPolicyTest.java @@ -5,6 +5,7 @@ import com.github.mmolimar.kafka.connect.fs.util.ReflectionUtils; import org.apache.hadoop.fs.Path; import org.apache.kafka.common.config.ConfigException; +import org.apache.kafka.connect.errors.ConnectException; import org.apache.kafka.connect.errors.IllegalWorkerStateException; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.MethodSource; @@ -53,8 +54,17 @@ public void invalidCronExpression(PolicyFsTestConfig fsConfig) { Map originals = fsConfig.getSourceTaskConfig().originalsStrings(); originals.put(CronPolicy.CRON_POLICY_EXPRESSION, "invalid"); FsSourceTaskConfig cfg = new FsSourceTaskConfig(originals); - assertThrows(ConfigException.class, () -> ReflectionUtils.makePolicy( - (Class) fsConfig.getSourceTaskConfig().getClass(FsSourceTaskConfig.POLICY_CLASS), cfg)); + assertThrows(ConnectException.class, () -> + ReflectionUtils.makePolicy((Class) fsConfig.getSourceTaskConfig() + .getClass(FsSourceTaskConfig.POLICY_CLASS), cfg)); + assertThrows(ConfigException.class, () -> { + try { + ReflectionUtils.makePolicy((Class) fsConfig.getSourceTaskConfig() + .getClass(FsSourceTaskConfig.POLICY_CLASS), cfg); + } catch (Exception e) { + throw e.getCause(); + } + }); } @ParameterizedTest @@ -63,15 +73,24 @@ public void invalidEndDate(PolicyFsTestConfig fsConfig) { Map originals = fsConfig.getSourceTaskConfig().originalsStrings(); originals.put(CronPolicy.CRON_POLICY_END_DATE, "invalid"); FsSourceTaskConfig cfg = new FsSourceTaskConfig(originals); - assertThrows(ConfigException.class, () -> ReflectionUtils.makePolicy( - (Class) fsConfig.getSourceTaskConfig().getClass(FsSourceTaskConfig.POLICY_CLASS), cfg)); + assertThrows(ConnectException.class, () -> + ReflectionUtils.makePolicy((Class) fsConfig.getSourceTaskConfig() + .getClass(FsSourceTaskConfig.POLICY_CLASS), cfg)); + assertThrows(ConfigException.class, () -> { + try { + ReflectionUtils.makePolicy((Class) fsConfig.getSourceTaskConfig() + .getClass(FsSourceTaskConfig.POLICY_CLASS), cfg); + } catch (Exception e) { + throw e.getCause(); + } + }); } @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void canBeInterrupted(PolicyFsTestConfig fsConfig) throws Throwable { - Policy policy = ReflectionUtils.makePolicy( - (Class) fsConfig.getSourceTaskConfig().getClass(FsSourceTaskConfig.POLICY_CLASS), + public void canBeInterrupted(PolicyFsTestConfig fsConfig) throws IOException { + Policy policy = ReflectionUtils.makePolicy((Class) fsConfig.getSourceTaskConfig() + .getClass(FsSourceTaskConfig.POLICY_CLASS), fsConfig.getSourceTaskConfig()); for (int i = 0; i < 5; i++) { diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/PolicyTestBase.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/PolicyTestBase.java index 6aa77b1..6af841b 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/PolicyTestBase.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/PolicyTestBase.java @@ -6,6 +6,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.kafka.common.config.ConfigException; +import org.apache.kafka.connect.errors.ConnectException; import org.apache.kafka.connect.errors.IllegalWorkerStateException; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.AfterEach; @@ -47,11 +48,11 @@ public static void 
finishFs() throws IOException { } @BeforeEach - public void initPolicy() throws Throwable { + public void initPolicy() { for (PolicyFsTestConfig fsConfig : TEST_FILE_SYSTEMS) { FsSourceTaskConfig sourceTaskConfig = buildSourceTaskConfig(fsConfig.getDirectories()); - Policy policy = ReflectionUtils.makePolicy( - (Class) sourceTaskConfig.getClass(FsSourceTaskConfig.POLICY_CLASS), sourceTaskConfig); + Policy policy = ReflectionUtils.makePolicy((Class) sourceTaskConfig + .getClass(FsSourceTaskConfig.POLICY_CLASS), sourceTaskConfig); fsConfig.setSourceTaskConfig(sourceTaskConfig); fsConfig.setPolicy(policy); } @@ -83,14 +84,15 @@ public void invalidArgs(PolicyFsTestConfig fsConfig) { @ParameterizedTest @MethodSource("fileSystemConfigProvider") public void invalidConfig(PolicyFsTestConfig fsConfig) { - assertThrows(ConfigException.class, () -> ReflectionUtils.makePolicy( - (Class) fsConfig.getSourceTaskConfig().getClass(FsSourceTaskConfig.POLICY_CLASS), - new FsSourceTaskConfig(new HashMap<>()))); + assertThrows(ConfigException.class, () -> + ReflectionUtils.makePolicy((Class) fsConfig.getSourceTaskConfig() + .getClass(FsSourceTaskConfig.POLICY_CLASS), + new FsSourceTaskConfig(new HashMap<>()))); } @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void interruptPolicy(PolicyFsTestConfig fsConfig) throws Throwable { + public void interruptPolicy(PolicyFsTestConfig fsConfig) throws IOException { fsConfig.getPolicy().execute(); fsConfig.getPolicy().interrupt(); assertTrue(fsConfig.getPolicy().hasEnded()); @@ -172,14 +174,14 @@ public void execPolicyAlreadyEnded(PolicyFsTestConfig fsConfig) throws IOExcepti @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void dynamicURIs(PolicyFsTestConfig fsConfig) throws Throwable { + public void dynamicURIs(PolicyFsTestConfig fsConfig) throws IOException { Path dynamic = new Path(fsConfig.getFsUri().toString(), "${G}/${yyyy}/${MM}/${W}"); fsConfig.getFs().create(dynamic); Map originals = fsConfig.getSourceTaskConfig().originalsStrings(); originals.put(FsSourceTaskConfig.FS_URIS, dynamic.toString()); FsSourceTaskConfig cfg = new FsSourceTaskConfig(originals); - Policy policy = ReflectionUtils.makePolicy( - (Class) fsConfig.getSourceTaskConfig().getClass(FsSourceTaskConfig.POLICY_CLASS), cfg); + Policy policy = ReflectionUtils.makePolicy((Class) fsConfig.getSourceTaskConfig() + .getClass(FsSourceTaskConfig.POLICY_CLASS), cfg); fsConfig.setPolicy(policy); assertEquals(1, fsConfig.getPolicy().getURIs().size()); @@ -200,14 +202,23 @@ public void dynamicURIs(PolicyFsTestConfig fsConfig) throws Throwable { @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void invalidDynamicURIs(PolicyFsTestConfig fsConfig) throws Throwable { + public void invalidDynamicURIs(PolicyFsTestConfig fsConfig) throws IOException { Path dynamic = new Path(fsConfig.getFsUri().toString(), "${yyyy}/${MM}/${mmmmmmm}"); fsConfig.getFs().create(dynamic); Map originals = fsConfig.getSourceTaskConfig().originalsStrings(); originals.put(FsSourceTaskConfig.FS_URIS, dynamic.toString()); FsSourceTaskConfig cfg = new FsSourceTaskConfig(originals); - assertThrows(IllegalArgumentException.class, () -> ReflectionUtils.makePolicy( - (Class) fsConfig.getSourceTaskConfig().getClass(FsSourceTaskConfig.POLICY_CLASS), cfg)); + assertThrows(ConnectException.class, () -> + ReflectionUtils.makePolicy((Class) fsConfig.getSourceTaskConfig() + .getClass(FsSourceTaskConfig.POLICY_CLASS), cfg)); + assertThrows(IllegalArgumentException.class, () -> { + try { + 
ReflectionUtils.makePolicy((Class) fsConfig.getSourceTaskConfig() + .getClass(FsSourceTaskConfig.POLICY_CLASS), cfg); + } catch (Exception e) { + throw e.getCause(); + } + }); } protected abstract FsSourceTaskConfig buildSourceTaskConfig(List directories); diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/SleepyPolicyTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/SleepyPolicyTest.java index 9748d15..65c41c7 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/SleepyPolicyTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/SleepyPolicyTest.java @@ -5,9 +5,11 @@ import com.github.mmolimar.kafka.connect.fs.util.ReflectionUtils; import org.apache.hadoop.fs.Path; import org.apache.kafka.common.config.ConfigException; +import org.apache.kafka.connect.errors.ConnectException; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.MethodSource; +import java.io.IOException; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -40,8 +42,17 @@ public void invalidSleepTime(PolicyFsTestConfig fsConfig) { Map originals = fsConfig.getSourceTaskConfig().originalsStrings(); originals.put(SleepyPolicy.SLEEPY_POLICY_SLEEP_MS, "invalid"); FsSourceTaskConfig cfg = new FsSourceTaskConfig(originals); - assertThrows(ConfigException.class, () -> ReflectionUtils.makePolicy( - (Class) fsConfig.getSourceTaskConfig().getClass(FsSourceTaskConfig.POLICY_CLASS), cfg)); + assertThrows(ConnectException.class, () -> + ReflectionUtils.makePolicy((Class) fsConfig.getSourceTaskConfig() + .getClass(FsSourceTaskConfig.POLICY_CLASS), cfg)); + assertThrows(ConfigException.class, () -> { + try { + ReflectionUtils.makePolicy((Class) fsConfig.getSourceTaskConfig() + .getClass(FsSourceTaskConfig.POLICY_CLASS), cfg); + } catch (Exception e) { + throw e.getCause(); + } + }); } @ParameterizedTest @@ -50,8 +61,17 @@ public void invalidMaxExecs(PolicyFsTestConfig fsConfig) { Map originals = fsConfig.getSourceTaskConfig().originalsStrings(); originals.put(SleepyPolicy.SLEEPY_POLICY_MAX_EXECS, "invalid"); FsSourceTaskConfig cfg = new FsSourceTaskConfig(originals); - assertThrows(ConfigException.class, () -> ReflectionUtils.makePolicy( - (Class) fsConfig.getSourceTaskConfig().getClass(FsSourceTaskConfig.POLICY_CLASS), cfg)); + assertThrows(ConnectException.class, () -> + ReflectionUtils.makePolicy((Class) fsConfig.getSourceTaskConfig() + .getClass(FsSourceTaskConfig.POLICY_CLASS), cfg)); + assertThrows(ConfigException.class, () -> { + try { + ReflectionUtils.makePolicy((Class) fsConfig.getSourceTaskConfig() + .getClass(FsSourceTaskConfig.POLICY_CLASS), cfg); + } catch (Exception e) { + throw e.getCause(); + } + }); } @ParameterizedTest @@ -60,20 +80,29 @@ public void invalidSleepFraction(PolicyFsTestConfig fsConfig) { Map originals = fsConfig.getSourceTaskConfig().originalsStrings(); originals.put(SleepyPolicy.SLEEPY_POLICY_SLEEP_FRACTION, "invalid"); FsSourceTaskConfig cfg = new FsSourceTaskConfig(originals); - assertThrows(ConfigException.class, () -> ReflectionUtils.makePolicy( - (Class) fsConfig.getSourceTaskConfig().getClass(FsSourceTaskConfig.POLICY_CLASS), cfg)); + assertThrows(ConnectException.class, () -> + ReflectionUtils.makePolicy((Class) fsConfig.getSourceTaskConfig() + .getClass(FsSourceTaskConfig.POLICY_CLASS), cfg)); + assertThrows(ConfigException.class, () -> { + try { + ReflectionUtils.makePolicy((Class) fsConfig.getSourceTaskConfig() + .getClass(FsSourceTaskConfig.POLICY_CLASS), cfg); 
+ } catch (Exception e) { + throw e.getCause(); + } + }); } @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void sleepExecution(PolicyFsTestConfig fsConfig) throws Throwable { + public void sleepExecution(PolicyFsTestConfig fsConfig) throws IOException { Map tConfig = fsConfig.getSourceTaskConfig().originalsStrings(); tConfig.put(SleepyPolicy.SLEEPY_POLICY_SLEEP_MS, "1000"); tConfig.put(SleepyPolicy.SLEEPY_POLICY_MAX_EXECS, "2"); FsSourceTaskConfig sleepConfig = new FsSourceTaskConfig(tConfig); - Policy policy = ReflectionUtils.makePolicy( - (Class) fsConfig.getSourceTaskConfig().getClass(FsSourceTaskConfig.POLICY_CLASS), sleepConfig); + Policy policy = ReflectionUtils.makePolicy((Class) fsConfig.getSourceTaskConfig() + .getClass(FsSourceTaskConfig.POLICY_CLASS), sleepConfig); assertFalse(policy.hasEnded()); policy.execute(); assertFalse(policy.hasEnded()); @@ -83,14 +112,14 @@ public void sleepExecution(PolicyFsTestConfig fsConfig) throws Throwable { @ParameterizedTest @MethodSource("fileSystemConfigProvider") - public void defaultExecutions(PolicyFsTestConfig fsConfig) throws Throwable { + public void defaultExecutions(PolicyFsTestConfig fsConfig) throws IOException { Map tConfig = fsConfig.getSourceTaskConfig().originalsStrings(); tConfig.put(SleepyPolicy.SLEEPY_POLICY_SLEEP_MS, "1"); tConfig.remove(SleepyPolicy.SLEEPY_POLICY_MAX_EXECS); FsSourceTaskConfig sleepConfig = new FsSourceTaskConfig(tConfig); - Policy policy = ReflectionUtils.makePolicy( - (Class) fsConfig.getSourceTaskConfig().getClass(FsSourceTaskConfig.POLICY_CLASS), sleepConfig); + Policy policy = ReflectionUtils.makePolicy((Class) fsConfig.getSourceTaskConfig() + .getClass(FsSourceTaskConfig.POLICY_CLASS), sleepConfig); //it never ends for (int i = 0; i < 100; i++) { diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/task/FsSourceTaskTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/task/FsSourceTaskTest.java index 8dd610a..b4b5a4e 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/task/FsSourceTaskTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/task/FsSourceTaskTest.java @@ -81,11 +81,11 @@ public void initTask() { EasyMock.expect(offsetStorageReader.offset(EasyMock.anyObject())) .andReturn(new HashMap() {{ - put("offset", 5L); + put("offset", (long) (NUM_RECORDS / 2)); }}); EasyMock.expect(offsetStorageReader.offset(EasyMock.anyObject())) .andReturn(new HashMap() {{ - put("offset", 5L); + put("offset", (long) (NUM_RECORDS / 2)); }}); EasyMock.checkOrder(taskContext, false); @@ -152,7 +152,7 @@ public void oneFilePerFs(TaskFsTestConfig fsConfig) throws IOException { fsConfig.getTask().start(fsConfig.getTaskConfig()); List records = fsConfig.getTask().poll(); - assertEquals(10, records.size()); + assertEquals((NUM_RECORDS * fsConfig.getDirectories().size()) / 2, records.size()); checkRecords(records); //policy has ended assertNull(fsConfig.getTask().poll()); diff --git a/src/test/resources/log4j.properties b/src/test/resources/log4j.properties index 493f160..bb7782f 100644 --- a/src/test/resources/log4j.properties +++ b/src/test/resources/log4j.properties @@ -7,6 +7,7 @@ log4j.appender.stdout.Target=System.out log4j.appender.stdout.layout=org.apache.log4j.PatternLayout log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c:%L - %m%n +log4j.logger.com.github.mmolimar.kafka.connect.fs=TRACE log4j.logger.org.apache.hadoop=ERROR log4j.logger.BlockStateChange=WARN log4j.logger.org.apache.parquet=WARN From 
543a3a3b91634d1675316f86a9d797096afb3c40 Mon Sep 17 00:00:00 2001 From: Mario Molina Date: Fri, 17 Apr 2020 21:03:35 -0500 Subject: [PATCH 37/51] Schema support for Univocity file readers --- .../fs/file/reader/UnivocityFileReader.java | 124 ++++++++++++++++-- .../fs/file/reader/CsvFileReaderTest.java | 37 ++++-- .../file/reader/FixedWidthFileReaderTest.java | 26 ++-- .../fs/file/reader/TsvFileReaderTest.java | 12 +- .../file/reader/UnivocityFileReaderTest.java | 118 ++++++++++++++++- 5 files changed, 276 insertions(+), 41 deletions(-) diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/UnivocityFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/UnivocityFileReader.java index 490af2d..fb93116 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/UnivocityFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/UnivocityFileReader.java @@ -18,8 +18,11 @@ import java.io.InputStreamReader; import java.io.Reader; import java.nio.charset.Charset; +import java.util.Arrays; +import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.stream.Collectors; import java.util.stream.IntStream; import static com.github.mmolimar.kafka.connect.fs.FsSourceTaskConfig.FILE_READER_PREFIX; @@ -34,6 +37,8 @@ abstract class UnivocityFileReader> protected static final String FILE_READER_DELIMITED_SETTINGS_FORMAT = FILE_READER_DELIMITED_SETTINGS + "format."; public static final String FILE_READER_DELIMITED_SETTINGS_HEADER = FILE_READER_DELIMITED_SETTINGS + "header"; + public static final String FILE_READER_DELIMITED_SETTINGS_SCHEMA = FILE_READER_DELIMITED_SETTINGS + "schema"; + public static final String FILE_READER_DELIMITED_SETTINGS_DATA_TYPE_MAPPING_ERROR = FILE_READER_DELIMITED_SETTINGS + "data_type_mapping_error"; public static final String FILE_READER_DELIMITED_SETTINGS_HEADER_NAMES = FILE_READER_DELIMITED_SETTINGS + "header_names"; public static final String FILE_READER_DELIMITED_SETTINGS_LINE_SEPARATOR_DETECTION = FILE_READER_DELIMITED_SETTINGS + "line_separator_detection"; public static final String FILE_READER_DELIMITED_SETTINGS_NULL_VALUE = FILE_READER_DELIMITED_SETTINGS + "null_value"; @@ -56,28 +61,43 @@ abstract class UnivocityFileReader> private Schema schema; private Charset charset; private CompressionType compression; + private boolean dataTypeMappingError; private boolean closed; private ResultIterator iterator; + public enum DataType { + BYTE, + SHORT, + INT, + LONG, + FLOAT, + DOUBLE, + BOOLEAN, + BYTES, + STRING + } + public UnivocityFileReader(FileSystem fs, Path filePath, Map config) throws IOException { super(fs, filePath, new UnivocityToStruct(), config); this.iterator = iterateRecords(); - this.schema = buildSchema(this.iterator, settings.isHeaderExtractionEnabled()); + this.schema = buildSchema(this.iterator, settings.isHeaderExtractionEnabled(), config); } - private Schema buildSchema(ResultIterator it, boolean hasHeader) { + private Schema buildSchema(ResultIterator it, boolean hasHeader, Map config) { SchemaBuilder builder = SchemaBuilder.struct(); if (it.hasNext() && !hasHeader) { Record first = it.next(); + List dataTypes = getDataTypes(config, first.getValues()); IntStream.range(0, first.getValues().length) - .forEach(index -> builder.field(DEFAULT_COLUMN_NAME + ++index, SchemaBuilder.STRING_SCHEMA)); + .forEach(index -> builder.field(DEFAULT_COLUMN_NAME + (index + 1), dataTypes.get(index))); seek(0); } else if (hasHeader) { 
Optional.ofNullable(it.getContext().headers()).ifPresent(headers -> { + List dataTypes = getDataTypes(config, headers); IntStream.range(0, headers.length) - .forEach(index -> builder.field(headers[index], SchemaBuilder.STRING_SCHEMA)); + .forEach(index -> builder.field(headers[index], dataTypes.get(index))); }); } return builder.build(); @@ -91,6 +111,49 @@ protected void configure(Map config) { this.compression = CompressionType.fromName(cType, concatenated); this.charset = Charset.forName(config.getOrDefault(FILE_READER_DELIMITED_ENCODING, Charset.defaultCharset().name())); this.settings = allSettings(config); + this.dataTypeMappingError = Boolean.parseBoolean( + config.getOrDefault(FILE_READER_DELIMITED_SETTINGS_DATA_TYPE_MAPPING_ERROR, "true")); + } + + private List getDataTypes(Map config, String[] headers) { + List dataTypes = Arrays + .stream(config.getOrDefault(FILE_READER_DELIMITED_SETTINGS_SCHEMA, "").toString().split(",")) + .filter(dt -> !dt.trim().isEmpty()) + .map(this::strToSchema) + .collect(Collectors.toList()); + if (dataTypes.size() > 0 && dataTypes.size() != headers.length) { + throw new IllegalArgumentException("The schema defined in property '" + FILE_READER_DELIMITED_SETTINGS_SCHEMA + + "' does not match the number of fields inferred in the file."); + } else if (dataTypes.size() == 0) { + return IntStream.range(0, headers.length) + .mapToObj(index -> Schema.STRING_SCHEMA) + .collect(Collectors.toList()); + } + return dataTypes; + } + + private Schema strToSchema(String dataType) { + switch (DataType.valueOf(dataType.trim().toUpperCase())) { + case BYTE: + return this.dataTypeMappingError ? Schema.INT8_SCHEMA : Schema.OPTIONAL_INT8_SCHEMA; + case SHORT: + return this.dataTypeMappingError ? Schema.INT16_SCHEMA : Schema.OPTIONAL_INT16_SCHEMA; + case INT: + return this.dataTypeMappingError ? Schema.INT32_SCHEMA : Schema.OPTIONAL_INT32_SCHEMA; + case LONG: + return this.dataTypeMappingError ? Schema.INT64_SCHEMA : Schema.OPTIONAL_INT64_SCHEMA; + case FLOAT: + return this.dataTypeMappingError ? Schema.FLOAT32_SCHEMA : Schema.OPTIONAL_FLOAT32_SCHEMA; + case DOUBLE: + return this.dataTypeMappingError ? Schema.FLOAT64_SCHEMA : Schema.OPTIONAL_FLOAT64_SCHEMA; + case BOOLEAN: + return this.dataTypeMappingError ? Schema.BOOLEAN_SCHEMA : Schema.OPTIONAL_BOOLEAN_SCHEMA; + case BYTES: + return this.dataTypeMappingError ? Schema.BYTES_SCHEMA : Schema.OPTIONAL_BYTES_SCHEMA; + case STRING: + default: + return this.dataTypeMappingError ? 
Schema.STRING_SCHEMA : Schema.OPTIONAL_STRING_SCHEMA; + } } private T allSettings(Map config) { @@ -144,8 +207,7 @@ private ResultIterator iterateRecords() throws IOExcepti @Override protected final UnivocityRecord nextRecord() { incrementOffset(); - Record record = iterator.next(); - return new UnivocityRecord(schema, record.getValues()); + return new UnivocityRecord(schema, iterator.next(), dataTypeMappingError); } @Override @@ -184,19 +246,59 @@ static class UnivocityToStruct implements ReaderAdapter { public Struct apply(UnivocityRecord record) { Struct struct = new Struct(record.schema); IntStream.range(0, record.schema.fields().size()) - .filter(index -> index < record.values.length) - .forEach(index -> struct.put(record.schema.fields().get(index).name(), record.values[index])); + .filter(index -> index < record.value.getValues().length) + .forEach(index -> { + Schema.Type type = record.schema.fields().get(index).schema().type(); + String fieldName = record.schema.fields().get(index).name(); + struct.put(fieldName, mapDatatype(type, record.value, index, record.dataTypeMappingError)); + }); return struct; } + + private Object mapDatatype(Schema.Type type, Record record, int fieldIndex, boolean dataTypeMappingError) { + try { + switch (type) { + case INT8: + return record.getByte(fieldIndex); + case INT16: + return record.getShort(fieldIndex); + case INT32: + return record.getInt(fieldIndex); + case INT64: + return record.getLong(fieldIndex); + case FLOAT32: + return record.getFloat(fieldIndex); + case FLOAT64: + return record.getDouble(fieldIndex); + case BOOLEAN: + return record.getBoolean(fieldIndex); + case BYTES: + return record.getString(fieldIndex).getBytes(); + case ARRAY: + case MAP: + case STRUCT: + case STRING: + default: + return record.getString(fieldIndex); + } + } catch (RuntimeException re) { + if (dataTypeMappingError) { + throw re; + } + return null; + } + } } static class UnivocityRecord { private final Schema schema; - private final String[] values; + private final Record value; + private final boolean dataTypeMappingError; - UnivocityRecord(Schema schema, String[] values) { + UnivocityRecord(Schema schema, Record value, boolean dataTypeMappingError) { this.schema = schema; - this.values = values; + this.value = value; + this.dataTypeMappingError = dataTypeMappingError; } } } diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/CsvFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/CsvFileReaderTest.java index a1247d5..3eba9c0 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/CsvFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/CsvFileReaderTest.java @@ -11,7 +11,6 @@ import java.io.PrintWriter; import java.util.HashMap; import java.util.Map; -import java.util.UUID; import java.util.stream.IntStream; import static org.junit.jupiter.api.Assertions.*; @@ -25,11 +24,15 @@ protected Path createDataFile(ReaderFsTestConfig fsConfig, Object... args) throw File txtFile = File.createTempFile("test-", "." 
+ getFileExtension()); try (PrintWriter writer = new PrintWriter(getOutputStream(txtFile, compression))) { if (header) { - writer.append(FIELD_COLUMN1 + "#" + FIELD_COLUMN2 + "#" + FIELD_COLUMN3 + "#" + FIELD_COLUMN4 + "\n"); + String headerValue = String.join("#", FIELD_COLUMN1, FIELD_COLUMN2, FIELD_COLUMN3, FIELD_COLUMN4, + FIELD_COLUMN5, FIELD_COLUMN6, FIELD_COLUMN7, FIELD_COLUMN8, FIELD_COLUMN9); + writer.append(headerValue + "\n"); } IntStream.range(0, NUM_RECORDS).forEach(index -> { - String value = String.format("%d_%s", index, UUID.randomUUID()); - writer.append(value + "#" + value + "#" + value + "#" + value + "\n"); + String value = String.format("%d#%d#%d#%d#%f#%f#%s#%s#%s\n", + (byte) 2, (short) 4, 8, 16L, 32.32f, 64.64d, + true, "test bytes", "test string"); + writer.append(value); fsConfig.offsetsByIndex().put(index, (long) index); }); } @@ -43,16 +46,18 @@ protected Path createDataFile(ReaderFsTestConfig fsConfig, Object... args) throw public void readAllDataWithMalformedRows(ReaderFsTestConfig fsConfig) throws IOException { File tmp = File.createTempFile("test-", "." + getFileExtension()); try (FileWriter writer = new FileWriter(tmp)) { - writer.append(FIELD_COLUMN1 + "," + FIELD_COLUMN2 + "," + FIELD_COLUMN3 + "," + FIELD_COLUMN4 + "\n"); - writer.append("dummy,\"\",,dummy\n"); + String headerValue = String.join(",", FIELD_COLUMN1, FIELD_COLUMN2, FIELD_COLUMN3, FIELD_COLUMN4, + FIELD_COLUMN5, FIELD_COLUMN6, FIELD_COLUMN7, FIELD_COLUMN8, FIELD_COLUMN9); + writer.append(headerValue + "\n"); + writer.append(",\"\",,,,,true,test bytes,test string\n"); writer.append("#comment\n"); - writer.append("dummy,\"\",,dummy\n"); + writer.append(",\"\",,,,,true,test bytes,test string\n"); } Map readerConfig = getReaderConfig(); readerConfig.put(CsvFileReader.FILE_READER_DELIMITED_SETTINGS_FORMAT_DELIMITER, ","); readerConfig.put(CsvFileReader.FILE_READER_DELIMITED_SETTINGS_HEADER, "true"); - readerConfig.put(CsvFileReader.FILE_READER_DELIMITED_SETTINGS_EMPTY_VALUE, "empty_value"); - readerConfig.put(CsvFileReader.FILE_READER_DELIMITED_SETTINGS_NULL_VALUE, "null_value"); + readerConfig.put(CsvFileReader.FILE_READER_DELIMITED_SETTINGS_EMPTY_VALUE, "10"); + readerConfig.put(CsvFileReader.FILE_READER_DELIMITED_SETTINGS_NULL_VALUE, "100"); Path path = new Path(new Path(fsConfig.getFsUri()), tmp.getName()); fsConfig.getFs().moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); @@ -64,10 +69,15 @@ public void readAllDataWithMalformedRows(ReaderFsTestConfig fsConfig) throws IOE while (reader.hasNext()) { Struct record = reader.next(); assertAll( - () -> assertEquals("dummy", record.get(FIELD_COLUMN1)), - () -> assertEquals("empty_value", record.get(FIELD_COLUMN2)), - () -> assertEquals("null_value", record.get(FIELD_COLUMN3)), - () -> assertEquals("dummy", record.get(FIELD_COLUMN4)) + () -> assertEquals(record.get(FIELD_COLUMN1), (byte) 100), + () -> assertEquals(record.get(FIELD_COLUMN2), (short) 10), + () -> assertEquals(record.get(FIELD_COLUMN3), 100), + () -> assertEquals(record.get(FIELD_COLUMN4), 100L), + () -> assertEquals(record.get(FIELD_COLUMN5), 100.00f), + () -> assertEquals(record.get(FIELD_COLUMN6), 100.00d), + () -> assertEquals(record.get(FIELD_COLUMN7), true), + () -> assertEquals(new String((byte[]) record.get(FIELD_COLUMN8)), "test bytes"), + () -> assertEquals(record.get(FIELD_COLUMN9), "test string") ); recordCount++; } @@ -79,6 +89,7 @@ protected Map getReaderConfig() { return new HashMap() {{ put(CsvFileReader.FILE_READER_DELIMITED_SETTINGS_FORMAT_DELIMITER, "#"); 
put(CsvFileReader.FILE_READER_DELIMITED_SETTINGS_HEADER, "true"); + put(CsvFileReader.FILE_READER_DELIMITED_SETTINGS_SCHEMA, "byte,short,int,long,float,double,boolean,bytes,string"); }}; } } diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FixedWidthFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FixedWidthFileReaderTest.java index 6f0ff01..8b1fedc 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FixedWidthFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/FixedWidthFileReaderTest.java @@ -8,13 +8,12 @@ import java.util.Arrays; import java.util.HashMap; import java.util.Map; -import java.util.UUID; import java.util.stream.Collectors; import java.util.stream.IntStream; public class FixedWidthFileReaderTest extends UnivocityFileReaderTest { - private static final int[] fieldLengths = new int[]{45, 53, 71, 89}; + private static final int[] fieldLengths = new int[]{45, 53, 71, 89, 14, 44, 67, 46, 75}; @Override protected Path createDataFile(ReaderFsTestConfig fsConfig, Object... args) throws IOException { @@ -26,14 +25,24 @@ protected Path createDataFile(ReaderFsTestConfig fsConfig, Object... args) throw writer.append(String.format("%-" + fieldLengths[0] + "s", FIELD_COLUMN1) + String.format("%-" + fieldLengths[1] + "s", FIELD_COLUMN2) + String.format("%-" + fieldLengths[2] + "s", FIELD_COLUMN3) + - String.format("%-" + fieldLengths[3] + "s", FIELD_COLUMN4) + "\n"); + String.format("%-" + fieldLengths[3] + "s", FIELD_COLUMN4) + + String.format("%-" + fieldLengths[4] + "s", FIELD_COLUMN5) + + String.format("%-" + fieldLengths[5] + "s", FIELD_COLUMN6) + + String.format("%-" + fieldLengths[6] + "s", FIELD_COLUMN7) + + String.format("%-" + fieldLengths[7] + "s", FIELD_COLUMN8) + + String.format("%-" + fieldLengths[8] + "s", FIELD_COLUMN9) + "\n"); } IntStream.range(0, NUM_RECORDS).forEach(index -> { - String value = String.format("%d_%s", index, UUID.randomUUID()); - writer.append(String.format("%-" + fieldLengths[0] + "s", value) + - String.format("%-" + fieldLengths[1] + "s", value) + - String.format("%-" + fieldLengths[2] + "s", value) + - String.format("%-" + fieldLengths[3] + "s", value) + "\n"); + writer.append(String.format("%-" + fieldLengths[0] + "s", String.format("%d", (byte) 2)) + + String.format("%-" + fieldLengths[1] + "s", String.format("%d", (short) 4)) + + String.format("%-" + fieldLengths[2] + "s", String.format("%d", 8)) + + String.format("%-" + fieldLengths[3] + "s", String.format("%d", 16L)) + + String.format("%-" + fieldLengths[4] + "s", String.format("%f", 32.32f)) + + String.format("%-" + fieldLengths[5] + "s", String.format("%f", 64.64d)) + + String.format("%-" + fieldLengths[6] + "s", String.format("%s", true)) + + String.format("%-" + fieldLengths[7] + "s", String.format("%s", "test bytes")) + + String.format("%-" + fieldLengths[8] + "s", String.format("%s", "test string")) + "\n" + ); fsConfig.offsetsByIndex().put(index, (long) index); }); } @@ -48,6 +57,7 @@ protected Map getReaderConfig() { put(FixedWidthFileReader.FILE_READER_DELIMITED_SETTINGS_HEADER, "true"); put(FixedWidthFileReader.FILE_READER_DELIMITED_SETTINGS_FIELD_LENGTHS, Arrays.stream(fieldLengths).mapToObj(String::valueOf).collect(Collectors.joining(","))); + put(FixedWidthFileReader.FILE_READER_DELIMITED_SETTINGS_SCHEMA, "byte,short,int,long,float,double,boolean,bytes,string"); }}; } diff --git 
a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/TsvFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/TsvFileReaderTest.java index 2f94f28..d82a50e 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/TsvFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/TsvFileReaderTest.java @@ -7,7 +7,6 @@ import java.io.PrintWriter; import java.util.HashMap; import java.util.Map; -import java.util.UUID; import java.util.stream.IntStream; public class TsvFileReaderTest extends UnivocityFileReaderTest { @@ -19,11 +18,15 @@ protected Path createDataFile(ReaderFsTestConfig fsConfig, Object... args) throw File txtFile = File.createTempFile("test-", "." + getFileExtension()); try (PrintWriter writer = new PrintWriter(getOutputStream(txtFile, compression))) { if (header) { - writer.append(FIELD_COLUMN1 + "\t" + FIELD_COLUMN2 + "\t" + FIELD_COLUMN3 + "\t" + FIELD_COLUMN4 + "\n"); + String headerValue = String.join("\t", FIELD_COLUMN1, FIELD_COLUMN2, FIELD_COLUMN3, FIELD_COLUMN4, + FIELD_COLUMN5, FIELD_COLUMN6, FIELD_COLUMN7, FIELD_COLUMN8, FIELD_COLUMN9); + writer.append(headerValue + "\n"); } IntStream.range(0, NUM_RECORDS).forEach(index -> { - String value = String.format("%d_%s", index, UUID.randomUUID()); - writer.append(value + "\t" + value + "\t" + value + "\t" + value + "\n"); + String value = String.format("%d\t%d\t%d\t%d\t%f\t%f\t%s\t%s\t%s\n", + (byte) 2, (short) 4, 8, 16L, 32.32f, 64.64d, + true, "test bytes", "test string"); + writer.append(value); fsConfig.offsetsByIndex().put(index, (long) index); }); } @@ -36,6 +39,7 @@ protected Path createDataFile(ReaderFsTestConfig fsConfig, Object... args) throw protected Map getReaderConfig() { return new HashMap() {{ put(TsvFileReader.FILE_READER_DELIMITED_SETTINGS_HEADER, "true"); + put(TsvFileReader.FILE_READER_DELIMITED_SETTINGS_SCHEMA, "byte,short,int,long,float,double,boolean,bytes,string"); }}; } } diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/UnivocityFileReaderTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/UnivocityFileReaderTest.java index a5e8d9e..79663bc 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/UnivocityFileReaderTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/file/reader/UnivocityFileReaderTest.java @@ -1,5 +1,6 @@ package com.github.mmolimar.kafka.connect.fs.file.reader; +import com.univocity.parsers.common.DataProcessingException; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.kafka.connect.data.Struct; @@ -25,6 +26,11 @@ abstract class UnivocityFileReaderTest extends Fi protected static final String FIELD_COLUMN2 = "column_2"; protected static final String FIELD_COLUMN3 = "column_3"; protected static final String FIELD_COLUMN4 = "column_4"; + protected static final String FIELD_COLUMN5 = "column_5"; + protected static final String FIELD_COLUMN6 = "column_6"; + protected static final String FIELD_COLUMN7 = "column_7"; + protected static final String FIELD_COLUMN8 = "column_8"; + protected static final String FIELD_COLUMN9 = "column_9"; protected static final String FILE_EXTENSION = "tcsv"; protected static final CompressionType COMPRESSION_TYPE_DEFAULT = CompressionType.NONE; @@ -46,7 +52,14 @@ public void invalidFileFormat(ReaderFsTestConfig fsConfig) throws IOException { } Path path = new Path(new Path(fsConfig.getFsUri()), tmp.getName()); 
fsConfig.getFs().moveFromLocalFile(new Path(tmp.getAbsolutePath()), path); - getReader(fsConfig.getFs(), path, getReaderConfig()); + assertThrows(ConnectException.class, () -> getReader(fsConfig.getFs(), path, getReaderConfig())); + assertThrows(IllegalArgumentException.class, () -> { + try { + getReader(fsConfig.getFs(), path, getReaderConfig()); + } catch (Exception ce) { + throw ce.getCause(); + } + }); } @ParameterizedTest @@ -81,6 +94,68 @@ public void readAllDataWithoutHeader(ReaderFsTestConfig fsConfig) throws IOExcep assertEquals(NUM_RECORDS, recordCount, "The number of records in the file does not match"); } + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void readAllDataWithoutSchema(ReaderFsTestConfig fsConfig) throws IOException { + Path file = createDataFile(fsConfig, true); + Map readerConfig = getReaderConfig(); + readerConfig.remove(T.FILE_READER_DELIMITED_SETTINGS_SCHEMA); + FileReader reader = getReader(fsConfig.getFs(), file, readerConfig); + + assertTrue(reader.hasNext()); + + int recordCount = 0; + while (reader.hasNext()) { + Struct record = reader.next(); + checkDataString(record); + recordCount++; + } + assertEquals(NUM_RECORDS, recordCount, "The number of records in the file does not match"); + } + + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void readAllDataWithMappingErrors(ReaderFsTestConfig fsConfig) throws IOException { + Path file = createDataFile(fsConfig, true); + Map readerConfig = getReaderConfig(); + readerConfig.put(T.FILE_READER_DELIMITED_SETTINGS_SCHEMA, "boolean,boolean,boolean,boolean,boolean,boolean,int,long,double"); + FileReader reader = getReader(fsConfig.getFs(), file, readerConfig); + + assertTrue(reader.hasNext()); + + int recordCount = 0; + while (reader.hasNext()) { + try { + reader.next(); + } catch (Exception e) { + assertEquals(ConnectException.class, e.getClass()); + assertEquals(DataProcessingException.class, e.getCause().getClass()); + } + recordCount++; + } + assertEquals(NUM_RECORDS, recordCount, "The number of records in the file does not match"); + } + + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void readAllDataToleratingMappingErrors(ReaderFsTestConfig fsConfig) throws IOException { + Path file = createDataFile(fsConfig, true); + Map readerConfig = getReaderConfig(); + readerConfig.put(T.FILE_READER_DELIMITED_SETTINGS_SCHEMA, "boolean,boolean,boolean,boolean,boolean,boolean,int,long,double"); + readerConfig.put(T.FILE_READER_DELIMITED_SETTINGS_DATA_TYPE_MAPPING_ERROR, "false"); + FileReader reader = getReader(fsConfig.getFs(), file, readerConfig); + + assertTrue(reader.hasNext()); + + int recordCount = 0; + while (reader.hasNext()) { + Struct record = reader.next(); + checkDataNull(record); + recordCount++; + } + assertEquals(NUM_RECORDS, recordCount, "The number of records in the file does not match"); + } + @ParameterizedTest @MethodSource("fileSystemConfigProvider") public void readDifferentCompressionTypes(ReaderFsTestConfig fsConfig) { @@ -174,10 +249,43 @@ protected Class getReaderClass() { @Override protected void checkData(Struct record, long index) { assertAll( - () -> assertTrue(record.get(FIELD_COLUMN1).toString().startsWith(index + "_")), - () -> assertTrue(record.get(FIELD_COLUMN2).toString().startsWith(index + "_")), - () -> assertTrue(record.get(FIELD_COLUMN3).toString().startsWith(index + "_")), - () -> assertTrue(record.get(FIELD_COLUMN4).toString().startsWith(index + "_")) + () -> assertEquals(record.get(FIELD_COLUMN1), (byte) 
2), + () -> assertEquals(record.get(FIELD_COLUMN2), (short) 4), + () -> assertEquals(record.get(FIELD_COLUMN3), 8), + () -> assertEquals(record.get(FIELD_COLUMN4), 16L), + () -> assertEquals(record.get(FIELD_COLUMN5), 32.32f), + () -> assertEquals(record.get(FIELD_COLUMN6), 64.64d), + () -> assertEquals(record.get(FIELD_COLUMN7), true), + () -> assertEquals(new String((byte[]) record.get(FIELD_COLUMN8)), "test bytes"), + () -> assertEquals(record.get(FIELD_COLUMN9), "test string") + ); + } + + protected void checkDataString(Struct record) { + assertAll( + () -> assertEquals(record.get(FIELD_COLUMN1), "2"), + () -> assertEquals(record.get(FIELD_COLUMN2), "4"), + () -> assertEquals(record.get(FIELD_COLUMN3), "8"), + () -> assertEquals(record.get(FIELD_COLUMN4), "16"), + () -> assertEquals(record.get(FIELD_COLUMN5), "32.320000"), + () -> assertEquals(record.get(FIELD_COLUMN6), "64.640000"), + () -> assertEquals(record.get(FIELD_COLUMN7), "true"), + () -> assertEquals(record.get(FIELD_COLUMN8), "test bytes"), + () -> assertEquals(record.get(FIELD_COLUMN9), "test string") + ); + } + + protected void checkDataNull(Struct record) { + assertAll( + () -> assertEquals(record.get(FIELD_COLUMN1), null), + () -> assertEquals(record.get(FIELD_COLUMN2), null), + () -> assertEquals(record.get(FIELD_COLUMN3), null), + () -> assertEquals(record.get(FIELD_COLUMN4), null), + () -> assertEquals(record.get(FIELD_COLUMN5), null), + () -> assertEquals(record.get(FIELD_COLUMN6), null), + () -> assertEquals(record.get(FIELD_COLUMN7), null), + () -> assertEquals(record.get(FIELD_COLUMN8), null), + () -> assertEquals(record.get(FIELD_COLUMN9), null) ); } From d8907091a1740151230475ff4697864f124001e7 Mon Sep 17 00:00:00 2001 From: Mario Molina Date: Sun, 19 Apr 2020 14:34:08 -0500 Subject: [PATCH 38/51] Allow nullable fields in Univocity file readers --- .../fs/file/reader/UnivocityFileReader.java | 27 ++++++++++++------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/UnivocityFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/UnivocityFileReader.java index fb93116..25a685d 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/UnivocityFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/UnivocityFileReader.java @@ -39,6 +39,7 @@ abstract class UnivocityFileReader> public static final String FILE_READER_DELIMITED_SETTINGS_HEADER = FILE_READER_DELIMITED_SETTINGS + "header"; public static final String FILE_READER_DELIMITED_SETTINGS_SCHEMA = FILE_READER_DELIMITED_SETTINGS + "schema"; public static final String FILE_READER_DELIMITED_SETTINGS_DATA_TYPE_MAPPING_ERROR = FILE_READER_DELIMITED_SETTINGS + "data_type_mapping_error"; + public static final String FILE_READER_DELIMITED_SETTINGS_ALLOW_NULLS = FILE_READER_DELIMITED_SETTINGS + "allow_nulls"; public static final String FILE_READER_DELIMITED_SETTINGS_HEADER_NAMES = FILE_READER_DELIMITED_SETTINGS + "header_names"; public static final String FILE_READER_DELIMITED_SETTINGS_LINE_SEPARATOR_DETECTION = FILE_READER_DELIMITED_SETTINGS + "line_separator_detection"; public static final String FILE_READER_DELIMITED_SETTINGS_NULL_VALUE = FILE_READER_DELIMITED_SETTINGS + "null_value"; @@ -62,6 +63,7 @@ abstract class UnivocityFileReader> private Charset charset; private CompressionType compression; private boolean dataTypeMappingError; + private boolean allowNulls; private boolean closed; private ResultIterator iterator; @@ 
-113,6 +115,13 @@ protected void configure(Map config) { this.settings = allSettings(config); this.dataTypeMappingError = Boolean.parseBoolean( config.getOrDefault(FILE_READER_DELIMITED_SETTINGS_DATA_TYPE_MAPPING_ERROR, "true")); + if (this.dataTypeMappingError) { + this.allowNulls = Boolean.parseBoolean( + config.getOrDefault(FILE_READER_DELIMITED_SETTINGS_ALLOW_NULLS, "false")); + } else { + this.allowNulls = true; + } + } private List getDataTypes(Map config, String[] headers) { @@ -135,24 +144,24 @@ private List getDataTypes(Map config, String[] headers) private Schema strToSchema(String dataType) { switch (DataType.valueOf(dataType.trim().toUpperCase())) { case BYTE: - return this.dataTypeMappingError ? Schema.INT8_SCHEMA : Schema.OPTIONAL_INT8_SCHEMA; + return dataTypeMappingError && !allowNulls ? Schema.INT8_SCHEMA : Schema.OPTIONAL_INT8_SCHEMA; case SHORT: - return this.dataTypeMappingError ? Schema.INT16_SCHEMA : Schema.OPTIONAL_INT16_SCHEMA; + return dataTypeMappingError && !allowNulls ? Schema.INT16_SCHEMA : Schema.OPTIONAL_INT16_SCHEMA; case INT: - return this.dataTypeMappingError ? Schema.INT32_SCHEMA : Schema.OPTIONAL_INT32_SCHEMA; + return dataTypeMappingError && !allowNulls ? Schema.INT32_SCHEMA : Schema.OPTIONAL_INT32_SCHEMA; case LONG: - return this.dataTypeMappingError ? Schema.INT64_SCHEMA : Schema.OPTIONAL_INT64_SCHEMA; + return dataTypeMappingError && !allowNulls ? Schema.INT64_SCHEMA : Schema.OPTIONAL_INT64_SCHEMA; case FLOAT: - return this.dataTypeMappingError ? Schema.FLOAT32_SCHEMA : Schema.OPTIONAL_FLOAT32_SCHEMA; + return dataTypeMappingError && !allowNulls ? Schema.FLOAT32_SCHEMA : Schema.OPTIONAL_FLOAT32_SCHEMA; case DOUBLE: - return this.dataTypeMappingError ? Schema.FLOAT64_SCHEMA : Schema.OPTIONAL_FLOAT64_SCHEMA; + return dataTypeMappingError && !allowNulls ? Schema.FLOAT64_SCHEMA : Schema.OPTIONAL_FLOAT64_SCHEMA; case BOOLEAN: - return this.dataTypeMappingError ? Schema.BOOLEAN_SCHEMA : Schema.OPTIONAL_BOOLEAN_SCHEMA; + return dataTypeMappingError && !allowNulls ? Schema.BOOLEAN_SCHEMA : Schema.OPTIONAL_BOOLEAN_SCHEMA; case BYTES: - return this.dataTypeMappingError ? Schema.BYTES_SCHEMA : Schema.OPTIONAL_BYTES_SCHEMA; + return dataTypeMappingError && !allowNulls ? Schema.BYTES_SCHEMA : Schema.OPTIONAL_BYTES_SCHEMA; case STRING: default: - return this.dataTypeMappingError ? Schema.STRING_SCHEMA : Schema.OPTIONAL_STRING_SCHEMA; + return dataTypeMappingError && !allowNulls ? 
Schema.STRING_SCHEMA : Schema.OPTIONAL_STRING_SCHEMA; } } From b679d1536fcd48fa229127759069cc44954668be Mon Sep 17 00:00:00 2001 From: Mario Molina Date: Tue, 21 Apr 2020 21:43:21 -0500 Subject: [PATCH 39/51] Support for Google Cloud Storage, Azure Blob Storage and Azure Data Lake Store --- pom.xml | 16 ++++++++++++++++ .../services/org.apache.hadoop.fs.FileSystem | 15 +++++++++++++++ 2 files changed, 31 insertions(+) create mode 100644 src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem diff --git a/pom.xml b/pom.xml index 130728a..6ff4fbd 100644 --- a/pom.xml +++ b/pom.xml @@ -14,6 +14,7 @@ 2.4.1 5.4.1 3.2.1 + hadoop3-2.1.2 1.11.0 2.8.4 9.0.2 @@ -52,6 +53,21 @@ hadoop-aws ${hadoop.version} + + org.apache.hadoop + hadoop-azure + ${hadoop.version} + + + org.apache.hadoop + hadoop-azure-datalake + ${hadoop.version} + + + com.google.cloud.bigdataoss + gcs-connector + ${gcs-connector.version} + org.apache.parquet parquet-avro diff --git a/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem b/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem new file mode 100644 index 0000000..de86f4a --- /dev/null +++ b/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem @@ -0,0 +1,15 @@ +org.apache.hadoop.fs.LocalFileSystem +org.apache.hadoop.fs.viewfs.ViewFileSystem +org.apache.hadoop.fs.HarFileSystem +org.apache.hadoop.fs.http.HttpFileSystem +org.apache.hadoop.fs.http.HttpsFileSystem +org.apache.hadoop.fs.ftp.FTPFileSystem +org.apache.hadoop.hdfs.DistributedFileSystem +org.apache.hadoop.fs.s3a.S3AFileSystem +org.apache.hadoop.fs.s3native.NativeS3FileSystem +org.apache.hadoop.fs.adl.AdlFileSystem +org.apache.hadoop.fs.azure.NativeAzureFileSystem +org.apache.hadoop.fs.azure.NativeAzureFileSystem$Secure +org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem +org.apache.hadoop.fs.azurebfs.SecureAzureBlobFileSystem +com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem From 64624352b36744818427c11158486ccb1b8e5678 Mon Sep 17 00:00:00 2001 From: Mario Molina Date: Tue, 21 Apr 2020 21:44:34 -0500 Subject: [PATCH 40/51] Set next offset to source record to commit --- .../com/github/mmolimar/kafka/connect/fs/FsSourceTask.java | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceTask.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceTask.java index 51a9e3d..a5ceb6c 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceTask.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceTask.java @@ -70,7 +70,7 @@ public List poll() { try (FileReader reader = policy.offer(metadata, context.offsetStorageReader())) { log.info("Processing records for file {}", metadata); while (reader.hasNext()) { - records.add(convert(metadata, reader.currentOffset(), reader.next())); + records.add(convert(metadata, reader.currentOffset() + 1, reader.next())); } } catch (ConnectException | IOException e) { //when an exception happens reading a file, the connector continues @@ -85,9 +85,7 @@ public List poll() { private Stream filesToProcess() { try { return asStream(policy.execute()) - .filter(metadata -> metadata.getLen() > 0) - .collect(Collectors.toList()) - .stream(); + .filter(metadata -> metadata.getLen() > 0); } catch (IOException | ConnectException e) { //when an exception happens executing the policy, the connector continues log.error("Cannot retrieve files to process from the FS: {}. 
" + From c9372dfa696230b071b27b634f81f875cc5a457a Mon Sep 17 00:00:00 2001 From: Mario Molina Date: Fri, 24 Apr 2020 19:31:40 -0500 Subject: [PATCH 41/51] Upgrade to Kafka 2.5.0 and Confluent 5.5.0 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 6ff4fbd..699c039 100644 --- a/pom.xml +++ b/pom.xml @@ -11,8 +11,8 @@ UTF-8 - 2.4.1 - 5.4.1 + 2.5.0 + 5.5.0 3.2.1 hadoop3-2.1.2 1.11.0 From 26b3f0ef7ab84e0f5f0fdf396d656d93866c77ed Mon Sep 17 00:00:00 2001 From: Mario Molina Date: Fri, 24 Apr 2020 21:04:03 -0500 Subject: [PATCH 42/51] Updating documentation --- docs/source/config_options.rst | 72 ++++++++++++++++++++++++++++++++++ docs/source/connector.rst | 13 +++--- 2 files changed, 80 insertions(+), 5 deletions(-) diff --git a/docs/source/config_options.rst b/docs/source/config_options.rst index 70c8b6c..1cce34f 100644 --- a/docs/source/config_options.rst +++ b/docs/source/config_options.rst @@ -315,6 +315,30 @@ To configure custom properties for this reader, the name you must use is ``delim * Default: ``false`` * Importance: high +``file_reader.delimited.settings.schema`` + A comma-separated list of ordered data types for each field in the file. Possible values: ``byte``, ``short``, + ``int``, ``long``, ``float``, ``double``, ``boolean``, ``bytes`` and ``string``) + + * Type: string[] + * Default: ``null`` + * Importance: medium + +``file_reader.delimited.settings.data_type_mapping_error`` + Flag to enable/disable throwing errors when mapping data types based on the schema is not possible. If disabled, + the returned value which could not be mapped will be ``null``. + + * Type: boolean + * Default: ``true`` + * Importance: medium + +``file_reader.delimited.settings.allow_nulls`` + If the schema supports nullable fields. If ``file_reader.delimited.settings.data_type_mapping_error`` config flag is + disabled, the value set for this config will be ignored and set to ``true``. + + * Type: boolean + * Default: ``false`` + * Importance: medium + ``file_reader.delimited.settings.header_names`` A comma-separated list of ordered field names to set when reading a file. @@ -456,6 +480,30 @@ To configure custom properties for this reader, the name you must use is ``delim * Default: ``false`` * Importance: high +``file_reader.delimited.settings.schema`` + A comma-separated list of ordered data types for each field in the file. Possible values: ``byte``, ``short``, + ``int``, ``long``, ``float``, ``double``, ``boolean``, ``bytes`` and ``string``) + + * Type: string[] + * Default: ``null`` + * Importance: medium + +``file_reader.delimited.settings.data_type_mapping_error`` + Flag to enable/disable throwing errors when mapping data types based on the schema is not possible. If disabled, + the returned value which could not be mapped will be ``null``. + + * Type: boolean + * Default: ``true`` + * Importance: medium + +``file_reader.delimited.settings.allow_nulls`` + If the schema supports nullable fields. If ``file_reader.delimited.settings.data_type_mapping_error`` config flag is + disabled, the value set for this config will be ignored and set to ``true``. + + * Type: boolean + * Default: ``false`` + * Importance: medium + ``file_reader.delimited.settings.header_names`` A comma-separated list of ordered field names to set when reading a file. 
@@ -590,6 +638,30 @@ To configure custom properties for this reader, the name you must use is ``delim * Default: ``false`` * Importance: high +``file_reader.delimited.settings.schema`` + A comma-separated list of ordered data types for each field in the file. Possible values: ``byte``, ``short``, + ``int``, ``long``, ``float``, ``double``, ``boolean``, ``bytes`` and ``string``) + + * Type: string[] + * Default: ``null`` + * Importance: medium + +``file_reader.delimited.settings.data_type_mapping_error`` + Flag to enable/disable throwing errors when mapping data types based on the schema is not possible. If disabled, + the returned value which could not be mapped will be ``null``. + + * Type: boolean + * Default: ``true`` + * Importance: medium + +``file_reader.delimited.settings.allow_nulls`` + If the schema supports nullable fields. If ``file_reader.delimited.settings.data_type_mapping_error`` config flag is + disabled, the value set for this config will be ignored and set to ``true``. + + * Type: boolean + * Default: ``false`` + * Importance: medium + ``file_reader.delimited.settings.header_names`` A comma-separated list of ordered field names to set when reading a file. diff --git a/docs/source/connector.rst b/docs/source/connector.rst index 6c79317..0e02451 100644 --- a/docs/source/connector.rst +++ b/docs/source/connector.rst @@ -12,9 +12,11 @@ of this abstraction and using it in a transparent way. Among others, these are some file systems it supports: * HDFS. -* WebHDFS. * S3. -* FTP and SFTP. +* Google Cloud Storage. +* Azure Blob Storage & Azure Data Lake Store. +* FTP. +* WebHDFS. * Local File System. * Hadoop Archive File System. @@ -24,8 +26,9 @@ Getting started Prerequisites -------------------------------------------- -- Confluent Platform 5.4.1 +- Apache Kafka 2.5.0 - Java 8 +- Confluent Schema Registry (recommended). Building from source -------------------------------------------- @@ -72,13 +75,13 @@ Running in development .. sourcecode:: bash - export CONFLUENT_HOME=/path/to/confluent/install/dir + export KAFKA_HOME=/path/to/kafka/install/dir .. sourcecode:: bash mvn clean package export CLASSPATH="$(find target/ -type f -name '*.jar'| grep '\-package' | tr '\n' ':')" - $CONFLUENT_HOME/bin/connect-standalone $CONFLUENT_HOME/etc/schema-registry/connect-avro-standalone.properties config/kafka-connect-fs.properties + $KAFKA_HOME/bin/connect-distributed.sh config/kafka-connect-fs.properties Components ============================================ From 6da68ba3d10b7f92704d2a182eb7b967e8fa6ce8 Mon Sep 17 00:00:00 2001 From: Mario Molina Date: Sat, 25 Apr 2020 14:11:21 -0500 Subject: [PATCH 43/51] Updating license file --- LICENSE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LICENSE b/LICENSE index d645695..1c358a6 100644 --- a/LICENSE +++ b/LICENSE @@ -187,7 +187,7 @@ same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright [yyyy] [name of copyright owner] + Copyright 2017 Mario Molina Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
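As a quick reference for the delimited reader settings documented above (``file_reader.delimited.settings.schema``, ``file_reader.delimited.settings.data_type_mapping_error`` and ``file_reader.delimited.settings.allow_nulls``), they can be combined in a connector configuration along the following lines. This is only a sketch in the style of config/kafka-connect-fs.properties: the URI, topic and regexp are placeholder values, and it assumes the CsvFileReader exercised by the tests above.

    name=FsSourceConnector
    connector.class=com.github.mmolimar.kafka.connect.fs.FsSourceConnector
    tasks.max=1
    fs.uris=file:///data
    topic=mytopic
    policy.class=com.github.mmolimar.kafka.connect.fs.policy.SimplePolicy
    policy.recursive=true
    policy.regexp=.*
    file_reader.class=com.github.mmolimar.kafka.connect.fs.file.reader.CsvFileReader
    file_reader.delimited.settings.header=true
    # Ordered data types, one per field; if omitted, every field is read as a string.
    file_reader.delimited.settings.schema=byte,short,int,long,float,double,boolean,bytes,string
    # Keep the default (true) to fail on values that cannot be mapped to the declared type;
    # set it to false to get null for those values instead.
    file_reader.delimited.settings.data_type_mapping_error=true
    # Generate optional (nullable) field types in the Connect schema.
    file_reader.delimited.settings.allow_nulls=true

Note that disabling ``file_reader.delimited.settings.data_type_mapping_error`` forces ``allow_nulls`` to ``true``, as implemented in PATCH 38.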
From e90f36a60652b3e268506ec7f177b3d6cb0d069b Mon Sep 17 00:00:00 2001 From: Mario Molina Date: Sun, 26 Apr 2020 17:32:42 -0500 Subject: [PATCH 44/51] Poll interval ms config --- .../kafka/connect/fs/FsSourceConnector.java | 5 +- .../connect/fs/FsSourceConnectorConfig.java | 31 +++++++-- .../kafka/connect/fs/FsSourceTask.java | 61 +++++++++++----- .../kafka/connect/fs/FsSourceTaskConfig.java | 69 +++++++++++++++++-- 4 files changed, 138 insertions(+), 28 deletions(-) diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceConnector.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceConnector.java index 3689452..839477b 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceConnector.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceConnector.java @@ -50,8 +50,9 @@ public List> taskConfigs(int maxTasks) { } final List> taskConfigs = new ArrayList<>(); - int groups = Math.min(config.getFsUris().size(), maxTasks); - ConnectorUtils.groupPartitions(config.getFsUris(), groups) + List fsUris = config.getFsUris(); + int groups = Math.min(fsUris.size(), maxTasks); + ConnectorUtils.groupPartitions(fsUris, groups) .forEach(dirs -> { Map taskProps = new HashMap<>(config.originalsStrings()); taskProps.put(FsSourceConnectorConfig.FS_URIS, String.join(",", dirs)); diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceConnectorConfig.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceConnectorConfig.java index d20069f..3a3f1ad 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceConnectorConfig.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceConnectorConfig.java @@ -13,9 +13,13 @@ public class FsSourceConnectorConfig extends AbstractConfig { public static final String FS_URIS = "fs.uris"; private static final String FS_URIS_DOC = "Comma-separated URIs of the FS(s)."; + private static final String FS_URIS_DISPLAY = "File system URIs"; public static final String TOPIC = "topic"; private static final String TOPIC_DOC = "Topic to copy data to."; + private static final String TOPIC_DISPLAY = "Topic"; + + private static final String CONNECTOR_GROUP = "Connector"; public FsSourceConnectorConfig(ConfigDef config, Map parsedConfig) { super(config, parsedConfig); @@ -26,9 +30,29 @@ public FsSourceConnectorConfig(Map parsedConfig) { } public static ConfigDef conf() { + int order = 0; return new ConfigDef() - .define(FS_URIS, Type.LIST, Importance.HIGH, FS_URIS_DOC) - .define(TOPIC, Type.STRING, Importance.HIGH, TOPIC_DOC); + .define( + FS_URIS, + Type.LIST, + ConfigDef.NO_DEFAULT_VALUE, + Importance.HIGH, + FS_URIS_DOC, + CONNECTOR_GROUP, + ++order, + ConfigDef.Width.LONG, + FS_URIS_DISPLAY + ).define( + TOPIC, + Type.STRING, + ConfigDef.NO_DEFAULT_VALUE, + Importance.HIGH, + TOPIC_DOC, + CONNECTOR_GROUP, + ++order, + ConfigDef.Width.LONG, + TOPIC_DISPLAY + ); } public List getFsUris() { @@ -38,5 +62,4 @@ public List getFsUris() { public String getTopic() { return this.getString(TOPIC); } - -} \ No newline at end of file +} diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceTask.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceTask.java index a5ceb6c..c3fa38f 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceTask.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceTask.java @@ -6,6 +6,8 @@ import com.github.mmolimar.kafka.connect.fs.util.ReflectionUtils; import 
com.github.mmolimar.kafka.connect.fs.util.Version; import org.apache.kafka.common.config.ConfigException; +import org.apache.kafka.common.utils.SystemTime; +import org.apache.kafka.common.utils.Time; import org.apache.kafka.connect.data.Struct; import org.apache.kafka.connect.errors.ConnectException; import org.apache.kafka.connect.source.SourceRecord; @@ -24,9 +26,17 @@ public class FsSourceTask extends SourceTask { private static final Logger log = LoggerFactory.getLogger(FsSourceTask.class); - private final AtomicBoolean stop = new AtomicBoolean(false); + private final AtomicBoolean stop; + private final Time time; + private FsSourceTaskConfig config; private Policy policy; + private int pollInterval; + + public FsSourceTask() { + this.stop = new AtomicBoolean(false); + this.time = new SystemTime(); + } @Override public String version() { @@ -48,36 +58,47 @@ public void start(Map properties) { } Class policyClass = (Class) Class.forName(properties.get(FsSourceTaskConfig.POLICY_CLASS)); - FsSourceTaskConfig taskConfig = new FsSourceTaskConfig(properties); - policy = ReflectionUtils.makePolicy(policyClass, taskConfig); + policy = ReflectionUtils.makePolicy(policyClass, config); + pollInterval = config.getInt(FsSourceTaskConfig.POLL_INTERVAL_MS); } catch (ConfigException ce) { - log.error("Couldn't start FsSourceTask:", ce); - throw new ConnectException("Couldn't start FsSourceTask due to configuration error", ce); + log.error("Couldn't start FsSourceTask.", ce); + throw new ConnectException("Couldn't start FsSourceTask due to configuration error: " + ce.getMessage(), ce); } catch (Exception e) { - log.error("Couldn't start FsSourceConnector:", e); - throw new ConnectException("A problem has occurred reading configuration: " + e.getMessage()); + log.error("Couldn't start FsSourceConnector.", e); + throw new ConnectException("A problem has occurred reading configuration: " + e.getMessage(), e); } - log.info("FS source task started with policy {}", policy.getClass().getName()); + log.info("FS source task started with policy [{}].", policy.getClass().getName()); } @Override public List poll() { while (!stop.get() && policy != null && !policy.hasEnded()) { - log.trace("Polling for new data"); + log.trace("Polling for new data..."); - return filesToProcess().map(metadata -> { + List totalRecords = filesToProcess().map(metadata -> { List records = new ArrayList<>(); try (FileReader reader = policy.offer(metadata, context.offsetStorageReader())) { - log.info("Processing records for file {}", metadata); + log.info("Processing records for file {}.", metadata); while (reader.hasNext()) { records.add(convert(metadata, reader.currentOffset() + 1, reader.next())); } } catch (ConnectException | IOException e) { //when an exception happens reading a file, the connector continues - log.error("Error reading file from FS: " + metadata.getPath() + ". Keep going...", e); + log.error("Error reading file [{}]. 
Keep going...", metadata.getPath(), e); } + log.debug("Read [{}] records from file [{}].", records.size(), metadata.getPath()); + return records; }).flatMap(Collection::stream).collect(Collectors.toList()); + + log.debug("Returning [{}] records in execution number [{}] for policy [{}].", + totalRecords.size(), policy.getExecutions(), policy.getClass().getName()); + + return totalRecords; + } + if (pollInterval > 0) { + log.trace("Waiting [{}] ms for next poll.", pollInterval); + time.sleep(pollInterval); } return null; } @@ -89,8 +110,8 @@ private Stream filesToProcess() { } catch (IOException | ConnectException e) { //when an exception happens executing the policy, the connector continues log.error("Cannot retrieve files to process from the FS: {}. " + - "There was an error executing the policy but the task tolerates this and continues. " + - e.getMessage(), policy.getURIs(), e); + "There was an error executing the policy but the task tolerates this and continues.", + policy.getURIs(), e); return Stream.empty(); } } @@ -112,10 +133,16 @@ private SourceRecord convert(FileMetadata metadata, long offset, Struct struct) @Override public void stop() { - log.info("Stopping FS source task."); + log.info("Stopping FS source task..."); stop.set(true); - if (policy != null) { - policy.interrupt(); + synchronized (this) { + if (policy != null) { + try { + policy.close(); + } catch (IOException ioe) { + log.warn("Error closing policy [{}].", policy.getClass().getName(), ioe); + } + } } } } diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceTaskConfig.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceTaskConfig.java index 3b1f4a5..58231fd 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceTaskConfig.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceTaskConfig.java @@ -11,17 +11,29 @@ public class FsSourceTaskConfig extends FsSourceConnectorConfig { public static final String POLICY_CLASS = POLICY_PREFIX + "class"; private static final String POLICY_CLASS_DOC = "Policy class to apply to this task."; + private static final String POLICY_CLASS_DISPLAY = "Policy"; public static final String POLICY_RECURSIVE = POLICY_PREFIX + "recursive"; private static final String POLICY_RECURSIVE_DOC = "Flag to activate traversed recursion in subdirectories when listing files."; + private static final String POLICY_RECURSIVE_DISPLAY = "Recursive directory listing"; public static final String POLICY_REGEXP = POLICY_PREFIX + "regexp"; private static final String POLICY_REGEXP_DOC = "Regular expression to filter files from the FS."; + private static final String POLICY_REGEXP_DISPLAY = "File filter regex"; public static final String POLICY_PREFIX_FS = POLICY_PREFIX + "fs."; public static final String FILE_READER_CLASS = FILE_READER_PREFIX + "class"; private static final String FILE_READER_CLASS_DOC = "File reader class to read files from the FS."; + private static final String FILE_READER_CLASS_DISPLAY = "File reader class"; + + public static final String POLL_INTERVAL_MS = "poll.interval.ms"; + private static final String POLL_INTERVAL_MS_DOC = "Frequency in ms to poll for new data."; + public static final int POLL_INTERVAL_MS_DEFAULT = 10000; + private static final String POLL_INTERVAL_MS_DISPLAY = "Poll Interval (ms)"; + + private static final String POLICY_GROUP = "Policy"; + private static final String CONNECTOR_GROUP = "Connector"; public FsSourceTaskConfig(ConfigDef config, Map parsedConfig) { super(config, parsedConfig); @@ -32,11 +44,58 
@@ public FsSourceTaskConfig(Map parsedConfig) { } public static ConfigDef conf() { + int order = 0; return FsSourceConnectorConfig.conf() - .define(POLICY_CLASS, ConfigDef.Type.CLASS, ConfigDef.Importance.HIGH, POLICY_CLASS_DOC) - .define(POLICY_RECURSIVE, ConfigDef.Type.BOOLEAN, Boolean.TRUE, ConfigDef.Importance.LOW, POLICY_RECURSIVE_DOC) - .define(POLICY_REGEXP, ConfigDef.Type.STRING, ".*", ConfigDef.Importance.MEDIUM, POLICY_REGEXP_DOC) - .define(FILE_READER_CLASS, ConfigDef.Type.CLASS, ConfigDef.Importance.HIGH, FILE_READER_CLASS_DOC); + .define( + POLICY_CLASS, + ConfigDef.Type.CLASS, + ConfigDef.NO_DEFAULT_VALUE, + ConfigDef.Importance.HIGH, + POLICY_CLASS_DOC, + POLICY_GROUP, + ++order, + ConfigDef.Width.MEDIUM, + POLICY_CLASS_DISPLAY + ).define( + POLICY_RECURSIVE, + ConfigDef.Type.BOOLEAN, + Boolean.TRUE, + ConfigDef.Importance.MEDIUM, + POLICY_RECURSIVE_DOC, + POLICY_GROUP, + ++order, + ConfigDef.Width.SHORT, + POLICY_RECURSIVE_DISPLAY + ).define( + POLICY_REGEXP, + ConfigDef.Type.STRING, + ".*", + ConfigDef.Importance.MEDIUM, + POLICY_REGEXP_DOC, + POLICY_GROUP, + ++order, + ConfigDef.Width.MEDIUM, + POLICY_REGEXP_DISPLAY + ).define( + FILE_READER_CLASS, + ConfigDef.Type.CLASS, + ConfigDef.NO_DEFAULT_VALUE, + ConfigDef.Importance.HIGH, + FILE_READER_CLASS_DOC, + POLICY_GROUP, + ++order, + ConfigDef.Width.MEDIUM, + FILE_READER_CLASS_DISPLAY + ).define( + POLL_INTERVAL_MS, + ConfigDef.Type.INT, + POLL_INTERVAL_MS_DEFAULT, + ConfigDef.Importance.MEDIUM, + POLL_INTERVAL_MS_DOC, + CONNECTOR_GROUP, + ++order, + ConfigDef.Width.SHORT, + POLL_INTERVAL_MS_DISPLAY + ); } - } From a8e4ca3a77ea76e1f6ffe9a54cf28c181a77f839 Mon Sep 17 00:00:00 2001 From: Mario Molina Date: Sun, 26 Apr 2020 17:34:03 -0500 Subject: [PATCH 45/51] Retrievable connections in HDFS file watcher --- .../fs/file/reader/AbstractFileReader.java | 5 +- .../connect/fs/policy/AbstractPolicy.java | 11 +- .../kafka/connect/fs/policy/CronPolicy.java | 12 +- .../fs/policy/HdfsFileWatcherPolicy.java | 139 +++++++++++------- .../kafka/connect/fs/policy/Policy.java | 2 + .../fs/policy/HdfsFileWatcherPolicyTest.java | 60 ++++++++ 6 files changed, 161 insertions(+), 68 deletions(-) diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AbstractFileReader.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AbstractFileReader.java index fec6b73..d63283f 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AbstractFileReader.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/file/reader/AbstractFileReader.java @@ -33,7 +33,7 @@ public AbstractFileReader(FileSystem fs, Path filePath, ReaderAdapter adapter this.offset = 0; configure(readerConfig(config)); - log.trace("Initialized file reader {} for file {}", getClass(), filePath); + log.trace("Initialized file reader [{}] for file [{}].", getClass().getName(), filePath); } protected final Map readerConfig(Map config) { @@ -88,10 +88,7 @@ public final void seek(long offset) { } checkClosed(); try { - log.debug("Seeking file {} to offset {}.", filePath, offset); seekFile(offset); - } catch (ConnectException ce) { - throw ce; } catch (IOException ioe) { throw new ConnectException("Error seeking file: " + getFilePath(), ioe); } diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/AbstractPolicy.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/AbstractPolicy.java index 5908380..d250e76 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/AbstractPolicy.java +++ 
b/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/AbstractPolicy.java @@ -21,7 +21,7 @@ import java.time.LocalDateTime; import java.time.format.DateTimeFormatter; import java.util.*; -import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.Collectors; @@ -34,14 +34,14 @@ abstract class AbstractPolicy implements Policy { protected final Pattern fileRegexp; private final FsSourceTaskConfig conf; - private final AtomicInteger executions; + private final AtomicLong executions; private final boolean recursive; private boolean interrupted; public AbstractPolicy(FsSourceTaskConfig conf) throws IOException { this.fileSystems = new ArrayList<>(); this.conf = conf; - this.executions = new AtomicInteger(0); + this.executions = new AtomicLong(0); this.recursive = conf.getBoolean(FsSourceTaskConfig.POLICY_RECURSIVE); this.fileRegexp = Pattern.compile(conf.getString(FsSourceTaskConfig.POLICY_REGEXP)); this.interrupted = false; @@ -107,11 +107,11 @@ public final Iterator execute() throws IOException { } preCheck(); + executions.incrementAndGet(); Iterator files = Collections.emptyIterator(); for (FileSystem fs : fileSystems) { files = concat(files, listFiles(fs)); } - executions.incrementAndGet(); postCheck(); @@ -178,7 +178,7 @@ public final boolean hasEnded() { protected abstract boolean isPolicyCompleted(); - final int getExecutions() { + public final long getExecutions() { return executions.get(); } @@ -204,6 +204,7 @@ public FileReader offer(FileMetadata metadata, OffsetStorageReader offsetStorage Map partition = Collections.singletonMap("path", metadata.getPath()); Map offset = offsetStorageReader.offset(partition); if (offset != null && offset.get("offset") != null) { + log.info("Seeking to offset [{}] for file [{}].", offset.get("offset"), metadata.getPath()); reader.seek((Long) offset.get("offset")); } return reader; diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/CronPolicy.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/CronPolicy.java index 0774789..307fc23 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/CronPolicy.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/CronPolicy.java @@ -6,6 +6,8 @@ import com.cronutils.parser.CronParser; import com.github.mmolimar.kafka.connect.fs.FsSourceTaskConfig; import org.apache.kafka.common.config.ConfigException; +import org.apache.kafka.common.utils.SystemTime; +import org.apache.kafka.common.utils.Time; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -26,11 +28,13 @@ public class CronPolicy extends AbstractPolicy { public static final String CRON_POLICY_EXPRESSION = CRON_POLICY_PREFIX + "expression"; public static final String CRON_POLICY_END_DATE = CRON_POLICY_PREFIX + "end_date"; + private final Time time; private ExecutionTime executionTime; private Date endDate; public CronPolicy(FsSourceTaskConfig conf) throws IOException { super(conf); + this.time = new SystemTime(); } @Override @@ -57,13 +61,7 @@ protected void configPolicy(Map customConfigs) { @Override protected void preCheck() { executionTime.timeToNextExecution(ZonedDateTime.now()) - .ifPresent(next -> { - try { - Thread.sleep(next.toMillis()); - } catch (InterruptedException ie) { - log.warn("An interrupted exception has occurred.", ie); - } - }); + .ifPresent(next -> time.sleep(next.toMillis())); } @Override diff --git 
a/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/HdfsFileWatcherPolicy.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/HdfsFileWatcherPolicy.java index 996d868..8d2f0d6 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/HdfsFileWatcherPolicy.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/HdfsFileWatcherPolicy.java @@ -10,12 +10,14 @@ import org.apache.hadoop.hdfs.client.HdfsAdmin; import org.apache.hadoop.hdfs.inotify.Event; import org.apache.hadoop.hdfs.inotify.EventBatch; +import org.apache.kafka.common.config.ConfigException; +import org.apache.kafka.common.utils.SystemTime; +import org.apache.kafka.common.utils.Time; import org.apache.kafka.connect.errors.ConnectException; import org.apache.kafka.connect.errors.IllegalWorkerStateException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.FileNotFoundException; import java.io.IOException; import java.util.*; import java.util.concurrent.ConcurrentLinkedQueue; @@ -25,26 +27,51 @@ public class HdfsFileWatcherPolicy extends AbstractPolicy { private static final Logger log = LoggerFactory.getLogger(HdfsFileWatcherPolicy.class); private static final String URI_PREFIX = "hdfs://"; + private static final long DEFAULT_POLL = 5000L; + private static final long DEFAULT_RETRY = 20000L; + private static final String HDFS_FILE_WATCHER_POLICY_PREFIX = FsSourceTaskConfig.POLICY_PREFIX + "hdfs_file_watcher."; + + public static final String HDFS_FILE_WATCHER_POLICY_POLL_MS = HDFS_FILE_WATCHER_POLICY_PREFIX + "poll"; + public static final String HDFS_FILE_WATCHER_POLICY_RETRY_MS = HDFS_FILE_WATCHER_POLICY_PREFIX + "retry"; + private final Queue fileQueue; + private final Time time; private Map fsEvenStream; + private long pollSleepMs; + private long retrySleepMs; public HdfsFileWatcherPolicy(FsSourceTaskConfig conf) throws IOException { super(conf); this.fileQueue = new ConcurrentLinkedQueue<>(); + this.time = new SystemTime(); startWatchers(); } @Override protected void configPolicy(Map customConfigs) { + try { + this.pollSleepMs = Long.parseLong((String) customConfigs + .getOrDefault(HDFS_FILE_WATCHER_POLICY_POLL_MS, String.valueOf(DEFAULT_POLL))); + } catch (NumberFormatException nfe) { + throw new ConfigException(HDFS_FILE_WATCHER_POLICY_POLL_MS + " property is required and must be a " + + "number (long). Got: " + customConfigs.get(HDFS_FILE_WATCHER_POLICY_POLL_MS)); + } + try { + this.retrySleepMs = Long.parseLong((String) customConfigs + .getOrDefault(HDFS_FILE_WATCHER_POLICY_RETRY_MS, String.valueOf(DEFAULT_RETRY))); + } catch (NumberFormatException nfe) { + throw new ConfigException(HDFS_FILE_WATCHER_POLICY_RETRY_MS + " property is required and must be a " + + "number (long). 
Got: " + customConfigs.get(HDFS_FILE_WATCHER_POLICY_RETRY_MS)); + } this.fsEvenStream = new HashMap<>(); fileSystems.stream() .filter(fs -> fs.getWorkingDirectory().toString().startsWith(URI_PREFIX)) .forEach(fs -> { try { HdfsAdmin admin = new HdfsAdmin(fs.getWorkingDirectory().toUri(), fs.getConf()); - fsEvenStream.put(fs, new EventStreamThread(fs, admin)); + fsEvenStream.put(fs, new EventStreamThread(fs, admin, retrySleepMs)); } catch (IOException ioe) { - throw new ConnectException("Error creating admin for notifications", ioe); + throw new ConnectException("Error creating HDFS notifications.", ioe); } }); } @@ -69,14 +96,7 @@ public Iterator listFiles(FileSystem fs) { @Override protected boolean isPolicyCompleted() { - boolean hasRunningThreads = false; - for (EventStreamThread thread : fsEvenStream.values()) { - if (thread.isAlive()) { - hasRunningThreads = true; - break; - } - } - return !hasRunningThreads; + return fsEvenStream.values().stream().noneMatch(Thread::isAlive); } @Override @@ -85,6 +105,11 @@ public void interrupt() { super.interrupt(); } + @Override + public void postCheck() { + time.sleep(pollSleepMs); + } + @Override public void close() throws IOException { stopWatchers(); @@ -94,66 +119,76 @@ public void close() throws IOException { private class EventStreamThread extends Thread { private final FileSystem fs; private final HdfsAdmin admin; + private final long retrySleepMs; + private final Time time; - EventStreamThread(FileSystem fs, HdfsAdmin admin) { + EventStreamThread(FileSystem fs, HdfsAdmin admin, long retrySleepMs) { this.fs = fs; this.admin = admin; + this.retrySleepMs = retrySleepMs; + this.time = new SystemTime(); } @Override public void run() { - try { - DFSInotifyEventInputStream eventStream = admin.getInotifyEventStream(); - while (fs.getFileStatus(fs.getWorkingDirectory()) != null && - fs.exists(fs.getWorkingDirectory())) { - EventBatch batch = eventStream.poll(); - if (batch == null) continue; - - for (Event event : batch.getEvents()) { - switch (event.getEventType()) { - case CREATE: - if (!((Event.CreateEvent) event).getPath().endsWith("._COPYING_")) { - enqueue(((Event.CreateEvent) event).getPath()); - } - break; - case APPEND: - if (!((Event.AppendEvent) event).getPath().endsWith("._COPYING_")) { - enqueue(((Event.AppendEvent) event).getPath()); - } - break; - case RENAME: - if (((Event.RenameEvent) event).getSrcPath().endsWith("._COPYING_")) { - enqueue(((Event.RenameEvent) event).getDstPath()); - } - break; - case CLOSE: - if (!((Event.CloseEvent) event).getPath().endsWith("._COPYING_")) { - enqueue(((Event.CloseEvent) event).getPath()); - } - break; - default: - break; + while (true) { + try { + DFSInotifyEventInputStream eventStream = admin.getInotifyEventStream(); + if (fs.getFileStatus(fs.getWorkingDirectory()) != null && + fs.exists(fs.getWorkingDirectory())) { + EventBatch batch = eventStream.poll(); + if (batch == null) continue; + + for (Event event : batch.getEvents()) { + switch (event.getEventType()) { + case CREATE: + if (!((Event.CreateEvent) event).getPath().endsWith("._COPYING_")) { + enqueue(((Event.CreateEvent) event).getPath()); + } + break; + case APPEND: + if (!((Event.AppendEvent) event).getPath().endsWith("._COPYING_")) { + enqueue(((Event.AppendEvent) event).getPath()); + } + break; + case RENAME: + if (((Event.RenameEvent) event).getSrcPath().endsWith("._COPYING_")) { + enqueue(((Event.RenameEvent) event).getDstPath()); + } + break; + case CLOSE: + if (!((Event.CloseEvent) event).getPath().endsWith("._COPYING_")) { + 
enqueue(((Event.CloseEvent) event).getPath()); + } + break; + default: + break; + } } } + } catch (IOException ioe) { + if (retrySleepMs > 0) { + time.sleep(retrySleepMs); + } else { + log.warn("Error watching path [{}]. Stopping it...", fs.getWorkingDirectory(), ioe); + throw new IllegalWorkerStateException(ioe); + } + } catch (Exception e) { + log.warn("Stopping watcher due to an unexpected exception when watching path [{}].", + fs.getWorkingDirectory(), e); + throw new IllegalWorkerStateException(e); } - } catch (FileNotFoundException fnfe) { - log.warn("Cannot find file in this FS {}. Stopping watcher...", fs.getWorkingDirectory(), fnfe); - } catch (IOException ioe) { - log.warn("An interrupted exception has occurred. Path {} is not watched any more", fs.getWorkingDirectory()); - } catch (Exception ioe) { - log.warn("Exception watching path {}", fs.getWorkingDirectory(), ioe); - throw new IllegalWorkerStateException(ioe); } } private void enqueue(String path) throws IOException { Path filePath = new Path(path); if (!fs.exists(filePath) || fs.getFileStatus(filePath) == null) { - log.info("Cannot enqueue file {} because it does not exist but got an event from the FS", filePath.toString()); + log.info("Cannot enqueue file [{}] because it does not exist but got an event from the FS", filePath); return; } - log.debug("Enqueuing file to process {}", filePath); + log.debug("Enqueuing file to process [{}]", filePath); RemoteIterator it = fs.listFiles(filePath, false); while (it.hasNext()) { LocatedFileStatus status = it.next(); diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/Policy.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/Policy.java index 8cb3232..370288f 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/Policy.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/policy/Policy.java @@ -19,5 +19,7 @@ public interface Policy extends Closeable { List getURIs(); + long getExecutions(); + void interrupt(); } diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/HdfsFileWatcherPolicyTest.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/HdfsFileWatcherPolicyTest.java index ddf69b7..a29ae5d 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/HdfsFileWatcherPolicyTest.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/HdfsFileWatcherPolicyTest.java @@ -2,7 +2,10 @@ import com.github.mmolimar.kafka.connect.fs.FsSourceTaskConfig; import com.github.mmolimar.kafka.connect.fs.file.reader.TextFileReader; +import com.github.mmolimar.kafka.connect.fs.util.ReflectionUtils; import org.apache.hadoop.fs.Path; +import org.apache.kafka.common.config.ConfigException; +import org.apache.kafka.connect.errors.ConnectException; import org.apache.kafka.connect.errors.IllegalWorkerStateException; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.params.ParameterizedTest; @@ -76,4 +79,61 @@ public void execPolicyAlreadyEnded(PolicyFsTestConfig fsConfig) throws IOExcepti assertThrows(IllegalWorkerStateException.class, () -> fsConfig.getPolicy().execute()); } + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void notReachableFileSystem(PolicyFsTestConfig fsConfig) throws InterruptedException { + Map originals = fsConfig.getSourceTaskConfig().originalsStrings(); + originals.put(FsSourceTaskConfig.FS_URIS, "hdfs://localhost:65432/data"); + originals.put(HdfsFileWatcherPolicy.HDFS_FILE_WATCHER_POLICY_POLL_MS, "0"); + 
originals.put(HdfsFileWatcherPolicy.HDFS_FILE_WATCHER_POLICY_RETRY_MS, "0"); + FsSourceTaskConfig cfg = new FsSourceTaskConfig(originals); + Policy policy = ReflectionUtils.makePolicy((Class) fsConfig.getSourceTaskConfig() + .getClass(FsSourceTaskConfig.POLICY_CLASS), cfg); + int count = 0; + while (!policy.hasEnded() && count < 10) { + Thread.sleep(500); + count++; + } + assertTrue(count < 10); + assertTrue(policy.hasEnded()); + } + + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void invalidPollTime(PolicyFsTestConfig fsConfig) { + Map originals = fsConfig.getSourceTaskConfig().originalsStrings(); + originals.put(HdfsFileWatcherPolicy.HDFS_FILE_WATCHER_POLICY_POLL_MS, "invalid"); + FsSourceTaskConfig cfg = new FsSourceTaskConfig(originals); + assertThrows(ConnectException.class, () -> + ReflectionUtils.makePolicy((Class) fsConfig.getSourceTaskConfig() + .getClass(FsSourceTaskConfig.POLICY_CLASS), cfg)); + assertThrows(ConfigException.class, () -> { + try { + ReflectionUtils.makePolicy((Class) fsConfig.getSourceTaskConfig() + .getClass(FsSourceTaskConfig.POLICY_CLASS), cfg); + } catch (Exception e) { + throw e.getCause(); + } + }); + } + + @ParameterizedTest + @MethodSource("fileSystemConfigProvider") + public void invalidRetryTime(PolicyFsTestConfig fsConfig) { + Map originals = fsConfig.getSourceTaskConfig().originalsStrings(); + originals.put(HdfsFileWatcherPolicy.HDFS_FILE_WATCHER_POLICY_RETRY_MS, "invalid"); + FsSourceTaskConfig cfg = new FsSourceTaskConfig(originals); + assertThrows(ConnectException.class, () -> + ReflectionUtils.makePolicy((Class) fsConfig.getSourceTaskConfig() + .getClass(FsSourceTaskConfig.POLICY_CLASS), cfg)); + assertThrows(ConfigException.class, () -> { + try { + ReflectionUtils.makePolicy((Class) fsConfig.getSourceTaskConfig() + .getClass(FsSourceTaskConfig.POLICY_CLASS), cfg); + } catch (Exception e) { + throw e.getCause(); + } + }); + } + } From b50d963ac9a49e5cb25967b2d0c52593059ad508 Mon Sep 17 00:00:00 2001 From: Mario Molina Date: Sun, 26 Apr 2020 17:37:02 -0500 Subject: [PATCH 46/51] Docker files --- Dockerfile | 7 ++++ docker-compose.yml | 77 +++++++++++++++++++++++++++++++++++ pom.xml | 59 +++++++++++++++++++++++++++ src/main/assembly/package.xml | 3 +- 4 files changed, 144 insertions(+), 2 deletions(-) create mode 100644 Dockerfile create mode 100644 docker-compose.yml diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..7ba7a9d --- /dev/null +++ b/Dockerfile @@ -0,0 +1,7 @@ +FROM confluentinc/cp-kafka-connect-base:5.5.0 + +ARG PROJECT_VERSION +ENV CONNECT_PLUGIN_PATH="/usr/share/java,/usr/share/confluent-hub-components" + +COPY ./target/components/packages/mmolimar-kafka-connect-fs-${PROJECT_VERSION}.zip /tmp/kafka-connect-fs.zip +RUN confluent-hub install --no-prompt /tmp/kafka-connect-fs.zip && rm -rf /tmp/kafka-connect-fs.zip diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..498b645 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,77 @@ +version: '3' +services: + cp-zookeeper: + image: confluentinc/cp-zookeeper:5.5.0 + hostname: zookeeper + container_name: zookeeper + ports: + - "2181:2181" + environment: + ZOOKEEPER_CLIENT_PORT: 2181 + ZOOKEEPER_TICK_TIME: 2000 + + cp-kafka: + image: confluentinc/cp-kafka:5.5.0 + hostname: kafka + container_name: kafka + depends_on: + - cp-zookeeper + ports: + - "29092:29092" + - "9092:9092" + environment: + KAFKA_BROKER_ID: 1 + KAFKA_ZOOKEEPER_CONNECT: 'zookeeper:2181' + KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: 
PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT + KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:29092,PLAINTEXT_HOST://localhost:9092 + KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 + KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: 0 + CONFLUENT_METRICS_REPORTER_BOOTSTRAP_SERVERS: kafka:29092 + CONFLUENT_METRICS_REPORTER_ZOOKEEPER_CONNECT: zookeeper:2181 + CONFLUENT_METRICS_REPORTER_TOPIC_REPLICAS: 1 + CONFLUENT_METRICS_ENABLE: 'false' + + cp-schema-registry: + image: confluentinc/cp-schema-registry:5.5.0 + hostname: schema-registry + container_name: schema-registry + depends_on: + - cp-zookeeper + - cp-kafka + ports: + - "8081:8081" + environment: + SCHEMA_REGISTRY_HOST_NAME: schema-registry + SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL: 'zookeeper:2181' + + connect-fs: + image: mmolimar/kafka-connect-fs:1.0.0-SNAPSHOT + container_name: connect + depends_on: + - cp-kafka + - cp-schema-registry + ports: + - "8083:8083" + - "8000:8000" + environment: + CONNECT_BOOTSTRAP_SERVERS: 'kafka:29092' + CONNECT_REST_ADVERTISED_HOST_NAME: connect + CONNECT_REST_PORT: 8083 + CONNECT_GROUP_ID: compose-connect-group + CONNECT_CONFIG_STORAGE_TOPIC: docker-connect-configs + CONNECT_CONFIG_STORAGE_REPLICATION_FACTOR: 1 + CONNECT_OFFSET_FLUSH_INTERVAL_MS: 10000 + CONNECT_OFFSET_STORAGE_TOPIC: docker-connect-offsets + CONNECT_OFFSET_STORAGE_REPLICATION_FACTOR: 1 + CONNECT_STATUS_STORAGE_TOPIC: docker-connect-status + CONNECT_STATUS_STORAGE_REPLICATION_FACTOR: 1 + CONNECT_KEY_CONVERTER: org.apache.kafka.connect.storage.StringConverter + CONNECT_VALUE_CONVERTER: io.confluent.connect.avro.AvroConverter + CONNECT_VALUE_CONVERTER_SCHEMA_REGISTRY_URL: http://schema-registry:8081 + CONNECT_INTERNAL_KEY_CONVERTER: "org.apache.kafka.connect.json.JsonConverter" + CONNECT_INTERNAL_VALUE_CONVERTER: "org.apache.kafka.connect.json.JsonConverter" + CONNECT_ZOOKEEPER_CONNECT: 'zookeeper:2181' + CONNECT_PLUGIN_PATH: "/usr/share/java,/usr/share/confluent-hub-components/" + CONNECT_LOG4J_ROOT_LOGLEVEL: "INFO" + CONNECT_LOG4J_LOGGERS: org.apache.zookeeper=ERROR,org.I0Itec.zkclient=ERROR,org.reflections=ERROR + KAFKA_OPTS: "-agentlib:jdwp=transport=dt_socket,server=y,address=8000,suspend=n" diff --git a/pom.xml b/pom.xml index 699c039..2a1ae2c 100644 --- a/pom.xml +++ b/pom.xml @@ -29,6 +29,7 @@ 0.8.5 4.3.0 3.0.0-M4 + 0.11.3 @@ -179,6 +180,64 @@ coveralls-maven-plugin ${maven-coveralls-plugin.version} + + io.confluent + kafka-connect-maven-plugin + ${maven-kafka-connect-plugin.version} + + + + kafka-connect + + + kafka-connect-fs + Kafka Connect FileSystem + https://kafka-connect-fs.readthedocs.io/ + https://github.com/mmolimar/kafka-connect-fs + + Kafka Connect FileSystem Connector is a source connector for reading records from files + in the file systems specified and load them into Kafka. + + Mario Molina + This connector is supported by the open source community. 
+ https://github.com/mmolimar/kafka-connect-fs/issues + mmolimar + user + Mario Molina + https://github.com/mmolimar + mmolimar + kafka-connect-fs + ${project.version} + + source + + + filesystem + files + hadoop + hdfs + aws + s3 + google + gcs + azure + txt + csv + tsv + json + avro + parquet + sequence + + + + atLeastOnce + + true + + + + diff --git a/src/main/assembly/package.xml b/src/main/assembly/package.xml index 7962c49..a1b9d19 100644 --- a/src/main/assembly/package.xml +++ b/src/main/assembly/package.xml @@ -36,9 +36,8 @@ org.apache.kafka:connect-api org.mortbay.jetty:* com.sun.jersey:* - org.eclipse.jetty.aggregate:jetty-all + org.eclipse.jetty:jetty-util com.sun.jersey.contribs:jersey-guice - com.google.guava:guava org.apache.zookeeper:zookeeper log4j:log4j org.slf4j:slf4j-api From 2eda92941b95df1f0a636e0bca1ba1d40e36b16f Mon Sep 17 00:00:00 2001 From: Mario Molina Date: Sun, 26 Apr 2020 17:47:47 -0500 Subject: [PATCH 47/51] Updating docs --- config/kafka-connect-fs.properties | 2 +- docs/source/config_options.rst | 25 +++++++++++++++++++++++-- docs/source/connector.rst | 24 +++++++++++++++++++++--- 3 files changed, 45 insertions(+), 6 deletions(-) diff --git a/config/kafka-connect-fs.properties b/config/kafka-connect-fs.properties index 67435af..aab1ae6 100644 --- a/config/kafka-connect-fs.properties +++ b/config/kafka-connect-fs.properties @@ -1,7 +1,7 @@ name=FsSourceConnector connector.class=com.github.mmolimar.kafka.connect.fs.FsSourceConnector tasks.max=1 -fs.uris=file:///data,hdfs://localhost:9000/ +fs.uris=file:///data,hdfs://localhost:8020/data topic=mytopic policy.class=com.github.mmolimar.kafka.connect.fs.policy.SimplePolicy policy.recursive=true diff --git a/docs/source/config_options.rst b/docs/source/config_options.rst index 1cce34f..0a69105 100644 --- a/docs/source/config_options.rst +++ b/docs/source/config_options.rst @@ -54,7 +54,7 @@ General config properties for this connector. ``file:///data/${yyyy}/${MM}/${dd}/${HH}${mm}`` .. tip:: - If you want to ingest data from S3, you can add credentials with : + If you want to ingest data from S3, you can add credentials with: ``policy.fs.fs.s3a.access.key=`` and ``policy.fs.fs.s3a.secret.key=`` @@ -65,6 +65,13 @@ General config properties for this connector. * Type: string * Importance: high +``poll.interval.ms`` + Frequency in milliseconds to poll for new data. This config only applies once the policies have ended. + + * Type: int + * Default: ``10000`` + * Importance: medium + ``policy.class`` Policy class to apply (must implement ``com.github.mmolimar.kafka.connect.fs.policy.Policy`` interface). @@ -179,7 +186,21 @@ In order to configure custom properties for this policy, the name you must use i HDFS file watcher -------------------------------------------- -This policy does not have any additional configuration. +In order to configure custom properties for this policy, the name you must use is ``hdfs_file_watcher``. + +``policy.hdfs_file_watcher.poll`` + Time to wait until the records retrieved by the file watcher are sent to the source task. + + * Type: long + * Default: ``5000`` + * Importance: medium + +``policy.hdfs_file_watcher.retry`` + Sleep time to retry connections to HDFS when connection errors occur. + + * Type: long + * Default: ``20000`` + * Importance: medium .. 
_config_options-filereaders: diff --git a/docs/source/connector.rst b/docs/source/connector.rst index 0e02451..476aa7b 100644 --- a/docs/source/connector.rst +++ b/docs/source/connector.rst @@ -47,7 +47,7 @@ The ``kafka-connect-fs.properties`` file defines the following properties as req name=FsSourceConnector connector.class=com.github.mmolimar.kafka.connect.fs.FsSourceConnector tasks.max=1 - fs.uris=file:///data,hdfs://localhost:9000/ + fs.uris=file:///data,hdfs://localhost:8020/data topic=mytopic policy.class= policy.recursive=true @@ -70,7 +70,7 @@ The ``kafka-connect-fs.properties`` file defines the following properties as req A more detailed information about these properties can be found :ref:`here`. -Running in development +Running in local -------------------------------------------- .. sourcecode:: bash @@ -81,7 +81,25 @@ Running in development mvn clean package export CLASSPATH="$(find target/ -type f -name '*.jar'| grep '\-package' | tr '\n' ':')" - $KAFKA_HOME/bin/connect-distributed.sh config/kafka-connect-fs.properties + $KAFKA_HOME/bin/connect-standalone.sh $KAFKA_HOME/config/connect-standalone.properties config/kafka-connect-fs.properties + +Running in Docker +-------------------------------------------- + +.. sourcecode:: bash + + mvn clean package + +.. sourcecode:: bash + + docker build --build-arg PROJECT_VERSION= . + docker-compose build + docker-compose up -d + docker logs --tail="all" -f connect + +.. sourcecode:: bash + + curl -sX GET http://localhost:8083/connector-plugins | grep FsSourceConnector Components ============================================ From 4455bd47fb00a5b184838640270717254aaa868b Mon Sep 17 00:00:00 2001 From: Mario Molina Date: Sun, 26 Apr 2020 21:48:50 -0500 Subject: [PATCH 48/51] Change to Oracle JDK8 in Travis --- .travis.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.travis.yml b/.travis.yml index 2d90a0c..9a9aab4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,10 +1,10 @@ +dist: trusty language: java jdk: - - openjdk8 -sudo: false + - oraclejdk8 install: - - mvn test-compile -DskipTests=true -Dmaven.javadoc.skip=true -B -V + - mvn test-compile -DskipTests=true -Dmaven.javadoc.skip=true -B -V script: - - mvn test jacoco:report + - mvn test jacoco:report after_success: - - mvn coveralls:report \ No newline at end of file + - mvn coveralls:report From aa3f45091e5bd84e886cfdac6e555625f1111e77 Mon Sep 17 00:00:00 2001 From: Mario Molina Date: Mon, 27 Apr 2020 07:54:58 -0500 Subject: [PATCH 49/51] Fix typo in doc --- docs/source/policies.rst | 9 +++++---- .../github/mmolimar/kafka/connect/fs/FsSourceTask.java | 4 ++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/docs/source/policies.rst b/docs/source/policies.rst index dc0f607..1a5f654 100644 --- a/docs/source/policies.rst +++ b/docs/source/policies.rst @@ -1,10 +1,9 @@ Simple ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -It's a never-ending policy which just filters and processes files included in the corresponding URIs. +It's a policy which just filters and processes files included in the corresponding URIs one time. .. attention:: This policy is more oriented for testing purposes. - It never stops and Kafka Connect is continuously trying to poll data from the FS(s). Sleepy ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -28,10 +27,12 @@ HDFS file watcher ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ It uses Hadoop notifications events and all create/append/rename/close events will be reported -as new files to be ingested. 
+as files to be ingested. Just use it when you have HDFS URIs. -.. attention:: The URIs included in general property ``fs.uris`` will be filtered and only those +You can learn more about the properties of this policy :ref:`here`. + +.. attention:: The URIs included in the general property ``fs.uris`` will be filtered and only those ones which start with the prefix ``hdfs://`` will be watched. Also, this policy will only work for Hadoop versions 2.6.0 or higher. diff --git a/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceTask.java b/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceTask.java index c3fa38f..bb2169a 100644 --- a/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceTask.java +++ b/src/main/java/com/github/mmolimar/kafka/connect/fs/FsSourceTask.java @@ -50,11 +50,11 @@ public void start(Map properties) { config = new FsSourceTaskConfig(properties); if (config.getClass(FsSourceTaskConfig.POLICY_CLASS).isAssignableFrom(Policy.class)) { throw new ConfigException("Policy class " + - config.getClass(FsSourceTaskConfig.POLICY_CLASS) + "is not a subclass of " + Policy.class); + config.getClass(FsSourceTaskConfig.POLICY_CLASS) + " is not a subclass of " + Policy.class); } if (config.getClass(FsSourceTaskConfig.FILE_READER_CLASS).isAssignableFrom(FileReader.class)) { throw new ConfigException("FileReader class " + - config.getClass(FsSourceTaskConfig.FILE_READER_CLASS) + "is not a subclass of " + FileReader.class); + config.getClass(FsSourceTaskConfig.FILE_READER_CLASS) + " is not a subclass of " + FileReader.class); } Class policyClass = (Class) Class.forName(properties.get(FsSourceTaskConfig.POLICY_CLASS)); From 9de08e0ad96b6fe687da48a54f74fe7ed4043a64 Mon Sep 17 00:00:00 2001 From: Mario Molina Date: Mon, 27 Apr 2020 10:16:27 -0500 Subject: [PATCH 50/51] Wait to receive events in HDFS file watcher test policy --- .../kafka/connect/fs/policy/PolicyTestBase.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/PolicyTestBase.java b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/PolicyTestBase.java index 6af841b..ba77775 100644 --- a/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/PolicyTestBase.java +++ b/src/test/java/com/github/mmolimar/kafka/connect/fs/policy/PolicyTestBase.java @@ -130,10 +130,10 @@ public void oneFilePerFs(PolicyFsTestConfig fsConfig) throws IOException, Interr fs.createNewFile(new Path(dir, System.nanoTime() + ".txt")); //this file does not match the regexp fs.createNewFile(new Path(dir, System.nanoTime() + ".invalid")); - } - //we wait till FS has registered the files - Thread.sleep(500); + //we wait till FS has registered the files + Thread.sleep(3000); + } Iterator it = fsConfig.getPolicy().execute(); assertTrue(it.hasNext()); it.next(); @@ -152,10 +152,10 @@ public void recursiveDirectory(PolicyFsTestConfig fsConfig) throws IOException, fs.createNewFile(new Path(tmpDir, System.nanoTime() + ".txt")); //this file does not match the regexp fs.createNewFile(new Path(tmpDir, System.nanoTime() + ".invalid")); - } - //we wait till FS has registered the files - Thread.sleep(500); + //we wait till FS has registered the files + Thread.sleep(3000); + } Iterator it = fsConfig.getPolicy().execute(); assertTrue(it.hasNext()); it.next(); From 175df2ae54b9ccc14cc540d678e74d6623501ab5 Mon Sep 17 00:00:00 2001 From: Mario Molina Date: Mon, 27 Apr 2020 10:17:01 -0500 Subject: [PATCH 51/51] Release version 1.0.0 --- docker-compose.yml | 2 +- 
pom.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 498b645..e763372 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -45,7 +45,7 @@ services: SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL: 'zookeeper:2181' connect-fs: - image: mmolimar/kafka-connect-fs:1.0.0-SNAPSHOT + image: mmolimar/kafka-connect-fs:1.0.0 container_name: connect depends_on: - cp-kafka diff --git a/pom.xml b/pom.xml index 2a1ae2c..bd22791 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ com.github.mmolimar.kafka.connect kafka-connect-fs - 1.0.0-SNAPSHOT + 1.0.0 jar kafka-connect-fs
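
Editor's note: the documentation patches above describe the connector's runtime configuration (``fs.uris``, ``policy.class``, ``poll.interval.ms``, the ``hdfs_file_watcher`` policy settings) and a Docker setup that exposes the Connect worker on port 8083. As a quick way to exercise that configuration, the sketch below registers a connector instance through the standard Kafka Connect REST API. It is only an illustration: the connector name, topic, URIs, regexp, and file reader are hypothetical values mirroring the sample ``config/kafka-connect-fs.properties`` shown in these patches, not part of the commits themselves.

.. sourcecode:: bash

    # Register an FsSourceConnector instance against a Connect worker on localhost:8083
    # (the port used by the docker-compose setup above). All values are illustrative.
    curl -sX POST -H "Content-Type: application/json" http://localhost:8083/connectors -d '{
      "name": "fs-source-example",
      "config": {
        "connector.class": "com.github.mmolimar.kafka.connect.fs.FsSourceConnector",
        "tasks.max": "1",
        "fs.uris": "file:///data,hdfs://localhost:8020/data",
        "topic": "mytopic",
        "policy.class": "com.github.mmolimar.kafka.connect.fs.policy.SimplePolicy",
        "policy.recursive": "true",
        "policy.regexp": ".*\\.txt$",
        "file_reader.class": "com.github.mmolimar.kafka.connect.fs.file.reader.TextFileReader",
        "poll.interval.ms": "10000"
      }
    }'

    # Check the connector and task status afterwards.
    curl -sX GET http://localhost:8083/connectors/fs-source-example/status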