Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/gtfs schedule parser #1

Merged
merged 4 commits into from
Apr 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 9 additions & 9 deletions .github/workflows/maven-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,15 @@ jobs:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v3
- name: Set up JDK 21
uses: actions/setup-java@v3
with:
java-version: '21'
distribution: 'temurin'
cache: maven
- name: Build with Maven
run: mvn -B package --file pom.xml
- uses: actions/checkout@v3
- name: Set up JDK 21
uses: actions/setup-java@v3
with:
java-version: '21'
distribution: 'temurin'
cache: maven
- name: Build with Maven
run: mvn -B verify --file pom.xml

# Optional: Uploads the full dependency graph to GitHub to improve the quality of Dependabot alerts this repository can receive
# - name: Update dependency graph
Expand Down
98 changes: 98 additions & 0 deletions src/main/java/ch/naviqore/gtfs/schedule/GtfsScheduleParser.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
package ch.naviqore.gtfs.schedule;

import ch.naviqore.gtfs.schedule.model.GtfsScheduleBuilder;
import ch.naviqore.gtfs.schedule.type.ExceptionType;
import ch.naviqore.gtfs.schedule.type.RouteType;
import ch.naviqore.gtfs.schedule.type.ServiceDayTime;
import lombok.RequiredArgsConstructor;
import lombok.extern.log4j.Log4j2;
import org.apache.commons.csv.CSVRecord;

import java.time.DayOfWeek;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.util.EnumSet;
import java.util.List;
import java.util.Map;

/**
 * GTFS CSV Records Parser
 * <p>
 * Reads named columns from already loaded CSV records and forwards the converted values to the
 * {@link GtfsScheduleBuilder} passed in at construction time. Each {@code parse*} method handles the
 * records of one GTFS file and logs the number of records it is about to process.
 *
 * @author munterfi
 */
@RequiredArgsConstructor
@Log4j2
class GtfsScheduleParser {

    /** Format of the date columns read by this parser (for example {@code 20240415}). */
    private static final DateTimeFormatter DATE_FORMATTER = DateTimeFormatter.ofPattern("yyyyMMdd");

    /** Calendar column name to the weekday it stands for. */
    private static final Map<String, DayOfWeek> DAY_MAPPINGS = Map.of(
            "monday", DayOfWeek.MONDAY,
            "tuesday", DayOfWeek.TUESDAY,
            "wednesday", DayOfWeek.WEDNESDAY,
            "thursday", DayOfWeek.THURSDAY,
            "friday", DayOfWeek.FRIDAY,
            "saturday", DayOfWeek.SATURDAY,
            "sunday", DayOfWeek.SUNDAY);

    private final GtfsScheduleBuilder builder;

    /**
     * Adds one agency to the builder per record.
     *
     * @param records agency records; must not be null
     */
    void parseAgencies(List<CSVRecord> records) {
        log.info("Parsing {} agency records", records.size());
        records.forEach(row -> builder.addAgency(row.get("agency_id"), row.get("agency_name"),
                row.get("agency_url"), row.get("agency_timezone")));
    }

    /**
     * Adds one calendar to the builder per record. A weekday counts as a service day when its column
     * holds exactly {@code "1"}.
     *
     * @param records calendar records; must not be null
     */
    void parseCalendars(List<CSVRecord> records) {
        log.info("Parsing {} calendar records", records.size());
        for (CSVRecord row : records) {
            EnumSet<DayOfWeek> activeDays = EnumSet.noneOf(DayOfWeek.class);
            for (Map.Entry<String, DayOfWeek> mapping : DAY_MAPPINGS.entrySet()) {
                if ("1".equals(row.get(mapping.getKey()))) {
                    activeDays.add(mapping.getValue());
                }
            }
            String serviceId = row.get("service_id");
            LocalDate firstDay = LocalDate.parse(row.get("start_date"), DATE_FORMATTER);
            LocalDate lastDay = LocalDate.parse(row.get("end_date"), DATE_FORMATTER);
            builder.addCalendar(serviceId, activeDays, firstDay, lastDay);
        }
    }

    /**
     * Adds one calendar date (service exception) to the builder per record.
     *
     * @param records calendar date records; must not be null
     */
    void parseCalendarDates(List<CSVRecord> records) {
        log.info("Parsing {} calendar date records", records.size());
        for (CSVRecord row : records) {
            String serviceId = row.get("service_id");
            LocalDate day = LocalDate.parse(row.get("date"), DATE_FORMATTER);
            ExceptionType exceptionType = ExceptionType.parse(row.get("exception_type"));
            builder.addCalendarDate(serviceId, day, exceptionType);
        }
    }

    /**
     * Adds one stop to the builder per record; latitude and longitude are parsed as doubles.
     *
     * @param records stop records; must not be null
     */
    void parseStops(List<CSVRecord> records) {
        log.info("Parsing {} stop records", records.size());
        for (CSVRecord row : records) {
            String stopId = row.get("stop_id");
            String stopName = row.get("stop_name");
            double latitude = Double.parseDouble(row.get("stop_lat"));
            double longitude = Double.parseDouble(row.get("stop_lon"));
            builder.addStop(stopId, stopName, latitude, longitude);
        }
    }

    /**
     * Adds one route to the builder per record. The {@code route_type} column is not read yet; every
     * route is registered as {@link RouteType#RAIL}.
     *
     * @param records route records; must not be null
     */
    void parseRoutes(List<CSVRecord> records) {
        log.info("Parsing {} route records", records.size());
        // TODO: Route types are not standardized in any way.
        //  RouteType.parse(record.get("route_type"))
        records.forEach(row -> builder.addRoute(row.get("route_id"), row.get("agency_id"),
                row.get("route_short_name"), row.get("route_long_name"), RouteType.RAIL));
    }

    /**
     * Adds one trip to the builder per record.
     *
     * @param records trip records; must not be null
     */
    void parseTrips(List<CSVRecord> records) {
        log.info("Parsing {} trip records", records.size());
        records.forEach(row -> builder.addTrip(row.get("trip_id"), row.get("route_id"), row.get("service_id")));
    }

    /**
     * Adds one stop time to the builder per record; arrival and departure are parsed with
     * {@link ServiceDayTime#parse}.
     *
     * @param records stop time records; must not be null
     */
    void parseStopTimes(List<CSVRecord> records) {
        log.info("Parsing {} stop time records", records.size());
        for (CSVRecord row : records) {
            String tripId = row.get("trip_id");
            String stopId = row.get("stop_id");
            ServiceDayTime arrival = ServiceDayTime.parse(row.get("arrival_time"));
            ServiceDayTime departure = ServiceDayTime.parse(row.get("departure_time"));
            builder.addStopTime(tripId, stopId, arrival, departure);
        }
    }

}
43 changes: 35 additions & 8 deletions src/main/java/ch/naviqore/gtfs/schedule/GtfsScheduleReader.java
Original file line number Diff line number Diff line change
@@ -1,15 +1,19 @@
package ch.naviqore.gtfs.schedule;

import ch.naviqore.gtfs.schedule.model.GtfsSchedule;
import ch.naviqore.gtfs.schedule.model.GtfsScheduleBuilder;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.RequiredArgsConstructor;
import lombok.extern.log4j.Log4j2;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.io.ByteOrderMark;
import org.apache.commons.io.input.BOMInputStream;

import java.io.File;
import java.io.FileReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
Expand Down Expand Up @@ -60,9 +64,9 @@ public enum GtfsFile {
private final String fileName;
}

public Map<GtfsFile, List<CSVRecord>> read(String path) throws IOException {
public GtfsSchedule read(String path) throws IOException {
File file = new File(path);
Map<GtfsFile, List<CSVRecord>> records = new HashMap<>();
Map<GtfsFile, List<CSVRecord>> records;

if (file.isDirectory()) {
log.info("Reading GTFS CSV files from directory: {}", path);
Expand All @@ -73,7 +77,21 @@ public Map<GtfsFile, List<CSVRecord>> read(String path) throws IOException {
} else {
throw new IllegalArgumentException("Path must be a directory or a .zip file");
}
return records;

return buildSchedule(records);
}

/**
 * Feeds the raw CSV records of every GTFS file through a {@link GtfsScheduleParser} and returns the
 * schedule assembled by the builder.
 * <p>
 * The order of the parse calls is kept as is: agencies, calendars, calendar dates, stops, routes,
 * trips and finally stop times.
 *
 * @param records records per GTFS file; files that were not found have no entry in the map
 * @return the schedule built from the parsed records
 */
private GtfsSchedule buildSchedule(Map<GtfsFile, List<CSVRecord>> records) {
    GtfsScheduleBuilder builder = GtfsScheduleBuilder.builder();
    GtfsScheduleParser parser = new GtfsScheduleParser(builder);
    parser.parseAgencies(recordsOf(records, GtfsFile.AGENCY));
    parser.parseCalendars(recordsOf(records, GtfsFile.CALENDAR));
    parser.parseCalendarDates(recordsOf(records, GtfsFile.CALENDAR_DATES));
    parser.parseStops(recordsOf(records, GtfsFile.STOPS));
    parser.parseRoutes(recordsOf(records, GtfsFile.ROUTES));
    parser.parseTrips(recordsOf(records, GtfsFile.TRIPS));
    parser.parseStopTimes(recordsOf(records, GtfsFile.STOP_TIMES));
    return builder.build();
}

/**
 * Returns the records read for the given file, or an empty list when the file was missing.
 * <p>
 * A missing file is only logged as a warning while reading and leaves no entry in the map; without
 * this fallback the parser would fail with a NullPointerException on the absent list.
 */
private static List<CSVRecord> recordsOf(Map<GtfsFile, List<CSVRecord>> records, GtfsFile file) {
    return records.getOrDefault(file, List.of());
}

private Map<GtfsFile, List<CSVRecord>> readFromDirectory(File directory) throws IOException {
Expand All @@ -82,7 +100,7 @@ private Map<GtfsFile, List<CSVRecord>> readFromDirectory(File directory) throws
for (GtfsFile fileType : GtfsFile.values()) {
File csvFile = new File(directory, fileType.getFileName());
if (csvFile.exists()) {
log.debug("Reading GTFS CSV file: {}", csvFile.getAbsolutePath());
log.info("Reading GTFS CSV file: {}", csvFile.getAbsolutePath());
records.put(fileType, readCsvFile(csvFile));
} else {
log.warn("GTFS CSV file {} not found", csvFile.getAbsolutePath());
Expand All @@ -99,8 +117,12 @@ private Map<GtfsFile, List<CSVRecord>> readFromZip(File zipFile) throws IOExcept
for (GtfsFile fileType : GtfsFile.values()) {
ZipEntry entry = zf.getEntry(fileType.getFileName());
if (entry != null) {
log.debug("Reading GTFS file from ZIP: {}", entry.getName());
try (InputStreamReader reader = new InputStreamReader(zf.getInputStream(entry), StandardCharsets.UTF_8)) {
log.info("Reading GTFS file from ZIP: {}", entry.getName());
try (InputStreamReader reader = new InputStreamReader(BOMInputStream.builder()
.setInputStream(zf.getInputStream(entry))
.setByteOrderMarks(ByteOrderMark.UTF_8)
.setInclude(false)
.get(), StandardCharsets.UTF_8)) {
records.put(fileType, readCsv(reader));
}
} else {
Expand All @@ -113,14 +135,19 @@ private Map<GtfsFile, List<CSVRecord>> readFromZip(File zipFile) throws IOExcept
}

private List<CSVRecord> readCsvFile(File file) throws IOException {
try (FileReader reader = new FileReader(file)) {
try (FileInputStream fileInputStream = new FileInputStream(file);
BOMInputStream bomInputStream = BOMInputStream.builder()
.setInputStream(fileInputStream)
.setByteOrderMarks(ByteOrderMark.UTF_8)
.get(); InputStreamReader reader = new InputStreamReader(bomInputStream, StandardCharsets.UTF_8)) {
return readCsv(reader);
}
}

/**
 * Parses all CSV records available from the given reader.
 * <p>
 * The format is derived from {@link CSVFormat#DEFAULT} with a header, case-insensitive header
 * lookup and trimming enabled. The parser is closed before this method returns.
 *
 * @param reader source of the CSV text
 * @return the records returned by the parser
 * @throws IOException if parsing or closing the parser fails
 */
private List<CSVRecord> readCsv(InputStreamReader reader) throws IOException {
    CSVFormat csvFormat = CSVFormat.DEFAULT.builder()
            .setHeader()
            .setIgnoreHeaderCase(true)
            .setTrim(true)
            .build();
    try (CSVParser csvParser = new CSVParser(reader, csvFormat)) {
        log.debug("CSV Headers: {}", csvParser.getHeaderMap().keySet());
        List<CSVRecord> rows = csvParser.getRecords();
        return rows;
    }
}
Expand Down
18 changes: 18 additions & 0 deletions src/main/java/ch/naviqore/gtfs/schedule/RunExample.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package ch.naviqore.gtfs.schedule;

import ch.naviqore.gtfs.schedule.model.GtfsSchedule;
import ch.naviqore.gtfs.schedule.model.GtfsScheduleDay;

import java.io.IOException;
import java.time.LocalDate;

/**
 * Small manual example: reads a GTFS feed, requests the schedule for today and then keeps the JVM
 * alive for a while.
 */
public class RunExample {
    /** Fallback feed location used when no path is passed on the command line. */
    private static final String GTFS_FILE = "/Users/munterfi/Downloads/gtfs_fp2024_2024-04-11_09-11.zip";

    /**
     * Runs the example.
     *
     * @param args optional; {@code args[0]} is the path to a GTFS directory or .zip file. When absent,
     *             {@link #GTFS_FILE} is used.
     * @throws IOException          if the feed cannot be read
     * @throws InterruptedException if the final sleep is interrupted
     */
    public static void main(String[] args) throws IOException, InterruptedException {
        // Allow running the example on machines other than the original author's.
        String path = args.length > 0 ? args[0] : GTFS_FILE;
        GtfsSchedule schedule = new GtfsScheduleReader().read(path);
        schedule.getScheduleForDay(LocalDate.now());
        // NOTE(review): the explicit GC and 30 s pause look like a window for inspecting memory
        // usage with an external tool - confirm before removing.
        System.gc();
        Thread.sleep(30000);
    }
}
4 changes: 4 additions & 0 deletions src/main/java/ch/naviqore/gtfs/schedule/model/Agency.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
package ch.naviqore.gtfs.schedule.model;

/**
 * Immutable value describing a transit agency.
 * <p>
 * NOTE(review): presumably filled from the agency_id, agency_name, agency_url and agency_timezone
 * columns (in that order) - confirm against GtfsScheduleBuilder.
 *
 * @param agency   agency identifier
 * @param name     agency name
 * @param url      agency URL, kept as plain text
 * @param timezone agency timezone, kept as plain text
 */
public record Agency(String agency, String name, String url, String timezone) {
}
64 changes: 64 additions & 0 deletions src/main/java/ch/naviqore/gtfs/schedule/model/Calendar.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
package ch.naviqore.gtfs.schedule.model;

import ch.naviqore.gtfs.schedule.type.ExceptionType;
import lombok.AccessLevel;
import lombok.Getter;
import lombok.RequiredArgsConstructor;

import java.time.DayOfWeek;
import java.time.LocalDate;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;

/**
 * Service calendar: a set of regular weekdays within a start/end date range, plus per-date
 * exceptions that add or remove service on individual days.
 * <p>
 * Two calendars are equal when their ids are equal.
 */
@RequiredArgsConstructor(access = AccessLevel.PACKAGE)
@Getter
public final class Calendar {
    private final String id;
    private final EnumSet<DayOfWeek> serviceDays;
    private final LocalDate startDate;
    private final LocalDate endDate;
    /** Exceptions to the regular pattern, keyed by the date they apply to. */
    private final Map<LocalDate, CalendarDate> calendarDates = new HashMap<>();

    /**
     * Determines if the service is operational on a specific day, considering both regular service days and
     * exceptions.
     * <p>
     * An exception registered for the date always wins: service is available exactly when the
     * exception type is {@link ExceptionType#ADDED}. This also holds for dates outside the
     * start/end range, so explicitly added service days are not lost. Without an exception, the
     * date has to lie within the range (both ends inclusive) and fall on a regular service day.
     *
     * @param date the date to check for service availability
     * @return true if the service is operational on the given date, false otherwise
     */
    public boolean isServiceAvailable(LocalDate date) {
        // Exceptions are checked first so that an added date outside the regular range counts.
        CalendarDate exception = calendarDates.get(date);
        if (exception != null) {
            return exception.type() == ExceptionType.ADDED;
        }
        if (date.isBefore(startDate) || date.isAfter(endDate)) {
            return false;
        }
        return serviceDays.contains(date.getDayOfWeek());
    }

    /**
     * Registers an exception for its date, replacing any exception already stored for that date.
     *
     * @param calendarDate the exception to register
     */
    void addCalendarDate(CalendarDate calendarDate) {
        calendarDates.put(calendarDate.date(), calendarDate);
    }

    @Override
    public boolean equals(Object obj) {
        if (obj == this) {
            return true;
        }
        if (obj == null || obj.getClass() != this.getClass()) {
            return false;
        }
        var that = (Calendar) obj;
        return Objects.equals(this.id, that.id);
    }

    @Override
    public int hashCode() {
        return Objects.hash(id);
    }

    @Override
    public String toString() {
        return "Calendar[id=" + id + ", serviceDays=" + serviceDays + ", startDate=" + startDate
                + ", endDate=" + endDate + ']';
    }

}
12 changes: 12 additions & 0 deletions src/main/java/ch/naviqore/gtfs/schedule/model/CalendarDate.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package ch.naviqore.gtfs.schedule.model;

import ch.naviqore.gtfs.schedule.type.ExceptionType;

import java.time.LocalDate;

/**
 * Exception to a calendar's regular service pattern on a single date.
 * <p>
 * Instances are ordered by {@code date} only; calendar and type do not take part in the comparison.
 *
 * @param calendar the calendar this exception is associated with
 * @param date     the date the exception applies to
 * @param type     the kind of exception
 */
public record CalendarDate(Calendar calendar, LocalDate date, ExceptionType type) implements Comparable<CalendarDate> {
    @Override
    public int compareTo(CalendarDate other) {
        LocalDate otherDate = other.date();
        return date.compareTo(otherDate);
    }
}
55 changes: 55 additions & 0 deletions src/main/java/ch/naviqore/gtfs/schedule/model/GtfsSchedule.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
package ch.naviqore.gtfs.schedule.model;

import lombok.AccessLevel;
import lombok.RequiredArgsConstructor;

import java.time.LocalDate;
import java.util.Collections;
import java.util.Map;
import java.util.stream.Collectors;

/**
 * Holds the agencies, calendars, stops, routes and trips of a GTFS schedule, each keyed by a string.
 * The maps are handed out as unmodifiable views.
 */
@RequiredArgsConstructor(access = AccessLevel.PACKAGE)
public class GtfsSchedule {

    private final Map<String, Agency> agencies;
    private final Map<String, Calendar> calendars;
    private final Map<String, Stop> stops;
    private final Map<String, Route> routes;
    private final Map<String, Trip> trips;

    /**
     * Retrieves a snapshot of the GTFS schedule active on a specific date.
     * <p>
     * Not implemented yet: the trips whose calendar reports service on the date are collected, but
     * the method currently always returns {@code null}.
     *
     * @param date the date for which the active schedule is requested.
     * @return currently always {@code null}; intended to be a GtfsScheduleDay containing only the
     *         active routes, stops, and trips for the specified date.
     */
    public GtfsScheduleDay getScheduleForDay(LocalDate date) {
        Map<String, Trip> activeTrips = trips.entrySet()
                .stream()
                .filter(tripEntry -> {
                    Trip trip = tripEntry.getValue();
                    return trip.getCalendar().isServiceAvailable(date);
                })
                .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));

        // TODO: Implement efficiently without copying.
        //  return new GtfsScheduleDay(date, activeStops, activeRoutes, activeTrips);
        return null;
    }

    /** @return unmodifiable view of the agencies */
    public Map<String, Agency> getAgencies() {
        return Collections.unmodifiableMap(agencies);
    }

    /** @return unmodifiable view of the calendars */
    public Map<String, Calendar> getCalendars() {
        return Collections.unmodifiableMap(calendars);
    }

    /** @return unmodifiable view of the stops */
    public Map<String, Stop> getStops() {
        return Collections.unmodifiableMap(stops);
    }

    /** @return unmodifiable view of the routes */
    public Map<String, Route> getRoutes() {
        return Collections.unmodifiableMap(routes);
    }

    /** @return unmodifiable view of the trips */
    public Map<String, Trip> getTrips() {
        return Collections.unmodifiableMap(trips);
    }
}
Loading
Loading