diff --git a/packages/analysis-engine/src/constant.ts b/packages/analysis-engine/src/constant.ts new file mode 100644 index 00000000..07a83dad --- /dev/null +++ b/packages/analysis-engine/src/constant.ts @@ -0,0 +1,2 @@ +export const COMMIT_SEPARATOR = "4itc2s8hH-oA64s08h19"; +export const GIT_LOG_SEPARATOR = "I9M-0XOzvHlYPegVPpzb"; diff --git a/packages/analysis-engine/src/index.ts b/packages/analysis-engine/src/index.ts index f6de204b..1549803d 100644 --- a/packages/analysis-engine/src/index.ts +++ b/packages/analysis-engine/src/index.ts @@ -92,3 +92,4 @@ export class AnalysisEngine { } export default AnalysisEngine; +export { COMMIT_SEPARATOR, GIT_LOG_SEPARATOR } from "./constant"; diff --git a/packages/analysis-engine/src/parser.spec.ts b/packages/analysis-engine/src/parser.spec.ts index 50feb085..d86a36eb 100644 --- a/packages/analysis-engine/src/parser.spec.ts +++ b/packages/analysis-engine/src/parser.spec.ts @@ -1,5 +1,6 @@ import { getCommitMessageType } from "./commit.util"; -import getCommitRaws from "./parser"; +import { COMMIT_SEPARATOR, GIT_LOG_SEPARATOR } from "./constant"; +import getCommitRaws from "./parser"; import type { CommitRaw, DifferenceStatistic } from "./types"; describe("commit message type", () => { @@ -34,117 +35,229 @@ describe("commit message type", () => { }); }); -describe('getCommitRaws', () => { - const testCommitLines = [ - "commit a b (HEAD)", - "commit a b (HEAD -> main, origin/main, origin/HEAD)", - "commit a b (HEAD, tag: v1.0.0)", - "commit a b (HEAD -> main, origin/main, origin/HEAD, tag: v2.0.0)", - "commit a b (HEAD, tag: v2.0.0, tag: v1.4)" - ]; +describe("getCommitRaws", () => { + const fakeAuthorAndCommitter = `${GIT_LOG_SEPARATOR}John Park${GIT_LOG_SEPARATOR}mail@gmail.com${GIT_LOG_SEPARATOR}Sun Sep 4 20:17:59 2022 +0900${GIT_LOG_SEPARATOR}John Park 2${GIT_LOG_SEPARATOR}mail2@gmail.com${GIT_LOG_SEPARATOR}Sun Sep 5 20:17:59 2022 +0900`; + const fakeCommitMessage = `${GIT_LOG_SEPARATOR}commit 
message${GIT_LOG_SEPARATOR}`; + const fakeCommitMessageAndBody = `${GIT_LOG_SEPARATOR}commit message title\n\ncommit message body${GIT_LOG_SEPARATOR}`; + const fakeCommitHash = `a${GIT_LOG_SEPARATOR}b`; + const fakeCommitRef = `${GIT_LOG_SEPARATOR}HEAD`; + const fakeCommitFileChange = "10\t0\ta.ts\n1\t0\tREADME.md"; - const expectedBranches = [ - ['HEAD'], - ['HEAD', 'main', 'origin/main', 'origin/HEAD'], - ['HEAD'], - ['HEAD', 'main', 'origin/main', 'origin/HEAD'], - ['HEAD'] - ]; - - const expectedTags = [ - [], - [], - ['v1.0.0'], - ['v2.0.0'], - ['v2.0.0', 'v1.4'] - ]; - - const testCommitFileChanges = [ - "10\t0\ta.ts\n1\t0\tREADME.md", - "3\t3\ta.ts", - "4\t0\ta.ts", - "0\t6\ta.ts\n2\t0\tb.ts\n3\t3\tc.ts" - ]; - - const expectedFileChanged:DifferenceStatistic[] = [ - { - totalInsertionCount: 11, - totalDeletionCount: 0, - fileDictionary: { - 'a.ts': { insertionCount: 10, deletionCount: 0 }, - 'README.md': { insertionCount: 1, deletionCount: 0 }, - } - }, - { - totalInsertionCount: 3, - totalDeletionCount: 3, - fileDictionary: { 'a.ts': { insertionCount: 3, deletionCount: 3 } } - }, - { - totalInsertionCount: 4, - totalDeletionCount: 0, - fileDictionary: { 'a.ts': { insertionCount: 4, deletionCount: 0 } } - }, - { - totalInsertionCount: 5, - totalDeletionCount: 9, - fileDictionary: { - 'a.ts': { insertionCount: 0, deletionCount: 6 }, - 'b.ts': { insertionCount: 2, deletionCount: 0 }, - 'c.ts': { insertionCount: 3, deletionCount: 3 }, - } - } - ]; - - const commonExpectatedResult: CommitRaw={ + const commonExpectatedResult: CommitRaw = { sequence: 0, - id: 'a', - parents: ['b'], - branches: ['HEAD'], + id: "a", + parents: ["b"], + branches: ["HEAD"], tags: [], - author: { name: 'John Park', email: 'mail@gmail.com' }, - authorDate: new Date('Sun Sep 4 20:17:59 2022 +0900'), - committer: { name: 'John Park', email: 'mail@gmail.com' }, - committerDate: new Date('Sun Sep 4 20:17:59 2022 +0900'), - message: 'commit message', + author: { name: "John Park", email: 
"mail@gmail.com" }, + authorDate: new Date("Sun Sep 4 20:17:59 2022 +0900"), + committer: { name: "John Park 2", email: "mail2@gmail.com" }, + committerDate: new Date("Sun Sep 5 20:17:59 2022 +0900"), + message: "commit message", differenceStatistic: { totalInsertionCount: 0, totalDeletionCount: 0, fileDictionary: {}, }, - commitMessageType: "" + commitMessageType: "", }; + const expectedCommitMessageBody = "commit message title\n\ncommit message body"; + const expectedFileChange: DifferenceStatistic = { + totalInsertionCount: 11, + totalDeletionCount: 0, + fileDictionary: { + "a.ts": { insertionCount: 10, deletionCount: 0 }, + "README.md": { insertionCount: 1, deletionCount: 0 }, + }, + }; + + it.each([ + [ + `${COMMIT_SEPARATOR}${`a${GIT_LOG_SEPARATOR}`}${fakeCommitRef}${fakeAuthorAndCommitter}${fakeCommitMessage}`, + { + ...commonExpectatedResult, + id: "a", + parents: [""], + }, + ], + [ + `${COMMIT_SEPARATOR}${`c${GIT_LOG_SEPARATOR}b`}${fakeCommitRef}${fakeAuthorAndCommitter}${fakeCommitMessage}`, + { + ...commonExpectatedResult, + id: "c", + parents: ["b"], + }, + ], + [ + `${COMMIT_SEPARATOR}${`d${GIT_LOG_SEPARATOR}e f`}${fakeCommitRef}${fakeAuthorAndCommitter}${fakeCommitMessage}`, + { + ...commonExpectatedResult, + id: "d", + parents: ["e", "f"], + }, + ], + ])("should parse gitlog to commitRaw(hash)", (mockLog, expectedResult) => { + const result = getCommitRaws(mockLog); + expect(result).toEqual([expectedResult]); + }); + + it.each([ + [ + `${COMMIT_SEPARATOR}${fakeCommitHash}${`${GIT_LOG_SEPARATOR}HEAD`}${fakeAuthorAndCommitter}${fakeCommitMessage}`, + { + ...commonExpectatedResult, + branches: ["HEAD"], + tags: [], + }, + ], + [ + `${COMMIT_SEPARATOR}${fakeCommitHash}${`${GIT_LOG_SEPARATOR}HEAD -> main, origin/main, origin/HEAD`}${fakeAuthorAndCommitter}${fakeCommitMessage}`, + { + ...commonExpectatedResult, + branches: ["HEAD", "main", "origin/main", "origin/HEAD"], + tags: [], + }, + ], + [ + 
`${COMMIT_SEPARATOR}${fakeCommitHash}${`${GIT_LOG_SEPARATOR}HEAD, tag: v1.0.0`}${fakeAuthorAndCommitter}${fakeCommitMessage}`, + { + ...commonExpectatedResult, + branches: ["HEAD"], + tags: ["v1.0.0"], + }, + ], + [ + `${COMMIT_SEPARATOR}${fakeCommitHash}${`${GIT_LOG_SEPARATOR}HEAD -> main, origin/main, origin/HEAD, tag: v2.0.0`}${fakeAuthorAndCommitter}${fakeCommitMessage}`, + { + ...commonExpectatedResult, + branches: ["HEAD", "main", "origin/main", "origin/HEAD"], + tags: ["v2.0.0"], + }, + ], + [ + `${COMMIT_SEPARATOR}${fakeCommitHash}${`${GIT_LOG_SEPARATOR}HEAD, tag: v2.0.0, tag: v1.4`}${fakeAuthorAndCommitter}${fakeCommitMessage}`, + { + ...commonExpectatedResult, + branches: ["HEAD"], + tags: ["v2.0.0", "v1.4"], + }, + ], + [ + `${COMMIT_SEPARATOR}${fakeCommitHash}${GIT_LOG_SEPARATOR}${fakeAuthorAndCommitter}${fakeCommitMessage}`, + { + ...commonExpectatedResult, + branches: [], + tags: [], + }, + ], + ])("should parse gitlog to commitRaw(branch, tag)", (mockLog, expectedResult) => { + const result = getCommitRaws(mockLog); + expect(result).toEqual([expectedResult]); + }); + + it.each([ + [ + `${COMMIT_SEPARATOR}${fakeCommitHash}${fakeCommitRef}${fakeAuthorAndCommitter}${fakeCommitMessage}\n${"10\t0\ta.ts\n1\t0\tREADME.md"}`, + { + ...commonExpectatedResult, + differenceStatistic: { + totalInsertionCount: 11, + totalDeletionCount: 0, + fileDictionary: { + "a.ts": { insertionCount: 10, deletionCount: 0 }, + "README.md": { insertionCount: 1, deletionCount: 0 }, + }, + }, + }, + ], + [ + `${COMMIT_SEPARATOR}${fakeCommitHash}${fakeCommitRef}${fakeAuthorAndCommitter}${fakeCommitMessage}\n${"3\t3\ta.ts"}`, + { + ...commonExpectatedResult, + differenceStatistic: { + totalInsertionCount: 3, + totalDeletionCount: 3, + fileDictionary: { "a.ts": { insertionCount: 3, deletionCount: 3 } }, + }, + }, + ], + [ + `${COMMIT_SEPARATOR}${fakeCommitHash}${fakeCommitRef}${fakeAuthorAndCommitter}${fakeCommitMessage}\n${"4\t0\ta.ts"}`, + { + ...commonExpectatedResult, + 
differenceStatistic: { + totalInsertionCount: 4, + totalDeletionCount: 0, + fileDictionary: { "a.ts": { insertionCount: 4, deletionCount: 0 } }, + }, + }, + ], + [ + `${COMMIT_SEPARATOR}${fakeCommitHash}${fakeCommitRef}${fakeAuthorAndCommitter}${fakeCommitMessage}\n${"0\t6\ta.ts\n2\t0\tb.ts\n3\t3\tc.ts"}`, + { + ...commonExpectatedResult, + differenceStatistic: { + totalInsertionCount: 5, + totalDeletionCount: 9, + fileDictionary: { + "a.ts": { insertionCount: 0, deletionCount: 6 }, + "b.ts": { insertionCount: 2, deletionCount: 0 }, + "c.ts": { insertionCount: 3, deletionCount: 3 }, + }, + }, + }, + ], + ])("should parse gitlog to commitRaw(file changed)", (mockLog, expectedResult) => { + const result = getCommitRaws(mockLog); + expect(result).toEqual([expectedResult]); + }); + + it(`should parse gitlog to commitRaw(multiple commits)`, () => { + const mockLog = `${COMMIT_SEPARATOR}${fakeCommitHash}${fakeCommitRef}${fakeAuthorAndCommitter}${fakeCommitMessage}\n${fakeCommitFileChange}${COMMIT_SEPARATOR}${fakeCommitHash}${fakeCommitRef}${fakeAuthorAndCommitter}${fakeCommitMessage}`; + const result = getCommitRaws(mockLog); + const expectedResult = [ + { ...commonExpectatedResult, differenceStatistic: expectedFileChange }, + { ...commonExpectatedResult, sequence: 1 }, + ]; - testCommitLines.forEach((mockLog, index) => { - it(`should parse gitlog to commitRaw(branch, tag)`, () => { - const mock = `${mockLog} -Author: John Park -AuthorDate: Sun Sep 4 20:17:59 2022 +0900 -Commit: John Park -CommitDate: Sun Sep 4 20:17:59 2022 +0900 -\n\tcommit message -`; - const result = getCommitRaws(mock); - const expectedResult = { ...commonExpectatedResult, branches: expectedBranches[index], tags: expectedTags[index] }; - - expect(result).toEqual([expectedResult]); - }); + expect(result).toEqual(expectedResult); + }); + + it.each([ + [ + `${COMMIT_SEPARATOR}${fakeCommitHash}${fakeCommitRef}${fakeAuthorAndCommitter}${`${GIT_LOG_SEPARATOR}commit message title${GIT_LOG_SEPARATOR}`}`, + 
{ ...commonExpectatedResult, message: "commit message title" }, + ], + [ + `${COMMIT_SEPARATOR}${fakeCommitHash}${fakeCommitRef}${fakeAuthorAndCommitter}${`${GIT_LOG_SEPARATOR}commit message title\ncommit message${GIT_LOG_SEPARATOR}`}`, + { ...commonExpectatedResult, message: "commit message title\ncommit message" }, + ], + [ + `${COMMIT_SEPARATOR}${fakeCommitHash}${fakeCommitRef}${fakeAuthorAndCommitter}${`${GIT_LOG_SEPARATOR}commit message title\n\ncommit message body${GIT_LOG_SEPARATOR}`}`, + { ...commonExpectatedResult, message: "commit message title\n\ncommit message body" }, + ], + [ + `${COMMIT_SEPARATOR}${fakeCommitHash}${fakeCommitRef}${fakeAuthorAndCommitter}${`${GIT_LOG_SEPARATOR}commit message title\n\n\ncommit message body${GIT_LOG_SEPARATOR}`}`, + { ...commonExpectatedResult, message: "commit message title\n\n\ncommit message body" }, + ], + [ + `${COMMIT_SEPARATOR}${fakeCommitHash}${fakeCommitRef}${fakeAuthorAndCommitter}${`${GIT_LOG_SEPARATOR}${GIT_LOG_SEPARATOR}`}`, + { ...commonExpectatedResult, message: "" }, + ], + ])("should parse gitlog to commitRaw(commit message)", (mockLog, expectedResult) => { + const result = getCommitRaws(mockLog); + expect(result).toEqual([expectedResult]); }); - testCommitFileChanges.forEach((mockLog, index) => { - it(`should parse gitlog to commitRaw(file changed)`, () => { - const mock = `commit a b (HEAD) -Author: John Park -AuthorDate: Sun Sep 4 20:17:59 2022 +0900 -Commit: John Park -CommitDate: Sun Sep 4 20:17:59 2022 +0900 -\n\tcommit message -\n${mockLog} -`; - const result = getCommitRaws(mock); - const expectedResult = { ...commonExpectatedResult, differenceStatistic: expectedFileChanged[index] }; + it(`should parse gitlog to commitRaw(commit message body and file change)`, () => { + const mockLog = `${COMMIT_SEPARATOR}${fakeCommitHash}${fakeCommitRef}${fakeAuthorAndCommitter}${fakeCommitMessageAndBody}\n${fakeCommitFileChange}`; + const result = getCommitRaws(mockLog); + const expectedResult = { + 
/** Splits a numstat section into lines, accepting both LF and CRLF endings. */
const EOL_REGEX = /\r?\n/;

/** A numstat count column made up of decimal digits only. */
const CHANGE_COUNT_REGEX = /^\d+$/;

/**
 * Splits a ref-name list such as `HEAD -> main, origin/main, tag: v1.0.0`
 * into branch names and tag names.
 *
 * Entries prefixed with `tag: ` become tags (prefix removed); every other
 * non-empty entry is treated as a branch. An empty list yields two empty arrays.
 */
function parseRefs(refs: string): { branches: string[]; tags: string[] } {
  const branches: string[] = [];
  const tags: string[] = [];

  // "HEAD -> main" is rewritten to "HEAD, main" so both names are kept as branches.
  for (const ref of refs.replace(" -> ", ", ").split(", ")) {
    if (ref === "") continue;
    if (ref.startsWith("tag: ")) {
      tags.push(ref.replace("tag: ", ""));
    } else {
      branches.push(ref);
    }
  }

  return { branches, tags };
}

/**
 * Converts one numstat count column to a number.
 *
 * `-` is counted as 0, matching the previous behaviour of this parser.
 * Anything that is not a plain decimal number yields `undefined`, so the
 * caller can discard the line instead of adding `NaN` to the totals.
 */
function toChangeCount(rawCount: string | undefined): number | undefined {
  if (rawCount === "-") return 0;
  if (rawCount === undefined || !CHANGE_COUNT_REGEX.test(rawCount)) return undefined;
  return Number(rawCount);
}

/**
 * Builds the per-commit difference statistic from the numstat section of a record.
 *
 * Each useful line has the shape `<insertions>\t<deletions>\t<path>`.
 * Blank lines and lines that do not match that shape are ignored; every line is
 * inspected, so the first stat line is counted whether or not the section starts
 * with a line break.
 */
function parseDiffStats(diffStats: string | undefined): CommitRaw["differenceStatistic"] {
  const statistic: CommitRaw["differenceStatistic"] = {
    totalInsertionCount: 0,
    totalDeletionCount: 0,
    fileDictionary: {},
  };
  if (!diffStats) return statistic;

  for (const line of diffStats.split(EOL_REGEX)) {
    if (line === "") continue;

    const [insertions, deletions, path] = line.split("\t");
    const insertionCount = toChangeCount(insertions);
    const deletionCount = toChangeCount(deletions);
    // Not a numstat line: skip it rather than polluting totals and dictionary keys.
    if (insertionCount === undefined || deletionCount === undefined || !path) continue;

    statistic.totalInsertionCount += insertionCount;
    statistic.totalDeletionCount += deletionCount;
    statistic.fileDictionary[path] = { insertionCount, deletionCount };
  }

  return statistic;
}

/**
 * Parses separator-delimited `git log` output into `CommitRaw` records.
 *
 * Expected layout of `log`: every commit record is introduced by
 * `COMMIT_SEPARATOR` and consists of the following fields joined by
 * `GIT_LOG_SEPARATOR`, in this order:
 * id, parents, refs, author name, author email, author date, committer name,
 * committer email, committer date, message, numstat section.
 *
 * Notes on behaviour:
 * - Text before the first `COMMIT_SEPARATOR` is ignored.
 * - `sequence` is the zero-based position of the record in `log`.
 * - `parents` is the parent field split on single spaces, so an empty parent
 *   field yields `[""]` (the existing spec relies on this).
 * - Records are assumed to carry at least the first ten fields; the numstat
 *   section may be empty or missing.
 *
 * @param log raw log text; an empty string yields an empty array
 * @returns one `CommitRaw` per record, in input order
 */
export default function getCommitRaws(log: string): CommitRaw[] {
  if (!log) return [];

  const commitRaws: CommitRaw[] = [];
  const records = log.split(COMMIT_SEPARATOR);

  // records[0] is whatever precedes the first separator, so parsing starts at 1.
  for (let commitIdx = 1; commitIdx < records.length; commitIdx += 1) {
    const [
      id,
      parents,
      refs,
      authorName,
      authorEmail,
      authorDate,
      committerName,
      committerEmail,
      committerDate,
      message,
      diffStats,
    ] = records[commitIdx].split(GIT_LOG_SEPARATOR);

    const { branches, tags } = parseRefs(refs);

    commitRaws.push({
      sequence: commitIdx - 1,
      id,
      parents: parents.split(" "),
      branches,
      tags,
      author: {
        name: authorName,
        email: authorEmail,
      },
      authorDate: new Date(authorDate),
      committer: {
        name: committerName,
        email: committerEmail,
      },
      committerDate: new Date(committerDate),
      message,
      commitMessageType: getCommitMessageType(message),
      differenceStatistic: parseDiffStats(diffStats),
    });
  }

  return commitRaws;
}