Skip to content

Commit

Permalink
Handle invalid UTF-8 and a "DEL" ASCII code
Browse files Browse the repository at this point in the history
In user output, e.g. for debugging, and in test failure messages there
may be invalid UTF-8 or an ASCII DEL character.

Invalid UTF-8 prevented PHP from producing any JSON at all. This was
found while working on Rotational Cipher, where ASCII characters are
rotated and the result might be invalid UTF-8.

An ASCII DEL character was also produced during that exercise and caused
the output to be displayed incorrectly compared to the JSON. PHPUnit
detects ASCII control characters and renders such output as a "binary
string" - but not for the invisible DEL character (0x7F), which consumes
the following character on screen.
  • Loading branch information
mk-mxp committed Oct 1, 2024
1 parent 3b2bb08 commit 211300d
Show file tree
Hide file tree
Showing 7 changed files with 49 additions and 5 deletions.
18 changes: 16 additions & 2 deletions src/Result.php
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,25 @@ public function jsonSerialize(): mixed
}

if ($this->userOutput !== '') {
$result['output'] = $this->userOutput;
// In 2024 some innocent ASCII code still fools displays and editors.
$result['output'] = \str_replace(
[
"\u{7F}", // Delete
],
"\u{FFFD}", // Unicode substitute for invalid characters
$this->userOutput
);
}

if ($this->phpUnitMessage !== '') {
$result['message'] = $this->phpUnitMessage;
// In 2024 some innocent ASCII code still fools displays and editors.
$result['message'] = \str_replace(
[
"\u{7F}", // Delete
],
"\u{FFFD}", // Unicode substitute for invalid characters
$this->phpUnitMessage
);
}

return $result;
Expand Down
4 changes: 3 additions & 1 deletion src/Tracer.php
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,9 @@ private function saveResults(): void
$this->outFileName,
\json_encode(
$this->result,
self::DEBUG_PRETTY_JSON ? JSON_PRETTY_PRINT : 0
JSON_INVALID_UTF8_SUBSTITUTE | (
self::DEBUG_PRETTY_JSON ? JSON_PRETTY_PRINT : 0
)
) . "\n",
);
}
Expand Down
11 changes: 11 additions & 0 deletions tests/fail-with-invalid-chars-in-message/HelloWorld.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<?php

/**
 * Test fixture: builds a greeting that embeds characters known to be
 * troublesome in rendered output.
 *
 * The returned string contains one ASCII DEL byte (0x7F) and a run of six
 * 0xFF bytes, which do not form valid UTF-8, separated by a single space.
 */
function helloWorld()
{
    $delete = "\u{7F}"; // Delete
    $invalidUtf8 = "\xFF\xFF\xFF\xFF\xFF\xFF"; // Invalid UTF-8

    // Same layout as joining the two pieces with a single space.
    $payload = $delete . ' ' . $invalidUtf8;

    return 'Handle invalid chars: ' . $payload . '!';
}
16 changes: 16 additions & 0 deletions tests/fail-with-invalid-chars-in-message/HelloWorldTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<?php

declare(strict_types=1);

class HelloWorldTest extends PHPUnit\Framework\TestCase // Fixture whose single test is expected to fail.
{ // NOTE: keep the line count above the assertion stable; the recorded expected output references HelloWorldTest.php:14.
public static function setUpBeforeClass(): void
{
require_once 'HelloWorld.php'; // Loads helloWorld() once, before any test in this class runs.
}

public function testHelloWorld(): void
{
$this->assertEquals('Never matches', helloWorld());
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"version":3,"status":"fail","tests":[{"name":"Hello world","status":"fail","test_code":"$this->assertEquals('Never matches', helloWorld());\n","message":"HelloWorldTest::testHelloWorld\nFailed asserting that two strings are equal.\n--- Expected\n+++ Actual\n@@ @@\n-'Never matches'\n+'Handle invalid chars: \ufffd \ufffd\ufffd\ufffd\ufffd\ufffd\ufffd!'\n\nHelloWorldTest.php:14"}]}
2 changes: 1 addition & 1 deletion tests/success-with-user-output/HelloWorld.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

function helloWorld()
{
echo "Some 'user üâ`|| \r\toutput\n"
echo "Some 'user üâ`|| \u{7F} \r\toutput \xFF\xFF\xFF\xFF\xFF\xFF \n"
. 'containing \\ various "problematic" and UTF-8 chars' . PHP_EOL;
var_dump(new stdClass());

Expand Down
2 changes: 1 addition & 1 deletion tests/success-with-user-output/expected_results.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"version":3,"status":"pass","tests":[{"name":"Hello world","status":"pass","test_code":"$this->assertEquals('Hello, World!', helloWorld());\n","output":"Some 'user \u00fc\u00e2`|| \r\toutput\ncontaining \\ various \"problematic\" and UTF-8 chars\nobject(stdClass)#79 (0) {\n}\n"}]}
{"version":3,"status":"pass","tests":[{"name":"Hello world","status":"pass","test_code":"$this->assertEquals('Hello, World!', helloWorld());\n","output":"Some 'user \u00fc\u00e2`|| \ufffd \r\toutput \ufffd\ufffd\ufffd\ufffd\ufffd\ufffd \ncontaining \\ various \"problematic\" and UTF-8 chars\nobject(stdClass)#79 (0) {\n}\n"}]}

0 comments on commit 211300d

Please sign in to comment.