Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add utf8 string view range/iter #1047

Merged
merged 1 commit into from
May 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 8 additions & 14 deletions src/lib/fcitx-utils/inputbuffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,33 +71,27 @@ const std::string &InputBuffer::userInput() const {

bool InputBuffer::typeImpl(const char *s, size_t length) {
FCITX_D();
auto utf8Length = fcitx::utf8::lengthValidated(s, s + length);
std::string_view view(s, length);
auto utf8Length = fcitx::utf8::lengthValidated(view);
if (utf8Length == fcitx::utf8::INVALID_LENGTH) {
throw std::invalid_argument("Invalid UTF-8 string");
}
if (d->isAsciiOnly() && utf8Length != length) {
if (d->isAsciiOnly() && utf8Length != view.size()) {
throw std::invalid_argument(
"ascii only buffer only accept ascii only string");
}
if (d->maxSize_ && (utf8Length + size() > d->maxSize_)) {
return false;
}
d->input_.insert(std::next(d->input_.begin(), cursorByChar()), s,
s + length);
d->input_.insert(std::next(d->input_.begin(), cursorByChar()), view.begin(),
view.end());
if (!d->isAsciiOnly()) {
const auto *iter = s;
auto func = [&iter]() {
const auto *next = fcitx::utf8::nextChar(iter);
auto diff = std::distance(iter, next);
iter = next;
return diff;
};

auto pos = d->cursor_;
while (iter < s + length) {
d->sz_.insert(std::next(d->sz_.begin(), pos), func());
for (auto chrView : utf8::MakeUTF8StringViewRange(view)) {
d->sz_.insert(std::next(d->sz_.begin(), pos), chrView.size());
pos++;
}

d->acc_.resize(d->sz_.size() + 1);
auto newDirty = d->cursor_ > 0 ? d->cursor_ - 1 : 0;
if (d->accDirty_ > newDirty) {
Expand Down
71 changes: 71 additions & 0 deletions src/lib/fcitx-utils/utf8.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,10 @@
/// \file
/// \brief C++ Utility functions for handling utf8 strings.

#include <iterator>
#include <stdexcept>
#include <string>
#include <string_view>
#include <fcitx-utils/cutf8.h>
#include <fcitx-utils/misc.h>
#include "fcitxutils_export.h"
Expand Down Expand Up @@ -222,6 +224,75 @@ auto MakeUTF8CharRange(const T &str) {
MakeUTF8CharIterator(std::end(str), std::end(str)));
}

/// \brief Input iterator that walks a UTF-8 byte sequence one character at a
/// time and exposes every character as a std::string_view over its bytes.
///
/// The views are built from the address of the element the wrapped iterator
/// refers to, so the underlying storage has to be contiguous and must stay
/// alive for as long as the views are used.
///
/// The constructor and operator++ throw std::runtime_error when the position
/// they land on is not the end and no character could be decoded there.
template <typename Iter>
class UTF8StringViewIter {
public:
    using iterator_category = std::input_iterator_tag;
    using value_type = std::string_view;
    using difference_type = std::ptrdiff_t;
    using reference = const value_type &;
    using pointer = const value_type *;

    /// \param iter position of the first character to expose.
    /// \param end  end of the byte sequence; passing iter == end yields the
    ///             end sentinel.
    UTF8StringViewIter(Iter iter, Iter end) : iter_(iter), end_(end) {
        update();
    }
    FCITX_INLINE_DEFINE_DEFAULT_DTOR_AND_COPY(UTF8StringViewIter)

    /// View of the bytes of the current character (empty at the end).
    reference operator*() const { return currentView_; }

    pointer operator->() const { return &currentView_; }

    /// Number of bytes occupied by the current character.
    size_t charLength() const { return currentView_.size(); }

    /// Value that getNextChar stored for the current character.
    uint32_t chr() const { return currentChar_; }

    /// Advance to the next character.
    UTF8StringViewIter &operator++() {
        iter_ = next_;
        update();
        return *this;
    }

    UTF8StringViewIter operator++(int) {
        auto old = *this;
        ++(*this);
        return old;
    }

    // Comparisons only look at the current position; they are const so that
    // const iterators (and temporaries bound to const references) compare.
    bool operator==(const UTF8StringViewIter &other) const {
        return iter_ == other.iter_;
    }
    bool operator!=(const UTF8StringViewIter &other) const {
        return !operator==(other);
    }

private:
    /// Decode the character at iter_ and refresh next_, currentChar_ and
    /// currentView_ accordingly.
    void update() {
        // NOTE(review): still invoked at the end position so that chr() keeps
        // reporting whatever getNextChar yields there -- confirm getNextChar
        // itself tolerates iter == end.
        next_ = getNextChar(iter_, end_, &currentChar_);
        if (iter_ == end_) {
            // Never form &*iter_ here: dereferencing a past-the-end iterator
            // is undefined behavior (and trips checked iterators).
            currentView_ = std::string_view();
            return;
        }
        if (iter_ == next_) {
            // No progress before the end means nothing could be decoded.
            throw std::runtime_error("Invalid UTF8 character.");
        }
        currentView_ = std::string_view(&*iter_, std::distance(iter_, next_));
    }

    std::string_view currentView_;
    uint32_t currentChar_ = 0;
    Iter iter_;
    Iter next_;
    Iter end_;
};

template <typename Iter>
auto MakeUTF8StringViewIterator(Iter iter, Iter end) {
return UTF8StringViewIter<Iter>(iter, end);
}

/// \brief Build an iterable range over the UTF-8 characters of \p str, where
/// every element is a std::string_view of one character.
///
/// The range is a pair of UTF8StringViewIter objects: one starting at
/// std::begin(str) and an end sentinel constructed from (end, end). The
/// iterators refer to \p str, so it has to outlive the returned range.
template <typename T>
auto MakeUTF8StringViewRange(const T &str) {
    auto first = MakeUTF8StringViewIterator(std::begin(str), std::end(str));
    auto sentinel = MakeUTF8StringViewIterator(std::end(str), std::end(str));
    return MakeIterRange(first, sentinel);
}

} // namespace fcitx::utf8

#endif // _FCITX_UTILS_UTF8_H_
8 changes: 8 additions & 0 deletions test/testutf8.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,14 @@ int main() {
FCITX_ASSERT(iter.view() == expectCharStr[i]);
}

auto rangeView = fcitx::utf8::MakeUTF8StringViewRange(str);
i = 0;
for (auto iter = std::begin(rangeView), end = std::end(rangeView);
iter != end; ++iter, ++i) {
FCITX_ASSERT(iter->size() == expectLength[i]);
FCITX_ASSERT(*iter == expectCharStr[i]);
}

FCITX_ASSERT(fcitx::utf8::getLastChar(str) == 0xa);

std::string invalidStr = "\xe4\xff";
Expand Down
Loading