Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Checking in my changes. #1

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
layoutex_env/*
src/layoutex/generated-OUTLINE/*
# Temporary and binary files
*~
*.py[cod]
Expand Down Expand Up @@ -63,3 +65,4 @@ venv/
*.idea
*.pydevproject

layoutex_env/bin/activate
31 changes: 31 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# layoutex

### Requirements

Download and decompress the PubLayNet `labels.tar.gz` here: https://dax-cdn.cdn.appdomain.cloud/dax-publaynet/1.0.0/labels.tar.gz

You can find all of the PubLayNet datasets here: https://developer.ibm.com/exchanges/data/all/publaynet/

Move the extracted `publaynet` directory to `~/datasets/publaynet`, create a directory named `annotations` inside it, and copy `val.json` into that `annotations` directory.

### Running layoutex

To run, do the following:

```bash
# create the virtual environment
python3 -m venv layoutex_env

# activate it
source layoutex_env/bin/activate

# install requirements
pip install -e .
pip install -r requirements.txt

# run pytest
pytest -s tests/test_document_generator.py

# deactivate the venv when finished
deactivate
```
1 change: 1 addition & 0 deletions assets/datasets/publaynet/annotations/val.json

Large diffs are not rendered by default.

Binary file not shown.
21 changes: 21 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
python_barcode==0.14.0
codetiming==1.4.0
Faker==18.5.1
imgaug==0.4.0
importlib_metadata==4.6.4
matplotlib==3.6.3
numpy==1.24.1
#numpy==1.21.5
opencv_python==4.7.0.68
#Pillow==9.0.1
Pillow==9.5.0
qrcode==7.4.2
resize_image==0.4.0
seaborn==0.12.2
torch==1.13.1
torchvision==0.14.1
tqdm==4.64.1
typing_extensions==4.5.0
wandb==0.15.0
pytest==7.3.1
pytest-asyncio==0.21.0
49 changes: 49 additions & 0 deletions src/layoutex/component_util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
"""
Contains document component utility functions.
"""

import logging
logger = logging.getLogger(__name__)

# Size buckets as (exclusive lower bound on the ratio, label), ordered from the
# largest threshold to the smallest so the first match wins.
_WIDTH_BUCKETS = (
    (0.75, "FULL_WIDTH"),
    (0.5, "TWO_THIRDS_WIDTH"),
    (0.25, "HALF_WIDTH"),
    (0.01, "QUARTER_WIDTH"),
)
_HEIGHT_BUCKETS = (
    (0.75, "FULL_HEIGHT"),
    (0.25, "HALF_HEIGHT"),
    (0.05, "QUARTER_HEIGHT"),
    (0.01, "LINE_HEIGHT"),
)


def _bucket_for(ratio, buckets, default):
    """Return the label of the first bucket whose threshold ``ratio`` exceeds."""
    for threshold, label in buckets:
        if ratio > threshold:
            return label
    return default


def estimate_component_sizing(box, target_size, margin_size):
    """
    Estimate the component sizing based on the bounding box size and the
    target size of the document including margins.

    The usable extent is ``target_size - 2 * margin_size``; the box width and
    height (truncated to ``int``) are each divided by that same extent and the
    resulting ratios are mapped onto named size buckets.

    Args:
        box: Four values ``(x1, y1, x2, y2)`` describing the bounding box.
        target_size: Document size including margins (one value is used for
            both the width and the height ratio).
        margin_size: Margin removed from each side of ``target_size``.

    Returns:
        A ``(component_w, component_h)`` tuple of labels. Width is one of
        ``FULL_WIDTH`` (> 0.75), ``TWO_THIRDS_WIDTH`` (> 0.5), ``HALF_WIDTH``
        (> 0.25), ``QUARTER_WIDTH`` (> 0.01). Height is one of ``FULL_HEIGHT``
        (> 0.75), ``HALF_HEIGHT`` (> 0.25), ``QUARTER_HEIGHT`` (> 0.05),
        ``LINE_HEIGHT`` (> 0.01).

    Raises:
        ZeroDivisionError: If ``target_size - 2 * margin_size`` is zero, i.e.
            the margins consume the whole document.
    """
    # Lazy %-style arguments: the message is only formatted when DEBUG is on.
    logger.debug(
        "estimate_component_sizing(target_size=%s, margin_size=%s)",
        target_size,
        margin_size,
    )
    x1, y1, x2, y2 = box
    usable_size = target_size - 2 * margin_size
    logger.debug("computed target_size = %s", usable_size)

    if usable_size == 0:
        # Same exception type the bare division would raise, but with enough
        # context to diagnose the bad configuration.
        raise ZeroDivisionError(
            f"usable size is zero: target_size={target_size}, "
            f"margin_size={margin_size}"
        )

    w = int(x2 - x1)
    h = int(y2 - y1)
    logger.debug("computed dimensions = (w=%s, h=%s)", w, h)

    ratio_w = w / usable_size
    ratio_h = h / usable_size
    logger.debug("computed ratios = (ratio_w=%s, ratio_h=%s)", ratio_w, ratio_h)

    # NOTE(review): ratios at or below 0.01 (including negative ones) fall
    # back to the FULL_* labels. This is preserved from the original
    # behaviour; confirm it is intended for tiny or degenerate boxes.
    component_w = _bucket_for(ratio_w, _WIDTH_BUCKETS, "FULL_WIDTH")
    logger.debug("computed component_w = %s", component_w)

    component_h = _bucket_for(ratio_h, _HEIGHT_BUCKETS, "FULL_HEIGHT")
    logger.debug("computed component_h = %s", component_h)

    return component_w, component_h
8 changes: 4 additions & 4 deletions src/layoutex/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@
noisy_dir = 'image'
debug_dir = 'debug'

patch_dir = '/home/greg/datasets/dataset/rms/pix2pix-overlay'
asset_dir = '/home/greg/datasets/dataset/rms/assets'
patch_dir = '/home/sstauffer/specops/gitlab/layoutex/assets/pix2pix-overlay'
asset_dir = '/home/sstauffer/specops/gitlab/layoutex/assets'

txt_file_dir = 'text.txt'
txt_file_dir = '/home/sstauffer/specops/words/words_alpha.txt'

# maximum number of synthetic words to generate
num_synthetic_imgs = 3000
num_synthetic_imgs = 100
train_percentage = 0.8

test_dir = os.path.join(data_dir, val_dir, noisy_dir)
Loading