From 7481fdc1e36816796899e50bc6f48ce24ddb2118 Mon Sep 17 00:00:00 2001
From: Josh Heyer
Date: Tue, 24 Dec 2024 02:47:35 +0000
Subject: [PATCH 1/4] tag product root index files for PDF builds

---
 advocacy_docs/.gitignore | 2 ++
 advocacy_docs/edb-postgres-ai/cloud-service/index.mdx | 2 +-
 product_docs/docs/.gitignore | 1 +
 product_docs/docs/edb_plus/41/index.mdx | 1 +
 product_docs/docs/efm/4/index.mdx | 1 +
 product_docs/docs/epas/11/index.mdx | 1 +
 product_docs/docs/epas/12/index.mdx | 1 +
 product_docs/docs/epas/13/index.mdx | 1 +
 product_docs/docs/epas/14/index.mdx | 1 +
 product_docs/docs/epas/15/index.mdx | 1 +
 product_docs/docs/epas/16/index.mdx | 1 +
 product_docs/docs/epas/17/index.mdx | 1 +
 product_docs/docs/eprs/7/index.mdx | 1 +
 product_docs/docs/hadoop_data_adapter/2/index.mdx | 2 +-
 product_docs/docs/jdbc_connector/42.7.3.2/index.mdx | 1 +
 product_docs/docs/language_pack/4/index.mdx | 1 +
 product_docs/docs/lasso/4/index.mdx | 1 +
 product_docs/docs/livecompare/1/index.mdx | 1 +
 product_docs/docs/livecompare/2/index.mdx | 1 +
 product_docs/docs/livecompare/3/index.mdx | 1 +
 product_docs/docs/migration_portal/4/index.mdx | 1 +
 product_docs/docs/migration_toolkit/55/index.mdx | 1 +
 product_docs/docs/mongo_data_adapter/5/index.mdx | 1 +
 product_docs/docs/mysql_data_adapter/2/index.mdx | 1 +
 product_docs/docs/net_connector/4.0.10.1/index.mdx | 1 +
 product_docs/docs/net_connector/4.0.10.2/index.mdx | 1 +
 product_docs/docs/net_connector/4.0.6.1/index.mdx | 1 +
 product_docs/docs/net_connector/4.1.3.1/index.mdx | 1 +
 product_docs/docs/net_connector/4.1.5.1/index.mdx | 1 +
 product_docs/docs/net_connector/4.1.6.1/index.mdx | 1 +
 product_docs/docs/net_connector/5.0.7.1/index.mdx | 1 +
 product_docs/docs/net_connector/6.0.2.1/index.mdx | 1 +
 product_docs/docs/net_connector/7.0.4.1/index.mdx | 1 +
 product_docs/docs/net_connector/7.0.6.1/index.mdx | 1 +
 product_docs/docs/net_connector/7.0.6.2/index.mdx | 1 +
 product_docs/docs/net_connector/8.0.2.1/index.mdx | 1 +
 product_docs/docs/net_connector/8.0.5.1/index.mdx | 1 +
 product_docs/docs/ocl_connector/12/index.mdx | 1 +
 product_docs/docs/ocl_connector/13/index.mdx | 1 +
 product_docs/docs/ocl_connector/14/index.mdx | 1 +
 product_docs/docs/ocl_connector/15/index.mdx | 1 +
 product_docs/docs/ocl_connector/16/index.mdx | 1 +
 product_docs/docs/ocl_connector/17/index.mdx | 1 +
 product_docs/docs/odbc_connector/12/index.mdx | 1 +
 product_docs/docs/odbc_connector/13/index.mdx | 1 +
 product_docs/docs/odbc_connector/16/index.mdx | 1 +
 product_docs/docs/pem/9/index.mdx | 1 +
 product_docs/docs/pgbouncer/1/index.mdx | 1 +
 product_docs/docs/pgd/3.7/index.mdx | 1 +
 product_docs/docs/pgd/4/index.mdx | 1 +
 product_docs/docs/pgd/5.6/index.mdx | 1 +
 product_docs/docs/pgd/5/index.mdx | 1 +
 product_docs/docs/pge/13/index.mdx | 1 +
 product_docs/docs/pge/14/index.mdx | 1 +
 product_docs/docs/pge/15/index.mdx | 1 +
 product_docs/docs/pge/16/index.mdx | 1 +
 product_docs/docs/pge/17/index.mdx | 1 +
 product_docs/docs/pgpool/4/index.mdx | 1 +
 product_docs/docs/postgis/3/index.mdx | 1 +
 .../docs/postgres_distributed_for_kubernetes/1/index.mdx | 1 +
 product_docs/docs/postgres_for_kubernetes/1/index.mdx | 2 +-
 product_docs/docs/pwr/1/index.mdx | 1 +
 product_docs/docs/repmgr/5.3.3/index.mdx | 1 +
 product_docs/docs/slony/2.2.10/index.mdx | 1 +
 product_docs/docs/tde/15/index.mdx | 2 +-
 product_docs/docs/tpa/23/index.mdx | 2 +-
 66 files changed, 67 insertions(+), 5 deletions(-)
 create mode 100644 advocacy_docs/.gitignore

diff --git a/advocacy_docs/.gitignore b/advocacy_docs/.gitignore
new file mode 100644
index 00000000000..8790af26329
--- /dev/null
+++ b/advocacy_docs/.gitignore
@@ -0,0 +1,2 @@
+*/*/*.pdf
+*/*/*.pdf-hash
diff --git a/advocacy_docs/edb-postgres-ai/cloud-service/index.mdx b/advocacy_docs/edb-postgres-ai/cloud-service/index.mdx
index 32c15a91985..774890247d0 100644
--- a/advocacy_docs/edb-postgres-ai/cloud-service/index.mdx
+++ b/advocacy_docs/edb-postgres-ai/cloud-service/index.mdx
@@ -18,7 +18,7 @@ redirects:
   - /purl/upm/home/
   - /biganimal/latest/
   - /biganimal/latest/overview/
-
+pdf: true
 ---
 
 The EDB Postgres® AI Cloud Service is a holistic platform which includes hybrid data estate management, observability, analytics, and AI capabilities.
diff --git a/product_docs/docs/.gitignore b/product_docs/docs/.gitignore
index 6971327c529..8790af26329 100644
--- a/product_docs/docs/.gitignore
+++ b/product_docs/docs/.gitignore
@@ -1 +1,2 @@
 */*/*.pdf
+*/*/*.pdf-hash
diff --git a/product_docs/docs/edb_plus/41/index.mdx b/product_docs/docs/edb_plus/41/index.mdx
index 999c44dc2c5..8621eea5e35 100644
--- a/product_docs/docs/edb_plus/41/index.mdx
+++ b/product_docs/docs/edb_plus/41/index.mdx
@@ -15,6 +15,7 @@ legacyRedirectsGenerated:
   - "/edb-docs/d/edb-postgres-advanced-server/user-guides/database-compatibility-for-oracle-developers-tools-and-utilities-guide/9.6/DB_Compat_for_Oracle_Dev_Tools_Guide.1.05.html"
   - "/edb-docs/d/edb-postgres-advanced-server/user-guides/database-compatibility-for-oracle-developers-guide/9.5/Database_Compatibility_for_Oracle_Developers_Guide.1.304.html"
   - "/edb-docs/d/edb-postgres-advanced-server/user-guides/user-guide/9.5/EDB_Postgres_Enterprise_Guide.1.095.html"
+pdf: true
 ---
diff --git a/product_docs/docs/efm/4/index.mdx b/product_docs/docs/efm/4/index.mdx
index 2188cbc417c..1ae0dc6bc30 100644
--- a/product_docs/docs/efm/4/index.mdx
+++ b/product_docs/docs/efm/4/index.mdx
@@ -41,6 +41,7 @@ legacyRedirectsGenerated:
   - "/edb-docs/d/edb-postgres-failover-manager/user-guides/user-guide/4.1/genindex.html"
   - "/edb-docs/d/edb-postgres-failover-manager/user-guides/user-guide/4.1/conclusion.html"
   - "/edb-docs/d/edb-postgres-failover-manager/user-guides/user-guide/4.1/index.html"
+pdf: true
 ---
diff --git a/product_docs/docs/epas/11/index.mdx b/product_docs/docs/epas/11/index.mdx
index 21b407bbe41..ed92f692954 100644
--- a/product_docs/docs/epas/11/index.mdx
+++ b/product_docs/docs/epas/11/index.mdx
@@ -33,6 +33,7 @@ legacyRedirectsGenerated:
 # This list is generated by a script. If you need add entries, use the `legacyRedirects` key.
   - "/edb-docs/d/edb-postgres-advanced-server/installation-getting-started/release-notes/11/EPAS_Release_Notes.1.1.html"
   - "/edb-docs/p/edb-postgres-advanced-server/11"
+pdf: true
 ---
 
 With EDB Postgres Advanced Server, EnterpriseDB leads as the only worldwide company to deliver innovative and low cost open-source-derived database solutions with commercial quality, ease of use, compatibility, scalability, and performance for small or large-scale enterprises.
\ No newline at end of file
diff --git a/product_docs/docs/epas/12/index.mdx b/product_docs/docs/epas/12/index.mdx
index 2ff8356e75a..2a3b5f867ac 100644
--- a/product_docs/docs/epas/12/index.mdx
+++ b/product_docs/docs/epas/12/index.mdx
@@ -32,6 +32,7 @@ legacyRedirectsGenerated:
 # This list is generated by a script. If you need add entries, use the `legacyRedirects` key.
   - "/edb-docs/d/edb-postgres-advanced-server/installation-getting-started/release-notes/12/EPAS_Release_Notes.1.1.html"
   - "/edb-docs/p/edb-postgres-advanced-server/12"
+pdf: true
 ---
 
 With EDB Postgres Advanced Server, EnterpriseDB leads as the only worldwide company to deliver innovative and low cost open-source-derived database solutions with commercial quality, ease of use, compatibility, scalability, and performance for small or large-scale enterprises.
\ No newline at end of file
diff --git a/product_docs/docs/epas/13/index.mdx b/product_docs/docs/epas/13/index.mdx
index 4ab1053b26a..27cef9f5387 100644
--- a/product_docs/docs/epas/13/index.mdx
+++ b/product_docs/docs/epas/13/index.mdx
@@ -39,6 +39,7 @@ legacyRedirectsGenerated:
   - "/edb-docs/p/edbplus/39"
   - "/edb-docs/p/edbplus/38"
   - "/edb-docs/p/edbplus/36"
+pdf: true
 ---
 
 With EDB Postgres Advanced Server, EnterpriseDB leads as the only worldwide company to deliver innovative and low cost open-source-derived database solutions with commercial quality, ease of use, compatibility, scalability, and performance for small or large-scale enterprises.
diff --git a/product_docs/docs/epas/14/index.mdx b/product_docs/docs/epas/14/index.mdx
index 3f24e2a23db..f08e7496650 100644
--- a/product_docs/docs/epas/14/index.mdx
+++ b/product_docs/docs/epas/14/index.mdx
@@ -43,6 +43,7 @@ legacyRedirectsGenerated:
 # This list is generated by a script. If you need add entries, use the `legacyRedirects` key.
   - "/edb-docs/p/edb-postgres-advanced-server/9.6"
   - "/edb-docs/p/edb-postgres-advanced-server/9.5"
+pdf: true
 ---
 
 With EDB Postgres Advanced Server, we continue to lead as the only worldwide company to deliver innovative and low-cost, open-source-derived database solutions with commercial quality, ease of use, compatibility, scalability, and performance for small or large-scale enterprises.
diff --git a/product_docs/docs/epas/15/index.mdx b/product_docs/docs/epas/15/index.mdx
index 5e1ce45b138..c69467c1555 100644
--- a/product_docs/docs/epas/15/index.mdx
+++ b/product_docs/docs/epas/15/index.mdx
@@ -24,6 +24,7 @@ redirects:
   - epas_guide/
   - epas_guide/03_database_administration/
   - epas_guide/01_introduction/
+pdf: true
 ---
 
 EDB is the only worldwide company to deliver innovative and low-cost, open-source-derived database solutions. These solutions offer commercial quality, ease of use, compatibility, scalability, and performance for small or large-scale enterprises. EDB Postgres Advanced Server (sometimes referred to as EPAS in this documentation) adds extended functionality to open-source PostgreSQL, including:
diff --git a/product_docs/docs/epas/16/index.mdx b/product_docs/docs/epas/16/index.mdx
index a57540a542e..84490b84156 100644
--- a/product_docs/docs/epas/16/index.mdx
+++ b/product_docs/docs/epas/16/index.mdx
@@ -24,6 +24,7 @@ redirects:
   - epas_guide/
   - epas_guide/03_database_administration/
   - epas_guide/01_introduction/
+pdf: true
 ---
 
 EDB is the only worldwide company to deliver innovative and low-cost, open-source-derived database solutions. These solutions offer commercial quality, ease of use, compatibility, scalability, and performance for small or large-scale enterprises. EDB Postgres Advanced Server (sometimes referred to as EPAS in this documentation) adds extended functionality to open-source PostgreSQL, including:
diff --git a/product_docs/docs/epas/17/index.mdx b/product_docs/docs/epas/17/index.mdx
index b3ab8ce1f5c..62701bbaf07 100644
--- a/product_docs/docs/epas/17/index.mdx
+++ b/product_docs/docs/epas/17/index.mdx
@@ -24,6 +24,7 @@ redirects:
   - epas_guide/
   - epas_guide/03_database_administration/
   - epas_guide/01_introduction/
+pdf: true
 ---
 
 EDB is the only worldwide company to deliver innovative and low-cost, open-source-derived database solutions. These solutions offer commercial quality, ease of use, compatibility, scalability, and performance for small or large-scale enterprises. EDB Postgres Advanced Server (sometimes referred to as EPAS in this documentation) adds extended functionality to open-source PostgreSQL, including:
diff --git a/product_docs/docs/eprs/7/index.mdx b/product_docs/docs/eprs/7/index.mdx
index ee5cec2ec2a..218ec8a5957 100644
--- a/product_docs/docs/eprs/7/index.mdx
+++ b/product_docs/docs/eprs/7/index.mdx
@@ -10,6 +10,7 @@ navigation:
   - 02_overview
   - supported_platforms
   - installing
+pdf: true
 ---
 
 EDB Postgres Replication Server (EPRS) replicates data between Postgres databases, or from non-Postgres databases to Postgres databases,
diff --git a/product_docs/docs/hadoop_data_adapter/2/index.mdx b/product_docs/docs/hadoop_data_adapter/2/index.mdx
index 370313b2fb0..7373e9563ea 100644
--- a/product_docs/docs/hadoop_data_adapter/2/index.mdx
+++ b/product_docs/docs/hadoop_data_adapter/2/index.mdx
@@ -21,7 +21,7 @@ navigation:
   - 10d_example_limit_offset_pushdown
   - "#Troubleshooting"
   - 10_identifying_data_adapter_version
-
+pdf: true
 ---
 
 The Hadoop Foreign Data Wrapper (`hdfs_fdw`) is a Postgres extension that allows you to access data that resides on a Hadoop file system from EDB Postgres Advanced Server. The foreign data wrapper makes the Hadoop file system a read-only data source that you can use with Postgres functions and utilities or with other data that resides on a Postgres host.
diff --git a/product_docs/docs/jdbc_connector/42.7.3.2/index.mdx b/product_docs/docs/jdbc_connector/42.7.3.2/index.mdx
index 59e60b90d46..3daf65e31fa 100644
--- a/product_docs/docs/jdbc_connector/42.7.3.2/index.mdx
+++ b/product_docs/docs/jdbc_connector/42.7.3.2/index.mdx
@@ -38,6 +38,7 @@ legacyRedirectsGenerated:
   - "/edb-docs/d/jdbc-connector/user-guides/jdbc-guide/42.2.12.1/whats_new.html"
   - "/edb-docs/d/jdbc-connector/user-guides/jdbc-guide/42.2.12.1/conclusion.html"
   - "/edb-docs/d/jdbc-connector/user-guides/jdbc-guide/42.2.12.1/index.html"
+pdf: true
 ---
 
 The EDB JDBC Connector provides connectivity between a Java application and an EDB Postgres Advanced Server database. The EDB JDBC Connector is written in Java and conforms to Sun's JDK architecture. For more information, see [JDBC driver types](03_advanced_server_jdbc_connector_overview/#jdbc-driver-types)
diff --git a/product_docs/docs/language_pack/4/index.mdx b/product_docs/docs/language_pack/4/index.mdx
index d3c32dd323b..3f7be60e748 100644
--- a/product_docs/docs/language_pack/4/index.mdx
+++ b/product_docs/docs/language_pack/4/index.mdx
@@ -12,6 +12,7 @@ directoryDefaults:
 redirects:
   - /epas/latest/language_pack/
   - 01_supported_database_server_versions/
+pdf: true
 ---
 
 Learn how to install, configure, and use the procedural languages (PL/Perl, PL/Python, and PL/Tcl).
diff --git a/product_docs/docs/lasso/4/index.mdx b/product_docs/docs/lasso/4/index.mdx
index 7ac214a661e..05022b463d3 100644
--- a/product_docs/docs/lasso/4/index.mdx
+++ b/product_docs/docs/lasso/4/index.mdx
@@ -11,6 +11,7 @@ navigation:
   - collected-data
   - describe
   - appendix-a
+pdf: true
 ---
 
 EDB developed a small multi-platform application called Lasso. Lasso
diff --git a/product_docs/docs/livecompare/1/index.mdx b/product_docs/docs/livecompare/1/index.mdx
index 775ee129f5a..ccea887013a 100644
--- a/product_docs/docs/livecompare/1/index.mdx
+++ b/product_docs/docs/livecompare/1/index.mdx
@@ -1,6 +1,7 @@
 ---
 title: LiveCompare
 originalFilePath: index.md
+pdf: true
 ---
 
 LiveCompare v1 is no longer supported. Please consider upgrading to the latest [LiveCompare](/livecompare/latest/).
diff --git a/product_docs/docs/livecompare/2/index.mdx b/product_docs/docs/livecompare/2/index.mdx
index 6a532499509..d19a92c03d0 100644
--- a/product_docs/docs/livecompare/2/index.mdx
+++ b/product_docs/docs/livecompare/2/index.mdx
@@ -12,6 +12,7 @@ navigation:
   - licenses
 title: LiveCompare
 originalFilePath: index.md
+pdf: true
 ---
 
 LiveCompare is designed to compare any number of databases to verify they're identical. The tool compares the databases and generates a comparison report, a list of differences, and handy DML scripts so you can optionally
diff --git a/product_docs/docs/livecompare/3/index.mdx b/product_docs/docs/livecompare/3/index.mdx
index 983abf0ff60..0d2034f76ef 100644
--- a/product_docs/docs/livecompare/3/index.mdx
+++ b/product_docs/docs/livecompare/3/index.mdx
@@ -13,6 +13,7 @@ navigation:
   - licenses
 title: LiveCompare
 originalFilePath: index.md
+pdf: true
 ---
 
 LiveCompare is designed to compare any number of databases to verify they're identical. The tool compares the databases and generates a comparison report, a list of differences, and handy DML scripts so you can optionally
diff --git a/product_docs/docs/migration_portal/4/index.mdx b/product_docs/docs/migration_portal/4/index.mdx
index 634610b917e..61bf0848cdc 100644
--- a/product_docs/docs/migration_portal/4/index.mdx
+++ b/product_docs/docs/migration_portal/4/index.mdx
@@ -15,6 +15,7 @@ legacyRedirectsGenerated:
   - "/edb-docs/d/edb-postgres-migration-portal/user-guides/user-guide/3.0.1/conclusion.html"
   - "/edb-docs/d/edb-postgres-migration-portal/user-guides/user-guide/3.0.1/whats_new.html"
   - "/edb-docs/d/edb-postgres-migration-portal/user-guides/user-guide/3.0.1/genindex.html"
+pdf: true
 ---
diff --git a/product_docs/docs/migration_toolkit/55/index.mdx b/product_docs/docs/migration_toolkit/55/index.mdx
index 971edc627b8..060002db4a2 100644
--- a/product_docs/docs/migration_toolkit/55/index.mdx
+++ b/product_docs/docs/migration_toolkit/55/index.mdx
@@ -16,6 +16,7 @@ navigation:
   - "#Troubleshooting"
   - 09_mtk_errors
   - 10_error_codes
+pdf: true
 ---
diff --git a/product_docs/docs/mongo_data_adapter/5/index.mdx b/product_docs/docs/mongo_data_adapter/5/index.mdx
index 202a5c93e72..793f0f321ba 100644
--- a/product_docs/docs/mongo_data_adapter/5/index.mdx
+++ b/product_docs/docs/mongo_data_adapter/5/index.mdx
@@ -21,6 +21,7 @@ navigation:
   - 08c_example_where_pushdown
   - "#Troubleshooting"
   - 09_identifying_data_adapter_version
+pdf: true
 ---
 
 The MongoDB Foreign Data Wrapper (`mongo_fdw`) is a Postgres extension that lets you access data that resides on a MongoDB database from EDB Postgres Advanced Server. It's a writable foreign data wrapper that you can use with Postgres functions and utilities or with other data that resides on a Postgres host.
diff --git a/product_docs/docs/mysql_data_adapter/2/index.mdx b/product_docs/docs/mysql_data_adapter/2/index.mdx
index 5db83d0f021..b794dd71a36 100644
--- a/product_docs/docs/mysql_data_adapter/2/index.mdx
+++ b/product_docs/docs/mysql_data_adapter/2/index.mdx
@@ -22,6 +22,7 @@ navigation:
   - "#Troubleshooting"
   - 11_identifying_data_adapter_version
   - 13_troubleshooting
+pdf: true
 ---
 
 The MySQL Foreign Data Wrapper (`mysql_fdw`) is a Postgres extension that lets you access data that resides on a MySQL database from EDB Postgres Advanced Server. It's a writable foreign data wrapper that you can use with Postgres functions and utilities or with other data that resides on a Postgres host.
diff --git a/product_docs/docs/net_connector/4.0.10.1/index.mdx b/product_docs/docs/net_connector/4.0.10.1/index.mdx
index d2bf94255c1..51f022aaa7d 100644
--- a/product_docs/docs/net_connector/4.0.10.1/index.mdx
+++ b/product_docs/docs/net_connector/4.0.10.1/index.mdx
@@ -11,6 +11,7 @@ legacyRedirectsGenerated:
   - "/edb-docs/d/edb-postgres-net-connector/user-guides/net-guide/4.0.10.1/genindex.html"
   - "/edb-docs/d/edb-postgres-net-connector/user-guides/net-guide/4.0.10.1/whats_new.html"
   - "/edb-docs/d/edb-postgres-net-connector/user-guides/net-guide/4.0.10.1/index.html"
+pdf: true
 ---
diff --git a/product_docs/docs/net_connector/4.0.10.2/index.mdx b/product_docs/docs/net_connector/4.0.10.2/index.mdx
index e8303b3449e..89cce9ef8ae 100644
--- a/product_docs/docs/net_connector/4.0.10.2/index.mdx
+++ b/product_docs/docs/net_connector/4.0.10.2/index.mdx
@@ -11,6 +11,7 @@ legacyRedirectsGenerated:
   - "/edb-docs/d/edb-postgres-net-connector/user-guides/net-guide/4.0.10.2/whats_new.html"
   - "/edb-docs/d/edb-postgres-net-connector/user-guides/net-guide/4.0.10.2/genindex.html"
   - "/edb-docs/d/edb-postgres-net-connector/user-guides/net-guide/4.0.10.2/index.html"
+pdf: true
 ---
 
 The EDB .NET Connector distributed with EDB Postgres Advanced Server (Advanced Server) provides connectivity between a .NET client application and an Advanced Server database server.
 This guide provides installation instructions, usage instructions, and examples that demonstrate the functionality of the EDB .NET Connector:
diff --git a/product_docs/docs/net_connector/4.0.6.1/index.mdx b/product_docs/docs/net_connector/4.0.6.1/index.mdx
index 3637ba4af5a..2b856ec9845 100644
--- a/product_docs/docs/net_connector/4.0.6.1/index.mdx
+++ b/product_docs/docs/net_connector/4.0.6.1/index.mdx
@@ -11,6 +11,7 @@ legacyRedirectsGenerated:
   - "/edb-docs/p/edb-postgres-net-connector/4.0.6.1"
   - "/edb-docs/d/edb-postgres-net-connector/user-guides/net-guide/4.0.6.1/genindex.html"
   - "/edb-docs/d/edb-postgres-net-connector/user-guides/net-guide/4.0.6.1/index.html"
+pdf: true
 ---
diff --git a/product_docs/docs/net_connector/4.1.3.1/index.mdx b/product_docs/docs/net_connector/4.1.3.1/index.mdx
index d1983bf1818..f5363261fa7 100644
--- a/product_docs/docs/net_connector/4.1.3.1/index.mdx
+++ b/product_docs/docs/net_connector/4.1.3.1/index.mdx
@@ -11,6 +11,7 @@ legacyRedirectsGenerated:
   - "/edb-docs/d/edb-postgres-net-connector/user-guides/net-guide/4.1.3.1/genindex.html"
   - "/edb-docs/d/edb-postgres-net-connector/user-guides/net-guide/4.1.3.1/conclusion.html"
   - "/edb-docs/d/edb-postgres-net-connector/user-guides/net-guide/4.1.3.1/index.html"
+pdf: true
 ---
diff --git a/product_docs/docs/net_connector/4.1.5.1/index.mdx b/product_docs/docs/net_connector/4.1.5.1/index.mdx
index dad52c0d343..e2fac8fda32 100644
--- a/product_docs/docs/net_connector/4.1.5.1/index.mdx
+++ b/product_docs/docs/net_connector/4.1.5.1/index.mdx
@@ -11,6 +11,7 @@ legacyRedirectsGenerated:
   - "/edb-docs/d/edb-postgres-net-connector/user-guides/net-guide/4.1.5.1/genindex.html"
   - "/edb-docs/d/edb-postgres-net-connector/user-guides/net-guide/4.1.5.1/conclusion.html"
   - "/edb-docs/d/edb-postgres-net-connector/user-guides/net-guide/4.1.5.1/index.html"
+pdf: true
 ---
diff --git a/product_docs/docs/net_connector/4.1.6.1/index.mdx b/product_docs/docs/net_connector/4.1.6.1/index.mdx
index cf1c0d2ff97..7bebbabc516 100644
--- a/product_docs/docs/net_connector/4.1.6.1/index.mdx
+++ b/product_docs/docs/net_connector/4.1.6.1/index.mdx
@@ -10,6 +10,7 @@ legacyRedirectsGenerated:
   - "/edb-docs/d/edb-postgres-net-connector/user-guides/net-guide/4.1.6.1/genindex.html"
   - "/edb-docs/d/edb-postgres-net-connector/user-guides/net-guide/4.1.6.1/whats_new.html"
   - "/edb-docs/p/edb-postgres-net-connector/4.1.6.1"
+pdf: true
 ---
 
 The EDB .NET Connector distributed with EDB Postgres Advanced Server (Advanced Server) provides connectivity between a .NET client application and an Advanced Server database server.
 This guide provides installation instructions, usage instructions, and examples that demonstrate the functionality of the EDB .NET Connector:
diff --git a/product_docs/docs/net_connector/5.0.7.1/index.mdx b/product_docs/docs/net_connector/5.0.7.1/index.mdx
index c64ca407201..96856a8aa09 100644
--- a/product_docs/docs/net_connector/5.0.7.1/index.mdx
+++ b/product_docs/docs/net_connector/5.0.7.1/index.mdx
@@ -2,6 +2,7 @@
 title: "EDB .NET Connector"
 directoryDefaults:
   description: "EDB .NET Connector Version 5.0.7.1 Documentation and release notes."
+pdf: true
 ---
 
 The EDB .NET Connector distributed with EDB Postgres Advanced Server provides connectivity between a .NET client application and an EDB Postgres Advanced Server database server.
 You can:
diff --git a/product_docs/docs/net_connector/6.0.2.1/index.mdx b/product_docs/docs/net_connector/6.0.2.1/index.mdx
index d2c4ffbca2b..0e336b58c12 100644
--- a/product_docs/docs/net_connector/6.0.2.1/index.mdx
+++ b/product_docs/docs/net_connector/6.0.2.1/index.mdx
@@ -2,6 +2,7 @@
 title: "EDB .NET Connector"
 directoryDefaults:
   description: "EDB .NET Connector Version 6.0.2.1 Documentation and release notes."
+pdf: true
 ---
 
 The EDB .NET Connector distributed with EDB Postgres Advanced Server provides connectivity between a .NET client application and an EDB Postgres Advanced Server database server.
 You can:
diff --git a/product_docs/docs/net_connector/7.0.4.1/index.mdx b/product_docs/docs/net_connector/7.0.4.1/index.mdx
index 81bba2f40ec..12bab6247b1 100644
--- a/product_docs/docs/net_connector/7.0.4.1/index.mdx
+++ b/product_docs/docs/net_connector/7.0.4.1/index.mdx
@@ -2,6 +2,7 @@
 title: "EDB .NET Connector"
 directoryDefaults:
   description: "EDB .NET Connector version 7.0.4.1 documentation and release notes."
+pdf: true
 ---
 
 The EDB .NET Connector distributed with EDB Postgres Advanced Server provides connectivity between a .NET client application and an EDB Postgres Advanced Server database server.
 You can:
diff --git a/product_docs/docs/net_connector/7.0.6.1/index.mdx b/product_docs/docs/net_connector/7.0.6.1/index.mdx
index fdcdfafff77..b163617fbcf 100644
--- a/product_docs/docs/net_connector/7.0.6.1/index.mdx
+++ b/product_docs/docs/net_connector/7.0.6.1/index.mdx
@@ -2,6 +2,7 @@
 title: "EDB .NET Connector"
 directoryDefaults:
   description: "EDB .NET Connector version 7.0.6.1 documentation and release notes."
+pdf: true
 ---
 
 The EDB .NET Connector distributed with EDB Postgres Advanced Server provides connectivity between a .NET client application and an EDB Postgres Advanced Server database server.
 You can:
diff --git a/product_docs/docs/net_connector/7.0.6.2/index.mdx b/product_docs/docs/net_connector/7.0.6.2/index.mdx
index 21f9c63dc89..463008600b2 100644
--- a/product_docs/docs/net_connector/7.0.6.2/index.mdx
+++ b/product_docs/docs/net_connector/7.0.6.2/index.mdx
@@ -2,6 +2,7 @@
 title: "EDB .NET Connector"
 directoryDefaults:
   description: "EDB .NET Connector version 7.0.6.2 documentation and release notes."
+pdf: true
 ---
 
 The EDB .NET Connector distributed with EDB Postgres Advanced Server provides connectivity between a .NET client application and an EDB Postgres Advanced Server database server.
 You can:
diff --git a/product_docs/docs/net_connector/8.0.2.1/index.mdx b/product_docs/docs/net_connector/8.0.2.1/index.mdx
index 54903390cca..9080aaed7be 100644
--- a/product_docs/docs/net_connector/8.0.2.1/index.mdx
+++ b/product_docs/docs/net_connector/8.0.2.1/index.mdx
@@ -2,6 +2,7 @@
 title: "EDB .NET Connector"
 directoryDefaults:
   description: "EDB .NET Connector version 8.0.2.1 documentation and release notes."
+pdf: true
 ---
 
 The EDB .NET Connector distributed with EDB Postgres Advanced Server provides connectivity between a .NET client application and an EDB Postgres Advanced Server database server.
 You can:
diff --git a/product_docs/docs/net_connector/8.0.5.1/index.mdx b/product_docs/docs/net_connector/8.0.5.1/index.mdx
index 01dd6268d09..225651121ca 100644
--- a/product_docs/docs/net_connector/8.0.5.1/index.mdx
+++ b/product_docs/docs/net_connector/8.0.5.1/index.mdx
@@ -22,6 +22,7 @@ navigation:
   - 16_scram_compatibility
   - 17_advanced_server_net_connector_logging
   - 18_api_reference
+pdf: true
 ---
 
 The EDB .NET Connector distributed with EDB Postgres Advanced Server provides connectivity between a .NET client application and an EDB Postgres Advanced Server database server.
 You can:
diff --git a/product_docs/docs/ocl_connector/12/index.mdx b/product_docs/docs/ocl_connector/12/index.mdx
index 96912547142..2d48d34b187 100644
--- a/product_docs/docs/ocl_connector/12/index.mdx
+++ b/product_docs/docs/ocl_connector/12/index.mdx
@@ -9,6 +9,7 @@ legacyRedirectsGenerated:
   - "/edb-docs/d/edb-postgres-ocl-connector/user-guides/ocl-guide/12.1.2.1/whats_new.html"
   - "/edb-docs/p/edb-postgres-ocl-connector/12.1.2.1"
   - "/edb-docs/d/edb-postgres-ocl-connector/user-guides/ocl-guide/12.1.2.1/index.html"
+pdf: true
 ---
 
 The EDB OCL Connector provides an API similar to the Oracle Call Interface. Applications that are written to use the Oracle Call Interface may be recompiled using EDB's OCL connector in order to interact with an Advanced Server database server.
diff --git a/product_docs/docs/ocl_connector/13/index.mdx b/product_docs/docs/ocl_connector/13/index.mdx
index 96ab9b777f1..95423444afb 100644
--- a/product_docs/docs/ocl_connector/13/index.mdx
+++ b/product_docs/docs/ocl_connector/13/index.mdx
@@ -15,6 +15,7 @@ legacyRedirectsGenerated:
   - "/edb-docs/d/edb-postgres-ocl-connector/user-guides/ocl-guide/13.1.4.1/genindex.html"
   - "/edb-docs/p/edb-postgres-ocl-connector/13.1.4.1"
   - "/edb-docs/d/edb-postgres-ocl-connector/user-guides/ocl-guide/13.1.4.1/index.html"
+pdf: true
 ---
 
 The EDB OCL Connector provides an API similar to the Oracle Call Interface. Applications that are written to use the Oracle Call Interface may be recompiled using EDB's OCL connector in order to interact with an Advanced Server database server.
diff --git a/product_docs/docs/ocl_connector/14/index.mdx b/product_docs/docs/ocl_connector/14/index.mdx
index 2686ecf6a18..cd6ad67313b 100644
--- a/product_docs/docs/ocl_connector/14/index.mdx
+++ b/product_docs/docs/ocl_connector/14/index.mdx
@@ -11,6 +11,7 @@ navigation:
   - 05_generating_the_ocl_trace
   - 06_using_ssl
   - 07_scram_compatibility
+pdf: true
 ---
 
 The EDB OCL Connector provides an API similar to the Oracle Call Interface. You can use EDB's OCL Connector to compile applications that are written to use the Oracle Call Interface to interact with an EDB Postgres Advanced Server database server.
diff --git a/product_docs/docs/ocl_connector/15/index.mdx b/product_docs/docs/ocl_connector/15/index.mdx
index 3c8dcba4dcc..24b682a6123 100644
--- a/product_docs/docs/ocl_connector/15/index.mdx
+++ b/product_docs/docs/ocl_connector/15/index.mdx
@@ -11,6 +11,7 @@ navigation:
   - 05_generating_the_ocl_trace
   - 06_using_ssl
   - 07_scram_compatibility
+pdf: true
 ---
 
 The EDB OCL Connector provides an API similar to the Oracle Call Interface. You can use EDB's OCL Connector to compile applications that are written to use the Oracle Call Interface to interact with an EDB Postgres Advanced Server database server.
diff --git a/product_docs/docs/ocl_connector/16/index.mdx b/product_docs/docs/ocl_connector/16/index.mdx
index 3c8dcba4dcc..24b682a6123 100644
--- a/product_docs/docs/ocl_connector/16/index.mdx
+++ b/product_docs/docs/ocl_connector/16/index.mdx
@@ -11,6 +11,7 @@ navigation:
   - 05_generating_the_ocl_trace
   - 06_using_ssl
   - 07_scram_compatibility
+pdf: true
 ---
 
 The EDB OCL Connector provides an API similar to the Oracle Call Interface. You can use EDB's OCL Connector to compile applications that are written to use the Oracle Call Interface to interact with an EDB Postgres Advanced Server database server.
diff --git a/product_docs/docs/ocl_connector/17/index.mdx b/product_docs/docs/ocl_connector/17/index.mdx
index 320791a4102..9d69c9fdeba 100644
--- a/product_docs/docs/ocl_connector/17/index.mdx
+++ b/product_docs/docs/ocl_connector/17/index.mdx
@@ -11,6 +11,7 @@ navigation:
   - 05_generating_the_ocl_trace
   - 06_using_ssl
   - 07_scram_compatibility
+pdf: true
 ---
 
 The EDB OCL Connector provides an API similar to the Oracle Call Interface. You can use EDB's OCL Connector to compile applications that are written to use the Oracle Call Interface to interact with an EDB Postgres Advanced Server database server.
diff --git a/product_docs/docs/odbc_connector/12/index.mdx b/product_docs/docs/odbc_connector/12/index.mdx
index b45dc2aeaa1..172e774f157 100644
--- a/product_docs/docs/odbc_connector/12/index.mdx
+++ b/product_docs/docs/odbc_connector/12/index.mdx
@@ -15,6 +15,7 @@ legacyRedirectsGenerated:
   - "/edb-docs/d/edb-postgres-odbc-connector/user-guides/odbc-guide/12.2.0.1/genindex.html"
   - "/edb-docs/p/edb-postgres-odbc-connector/12.2.0.1"
   - "/edb-docs/d/edb-postgres-odbc-connector/user-guides/odbc-guide/12.2.0.1/index.html"
+pdf: true
 ---
 
 ODBC (Open Database Connectivity) is a programming interface that allows a client application to connect to any database that provides an ODBC driver. The EDB ODBC Connector provides connectivity between EDB Postgres Advanced Server (Advanced Server) and ODBC-compliant applications.
diff --git a/product_docs/docs/odbc_connector/13/index.mdx b/product_docs/docs/odbc_connector/13/index.mdx
index 532a1bbe899..d78d216e9f9 100644
--- a/product_docs/docs/odbc_connector/13/index.mdx
+++ b/product_docs/docs/odbc_connector/13/index.mdx
@@ -12,6 +12,7 @@ navigation:
   - 05_edb-odbc_connection_properties
   - 06_edb-odbc_driver_functionality
   - 07_scram_compatibility
+pdf: true
 ---
 
 ODBC (Open Database Connectivity) is a programming interface that allows a client application to connect to any database that provides an ODBC driver. EDB ODBC Connector is an interface that allows an ODBC-compliant client application to connect to an EDB Postgres Advanced Server database. The EDB ODBC Connector allows an application that was designed to work with other databases to run on EDB Postgres Advanced Server. The ODBC Connector provides a way for the client application to establish a connection, send queries, and retrieve results from EDB Postgres Advanced Server.
diff --git a/product_docs/docs/odbc_connector/16/index.mdx b/product_docs/docs/odbc_connector/16/index.mdx
index a73d8756bf7..14e5d415e96 100644
--- a/product_docs/docs/odbc_connector/16/index.mdx
+++ b/product_docs/docs/odbc_connector/16/index.mdx
@@ -12,6 +12,7 @@ navigation:
   - 05_edb-odbc_connection_properties
   - 06_edb-odbc_driver_functionality
   - 07_scram_compatibility
+pdf: true
 ---
 
 ODBC (Open Database Connectivity) is a programming interface that allows a client application to connect to any database that provides an ODBC driver. EDB ODBC Connector is an interface that allows an ODBC-compliant client application to connect to an EDB Postgres Advanced Server database. The EDB ODBC Connector allows an application that was designed to work with other databases to run on EDB Postgres Advanced Server. The ODBC Connector provides a way for the client application to establish a connection, send queries, and retrieve results from EDB Postgres Advanced Server.
diff --git a/product_docs/docs/pem/9/index.mdx b/product_docs/docs/pem/9/index.mdx
index 19db59efe42..0bdbf7408bd 100644
--- a/product_docs/docs/pem/9/index.mdx
+++ b/product_docs/docs/pem/9/index.mdx
@@ -68,6 +68,7 @@ redirects:
   - /pem/latest/pem_online_help/06_toc_pem_bart_management/06_scheduling_bart_obsolete_backups_deletion/
   - /pem/latest/pem_online_help/06_toc_pem_bart_management/08_restoring_bart_backups/
   - /pem/latest/pem_online_help/06_toc_pem_bart_management/
+pdf: true
 ---
 
 Welcome to Postgres Enterprise Manager (PEM). PEM consists of components that provide the management and analytical functionality for your EDB Postgres Advanced Server or PostgreSQL database. PEM is based on the Open Source pgAdmin 4 project.
diff --git a/product_docs/docs/pgbouncer/1/index.mdx b/product_docs/docs/pgbouncer/1/index.mdx
index 1e95ef262f5..df1716d7512 100644
--- a/product_docs/docs/pgbouncer/1/index.mdx
+++ b/product_docs/docs/pgbouncer/1/index.mdx
@@ -14,6 +14,7 @@ legacyRedirectsGenerated:
   - "/edb-docs/d/pgbouncer/user-guides/pgbouncer-guide/1.0/conclusion.html"
   - "/edb-docs/p/pgbouncer/1.0"
   - "/edb-docs/d/pgbouncer/user-guides/pgbouncer-guide/1.0/genindex.html"
+pdf: true
 ---
diff --git a/product_docs/docs/pgd/3.7/index.mdx b/product_docs/docs/pgd/3.7/index.mdx
index 3a1d208deea..27e30341ed0 100644
--- a/product_docs/docs/pgd/3.7/index.mdx
+++ b/product_docs/docs/pgd/3.7/index.mdx
@@ -1,5 +1,6 @@
 ---
 title: "EDB Postgres Distributed"
+pdf: true
 ---
 
 EDB Postgres Distributed provides multi-master replication and data distribution with advanced conflict management, data-loss protection, and [throughput up to 5X faster than native logical replication](https://www.enterprisedb.com/blog/performance-improvements-edb-postgres-distributed), and enables distributed PostgreSQL clusters with high availability up to five 9s.
diff --git a/product_docs/docs/pgd/4/index.mdx b/product_docs/docs/pgd/4/index.mdx
index 6b0c9e090ad..147d7550f9b 100644
--- a/product_docs/docs/pgd/4/index.mdx
+++ b/product_docs/docs/pgd/4/index.mdx
@@ -25,6 +25,7 @@ navigation:
   - backup
   - monitoring
   - cli
+pdf: true
 ---
diff --git a/product_docs/docs/pgd/5.6/index.mdx b/product_docs/docs/pgd/5.6/index.mdx
index 7e1a1c41d1b..68cd6d9e2d1 100644
--- a/product_docs/docs/pgd/5.6/index.mdx
+++ b/product_docs/docs/pgd/5.6/index.mdx
@@ -46,6 +46,7 @@ navigation:
   - tssnapshots
   - "#Reference"
   - reference
+pdf: true
 ---
diff --git a/product_docs/docs/pgd/5/index.mdx b/product_docs/docs/pgd/5/index.mdx
index cc7c02dcb15..6f333ff00e9 100644
--- a/product_docs/docs/pgd/5/index.mdx
+++ b/product_docs/docs/pgd/5/index.mdx
@@ -40,6 +40,7 @@ navigation:
   - "#Reference"
   - reference
   - compatibility
+pdf: true
 ---
diff --git a/product_docs/docs/pge/13/index.mdx b/product_docs/docs/pge/13/index.mdx
index 1cf6409d7b6..0a29d39f521 100644
--- a/product_docs/docs/pge/13/index.mdx
+++ b/product_docs/docs/pge/13/index.mdx
@@ -2,6 +2,7 @@
 title: 2ndQuadrant Postgres/EDB Postgres Extended Server 13
 navTitle: EDB Postgres Extended Server 13
 indexCards: simple
+pdf: true
 ---
 
 EDB Postgres Extended Server is a Postgres database server distribution built on open-source, community PostgreSQL. It's fully compatible with PostgreSQL. If you have applications written and tested to work with PostgreSQL, they will behave the same with EDB Postgres Extended Server. We will support and fix any functionality or behavior differences between community PostgreSQL and EDB Postgres Extended Server.
diff --git a/product_docs/docs/pge/14/index.mdx b/product_docs/docs/pge/14/index.mdx
index 7b91e43fd92..74349c9f3a7 100644
--- a/product_docs/docs/pge/14/index.mdx
+++ b/product_docs/docs/pge/14/index.mdx
@@ -2,6 +2,7 @@
 title: EDB Postgres Extended Server 14
 navTitle: EDB Postgres Extended Server 14
 indexCards: simple
+pdf: true
 ---
 
 EDB Postgres Extended Server is a Postgres database server distribution built on open-source, community PostgreSQL. It's fully compatible with PostgreSQL. If you have applications written and tested to work with PostgreSQL, they will behave the same with EDB Postgres Extended Server. We will support and fix any functionality or behavior differences between community PostgreSQL and EDB Postgres Extended Server.
diff --git a/product_docs/docs/pge/15/index.mdx b/product_docs/docs/pge/15/index.mdx
index 2380afb2ad8..81a463a59c5 100644
--- a/product_docs/docs/pge/15/index.mdx
+++ b/product_docs/docs/pge/15/index.mdx
@@ -13,6 +13,7 @@ navigation:
   - parameters
   - sql_features
   - operation
+pdf: true
 ---
 
 EDB Postgres Extended Server is a Postgres database server distribution built on open-source, community PostgreSQL. It is fully compatible with PostgreSQL. If you have applications written and tested to work with PostgreSQL, they will behave the same with EDB Postgres Extended Server. We will support and fix any functionality or behavior differences between community PostgreSQL and EDB Postgres Extended Server.
diff --git a/product_docs/docs/pge/16/index.mdx b/product_docs/docs/pge/16/index.mdx
index daaeaa72cb7..791fb632921 100644
--- a/product_docs/docs/pge/16/index.mdx
+++ b/product_docs/docs/pge/16/index.mdx
@@ -18,6 +18,7 @@ navigation:
   - sql_features
   - operation
 indexCards: simple
+pdf: true
 ---
 
 EDB Postgres Extended Server is a Postgres database server distribution built on open-source, community PostgreSQL. It's fully compatible with PostgreSQL. If you have applications written and tested to work with PostgreSQL, they will behave the same with EDB Postgres Extended Server. We will support and fix any functionality or behavior differences between community PostgreSQL and EDB Postgres Extended Server.
diff --git a/product_docs/docs/pge/17/index.mdx b/product_docs/docs/pge/17/index.mdx
index daaeaa72cb7..791fb632921 100644
--- a/product_docs/docs/pge/17/index.mdx
+++ b/product_docs/docs/pge/17/index.mdx
@@ -18,6 +18,7 @@ navigation:
   - sql_features
   - operation
 indexCards: simple
+pdf: true
 ---
 
 EDB Postgres Extended Server is a Postgres database server distribution built on open-source, community PostgreSQL. It's fully compatible with PostgreSQL. If you have applications written and tested to work with PostgreSQL, they will behave the same with EDB Postgres Extended Server. We will support and fix any functionality or behavior differences between community PostgreSQL and EDB Postgres Extended Server.
diff --git a/product_docs/docs/pgpool/4/index.mdx b/product_docs/docs/pgpool/4/index.mdx
index 84bf70921ed..095b2eadc36 100644
--- a/product_docs/docs/pgpool/4/index.mdx
+++ b/product_docs/docs/pgpool/4/index.mdx
@@ -15,6 +15,7 @@ legacyRedirectsGenerated:
   - "/edb-docs/d/pgpool-ii/user-guides/pgpool-ii-guide/1.0/conclusion.html"
   - "/edb-docs/d/pgpool-ii/user-guides/pgpool-ii-guide/1.0/genindex.html"
   - "/edb-docs/p/pgpool-ii/1.0"
+pdf: true
 ---
diff --git a/product_docs/docs/postgis/3/index.mdx b/product_docs/docs/postgis/3/index.mdx
index 1a14a2c64f7..2082a0b885c 100644
--- a/product_docs/docs/postgis/3/index.mdx
+++ b/product_docs/docs/postgis/3/index.mdx
@@ -14,6 +14,7 @@ legacyRedirectsGenerated:
   - "/edb-docs/d/edb-postgres-postgis/user-guides/postgis-guide/1.0/conclusion.html"
   - "/edb-docs/p/edb-postgres-postgis/1.0"
   - "/edb-docs/d/edb-postgres-postgis/user-guides/postgis-guide/1.0/genindex.html"
+pdf: true
 ---
 
 EDB PostGIS is a PostgreSQL extension that allows you to store geographic information systems (GIS) objects in an EDB Postgres Advanced Server database. It includes functions for analyzing and processing GIS objects and support for GiST-based R-Tree spatial indexes.
diff --git a/product_docs/docs/postgres_distributed_for_kubernetes/1/index.mdx b/product_docs/docs/postgres_distributed_for_kubernetes/1/index.mdx
index ce5f62ebeb3..00826c93ea3 100644
--- a/product_docs/docs/postgres_distributed_for_kubernetes/1/index.mdx
+++ b/product_docs/docs/postgres_distributed_for_kubernetes/1/index.mdx
@@ -38,6 +38,7 @@ navigation:
 directoryDefaults:
   iconName: logos/KubernetesMono
   displayBanner: "We are in the process of migrating the documentation from the previous EDB Postgres Distributed for Kubernetes name to the new EDB CloudNativePG Global Cluster name. You may see references to both names in this documentation."
+pdf: true
 ---
 
 EDB Postgres Distributed for Kubernetes (`PGD4K`) is an
diff --git a/product_docs/docs/postgres_for_kubernetes/1/index.mdx b/product_docs/docs/postgres_for_kubernetes/1/index.mdx
index b204be190e5..2bc05a6c09b 100644
--- a/product_docs/docs/postgres_for_kubernetes/1/index.mdx
+++ b/product_docs/docs/postgres_for_kubernetes/1/index.mdx
@@ -71,7 +71,7 @@ navigation:
   - backup_recovery
   - '#Appendix'
   - object_stores
-
+pdf: true
 ---
 
 The EDB Postgres for Kubernetes operator is a fork based on [CloudNativePG](https://cloudnative-pg.io).
diff --git a/product_docs/docs/pwr/1/index.mdx b/product_docs/docs/pwr/1/index.mdx
index e3d2ab747b2..7dc702f65c7 100644
--- a/product_docs/docs/pwr/1/index.mdx
+++ b/product_docs/docs/pwr/1/index.mdx
@@ -6,6 +6,7 @@ navigation:
   - configuring
   - using
 indexCards: simple
+pdf: true
 ---
 
 Postgres Workload Report (PWR) is a Python-based tool used for building PostgreSQL workload reports in HTML, Markdown, DOCX, and PDF mode. These reports mimic the reports provided by the Automatic Workload Repository (AWR) reporting tool from Oracle.
diff --git a/product_docs/docs/repmgr/5.3.3/index.mdx b/product_docs/docs/repmgr/5.3.3/index.mdx
index f31112d44a6..ea8822b1654 100644
--- a/product_docs/docs/repmgr/5.3.3/index.mdx
+++ b/product_docs/docs/repmgr/5.3.3/index.mdx
@@ -3,6 +3,7 @@ navTitle: repmgr
 title: "Replication Manager (repmgr)"
 directoryDefaults:
   description: "Replication Manager (repmgr) is among the most popular open-source tools for managing replication and failover of PostgreSQL clusters. It relies on PostgreSQL’s streaming replication and hot standby capabilities allowing DBAs to simplify the process of setting up and managing clusters having high availability as well as read scalability requirements. repmgr is distributed under GNU GPL 3 and maintained by EDB."
+pdf: true
 ---
 
 Replication Manager (repmgr) is among the most popular open-source tools for managing replication and failover of PostgreSQL clusters. It relies on PostgreSQL’s streaming replication and hot standby capabilities allowing DBAs to simplify the process of setting up and managing clusters having high availability as well as read scalability requirements. repmgr is distributed under GNU GPL 3 and maintained by EDB.
diff --git a/product_docs/docs/slony/2.2.10/index.mdx b/product_docs/docs/slony/2.2.10/index.mdx
index 4a4d8cfa928..30f322fb2b2 100644
--- a/product_docs/docs/slony/2.2.10/index.mdx
+++ b/product_docs/docs/slony/2.2.10/index.mdx
@@ -9,6 +9,7 @@ legacyRedirectsGenerated:
   - "/edb-docs/d/edb-postgres-slony-replication/user-guides/slony-replication-guide/1.0/conclusion.html"
   - "/edb-docs/p/edb-postgres-slony-replication/1.0"
   - "/edb-docs/d/edb-postgres-slony-replication/user-guides/slony-replication-guide/1.0/genindex.html"
+pdf: true
 ---
 
 EDB Slony is built from Slony-I and used for replicating EDB Postgres Advanced Server data. It is a primary-standby logical replication solution that is well-suited for large databases with a limited number of standby systems. It shares data in one direction only (from a primary node to a standby node), is cascading (a standby node may pass data to another standby node), and does not allow data to be modified on standby nodes.
diff --git a/product_docs/docs/tde/15/index.mdx b/product_docs/docs/tde/15/index.mdx
index 16d366fa521..4048d2d56b9 100644
--- a/product_docs/docs/tde/15/index.mdx
+++ b/product_docs/docs/tde/15/index.mdx
@@ -17,7 +17,7 @@ navigation:
   - initdb_tde_options
   - pg_upgrade_arguments
   - limitations
-
+pdf: true
 ---
diff --git a/product_docs/docs/tpa/23/index.mdx b/product_docs/docs/tpa/23/index.mdx
index b2564eedc6a..7b6d1f0deb3 100644
--- a/product_docs/docs/tpa/23/index.mdx
+++ b/product_docs/docs/tpa/23/index.mdx
@@ -39,7 +39,7 @@ navigation:
   - reference
 title: Trusted Postgres Architect
 originalFilePath: index.md
-
+pdf: true
 ---
 
 © Copyright EnterpriseDB UK Limited 2015-2024 - All rights reserved.

From 8e90568ee5deff8251d2bc2e8ade8e9037e41f81 Mon Sep 17 00:00:00 2001
From: Josh Heyer
Date: Mon, 30 Dec 2024 16:12:59 +0000
Subject: [PATCH 2/4] pdf build workflows

---
 .github/workflows/build-pages.yml | 94 +
 .github/workflows/build-pdfs.yml | 91 +
 .github/workflows/deploy-draft.yml | 122 +-
 .github/workflows/deploy-to-netlify.yml | 45 +
 .github/workflows/sync-and-process-files.yml | 5 +-
 README.md | 2 +-
 docker/images/Dockerfile.pdf-builder | 13 +-
 gatsby-node.js | 7 +
 package.json | 5 +-
 scripts/pdf/build-all.sh | 16 +
 scripts/pdf/generate_pdf.py | 245 +-
 scripts/pdf/parallel | 10836 +++++++++++++++++
 src/components/left-nav.js | 27 +-
 src/components/pdf-download.js | 65 +-
 src/templates/doc.js | 2 +
 src/templates/learn-doc.js | 2 +
 16 files changed, 11336 insertions(+), 241 deletions(-)
 create mode 100644 .github/workflows/build-pages.yml
 create mode 100644 .github/workflows/build-pdfs.yml
 create mode 100644 .github/workflows/deploy-to-netlify.yml
 create mode 100755 scripts/pdf/build-all.sh
 create mode 100755 scripts/pdf/parallel

diff --git a/.github/workflows/build-pages.yml b/.github/workflows/build-pages.yml
new file mode 100644
index 00000000000..75e4e6e03d7
--- /dev/null
+++ b/.github/workflows/build-pages.yml
@@ -0,0 +1,94 @@
+name: Build pages
+on:
+  workflow_dispatch:
+    inputs:
+      ref:
+        description: Ref name to build
+        required: true
+        type: string
+      sha:
+        description: SHA, should correspond to ref
+        required: true
+        type: string
+  workflow_call:
+    inputs:
+      ref:
+        description: Ref name to build
+        required: true
+        type: string
+      sha:
+        description: SHA, should correspond to ref
+        required: true
+        type: string
+
+jobs:
+  build:
+    runs-on: docs-16c-64gb-600gb
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          ref: ${{ inputs.sha }}
+          fetch-depth: 0 # fetch whole repo so git-restore-mtime can work
+          lfs: true
+
+      - name: Adjust file watchers limit
+        run: echo fs.inotify.max_user_watches=524288 | sudo tee -a /etc/sysctl.conf && sudo sysctl -p
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version-file: ".nvmrc"
+          cache: "npm"
+          cache-dependency-path: "package-lock.json"
+        env:
+          NODE_ENV: ${{ secrets.NODE_ENV }}
+          NPM_TOKEN: ${{ secrets.NPM_TOKEN }}
+
+      - name: Install dependencies
+        run: |
+          npm run presetup
+          npm ci --ignore-scripts
+        env:
+          NODE_ENV: ${{ secrets.NODE_ENV }}
+          NPM_TOKEN: ${{ secrets.NPM_TOKEN }}
+
+      - name: Run NPM install scripts
+        run: |
+          npm rebuild
+
+      - name: Checking Gatsby cache
+        id: gatsby-cache-build
+        uses: actions/cache@v4
+        with:
+          path: |
+            public/*
+            !public/pdfs
+            .cache
+          key: ${{ runner.os }}-gatsby-build-${{ hashFiles('package.json', 'gatsby-config.js', 'gatsby-node.js') }}-${{ inputs.ref }}-${{ inputs.sha }}
+          restore-keys: |
+            ${{ runner.os }}-gatsby-build-${{ hashFiles('package.json', 'gatsby-config.js', 'gatsby-node.js') }}-${{ inputs.ref }}
+            ${{ runner.os }}-gatsby-build-${{ hashFiles('package.json', 'gatsby-config.js', 'gatsby-node.js') }}
+            ${{ runner.os }}-gatsby-build
+
+      - name: Fix mtimes
+        run: npm run fix-mtimes
+
+      - name: Gatsby build
+        run: npm run build
+        env:
+          APP_ENV: staging
+          NODE_ENV: ${{ vars.NODE_ENV }}
+          NODE_OPTIONS: --max-old-space-size=4096
+          FATHOM_SITE_ID: ${{ vars.FATHOM_SITE_ID }}
+          ALGOLIA_API_KEY: ${{ secrets.ALGOLIA_API_KEY }}
+          ALGOLIA_SEARCH_ONLY_KEY: ${{ vars.ALGOLIA_SEARCH_ONLY_KEY }}
+          ALGOLIA_APP_ID: ${{ vars.ALGOLIA_APP_ID }}
+          ALGOLIA_INDEX_NAME: edb-docs-staging
+          INDEX_ON_BUILD: ${{ inputs.ref == 'develop' }}
+
+      - name: Store pages
+        uses: actions/upload-artifact@v4
+        with:
+          name: pages-build
+          path: |
+            public/*
+            !public/pdfs
diff --git a/.github/workflows/build-pdfs.yml b/.github/workflows/build-pdfs.yml
new file mode 100644
index 00000000000..eeb26a6fb34
--- /dev/null
+++ b/.github/workflows/build-pdfs.yml
@@ -0,0 +1,91 @@
+name: Generate PDFs
+on:
+  workflow_dispatch:
+    inputs:
+      ref:
+        description: Ref name to build
+        required: true
+        type: string
+      sha:
+        description: SHA, should correspond to ref
+        required: true
+        type: string
+  workflow_call:
+    inputs:
+      ref:
+        description: Ref name to build
+        required: true
+        type: string
+      sha:
+        description: SHA, should correspond to ref
+        required: true
+        type: string
+
+jobs:
+  build:
+    runs-on: docs-16c-64gb-600gb
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          ref: ${{ inputs.sha }}
+          lfs: true
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version-file: ".nvmrc"
+          cache: "npm"
+          cache-dependency-path: "package-lock.json"
+        env:
+          NODE_ENV: ${{ vars.NODE_ENV }}
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11.6"
+
+      - uses: r-lib/actions/setup-pandoc@v2
+        with:
+          pandoc-version: "2.14.1"
+
+      - name: Install wkhtmltopdf
+        run: |
+          curl -L https://github.com/wkhtmltopdf/packaging/releases/download/0.12.6.1-3/wkhtmltox_0.12.6.1-3.jammy_amd64.deb > wkhtmltopdf.deb
+          sudo apt update
+          sudo apt install -y ./wkhtmltopdf.deb
+          sudo apt install -y rsync
+
+      - name: Install Python dependencies
+        run: pip install -r requirements-ci.txt
+
+      - name: Install PDF Node dependencies
+        working-directory: ./scripts/pdf
+        run: |
+          npm install
+
+      - name: Checking PDF cache
+        id: pdf-cache-build
+        uses: actions/cache@v4
+        with:
+          path: |
+            product_docs/**/*.pdf
+            product_docs/**/*.pdf-hash
+            advocacy_docs/**/*.pdf
+            advocacy_docs/**/*.pdf-hash
+          key: ${{ runner.os }}-build-pdfs-${{ hashFiles('scripts/pdf/*', 'scripts/pdf/lib/*') }}-${{ inputs.sha }}
+          restore-keys: |
+            ${{ runner.os }}-build-pdfs-${{ hashFiles('scripts/pdf/*', 'scripts/pdf/lib/*') }}
+
+      - name: Build all pdfs
+        run: npm run pdf:build-all-ci
+
+      - name: Copy pdfs to build output
+        run: |
+          mkdir -p public/pdfs
+          shopt -s globstar
+          rsync -avm --filter="+ */" --filter="-! *.pdf" advocacy_docs/ public/pdfs/
+          rsync -avm --filter="+ */" --filter="-! *.pdf" product_docs/docs/ public/pdfs/
+
+      - name: Store pdfs
+        uses: actions/upload-artifact@v4
+        with:
+          name: pdf-build
+          path: public/*
diff --git a/.github/workflows/deploy-draft.yml b/.github/workflows/deploy-draft.yml
index 0f51d0a7fb1..78c7c8354d7 100644
--- a/.github/workflows/deploy-draft.yml
+++ b/.github/workflows/deploy-draft.yml
@@ -9,90 +9,50 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
-  build-deploy:
+  build-pages:
     if: |
       (
         (github.event.action == 'labeled' && github.event.label.name == 'deploy') ||
-        (github.event.action != 'labeled' && contains(github.event.pull_request.labels.*.name, 'deploy'))
+        (contains(github.event.pull_request.labels.*.name, 'deploy'))
       ) && !github.event.pull_request.head.repo.fork
-    runs-on: docs-16c-64gb-600gb
-    steps:
-      - name: inject slug/short variables
-        uses: rlespinasse/github-slug-action@v4
-
-      - name: compose a name for the build environment
-        run: echo "BUILD_ENV_NAME=pr-${{ github.event.number }}-${{ env.GITHUB_HEAD_REF_SLUG }}" >> $GITHUB_ENV
-
-      - uses: actions/checkout@v4
-        with:
-          ref: ${{ github.event.pull_request.head.sha }}
-          fetch-depth: 0 # fetch whole repo so git-restore-mtime can work
-          lfs: true
-
-      - name: Adjust file watchers limit
-        run: echo fs.inotify.max_user_watches=524288 | sudo tee -a /etc/sysctl.conf && sudo sysctl -p
-
-      - uses: actions/setup-node@v4
-        with:
-          node-version-file: ".nvmrc"
-          cache: "npm"
-          cache-dependency-path: "package-lock.json"
-        env:
-          NODE_ENV: ${{ secrets.NODE_ENV }}
-          NPM_TOKEN: ${{ secrets.NPM_TOKEN }}
-
-      - name: Install dependencies
-        run: |
-          npm run presetup
-          npm ci --ignore-scripts
-        env:
-          NODE_ENV: ${{ secrets.NODE_ENV }}
-          NPM_TOKEN: ${{ secrets.NPM_TOKEN }}
+    uses: ./.github/workflows/build-pages.yml
+    with:
+      ref: ${{ github.head_ref }}
+      sha: ${{ github.event.pull_request.head.sha }}
+    secrets: inherit
 
-      - name: Run NPM install scripts
+  build-pdfs:
+    if: |
+      (
+        (github.event.action == 'labeled' && github.event.label.name == 'deploy-pdfs') ||
+        (contains(github.event.pull_request.labels.*.name, 'deploy-pdfs'))
+      ) && !github.event.pull_request.head.repo.fork
+    uses: ./.github/workflows/build-pdfs.yml
+    with:
+      ref: ${{ github.head_ref }}
+      sha: ${{ github.event.pull_request.head.sha }}
+
+  # this serves to allow building PDFs to be optional without preventing deployment,
+  # while still failing the workflow if we *try* to build PDFs and fail
+  # there are ways to do this in conditions on a single job, but this keeps things clear
+  # this job will fail only if build-pdfs failed (not if it was skipped)
+  # however, it will set an output - check-has-pdfs.steps.check.has-pdfs - true if PDFs were generated
+  check-has-pdfs:
+    needs: build-pdfs
+    if: ${{ always() }}
+    runs-on: ubuntu-22.04
+    steps:
+      - name: check
         run: |
-          npm rebuild
-
-      - name: Checking Gatsby cache
-        id: gatsby-cache-build
-        uses: actions/cache@v4
-        with:
-          path: |
-            public/*
-            !public/pdfs
-            .cache
-          key: ${{ runner.os }}-gatsby-build-draft-${{ github.head_ref }}-${{ hashFiles('package.json', 'gatsby-config.js', 'gatsby-node.js') }}-${{ github.sha }}
-          restore-keys: |
-            ${{ runner.os }}-gatsby-build-draft-${{ github.head_ref }}
-            ${{ runner.os }}-gatsby-build-develop-${{ hashFiles('package.json', 'gatsby-config.js', 'gatsby-node.js') }}
-            ${{ runner.os }}-gatsby-build-develop
-
-      - name: Fix mtimes
-        run: npm run fix-mtimes
-
-      - name: Gatsby build
-        run: npm run build
-        env:
-          APP_ENV: staging
-          NODE_ENV: ${{ vars.NODE_ENV }}
-          NODE_OPTIONS: --max-old-space-size=4096
-          FATHOM_SITE_ID: ${{ vars.FATHOM_SITE_ID }}
- ALGOLIA_API_KEY: ${{ secrets.ALGOLIA_API_KEY }} - ALGOLIA_SEARCH_ONLY_KEY: ${{ vars.ALGOLIA_SEARCH_ONLY_KEY }} - ALGOLIA_APP_ID: ${{ vars.ALGOLIA_APP_ID }} - ALGOLIA_INDEX_NAME: edb-docs-staging - INDEX_ON_BUILD: false - - - name: Deploy to Netlify - id: netlify-deploy - uses: nwtgck/actions-netlify@v3 - with: - publish-dir: "./public" - github-token: ${{ secrets.GITHUB_TOKEN }} - deploy-message: ${{ github.event.pull_request.title }} - enable-commit-comment: false - github-deployment-environment: ${{ env.BUILD_ENV_NAME }} - alias: deploy-preview-${{ github.event.number }} - env: - NETLIFY_AUTH_TOKEN: ${{ secrets.NETLIFY_AUTH_TOKEN }} - NETLIFY_SITE_ID: ${{ secrets.NETLIFY_DEVELOP_SITE_ID }} + echo "has-pdfs=${{ needs.build-pdfs.result == 'success' }}" >> "$GITHUB_OUTPUT" + exit ${{ needs.build-pdfs.result == 'failure' && 1 || 0 }} + + deploy-to-netlify: + needs: [build-pages, check-has-pdfs] + # succeeded() (the default check) will be false at this point if PDFs were skipped + if: ${{ !cancelled() && needs.build-pages.result == 'success' && needs.check-has-pdfs.result == 'success' }} + uses: ./.github/workflows/deploy-to-netlify.yml + with: + enable-commit-comment: false + alias: deploy-preview-${{ github.event.number }} + secrets: inherit diff --git a/.github/workflows/deploy-to-netlify.yml b/.github/workflows/deploy-to-netlify.yml new file mode 100644 index 00000000000..94c922effd9 --- /dev/null +++ b/.github/workflows/deploy-to-netlify.yml @@ -0,0 +1,45 @@ +name: Deploy build to Netlify +on: + workflow_dispatch: + workflow_call: + inputs: + enable-pull-request-comment: + description: whether to comment on the triggering PR + required: false + default: true + type: boolean + enable-commit-comment: + description: whether to comment on the triggering commit + required: false + default: true + type: boolean + alias: + description: name for the deployment (used to set custom / stable Netlify url) + required: false + type: string + +jobs: + deploy: + runs-on: ubuntu-22.04 + steps: + - name: Pull build artifacts + uses: actions/download-artifact@v4 + with: + path: ./public + merge-multiple: true + + - name: list + run: ls -lfR ./public + + - name: Deploy to Netlify + id: netlify-deploy + uses: nwtgck/actions-netlify@v3 + with: + publish-dir: "./public" + github-token: ${{ secrets.GITHUB_TOKEN }} + enable-pull-request-comment: ${{ inputs.enable-pull-request-comment }} + enable-commit-comment: ${{ inputs.enable-commit-comment }} + alias: ${{ inputs.alias }} + env: + NETLIFY_AUTH_TOKEN: ${{ secrets.NETLIFY_AUTH_TOKEN }} + NETLIFY_SITE_ID: ${{ secrets.NETLIFY_DEVELOP_SITE_ID }} diff --git a/.github/workflows/sync-and-process-files.yml b/.github/workflows/sync-and-process-files.yml index 56d6fdd7c21..0dd64317912 100644 --- a/.github/workflows/sync-and-process-files.yml +++ b/.github/workflows/sync-and-process-files.yml @@ -67,10 +67,7 @@ jobs: - name: setup node uses: actions/setup-node@v4 with: - node-version: "18" - - - name: update npm - run: npm install -g npm@10 + node-version-file: "destination/.nvmrc" - name: Process changes id: changes diff --git a/README.md b/README.md index 500dd298a52..3dc26d33c76 100644 --- a/README.md +++ b/README.md @@ -120,7 +120,7 @@ These instructions are for members of the EnterpriseDB Github Org only. The icon - If you already have a different version of Node installed, you may want to consider using Node Version Manager (NVM) for a simpler way to manage multiple versions of Node.js. 
Follow the [directions to install NVM](https://github.com/nvm-sh/nvm#installing-and-updating), then run `nvm install` in the cloned repo directory, followed by `nvm use` which will auto-detect the correct version of Node.js to use (currently 20 LTS). -1. Install Python 3 with `brew install python3`, if it's not already installed. (Use `python3 -V` to check that you have version 3.8 or higher.) Python is not needed for the core Gatsby system, but is required by several source scripts. +1. Install Python 3 with `brew install python3`, if it's not already installed. (Use `python3 -V` to check that you have version 3.11 or higher.) Python is not needed for the core Gatsby system, but is required by several source scripts. 1. NPM 10 is the package manager we're using for this project. diff --git a/docker/images/Dockerfile.pdf-builder b/docker/images/Dockerfile.pdf-builder index 280161f8797..2eb30b5627e 100644 --- a/docker/images/Dockerfile.pdf-builder +++ b/docker/images/Dockerfile.pdf-builder @@ -1,16 +1,13 @@ -FROM node:bullseye as base +FROM node:bookworm as base ARG TARGETARCH ARG PANDOC_VERSION=2.14.1 ARG PANDOC_DEB=pandoc-${PANDOC_VERSION}-1-${TARGETARCH}.deb -ARG WKHTML_TO_PDF_VERSION=0.12.6.1-2 -ARG WKHTML_TO_PDF_DEB=wkhtmltox_${WKHTML_TO_PDF_VERSION}.bullseye_${TARGETARCH}.deb +ARG WKHTML_TO_PDF_VERSION=0.12.6.1-3 +ARG WKHTML_TO_PDF_DEB=wkhtmltox_${WKHTML_TO_PDF_VERSION}.bookworm_${TARGETARCH}.deb RUN echo "Building for ${TARGETARCH}" -# Install Node dependencies -RUN npm install -g npm@7 - WORKDIR /app FROM base as dependencies @@ -22,7 +19,7 @@ FROM base as renderer COPY --from=dependencies /app/node_modules /app/node_modules -# Install Python3 +# Install Python3 & tools RUN apt-get update && apt-get install --no-install-recommends -y \ python3 \ @@ -48,5 +45,5 @@ RUN wget --quiet --no-check-certificate -P /tmp https://github.com/wkhtmltopdf/p # Install python dependencies COPY requirements-ci.txt . 
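# Debian bookworm ships a PEP 668 "externally managed" system Python, so a
# bare `pip install` into the system site-packages is refused; the
# --break-system-packages flag added below opts out of that guard. An
# alternative sketch (an assumption, not what this image does) would be to
# install the requirements into a dedicated virtualenv instead:
#
#   RUN python3 -m venv /opt/venv \
#    && /opt/venv/bin/pip install --no-cache-dir -r requirements-ci.txt
#   ENV PATH="/opt/venv/bin:${PATH}"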
-RUN pip install --no-cache-dir -r requirements-ci.txt +RUN pip install --no-cache-dir --break-system-packages -r requirements-ci.txt diff --git a/gatsby-node.js b/gatsby-node.js index b98f6ae9457..e65ed71f369 100644 --- a/gatsby-node.js +++ b/gatsby-node.js @@ -209,6 +209,8 @@ exports.createPages = async ({ actions, graphql, reporter }) => { } hideVersion hidePDF + pdfExclude + pdf displayBanner directoryDefaults { description @@ -221,6 +223,8 @@ exports.createPages = async ({ actions, graphql, reporter }) => { showInteractiveBadge hideKBLink hideVersion + hidePDF + pdfExclude displayBanner } } @@ -556,6 +560,8 @@ exports.createSchemaCustomization = ({ actions }) => { katacodaPanel: DemoPanel hideVersion: Boolean hidePDF: Boolean + pdfExclude: String + pdf: Boolean hideKBLink: Boolean displayBanner: String directoryDefaults: DirectoryDefaults @@ -596,6 +602,7 @@ exports.createSchemaCustomization = ({ actions }) => { showInteractiveBadge: Boolean hideVersion: Boolean hidePDF: Boolean + pdfExclude: Boolean hideKBLink: Boolean displayBanner: String } diff --git a/package.json b/package.json index fc29bc39706..67f346ed622 100644 --- a/package.json +++ b/package.json @@ -24,8 +24,9 @@ "links:rebuild-docker-image": "docker compose -f docker/docker-compose.check-links.yaml build --pull --no-cache", "logs": "docker compose -f docker/docker-compose.quickstart.yaml logs -f", "pdf:build": "docker compose -f docker/docker-compose.build-pdf.yaml run --rm --entrypoint scripts/pdf/generate_pdf.py docs-pdf-builder", - "pdf:build-all": "for i in product_docs/docs/**/*/ ; do echo \"$i\"; npm run pdf:build ${i%} || exit 1; done", - "pdf:build-all-ci": "for i in product_docs/docs/**/*/ ; do echo \"$i\"; python3 scripts/pdf/generate_pdf.py ${i%} || exit 1; done", + "pdf:build-all": "docker compose -f docker/docker-compose.build-pdf.yaml run --rm docs-pdf-builder scripts/pdf/build-all.sh .", + "pdf:build-all-ci": "./scripts/pdf/build-all.sh .", + "pdf:clean": "yes | rm product_docs/docs/*/*/*.pdf product_docs/docs/*/*/*.pdf-hash advocacy_docs/*/*/*.pdf advocacy_docs/*/*/*.pdf-hash", "pdf:rebuild-docker-image": "docker compose -f docker/docker-compose.build-pdf.yaml build --pull --no-cache", "postinstall": "patch-package", "prepare": "./scripts/husky-install.sh", diff --git a/scripts/pdf/build-all.sh b/scripts/pdf/build-all.sh new file mode 100755 index 00000000000..7cd0da9bb50 --- /dev/null +++ b/scripts/pdf/build-all.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +if [ -z $1 ] || ! 
[ -d $1/product_docs/docs ] +then + echo "provide the path to the content root directory (should contain product_docs/docs and advocacy_docs subdirectories)" + exit 1 +fi + +content_root=$1 +pdf_script_dir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) + +find $content_root '(' -wholename './product_docs/*/index.mdx' \ + -o -wholename './advocacy_docs/*/index.mdx' ')' \ + -exec grep -q 'pdf: true' {} ';' -print0 \ + | $pdf_script_dir/parallel --halt soon,fail=1 --memfree=3G -P 3 -0 \ + python3 $pdf_script_dir/generate_pdf.py {//} diff --git a/scripts/pdf/generate_pdf.py b/scripts/pdf/generate_pdf.py index 8ca46c028f4..12d6d9cbc02 100755 --- a/scripts/pdf/generate_pdf.py +++ b/scripts/pdf/generate_pdf.py @@ -8,6 +8,7 @@ from subprocess import run from typing import List import frontmatter +import hashlib BASE_DIR = Path(__file__).resolve().parent @@ -17,142 +18,152 @@ def main(args): - doc_path, product, version, mdx_file, html_file, cover_file, pdf_file = setup(args) + doc_path, index_meta, product, version, mdx_file, html_file, cover_file, pdf_file, pdf_hash_file = setup(args) - files = list_files(doc_path) + files = list_files(doc_path, index_meta) if len(files) == 0: print(f"{ANSI_YELLOW}skipping {pdf_file}{ANSI_STOP}") return - print(f"{ANSI_BLUE}building {pdf_file}{ANSI_STOP}") + print(f"{ANSI_BLUE}building {pdf_file}{ANSI_STOP}", flush=True) with open(mdx_file, "w") as output: resource_search_paths = {doc_path, *combine_mdx(files, output)} - output = run( - [ - "node", - "--max-old-space-size=4096", - BASE_DIR / "cleanup_combined_markdown.mjs", - mdx_file - ] - ) - output.check_returncode() - - # TODO: Pick up refactoring from this point - title = get_title(doc_path) or product - - print("generating docs html") - output = run( - [ - "pandoc", - mdx_file, - "--from=gfm", - "--self-contained", - "--highlight-style=tango", - f"--include-in-header={BASE_DIR / 'pdf-script.html'}", - f"--css={BASE_DIR / 'pdf-styles.css'}", - f"--resource-path={':'.join((str(p) for p in resource_search_paths))}", - f"--output={html_file}", - ] - ) - output.check_returncode() - - if not html_file.exists(): + if hash_check(pdf_file, pdf_hash_file, mdx_file): mdx_file.unlink() - raise Exception(f"\033[91m html file failed to generate for {mdx_file} \033[0m") + print(f"{ANSI_YELLOW}skipping {pdf_file} - content unchanged{ANSI_STOP}") + return + + try: + + output = run( + [ + "node", + "--max-old-space-size=4096", + BASE_DIR / "cleanup_combined_markdown.mjs", + mdx_file + ] + ) + output.check_returncode() - if args.generate_html_only: - run(["open", html_file]) - else: - print("generating cover page") - data = (BASE_DIR / "cover.html").read_text() - data = data.replace("[PRODUCT]", title) - data = data.replace("[VERSION]", version) - cover_file.write_text(data) - - header_footer_common = [ - "--header-font-name", - "Signika", - "--header-font-size", - "8", - "--header-spacing", - "7", - "--footer-font-name", - "Signika", - "--footer-font-size", - "8", - "--footer-spacing", - "7", - "--footer-left", - f"Copyright © 2009 - {datetime.now().year} EnterpriseDB Corporation. 
" - "All rights reserved.", - ] - - header_footer_options = [ - "--header-right", - "[doctitle]", - "--footer-right", - "[page]", - ] - - print("converting html to pdf") + # TODO: Pick up refactoring from this point + title = get_title(doc_path) or product + + print("generating docs html") output = run( [ - "wkhtmltopdf", - "--log-level", - "error", - "--title", - title, - "--margin-top", - "15mm", - "--margin-bottom", - "15mm", - *header_footer_common, - cover_file, - "--footer-right", - f"Built at {datetime.utcnow().replace(microsecond=0).isoformat()}", - "toc", - "--xsl-style-sheet", - "scripts/pdf/toc-style.xsl", - *header_footer_options, - html_file, - *header_footer_options, - pdf_file, + "pandoc", + mdx_file, + "--from=gfm", + "--self-contained", + "--highlight-style=tango", + f"--include-in-header={BASE_DIR / 'pdf-script.html'}", + f"--css={BASE_DIR / 'pdf-styles.css'}", + f"--resource-path={':'.join((str(p) for p in resource_search_paths))}", + f"--output={html_file}", ] ) output.check_returncode() - if args.open_pdf: - run(["open", pdf_file]) + if not html_file.exists(): + mdx_file.unlink() + raise Exception(f"\033[91m html file failed to generate for {mdx_file} \033[0m") + + if args.generate_html_only: + run(["open", html_file]) + else: + print("generating cover page") + data = (BASE_DIR / "cover.html").read_text() + data = data.replace("[PRODUCT]", title) + data = data.replace("[VERSION]", version) + cover_file.write_text(data) + + header_footer_common = [ + "--header-font-name", + "Signika", + "--header-font-size", + "8", + "--header-spacing", + "7", + "--footer-font-name", + "Signika", + "--footer-font-size", + "8", + "--footer-spacing", + "7", + "--footer-left", + f"Copyright © 2009 - {datetime.now().year} EnterpriseDB Corporation. " + "All rights reserved.", + ] + + header_footer_options = [ + "--header-right", + "[doctitle]", + "--footer-right", + "[page]", + ] + + print("converting html to pdf") + output = run( + [ + "wkhtmltopdf", + "--log-level", + "error", + "--title", + title, + "--margin-top", + "15mm", + "--margin-bottom", + "15mm", + *header_footer_common, + cover_file, + "--footer-right", + f"Built at {datetime.utcnow().replace(microsecond=0).isoformat()}", + "toc", + "--xsl-style-sheet", + "scripts/pdf/toc-style.xsl", + *header_footer_options, + html_file, + *header_footer_options, + pdf_file, + ] + ) + output.check_returncode() + + if args.open_pdf: + run(["open", pdf_file]) - mdx_file.unlink() - if not args.generate_html_only: - html_file.unlink() - cover_file.unlink() + finally: + mdx_file.unlink(missing_ok=True) + if not args.generate_html_only: + html_file.unlink(missing_ok=True) + cover_file.unlink(missing_ok=True) def setup(args): doc_path = args.doc_path - product = doc_path.parts[-2] if len(doc_path.parts) >= 4 else doc_path.parts[-1] - version = doc_path.parts[-1] if len(doc_path.parts) >= 4 else '' + index_meta = load_index_metadata(doc_path) + product = index_meta.get("product", None) or doc_path.parts[-2] if len(doc_path.parts) >= 4 else doc_path.parts[-1] + version = index_meta.get("version", None) or doc_path.parts[-1] if len(doc_path.parts) >= 4 else '' - _file_prefix = f"{product}_v{version}_documentation" + _file_prefix = product + (version and "_v" + version or "") + "_documentation" mdx_file = doc_path / f"{_file_prefix}.mdx" html_file = doc_path / f"{_file_prefix}.html" cover_file = doc_path / f"{_file_prefix}_cover.html" pdf_file = doc_path / f"{_file_prefix}.pdf" + pdf_hash_file = doc_path / f"{_file_prefix}.pdf-hash" for f in [mdx_file, 
html_file, cover_file]: f.unlink(missing_ok=True) - return doc_path, product, version, mdx_file, html_file, cover_file, pdf_file + return doc_path, index_meta, product, version, mdx_file, html_file, cover_file, pdf_file, pdf_hash_file -def list_files(doc_path, chapter=None, is_root_dir=True): +def list_files(doc_path, index_meta, chapter=None, is_root_dir=True): chapter = [1] if chapter is None else chapter all_files = [] - nav_order = load_nav_index(doc_path) + nav_order = index_meta.get("navigation", {}) directory_contents = sorted( filter(filter_path, doc_path.iterdir()), key=lambda file: put_index_first(file, nav_order) ) @@ -171,10 +182,29 @@ def list_files(doc_path, chapter=None, is_root_dir=True): if entry.is_file(): all_files.append(TocItem(filename=entry, chapter=chapter)) else: - all_files += list_files(entry, chapter, is_root_dir=False) + all_files += list_files(entry, load_index_metadata(entry), chapter, is_root_dir=False) return all_files +def hash_check(pdf_file, pdf_hash_file, mdx_file): + try: + existingHash = "a" + newHash = "b" + + if pdf_hash_file.exists(): + existingHash = pdf_hash_file.read_text() + with mdx_file.open(mode="rb") as output: + newHash = hashlib.file_digest(output, "md5").hexdigest() + + if pdf_file.exists() and newHash == existingHash: + return True + + pdf_file.unlink(missing_ok=True) + pdf_hash_file.write_text(newHash) + return False + + except (FileNotFoundError): + return False def combine_mdx(files, output): resource_search_paths = set() @@ -238,12 +268,13 @@ def parse_mdx(mdx_file): if c == '\n': lines_before_content += 1 return front_matter.strip(), content.strip(), lines_before_content -def load_nav_index(path): +def load_index_metadata(path): try: - index = frontmatter.load(path / "index.mdx")["navigation"] - return index - except (FileNotFoundError, KeyError): - return None + indexMeta = frontmatter.load(path / "index.mdx") + return indexMeta + except (FileNotFoundError): + return {} + def put_index_first(path, nav_order): filename = path.name if path.suffix != ".mdx" else path.stem diff --git a/scripts/pdf/parallel b/scripts/pdf/parallel new file mode 100755 index 00000000000..4226fd5a6f9 --- /dev/null +++ b/scripts/pdf/parallel @@ -0,0 +1,10836 @@ +#!/usr/bin/env perl + +# Copyright (C) 2007,2008,2009,2010,2011,2012,2013,2014,2015,2016 +# Ole Tange and Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details.
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see +# or write to the Free Software Foundation, Inc., 51 Franklin St, +# Fifth Floor, Boston, MA 02110-1301 USA + +# open3 used in Job::start +use IPC::Open3; +# &WNOHANG used in reaper +use POSIX qw(:sys_wait_h setsid ceil :errno_h); +# gensym used in Job::start +use Symbol qw(gensym); +# tempfile used in Job::start +use File::Temp qw(tempfile tempdir); +# mkpath used in openresultsfile +use File::Path; +# GetOptions used in get_options_from_array +use Getopt::Long; +# Used to ensure code quality +use strict; +use File::Basename; + +save_stdin_stdout_stderr(); +save_original_signal_handler(); +parse_options(); +::debug("init", "Open file descriptors: ", join(" ",keys %Global::fd), "\n"); +my $number_of_args; +if($Global::max_number_of_args) { + $number_of_args = $Global::max_number_of_args; +} elsif ($opt::X or $opt::m or $opt::xargs) { + $number_of_args = undef; +} else { + $number_of_args = 1; +} + +my @command = @ARGV; + +my @input_source_fh; +if($opt::pipepart) { + # -a is used for data - not for command line args + @input_source_fh = map { open_or_exit($_) } "/dev/null"; +} else { + @input_source_fh = map { open_or_exit($_) } @opt::a; + if(not @input_source_fh and not $opt::pipe) { + @input_source_fh = (*STDIN); + } +} +if($opt::sqlmaster) { + # Create SQL table to hold joblog + output + $Global::sql->create_table($#input_source_fh+1); + if($opt::sqlworker) { + # Start a real --sqlworker in the background later + $Global::start_sqlworker = 1; + $opt::sqlworker = undef; + } +} + +if($opt::skip_first_line) { + # Skip the first line for the first file handle + my $fh = $input_source_fh[0]; + <$fh>; +} + +set_input_source_header(); + +sub set_input_source_header { + if($opt::header and not $opt::pipe) { + # split with colsep or \t + # $header force $colsep = \t if undef? + my $delimiter = defined $opt::colsep ? $opt::colsep : "\t"; + # regexp for {= + my $left = "\Q$Global::parensleft\E"; + my $l = $Global::parensleft; + # regexp for =} + my $right = "\Q$Global::parensright\E"; + my $r = $Global::parensright; + my $id = 1; + for my $fh (@input_source_fh) { + my $line = <$fh>; + chomp($line); + ::debug("init", "Delimiter: '$delimiter'"); + for my $s (split /$delimiter/o, $line) { + ::debug("init", "Colname: '$s'"); + # Replace {colname} with {2} + for(@command,@Global::ret_files,@Global::transfer_files) { + s:\{$s(|/|//|\.|/\.)\}:\{$id$1\}:g; + # {=header1 ... =} => {=1 ... =} + s:$left $s (.*?) $right:$l$id$1$r:gx; + } + $Global::input_source_header{$id} = $s; + $id++; + } + } + } else { + my $id = 1; + for my $fh (@input_source_fh) { + $Global::input_source_header{$id} = $id; + $id++; + } + } +} + +if($opt::filter_hosts and (@opt::sshlogin or @opt::sshloginfile)) { + # Parallel check all hosts are up. 
Remove hosts that are down + filter_hosts(); +} + +if($opt::nonall or $opt::onall) { + onall(\@input_source_fh,@command); + wait_and_exit(min(undef_as_zero($Global::exitstatus),254)); +} + +# TODO --transfer foo/./bar --cleanup +# multiple --transfer and --basefile with different /./ + +$Global::JobQueue = JobQueue->new( + \@command,\@input_source_fh,$Global::ContextReplace, + $number_of_args,\@Global::transfer_files,\@Global::ret_files); + +if($opt::pipepart) { + if(not $opt::blocksize) { + # --blocksize with 10 jobs per jobslot + $opt::blocksize = -10; + } + if($opt::roundrobin) { + # --blocksize with 1 job per jobslot + $opt::blocksize = -1; + } + if($opt::blocksize < 0) { + my $size = 0; + # Compute size of -a + for(@opt::a) { + if(-f $_) { + $size += -s $_; + } elsif(-b $_) { + $size += size_of_block_dev($_); + } else { + ::error("$_ is neither a file nor a block device"); + wait_and_exit(255); + } + } + # Compute $Global::max_jobs_running + $Global::dummy_jobs = 1; + for my $sshlogin (values %Global::host) { + $sshlogin->max_jobs_running(); + } + $Global::max_jobs_running or + ::die_bug("Global::max_jobs_running not set"); + # Run in total $job_slots*(- $blocksize) jobs + # Set --blocksize = size / no of proc / 10 + $Global::blocksize = 1 + + int($size / $Global::max_jobs_running / -$opt::blocksize); + } + @Global::cat_partials = map { pipe_part_files($_) } @opt::a; + # Unget the empty arg as many times as there are parts + $Global::JobQueue->{'commandlinequeue'}{'arg_queue'}->unget( + map { [Arg->new("\0")] } @Global::cat_partials + ); +} +if($opt::eta or $opt::bar or $opt::shuf or $Global::halt_pct) { + # Count the number of jobs or shuffle all jobs + # before starting any. + # Must be done after ungetting any --pipepart jobs. + $Global::JobQueue->total_jobs(); +} +# Compute $Global::max_jobs_running +# Must be done after ungetting any --pipepart jobs. 
+for my $sshlogin (values %Global::host) { + $sshlogin->max_jobs_running(); +} + +init_run_jobs(); +my $sem; +if($Global::semaphore) { + $sem = acquire_semaphore(); +} +$SIG{TERM} = \&start_no_new_jobs; +start_more_jobs(); +if($opt::pipe and not $opt::pipepart) { + spreadstdin(); +} +::debug("init", "Start draining\n"); +drain_job_queue(); +::debug("init", "Done draining\n"); +reaper(); +::debug("init", "Done reaping\n"); +if($opt::pipe and @opt::a) { + for my $job (@Global::tee_jobs) { + ::rm($job->fh(2,"name")); + $job->set_fh(2,"name",""); + $job->print(); + ::rm($job->fh(1,"name")); + } +} +::debug("init", "Cleaning\n"); +if($Global::semaphore) { + $sem->release(); +} +cleanup(); +::debug("init", "Halt\n"); +if($opt::halt and $Global::halt_when ne "never") { + if(not defined $Global::halt_exitstatus) { + if($Global::halt_pct) { + $Global::halt_exitstatus = + ::ceil($Global::total_failed / $Global::total_started * 100); + } elsif($Global::halt_count) { + $Global::halt_exitstatus = + ::min(undef_as_zero($Global::total_failed),101); + } + } + wait_and_exit($Global::halt_exitstatus); +} else { + wait_and_exit(min(undef_as_zero($Global::exitstatus),101)); +} + + +sub __PIPE_MODE__ {} + + +sub pipe_part_files { + # Input: + # $file = the file to read + # Returns: + # @commands that will cat_partial each part + my ($file) = @_; + my $buf = ""; + if(not -f $file and not -b $file) { + ::error("$file is not a seekable file."); + ::wait_and_exit(255); + } + my $header = find_header(\$buf,open_or_exit($file)); + # find positions + my @pos = find_split_positions($file,$Global::blocksize,length $header); + # Make @cat_partials + my @cat_partials = (); + for(my $i=0; $i<$#pos; $i++) { + push @cat_partials, cat_partial($file, 0, length($header), $pos[$i], $pos[$i+1]); + } + return @cat_partials; +} + +sub find_header { + # Input: + # $buf_ref = reference to read-in buffer + # $fh = filehandle to read from + # Uses: + # $opt::header + # $Global::blocksize + # Returns: + # $header string + my ($buf_ref, $fh) = @_; + my $header = ""; + if($opt::header) { + if($opt::header eq ":") { $opt::header = "(.*\n)"; } + # Number = number of lines + $opt::header =~ s/^(\d+)$/"(.*\n)"x$1/e; + while(read($fh,substr($$buf_ref,length $$buf_ref,0),$Global::blocksize)) { + if($$buf_ref=~s/^($opt::header)//) { + $header = $1; + last; + } + } + } + return $header; +} + +sub find_split_positions { + # Input: + # $file = the file to read + # $block = (minimal) --block-size of each chunk + # $headerlen = length of header to be skipped + # Uses: + # $opt::recstart + # $opt::recend + # Returns: + # @positions of block start/end + my($file, $block, $headerlen) = @_; + my $size = -s $file; + if(-b $file) { + # $file is a blockdevice + $size = size_of_block_dev($file); + } + $block = int $block; + # The optimal dd blocksize for mint, redhat, solaris, openbsd = 2^17..2^20 + # The optimal dd blocksize for freebsd = 2^15..2^17 + my $dd_block_size = 131072; # 2^17 + my @pos; + my ($recstart,$recend) = recstartrecend(); + my $recendrecstart = $recend.$recstart; + my $fh = ::open_or_exit($file); + push(@pos,$headerlen); + for(my $pos = $block+$headerlen; $pos < $size; $pos += $block) { + my $buf; + if($recendrecstart eq "") { + push(@pos,$pos); + } else { + seek($fh, $pos, 0) || die; + while(read($fh,substr($buf,length $buf,0),$dd_block_size)) { + if($opt::regexp) { + # If match /$recend$recstart/ => Record position + if($buf =~ /^(.*$recend)$recstart/os) { + # Start looking for next record _after_ this match + $pos += length($1); + 
push(@pos,$pos); + last; + } + } else { + # If match $recend$recstart => Record position + my $i = index64(\$buf,$recendrecstart); + if($i != -1) { + # Start looking for next record _after_ this match + $pos += $i + length($recendrecstart); + push(@pos,$pos); + last; + } + } + } + } + } + push(@pos,$size); + close $fh; + return @pos; +} + +sub cat_partial { + # Input: + # $file = the file to read + # ($start, $end, [$start2, $end2, ...]) = start byte, end byte + # Returns: + # Efficient perl command to copy $start..$end, $start2..$end2, ... to stdout + my($file, @start_end) = @_; + my($start, $i); + # Convert start_end to start_len + my @start_len = map { if(++$i % 2) { $start = $_; } else { $_-$start } } @start_end; + return "<". shell_quote_scalar($file) . + q{ perl -e 'while(@ARGV) { sysseek(STDIN,shift,0) || die; $left = shift; while($read = sysread(STDIN,$buf, ($left > 131072 ? 131072 : $left))){ $left -= $read; syswrite(STDOUT,$buf); } }' } . + " @start_len"; +} + +sub spreadstdin { + # read a record + # Spawn a job and print the record to it. + # Uses: + # $Global::blocksize + # STDIN + # $opt::r + # $Global::max_lines + # $Global::max_number_of_args + # $opt::regexp + # $Global::start_no_new_jobs + # $opt::roundrobin + # %Global::running + # Returns: N/A + + my $buf = ""; + my ($recstart,$recend) = recstartrecend(); + my $recendrecstart = $recend.$recstart; + my $chunk_number = 1; + my $one_time_through; + my $two_gb = 2**31-1; + my $blocksize = $Global::blocksize; + my $in = *STDIN; + my $header = find_header(\$buf,$in); + while(1) { + my $anything_written = 0; + my $buflen = length $buf; + my $readsize = ($buflen < $blocksize) ? $blocksize-$buflen : $blocksize; + # If $buf < $blocksize, append so it is $blocksize long after reading. + # Otherwise append a full $blocksize + if(not read($in,substr($buf,$buflen,0),$readsize)) { + # End-of-file + $chunk_number != 1 and last; + # Force the while-loop once if everything was read by header reading + $one_time_through++ and last; + } + if($opt::r) { + # Remove empty lines + $buf =~ s/^\s*\n//gm; + if(length $buf == 0) { + next; + } + } + if($Global::max_lines and not $Global::max_number_of_args) { + # Read n-line records + my $n_lines = $buf =~ tr/\n/\n/; + my $last_newline_pos = rindex64(\$buf,"\n"); + while($n_lines % $Global::max_lines) { + $n_lines--; + $last_newline_pos = rindex64(\$buf,"\n",$last_newline_pos-1); + } + # Chop at $last_newline_pos as that is where n-line record ends + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$buf, + $recstart,$recend,$last_newline_pos+1); + shorten(\$buf,$last_newline_pos+1); + } elsif($opt::regexp) { + if($Global::max_number_of_args) { + # -N => (start..*?end){n} + # -L -N => (start..*?end){n*l} + my $read_n_lines = $Global::max_number_of_args * ($Global::max_lines || 1); + while($buf =~ s/((?:$recstart.*?$recend){$read_n_lines})($recstart.*)$/$2/os) { + # Copy to modifiable variable + my $b = $1; + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$b, + $recstart,$recend,length $1); + } + } else { + eof($in) and last; + # Find the last recend-recstart in $buf + if($buf =~ s/(.*$recend)($recstart.*?)$/$2/os) { + # Copy to modifiable variable + my $b = $1; + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$b, + $recstart,$recend,length $1); + } + } + } else { + if($Global::max_number_of_args) { + # -N => (start..*?end){n} + my $i = 0; + my $read_n_lines = $Global::max_number_of_args * ($Global::max_lines || 1); + while(($i = 
nindex(\$buf,$recendrecstart,$read_n_lines)) != -1) { + $i += length $recend; # find the actual splitting location + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$buf, + $recstart,$recend,$i); + shorten(\$buf,$i); + } + } else { + eof($in) and last; + # Find the last recend+recstart in $buf + my $i = rindex64(\$buf,$recendrecstart); + if($i != -1) { + $i += length $recend; # find the actual splitting location + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$buf, + $recstart,$recend,$i); + shorten(\$buf,$i); + } + } + } + if(not $anything_written + and not eof($in) + and not $Global::no_autoexpand_block) { + # Nothing was written - maybe the block size < record size? + # Increase blocksize exponentially up to 2GB-1 (2GB causes problems) + if($blocksize < $two_gb) { + my $old_blocksize = $blocksize; + $blocksize = ::min(ceil($blocksize * 1.3 + 1), $two_gb); + ::warning("A record was longer than $old_blocksize. " . + "Increasing to --blocksize $blocksize."); + } + } + } + ::debug("init", "Done reading input\n"); + + # If there is anything left in the buffer write it + write_record_to_pipe($chunk_number++,\$header,\$buf,$recstart,$recend,length $buf); + + if($opt::retries) { + $Global::no_more_input = 1; + # We need to start no more jobs: At most we need to retry some + # of the already running. + my @running = values %Global::running; + # Stop any virgins. + for my $job (@running) { + if(defined $job and $job->virgin()) { + close $job->fh(0,"w"); + } + } + # Wait for running jobs to be done + my $sleep =1; + while($Global::total_running > 0) { + $sleep = ::reap_usleep($sleep); + } + } + $Global::start_no_new_jobs ||= 1; + if($opt::roundrobin) { + # Flush blocks to roundrobin procs + my $sleep = 1; + while(%Global::running) { + my $something_written = 0; + for my $job (values %Global::running) { + if($job->block_length()) { + $something_written += $job->non_blocking_write(); + } else { + close $job->fh(0,"w"); + } + } + if($something_written) { + $sleep = $sleep/2+0.001; + } + $sleep = ::reap_usleep($sleep); + } + } +} + +sub recstartrecend { + # Uses: + # $opt::recstart + # $opt::recend + # Returns: + # $recstart,$recend with default values and regexp conversion + my($recstart,$recend); + if(defined($opt::recstart) and defined($opt::recend)) { + # If both --recstart and --recend is given then both must match + $recstart = $opt::recstart; + $recend = $opt::recend; + } elsif(defined($opt::recstart)) { + # If --recstart is given it must match start of record + $recstart = $opt::recstart; + $recend = ""; + } elsif(defined($opt::recend)) { + # If --recend is given then it must match end of record + $recstart = ""; + $recend = $opt::recend; + } + + if($opt::regexp) { + # If $recstart/$recend contains '|' this should only apply to the regexp + $recstart = "(?:".$recstart.")"; + $recend = "(?:".$recend.")"; + } else { + # $recstart/$recend = printf strings (\n) + $recstart =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee; + $recend =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee; + } + return ($recstart,$recend); +} + +sub nindex { + # See if string is in buffer N times + # Returns: + # the position where the Nth copy is found + my ($buf_ref, $str, $n) = @_; + my $i = 0; + for(1..$n) { + $i = index64($buf_ref,$str,$i+1); + if($i == -1) { last } + } + return $i; +} + +{ + my @robin_queue; + my $sleep = 1; + + sub round_robin_write { + # Input: + # $header_ref = ref to $header string + # $block_ref = ref to $block to be written + # $recstart = record start string + # $recend = 
record end string + # $endpos = end position of $block + # Uses: + # %Global::running + # Returns: + # $something_written = amount of bytes written + my ($header_ref,$buffer_ref,$recstart,$recend,$endpos) = @_; + my $written = 0; + my $block_passed = 0; + while(not $block_passed) { + # Continue flushing existing buffers + # until one is empty and a new block is passed + if(@robin_queue) { + # Rotate queue once so new blocks get a fair chance + # to be given to another block + push @robin_queue, shift @robin_queue; + } else { + # Make a queue to spread the blocks evenly + push @robin_queue, (sort { $a->seq() <=> $b->seq() } + values %Global::running); + } + do { + $written = 0; + for my $job (@robin_queue) { + if($job->block_length() > 0) { + $written += $job->non_blocking_write(); + } else { + $job->set_block($header_ref,$buffer_ref, + $endpos,$recstart,$recend); + $block_passed = 1; + $job->set_virgin(0); + $written += $job->non_blocking_write(); + last; + } + } + if($written) { + $sleep = $sleep/1.5+0.001; + } + } while($written and not $block_passed); + $sleep = ::reap_usleep($sleep); + } + return $written; + } +} + +sub index64 { + # Do index on strings > 2GB. + # index in Perl < v5.22 does not work for > 2GB + # Input: + # as index except STR which must be passed as a reference + # Output: + # as index + my $ref = shift; + my $match = shift; + my $pos = shift || 0; + my $block_size = 2**31-1; + my $strlen = length($$ref); + # No point in doing extra work if we don't need to. + if($strlen < $block_size or $] > 5.022) { + return index($$ref, $match, $pos); + } + + my $matchlen = length($match); + my $ret; + my $offset = $pos; + while($offset < $strlen) { + $ret = index( + substr($$ref, $offset, $block_size), + $match, $pos-$offset); + if($ret != -1) { + return $ret + $offset; + } + $offset += ($block_size - $matchlen - 1); + } + return -1; +} + +sub rindex64 { + # Do rindex on strings > 2GB. + # rindex in Perl < v5.22 does not work for > 2GB + # Input: + # as rindex except STR which must be passed as a reference + # Output: + # as rindex + my $ref = shift; + my $match = shift; + my $pos = shift; + my $block_size = 2**31-1; + my $strlen = length($$ref); + # Default: search from end + $pos = defined $pos ? $pos : $strlen; + # No point in doing extra work if we don't need to. + if($strlen < $block_size) { + return rindex($$ref, $match, $pos); + } + + my $matchlen = length($match); + my $ret; + my $offset = $pos - $block_size + $matchlen; + if($offset < 0) { + # The offset is less than a $block_size + # Set the $offset to 0 and + # Adjust block_size accordingly + $block_size = $block_size + $offset; + $offset = 0; + } + while($offset >= 0) { + $ret = rindex( + substr($$ref, $offset, $block_size), + $match); + if($ret != -1) { + return $ret + $offset; + } + $offset -= ($block_size - $matchlen - 1); + } + return -1; +} + +sub shorten { + # Do: substr($buf,0,$i) = ""; + # Some Perl versions do not support $i > 2GB, so do this in 2GB chunks + # Input: + # $buf_ref = \$buf + # $i = position to shorten to + # Returns: N/A + my ($buf_ref, $i) = @_; + my $two_gb = 2**31-1; + while($i > $two_gb) { + substr($$buf_ref,0,$two_gb) = ""; + $i -= $two_gb; + } + substr($$buf_ref,0,$i) = ""; +} + +sub write_record_to_pipe { + # Fork then + # Write record from pos 0 .. 
$endpos to pipe + # Input: + # $chunk_number = sequence number - to see if already run + # $header_ref = reference to header string to prepend + # $buffer_ref = reference to record to write + # $recstart = start string of record + # $recend = end string of record + # $endpos = position in $buffer_ref where record ends + # Uses: + # $Global::job_already_run + # $opt::roundrobin + # @Global::virgin_jobs + # Returns: + # Number of chunks written (0 or 1) + my ($chunk_number,$header_ref,$buffer_ref,$recstart,$recend,$endpos) = @_; + if($endpos == 0) { return 0; } + if(vec($Global::job_already_run,$chunk_number,1)) { return 1; } + if($opt::roundrobin) { + return round_robin_write($header_ref,$buffer_ref,$recstart,$recend,$endpos); + } + # If no virgin found, backoff + my $sleep = 0.0001; # 0.01 ms - better performance on highend + while(not @Global::virgin_jobs) { + ::debug("pipe", "No virgin jobs"); + $sleep = ::reap_usleep($sleep); + # Jobs may not be started because of loadavg + # or too little time between each ssh login + # or retrying failed jobs. + start_more_jobs(); + } + my $job = shift @Global::virgin_jobs; + # Job is no longer virgin + $job->set_virgin(0); + + if($opt::retries) { + # Copy $buffer[0..$endpos] to $job->{'block'} + # Remove rec_sep + # Run $job->add_transfersize + $job->set_block($header_ref,$buffer_ref,$endpos,$recstart,$recend); + if(fork()) { + # Skip + } else { + $job->write($job->block_ref()); + close $job->fh(0,"w"); + exit(0); + } + } else { + # We ignore the removed rec_sep which is technically wrong. + $job->add_transfersize($endpos + length $$header_ref); + if(fork()) { + # Skip + } else { + # Chop of at $endpos as we do not know how many rec_sep will + # be removed. + substr($$buffer_ref,$endpos,length $$buffer_ref) = ""; + # Remove rec_sep + if($opt::remove_rec_sep) { + Job::remove_rec_sep($buffer_ref,$recstart,$recend); + } + $job->write($header_ref); + $job->write($buffer_ref); + close $job->fh(0,"w"); + exit(0); + } + } + close $job->fh(0,"w"); + return 1; +} + + +sub __SEM_MODE__ {} + + +sub acquire_semaphore { + # Acquires semaphore. 
If needed: spawns to the background + # Uses: + # @Global::host + # Returns: + # The semaphore to be released when jobs is complete + $Global::host{':'} = SSHLogin->new(":"); + my $sem = Semaphore->new($Semaphore::name,$Global::host{':'}->max_jobs_running()); + $sem->acquire(); + if($Semaphore::fg) { + # skip + } else { + if(fork()) { + exit(0); + } else { + # If run in the background, the PID will change + $sem->pid_change(); + } + } + return $sem; +} + + +sub __PARSE_OPTIONS__ {} + + +sub options_hash { + # Returns: + # %hash = the GetOptions config + return + ("debug|D=s" => \$opt::D, + "xargs" => \$opt::xargs, + "m" => \$opt::m, + "X" => \$opt::X, + "v" => \@opt::v, + "sql=s" => \$opt::retired, + "sqlmaster=s" => \$opt::sqlmaster, + "sqlworker=s" => \$opt::sqlworker, + "sqlandworker=s" => \$opt::sqlandworker, + "joblog|jl=s" => \$opt::joblog, + "results|result|res=s" => \$opt::results, + "resume" => \$opt::resume, + "resume-failed|resumefailed" => \$opt::resume_failed, + "retry-failed|retryfailed" => \$opt::retry_failed, + "silent" => \$opt::silent, + "keep-order|keeporder|k" => \$opt::keeporder, + "no-keep-order|nokeeporder|nok|no-k" => \$opt::nokeeporder, + "group" => \$opt::group, + "g" => \$opt::retired, + "ungroup|u" => \$opt::ungroup, + "linebuffer|linebuffered|line-buffer|line-buffered|lb" => \$opt::linebuffer, + "tmux" => \$opt::tmux, + "tmuxpane" => \$opt::tmuxpane, + "null|0" => \$opt::0, + "quote|q" => \$opt::q, + # Replacement strings + "parens=s" => \$opt::parens, + "rpl=s" => \@opt::rpl, + "plus" => \$opt::plus, + "I=s" => \$opt::I, + "extensionreplace|er=s" => \$opt::U, + "U=s" => \$opt::retired, + "basenamereplace|bnr=s" => \$opt::basenamereplace, + "dirnamereplace|dnr=s" => \$opt::dirnamereplace, + "basenameextensionreplace|bner=s" => \$opt::basenameextensionreplace, + "seqreplace=s" => \$opt::seqreplace, + "slotreplace=s" => \$opt::slotreplace, + "jobs|j=s" => \$opt::jobs, + "delay=f" => \$opt::delay, + "sshdelay=f" => \$opt::sshdelay, + "load=s" => \$opt::load, + "noswap" => \$opt::noswap, + "max-line-length-allowed" => \$opt::max_line_length_allowed, + "number-of-cpus" => \$opt::number_of_cpus, + "number-of-cores" => \$opt::number_of_cores, + "use-cpus-instead-of-cores" => \$opt::use_cpus_instead_of_cores, + "shellquote|shell_quote|shell-quote" => \$opt::shellquote, + "nice=i" => \$opt::nice, + "tag" => \$opt::tag, + "tagstring|tag-string=s" => \$opt::tagstring, + "onall" => \$opt::onall, + "nonall" => \$opt::nonall, + "filter-hosts|filterhosts|filter-host" => \$opt::filter_hosts, + "sshlogin|S=s" => \@opt::sshlogin, + "sshloginfile|slf=s" => \@opt::sshloginfile, + "controlmaster|M" => \$opt::controlmaster, + "ssh=s" => \$opt::ssh, + "transfer-file|transferfile|transfer-files|transferfiles|tf=s" + => \@opt::transfer_files, + "return=s" => \@opt::return, + "trc=s" => \@opt::trc, + "transfer" => \$opt::transfer, + "cleanup" => \$opt::cleanup, + "basefile|bf=s" => \@opt::basefile, + "B=s" => \$opt::retired, + "ctrlc|ctrl-c" => \$opt::retired, + "noctrlc|no-ctrlc|no-ctrl-c" => \$opt::retired, + "workdir|work-dir|wd=s" => \$opt::workdir, + "W=s" => \$opt::retired, + "tmpdir=s" => \$opt::tmpdir, + "tempdir=s" => \$opt::tmpdir, + "use-compress-program|compress-program=s" => \$opt::compress_program, + "use-decompress-program|decompress-program=s" => \$opt::decompress_program, + "compress" => \$opt::compress, + "tty" => \$opt::tty, + "T" => \$opt::retired, + "H=i" => \$opt::retired, + "dry-run|dryrun" => \$opt::dryrun, + "progress" => \$opt::progress, + "eta" => \$opt::eta, 
+ "bar" => \$opt::bar, + "shuf" => \$opt::shuf, + "arg-sep|argsep=s" => \$opt::arg_sep, + "arg-file-sep|argfilesep=s" => \$opt::arg_file_sep, + "trim=s" => \$opt::trim, + "env=s" => \@opt::env, + "recordenv|record-env" => \$opt::record_env, + "plain" => \$opt::plain, + "profile|J=s" => \@opt::profile, + "pipe|spreadstdin" => \$opt::pipe, + "robin|round-robin|roundrobin" => \$opt::roundrobin, + "recstart=s" => \$opt::recstart, + "recend=s" => \$opt::recend, + "regexp|regex" => \$opt::regexp, + "remove-rec-sep|removerecsep|rrs" => \$opt::remove_rec_sep, + "files|output-as-files|outputasfiles" => \$opt::files, + "block|block-size|blocksize=s" => \$opt::blocksize, + "tollef" => \$opt::tollef, + "gnu" => \$opt::gnu, + "link|xapply" => \$opt::link, + "linkinputsource|xapplyinputsource=i" => \@opt::linkinputsource, + "wc|willcite|will-cite|nn|nonotice|no-notice" => \$opt::willcite, + # Termination and retries + "halt-on-error|halt=s" => \$opt::halt, + "memfree=s" => \$opt::memfree, + "retries=i" => \$opt::retries, + "timeout=s" => \$opt::timeout, + "termseq|term-seq=s" => \$opt::termseq, + # xargs-compatibility - implemented, man, testsuite + "max-procs|P=s" => \$opt::jobs, + "delimiter|d=s" => \$opt::d, + "max-chars|s=i" => \$opt::max_chars, + "arg-file|a=s" => \@opt::a, + "no-run-if-empty|r" => \$opt::r, + "replace|i:s" => \$opt::i, + "E=s" => \$opt::eof, + "eof|e:s" => \$opt::eof, + "max-args|n=i" => \$opt::max_args, + "max-replace-args|N=i" => \$opt::max_replace_args, + "colsep|col-sep|C=s" => \$opt::colsep, + "help|h" => \$opt::help, + "L=f" => \$opt::L, + "max-lines|l:f" => \$opt::max_lines, + "interactive|p" => \$opt::interactive, + "verbose|t" => \$opt::verbose, + "version|V" => \$opt::version, + "minversion|min-version=i" => \$opt::minversion, + "show-limits|showlimits" => \$opt::show_limits, + "exit|x" => \$opt::x, + # Semaphore + "semaphore" => \$opt::semaphore, + "semaphoretimeout|st=i" => \$opt::semaphoretimeout, + "semaphorename|id=s" => \$opt::semaphorename, + "fg" => \$opt::fg, + "bg" => \$opt::bg, + "wait" => \$opt::wait, + # Shebang #!/usr/bin/parallel --shebang + "shebang|hashbang" => \$opt::shebang, + "internal-pipe-means-argfiles" => \$opt::internal_pipe_means_argfiles, + "Y" => \$opt::retired, + "skip-first-line" => \$opt::skip_first_line, + "header=s" => \$opt::header, + "cat" => \$opt::cat, + "fifo" => \$opt::fifo, + "pipepart|pipe-part" => \$opt::pipepart, + "hgrp|hostgrp|hostgroup|hostgroups" => \$opt::hostgroups, + ); +} + +sub get_options_from_array { + # Run GetOptions on @array + # Input: + # $array_ref = ref to @ARGV to parse + # @keep_only = Keep only these options + # Uses: + # @ARGV + # Returns: + # true if parsing worked + # false if parsing failed + # @$array_ref is changed + my ($array_ref, @keep_only) = @_; + if(not @$array_ref) { + # Empty array: No need to look more at that + return 1; + } + # A bit of shuffling of @ARGV needed as GetOptionsFromArray is not + # supported everywhere + my @save_argv; + my $this_is_ARGV = (\@::ARGV == $array_ref); + if(not $this_is_ARGV) { + @save_argv = @::ARGV; + @::ARGV = @{$array_ref}; + } + # If @keep_only set: Ignore all values except @keep_only + my %options = options_hash(); + if(@keep_only) { + my (%keep,@dummy); + @keep{@keep_only} = @keep_only; + for my $k (grep { not $keep{$_} } keys %options) { + # Store the value of the option in @dummy + $options{$k} = \@dummy; + } + } + my $retval = GetOptions(%options); + if(not $this_is_ARGV) { + @{$array_ref} = @::ARGV; + @::ARGV = @save_argv; + } + return $retval; +} + +sub 
parse_options { + # Returns: N/A + init_globals(); + @ARGV = read_options(); + + # no-* overrides * + if($opt::nokeeporder) { $opt::keeporder = undef; } + + if(@opt::v) { $Global::verbose = $#opt::v+1; } # Convert -v -v to v=2 + $Global::debug = $opt::D; + $Global::shell = $ENV{'PARALLEL_SHELL'} || parent_shell($$) || $ENV{'SHELL'} || "/bin/sh"; + $Global::cshell = $Global::shell =~ m:/csh:; + if(defined $opt::X) { $Global::ContextReplace = 1; } + if(defined $opt::silent) { $Global::verbose = 0; } + if(defined $opt::0) { $/ = "\0"; } + if(defined $opt::d) { $/ = unquote_printf($opt::d) } + if(defined $opt::tagstring) { $opt::tagstring = unquote_printf($opt::tagstring); } + if(defined $opt::interactive) { $Global::interactive = $opt::interactive; } + if(defined $opt::q) { $Global::quoting = 1; } + if(defined $opt::r) { $Global::ignore_empty = 1; } + if(defined $opt::verbose) { $Global::stderr_verbose = 1; } + parse_replacement_string_options(); + if(defined $opt::eof) { $Global::end_of_file_string = $opt::eof; } + if(defined $opt::max_args) { $Global::max_number_of_args = $opt::max_args; } + if(defined $opt::timeout) { $Global::timeoutq = TimeoutQueue->new($opt::timeout); } + if(defined $opt::tmpdir) { $ENV{'TMPDIR'} = $opt::tmpdir; } + $opt::nice ||= 0; + if(defined $opt::help) { usage(); exit(0); } + if(defined $opt::sqlandworker) { $opt::sqlmaster = $opt::sqlworker = $opt::sqlandworker; } + if(defined $opt::tmuxpane) { $opt::tmux = $opt::tmuxpane; } + if(defined $opt::colsep) { $Global::trim = 'lr'; } + if(defined $opt::header) { $opt::colsep = defined $opt::colsep ? $opt::colsep : "\t"; } + if(defined $opt::trim) { $Global::trim = $opt::trim; } + if(defined $opt::arg_sep) { $Global::arg_sep = $opt::arg_sep; } + if(defined $opt::arg_file_sep) { $Global::arg_file_sep = $opt::arg_file_sep; } + if(defined $opt::number_of_cpus) { print SSHLogin::no_of_cpus(),"\n"; wait_and_exit(0); } + if(defined $opt::number_of_cores) { + print SSHLogin::no_of_cores(),"\n"; wait_and_exit(0); + } + if(defined $opt::max_line_length_allowed) { + print Limits::Command::real_max_length(),"\n"; wait_and_exit(0); + } + if(defined $opt::version) { version(); wait_and_exit(0); } + if(defined $opt::record_env) { record_env(); wait_and_exit(0); } + if(defined $opt::show_limits) { show_limits(); } + if(@opt::sshlogin) { @Global::sshlogin = @opt::sshlogin; } + if(@opt::sshloginfile) { read_sshloginfiles(@opt::sshloginfile); } + if(@opt::return) { push @Global::ret_files, @opt::return; } + if($opt::transfer) { push @Global::transfer_files, $opt::i || $opt::I || "{}"; } + if(@opt::transfer_files) { push @Global::transfer_files, @opt::transfer_files; } + if(not defined $opt::recstart and + not defined $opt::recend) { $opt::recend = "\n"; } + $Global::blocksize = multiply_binary_prefix($opt::blocksize || "1M"); + if($Global::blocksize > 2**31-1) { + warning("--blocksize >= 2G causes problems. 
Using 2G-1."); + $Global::blocksize = 2**31-1; + } + if($^O eq "cygwin" and + ($opt::pipe or $opt::pipepart or $opt::roundrobin) + and $Global::blocksize > 65535) { + warning("--blocksize >= 64K causes problems on Cygwin."); + } + $opt::memfree = multiply_binary_prefix($opt::memfree); + check_invalid_option_combinations(); + if((defined $opt::fifo or defined $opt::cat) + and not $opt::pipepart) { + $opt::pipe = 1; + } + if(defined $opt::minversion) { + print $Global::version,"\n"; + if($Global::version < $opt::minversion) { + wait_and_exit(255); + } else { + wait_and_exit(0); + } + } + if(not defined $opt::delay) { + # Set --delay to --sshdelay if not set + $opt::delay = $opt::sshdelay; + } + if($opt::compress_program) { + $opt::compress = 1; + $opt::decompress_program ||= $opt::compress_program." -dc"; + } + + if(defined $opt::results) { + # Is the output a dir or CSV-file? + if($opt::results =~ /\.csv$/i) { + # CSV with , as separator + $Global::csv = ","; + $Global::membuffer ||= 1; + } elsif($opt::results =~ /\.tsv$/i) { + # CSV with TAB as separator + $Global::csv = "\t"; + $Global::membuffer ||= 1; + } + } + if($opt::compress) { + my ($compress, $decompress) = find_compression_program(); + $opt::compress_program ||= $compress; + $opt::decompress_program ||= $decompress; + if(($opt::results and not $Global::csv) or $opt::files) { + # No need for decompressing + $opt::decompress_program = "cat >/dev/null"; + } + } + if(defined $opt::nonall) { + # Append a dummy empty argument if there are no arguments + # on the command line to avoid reading from STDIN. + # arg_sep = random 50 char + # \0 => nothing (not the empty string) + $Global::arg_sep = join "", + map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..50); + push @ARGV, $Global::arg_sep, "\0"; + } + if(defined $opt::tty) { + # Defaults for --tty: -j1 -u + # Can be overridden with -jXXX -g + if(not defined $opt::jobs) { + $opt::jobs = 1; + } + if(not defined $opt::group) { + $opt::ungroup = 1; + } + } + if(@opt::trc) { + push @Global::ret_files, @opt::trc; + if(not @Global::transfer_files) { + # Defaults to --transferfile {} + push @Global::transfer_files, $opt::i || $opt::I || "{}"; + } + $opt::cleanup = 1; + } + if(defined $opt::max_lines) { + if($opt::max_lines eq "-0") { + # -l -0 (swallowed -0) + $opt::max_lines = 1; + $opt::0 = 1; + $/ = "\0"; + } elsif ($opt::max_lines == 0) { + # If not given (or if 0 is given) => 1 + $opt::max_lines = 1; + } + $Global::max_lines = $opt::max_lines; + if(not $opt::pipe) { + # --pipe -L means length of record - not max_number_of_args + $Global::max_number_of_args ||= $Global::max_lines; + } + } + + # Read more than one arg at a time (-L, -N) + if(defined $opt::L) { + $Global::max_lines = $opt::L; + if(not $opt::pipe) { + # --pipe -L means length of record - not max_number_of_args + $Global::max_number_of_args ||= $Global::max_lines; + } + } + if(defined $opt::max_replace_args) { + $Global::max_number_of_args = $opt::max_replace_args; + $Global::ContextReplace = 1; + } + if((defined $opt::L or defined $opt::max_replace_args) + and + not ($opt::xargs or $opt::m)) { + $Global::ContextReplace = 1; + } + if(defined $opt::tag and not defined $opt::tagstring) { + # Default = {} + $opt::tagstring = $Global::parensleft.$Global::parensright; + } + if(grep /^$Global::arg_sep$|^$Global::arg_file_sep$/o, @ARGV) { + # Deal with ::: and :::: + @ARGV=read_args_from_command_line(); + } + parse_semaphore(); + + if(defined $opt::eta) { + $opt::progress = $opt::eta; + } + if(defined $opt::bar) { + $opt::progress = 
$opt::bar; + } + + parse_halt(); + parse_sshlogin(); + + if(remote_hosts() and ($opt::X or $opt::m or $opt::xargs)) { + # As we do not know the max line length on the remote machine + # long commands generated by xargs may fail + # If $opt::max_replace_args is set, it is probably safe + ::warning("Using -X or -m with --sshlogin may fail."); + } + + if(not defined $opt::jobs) { + $opt::jobs = "100%"; + } + open_joblog(); + open_csv(); + if($opt::sqlmaster or $opt::sqlworker) { + $Global::sql = SQL->new($opt::sqlmaster || $opt::sqlworker); + } + if($opt::sqlworker) { + $Global::membuffer ||= 1; + } +} + +sub check_invalid_option_combinations { + if(defined $opt::timeout and $opt::timeout !~ /^\d+(\.\d+)?%?$/) { + ::error("--timeout must be seconds or percentage."); + wait_and_exit(255); + } + if(defined $opt::fifo and defined $opt::cat) { + ::error("--fifo cannot be combined with --cat."); + ::wait_and_exit(255); + } + if(defined $opt::retries and defined $opt::roundrobin) { + ::error("--retries cannot be combined with --roundrobin."); + ::wait_and_exit(255); + } + if(defined $opt::pipepart and + (defined $opt::L or defined $opt::max_lines + or defined $opt::max_replace_args)) { + ::error("--pipepart is incompatible with --max-replace-args, ". + "--max-lines, and -L."); + wait_and_exit(255); + } + if(defined $opt::group and $opt::ungroup) { + ::error("--group cannot be combined with --ungroup."); + ::wait_and_exit(255); + } + if(defined $opt::group and $opt::linebuffer) { + ::error("--group cannot be combined with --line-buffer."); + ::wait_and_exit(255); + } + if(defined $opt::ungroup and $opt::linebuffer) { + ::error("--ungroup cannot be combined with --line-buffer."); + ::wait_and_exit(255); + } + if(defined $opt::tollef and not $opt::gnu) { + ::error("--tollef has been retired.","Remove --tollef or use --gnu to override --tollef."); + ::wait_and_exit(255); + } + if(defined $opt::retired) { + ::error("-g has been retired. Use --group.", + "-B has been retired. Use --bf.", + "-T has been retired. Use --tty.", + "-U has been retired. Use --er.", + "-W has been retired. Use --wd.", + "-Y has been retired. Use --shebang.", + "-H has been retired. Use --halt.", + "--sql has been retired. 
Use --sqlmaster.", + "--ctrlc has been retired.", + "--noctrlc has been retired."); + ::wait_and_exit(255); + } +} + +sub init_globals { + # Defaults: + $Global::version = 20161222; + $Global::progname = 'parallel'; + $Global::infinity = 2**31; + $Global::debug = 0; + $Global::verbose = 0; + $Global::quoting = 0; + $Global::total_completed = 0; + # Read only table with default --rpl values + %Global::replace = + ( + '{}' => '', + '{#}' => '1 $_=$job->seq()', + '{%}' => '1 $_=$job->slot()', + '{/}' => 's:.*/::', + '{//}' => '$Global::use{"File::Basename"} ||= eval "use File::Basename; 1;"; $_ = dirname($_);', + '{/.}' => 's:.*/::; s:\.[^/.]+$::;', + '{.}' => 's:\.[^/.]+$::', + ); + %Global::plus = + ( + # {} = {+/}/{/} + # = {.}.{+.} = {+/}/{/.}.{+.} + # = {..}.{+..} = {+/}/{/..}.{+..} + # = {...}.{+...} = {+/}/{/...}.{+...} + '{+/}' => 's:/[^/]*$::', + '{+.}' => 's:.*\.::', + '{+..}' => 's:.*\.([^.]*\.):$1:', + '{+...}' => 's:.*\.([^.]*\.[^.]*\.):$1:', + '{..}' => 's:\.[^/.]+$::; s:\.[^/.]+$::', + '{...}' => 's:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::', + '{/..}' => 's:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::', + '{/...}' => 's:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::', + # {##} = number of jobs + '{##}' => '$_=total_jobs()', + ); + # Modifiable copy of %Global::replace + %Global::rpl = %Global::replace; + $/ = "\n"; + $Global::ignore_empty = 0; + $Global::interactive = 0; + $Global::stderr_verbose = 0; + $Global::default_simultaneous_sshlogins = 9; + $Global::exitstatus = 0; + $Global::arg_sep = ":::"; + $Global::arg_file_sep = "::::"; + $Global::trim = 'n'; + $Global::max_jobs_running = 0; + $Global::job_already_run = ''; + $ENV{'TMPDIR'} ||= "/tmp"; + if(not $ENV{HOME}) { + # $ENV{HOME} is sometimes not set if called from PHP + ::warning("\$HOME not set. Using /tmp."); + $ENV{HOME} = "/tmp"; + } +} + +sub parse_halt { + # $opt::halt flavours + # Uses: + # $opt::halt + # $Global::halt_when + # $Global::halt_fail + # $Global::halt_success + # $Global::halt_pct + # $Global::halt_count + if(defined $opt::halt) { + my %halt_expansion = ( + "0" => "never", + "1" => "soon,fail=1", + "2" => "now,fail=1", + "-1" => "soon,success=1", + "-2" => "now,success=1", + ); + # Expand -2,-1,0,1,2 into long form + $opt::halt = $halt_expansion{$opt::halt} || $opt::halt; + # --halt 5% == --halt soon,fail=5% + $opt::halt =~ s/^(\d+)%$/soon,fail=$1%/; + # Split: soon,fail=5% + my ($when,$fail_success,$pct_count) = split /[,=]/, $opt::halt; + if(not grep { $when eq $_ } qw(never soon now)) { + ::error("--halt must have 'never', 'soon', or 'now'."); + ::wait_and_exit(255); + } + $Global::halt_when = $when; + if($when ne "never") { + if($fail_success eq "fail") { + $Global::halt_fail = 1; + } elsif($fail_success eq "success") { + $Global::halt_success = 1; + } else { + ::error("--halt $when must be followed by ,success or ,fail."); + ::wait_and_exit(255); + } + if($pct_count =~ /^(\d+)%$/) { + $Global::halt_pct = $1/100; + } elsif($pct_count =~ /^(\d+)$/) { + $Global::halt_count = $1; + } else { + ::error("--halt $when,$fail_success ". 
+ "must be followed by ,number or ,percent%."); + ::wait_and_exit(255); + } + } + } +} + +sub parse_replacement_string_options { + # Deal with --rpl + # Uses: + # %Global::rpl + # $Global::parensleft + # $Global::parensright + # $opt::parens + # $Global::parensleft + # $Global::parensright + # $opt::plus + # %Global::plus + # $opt::I + # $opt::U + # $opt::i + # $opt::basenamereplace + # $opt::dirnamereplace + # $opt::seqreplace + # $opt::slotreplace + # $opt::basenameextensionreplace + + sub rpl { + # Modify %Global::rpl + # Replace $old with $new + my ($old,$new) = @_; + if($old ne $new) { + $Global::rpl{$new} = $Global::rpl{$old}; + delete $Global::rpl{$old}; + } + } + my $parens = "{==}"; + if(defined $opt::parens) { $parens = $opt::parens; } + my $parenslen = 0.5*length $parens; + $Global::parensleft = substr($parens,0,$parenslen); + $Global::parensright = substr($parens,$parenslen); + if(defined $opt::plus) { %Global::rpl = (%Global::plus,%Global::rpl); } + if(defined $opt::I) { rpl('{}',$opt::I); } + if(defined $opt::i and $opt::i) { rpl('{}',$opt::i); } + if(defined $opt::U) { rpl('{.}',$opt::U); } + if(defined $opt::basenamereplace) { rpl('{/}',$opt::basenamereplace); } + if(defined $opt::dirnamereplace) { rpl('{//}',$opt::dirnamereplace); } + if(defined $opt::seqreplace) { rpl('{#}',$opt::seqreplace); } + if(defined $opt::slotreplace) { rpl('{%}',$opt::slotreplace); } + if(defined $opt::basenameextensionreplace) { + rpl('{/.}',$opt::basenameextensionreplace); + } + for(@opt::rpl) { + # Create $Global::rpl entries for --rpl options + # E.g: "{..} s:\.[^.]+$:;s:\.[^.]+$:;" + my ($shorthand,$long) = split/ /,$_,2; + $Global::rpl{$shorthand} = $long; + } +} + +sub parse_semaphore { + # Semaphore defaults + # Must be done before computing number of processes and max_line_length + # because when running as a semaphore GNU Parallel does not read args + # Uses: + # $opt::semaphore + # $Global::semaphore + # $opt::semaphoretimeout + # $Semaphore::timeout + # $opt::semaphorename + # $Semaphore::name + # $opt::fg + # $Semaphore::fg + # $opt::wait + # $Semaphore::wait + # $opt::bg + # @opt::a + # @Global::unget_argv + # $Global::default_simultaneous_sshlogins + # $opt::jobs + # $Global::interactive + $Global::semaphore ||= ($0 =~ m:(^|/)sem$:); # called as 'sem' + if(defined $opt::semaphore) { $Global::semaphore = 1; } + if(defined $opt::semaphoretimeout) { $Global::semaphore = 1; } + if(defined $opt::semaphorename) { $Global::semaphore = 1; } + if(defined $opt::fg) { $Global::semaphore = 1; } + if(defined $opt::bg) { $Global::semaphore = 1; } + if(defined $opt::wait and not $opt::sqlmaster) { + $Global::semaphore = 1; @ARGV = "true"; + } + if($Global::semaphore) { + if(@opt::a) { + # A semaphore does not take input from neither stdin nor file + ::error("A semaphore does not take input from neither stdin nor a file\n"); + ::wait_and_exit(255); + } + @opt::a = ("/dev/null"); + # Append a dummy empty argument + # \0 => nothing (not the empty string) + push(@Global::unget_argv, [Arg->new("\0")]); + $Semaphore::timeout = $opt::semaphoretimeout || 0; + if(defined $opt::semaphorename) { + $Semaphore::name = $opt::semaphorename; + } else { + $Semaphore::name = `tty`; + chomp $Semaphore::name; + } + $Semaphore::fg = $opt::fg; + $Semaphore::wait = $opt::wait; + $Global::default_simultaneous_sshlogins = 1; + if(not defined $opt::jobs) { + $opt::jobs = 1; + } + if($Global::interactive and $opt::bg) { + ::error("Jobs running in the ". 
+ "background cannot be interactive."); + ::wait_and_exit(255); + } + } +} + +sub record_env { + # Record current %ENV-keys in ~/.parallel/ignored_vars + # Returns: N/A + my $ignore_filename = $ENV{'HOME'} . "/.parallel/ignored_vars"; + if(open(my $vars_fh, ">", $ignore_filename)) { + print $vars_fh map { $_,"\n" } keys %ENV; + } else { + ::error("Cannot write to $ignore_filename."); + ::wait_and_exit(255); + } +} + +sub open_joblog { + # Open joblog as specified by --joblog + # Uses: + # $opt::resume + # $opt::resume_failed + # $opt::joblog + # $opt::results + # $Global::job_already_run + # %Global::fd + my $append = 0; + if(($opt::resume or $opt::resume_failed) + and + not ($opt::joblog or $opt::results)) { + ::error("--resume and --resume-failed require --joblog or --results."); + ::wait_and_exit(255); + } + if($opt::joblog + and + ($opt::sqlmaster + or + not $opt::sqlworker)) { + if($opt::resume || $opt::resume_failed || $opt::retry_failed) { + if(open(my $joblog_fh, "<", $opt::joblog)) { + # Read the joblog + $append = <$joblog_fh>; # If there is a header: Open as append later + my $joblog_regexp; + if($opt::retry_failed) { + # Make a regexp that only matches commands with exit+signal=0 + # 4 host 1360490623.067 3.445 1023 1222 0 0 command + $joblog_regexp='^(\d+)(?:\t[^\t]+){5}\t0\t0\t'; + my @group; + while(<$joblog_fh>) { + if(/$joblog_regexp/o) { + # This is 30% faster than set_job_already_run($1); + vec($Global::job_already_run,($1||0),1) = 1; + $Global::total_completed++; + $group[$1-1] = "true"; + } elsif(/(\d+)\s+\S+(\s+[-0-9.]+){6}\s+(.*)$/) { + $group[$1-1] = $3 + } else { + chomp; + ::error("Format of '$opt::joblog' is wrong: $_"); + ::wait_and_exit(255); + } + } + if(@group) { + my ($outfh,$name) = ::tmpfile(SUFFIX => ".arg"); + unlink($name); + # Put args into argfile + print $outfh map { $_,$/ } @group; + seek $outfh, 0, 0; + exit_if_disk_full(); + # Set filehandle to -a + @opt::a = ($outfh); + } + # Remove $command (so -a is run) + @ARGV = (); + } + if($opt::resume || $opt::resume_failed) { + if($opt::resume_failed) { + # Make a regexp that only matches commands with exit+signal=0 + # 4 host 1360490623.067 3.445 1023 1222 0 0 command + $joblog_regexp='^(\d+)(?:\t[^\t]+){5}\t0\t0\t'; + } else { + # Just match the job number + $joblog_regexp='^(\d+)'; + } + while(<$joblog_fh>) { + if(/$joblog_regexp/o) { + # This is 30% faster than set_job_already_run($1); + vec($Global::job_already_run,($1||0),1) = 1; + $Global::total_completed++; + } elsif(not /\d+\s+[^\s]+\s+([-0-9.]+\s+){6}/) { + ::error("Format of '$opt::joblog' is wrong: $_"); + ::wait_and_exit(255); + } + } + } + close $joblog_fh; + } + } + if($append) { + # Append to joblog + if(not open($Global::joblog, ">>", $opt::joblog)) { + ::error("Cannot append to --joblog $opt::joblog."); + ::wait_and_exit(255); + } + } else { + if($opt::dryrun) { + # Do not write to joblog in a dry-run + if(not open($Global::joblog, ">", "/dev/null")) { + ::error("Cannot write to --joblog $opt::joblog."); + ::wait_and_exit(255); + } + } elsif($opt::joblog eq "-") { + # Use STDOUT as joblog + $Global::joblog = $Global::fd{1}; + } elsif(not open($Global::joblog, ">", $opt::joblog)) { + # Overwrite the joblog + ::error("Cannot write to --joblog $opt::joblog."); + ::wait_and_exit(255); + } + print $Global::joblog + join("\t", "Seq", "Host", "Starttime", "JobRuntime", + "Send", "Receive", "Exitval", "Signal", "Command" + ). 
"\n"; + } + } +} + +sub open_csv { + if($opt::results) { + # Output as CSV/TSV + if($opt::results eq "-.csv" + or + $opt::results eq "-.tsv") { + # Output as CSV/TSV on stdout + open $Global::csv_fh, ">&", "STDOUT" or + ::die_bug("Can't dup STDOUT in csv: $!"); + # Do not print any other output to STDOUT + # by forcing all other output to /dev/null + open my $fd, ">", "/dev/null" or + ::die_bug("Can't >/dev/null in csv: $!"); + $Global::fd{1} = $fd; + $Global::fd{2} = $fd; + } elsif($Global::csv) { + if(not open($Global::csv_fh,">",$opt::results)) { + ::error("Cannot open results file `$opt::results': ". + "$!."); + wait_and_exit(255); + } + } + } +} + +sub find_compression_program { + # Find a fast compression program + # Returns: + # $compress_program = compress program with options + # $decompress_program = decompress program with options + + # Search for these. Sorted by speed on 32 core + # apt install zstd clzip liblz4-tool lzop pigz pxz gzip plzip pbzip2 lzma xz-utils lzip bzip2 lbzip2 lrzip + # git clone https://github.com/facebook/zstd.git + # cd zstd/contrib/pzstd; make -j; cp pzstd /usr/local/bin + # echo 'lrzip -L $((-$1))' >/usr/local/bin/lrz + # chmod +x /usr/local/bin/lrz + # seq 120000000|shuf > 1gb + # onethread="zstd clzip lz4 lzop gzip lzma xz bzip2" + # multithread="pzstd pigz pxz plzip pbzip2 lzip lbzip2 lrz" + # parallel --shuf -j50% --delay 1 --joblog jl-s --arg-sep , parallel --compress-program \'{3}" "-{2}\' cat ::: 1gb '>'/dev/null , 1 2 3 , {1..3} , $onethread + # parallel --shuf -j1 --joblog jl-m --arg-sep , parallel --compress-program \'{3}" "-{2}\' cat ::: 1gb '>'/dev/null , 1 2 3 , {1..3} , $multithread + # sort -nk4 jl-? + # 1-core: + # 2-cores: lz4 lzop zstd gzip bzip2 lzma xz clzip pigz lbzip2 pbzip2 lzip lrz plzip pxz pzstd + # 4-cores: + # 8-cores: pzstd lz4 zstd pigz lzop lbzip2 pbzip2 gzip lzip lrz plzip pxz bzip2 lzma xz clzip + # 16-cores: pzstd lz4 pigz lzop lbzip2 pbzip2 plzip lzip lrz pxz gzip lzma xz bzip2 + # 32-cores: pzstd lbzip2 pbzip2 zstd pigz lz4 lzop plzip lzip lrz gzip pxz lzma bzip2 xz clzip + + my @prg = qw(pzstd lbzip2 pbzip2 zstd pigz lz4 lzop plzip lzip lrz + gzip pxz lzma bzip2 xz clzip); + for my $p (@prg) { + if(which($p)) { + return ("$p -c -1","$p -dc"); + } + } + # Fall back to cat + return ("cat","cat"); +} + +sub read_options { + # Read options from command line, profile and $PARALLEL + # Uses: + # $opt::shebang_wrap + # $opt::shebang + # @ARGV + # $opt::plain + # @opt::profile + # $ENV{'HOME'} + # $ENV{'PARALLEL'} + # Returns: + # @ARGV_no_opt = @ARGV without --options + + # This must be done first as this may exec myself + if(defined $ARGV[0] and ($ARGV[0] =~ /^--shebang/ or + $ARGV[0] =~ /^--shebang-?wrap/ or + $ARGV[0] =~ /^--hashbang/)) { + # Program is called from #! line in script + # remove --shebang-wrap if it is set + $opt::shebang_wrap = ($ARGV[0] =~ s/^--shebang-?wrap *//); + # remove --shebang if it is set + $opt::shebang = ($ARGV[0] =~ s/^--shebang *//); + # remove --hashbang if it is set + $opt::shebang .= ($ARGV[0] =~ s/^--hashbang *//); + if($opt::shebang) { + my $argfile = shell_quote_scalar(pop @ARGV); + # exec myself to split $ARGV[0] into separate fields + exec "$0 --skip-first-line -a $argfile @ARGV"; + } + if($opt::shebang_wrap) { + my @options; + my @parser; + if ($^O eq 'freebsd') { + # FreeBSD's #! puts different values in @ARGV than Linux' does. 
+ my @nooptions = @ARGV; + get_options_from_array(\@nooptions); + while($#ARGV > $#nooptions) { + push @options, shift @ARGV; + } + while(@ARGV and $ARGV[0] ne ":::") { + push @parser, shift @ARGV; + } + if(@ARGV and $ARGV[0] eq ":::") { + shift @ARGV; + } + } else { + @options = shift @ARGV; + } + my $script = shell_quote_scalar(shift @ARGV); + # exec myself to split $ARGV[0] into separate fields + exec "$0 --internal-pipe-means-argfiles @options @parser $script ::: @ARGV"; + } + } + if($ARGV[0] =~ / --shebang(-?wrap)? /) { + ::warning("--shebang and --shebang-wrap must be the first argument.\n"); + } + + Getopt::Long::Configure("bundling","require_order"); + my @ARGV_copy = @ARGV; + my @ARGV_orig = @ARGV; + # Check if there is a --profile to set @opt::profile + get_options_from_array(\@ARGV_copy,"profile|J=s","plain") || die_usage(); + my @ARGV_profile = (); + my @ARGV_env = (); + if(not $opt::plain) { + # Add options from .parallel/config and other profiles + my @config_profiles = ( + "/etc/parallel/config", + $ENV{'HOME'}."/.parallel/config", + $ENV{'HOME'}."/.parallelrc"); + my @profiles = @config_profiles; + if(@opt::profile) { + # --profile overrides default profiles + @profiles = (); + for my $profile (@opt::profile) { + if(-r $profile) { + push @profiles, $profile; + } else { + push @profiles, $ENV{'HOME'}."/.parallel/".$profile; + } + } + } + for my $profile (@profiles) { + if(-r $profile) { + open (my $in_fh, "<", $profile) || ::die_bug("read-profile: $profile"); + while(<$in_fh>) { + /^\s*\#/ and next; + chomp; + push @ARGV_profile, shell_words($_); + } + close $in_fh; + } else { + if(grep /^$profile$/, @config_profiles) { + # config file is not required to exist + } else { + ::error("$profile not readable."); + wait_and_exit(255); + } + } + } + # Add options from shell variable $PARALLEL + if($ENV{'PARALLEL'}) { + @ARGV_env = shell_words($ENV{'PARALLEL'}); + } + } + Getopt::Long::Configure("bundling","require_order"); + get_options_from_array(\@ARGV_profile) || die_usage(); + get_options_from_array(\@ARGV_env) || die_usage(); + get_options_from_array(\@ARGV) || die_usage(); + # What were the options given on the command line? 
+ # Used to start --sqlworker + my $ai = arrayindex(\@ARGV_orig, \@ARGV); + @Global::options_in_argv = @ARGV_orig[0..$ai-1]; + # Prepend non-options to @ARGV (such as commands like 'nice') + unshift @ARGV, @ARGV_profile, @ARGV_env; + return @ARGV; +} + +sub arrayindex { + # Similar to Perl's index function, but for arrays + # Input: + # $arr_ref1 = ref to @array1 to search in + # $arr_ref2 = ref to @array2 to search for + my ($arr_ref1,$arr_ref2) = @_; + my $array1_as_string = join "", map { "\257\257".$_ } @$arr_ref1; + my $array2_as_string = join "", map { "\257\257".$_ } @$arr_ref2; + my $i = index($array1_as_string,$array2_as_string,0); + if($i == -1) { return -1 } + my @before = split /\257\257/, substr($array1_as_string,0,$i); + return $#before; +} + +sub read_args_from_command_line { + # Arguments given on the command line after: + # ::: ($Global::arg_sep) + # :::: ($Global::arg_file_sep) + # :::+ ($Global::arg_sep with --link) + # ::::+ ($Global::arg_file_sep with --link) + # Removes the arguments from @ARGV and: + # - puts filenames into -a + # - puts arguments into files and add the files to -a + # - adds --linkinputsource with 0/1 for each -a depending on :::+/::::+ + # Input: + # @::ARGV = command option ::: arg arg arg :::: argfiles + # Uses: + # $Global::arg_sep + # $Global::arg_file_sep + # $opt::internal_pipe_means_argfiles + # $opt::pipe + # @opt::a + # Returns: + # @argv_no_argsep = @::ARGV without ::: and :::: and following args + my @new_argv = (); + for(my $arg = shift @ARGV; @ARGV; $arg = shift @ARGV) { + if($arg eq $Global::arg_sep + or + $arg eq $Global::arg_sep."+" + or + $arg eq $Global::arg_file_sep + or + $arg eq $Global::arg_file_sep."+") { + my $group_sep = $arg; # This group of arguments is args or argfiles + my @group; + while(defined ($arg = shift @ARGV)) { + if($arg eq $Global::arg_sep + or + $arg eq $Global::arg_sep."+" + or + $arg eq $Global::arg_file_sep + or + $arg eq $Global::arg_file_sep."+") { + # exit while loop if finding new separator + last; + } else { + # If not hitting ::: :::+ :::: or ::::+ + # Append it to the group + push @group, $arg; + } + } + my $is_linked = ($group_sep =~ /\+$/) ? 1 : 0; + my $is_file = ($group_sep eq $Global::arg_file_sep + or + $group_sep eq $Global::arg_file_sep."+"); + if($is_file) { + # :::: / ::::+ + push @opt::linkinputsource, map { $is_linked } @group; + } else { + # ::: / :::+ + push @opt::linkinputsource, $is_linked; + } + if($is_file + or ($opt::internal_pipe_means_argfiles and $opt::pipe) + ) { + # Group of file names on the command line. + # Append args into -a + push @opt::a, @group; + } else { + # Group of arguments on the command line. + # Put them into a file. 
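+                # (E.g. 'parallel echo ::: A B C' lands here: A, B and C are
+                # written to a temporary argfile, which is then read back as
+                # if it had been given with -a.)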
+ # Create argfile + my ($outfh,$name) = ::tmpfile(SUFFIX => ".arg"); + unlink($name); + # Put args into argfile + print $outfh map { $_,$/ } @group; + seek $outfh, 0, 0; + exit_if_disk_full(); + # Append filehandle to -a + push @opt::a, $outfh; + } + if(defined($arg)) { + # $arg is ::: :::+ :::: or ::::+ + # so there is another group + redo; + } else { + # $arg is undef -> @ARGV empty + last; + } + } + push @new_argv, $arg; + } + # Output: @ARGV = command to run with options + return @new_argv; +} + +sub cleanup { + # Returns: N/A + unlink keys %Global::unlink; + map { rmdir $_ } keys %Global::unlink; + if(@opt::basefile and $opt::cleanup) { cleanup_basefile(); } + for(keys %Global::sshmaster) { + # If 'ssh -M's are running: kill them + kill "TERM", $_; + } +} + + +sub __QUOTING_ARGUMENTS_FOR_SHELL__ {} + + +sub shell_quote { + # Input: + # @strings = strings to be quoted + # Output: + # @shell_quoted_strings = string quoted with \ as needed by the shell + return wantarray ? + (map { shell_quote_scalar($_) } @_) + : (join" ",map { shell_quote_scalar($_) } @_); +} + +sub shell_quote_scalar_rc { + # Quote for the rc-shell + my $a = $_[0]; + if(defined $a) { + if(($a =~ s/'/''/g) + + + ($a =~ s/[\n\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\<\=\>\~\|\; \"\!\$\&\'\202-\377]+/'$&'/go)) { + # A string was replaced + # No need to test for "" or \0 + } elsif($a eq "") { + $a = "''"; + } elsif($a eq "\0") { + $a = ""; + } + } + return $a; +} + +sub shell_quote_scalar_csh { + # Quote for (t)csh + my $a = $_[0]; + if(defined $a) { + # $a =~ s/([\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\>\<\~\|\; \"\!\$\&\'\202-\377])/\\$1/g; + # This is 1% faster than the above + if(($a =~ s/[\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\<\=\>\~\|\; \"\!\$\&\'\202-\377]/\\$&/go) + + + # quote newline in csh as \\\n + ($a =~ s/[\n]/"\\\n"/go)) { + # A string was replaced + # No need to test for "" or \0 + } elsif($a eq "") { + $a = "''"; + } elsif($a eq "\0") { + $a = ""; + } + } + return $a; +} + +sub shell_quote_scalar_default { + # Quote for other shells + my $a = $_[0]; + if(defined $a) { + # zsh wants '=' quoted + # Solaris sh wants ^ quoted. + # $a =~ s/([\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\>\<\~\|\; \"\!\$\&\'\202-\377])/\\$1/g; + # This is 1% faster than the above + if(($a =~ s/[\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\<\=\>\~\|\; \"\!\$\&\'\202-\377]/\\$&/go) + + + # quote newline as '\n' + ($a =~ s/[\n]/'\n'/go)) { + # A string was replaced + # No need to test for "" or \0 + } elsif($a eq "") { + $a = "''"; + } elsif($a eq "\0") { + $a = ""; + } + } + return $a; +} + +sub shell_quote_scalar { + # Quote the string so the shell will not expand any special chars + # Inputs: + # $string = string to be quoted + # Returns: + # $shell_quoted = string quoted as needed by the shell + + # Speed optimization: Choose the correct shell_quote_scalar_* + # and call that directly from now on + no warnings 'redefine'; + if($Global::shell =~ m:(^|/)t?csh$:) { + # (t)csh + *shell_quote_scalar = \&shell_quote_scalar_csh; + } elsif($Global::shell =~ m:(^|/)rc$:) { + # rc-shell + *shell_quote_scalar = \&shell_quote_scalar_rc; + } else { + # other shells + *shell_quote_scalar = \&shell_quote_scalar_default; + } + # The sub is now redefined. 
Call it + return shell_quote_scalar(@_); +} + +sub shell_quote_file { + # Quote the string so shell will not expand any special chars and prepend ./ if needed + # Input: + # $filename = filename to be shell quoted + # Returns: + # $quoted_filename = filename quoted with \ as needed by the shell and ./ if needed + my $a = shell_quote_scalar(shift); + if(defined $a) { + if($a =~ m:^/: or $a =~ m:^\./:) { + # /abs/path or ./rel/path => skip + } else { + # rel/path => ./rel/path + $a = "./".$a; + } + } + return $a; +} + +sub shell_words { + # Input: + # $string = shell line + # Returns: + # @shell_words = $string split into words as shell would do + $Global::use{"Text::ParseWords"} ||= eval "use Text::ParseWords; 1;"; + return Text::ParseWords::shellwords(@_); +} + +sub perl_quote_scalar { + # Quote the string so perl's eval will not expand any special chars + # Inputs: + # $string = string to be quoted + # Returns: + # $shell_quoted = string quoted with \ as needed by perl's eval + my $a = $_[0]; + if(defined $a) { + $a =~ s/[\\\"\$\@]/\\$&/go; + } + return $a; +} + +sub unquote_printf { + # Convert \t \n \r \000 \0 + $_ = shift; + s/\\t/\t/g; + s/\\n/\n/g; + s/\\r/\r/g; + s/\\(\d\d\d)/eval 'sprintf "\\'.$1.'"'/ge; + s/\\(\d)/eval 'sprintf "\\'.$1.'"'/ge; + return $_; +} + + +sub __FILEHANDLES__ {} + + +sub save_stdin_stdout_stderr { + # Remember the original STDIN, STDOUT and STDERR + # and file descriptors opened by the shell (e.g. 3>/tmp/foo) + # Uses: + # %Global::fd + # $Global::original_stderr + # $Global::original_stdin + # Returns: N/A + + # TODO Disabled until we have an open3 that will take n filehandles + # for my $fdno (1..61) { + # # /dev/fd/62 and above are used by bash for <(cmd) + # # Find file descriptors that are already opened (by the shell) + # Only focus on stdout+stderr for now + for my $fdno (1..2) { + my $fh; + # 2-argument-open is used to be compatible with old perl 5.8.0 + # bug #43570: Perl 5.8.0 creates 61 files + if(open($fh,">&=$fdno")) { + $Global::fd{$fdno}=$fh; + } + } + open $Global::original_stderr, ">&", "STDERR" or + ::die_bug("Can't dup STDERR: $!"); + open $Global::status_fd, ">&", "STDERR" or + ::die_bug("Can't dup STDERR: $!"); + open $Global::original_stdin, "<&", "STDIN" or + ::die_bug("Can't dup STDIN: $!"); +} + +sub enough_file_handles { + # Check that we have enough filehandles available for starting + # another job + # Uses: + # $opt::ungroup + # %Global::fd + # Returns: + # 1 if ungrouped (thus not needing extra filehandles) + # 0 if too few filehandles + # 1 if enough filehandles + if(not $opt::ungroup) { + my %fh; + my $enough_filehandles = 1; + # perl uses 7 filehandles for something? 
+        # open3 uses 2 extra filehandles temporarily
+        # We need a filehandle for each redirected file descriptor
+        # (normally just STDOUT and STDERR)
+        for my $i (1..(7+2+keys %Global::fd)) {
+            $enough_filehandles &&= open($fh{$i}, "<", "/dev/null");
+        }
+        for (values %fh) { close $_; }
+        return $enough_filehandles;
+    } else {
+        # Ungrouped does not need extra file handles
+        return 1;
+    }
+}
+
+sub open_or_exit {
+    # Open a file name or exit if the file cannot be opened
+    # Inputs:
+    #   $file = filehandle or filename to open
+    # Uses:
+    #   $Global::original_stdin
+    # Returns:
+    #   $fh = file handle to read-opened file
+    my $file = shift;
+    if($file eq "-") {
+        return ($Global::original_stdin || *STDIN);
+    }
+    if(ref $file eq "GLOB") {
+        # This is an open filehandle
+        return $file;
+    }
+    my $fh = gensym;
+    if(not open($fh, "<", $file)) {
+        ::error("Cannot open input file `$file': No such file or directory.");
+        wait_and_exit(255);
+    }
+    return $fh;
+}
+
+sub set_fh_blocking {
+    # Set filehandle as blocking
+    # Inputs:
+    #   $fh = filehandle to be blocking
+    # Returns:
+    #   N/A
+    my $fh = shift;
+    $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;";
+    my $flags;
+    fcntl($fh, &F_GETFL, $flags) || die $!; # Get the current flags on the filehandle
+    $flags &= ~&O_NONBLOCK; # Remove non-blocking from the flags
+    fcntl($fh, &F_SETFL, $flags) || die $!; # Set the flags on the filehandle
+}
+
+sub set_fh_non_blocking {
+    # Set filehandle as non-blocking
+    # Inputs:
+    #   $fh = filehandle to be non-blocking
+    # Returns:
+    #   N/A
+    my $fh = shift;
+    $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;";
+    my $flags;
+    fcntl($fh, &F_GETFL, $flags) || die $!; # Get the current flags on the filehandle
+    $flags |= &O_NONBLOCK; # Add non-blocking to the flags
+    fcntl($fh, &F_SETFL, $flags) || die $!; # Set the flags on the filehandle
+}
+
+
+sub __RUNNING_THE_JOBS_AND_PRINTING_PROGRESS__ {}
+
+
+# Variable structure:
+#
+# $Global::running{$pid} = Pointer to Job-object
+# @Global::virgin_jobs = Pointers to Job-objects that have received no input
+# $Global::host{$sshlogin} = Pointer to SSHLogin-object
+# $Global::total_running = total number of running jobs
+# $Global::total_started = total jobs started
+# $Global::tty_taken = is the tty in use by a running job?
+# $Global::max_procs_file = filename if --jobs is given a filename
+# $Global::JobQueue = JobQueue object for the queue of jobs
+# $Global::timeoutq = queue of times where jobs timeout
+# $Global::newest_job = Job object of the most recent job started
+# $Global::newest_starttime = timestamp of $Global::newest_job
+# @Global::sshlogin
+# $Global::minimal_command_line_length = minimum length supported by all sshlogins
+# $Global::start_no_new_jobs = should more jobs be started?
+# $Global::original_stderr = file handle for STDERR when the program started
+# $Global::total_started = total number of jobs started
+# $Global::joblog = filehandle of joblog
+# $Global::debug = Is debugging on?
+# $Global::exitstatus = status code of GNU Parallel
+# $Global::quoting = quote the command to run
+
+sub init_run_jobs {
+    # Set Global variables and progress signal handlers
+    # Do the copying of basefiles
+    # Returns: N/A
+    $Global::total_running = 0;
+    $Global::total_started = 0;
+    $Global::tty_taken = 0;
+    $SIG{USR1} = \&list_running_jobs;
+    $SIG{USR2} = \&toggle_progress;
+    if(@opt::basefile) { setup_basefile(); }
+}
+
+{
+    my $last_time;
+    my %last_mtime;
+    my $max_procs_file_last_mod;
+
+    sub changed_procs_file {
+        # If --jobs is a file and it is modified:
+        #   Force recomputing of max_jobs_running for each $sshlogin
+        # Uses:
+        #   $Global::max_procs_file
+        #   %Global::host
+        # Returns: N/A
+        if($Global::max_procs_file) {
+            # --jobs filename
+            my $mtime = (stat($Global::max_procs_file))[9];
+            $max_procs_file_last_mod ||= 0;
+            if($mtime > $max_procs_file_last_mod) {
+                # file changed: Force re-computing max_jobs_running
+                $max_procs_file_last_mod = $mtime;
+                for my $sshlogin (values %Global::host) {
+                    $sshlogin->set_max_jobs_running(undef);
+                }
+            }
+        }
+    }
+
+    sub changed_sshloginfile {
+        # If --slf is changed:
+        #   reload --slf
+        #   filter_hosts
+        #   setup_basefile
+        # Uses:
+        #   @opt::sshloginfile
+        #   @Global::sshlogin
+        #   %Global::host
+        #   $opt::filter_hosts
+        # Returns: N/A
+        if(@opt::sshloginfile) {
+            # Is --sshloginfile changed?
+            for my $slf (@opt::sshloginfile) {
+                my $actual_file = expand_slf_shorthand($slf);
+                my $mtime = (stat($actual_file))[9];
+                $last_mtime{$actual_file} ||= $mtime;
+                if($mtime - $last_mtime{$actual_file} > 1) {
+                    ::debug("run","--sshloginfile $actual_file changed. reload\n");
+                    $last_mtime{$actual_file} = $mtime;
+                    # Reload $slf
+                    # Empty sshlogins
+                    @Global::sshlogin = ();
+                    for (values %Global::host) {
+                        # Don't start new jobs on any host
+                        # except the ones added back later
+                        $_->set_max_jobs_running(0);
+                    }
+                    # This will set max_jobs_running on the SSHlogins
+                    read_sshloginfile($actual_file);
+                    parse_sshlogin();
+                    $opt::filter_hosts and filter_hosts();
+                    setup_basefile();
+                }
+            }
+        }
+    }
+
+    sub start_more_jobs {
+        # Run start_another_job() but only if:
+        #   * not $Global::start_no_new_jobs set
+        #   * not JobQueue is empty
+        #   * not load on server is too high
+        #   * not server swapping
+        #   * not too short time since last remote login
+        # Uses:
+        #   %Global::host
+        #   $Global::start_no_new_jobs
+        #   $Global::JobQueue
+        #   $opt::pipe
+        #   $opt::load
+        #   $opt::noswap
+        #   $opt::delay
+        #   $Global::newest_starttime
+        # Returns:
+        #   $jobs_started = number of jobs started
+        my $jobs_started = 0;
+        my $jobs_started_this_round = 0;
+        if($Global::start_no_new_jobs) {
+            return $jobs_started;
+        }
+        if(time - ($last_time||0) > 1) {
+            # At most do this every second
+            $last_time = time;
+            changed_procs_file();
+            changed_sshloginfile();
+        }
+        do {
+            $jobs_started_this_round = 0;
+            # This will start 1 job on each --sshlogin (if possible)
+            # thus distribute the jobs on the --sshlogins round robin
+            for my $sshlogin (values %Global::host) {
+                if($Global::JobQueue->empty() and not $opt::pipe) {
+                    # No more jobs in the queue
+                    last;
+                }
+                debug("run", "Running jobs before on ", $sshlogin->string(), ": ",
+                      $sshlogin->jobs_running(), "\n");
+                if ($sshlogin->jobs_running() < $sshlogin->max_jobs_running()) {
+                    if($opt::delay and $opt::delay > ::now() - $Global::newest_starttime) {
+                        # It has been too short since last start
+                        next;
+                    }
+                    if($opt::load and $sshlogin->loadavg_too_high()) {
+                        # The load is too high or unknown
+                        next;
+                    }
+                    if($opt::noswap and $sshlogin->swapping()) {
+                        # The server is swapping
+                        next;
+                    }
+                    if($opt::memfree and $sshlogin->memfree() < $opt::memfree) {
+                        # The server does not have enough mem free
+                        ::debug("mem", "Not starting job: not enough mem\n");
+                        next;
+                    }
+                    if($sshlogin->too_fast_remote_login()) {
+                        # It has been too short since the last remote login
+                        next;
+                    }
+                    debug("run", $sshlogin->string(), " has ", $sshlogin->jobs_running(),
+                          " out of ", $sshlogin->max_jobs_running(),
+                          " jobs running. Start another.\n");
+                    if(start_another_job($sshlogin) == 0) {
+                        # No more jobs to start on this $sshlogin
+                        debug("run","No jobs started on ", $sshlogin->string(), "\n");
+                        next;
+                    }
+                    $sshlogin->inc_jobs_running();
+                    $sshlogin->set_last_login_at(::now());
+                    $jobs_started++;
+                    $jobs_started_this_round++;
+                }
+                debug("run","Running jobs after on ", $sshlogin->string(), ": ",
+                      $sshlogin->jobs_running(), " of ",
+                      $sshlogin->max_jobs_running(), "\n");
+            }
+        } while($jobs_started_this_round);
+
+        return $jobs_started;
+    }
+}
+
+{
+    my $no_more_file_handles_warned;
+
+    sub start_another_job {
+        # If there are enough filehandles
+        #   and JobQueue not empty
+        #   and not $job is in joblog
+        # Then grab a job from Global::JobQueue,
+        #   start it at sshlogin
+        #   mark it as virgin_job
+        # Inputs:
+        #   $sshlogin = the SSHLogin to start the job on
+        # Uses:
+        #   $Global::JobQueue
+        #   $opt::pipe
+        #   $opt::results
+        #   $opt::resume
+        #   @Global::virgin_jobs
+        # Returns:
+        #   1 if another job was started
+        #   0 otherwise
+        my $sshlogin = shift;
+        # Do we have enough file handles to start another job?
+        if(enough_file_handles()) {
+            if($Global::JobQueue->empty() and not $opt::pipe) {
+                # No more commands to run
+                debug("start", "Not starting: JobQueue empty\n");
+                return 0;
+            } else {
+                my $job;
+                # Skip jobs already in job log
+                # Skip jobs already in results
+                do {
+                    $job = get_job_with_sshlogin($sshlogin);
+                    if(not defined $job) {
+                        # No command available for that sshlogin
+                        debug("start", "Not starting: no jobs available for ",
+                              $sshlogin->string(), "\n");
+                        return 0;
+                    }
+                    if($job->is_already_in_joblog()) {
+                        $job->free_slot();
+                    }
+                } while ($job->is_already_in_joblog()
+                         or
+                         ($opt::results and $opt::resume and $job->is_already_in_results()));
+                debug("start", "Command to run on '", $job->sshlogin()->string(), "': '",
+                      $job->replaced(),"'\n");
+                if($job->start()) {
+                    if($opt::pipe) {
+                        if($job->virgin()) {
+                            push(@Global::virgin_jobs,$job);
+                        } else {
+                            # Block already set: This is a retry
+                            if(fork()) {
+                                ::debug("pipe","\n\nWriting ",length ${$job->block_ref()},
+                                        " to ", $job->seq(),"\n");
+                                close $job->fh(0,"w");
+                            } else {
+                                $job->write($job->block_ref());
+                                close $job->fh(0,"w");
+                                exit(0);
+                            }
+                        }
+                    }
+                    debug("start", "Started as seq ", $job->seq(),
+                          " pid:", $job->pid(), "\n");
+                    return 1;
+                } else {
+                    # Not enough processes to run the job.
+                    # Put it back on the queue.
+                    $Global::JobQueue->unget($job);
+                    # Count down the number of jobs to run for this SSHLogin.
+                    my $max = $sshlogin->max_jobs_running();
+                    if($max > 1) { $max--; } else {
+                        my @arg;
+                        for my $record (@{$job->{'commandline'}->{'arg_list'}}) {
+                            push @arg, map { $_->orig() } @$record;
+                        }
+                        ::error("No more processes: cannot run a single job. Something is wrong at @arg.");
+                        ::wait_and_exit(255);
+                    }
+                    $sshlogin->set_max_jobs_running($max);
+                    # Sleep up to 300 ms to give other processes time to die
+                    ::usleep(rand()*300);
+                    ::warning("No more processes: ".
+                              "Decreasing number of running jobs to $max.",
+                              "Raising ulimit -u or /etc/security/limits.conf may help.");
+                    return 0;
+                }
+            }
+        } else {
+            # No more file handles
+            $no_more_file_handles_warned++ or
+                ::warning("No more file handles. ",
+                          "Raising ulimit -n or /etc/security/limits.conf may help.");
+            return 0;
+        }
+    }
+}
+
+sub init_progress {
+    # Uses:
+    #   $opt::bar
+    # Returns:
+    #   list of computers for progress output
+    $|=1;
+    if($opt::bar) {
+        return("","");
+    }
+    my %progress = progress();
+    return ("\nComputers / CPU cores / Max jobs to run\n",
+            $progress{'workerlist'});
+}
+
+sub drain_job_queue {
+    # Uses:
+    #   $opt::progress
+    #   $Global::total_running
+    #   $Global::max_jobs_running
+    #   %Global::running
+    #   $Global::JobQueue
+    #   %Global::host
+    #   $Global::start_no_new_jobs
+    # Returns: N/A
+    if($opt::progress) {
+        ::status_no_nl(init_progress());
+    }
+    my $last_header = "";
+    my $sleep = 0.2;
+    do {
+        while($Global::total_running > 0) {
+            debug($Global::total_running, "==", scalar
+                  keys %Global::running," slots: ", $Global::max_jobs_running);
+            if($opt::pipe) {
+                # When using --pipe sometimes file handles are not closed properly
+                for my $job (values %Global::running) {
+                    close $job->fh(0,"w");
+                }
+            }
+            if($opt::progress) {
+                my %progress = progress();
+                if($last_header ne $progress{'header'}) {
+                    ::status("", $progress{'header'});
+                    $last_header = $progress{'header'};
+                }
+                ::status_no_nl("\r",$progress{'status'});
+            }
+            if($Global::total_running < $Global::max_jobs_running
+               and not $Global::JobQueue->empty()) {
+                # These jobs may not be started because of loadavg
+                # or too little time between each ssh login.
+                if(start_more_jobs() > 0) {
+                    # Exponential back-on if jobs were started
+                    $sleep = $sleep/2+0.001;
+                }
+            }
+            # Exponential back-off sleeping
+            $sleep = ::reap_usleep($sleep);
+        }
+        if(not $Global::JobQueue->empty()) {
+            # These jobs may not be started:
+            # * because --filter-hosts has removed all hosts
+            if(not %Global::host) {
+                ::error("There are no hosts left to run on.");
+                ::wait_and_exit(255);
+            }
+            # * because of loadavg
+            # * because of too little time between each ssh login.
+            start_more_jobs();
+            $sleep = ::reap_usleep($sleep);
+            if($Global::max_jobs_running == 0) {
+                ::warning("There are no job slots available. 
Increase --jobs."); + } + } + while($opt::sqlmaster and not $Global::sql->finished()) { + # SQL master + $sleep = ::reap_usleep($sleep); + if($Global::start_sqlworker) { + # Start an SQL worker as we are now sure there is work to do + $Global::start_sqlworker = 0; + if(fork()) { + # skip + } else { + # Replace --sql/--sqlandworker with --sqlworker + my @ARGV = map { s/^--sql(andworker)?$/--sqlworker/; $_ } @Global::options_in_argv; + # exec the --sqlworker + exec($0,::shell_quote(@ARGV),@command); + } + } + } + } while ($Global::total_running > 0 + or + not $Global::start_no_new_jobs and not $Global::JobQueue->empty() + or + $opt::sqlmaster and not $Global::sql->finished()); + if($opt::progress) { + my %progress = progress(); + ::status("\r".$progress{'status'}); + } +} + +sub toggle_progress { + # Turn on/off progress view + # Uses: + # $opt::progress + # Returns: N/A + $opt::progress = not $opt::progress; + if($opt::progress) { + ::status_no_nl(init_progress()); + } +} + +sub progress { + # Uses: + # $opt::bar + # $opt::eta + # %Global::host + # $Global::total_started + # Returns: + # $workerlist = list of workers + # $header = that will fit on the screen + # $status = message that will fit on the screen + if($opt::bar) { + return ("workerlist" => "", "header" => "", "status" => bar()); + } + my $eta = ""; + my ($status,$header)=("",""); + if($opt::eta) { + my($total, $completed, $left, $pctcomplete, $avgtime, $this_eta) = + compute_eta(); + $eta = sprintf("ETA: %ds Left: %d AVG: %.2fs ", + $this_eta, $left, $avgtime); + } + my $termcols = terminal_columns(); + my @workers = sort keys %Global::host; + my %sshlogin = map { $_ eq ":" ? ($_=>"local") : ($_=>$_) } @workers; + my $workerno = 1; + my %workerno = map { ($_=>$workerno++) } @workers; + my $workerlist = ""; + for my $w (@workers) { + $workerlist .= + $workerno{$w}.":".$sshlogin{$w} ." / ". + ($Global::host{$w}->ncpus() || "-")." / ". + $Global::host{$w}->max_jobs_running()."\n"; + } + $status = "x"x($termcols+1); + # Select an output format that will fit on a single line + if(length $status > $termcols) { + # sshlogin1:XX/XX/XX%/XX.Xs sshlogin2:XX/XX/XX%/XX.Xs sshlogin3:XX/XX/XX%/XX.Xs + $header = "Computer:jobs running/jobs completed/%of started jobs/Average seconds to complete"; + $status = $eta . + join(" ",map + { + if($Global::total_started) { + my $completed = ($Global::host{$_}->jobs_completed()||0); + my $running = $Global::host{$_}->jobs_running(); + my $time = $completed ? (time-$^T)/($completed) : "0"; + sprintf("%s:%d/%d/%d%%/%.1fs ", + $sshlogin{$_}, $running, $completed, + ($running+$completed)*100 + / $Global::total_started, $time); + } + } @workers); + } + if(length $status > $termcols) { + # 1:XX/XX/XX%/XX.Xs 2:XX/XX/XX%/XX.Xs 3:XX/XX/XX%/XX.Xs 4:XX/XX/XX%/XX.Xs + $header = "Computer:jobs running/jobs completed/%of started jobs"; + $status = $eta . + join(" ",map + { + my $completed = ($Global::host{$_}->jobs_completed()||0); + my $running = $Global::host{$_}->jobs_running(); + my $time = $completed ? (time-$^T)/($completed) : "0"; + sprintf("%s:%d/%d/%d%%/%.1fs ", + $workerno{$_}, $running, $completed, + ($running+$completed)*100 + / $Global::total_started, $time); + } @workers); + } + if(length $status > $termcols) { + # sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX/XX% + $header = "Computer:jobs running/jobs completed/%of started jobs"; + $status = $eta . 
+ join(" ",map + { sprintf("%s:%d/%d/%d%%", + $sshlogin{$_}, + $Global::host{$_}->jobs_running(), + ($Global::host{$_}->jobs_completed()||0), + ($Global::host{$_}->jobs_running()+ + ($Global::host{$_}->jobs_completed()||0))*100 + / $Global::total_started) } + @workers); + } + if(length $status > $termcols) { + # 1:XX/XX/XX% 2:XX/XX/XX% 3:XX/XX/XX% 4:XX/XX/XX% 5:XX/XX/XX% 6:XX/XX/XX% + $header = "Computer:jobs running/jobs completed/%of started jobs"; + $status = $eta . + join(" ",map + { sprintf("%s:%d/%d/%d%%", + $workerno{$_}, + $Global::host{$_}->jobs_running(), + ($Global::host{$_}->jobs_completed()||0), + ($Global::host{$_}->jobs_running()+ + ($Global::host{$_}->jobs_completed()||0))*100 + / $Global::total_started) } + @workers); + } + if(length $status > $termcols) { + # sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX sshlogin4:XX/XX + $header = "Computer:jobs running/jobs completed"; + $status = $eta . + join(" ",map + { sprintf("%s:%d/%d", + $sshlogin{$_}, $Global::host{$_}->jobs_running(), + ($Global::host{$_}->jobs_completed()||0)) } + @workers); + } + if(length $status > $termcols) { + # sshlogin1:XX/XX sshlogin2:XX/XX sshlogin3:XX/XX sshlogin4:XX/XX + $header = "Computer:jobs running/jobs completed"; + $status = $eta . + join(" ",map + { sprintf("%s:%d/%d", + $sshlogin{$_}, $Global::host{$_}->jobs_running(), + ($Global::host{$_}->jobs_completed()||0)) } + @workers); + } + if(length $status > $termcols) { + # 1:XX/XX 2:XX/XX 3:XX/XX 4:XX/XX 5:XX/XX 6:XX/XX + $header = "Computer:jobs running/jobs completed"; + $status = $eta . + join(" ",map + { sprintf("%s:%d/%d", + $workerno{$_}, $Global::host{$_}->jobs_running(), + ($Global::host{$_}->jobs_completed()||0)) } + @workers); + } + if(length $status > $termcols) { + # sshlogin1:XX sshlogin2:XX sshlogin3:XX sshlogin4:XX sshlogin5:XX + $header = "Computer:jobs completed"; + $status = $eta . + join(" ",map + { sprintf("%s:%d", + $sshlogin{$_}, + ($Global::host{$_}->jobs_completed()||0)) } + @workers); + } + if(length $status > $termcols) { + # 1:XX 2:XX 3:XX 4:XX 5:XX 6:XX + $header = "Computer:jobs completed"; + $status = $eta . 
+ join(" ",map + { sprintf("%s:%d", + $workerno{$_}, + ($Global::host{$_}->jobs_completed()||0)) } + @workers); + } + return ("workerlist" => $workerlist, "header" => $header, "status" => $status); +} + +{ + my ($total, $first_completed, $smoothed_avg_time); + + sub compute_eta { + # Calculate important numbers for ETA + # Returns: + # $total = number of jobs in total + # $completed = number of jobs completed + # $left = number of jobs left + # $pctcomplete = percent of jobs completed + # $avgtime = averaged time + # $eta = smoothed eta + $total ||= $Global::JobQueue->total_jobs(); + my $completed = $Global::total_completed; + my $left = $total - $completed; + if(not $completed) { + return($total, $completed, $left, 0, 0, 0); + } + my $pctcomplete = $completed / $total; + $first_completed ||= time; + my $timepassed = (time - $first_completed); + my $avgtime = $timepassed / $completed; + $smoothed_avg_time ||= $avgtime; + # Smooth the eta so it does not jump wildly + $smoothed_avg_time = (1 - $pctcomplete) * $smoothed_avg_time + + $pctcomplete * $avgtime; + my $eta = int($left * $smoothed_avg_time); + return($total, $completed, $left, $pctcomplete, $avgtime, $eta); + } +} + +{ + my ($rev,$reset); + + sub bar { + # Return: + # $status = bar with eta, completed jobs, arg and pct + $rev ||= "\033[7m"; + $reset ||= "\033[0m"; + my($total, $completed, $left, $pctcomplete, $avgtime, $eta) = + compute_eta(); + my $arg = $Global::newest_job ? + $Global::newest_job->{'commandline'}->replace_placeholders(["\257<\257>"],0,0) : ""; + # These chars mess up display in the terminal + $arg =~ tr/[\011-\016\033\302-\365]//d; + my $bar_text = + sprintf("%d%% %d:%d=%ds %s", + $pctcomplete*100, $completed, $left, $eta, $arg); + my $terminal_width = terminal_columns(); + my $s = sprintf("%-${terminal_width}s", + substr($bar_text." "x$terminal_width, + 0,$terminal_width)); + my $width = int($terminal_width * $pctcomplete); + substr($s,$width,0) = $reset; + my $zenity = sprintf("%-${terminal_width}s", + substr("# $eta sec $arg", + 0,$terminal_width)); + $s = "\r" . $zenity . "\r" . $pctcomplete*100 . # Prefix with zenity header + "\r" . $rev . $s . $reset; + return $s; + } +} + +{ + my ($columns,$last_column_time); + + sub terminal_columns { + # Get the number of columns of the terminal. + # Only update once per second. 
+    # Returns:
+    #   number of columns of the screen
+    if(not $columns or $last_column_time < time) {
+        $last_column_time = time;
+        $columns = $ENV{'COLUMNS'};
+        if(not $columns) {
+            my $stty = ::qqx("stty -a </dev/tty 2>/dev/null");
+            # FreeBSD/OpenBSD/NetBSD/MacOSX: "NNN columns"
+            $stty =~ /(\d+) columns/ and do { $columns = $1; };
+            # GNU/Linux: "columns NNN"
+            $stty =~ /columns (\d+)/ and do { $columns = $1; };
+        }
+        $columns ||= 80;
+    }
+    return $columns;
+  }
+}
+
+sub get_job_with_sshlogin {
+    # Input:
+    #   $sshlogin = which host should the job be run on?
+    # Uses:
+    #   $opt::hostgroups
+    #   $Global::JobQueue
+    # Returns:
+    #   $job = next job object for this $sshlogin (undef if no more jobs)
+    my $sshlogin = shift;
+    my $job;
+
+    if($opt::hostgroups) {
+        my @other_hostgroup_jobs = ();
+        while($job = $Global::JobQueue->get()) {
+            if($sshlogin->in_hostgroups($job->hostgroups())) {
+                # Found a job to be run on a hostgroup of this
+                # $sshlogin
+                last;
+            } else {
+                # This job was not in the hostgroups of $sshlogin
+                push @other_hostgroup_jobs, $job;
+            }
+        }
+        $Global::JobQueue->unget(@other_hostgroup_jobs);
+        if(not defined $job) {
+            # No more jobs
+            return undef;
+        }
+    } else {
+        $job = $Global::JobQueue->get();
+        if(not defined $job) {
+            # No more jobs
+            ::debug("start", "No more jobs: JobQueue empty\n");
+            return undef;
+        }
+    }
+
+    my $clean_command = $job->replaced();
+    if($clean_command =~ /^\s*$/) {
+        # Do not run empty lines
+        if(not $Global::JobQueue->empty()) {
+            return get_job_with_sshlogin($sshlogin);
+        } else {
+            return undef;
+        }
+    }
+    $job->set_sshlogin($sshlogin);
+    if($opt::retries and $clean_command and
+       $job->failed_here()) {
+        # This command with these args failed for this sshlogin
+        my ($no_of_failed_sshlogins,$min_failures) = $job->min_failed();
+        # Only look at the Global::host that have > 0 jobslots
+        if($no_of_failed_sshlogins ==
+           grep { $_->max_jobs_running() > 0 } values %Global::host
+           and $job->failed_here() == $min_failures) {
+            # It failed the same or more times on another host:
+            # run it on this host
+        } else {
+            # If it failed fewer times on another host:
+            # Find another job to run
+            my $nextjob;
+            if(not $Global::JobQueue->empty()) {
+                # This can potentially recurse for all args
+                no warnings 'recursion';
+                $nextjob = get_job_with_sshlogin($sshlogin);
+            }
+            # Push the command back on the queue
+            $Global::JobQueue->unget($job);
+            return $nextjob;
+        }
+    }
+    return $job;
+}
+
+
+sub __REMOTE_SSH__ {}
+
+
+sub read_sshloginfiles {
+    # Read a list of --slf's
+    # Input:
+    #   @files = files or symbolic file names to read
+    # Returns: N/A
+    for my $s (@_) {
+        read_sshloginfile(expand_slf_shorthand($s));
+    }
+}
+
+sub expand_slf_shorthand {
+    # Expand --slf shorthand into a read file name
+    # Input:
+    #   $file = file or symbolic file name to read
+    # Returns:
+    #   $file = actual file name to read
+    my $file = shift;
+    if($file eq "-") {
+        # skip: It is stdin
+    } elsif($file eq "..") {
+        $file = $ENV{'HOME'}."/.parallel/sshloginfile";
+    } elsif($file eq ".") {
+        $file = "/etc/parallel/sshloginfile";
+    } elsif(not -r $file) {
+        if(not -r $ENV{'HOME'}."/.parallel/".$file) {
+            # Try prepending ~/.parallel
+            ::error("Cannot open $file.");
+            ::wait_and_exit(255);
+        } else {
+            $file = $ENV{'HOME'}."/.parallel/".$file;
+        }
+    }
+    return $file;
+}
+
+sub read_sshloginfile {
+    # Read sshloginfile into @Global::sshlogin
+    # Input:
+    #   $file = file to read
+    # Uses:
+    #   @Global::sshlogin
+    # Returns: N/A
+    my $file = shift;
+    my $close = 1;
+    my $in_fh;
+    ::debug("init","--slf ",$file);
+    if($file eq "-") {
+        $in_fh = *STDIN;
+        $close = 0;
+    } else {
+        if(not open($in_fh, "<", $file)) {
+            # Try the filename
+            ::error("Cannot open $file.");
+            ::wait_and_exit(255);
+        }
+    }
+    while(<$in_fh>) {
+        chomp;
+        /^\s*#/ and next;
+        /^\s*$/ and next;
+        push @Global::sshlogin, $_;
+    }
+    if($close) {
+        close $in_fh;
+    }
+}
+
+sub parse_sshlogin {
+    # Parse @Global::sshlogin into %Global::host.
+    # Keep only hosts that are in one of the given ssh hostgroups.
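+    # (E.g. -S ":" is the local machine, -S "8/server1" declares 8 cpus
+    # for server1, and -S "@grp" refers to the hosts in hostgroup grp.)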
+ # Uses: + # @Global::sshlogin + # $Global::minimal_command_line_length + # %Global::host + # $opt::transfer + # @opt::return + # $opt::cleanup + # @opt::basefile + # @opt::trc + # Returns: N/A + my @login; + if(not @Global::sshlogin) { @Global::sshlogin = (":"); } + for my $sshlogin (@Global::sshlogin) { + # Split up -S sshlogin,sshlogin + for my $s (split /,|\n/, $sshlogin) { + if ($s eq ".." or $s eq "-") { + # This may add to @Global::sshlogin - possibly bug + read_sshloginfile(expand_slf_shorthand($s)); + } else { + $s =~ s/\s*$//; + push (@login, $s); + } + } + } + $Global::minimal_command_line_length = 8_000_000; + my @allowed_hostgroups; + for my $ncpu_sshlogin_string (::uniq(@login)) { + my $sshlogin = SSHLogin->new($ncpu_sshlogin_string); + my $sshlogin_string = $sshlogin->string(); + if($sshlogin_string eq "") { + # This is an ssh group: -S @webservers + push @allowed_hostgroups, $sshlogin->hostgroups(); + next; + } + if($Global::host{$sshlogin_string}) { + # This sshlogin has already been added: + # It is probably a host that has come back + # Set the max_jobs_running back to the original + debug("run","Already seen $sshlogin_string\n"); + if($sshlogin->{'ncpus'}) { + # If ncpus set by '#/' of the sshlogin, overwrite it: + $Global::host{$sshlogin_string}->set_ncpus($sshlogin->ncpus()); + } + $Global::host{$sshlogin_string}->set_max_jobs_running(undef); + next; + } + if($sshlogin_string eq ":") { + $sshlogin->set_maxlength(Limits::Command::max_length()); + } else { + # If all chars needs to be quoted, every other character will be \ + $sshlogin->set_maxlength(int(Limits::Command::max_length()/2)); + } + $Global::minimal_command_line_length = + ::min($Global::minimal_command_line_length, $sshlogin->maxlength()); + $Global::host{$sshlogin_string} = $sshlogin; + } + if(@allowed_hostgroups) { + # Remove hosts that are not in these groups + while (my ($string, $sshlogin) = each %Global::host) { + if(not $sshlogin->in_hostgroups(@allowed_hostgroups)) { + delete $Global::host{$string}; + } + } + } + + # debug("start", "sshlogin: ", my_dump(%Global::host),"\n"); + if(@Global::transfer_files or @opt::return or $opt::cleanup or @opt::basefile) { + if(not remote_hosts()) { + # There are no remote hosts + if(@opt::trc) { + ::warning("--trc ignored as there are no remote --sshlogin."); + } elsif (defined $opt::transfer) { + ::warning("--transfer ignored as there are no remote --sshlogin."); + } elsif (@opt::transfer_files) { + ::warning("--transferfile ignored as there are no remote --sshlogin."); + } elsif (@opt::return) { + ::warning("--return ignored as there are no remote --sshlogin."); + } elsif (defined $opt::cleanup) { + ::warning("--cleanup ignored as there are no remote --sshlogin."); + } elsif (@opt::basefile) { + ::warning("--basefile ignored as there are no remote --sshlogin."); + } + } + } +} + +sub remote_hosts { + # Return sshlogins that are not ':' + # Uses: + # %Global::host + # Returns: + # list of sshlogins with ':' removed + return grep !/^:$/, keys %Global::host; +} + +sub setup_basefile { + # Transfer basefiles to each $sshlogin + # This needs to be done before first jobs on $sshlogin is run + # Uses: + # %Global::host + # @opt::basefile + # Returns: N/A + my @cmd; + my $rsync_destdir; + my $workdir; + for my $sshlogin (values %Global::host) { + if($sshlogin->string() eq ":") { next } + for my $file (@opt::basefile) { + if($file !~ m:^/: and $opt::workdir eq "...") { + ::error("Work dir '...' 
will not work with relative basefiles."); + ::wait_and_exit(255); + } + if(not $workdir) { + my $dummycmdline = CommandLine->new(1,["true"],{},0,0,[],[],{},{},{}); + my $dummyjob = Job->new($dummycmdline); + $workdir = $dummyjob->workdir(); + } + push @cmd, $sshlogin->rsync_transfer_cmd($file,$workdir); + } + } + debug("init", "basesetup: @cmd\n"); + my ($exitstatus,$stdout_ref,$stderr_ref) = + run_parallel((join "\n",@cmd),"-j0","--retries",5); + if($exitstatus) { + my @stdout = @$stdout_ref; + my @stderr = @$stderr_ref; + ::error("Copying of --basefile failed: @stdout@stderr"); + ::wait_and_exit(255); + } +} + +sub cleanup_basefile { + # Remove the basefiles transferred + # Uses: + # %Global::host + # @opt::basefile + # Returns: N/A + my @cmd; + my $workdir; + if(not $workdir) { + my $dummycmdline = CommandLine->new(1,"true",0,0,0,0,0,{},{},{}); + my $dummyjob = Job->new($dummycmdline); + $workdir = $dummyjob->workdir(); + } + for my $sshlogin (values %Global::host) { + if($sshlogin->string() eq ":") { next } + for my $file (@opt::basefile) { + push @cmd, $sshlogin->cleanup_cmd($file,$workdir); + } + } + debug("init", "basecleanup: @cmd\n"); + my ($exitstatus,$stdout_ref,$stderr_ref) = + run_parallel(join("\n",@cmd),"-j0","--retries",5); + if($exitstatus) { + my @stdout = @$stdout_ref; + my @stderr = @$stderr_ref; + ::error("Cleanup of --basefile failed: @stdout@stderr"); + ::wait_and_exit(255); + } +} + +sub run_parallel { + my ($stdin,@args) = @_; + my $cmd = join "",map { " $_ & " } split /\n/, $stdin; + print $Global::original_stderr ` $cmd wait` ; + return 0 +} + +sub _run_parallel { + # Run GNU Parallel + # This should ideally just fork an internal copy + # and not start it through a shell + # Input: + # $stdin = data to provide on stdin for GNU Parallel + # @args = command line arguments + # Returns: + # $exitstatus = exitcode of GNU Parallel run + # \@stdout = standard output + # \@stderr = standard error + my ($stdin,@args) = @_; + my ($exitstatus,@stdout,@stderr); + my ($stdin_fh,$stdout_fh)=(gensym(),gensym()); + my ($stderr_fh, $stderrname) = ::tmpfile(SUFFIX => ".par"); + unlink $stderrname; + + my $pid = ::open3($stdin_fh,$stdout_fh,$stderr_fh, + $0,qw(--plain --shell /bin/sh --will-cite), @args); + if(my $writerpid = fork()) { + close $stdin_fh; + @stdout = <$stdout_fh>; + # Now stdout is closed: + # These pids should be dead or die very soon + while(kill 0, $writerpid) { ::usleep(1); } + die; +# reap $writerpid; +# while(kill 0, $pid) { ::usleep(1); } +# reap $writerpid; + $exitstatus = $?; + seek $stderr_fh, 0, 0; + @stderr = <$stderr_fh>; + close $stdout_fh; + close $stderr_fh; + } else { + close $stdout_fh; + close $stderr_fh; + print $stdin_fh $stdin; + close $stdin_fh; + exit(0); + } + return ($exitstatus,\@stdout,\@stderr); +} + +sub filter_hosts { + # Remove down --sshlogins from active duty. + # Find ncpus, ncores, maxlen, time-to-login for each host. 
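+    # (Done by running small probe commands such as
+    # 'parallel --number-of-cores' on every host at once and reading back
+    # a joblog; see parallelized_host_filtering() below.)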
+    # Uses:
+    #   %Global::host
+    #   $Global::minimal_command_line_length
+    #   $opt::use_cpus_instead_of_cores
+    # Returns: N/A
+
+    my ($ncores_ref, $ncpus_ref, $time_to_login_ref, $maxlen_ref,
+        $echo_ref, $down_hosts_ref) =
+        parse_host_filtering(parallelized_host_filtering());
+
+    delete @Global::host{@$down_hosts_ref};
+    @$down_hosts_ref and ::warning("Removed @$down_hosts_ref.");
+
+    $Global::minimal_command_line_length = 8_000_000;
+    while (my ($sshlogin, $obj) = each %Global::host) {
+        if($sshlogin eq ":") { next }
+        $ncpus_ref->{$sshlogin} or
+            ::die_bug("ncpus missing: ".$obj->serverlogin());
+        $ncores_ref->{$sshlogin} or
+            ::die_bug("ncores missing: ".$obj->serverlogin());
+        $time_to_login_ref->{$sshlogin} or
+            ::die_bug("time_to_login missing: ".$obj->serverlogin());
+        $maxlen_ref->{$sshlogin} or
+            ::die_bug("maxlen missing: ".$obj->serverlogin());
+        if($opt::use_cpus_instead_of_cores) {
+            $obj->set_ncpus($ncpus_ref->{$sshlogin});
+        } else {
+            $obj->set_ncpus($ncores_ref->{$sshlogin});
+        }
+        $obj->set_time_to_login($time_to_login_ref->{$sshlogin});
+        $obj->set_maxlength($maxlen_ref->{$sshlogin});
+        $Global::minimal_command_line_length =
+            ::min($Global::minimal_command_line_length,
+                  int($maxlen_ref->{$sshlogin}/2));
+        ::debug("init", "Timing from -S:$sshlogin ",
+                " ncpus:",$ncpus_ref->{$sshlogin},
+                " ncores:", $ncores_ref->{$sshlogin},
+                " time_to_login:", $time_to_login_ref->{$sshlogin},
+                " maxlen:", $maxlen_ref->{$sshlogin},
+                " min_max_len:", $Global::minimal_command_line_length,"\n");
+    }
+}
+
+sub parse_host_filtering {
+    # Input:
+    #   @lines = output from parallelized_host_filtering()
+    # Returns:
+    #   \%ncores = number of cores of {host}
+    #   \%ncpus = number of cpus of {host}
+    #   \%time_to_login = time_to_login on {host}
+    #   \%maxlen = max command len on {host}
+    #   \%echo = echo received from {host}
+    #   \@down_hosts = list of hosts with no answer
+    my (%ncores, %ncpus, %time_to_login, %maxlen, %echo, @down_hosts);
+    for (@_) {
+        ::debug("init",$_);
+        chomp;
+        my @col = split /\t/, $_;
+        if(defined $col[6]) {
+            # This is a line from --joblog
+            # seq host time spent sent received exit signal command
+            # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ parallel\ --number-of-cores
+            if($col[0] eq "Seq" and $col[1] eq "Host" and
+               $col[2] eq "Starttime") {
+                # Header => skip
+                next;
+            }
+            # Get server from: eval true server\;
+            $col[8] =~ /eval true..([^;]+).;/ or
+                ::die_bug("col8 does not contain host: $col[8]");
+            my $host = $1;
+            $host =~ tr/\\//d;
+            $Global::host{$host} or next;
+            if($col[6] eq "255" or $col[6] eq "-1" or $col[6] eq "1") {
+                # exit == 255 or exit == timeout (-1): ssh failed/timed out
+                # exit == 1: lsh failed
+                # Remove sshlogin
+                ::debug("init", "--filtered $host\n");
+                push(@down_hosts, $host);
+            } elsif($col[6] eq "127") {
+                # signal == 127: parallel not installed remote
+                # Set ncpus and ncores = 1
+                ::warning("Could not figure out ".
+                          "number of cpus on $host. Using 1.");
+                $ncores{$host} = 1;
+                $ncpus{$host} = 1;
+                $maxlen{$host} = Limits::Command::max_length();
+            } elsif($col[0] =~ /^\d+$/ and $Global::host{$host}) {
+                # Remember how long it took to log in
+                # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ echo
+                $time_to_login{$host} = ::min($time_to_login{$host},$col[3]);
+            } else {
+                ::die_bug("host check unmatched long jobline: $_");
+            }
+        } elsif($Global::host{$col[0]}) {
+            # This is output from --number-of-cores, --number-of-cpus,
+            # --max-line-length-allowed
+            # ncores: server 8
+            # ncpus: server 2
+            # maxlen: server 131071
+            if(/parallel: Warning: Cannot figure out number of/) {
+                next;
+            }
+            if(not $ncores{$col[0]}) {
+                $ncores{$col[0]} = $col[1];
+            } elsif(not $ncpus{$col[0]}) {
+                $ncpus{$col[0]} = $col[1];
+            } elsif(not $maxlen{$col[0]}) {
+                $maxlen{$col[0]} = $col[1];
+            } elsif(not $echo{$col[0]}) {
+                $echo{$col[0]} = $col[1];
+            } elsif(m/perl: warning:|LANGUAGE =|LC_ALL =|LANG =|are supported and installed/) {
+                # Skip these:
+                # perl: warning: Setting locale failed.
+                # perl: warning: Please check that your locale settings:
+                # LANGUAGE = (unset),
+                # LC_ALL = (unset),
+                # LANG = "en_US.UTF-8"
+                # are supported and installed on your system.
+                # perl: warning: Falling back to the standard locale ("C").
+            } else {
+                ::die_bug("host check too many col0: $_");
+            }
+        } else {
+            ::die_bug("host check unmatched short jobline ($col[0]): $_");
+        }
+    }
+    @down_hosts = uniq(@down_hosts);
+    return(\%ncores, \%ncpus, \%time_to_login, \%maxlen, \%echo, \@down_hosts);
+}
+
+sub parallelized_host_filtering {
+    # Uses:
+    #   %Global::host
+    # Returns:
+    #   text entries with:
+    #     * joblog line
+    #     * hostname \t number of cores
+    #     * hostname \t number of cpus
+    #     * hostname \t max-line-length-allowed
+    #     * hostname \t empty
+
+    sub sshwrapped {
+        # Wrap with ssh and --env
+        my $sshlogin = shift;
+        my $command = shift;
+        my $commandline = CommandLine->new(1,[$command],{},0,0,[],[],{},{},{});
+        my $job = Job->new($commandline);
+        $job->set_sshlogin($sshlogin);
+        $job->wrapped();
+        return($job->{'wrapped'});
+    }
+
+    my(@cores, @cpus, @maxline, @echo);
+    while (my ($host, $sshlogin) = each %Global::host) {
+        if($host eq ":") { next }
+        # The 'true' is used to get the $host out later
+        push(@cores, $host."\t"."true $host; ".
+             sshwrapped($sshlogin,"parallel --number-of-cores")."\n\0");
+        push(@cpus, $host."\t"."true $host; ".
+             sshwrapped($sshlogin,"parallel --number-of-cpus")."\n\0");
+        push(@maxline, $host."\t"."true $host; ".
+             sshwrapped($sshlogin,"parallel --max-line-length-allowed")."\n\0");
+        # 'echo' is used to get the fastest possible ssh login time
+        my $sshcmd = "true $host; exec " .$sshlogin->sshcommand()." ".
+            $sshlogin->serverlogin();
+        push(@echo, $host."\t".$sshcmd." -- echo\n\0");
+    }
+
+    # --timeout 10: Setting up an SSH connection and running a simple
+    #   command should never take > 10 sec.
+    # --delay 0.1: If multiple sshlogins use the same proxy the delay
+    #   will make it less likely to overload the ssh daemon.
+    # --retries 3: If the ssh daemon is overloaded, try 3 times
+    my $cmd =
+        "$0 -j0 --timeout 10 --joblog - --plain --delay 0.1 --retries 3 ".
+        "--tag --tagstring '{1}' -0 --colsep '\t' -k eval '{2}' && true ";
+    $cmd = $Global::shell." 
-c ".::shell_quote_scalar($cmd); + ::debug("init", $cmd, "\n"); + my @out; + my $prepend = ""; + + my ($host_fh,$in,$err); + open3($in, $host_fh, $err, $cmd) || ::die_bug("parallel host check: $cmd"); + if(not fork()) { + # Give the commands to run to the $cmd + close $host_fh; + print $in @cores, @cpus, @maxline, @echo; + close $in; + exit(); + } + close $in; + for(<$host_fh>) { + if(/\'$/) { + # if last char = ' then append next line + # This may be due to quoting of \n in environment var + $prepend .= $_; + next; + } + $_ = $prepend . $_; + $prepend = ""; + push @out, $_; + } + close $host_fh; + return @out; +} + +sub onall { + # Runs @command on all hosts. + # Uses parallel to run @command on each host. + # --jobs = number of hosts to run on simultaneously. + # For each host a parallel command with the args will be running. + # Uses: + # $Global::quoting + # @opt::basefile + # $opt::jobs + # $opt::linebuffer + # $opt::ungroup + # $opt::group + # $opt::keeporder + # $opt::D + # $opt::plain + # $opt::max_chars + # $opt::linebuffer + # $opt::files + # $opt::colsep + # $opt::timeout + # $opt::plain + # $opt::retries + # $opt::max_chars + # $opt::arg_sep + # $opt::arg_file_sep + # @opt::v + # @opt::env + # %Global::host + # $Global::exitstatus + # $Global::debug + # $Global::joblog + # $opt::tag + # $opt::joblog + # Input: + # @command = command to run on all hosts + # Returns: N/A + sub tmp_joblog { + # Input: + # $joblog = filename of joblog - undef if none + # Returns: + # $tmpfile = temp file for joblog - undef if none + my $joblog = shift; + if(not defined $joblog) { + return undef; + } + my ($fh, $tmpfile) = ::tmpfile(SUFFIX => ".log"); + close $fh; + return $tmpfile; + } + my ($input_source_fh_ref,@command) = @_; + if($Global::quoting) { + @command = shell_quote(@command); + } + + # Copy all @input_source_fh (-a and :::) into tempfiles + my @argfiles = (); + for my $fh (@$input_source_fh_ref) { + my ($outfh, $name) = ::tmpfile(SUFFIX => ".all", UNLINK => not $opt::D); + print $outfh (<$fh>); + close $outfh; + push @argfiles, $name; + } + if(@opt::basefile) { setup_basefile(); } + # for each sshlogin do: + # parallel -S $sshlogin $command :::: @argfiles + # + # Pass some of the options to the sub-parallels, not all of them as + # -P should only go to the first, and -S should not be copied at all. + my $options = + join(" ", + ((defined $opt::D) ? "-D $opt::D" : ""), + ((defined $opt::group) ? "-g" : ""), + ((defined $opt::jobs) ? "-P $opt::jobs" : ""), + ((defined $opt::keeporder) ? "--keeporder" : ""), + ((defined $opt::linebuffer) ? "--linebuffer" : ""), + ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""), + ((defined $opt::plain) ? "--plain" : ""), + ((defined $opt::ungroup) ? "-u" : ""), + ); + my $suboptions = + join(" ", + ((defined $opt::D) ? "-D $opt::D" : ""), + ((defined $opt::arg_file_sep) ? "--arg-file-sep ".$opt::arg_file_sep : ""), + ((defined $opt::arg_sep) ? "--arg-sep ".$opt::arg_sep : ""), + ((defined $opt::colsep) ? "--colsep ".shell_quote($opt::colsep) : ""), + ((defined $opt::files) ? "--files" : ""), + ((defined $opt::group) ? "-g" : ""), + ((defined $opt::cleanup) ? "--cleanup" : ""), + ((defined $opt::keeporder) ? "--keeporder" : ""), + ((defined $opt::linebuffer) ? "--linebuffer" : ""), + ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""), + ((defined $opt::plain) ? "--plain" : ""), + ((defined $opt::retries) ? "--retries ".$opt::retries : ""), + ((defined $opt::timeout) ? 
"--timeout ".$opt::timeout : ""), + ((defined $opt::ungroup) ? "-u" : ""), + ((defined $opt::workdir) ? "--wd ".$opt::workdir : ""), + (@Global::transfer_files ? map { "--tf ".::shell_quote_scalar($_) } + @Global::transfer_files : ""), + (@Global::ret_files ? map { "--return ".::shell_quote_scalar($_) } + @Global::ret_files : ""), + (@opt::env ? map { "--env ".::shell_quote_scalar($_) } @opt::env : ""), + (@opt::v ? "-vv" : ""), + ); + ::debug("init", "| $0 $options\n"); + open(my $parallel_fh, "|-", "$0 --will-cite -j0 $options") || + ::die_bug("This does not run GNU Parallel: $0 $options"); + my @joblogs; + for my $host (sort keys %Global::host) { + my $sshlogin = $Global::host{$host}; + my $joblog = tmp_joblog($opt::joblog); + if($joblog) { + push @joblogs, $joblog; + $joblog = "--joblog $joblog"; + } + my $quad = $opt::arg_file_sep || "::::"; + ::debug("init", "$0 $suboptions -j1 $joblog ", + ((defined $opt::tag) ? + "--tagstring ".shell_quote_scalar($sshlogin->string()) : ""), + " -S ", shell_quote_scalar($sshlogin->string())," ", + join(" ",shell_quote(@command))," $quad @argfiles\n"); + print $parallel_fh "$0 $suboptions -j1 $joblog ", + ((defined $opt::tag) ? + "--tagstring ".shell_quote_scalar($sshlogin->string()) : ""), + " -S ", shell_quote_scalar($sshlogin->string())," ", + join(" ",shell_quote(@command))," $quad @argfiles\n"; + } + close $parallel_fh; + $Global::exitstatus = $? >> 8; + debug("init", "--onall exitvalue ", $?); + if(@opt::basefile and $opt::cleanup) { cleanup_basefile(); } + $Global::debug or unlink(@argfiles); + my %seen; + for my $joblog (@joblogs) { + # Append to $joblog + open(my $fh, "<", $joblog) || ::die_bug("Cannot open tmp joblog $joblog"); + # Skip first line (header); + <$fh>; + print $Global::joblog (<$fh>); + close $fh; + unlink($joblog); + } +} + + +sub __SIGNAL_HANDLING__ {} + + +sub tstp { + # Send TSTP signal (Ctrl-Z) to all children process groups + # Uses: + # %SIG + # Returns: N/A + kill "TSTP", map { -$_ } keys %Global::running; + # Use default signal handler to suspend GNU Parallel self + $SIG{TSTP} = undef; + kill "TSTP", $$; +} + +sub save_original_signal_handler { + # Remember the original signal handler + # Uses: + # %Global::original_sig + # Returns: N/A + $SIG{INT} = sub { + if($opt::tmux) { ::qqx("tmux kill-session -t p$$"); } + wait_and_exit(255); + }; + $SIG{TERM} = sub { + if($opt::tmux) { ::qqx("tmux kill-session -t p$$"); } + wait_and_exit(255); + }; + %Global::original_sig = %SIG; + $SIG{TERM} = sub {}; # Dummy until jobs really start + $SIG{ALRM} = 'IGNORE'; + # Allow Ctrl-Z to suspend and `fg` to continue + $SIG{TSTP} = \&tstp; + $SIG{CONT} = sub { + # Set $SIG{TSTP} again (it is undef'ed in tstp() ) + $SIG{TSTP} = \&tstp; + # Send continue signal to all children process groups + kill "CONT", map { -$_ } keys %Global::running; + }; +} + +sub list_running_jobs { + # Print running jobs on tty + # Uses: + # %Global::running + # Returns: N/A + for my $job (values %Global::running) { + ::status("$Global::progname: ".$job->replaced()); + } +} + +sub start_no_new_jobs { + # Start no more jobs + # Uses: + # %Global::original_sig + # %Global::unlink + # $Global::start_no_new_jobs + # Returns: N/A + $SIG{TERM} = $Global::original_sig{TERM}; + unlink keys %Global::unlink; + ::status + ("$Global::progname: SIGTERM received. No new jobs will be started.", + "$Global::progname: Waiting for these ".(keys %Global::running). + " jobs to finish. 
Send SIGTERM again to stop now."); + list_running_jobs(); + $Global::start_no_new_jobs ||= 1; +} + +sub reaper { + # A job finished. + # Print the output. + # Start another job + # Uses: + # %Global::sshmaster + # %Global::running + # $Global::tty_taken + # $opt::timeout + # $Global::timeoutq + # $opt::halt + # $opt::keeporder + # $Global::total_running + # Returns: + # @pids_reaped = PIDs of children finished + my $stiff; + my @pids_reaped; + my $children_reaped = 0; + debug("run", "Reaper "); + # For efficiency surround with BEGIN/COMMIT when using $opt::sqlmaster + $opt::sqlmaster and $Global::sql->run("BEGIN;"); + while (($stiff = waitpid(-1, &WNOHANG)) > 0) { + # $stiff = pid of dead process + if(wantarray) { + push(@pids_reaped,$stiff); + } else { + $children_reaped++; + } + if($Global::sshmaster{$stiff}) { + # This is one of the ssh -M: ignore + next; + } + my $job = $Global::running{$stiff}; + + # '-a <(seq 10)' will give us a pid not in %Global::running + $job or next; + delete $Global::running{$stiff}; + $Global::total_running--; + if($job->{'commandline'}{'skip'}) { + # $job->skip() was called + $job->set_exitstatus(-2); + $job->set_exitsignal(0); + } else { + $job->set_exitstatus($? >> 8); + $job->set_exitsignal($? & 127); + } + + debug("run", "seq ",$job->seq()," died (", $job->exitstatus(), ")"); + $job->set_endtime(::now()); + if($stiff == $Global::tty_taken) { + # The process that died had the tty => release it + $Global::tty_taken = 0; + } + my $sshlogin = $job->sshlogin(); + $sshlogin->dec_jobs_running(); + if(not $job->should_be_retried()) { + # The job is done + $sshlogin->inc_jobs_completed(); + # Free the jobslot + $job->free_slot(); + if($opt::timeout and not $job->exitstatus()) { + # Update average runtime for timeout + $Global::timeoutq->update_median_runtime($job->runtime()); + } + if($opt::keeporder) { + $job->print_earlier_jobs(); + } else { + $job->print(); + } + if($job->should_we_halt() eq "now") { + # Kill children + ::kill_sleep_seq($job->pid()); + ::killall(); + ::wait_and_exit($Global::halt_exitstatus); + } + } + $job->cleanup(); + start_more_jobs(); + if($opt::progress) { + my %progress = progress(); + ::status_no_nl("\r",$progress{'status'}); + } + } + $opt::sqlmaster and $Global::sql->run("COMMIT;"); + debug("run", "done "); + return wantarray ? 
@pids_reaped : $children_reaped; +} + + +sub __USAGE__ {} + + +sub killall { + # Kill all jobs by killing their process groups + + $Global::start_no_new_jobs ||= 1; + $Global::killall ||= 1; + kill_sleep_seq(keys %Global::running); +} + +sub kill_sleep_seq { + # Send jobs TERM,TERM,KILL to processgroups + # Input: + # @pids = list of pids that are also processgroups + # Convert pids to process groups ($processgroup = -$pid) + my @pgrps = map { -$_ } @_; + my @term_seq = split/,/,$opt::termseq; + if(not @term_seq) { + @term_seq = ("TERM",200,"TERM",100,"TERM",50,"KILL",25); + } + while(@term_seq) { + @pgrps = kill_sleep(shift @term_seq, shift @term_seq, @pgrps); + } +} + +sub kill_sleep { + my ($signal, $sleep_max, @pids) = @_; + ::debug("kill","kill_sleep $signal ",(join " ",sort @pids),"\n"); + kill $signal, @pids; + my $sleepsum = 0; + my $sleep = 0.001; + my @dead; + + while(@pids and $sleepsum < $sleep_max) { + if($Global::killall) { + # Killall => don't run reaper + my $stiff; + while (($stiff = waitpid(-1, &WNOHANG)) > 0) { + # remove $stiff from @pids + @pids = grep { $_ != $stiff } @pids; + $sleep = $sleep/2+0.001; + } + } elsif(@dead = reaper()) { + # Remove reaped pids + for my $stiff (@dead) { + @pids = grep { $_ != $stiff } @pids; + } + $sleep = $sleep/2+0.001; + } + @pids = grep { kill( 0, $_) } @pids; + $sleep *= 1.1; + ::usleep($sleep); + $sleepsum += $sleep; + # Remove dead children + @pids = grep { kill( 0, $_) } @pids; + } + return @pids; +} + +sub wait_and_exit { + # If we do not wait, we sometimes get segfault + # Returns: N/A + my $error = shift; + unlink keys %Global::unlink; + if($error) { + # Kill all jobs without printing + killall(); + } + for (keys %Global::unkilled_children) { + # Kill any (non-jobs) children + kill 9, $_; + waitpid($_,0); + delete $Global::unkilled_children{$_}; + } + wait(); + exit($error); +} + +sub die_usage { + # Returns: N/A + usage(); + wait_and_exit(255); +} + +sub usage { + # Returns: N/A + print join + ("\n", + "Usage:", + "", + "$Global::progname [options] [command [arguments]] < list_of_arguments", + "$Global::progname [options] [command [arguments]] (::: arguments|:::: argfile(s))...", + "cat ... | $Global::progname --pipe [options] [command [arguments]]", + "", + "-j n Run n jobs in parallel", + "-k Keep same order", + "-X Multiple arguments with context replace", + "--colsep regexp Split input on regexp for positional replacements", + "{} {.} {/} {/.} {#} {%} {= perl code =} Replacement strings", + "{3} {3.} {3/} {3/.} {=3 perl code =} Positional replacement strings", + "With --plus: {} = {+/}/{/} = {.}.{+.} = {+/}/{/.}.{+.} = {..}.{+..} =", + " {+/}/{/..}.{+..} = {...}.{+...} = {+/}/{/...}.{+...}", + "", + "-S sshlogin Example: foo\@server.example.com", + "--slf .. Use ~/.parallel/sshloginfile as the list of sshlogins", + "--trc {}.bar Shorthand for --transfer --return {}.bar --cleanup", + "--onall Run the given command with argument on all sshlogins", + "--nonall Run the given command with no arguments on all sshlogins", + "", + "--pipe Split stdin (standard input) to multiple jobs.", + "--recend str Record end separator for --pipe.", + "--recstart str Record start separator for --pipe.", + "", + "See 'man $Global::progname' for details", + "", + "Academic tradition requires you to cite works you base your article on.", + "When using programs that use GNU Parallel to process data for publication", + "please cite:", + "", + " O. 
Tange (2011): GNU Parallel - The Command-Line Power Tool,",
+ " ;login: The USENIX Magazine, February 2011:42-47.",
+ "",
+ "",);
+}
+
+sub status {
+ my @w = @_;
+ my $fh = $Global::status_fd || *STDERR;
+ print $fh map { ($_, "\n") } @w;
+ flush $fh;
+}
+
+sub status_no_nl {
+ my @w = @_;
+ my $fh = $Global::status_fd || *STDERR;
+ print $fh @w;
+ flush $fh;
+}
+
+sub warning {
+ my @w = @_;
+ my $prog = $Global::progname || "parallel";
+ status_no_nl(map { ($prog, ": Warning: ", $_, "\n"); } @w);
+}
+
+sub error {
+ my @w = @_;
+ my $prog = $Global::progname || "parallel";
+ status(map { ($prog.": Error: ". $_); } @w);
+}
+
+sub die_bug {
+ my $bugid = shift;
+ print STDERR
+ ("$Global::progname: This should not happen. You have found a bug.\n",
+ "Please contact <bug-parallel\@gnu.org> and include:\n",
+ "* The version number: $Global::version\n",
+ "* The bugid: $bugid\n",
+ "* The command line being run\n",
+ "* The files being read (put the files on a webserver if they are big)\n",
+ "\n",
+ "If you get the error on smaller/fewer files, please include those instead.\n");
+ ::wait_and_exit(255);
+}
+
+sub version {
+ # Returns: N/A
+ print join("\n",
+ "GNU $Global::progname $Global::version",
+ "Copyright (C) 2007,2008,2009,2010,2011,2012,2013,2014,2015,2016",
+ "Ole Tange and Free Software Foundation, Inc.",
+ "License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>",
+ "This is free software: you are free to change and redistribute it.",
+ "GNU $Global::progname comes with no warranty.",
+ "",
+ "Web site: http://www.gnu.org/software/${Global::progname}\n",
+ );
+}
+
+sub show_limits {
+ # Returns: N/A
+ print("Maximal size of command: ",Limits::Command::real_max_length(),"\n",
+ "Maximal used size of command: ",Limits::Command::max_length(),"\n",
+ "\n",
+ "Execution will continue now, and it will try to read its input\n",
+ "and run commands; if this is not what you wanted to happen, please\n",
+ "press CTRL-D or CTRL-C\n");
+}
+
+
+sub __GENERIC_COMMON_FUNCTION__ {}
+
+
+sub mkdir_or_die {
+ # If dir is not executable: die
+ my $dir = shift;
+ # The eval is needed to catch exception from mkdir
+ eval { File::Path::mkpath($dir); };
+ if(not -x $dir) {
+ ::error("Cannot change into non-executable dir $dir: $!");
+ ::wait_and_exit(255);
+ }
+}
+
+sub tmpfile {
+ # Create tempfile as $TMPDIR/parXXXXX
+ # Returns:
+ # $filehandle = opened file handle
+ # $filename = file name created
+ my($filehandle,$filename) =
+ ::tempfile(DIR=>$ENV{'TMPDIR'}, TEMPLATE => 'parXXXXX', @_);
+ if(wantarray) {
+ return($filehandle,$filename);
+ } else {
+ # Separate unlink due to NFS dealing badly with File::Temp
+ unlink $filename;
+ return $filehandle;
+ }
+}
+
+sub tmpname {
+ # Select a name that does not exist
+ # Do not create the file as it may be used for creating a socket (by tmux)
+ # Remember the name in $Global::unlink to avoid hitting the same name twice
+ my $name = shift;
+ my($tmpname);
+ if(not -w $ENV{'TMPDIR'}) {
+ if(not -e $ENV{'TMPDIR'}) {
+ ::error("Tmpdir '$ENV{'TMPDIR'}' does not exist.","Try 'mkdir $ENV{'TMPDIR'}'");
+ } else {
+ ::error("Tmpdir '$ENV{'TMPDIR'}' is not writable.","Try 'chmod +w $ENV{'TMPDIR'}'");
+ }
+ ::wait_and_exit(255);
+ }
+ do {
+ $tmpname = $ENV{'TMPDIR'}."/".$name. 
+ join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5); + } while($Global::unlink{$tmpname}++ or -e $tmpname); + return $tmpname; +} + +#sub tmpfifo { +# # Find an unused name and mkfifo on it +# use POSIX qw(mkfifo); +# my $tmpfifo = tmpname("fif",@_); +# mkfifo($tmpfifo,0600); +# return $tmpfifo; +#} + +sub rm { + # Remove file and remove it from %Global::unlink + # Uses: + # %Global::unlink + delete @Global::unlink{@_}; + unlink @_; +} + +sub size_of_block_dev { + # Like -s but for block devices + # Input: + # $blockdev = file name of block device + # Returns: + # $size = in bytes, undef if error + my $blockdev = shift; + if(open(my $fh, "<", $blockdev)) { + seek($fh,0,2) || ::die_bug("cannot seek $blockdev"); + my $size = tell($fh); + close $fh; + return $size; + } else { + ::error("cannot open $blockdev"); + wait_and_exit(255); + } +} + +sub qqx { + # Like qx but with clean environment (except for @keep) + # and STDERR ignored + # This is needed if the environment contains functions + # that /bin/sh does not understand + my $PATH = $ENV{'PATH'}; + my %env; + my @keep = qw(PATH SSH_AUTH_SOCK SSH_AGENT_PID); + @env{@keep} = @ENV{@keep}; + local(%ENV); + %ENV = %env; + if($Global::debug) { + return qx{ @_ && true }; + } else { + return qx{ ( @_ ) 2>/dev/null }; + } +} + +sub uniq { + # Remove duplicates and return unique values + return keys %{{ map { $_ => 1 } @_ }}; +} + +sub min { + # Returns: + # Minimum value of array + my $min; + for (@_) { + # Skip undefs + defined $_ or next; + defined $min or do { $min = $_; next; }; # Set $_ to the first non-undef + $min = ($min < $_) ? $min : $_; + } + return $min; +} + +sub max { + # Returns: + # Maximum value of array + my $max; + for (@_) { + # Skip undefs + defined $_ or next; + defined $max or do { $max = $_; next; }; # Set $_ to the first non-undef + $max = ($max > $_) ? $max : $_; + } + return $max; +} + +sub sum { + # Returns: + # Sum of values of array + my @args = @_; + my $sum = 0; + for (@args) { + # Skip undefs + $_ and do { $sum += $_; } + } + return $sum; +} + +sub undef_as_zero { + my $a = shift; + return $a ? $a : 0; +} + +sub undef_as_empty { + my $a = shift; + return $a ? 
$a : ""; +} + +sub undef_if_empty { + if(defined($_[0]) and $_[0] eq "") { + return undef; + } + return $_[0]; +} + +sub multiply_binary_prefix { + # Evalualte numbers with binary prefix + # Ki=2^10, Mi=2^20, Gi=2^30, Ti=2^40, Pi=2^50, Ei=2^70, Zi=2^80, Yi=2^80 + # ki=2^10, mi=2^20, gi=2^30, ti=2^40, pi=2^50, ei=2^70, zi=2^80, yi=2^80 + # K =2^10, M =2^20, G =2^30, T =2^40, P =2^50, E =2^70, Z =2^80, Y =2^80 + # k =10^3, m =10^6, g =10^9, t=10^12, p=10^15, e=10^18, z=10^21, y=10^24 + # 13G = 13*1024*1024*1024 = 13958643712 + # Input: + # $s = string with prefixes + # Returns: + # $value = int with prefixes multiplied + my $s = shift; + if(not $s) { + return $s; + } + $s =~ s/ki/*1024/gi; + $s =~ s/mi/*1024*1024/gi; + $s =~ s/gi/*1024*1024*1024/gi; + $s =~ s/ti/*1024*1024*1024*1024/gi; + $s =~ s/pi/*1024*1024*1024*1024*1024/gi; + $s =~ s/ei/*1024*1024*1024*1024*1024*1024/gi; + $s =~ s/zi/*1024*1024*1024*1024*1024*1024*1024/gi; + $s =~ s/yi/*1024*1024*1024*1024*1024*1024*1024*1024/gi; + $s =~ s/xi/*1024*1024*1024*1024*1024*1024*1024*1024*1024/gi; + + $s =~ s/K/*1024/g; + $s =~ s/M/*1024*1024/g; + $s =~ s/G/*1024*1024*1024/g; + $s =~ s/T/*1024*1024*1024*1024/g; + $s =~ s/P/*1024*1024*1024*1024*1024/g; + $s =~ s/E/*1024*1024*1024*1024*1024*1024/g; + $s =~ s/Z/*1024*1024*1024*1024*1024*1024*1024/g; + $s =~ s/Y/*1024*1024*1024*1024*1024*1024*1024*1024/g; + $s =~ s/X/*1024*1024*1024*1024*1024*1024*1024*1024*1024/g; + + $s =~ s/k/*1000/g; + $s =~ s/m/*1000*1000/g; + $s =~ s/g/*1000*1000*1000/g; + $s =~ s/t/*1000*1000*1000*1000/g; + $s =~ s/p/*1000*1000*1000*1000*1000/g; + $s =~ s/e/*1000*1000*1000*1000*1000*1000/g; + $s =~ s/z/*1000*1000*1000*1000*1000*1000*1000/g; + $s =~ s/y/*1000*1000*1000*1000*1000*1000*1000*1000/g; + $s =~ s/x/*1000*1000*1000*1000*1000*1000*1000*1000*1000/g; + + $s = eval $s; + ::debug($s); + return $s; +} + +{ + my ($disk_full_fh, $b8193, $error_printed); + sub exit_if_disk_full { + # Checks if $TMPDIR is full by writing 8kb to a tmpfile + # If the disk is full: Exit immediately. + # Returns: + # N/A + if(not $disk_full_fh) { + $disk_full_fh = ::tmpfile(SUFFIX => ".df"); + $b8193 = "x"x8193; + } + # Linux does not discover if a disk is full if writing <= 8192 + # Tested on: + # bfs btrfs cramfs ext2 ext3 ext4 ext4dev jffs2 jfs minix msdos + # ntfs reiserfs tmpfs ubifs vfat xfs + # TODO this should be tested on different OS similar to this: + # + # doit() { + # sudo mount /dev/ram0 /mnt/loop; sudo chmod 1777 /mnt/loop + # seq 100000 | parallel --tmpdir /mnt/loop/ true & + # seq 6900000 > /mnt/loop/i && echo seq OK + # seq 6980868 > /mnt/loop/i + # seq 10000 > /mnt/loop/ii + # sleep 3 + # sudo umount /mnt/loop/ || sudo umount -l /mnt/loop/ + # echo >&2 + # } + print $disk_full_fh $b8193; + if(not $disk_full_fh + or + tell $disk_full_fh != 8193) { + # On raspbian the disk can be full except for 10 chars. + if(not $error_printed) { + ::error("Output is incomplete. Cannot append to buffer file in $ENV{'TMPDIR'}. Is the disk full?", + "Change \$TMPDIR with --tmpdir or use --compress."); + $error_printed = 1; + } + ::wait_and_exit(255); + } + truncate $disk_full_fh, 0; + seek($disk_full_fh, 0, 0) || die; + } +} + +sub spacefree { + # Remove comments and spaces + # Inputs: + # $spaces = keep 1 space? 
+ # $s = string to remove spaces from + # Returns: + # $s = with spaces removed + my $spaces = shift; + my $s = shift; + $s =~ s/#.*//mg; + if($spaces) { + $s =~ s/\s+/ /mg; + } else { + $s =~ s/\s//mg; + } + return $s; +} + +{ + my $hostname; + sub hostname { + if(not $hostname) { + $hostname = `hostname`; + chomp($hostname); + $hostname ||= "nohostname"; + } + return $hostname; + } +} + +sub which { + # Input: + # @programs = programs to find the path to + # Returns: + # @full_path = full paths to @programs. Nothing if not found + my @which; + for my $prg (@_) { + push(@which, grep { not -d $_ and -x $_ } + map { $_."/".$prg } split(":",$ENV{'PATH'})); + } + return @which; +} + +{ + my ($regexp,%fakename); + + sub parent_shell { + # Input: + # $pid = pid to see if (grand)*parent is a shell + # Returns: + # $shellpath = path to shell - undef if no shell found + my $pid = shift; + if(not $regexp) { + # All shells known to mankind + # + # ash bash csh dash fdsh fish fizsh ksh ksh93 mksh pdksh + # posh rbash rc rush rzsh sash sh static-sh tcsh yash zsh + my @shells = (qw(ash bash csh dash fdsh fish fizsh ksh + ksh93 lksh mksh pdksh posh rbash rc rush rzsh sash sh + static-sh tcsh yash zsh -sh -csh -bash), + '-sh (sh)' # sh on FreeBSD + ); + # Can be formatted as: + # [sh] -sh sh busybox sh -sh (sh) + # /bin/sh /sbin/sh /opt/csw/sh + # But not: foo.sh sshd crash flush pdflush scosh fsflush ssh + my $shell = "(?:".join("|",map { "\Q$_\E" } @shells).")"; + $regexp = '^((\[)('. $shell. ')(\])|(|\S+/|busybox )('. $shell. '))($| [^(])'; + %fakename = ( + # sh disguises itself as -sh (sh) on FreeBSD + "-sh (sh)" => ["sh"], + # csh and tcsh disguise themselves as -sh/-csh + "-sh" => ["csh", "tcsh"], + "-csh" => ["tcsh", "csh"], + # bash disguises itself as -bash + "-bash" => ["bash", "sh"], + ); + } + my ($children_of_ref, $parent_of_ref, $name_of_ref) = pid_table(); + my $shellpath; + my $testpid = $pid; + while($testpid) { + ::debug("init", "shell? ". $name_of_ref->{$testpid}."\n"); + if($name_of_ref->{$testpid} =~ /$regexp/o) { + ::debug("init", "which ".($3||$6)." => "); + $shellpath = (which($3 || $6,@{$fakename{$3 || $6}}))[0]; + ::debug("init", "shell path $shellpath\n"); + $shellpath and last; + } + if($testpid == $parent_of_ref->{$testpid}) { + # In Solaris zones, the PPID of the zsched process is itself + last; + } + $testpid = $parent_of_ref->{$testpid}; + } + return $shellpath; + } +} + +{ + my %pid_parentpid_cmd; + + sub pid_table { + # Returns: + # %children_of = { pid -> children of pid } + # %parent_of = { pid -> pid of parent } + # %name_of = { pid -> commandname } + + if(not %pid_parentpid_cmd) { + # Filter for SysV-style `ps` + my $sysv = q( ps -ef | perl -ane '1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;). + q(s/^.{$s}//; print "@F[1,2] $_"' ); + # Crazy msys: ' is not accepted on the cmd line, but " are treated as ' + my $msys = q( ps -ef | perl -ane "1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;). 
+ q(s/^.{$s}//; print qq{@F[1,2] $_}" ); + # BSD-style `ps` + my $bsd = q(ps -o pid,ppid,command -ax); + %pid_parentpid_cmd = + ( + 'aix' => $sysv, + 'android' => $sysv, + 'cygwin' => $sysv, + 'darwin' => $bsd, + 'dec_osf' => $sysv, + 'dragonfly' => $bsd, + 'freebsd' => $bsd, + 'gnu' => $sysv, + 'hpux' => $sysv, + 'linux' => $sysv, + 'mirbsd' => $bsd, + 'msys' => $msys, + 'MSWin32' => $sysv, + 'netbsd' => $bsd, + 'nto' => $sysv, + 'openbsd' => $bsd, + 'solaris' => $sysv, + 'svr5' => $sysv, + 'syllable' => "echo ps not supported", + ); + } + $pid_parentpid_cmd{$^O} or ::die_bug("pid_parentpid_cmd for $^O missing"); + + my (@pidtable,%parent_of,%children_of,%name_of); + # Table with pid -> children of pid + @pidtable = `$pid_parentpid_cmd{$^O}`; + my $p=$$; + for (@pidtable) { + # must match: 24436 21224 busybox ash + # must match: 24436 21224 <> + # must match: 24436 21224 <> + # or: perl -e 'while($0=" "){}' + if(/^\s*(\S+)\s+(\S+)\s+(\S+.*)/ + or + /^\s*(\S+)\s+(\S+)\s+()$/) { + $parent_of{$1} = $2; + push @{$children_of{$2}}, $1; + $name_of{$1} = $3; + } else { + ::die_bug("pidtable format: $_"); + } + } + return(\%children_of, \%parent_of, \%name_of); + } +} + +sub now { + # Returns time since epoch as in seconds with 3 decimals + # Uses: + # @Global::use + # Returns: + # $time = time now with millisecond accuracy + if(not $Global::use{"Time::HiRes"}) { + if(eval "use Time::HiRes qw ( time );") { + eval "sub TimeHiRestime { return Time::HiRes::time };"; + } else { + eval "sub TimeHiRestime { return time() };"; + } + $Global::use{"Time::HiRes"} = 1; + } + + return (int(TimeHiRestime()*1000))/1000; +} + +sub usleep { + # Sleep this many milliseconds. + # Input: + # $ms = milliseconds to sleep + my $ms = shift; + ::debug("timing",int($ms),"ms "); + select(undef, undef, undef, $ms/1000); +} + +sub reap_usleep { + # Reap dead children. + # If no dead children: Sleep specified amount with exponential backoff + # Input: + # $ms = milliseconds to sleep + # Returns: + # $ms/2+0.001 if children reaped + # $ms*1.1 if no children reaped + my $ms = shift; + if(reaper()) { + if(not $Global::total_completed % 100) { + if($opt::timeout) { + # Force cleaning the timeout queue for every 1000 jobs + # Fixes potential memleak + $Global::timeoutq->process_timeouts(); + } + } + # Sleep exponentially shorter (1/2^n) if a job finished + return $ms/2+0.001; + } else { + if($opt::timeout) { + $Global::timeoutq->process_timeouts(); + } + if($opt::memfree) { + kill_youngster_if_not_enough_mem(); + } + # When a child dies, wake up from sleep (or select(,,,)) + $SIG{CHLD} = sub { kill "ALRM", $$ }; + usleep($ms); + # --compress needs $SIG{CHLD} unset + $SIG{CHLD} = 'DEFAULT'; + exit_if_disk_full(); + if($opt::linebuffer) { + if($opt::keeporder) { + for my $job (values %Global::running) { + $job->print_earlier_jobs(); + } + } else { + for my $job (values %Global::running) { + $job->print(); + } + } + } + # Sleep exponentially longer (1.1^n) if a job did not finish, + # though at most 1000 ms. + return (($ms < 1000) ? ($ms * 1.1) : ($ms)); + } +} + +sub kill_youngster_if_not_enough_mem { + # Check each $sshlogin if there is enough mem. + # If less than 50% enough free mem: kill off the youngest child + # Put the child back in the queue. 
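+ # Illustrative walk-through (hypothetical numbers, not from this change):
+ # with --memfree 2G, a host whose memfree() reports 900M is below the
+ # 50% threshold (0.5 * 2G = 1G), so the youngest job (highest seq()) on
+ # that sshlogin is killed and requeued, memfree is recomputed, and the
+ # check repeats until the host is back above the threshold.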
+ # Uses:
+ # %Global::running
+ my %jobs_of;
+ my @sshlogins;
+
+ for my $job (values %Global::running) {
+ if(not $jobs_of{$job->sshlogin()}) {
+ push @sshlogins, $job->sshlogin();
+ }
+ push @{$jobs_of{$job->sshlogin()}}, $job;
+ }
+ for my $sshlogin (@sshlogins) {
+ for my $job (sort { $b->seq() <=> $a->seq() } @{$jobs_of{$sshlogin}}) {
+ if($sshlogin->memfree() < $opt::memfree * 0.5) {
+ ::debug("mem","\n",map { $_->seq()." " }
+ (sort { $b->seq() <=> $a->seq() }
+ @{$jobs_of{$sshlogin}}));
+ ::debug("mem","\n", $job->seq(), "killed ",
+ $sshlogin->memfree()," < ",$opt::memfree * 0.5);
+ $job->kill();
+ $sshlogin->memfree_recompute();
+ } else {
+ last;
+ }
+ }
+ ::debug("mem","Free mem OK ",
+ $sshlogin->memfree()," > ",$opt::memfree * 0.5);
+ }
+}
+
+
+sub __DEBUGGING__ {}
+
+
+sub debug {
+ # Uses:
+ # $Global::debug
+ # %Global::fd
+ # Returns: N/A
+ $Global::debug or return;
+ @_ = grep { defined $_ ? $_ : "" } @_;
+ if($Global::debug eq "all" or $Global::debug eq $_[0]) {
+ if($Global::fd{1}) {
+ # Original stdout was saved
+ my $stdout = $Global::fd{1};
+ print $stdout @_[1..$#_];
+ } else {
+ print @_[1..$#_];
+ }
+ }
+}
+
+sub my_memory_usage {
+ # Returns:
+ # memory usage if found
+ # 0 otherwise
+ use strict;
+ use FileHandle;
+
+ my $pid = $$;
+ if(-e "/proc/$pid/stat") {
+ my $fh = FileHandle->new("</proc/$pid/stat");
+ my $data = <$fh>;
+ chomp $data;
+ $fh->close;
+
+ my @procinfo = split(/\s+/,$data);
+
+ return undef_as_zero($procinfo[22]);
+ } else {
+ return 0;
+ }
+}
+
+sub my_size {
+ # Returns:
+ # $size = size of object if Devel::Size is installed
+ # -1 otherwise
+ my @size_this = (@_);
+ eval "use Devel::Size qw(size total_size)";
+ if ($@) {
+ return -1;
+ } else {
+ return total_size(@_);
+ }
+}
+
+sub my_dump {
+ # Returns:
+ # ascii expression of object if Data::Dump(er) is installed
+ # error code otherwise
+ my @dump_this = (@_);
+ eval "use Data::Dump qw(dump);";
+ if ($@) {
+ # Data::Dump not installed
+ eval "use Data::Dumper;";
+ if ($@) {
+ my $err = "Neither Data::Dump nor Data::Dumper is installed\n". 
+ "Not dumping output\n"; + ::status($err); + return $err; + } else { + return Dumper(@dump_this); + } + } else { + # Create a dummy Data::Dump:dump as Hans Schou sometimes has + # it undefined + eval "sub Data::Dump:dump {}"; + eval "use Data::Dump qw(dump);"; + return (Data::Dump::dump(@dump_this)); + } +} + +sub my_croak { + eval "use Carp; 1"; + $Carp::Verbose = 1; + croak(@_); +} + +sub my_carp { + eval "use Carp; 1"; + $Carp::Verbose = 1; + carp(@_); +} + + +sub __OBJECT_ORIENTED_PARTS__ {} + + +package SSHLogin; + +sub new { + my $class = shift; + my $sshlogin_string = shift; + my $ncpus; + my %hostgroups; + # SSHLogins can have these formats: + # @grp+grp/ncpu//usr/bin/ssh user@server + # ncpu//usr/bin/ssh user@server + # /usr/bin/ssh user@server + # user@server + # ncpu/user@server + # @grp+grp/user@server + if($sshlogin_string =~ s:^\@([^/]+)/?::) { + # Look for SSHLogin hostgroups + %hostgroups = map { $_ => 1 } split(/\+/, $1); + } + if ($sshlogin_string =~ s:^(\d+)/::) { + # Override default autodetected ncpus unless missing + $ncpus = $1; + } + my $string = $sshlogin_string; + # An SSHLogin is always in the hostgroup of its $string-name + $hostgroups{$string} = 1; + @Global::hostgroups{keys %hostgroups} = values %hostgroups; + my @unget = (); + my $no_slash_string = $string; + $no_slash_string =~ s/[^-a-z0-9:]/_/gi; + return bless { + 'string' => $string, + 'jobs_running' => 0, + 'jobs_completed' => 0, + 'maxlength' => undef, + 'max_jobs_running' => undef, + 'orig_max_jobs_running' => undef, + 'ncpus' => $ncpus, + 'hostgroups' => \%hostgroups, + 'sshcommand' => undef, + 'serverlogin' => undef, + 'control_path_dir' => undef, + 'control_path' => undef, + 'time_to_login' => undef, + 'last_login_at' => undef, + 'loadavg_file' => $ENV{'HOME'} . "/.parallel/tmp/loadavg-" . + $no_slash_string, + 'loadavg' => undef, + 'last_loadavg_update' => 0, + 'swap_activity_file' => $ENV{'HOME'} . "/.parallel/tmp/swap_activity-" . + $no_slash_string, + 'swap_activity' => undef, + }, ref($class) || $class; +} + +sub DESTROY { + my $self = shift; + # Remove temporary files if they are created. 
+ ::rm($self->{'loadavg_file'}); + ::rm($self->{'swap_activity_file'}); +} + +sub string { + my $self = shift; + return $self->{'string'}; +} + +sub jobs_running { + my $self = shift; + return ($self->{'jobs_running'} || "0"); +} + +sub inc_jobs_running { + my $self = shift; + $self->{'jobs_running'}++; +} + +sub dec_jobs_running { + my $self = shift; + $self->{'jobs_running'}--; +} + +sub set_maxlength { + my $self = shift; + $self->{'maxlength'} = shift; +} + +sub maxlength { + my $self = shift; + return $self->{'maxlength'}; +} + +sub jobs_completed { + my $self = shift; + return $self->{'jobs_completed'}; +} + +sub in_hostgroups { + # Input: + # @hostgroups = the hostgroups to look for + # Returns: + # true if intersection of @hostgroups and the hostgroups of this + # SSHLogin is non-empty + my $self = shift; + return grep { defined $self->{'hostgroups'}{$_} } @_; +} + +sub hostgroups { + my $self = shift; + return keys %{$self->{'hostgroups'}}; +} + +sub inc_jobs_completed { + my $self = shift; + $self->{'jobs_completed'}++; + $Global::total_completed++; +} + +sub set_max_jobs_running { + my $self = shift; + if(defined $self->{'max_jobs_running'}) { + $Global::max_jobs_running -= $self->{'max_jobs_running'}; + } + $self->{'max_jobs_running'} = shift; + if(defined $self->{'max_jobs_running'}) { + # max_jobs_running could be resat if -j is a changed file + $Global::max_jobs_running += $self->{'max_jobs_running'}; + } + # Initialize orig to the first non-zero value that comes around + $self->{'orig_max_jobs_running'} ||= $self->{'max_jobs_running'}; +} + +sub memfree { + # Returns: + # $memfree in bytes + my $self = shift; + $self->memfree_recompute(); + return (not defined $self->{'memfree'} or $self->{'memfree'}) +} + +sub memfree_recompute { + my $self = shift; + my $script = memfreescript(); + + # TODO add sshlogin and backgrounding + # Run the script twice if it gives 0 (typically intermittent error) + $self->{'memfree'} = ::qqx($script) || ::qqx($script); + if(not $self->{'memfree'}) { + ::die_bug("Less than 1 byte free"); + } + #::debug("mem","New free:",$self->{'memfree'}," "); +} + +{ + my $script; + + sub memfreescript { + # Returns: + # shellscript for giving available memory in bytes + if(not $script) { + my %script_of = ( + # /proc/meminfo + # MemFree: 7012 kB + # Buffers: 19876 kB + # Cached: 431192 kB + # SwapCached: 0 kB + "linux" => + q[ print 1024 * qx{ ]. + q[ awk '/^((Swap)?Cached|MemFree|Buffers):/ ]. + q[ { sum += \$2} END { print sum }' ]. + q[ /proc/meminfo } ], + # $ vmstat 1 1 + # procs memory page faults cpu + # r b w avm free re at pi po fr de sr in sy cs us sy id + # 1 0 0 242793 389737 5 1 0 0 0 0 0 107 978 60 1 1 99 + "hpux" => + q[ print (((reverse `vmstat 1 1`)[0] ]. + q[ =~ /(?:\d+\D+){4}(\d+)/)[0]*1024) ], + # $ vmstat 1 2 + # kthr memory page disk faults cpu + # r b w swap free re mf pi po fr de sr s3 s4 -- -- in sy cs us sy id + # 0 0 0 6496720 5170320 68 260 8 2 1 0 0 -0 3 0 0 309 1371 255 1 2 97 + # 0 0 0 6434088 5072656 7 15 8 0 0 0 0 0 261 0 0 1889 1899 3222 0 8 92 + # + # The second free value is correct + "solaris" => + q[ print (((reverse `vmstat 1 2`)[0] ]. 
+ q[ =~ /(?:\d+\D+){4}(\d+)/)[0]*1024) ], + "freebsd" => q{ + for(qx{/sbin/sysctl -a}) { + if (/^([^:]+):\s+(.+)\s*$/s) { + $sysctl->{$1} = $2; + } + } + print $sysctl->{"hw.pagesize"} * + ($sysctl->{"vm.stats.vm.v_cache_count"} + + $sysctl->{"vm.stats.vm.v_inactive_count"} + + $sysctl->{"vm.stats.vm.v_free_count"}); + }, + # Mach Virtual Memory Statistics: (page size of 4096 bytes) + # Pages free: 198061. + # Pages active: 159701. + # Pages inactive: 47378. + # Pages speculative: 29707. + # Pages wired down: 89231. + # "Translation faults": 928901425. + # Pages copy-on-write: 156988239. + # Pages zero filled: 271267894. + # Pages reactivated: 48895. + # Pageins: 1798068. + # Pageouts: 257. + # Object cache: 6603 hits of 1713223 lookups (0% hit rate) + 'darwin' => + q[ $vm = `vm_stat`; + print (($vm =~ /page size of (\d+)/)[0] * + (($vm =~ /Pages free:\s+(\d+)/)[0] + + ($vm =~ /Pages inactive:\s+(\d+)/)[0])); + ], + ); + my $perlscript = ""; + # Make a perl script that detects the OS ($^O) and runs + # the appropriate command + for my $os (keys %script_of) { + $perlscript .= 'if($^O eq "'.$os.'") { '.$script_of{$os}.'}'; + } + $perlscript =~ s/[\t\n ]+/ /g; + $script = "perl -e " . ::shell_quote_scalar($perlscript); + } + return $script + } +} + +sub swapping { + my $self = shift; + my $swapping = $self->swap_activity(); + return (not defined $swapping or $swapping) +} + +sub swap_activity { + # If the currently known swap activity is too old: + # Recompute a new one in the background + # Returns: + # last swap activity computed + my $self = shift; + # Should we update the swap_activity file? + my $update_swap_activity_file = 0; + if(-r $self->{'swap_activity_file'}) { + open(my $swap_fh, "<", $self->{'swap_activity_file'}) || ::die_bug("swap_activity_file-r"); + my $swap_out = <$swap_fh>; + close $swap_fh; + if($swap_out =~ /^(\d+)$/) { + $self->{'swap_activity'} = $1; + ::debug("swap", "New swap_activity: ", $self->{'swap_activity'}); + } + ::debug("swap", "Last update: ", $self->{'last_swap_activity_update'}); + if(time - $self->{'last_swap_activity_update'} > 10) { + # last swap activity update was started 10 seconds ago + ::debug("swap", "Older than 10 sec: ", $self->{'swap_activity_file'}); + $update_swap_activity_file = 1; + } + } else { + ::debug("swap", "No swap_activity file: ", $self->{'swap_activity_file'}); + $self->{'swap_activity'} = undef; + $update_swap_activity_file = 1; + } + if($update_swap_activity_file) { + ::debug("swap", "Updating swap_activity file ", $self->{'swap_activity_file'}); + $self->{'last_swap_activity_update'} = time; + -e $ENV{'HOME'}."/.parallel" or mkdir $ENV{'HOME'}."/.parallel"; + -e $ENV{'HOME'}."/.parallel/tmp" or mkdir $ENV{'HOME'}."/.parallel/tmp"; + my $swap_activity; + $swap_activity = swapactivityscript(); + if($self->{'string'} ne ":") { + $swap_activity = $self->sshcommand() . " " . $self->serverlogin() . " " . + ::shell_quote_scalar($swap_activity); + } + # Run swap_activity measuring. 
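+ # (Sketch of the fire-and-forget pattern used below, names illustrative:
+ # ( cmd > tmpfile && mv tmpfile file || rm tmpfile & )
+ # The trailing & keeps a slow remote from blocking the scheduler, the
+ # mv publishes the result in one step, and the || rm cleans up after a
+ # failed run.)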
+ # As the command can take long to run if run remote + # save it to a tmp file before moving it to the correct file + my $file = $self->{'swap_activity_file'}; + my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".swp"); + ::debug("swap", "\n", $swap_activity, "\n"); + ::qqx("($swap_activity > $tmpfile && mv $tmpfile $file || rm $tmpfile &)"); + } + return $self->{'swap_activity'}; +} + +{ + my $script; + + sub swapactivityscript { + # Returns: + # shellscript for detecting swap activity + # + # arguments for vmstat are OS dependant + # swap_in and swap_out are in different columns depending on OS + # + if(not $script) { + my %vmstat = ( + # linux: $7*$8 + # $ vmstat 1 2 + # procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu---- + # r b swpd free buff cache si so bi bo in cs us sy id wa + # 5 0 51208 1701096 198012 18857888 0 0 37 153 28 19 56 11 33 1 + # 3 0 51208 1701288 198012 18857972 0 0 0 0 3638 10412 15 3 82 0 + 'linux' => ['vmstat 1 2 | tail -n1', '$7*$8'], + + # solaris: $6*$7 + # $ vmstat -S 1 2 + # kthr memory page disk faults cpu + # r b w swap free si so pi po fr de sr s3 s4 -- -- in sy cs us sy id + # 0 0 0 4628952 3208408 0 0 3 1 1 0 0 -0 2 0 0 263 613 246 1 2 97 + # 0 0 0 4552504 3166360 0 0 0 0 0 0 0 0 0 0 0 246 213 240 1 1 98 + 'solaris' => ['vmstat -S 1 2 | tail -1', '$6*$7'], + + # darwin (macosx): $21*$22 + # $ vm_stat -c 2 1 + # Mach Virtual Memory Statistics: (page size of 4096 bytes) + # free active specul inactive throttle wired prgable faults copy 0fill reactive purged file-backed anonymous cmprssed cmprssor dcomprs comprs pageins pageout swapins swapouts + # 346306 829050 74871 606027 0 240231 90367 544858K 62343596 270837K 14178 415070 570102 939846 356 370 116 922 4019813 4 0 0 + # 345740 830383 74875 606031 0 239234 90369 2696 359 553 0 0 570110 941179 356 370 0 0 0 0 0 0 + 'darwin' => ['vm_stat -c 2 1 | tail -n1', '$21*$22'], + + # ultrix: $12*$13 + # $ vmstat -S 1 2 + # procs faults cpu memory page disk + # r b w in sy cs us sy id avm fre si so pi po fr de sr s0 + # 1 0 0 4 23 2 3 0 97 7743 217k 0 0 0 0 0 0 0 0 + # 1 0 0 6 40 8 0 1 99 7743 217k 0 0 3 0 0 0 0 0 + 'ultrix' => ['vmstat -S 1 2 | tail -1', '$12*$13'], + + # aix: $6*$7 + # $ vmstat 1 2 + # System configuration: lcpu=1 mem=2048MB + # + # kthr memory page faults cpu + # ----- ----------- ------------------------ ------------ ----------- + # r b avm fre re pi po fr sr cy in sy cs us sy id wa + # 0 0 333933 241803 0 0 0 0 0 0 10 143 90 0 0 99 0 + # 0 0 334125 241569 0 0 0 0 0 0 37 5368 184 0 9 86 5 + 'aix' => ['vmstat 1 2 | tail -n1', '$6*$7'], + + # freebsd: $8*$9 + # $ vmstat -H 1 2 + # procs memory page disks faults cpu + # r b w avm fre flt re pi po fr sr ad0 ad1 in sy cs us sy id + # 1 0 0 596716 19560 32 0 0 0 33 8 0 0 11 220 277 0 0 99 + # 0 0 0 596716 19560 2 0 0 0 0 0 0 0 11 144 263 0 1 99 + 'freebsd' => ['vmstat -H 1 2 | tail -n1', '$8*$9'], + + # mirbsd: $8*$9 + # $ vmstat 1 2 + # procs memory page disks traps cpu + # r b w avm fre flt re pi po fr sr wd0 cd0 int sys cs us sy id + # 0 0 0 25776 164968 34 0 0 0 0 0 0 0 230 259 38 4 0 96 + # 0 0 0 25776 164968 24 0 0 0 0 0 0 0 237 275 37 0 0 100 + 'mirbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'], + + # netbsd: $7*$8 + # $ vmstat 1 2 + # procs memory page disks faults cpu + # r b avm fre flt re pi po fr sr w0 w1 in sy cs us sy id + # 0 0 138452 6012 54 0 0 0 1 2 3 0 4 100 23 0 0 100 + # 0 0 138456 6008 1 0 0 0 0 0 0 0 7 26 19 0 0 100 + 'netbsd' => ['vmstat 1 2 | tail -n1', '$7*$8'], + + # openbsd: $8*$9 + # $ vmstat 1 2 + # 
procs memory page disks traps cpu + # r b w avm fre flt re pi po fr sr wd0 wd1 int sys cs us sy id + # 0 0 0 76596 109944 73 0 0 0 0 0 0 1 5 259 22 0 1 99 + # 0 0 0 76604 109936 24 0 0 0 0 0 0 0 7 114 20 0 1 99 + 'openbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'], + + # hpux: $8*$9 + # $ vmstat 1 2 + # procs memory page faults cpu + # r b w avm free re at pi po fr de sr in sy cs us sy id + # 1 0 0 247211 216476 4 1 0 0 0 0 0 102 73005 54 6 11 83 + # 1 0 0 247211 216421 43 9 0 0 0 0 0 144 1675 96 25269512791222387000 25269512791222387000 105 + 'hpux' => ['vmstat 1 2 | tail -n1', '$8*$9'], + + # dec_osf (tru64): $11*$12 + # $ vmstat 1 2 + # Virtual Memory Statistics: (pagesize = 8192) + # procs memory pages intr cpu + # r w u act free wire fault cow zero react pin pout in sy cs us sy id + # 3 181 36 51K 1895 8696 348M 59M 122M 259 79M 0 5 218 302 4 1 94 + # 3 181 36 51K 1893 8696 3 15 21 0 28 0 4 81 321 1 1 98 + 'dec_osf' => ['vmstat 1 2 | tail -n1', '$11*$12'], + + # gnu (hurd): $7*$8 + # $ vmstat -k 1 2 + # (pagesize: 4, size: 512288, swap size: 894972) + # free actv inact wired zeroed react pgins pgouts pfaults cowpfs hrat caobj cache swfree + # 371940 30844 89228 20276 298348 0 48192 19016 756105 99808 98% 876 20628 894972 + # 371940 30844 89228 20276 +0 +0 +0 +0 +42 +2 98% 876 20628 894972 + 'gnu' => ['vmstat -k 1 2 | tail -n1', '$7*$8'], + + # -nto (qnx has no swap) + #-irix + #-svr5 (scosysv) + ); + my $perlscript = ""; + # Make a perl script that detects the OS ($^O) and runs + # the appropriate vmstat command + for my $os (keys %vmstat) { + $vmstat{$os}[1] =~ s/\$/\\\\\\\$/g; # $ => \\\$ + $perlscript .= 'if($^O eq "'.$os.'") { print `'.$vmstat{$os}[0].' | awk "{print ' . + $vmstat{$os}[1] . '}"` }'; + } + $script = "perl -e " . ::shell_quote_scalar($perlscript); + } + return $script; + } +} + +sub too_fast_remote_login { + my $self = shift; + if($self->{'last_login_at'} and $self->{'time_to_login'}) { + # sshd normally allows 10 simultaneous logins + # A login takes time_to_login + # So time_to_login/5 should be safe + # If now <= last_login + time_to_login/5: Then it is too soon. + my $too_fast = (::now() <= $self->{'last_login_at'} + + $self->{'time_to_login'}/5); + ::debug("run", "Too fast? 
$too_fast "); + return $too_fast; + } else { + # No logins so far (or time_to_login not computed): it is not too fast + return 0; + } +} + +sub last_login_at { + my $self = shift; + return $self->{'last_login_at'}; +} + +sub set_last_login_at { + my $self = shift; + $self->{'last_login_at'} = shift; +} + +sub loadavg_too_high { + my $self = shift; + my $loadavg = $self->loadavg(); + return (not defined $loadavg or + $loadavg > $self->max_loadavg()); +} + +{ + my $cmd; + sub loadavg_cmd { + if(not $cmd) { + # aix => "ps -ae -o state,command" # state wrong + # bsd => "ps ax -o state,command" + # sysv => "ps -ef -o s -o comm" + # cygwin => perl -ne 'close STDERR; /Name/ and print"\n"; \ + # /(Name|Pid|Ppid|State):\s+(\S+)/ and print "$2\t";' /proc/*/status | + # awk '{print $2,$1}' + # dec_osf => bsd + # dragonfly => bsd + # freebsd => bsd + # gnu => bsd + # hpux => ps -el|awk '{print $2,$14,$15}' + # irix => ps -ef -o state -o comm + # linux => bsd + # minix => ps el|awk '{print \$1,\$11}' + # mirbsd => bsd + # netbsd => bsd + # openbsd => bsd + # solaris => sysv + # svr5 => sysv + # ultrix => ps -ax | awk '{print $3,$5}' + # unixware => ps -el|awk '{print $2,$14,$15}' + my $ps = ::spacefree(1,q{ + $sysv="ps -ef -o s -o comm"; + $sysv2="ps -ef -o state -o comm"; + $bsd="ps ax -o state,command"; + # Treat threads as processes + $bsd2="ps axH -o state,command"; + $psel="ps -el|awk '{ print \$2,\$14,\$15 }'"; + $cygwin=q{ perl -ne 'close STDERR; /Name/ and print"\n"; + /(Name|Pid|Ppid|State):\s+(\S+)/ and print "$2\t";' /proc/*/status | + awk '{print $2,$1}' }; + $dummy="echo S COMMAND;echo R dummy"; + %ps=( + 'aix' => "uptime", + 'cygwin' => $cygwin, + 'darwin' => $bsd, + 'dec_osf' => $sysv2, + 'dragonfly' => $bsd, + 'freebsd' => $bsd2, + 'gnu' => $bsd, + 'hpux' => $psel, + 'irix' => $sysv2, + 'linux' => $bsd2, + 'minix' => "ps el|awk '{print \$1,\$11}'", + 'mirbsd' => $bsd, + 'msys' => $sysv, + 'MSWin32' => $sysv, + 'netbsd' => $bsd, + 'nto' => $dummy, + 'openbsd' => $bsd, + 'solaris' => $sysv, + 'svr5' => $psel, + 'ultrix' => "ps -ax | awk '{print \$3,\$5}'", + ); + print `$ps{$^O}`; + }); + $cmd = "perl -e ".::shell_quote_scalar($ps); + } + return $cmd; + } +} + + +sub loadavg { + # If the currently know loadavg is too old: + # Recompute a new one in the background + # The load average is computed as the number of processes waiting for disk + # or CPU right now. So it is the server load this instant and not averaged over + # several minutes. This is needed so GNU Parallel will at most start one job + # that will push the load over the limit. + # + # Returns: + # $last_loadavg = last load average computed (undef if none) + my $self = shift; + # Should we update the loadavg file? + my $update_loadavg_file = 0; + if(open(my $load_fh, "<", $self->{'loadavg_file'})) { + local $/; # $/ = undef => slurp whole file + my $load_out = <$load_fh>; + close $load_fh; + # Count lines starting with D,O,R but command does not start with [ + my $load =()= ($load_out=~/(^\s?[DOR]\S* +(?=[^\[])\S)/gm); + if($load > 0) { + # load is overestimated by 1 + $self->{'loadavg'} = $load - 1; + ::debug("load", "New loadavg: ", $self->{'loadavg'},"\n"); + } elsif ($load_out=~/average: (\d+.\d+)/) { + # AIX does not support instant load average + # 04:11AM up 21 days, 12:55, 1 user, load average: 1.85, 1.57, 1.55 + $self->{'loadavg'} = $1; + } else { + ::die_bug("loadavg_invalid_content: " . + $self->{'loadavg_file'} . 
"\n$load_out"); + } + $update_loadavg_file = 1; + } else { + ::debug("load", "No loadavg file: ", $self->{'loadavg_file'}); + $self->{'loadavg'} = undef; + $update_loadavg_file = 1; + } + if($update_loadavg_file) { + ::debug("load", "Updating loadavg file", $self->{'loadavg_file'}, "\n"); + $self->{'last_loadavg_update'} = time; + -w $ENV{'HOME'}."/.parallel" or mkdir $ENV{'HOME'}."/.parallel" + or ::die_bug("Cannot write to $ENV{'HOME'}/.parallel"); + -w $ENV{'HOME'}."/.parallel/tmp" or mkdir $ENV{'HOME'}."/.parallel/tmp" + or ::die_bug("Cannot write to $ENV{'HOME'}/.parallel/tmp"); + my $cmd = ""; + if($self->{'string'} ne ":") { + $cmd = $self->sshcommand() . " " . $self->serverlogin() . " " . + ::shell_quote_scalar(loadavg_cmd()); + } else { + $cmd .= loadavg_cmd(); + } + # As the command can take long to run if run remote + # save it to a tmp file before moving it to the correct file + ::debug("load", "Cmd: ", $cmd,"\n"); + my $file = $self->{'loadavg_file'}; + my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".loa"); + ::qqx("($cmd > $tmpfile && mv $tmpfile $file || rm $tmpfile & )"); + } + return $self->{'loadavg'}; +} + +sub max_loadavg { + my $self = shift; + # If --load is a file it might be changed + if($Global::max_load_file) { + my $mtime = (stat($Global::max_load_file))[9]; + if($mtime > $Global::max_load_file_last_mod) { + $Global::max_load_file_last_mod = $mtime; + for my $sshlogin (values %Global::host) { + $sshlogin->set_max_loadavg(undef); + } + } + } + if(not defined $self->{'max_loadavg'}) { + $self->{'max_loadavg'} = + $self->compute_max_loadavg($opt::load); + } + ::debug("load", "max_loadavg: ", $self->string(), " ", $self->{'max_loadavg'}); + return $self->{'max_loadavg'}; +} + +sub set_max_loadavg { + my $self = shift; + $self->{'max_loadavg'} = shift; +} + +sub compute_max_loadavg { + # Parse the max loadaverage that the user asked for using --load + # Returns: + # max loadaverage + my $self = shift; + my $loadspec = shift; + my $load; + if(defined $loadspec) { + if($loadspec =~ /^\+(\d+)$/) { + # E.g. --load +2 + my $j = $1; + $load = + $self->ncpus() + $j; + } elsif ($loadspec =~ /^-(\d+)$/) { + # E.g. 
--load -2 + my $j = $1; + $load = + $self->ncpus() - $j; + } elsif ($loadspec =~ /^(\d+)\%$/) { + my $j = $1; + $load = + $self->ncpus() * $j / 100; + } elsif ($loadspec =~ /^(\d+(\.\d+)?)$/) { + $load = $1; + } elsif (-f $loadspec) { + $Global::max_load_file = $loadspec; + $Global::max_load_file_last_mod = (stat($Global::max_load_file))[9]; + if(open(my $in_fh, "<", $Global::max_load_file)) { + my $opt_load_file = join("",<$in_fh>); + close $in_fh; + $load = $self->compute_max_loadavg($opt_load_file); + } else { + ::error("Cannot open $loadspec."); + ::wait_and_exit(255); + } + } else { + ::error("Parsing of --load failed."); + ::die_usage(); + } + if($load < 0.01) { + $load = 0.01; + } + } + return $load; +} + +sub time_to_login { + my $self = shift; + return $self->{'time_to_login'}; +} + +sub set_time_to_login { + my $self = shift; + $self->{'time_to_login'} = shift; +} + +sub max_jobs_running { + my $self = shift; + if(not defined $self->{'max_jobs_running'}) { + my $nproc = $self->compute_number_of_processes($opt::jobs); + $self->set_max_jobs_running($nproc); + } + return $self->{'max_jobs_running'}; +} + +sub orig_max_jobs_running { + my $self = shift; + return $self->{'orig_max_jobs_running'}; +} + +sub compute_number_of_processes { + # Number of processes wanted and limited by system resources + # Returns: + # Number of processes + my $self = shift; + my $opt_P = shift; + my $wanted_processes = $self->user_requested_processes($opt_P); + if(not defined $wanted_processes) { + $wanted_processes = $Global::default_simultaneous_sshlogins; + } + ::debug("load", "Wanted procs: $wanted_processes\n"); + my $system_limit = + $self->processes_available_by_system_limit($wanted_processes); + ::debug("load", "Limited to procs: $system_limit\n"); + return $system_limit; +} + +{ + my @children; + my $max_system_proc_reached; + my $more_filehandles; + my %fh; + my $tmpfhname; + my $count_jobs_already_read; + my @jobs; + my $job; + my @args; + my $arg; + + sub reserve_filehandles { + # Reserves filehandle + my $n = shift; + for (1..$n) { + $more_filehandles &&= open($fh{$tmpfhname++}, "<", "/dev/null"); + } + } + + sub reserve_process { + # Spawn a dummy process + my $child; + if($child = fork()) { + push @children, $child; + $Global::unkilled_children{$child} = 1; + } elsif(defined $child) { + # This is the child + # The child takes one process slot + # It will be killed later + $SIG{'TERM'} = $Global::original_sig{'TERM'}; + sleep 10101010; + exit(0); + } else { + # Failed to spawn + $max_system_proc_reached = 1; + } + } + + sub get_args_or_jobs { + # Get an arg or a job (depending on mode) + if($Global::semaphore or $opt::pipe) { + # Skip: No need to get args + return 1; + } elsif(defined $opt::retries and $count_jobs_already_read) { + # For retries we may need to run all jobs on this sshlogin + # so include the already read jobs for this sshlogin + $count_jobs_already_read--; + return 1; + } else { + if($opt::X or $opt::m) { + # The arguments may have to be re-spread over several jobslots + # So pessimistically only read one arg per jobslot + # instead of a full commandline + if($Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->empty()) { + if($Global::JobQueue->empty()) { + return 0; + } else { + $job = $Global::JobQueue->get(); + push(@jobs, $job); + return 1; + } + } else { + $arg = $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get(); + push(@args, $arg); + return 1; + } + } else { + # If there are no more command lines, then we have a process + # per command line, so no need 
to go further + if($Global::JobQueue->empty()) { + return 0; + } else { + $job = $Global::JobQueue->get(); + push(@jobs, $job); + return 1; + } + } + } + } + + sub cleanup { + # Cleanup: Close the files + for (values %fh) { close $_ } + # Cleanup: Kill the children + for my $pid (@children) { + kill 9, $pid; + waitpid($pid,0); + delete $Global::unkilled_children{$pid}; + } + # Cleanup: Unget the command_lines or the @args + $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->unget(@args); + @args = (); + $Global::JobQueue->unget(@jobs); + @jobs = (); + } + + sub processes_available_by_system_limit { + # If the wanted number of processes is bigger than the system limits: + # Limit them to the system limits + # Limits are: File handles, number of input lines, processes, + # and taking > 1 second to spawn 10 extra processes + # Returns: + # Number of processes + my $self = shift; + my $wanted_processes = shift; + my $system_limit = 0; + my $slow_spawining_warning_printed = 0; + my $time = time; + $more_filehandles = 1; + $tmpfhname = "TmpFhNamE"; + + # perl uses 7 filehandles for something? + # parallel uses 1 for memory_usage + # parallel uses 4 for ? + reserve_filehandles(12); + # Two processes for load avg and ? + reserve_process(); + reserve_process(); + + # For --retries count also jobs already run + $count_jobs_already_read = $Global::JobQueue->next_seq(); + my $wait_time_for_getting_args = 0; + my $start_time = time; + while(1) { + $system_limit >= $wanted_processes and last; + not $more_filehandles and last; + $max_system_proc_reached and last; + + my $before_getting_arg = time; + if(!$Global::dummy_jobs) { + get_args_or_jobs() or last; + } + $wait_time_for_getting_args += time - $before_getting_arg; + $system_limit++; + + # Every simultaneous process uses 2 filehandles to write to + # and 2 filehandles to read from + reserve_filehandles(4); + + # System process limit + reserve_process(); + + my $forktime = time - $time - $wait_time_for_getting_args; + ::debug("run", "Time to fork $system_limit procs: $wait_time_for_getting_args ", + $forktime, + " (processes so far: ", $system_limit,")\n"); + if($system_limit > 10 and + $forktime > 1 and + $forktime > $system_limit * 0.01 + and not $slow_spawining_warning_printed) { + # It took more than 0.01 second to fork a processes on avg. + # Give the user a warning. He can press Ctrl-C if this + # sucks. + ::warning("Starting $system_limit processes took > $forktime sec.", + "Consider adjusting -j. Press CTRL-C to stop."); + $slow_spawining_warning_printed = 1; + } + } + cleanup(); + + if($system_limit < $wanted_processes) { + # The system_limit is less than the wanted_processes + if($system_limit < 1 and not $Global::JobQueue->empty()) { + ::warning("Cannot spawn any jobs. ". + "Raising ulimit -u or /etc/security/limits.conf", + "or /proc/sys/kernel/pid_max may help."); + ::wait_and_exit(255); + } + if(not $more_filehandles) { + ::warning("Only enough file handles to run ". + $system_limit. " jobs in parallel.", + "Running 'parallel -j0 -N $system_limit --pipe parallel -j0' or", + "raising ulimit -n or /etc/security/limits.conf may help."); + } + if($max_system_proc_reached) { + ::warning("Only enough available processes to run ". + $system_limit. 
" jobs in parallel.", + "Raising ulimit -u or /etc/security/limits.conf ", + "or /proc/sys/kernel/pid_max may help."); + } + } + if($] == 5.008008 and $system_limit > 1000) { + # https://savannah.gnu.org/bugs/?36942 + $system_limit = 1000; + } + if($Global::JobQueue->empty()) { + $system_limit ||= 1; + } + if($self->string() ne ":" and + $system_limit > $Global::default_simultaneous_sshlogins) { + $system_limit = + $self->simultaneous_sshlogin_limit($system_limit); + } + return $system_limit; + } +} + +sub simultaneous_sshlogin_limit { + # Test by logging in wanted number of times simultaneously + # Returns: + # min($wanted_processes,$working_simultaneous_ssh_logins-1) + my $self = shift; + my $wanted_processes = shift; + if($self->{'time_to_login'}) { + return $wanted_processes; + } + + # Try twice because it guesses wrong sometimes + # Choose the minimal + my $ssh_limit = + ::min($self->simultaneous_sshlogin($wanted_processes), + $self->simultaneous_sshlogin($wanted_processes)); + if($ssh_limit < $wanted_processes) { + my $serverlogin = $self->serverlogin(); + ::warning("ssh to $serverlogin only allows ". + "for $ssh_limit simultaneous logins.", + "You may raise this by changing ". + "/etc/ssh/sshd_config:MaxStartups and MaxSessions on $serverlogin.", + "You can also try --sshdelay 0.1", + "Using only ".($ssh_limit-1)." connections ". + "to avoid race conditions."); + # Race condition can cause problem if using all sshs. + if($ssh_limit > 1) { $ssh_limit -= 1; } + } + return $ssh_limit; +} + +sub simultaneous_sshlogin { + # Using $sshlogin try to see if we can do $wanted_processes + # simultaneous logins + # (ssh host echo simultaneouslogin & ssh host echo simultaneouslogin & ...)|grep simul|wc -l + # Returns: + # Number of succesful logins + my $self = shift; + my $wanted_processes = shift; + my $sshcmd = $self->sshcommand(); + my $serverlogin = $self->serverlogin(); + my $sshdelay = $opt::sshdelay ? "sleep $opt::sshdelay;" : ""; + # TODO sh -c wrapper to work for csh + my $cmd = "$sshdelay$sshcmd $serverlogin -- echo simultaneouslogin &1 &"x$wanted_processes; + ::debug("init", "Trying $wanted_processes logins at $serverlogin\n"); + open (my $simul_fh, "-|", "($cmd)|grep simultaneouslogin | wc -l") or + ::die_bug("simultaneouslogin"); + my $ssh_limit = <$simul_fh>; + close $simul_fh; + chomp $ssh_limit; + return $ssh_limit; +} + +sub set_ncpus { + my $self = shift; + $self->{'ncpus'} = shift; +} + +sub user_requested_processes { + # Parse the number of processes that the user asked for using -j + # Returns: + # the number of processes to run on this sshlogin + my $self = shift; + my $opt_P = shift; + my $processes; + if(defined $opt_P) { + if($opt_P =~ /^\+(\d+)$/) { + # E.g. -P +2 + my $j = $1; + $processes = + $self->ncpus() + $j; + } elsif ($opt_P =~ /^-(\d+)$/) { + # E.g. -P -2 + my $j = $1; + $processes = + $self->ncpus() - $j; + } elsif ($opt_P =~ /^(\d+(\.\d+)?)\%$/) { + # E.g. 
-P 10.5% + my $j = $1; + $processes = + $self->ncpus() * $j / 100; + } elsif ($opt_P =~ /^(\d+)$/) { + $processes = $1; + if($processes == 0) { + # -P 0 = infinity (or at least close) + $processes = $Global::infinity; + } + } elsif (-f $opt_P) { + $Global::max_procs_file = $opt_P; + if(open(my $in_fh, "<", $Global::max_procs_file)) { + my $opt_P_file = join("",<$in_fh>); + close $in_fh; + $processes = $self->user_requested_processes($opt_P_file); + } else { + ::error("Cannot open $opt_P."); + ::wait_and_exit(255); + } + } else { + ::error("Parsing of --jobs/-j/--max-procs/-P failed."); + ::die_usage(); + } + $processes = ::ceil($processes); + } + return $processes; +} + +sub ncpus { + my $self = shift; + if(not defined $self->{'ncpus'}) { + my $sshcmd = $self->sshcommand(); + my $serverlogin = $self->serverlogin(); + if($serverlogin eq ":") { + if($opt::use_cpus_instead_of_cores) { + $self->{'ncpus'} = no_of_cpus(); + } else { + $self->{'ncpus'} = no_of_cores(); + } + } else { + my $ncpu; + if($opt::use_cpus_instead_of_cores) { + $ncpu = ::qqx("echo|$sshcmd $serverlogin -- parallel --number-of-cpus"); + } else { + ::debug("init",qq(echo|$sshcmd $serverlogin -- parallel --number-of-cores\n)); + $ncpu = ::qqx("echo|$sshcmd $serverlogin -- parallel --number-of-cores"); + } + chomp $ncpu; + if($ncpu =~ /^\s*[0-9]+\s*$/s) { + $self->{'ncpus'} = $ncpu; + } else { + ::warning("Could not figure out ". + "number of cpus on $serverlogin ($ncpu). Using 1."); + $self->{'ncpus'} = 1; + } + } + } + return $self->{'ncpus'}; +} + +sub no_of_cpus { + # Returns: + # Number of physical CPUs + local $/ = "\n"; # If delimiter is set, then $/ will be wrong + my $no_of_cpus; + if ($^O eq 'linux') { + $no_of_cpus = no_of_cpus_gnu_linux() || no_of_cores_gnu_linux(); + } elsif ($^O eq 'freebsd') { + $no_of_cpus = no_of_cpus_freebsd(); + } elsif ($^O eq 'netbsd') { + $no_of_cpus = no_of_cpus_netbsd(); + } elsif ($^O eq 'openbsd') { + $no_of_cpus = no_of_cpus_openbsd(); + } elsif ($^O eq 'gnu') { + $no_of_cpus = no_of_cpus_hurd(); + } elsif ($^O eq 'darwin') { + $no_of_cpus = no_of_cpus_darwin(); + } elsif ($^O eq 'solaris') { + $no_of_cpus = no_of_cpus_solaris() || nproc(); + } elsif ($^O eq 'aix') { + $no_of_cpus = no_of_cpus_aix(); + } elsif ($^O eq 'hpux') { + $no_of_cpus = no_of_cpus_hpux(); + } elsif ($^O eq 'nto') { + $no_of_cpus = no_of_cpus_qnx(); + } elsif ($^O eq 'svr5') { + $no_of_cpus = no_of_cpus_openserver(); + } elsif ($^O eq 'irix') { + $no_of_cpus = no_of_cpus_irix(); + } elsif ($^O eq 'dec_osf') { + $no_of_cpus = no_of_cpus_tru64(); + } else { + $no_of_cpus = (no_of_cpus_gnu_linux() + || no_of_cpus_freebsd() + || no_of_cpus_netbsd() + || no_of_cpus_openbsd() + || no_of_cpus_hurd() + || no_of_cpus_darwin() + || no_of_cpus_solaris() + || no_of_cpus_aix() + || no_of_cpus_hpux() + || no_of_cpus_qnx() + || no_of_cpus_openserver() + || no_of_cpus_irix() + || no_of_cpus_tru64() + # Number of cores is better than no guess for #CPUs + || nproc() + ); + } + if($no_of_cpus) { + chomp $no_of_cpus; + return $no_of_cpus; + } else { + ::warning("Cannot figure out number of cpus. 
Using 1."); + return 1; + } +} + +sub no_of_cores { + # Returns: + # Number of CPU cores + local $/ = "\n"; # If delimiter is set, then $/ will be wrong + my $no_of_cores; + if ($^O eq 'linux') { + $no_of_cores = no_of_cores_gnu_linux(); + } elsif ($^O eq 'freebsd') { + $no_of_cores = no_of_cores_freebsd(); + } elsif ($^O eq 'netbsd') { + $no_of_cores = no_of_cores_netbsd(); + } elsif ($^O eq 'openbsd') { + $no_of_cores = no_of_cores_openbsd(); + } elsif ($^O eq 'gnu') { + $no_of_cores = no_of_cores_hurd(); + } elsif ($^O eq 'darwin') { + $no_of_cores = no_of_cores_darwin(); + } elsif ($^O eq 'solaris') { + $no_of_cores = no_of_cores_solaris() || nproc(); + } elsif ($^O eq 'aix') { + $no_of_cores = no_of_cores_aix(); + } elsif ($^O eq 'hpux') { + $no_of_cores = no_of_cores_hpux(); + } elsif ($^O eq 'nto') { + $no_of_cores = no_of_cores_qnx(); + } elsif ($^O eq 'svr5') { + $no_of_cores = no_of_cores_openserver(); + } elsif ($^O eq 'irix') { + $no_of_cores = no_of_cores_irix(); + } elsif ($^O eq 'dec_osf') { + $no_of_cores = no_of_cores_tru64(); + } else { + $no_of_cores = (no_of_cores_gnu_linux() + || no_of_cores_freebsd() + || no_of_cores_netbsd() + || no_of_cores_openbsd() + || no_of_cores_hurd() + || no_of_cores_darwin() + || no_of_cores_solaris() + || no_of_cores_aix() + || no_of_cores_hpux() + || no_of_cores_qnx() + || no_of_cores_openserver() + || no_of_cores_irix() + || no_of_cores_tru64() + || nproc() + ); + } + if($no_of_cores) { + chomp $no_of_cores; + return $no_of_cores; + } else { + ::warning("Cannot figure out number of CPU cores. Using 1."); + return 1; + } +} + +sub nproc { + # Returns: + # Number of cores using `nproc` + my $no_of_cores = ::qqx("nproc"); + return $no_of_cores; +} + +sub no_of_cpus_gnu_linux { + # Returns: + # Number of physical CPUs on GNU/Linux + # undef if not GNU/Linux + my $no_of_cpus; + my $no_of_cores; + my $no_of_active_cores; + if(-e "/proc/cpuinfo") { + $no_of_cpus = 0; + $no_of_cores = 0; + my %seen; + if(open(my $in_fh, "<", "/proc/cpuinfo")) { + while(<$in_fh>) { + if(/^physical id.*[:](.*)/ and not $seen{$1}++) { + $no_of_cpus++; + } + /^processor.*[:]/i and $no_of_cores++; + } + close $in_fh; + } + } + if(-e "/proc/self/status") { + # if 'taskset' is used to limit number of cores + if(open(my $in_fh, "<", "/proc/self/status")) { + while(<$in_fh>) { + if(/^Cpus_allowed:\s*(\S+)/) { + my $a = $1; + $a =~ tr/,//d; + $no_of_active_cores = unpack ("%32b*", pack ("H*",$a)); + } + } + close $in_fh; + } + } + return (::min($no_of_cpus || $no_of_cores,$no_of_active_cores)); +} + +sub no_of_cores_gnu_linux { + # Returns: + # Number of CPU cores on GNU/Linux + # undef if not GNU/Linux + my $no_of_cores; + my $no_of_active_cores; + if(-e "/proc/cpuinfo") { + $no_of_cores = 0; + open(my $in_fh, "<", "/proc/cpuinfo") || return undef; + while(<$in_fh>) { + /^processor.*[:]/i and $no_of_cores++; + } + close $in_fh; + } + if(-e "/proc/self/status") { + # if 'taskset' is used to limit number of cores + if(open(my $in_fh, "<", "/proc/self/status")) { + while(<$in_fh>) { + if(/^Cpus_allowed:\s*(\S+)/) { + my $a = $1; + $a =~ tr/,//d; + $no_of_active_cores = unpack ("%32b*", pack ("H*",$a)); + } + } + close $in_fh; + } + } + return (::min($no_of_cores,$no_of_active_cores)); +} + +sub no_of_cpus_freebsd { + # Returns: + # Number of physical CPUs on FreeBSD + # undef if not FreeBSD + my $no_of_cpus = + (::qqx(qq{ sysctl -a dev.cpu | grep \%parent | awk '{ print \$2 }' | uniq | wc -l | awk '{ print \$1 }' }) + or + ::qqx(qq{ sysctl hw.ncpu | awk '{ print \$2 }' })); 
+ chomp $no_of_cpus; + return $no_of_cpus; +} + +sub no_of_cores_freebsd { + # Returns: + # Number of CPU cores on FreeBSD + # undef if not FreeBSD + my $no_of_cores = + (::qqx(qq{ sysctl hw.ncpu | awk '{ print \$2 }' }) + or + ::qqx(qq{ sysctl -a dev.cpu | grep \%parent | awk '{ print \$2 }' | uniq | wc -l | awk '{ print \$1 }' })); + chomp $no_of_cores; + return $no_of_cores; +} + +sub no_of_cpus_netbsd { + # Returns: + # Number of physical CPUs on NetBSD + # undef if not NetBSD + my $no_of_cpus = ::qqx("sysctl -n hw.ncpu"); + chomp $no_of_cpus; + return $no_of_cpus; +} + +sub no_of_cores_netbsd { + # Returns: + # Number of CPU cores on NetBSD + # undef if not NetBSD + my $no_of_cores = ::qqx("sysctl -n hw.ncpu"); + chomp $no_of_cores; + return $no_of_cores; +} + +sub no_of_cpus_openbsd { + # Returns: + # Number of physical CPUs on OpenBSD + # undef if not OpenBSD + my $no_of_cpus = ::qqx('sysctl -n hw.ncpu'); + chomp $no_of_cpus; + return $no_of_cpus; +} + +sub no_of_cores_openbsd { + # Returns: + # Number of CPU cores on OpenBSD + # undef if not OpenBSD + my $no_of_cores = ::qqx('sysctl -n hw.ncpu'); + chomp $no_of_cores; + return $no_of_cores; +} + +sub no_of_cpus_hurd { + # Returns: + # Number of physical CPUs on HURD + # undef if not HURD + my $no_of_cpus = ::qqx("nproc"); + chomp $no_of_cpus; + return $no_of_cpus; +} + +sub no_of_cores_hurd { + # Returns: + # Number of physical CPUs on HURD + # undef if not HURD + my $no_of_cores = ::qqx("nproc"); + chomp $no_of_cores; + return $no_of_cores; +} + +sub no_of_cpus_darwin { + # Returns: + # Number of physical CPUs on MacOSX Darwin + # undef if not MacOSX Darwin + my $no_of_cpus = + (::qqx('sysctl -n hw.physicalcpu') + or + ::qqx(qq{ sysctl -a hw | grep [^a-z]physicalcpu[^a-z] | awk '{ print \$2 }' })); + return $no_of_cpus; +} + +sub no_of_cores_darwin { + # Returns: + # Number of CPU cores on Mac Darwin + # undef if not Mac Darwin + my $no_of_cores = + (::qqx('sysctl -n hw.logicalcpu') + or + ::qqx(qq{ sysctl -a hw | grep [^a-z]logicalcpu[^a-z] | awk '{ print \$2 }' })); + return $no_of_cores; +} + +sub no_of_cpus_solaris { + # Returns: + # Number of physical CPUs on Solaris + # undef if not Solaris + if(-x "/usr/sbin/psrinfo") { + my @psrinfo = ::qqx("/usr/sbin/psrinfo"); + if($#psrinfo >= 0) { + return $#psrinfo +1; + } + } + if(-x "/usr/sbin/prtconf") { + my @prtconf = ::qqx("/usr/sbin/prtconf | grep cpu..instance"); + if($#prtconf >= 0) { + return $#prtconf +1; + } + } + if(-x "/usr/sbin/prtconf") { + my @prtconf = ::qqx("/usr/sbin/prtconf | grep cpu..instance"); + if($#prtconf >= 0) { + return $#prtconf +1; + } + } + return undef; +} + +sub no_of_cores_solaris { + # Returns: + # Number of CPU cores on Solaris + # undef if not Solaris + if(-x "/usr/sbin/psrinfo") { + my @psrinfo = ::qqx("/usr/sbin/psrinfo"); + if($#psrinfo >= 0) { + return $#psrinfo +1; + } + } + if(-x "/usr/sbin/prtconf") { + my @prtconf = ::qqx("/usr/sbin/prtconf | grep cpu..instance"); + if($#prtconf >= 0) { + return $#prtconf +1; + } + } + return undef; +} + +sub no_of_cpus_aix { + # Returns: + # Number of physical CPUs on AIX + # undef if not AIX + my $no_of_cpus = 0; + if(-x "/usr/sbin/lscfg") { + open(my $in_fh, "-|", "/usr/sbin/lscfg -vs |grep proc | wc -l|tr -d ' '") + || return undef; + $no_of_cpus = <$in_fh>; + chomp ($no_of_cpus); + close $in_fh; + } + return $no_of_cpus; +} + +sub no_of_cores_aix { + # Returns: + # Number of CPU cores on AIX + # undef if not AIX + my $no_of_cores; + if(-x "/usr/bin/vmstat") { + open(my $in_fh, "-|", "/usr/bin/vmstat 
1 1") || return undef; + while(<$in_fh>) { + /lcpu=([0-9]*) / and $no_of_cores = $1; + } + close $in_fh; + } + return $no_of_cores; +} + +sub no_of_cpus_hpux { + # Returns: + # Number of physical CPUs on HP-UX + # undef if not HP-UX + my $no_of_cpus = + ::qqx(qq{ /usr/bin/mpsched -s 2>&1 | grep 'Locality Domain Count' | awk '{ print \$4 }'}); + return $no_of_cpus; +} + +sub no_of_cores_hpux { + # Returns: + # Number of CPU cores on HP-UX + # undef if not HP-UX + my $no_of_cores = + ::qqx(qq{ /usr/bin/mpsched -s 2>&1 | perl -ne '/Processor Count\\D+(\\d+)/ and print "\$1\n"'}); + return $no_of_cores; +} + +sub no_of_cpus_qnx { + # Returns: + # Number of physical CPUs on QNX + # undef if not QNX + # BUG: It is not known how to calculate this. + my $no_of_cpus = 0; + return $no_of_cpus; +} + +sub no_of_cores_qnx { + # Returns: + # Number of CPU cores on QNX + # undef if not QNX + # BUG: It is not known how to calculate this. + my $no_of_cores = 0; + return $no_of_cores; +} + +sub no_of_cpus_openserver { + # Returns: + # Number of physical CPUs on SCO OpenServer + # undef if not SCO OpenServer + my $no_of_cpus = 0; + if(-x "/usr/sbin/psrinfo") { + my @psrinfo = ::qqx("/usr/sbin/psrinfo"); + if($#psrinfo >= 0) { + return $#psrinfo +1; + } + } + return $no_of_cpus; +} + +sub no_of_cores_openserver { + # Returns: + # Number of CPU cores on SCO OpenServer + # undef if not SCO OpenServer + my $no_of_cores = 0; + if(-x "/usr/sbin/psrinfo") { + my @psrinfo = ::qqx("/usr/sbin/psrinfo"); + if($#psrinfo >= 0) { + return $#psrinfo +1; + } + } + return $no_of_cores; +} + +sub no_of_cpus_irix { + # Returns: + # Number of physical CPUs on IRIX + # undef if not IRIX + my $no_of_cpus = ::qqx(qq{ hinv | grep HZ | grep Processor | awk '{print \$1}' }); + return $no_of_cpus; +} + +sub no_of_cores_irix { + # Returns: + # Number of CPU cores on IRIX + # undef if not IRIX + my $no_of_cores = ::qqx(qq{ hinv | grep HZ | grep Processor | awk '{print \$1}' }); + return $no_of_cores; +} + +sub no_of_cpus_tru64 { + # Returns: + # Number of physical CPUs on Tru64 + # undef if not Tru64 + my $no_of_cpus = ::qqx("sizer -pr"); + return $no_of_cpus; +} + +sub no_of_cores_tru64 { + # Returns: + # Number of CPU cores on Tru64 + # undef if not Tru64 + my $no_of_cores = ::qqx("sizer -pr"); + return $no_of_cores; +} + +sub sshcommand { + my $self = shift; + if (not defined $self->{'sshcommand'}) { + $self->sshcommand_of_sshlogin(); + } + return $self->{'sshcommand'}; +} + +sub serverlogin { + my $self = shift; + if (not defined $self->{'serverlogin'}) { + $self->sshcommand_of_sshlogin(); + } + return $self->{'serverlogin'}; +} + +sub sshcommand_of_sshlogin { + # 'server' -> ('ssh -S /tmp/parallel-ssh-RANDOM/host-','server') + # 'user@server' -> ('ssh','user@server') + # 'myssh user@server' -> ('myssh','user@server') + # 'myssh -l user server' -> ('myssh -l user','server') + # '/usr/bin/myssh -l user server' -> ('/usr/bin/myssh -l user','server') + # Returns: + # sshcommand - defaults to 'ssh' + # login@host + my $self = shift; + my ($sshcmd, $serverlogin); + # If $opt::ssh is unset, use $PARALLEL_SSH or 'ssh' + $opt::ssh ||= $ENV{'PARALLEL_SSH'} || "ssh"; + if($self->{'string'} =~ /(.+) (\S+)$/) { + # Own ssh command + $sshcmd = $1; $serverlogin = $2; + } else { + # Normal ssh + if($opt::controlmaster) { + # Use control_path to make ssh faster + my $control_path = $self->control_path_dir()."/ssh-%r@%h:%p"; + $sshcmd = $opt::ssh." 
-S ".$control_path; + $serverlogin = $self->{'string'}; + if(not $self->{'control_path'}{$control_path}++) { + # Master is not running for this control_path + # Start it + my $pid = fork(); + if($pid) { + $Global::sshmaster{$pid} ||= 1; + } else { + $SIG{'TERM'} = undef; + # Ignore the 'foo' being printed + open(STDOUT,">","/dev/null"); + # STDERR >/dev/null to ignore + open(STDERR,">","/dev/null"); + open(STDIN,"<","/dev/null"); + # Run a sleep that outputs data, so it will discover + # if the ssh connection closes. + my $sleep = ::shell_quote_scalar + ('$|=1;while(1){sleep 1;print "foo\n"}'); + my @master = ($opt::ssh, "-MTS", + $control_path, $serverlogin, "--", "perl", "-e", + $sleep); + exec(@master); + } + } + } else { + $sshcmd = $opt::ssh; $serverlogin = $self->{'string'}; + } + } + + if($serverlogin =~ s/(\S+)\@(\S+)/$2/) { + # convert user@server to '-l user server' + # because lsh does not support user@server + $sshcmd = $sshcmd." -l ".$1; + } + + $self->{'sshcommand'} = $sshcmd; + $self->{'serverlogin'} = $serverlogin; +} + +sub control_path_dir { + # Returns: + # path to directory + my $self = shift; + if(not defined $self->{'control_path_dir'}) { + $self->{'control_path_dir'} = + # Use $ENV{'TMPDIR'} as that is typically not + # NFS mounted + File::Temp::tempdir($ENV{'TMPDIR'} + . "/control_path_dir-XXXX", + CLEANUP => 1); + } + return $self->{'control_path_dir'}; +} + +sub rsync_transfer_cmd { + # Command to run to transfer a file + # Input: + # $file = filename of file to transfer + # $workdir = destination dir + # Returns: + # $cmd = rsync command to run to transfer $file ("" if unreadable) + my $self = shift; + my $file = shift; + my $workdir = shift; + if(not -r $file) { + ::warning($file. " is not readable and will not be transferred."); + return "true"; + } + my $rsync_destdir; + my $relpath = ($file !~ m:^/:) || ($file =~ m:/\./:); # Is the path relative or /./? + if($relpath) { + $rsync_destdir = ::shell_quote_file($workdir); + } else { + # rsync /foo/bar / + $rsync_destdir = "/"; + } + $file = ::shell_quote_file($file); + my $sshcmd = $self->sshcommand(); + my $rsync_opt = "-rlDzR -e" . ::shell_quote_scalar($sshcmd); + my $serverlogin = $self->serverlogin(); + # Make dir if it does not exist + return "$sshcmd $serverlogin -- mkdir -p $rsync_destdir && " . + rsync()." $rsync_opt $file $serverlogin:$rsync_destdir"; +} + +sub cleanup_cmd { + # Command to run to remove the remote file + # Input: + # $file = filename to remove + # $workdir = destination dir + # Returns: + # $cmd = ssh command to run to remove $file and empty parent dirs + my $self = shift; + my $file = shift; + my $workdir = shift; + my $f = $file; + if($f =~ m:/\./:) { + # foo/bar/./baz/quux => workdir/baz/quux + # /foo/bar/./baz/quux => workdir/baz/quux + $f =~ s:.*/\./:$workdir/:; + } elsif($f =~ m:^[^/]:) { + # foo/bar => workdir/foo/bar + $f = $workdir."/".$f; + } + my @subdirs = split m:/:, ::dirname($f); + my @rmdir; + my $dir = ""; + for(@subdirs) { + $dir .= $_."/"; + unshift @rmdir, ::shell_quote_file($dir); + } + my $rmdir = @rmdir ? "sh -c ".::shell_quote_scalar("rmdir @rmdir 2>/dev/null;") : ""; + if(defined $opt::workdir and $opt::workdir eq "...") { + $rmdir .= ::shell_quote_scalar("rm -rf " . 
::shell_quote_file($workdir).';'); + } + + $f = ::shell_quote_file($f); + my $sshcmd = $self->sshcommand(); + my $serverlogin = $self->serverlogin(); + return "$sshcmd $serverlogin -- ".::shell_quote_scalar("rm -f $f; $rmdir"); +} + +{ + my $rsync; + + sub rsync { + # rsync 3.1.x uses protocol 31 which is unsupported by 2.5.7. + # If the version >= 3.1.0: downgrade to protocol 30 + if(not $rsync) { + my @out = `rsync --version`; + for (@out) { + if(/version (\d+.\d+)(.\d+)?/) { + if($1 >= 3.1) { + # Version 3.1.0 or later: Downgrade to protocol 30 + $rsync = "rsync --protocol 30"; + } else { + $rsync = "rsync"; + } + } + } + $rsync or ::die_bug("Cannot figure out version of rsync: @out"); + } + return $rsync; + } +} + + +package JobQueue; + +sub new { + my $class = shift; + my $commandref = shift; + my $read_from = shift; + my $context_replace = shift; + my $max_number_of_args = shift; + my $transfer_files = shift; + my $return_files = shift; + my $commandlinequeue = CommandLineQueue->new + ($commandref, $read_from, $context_replace, $max_number_of_args, + $transfer_files, $return_files); + my @unget = (); + return bless { + 'unget' => \@unget, + 'commandlinequeue' => $commandlinequeue, + 'this_job_no' => 0, + 'total_jobs' => undef, + }, ref($class) || $class; +} + +sub get { + my $self = shift; + + $self->{'this_job_no'}++; + if(@{$self->{'unget'}}) { + return shift @{$self->{'unget'}}; + } else { + my $commandline = $self->{'commandlinequeue'}->get(); + if(defined $commandline) { + return Job->new($commandline); + } else { + $self->{'this_job_no'}--; + return undef; + } + } +} + +sub unget { + my $self = shift; + unshift @{$self->{'unget'}}, @_; + $self->{'this_job_no'} -= @_; +} + +sub empty { + my $self = shift; + my $empty = (not @{$self->{'unget'}}) + && $self->{'commandlinequeue'}->empty(); + ::debug("run", "JobQueue->empty $empty "); + return $empty; +} + +sub total_jobs { + my $self = shift; + if(not defined $self->{'total_jobs'}) { + if($opt::pipe) { + ::error("--pipe is incompatible with --eta/--bar/--shuf"); + ::wait_and_exit(255); + } + if($opt::sqlworker) { + $self->{'total_jobs'} = $Global::sql->total_jobs(); + } else { + my $record; + my @arg_records; + my $record_queue = $self->{'commandlinequeue'}{'arg_queue'}; + my $start = time; + while($record = $record_queue->get()) { + if(time - $start > 10) { + ::warning("Reading ".scalar(@arg_records). + " arguments took longer than 10 seconds."); + $opt::eta && ::warning("Consider removing --eta."); + $opt::bar && ::warning("Consider removing --bar."); + $opt::shuf && ::warning("Consider removing --shuf."); + last; + } + push @arg_records, $record; + } + while($record = $record_queue->get()) { + push @arg_records, $record; + } + if($opt::shuf) { + my $i = @arg_records; + while (--$i) { + my $j = int rand($i+1); + @arg_records[$i,$j] = @arg_records[$j,$i]; + } + } + $record_queue->unget(@arg_records); + $self->{'total_jobs'} = + ::ceil((1+$#arg_records+$self->{'this_job_no'}) + / ::max($Global::max_number_of_args,1)); + ::debug("init","Total jobs: ".$self->{'total_jobs'}. 
+ " (".(1+$#arg_records)."+".$self->{'this_job_no'}.")\n"); + } + } + return $self->{'total_jobs'}; +} + +sub next_seq { + my $self = shift; + + return $self->{'commandlinequeue'}->seq(); +} + +sub quote_args { + my $self = shift; + return $self->{'commandlinequeue'}->quote_args(); +} + + +package Job; + +sub new { + my $class = shift; + my $commandlineref = shift; + return bless { + 'commandline' => $commandlineref, # CommandLine object + 'workdir' => undef, # --workdir + # filehandle for stdin (used for --pipe) + # filename for writing stdout to (used for --files) + # remaining data not sent to stdin (used for --pipe) + # tmpfiles to cleanup when job is done + 'unlink' => [], + # amount of data sent via stdin (used for --pipe) + 'transfersize' => 0, # size of files using --transfer + 'returnsize' => 0, # size of files using --return + 'pid' => undef, + # hash of { SSHLogins => number of times the command failed there } + 'failed' => undef, + 'sshlogin' => undef, + # The commandline wrapped with rsync and ssh + 'sshlogin_wrap' => undef, + 'exitstatus' => undef, + 'exitsignal' => undef, + # Timestamp for timeout if any + 'timeout' => undef, + 'virgin' => 1, + # Output used for SQL and CSV-output + 'output' => { 1 => [], 2 => [] }, + }, ref($class) || $class; +} + +sub replaced { + my $self = shift; + $self->{'commandline'} or ::die_bug("commandline empty"); + return $self->{'commandline'}->replaced(); +} + +sub seq { + my $self = shift; + return $self->{'commandline'}->seq(); +} + +sub set_seq { + my $self = shift; + return $self->{'commandline'}->set_seq(shift); +} + +sub slot { + my $self = shift; + return $self->{'commandline'}->slot(); +} + +sub free_slot { + my $self = shift; + push @Global::slots, $self->slot(); +} + +{ + my($cattail); + + sub cattail { + # Returns: + # $cattail = perl program for: + # cattail "decompress program" writerpid [file_to_decompress or stdin] [file_to_unlink] + if(not $cattail) { + $cattail = q{ + # cat followed by tail (possibly with rm as soon at the file is opened) + # If $writerpid dead: finish after this round + use Fcntl; + $|=1; + + my ($comfile, $cmd, $writerpid, $read_file, $unlink_file) = @ARGV; + if($read_file) { + open(IN,"<",$read_file) || die("cattail: Cannot open $read_file"); + } else { + *IN = *STDIN; + } + while(! -s $comfile) { + # Writer has not opened the buffer file, so we cannot remove it yet + $sleep = ($sleep < 30) ? 
($sleep * 1.001 + 0.01) : ($sleep); + usleep($sleep); + } + # The writer and we have both opened the file, so it is safe to unlink it + unlink $unlink_file; + unlink $comfile; + + my $first_round = 1; + my $flags; + fcntl(IN, F_GETFL, $flags) || die $!; # Get the current flags on the filehandle + $flags |= O_NONBLOCK; # Add non-blocking to the flags + fcntl(IN, F_SETFL, $flags) || die $!; # Set the flags on the filehandle + + while(1) { + # clear EOF + seek(IN,0,1); + my $writer_running = kill 0, $writerpid; + $read = sysread(IN,$buf,131072); + if($read) { + if($first_round) { + # Only start the command if there any input to process + $first_round = 0; + open(OUT,"|-",$cmd) || die("cattail: Cannot run $cmd"); + } + + # Blocking print + while($buf) { + my $bytes_written = syswrite(OUT,$buf); + # syswrite may be interrupted by SIGHUP + substr($buf,0,$bytes_written) = ""; + } + # Something printed: Wait less next time + $sleep /= 2; + } else { + if(eof(IN) and not $writer_running) { + # Writer dead: There will never be sent more to the decompressor + close OUT; + exit; + } + # TODO This could probably be done more efficiently using select(2) + # Nothing read: Wait longer before next read + # Up to 100 milliseconds + $sleep = ($sleep < 100) ? ($sleep * 1.001 + 0.01) : ($sleep); + usleep($sleep); + } + } + + sub usleep { + # Sleep this many milliseconds. + my $secs = shift; + select(undef, undef, undef, $secs/1000); + } + }; + $cattail =~ s/#.*//mg; + $cattail =~ s/\s+/ /g; + } + return $cattail; + } +} + +sub openoutputfiles { + # Open files for STDOUT and STDERR + # Set file handles in $self->fh + my $self = shift; + my ($outfhw, $errfhw, $outname, $errname); + + if($opt::results and not $Global::csv) { + # Results as dir + my $args_as_dirname = $self->{'commandline'}->args_as_dirname(); + # Output in: prefix/name1/val1/name2/val2/stdout + my $dir = $self->{'commandline'}-> + replace_placeholders([$opt::results],0,0) . + "/".$args_as_dirname; + if(eval{ File::Path::mkpath($dir); }) { + # OK + } else { + # mkpath failed: Argument probably too long. + # Set $Global::max_file_length, which will keep the individual + # dir names shorter than the max length + max_file_name_length($opt::results); + $args_as_dirname = $self->{'commandline'}->args_as_dirname(); + # prefix/name1/val1/name2/val2/ + $dir = $opt::results."/".$args_as_dirname; + File::Path::mkpath($dir); + } + # prefix/name1/val1/name2/val2/seq + my $seqname = "$dir/seq"; + my $seqfhw; + if(not open($seqfhw, "+>", $seqname)) { + ::error("Cannot write to `$seqname'."); + ::wait_and_exit(255); + } + print $seqfhw $self->seq(); + close $seqfhw; + # prefix/name1/val1/name2/val2/stdout + $outname = "$dir/stdout"; + if(not open($outfhw, "+>", $outname)) { + ::error("Cannot write to `$outname'."); + ::wait_and_exit(255); + } + # prefix/name1/val1/name2/val2/stderr + $errname = "$dir/stderr"; + if(not open($errfhw, "+>", $errname)) { + ::error("Cannot write to `$errname'."); + ::wait_and_exit(255); + } + $self->set_fh(1,"unlink",""); + $self->set_fh(2,"unlink",""); + if($opt::sqlworker) { + # Save the filenames in SQL table + $Global::sql->update("SET Stdout = ?, Stderr = ? ". + "WHERE Seq = ". 
$self->seq(), + $outname, $errname); + } + } elsif(not $opt::ungroup) { + # To group we create temporary files for STDOUT and STDERR + # To avoid cleanup, unlink the files immediately (but keep them open) + if(@Global::tee_jobs) { + # files must be removed when the tee is done + } elsif($opt::files) { + ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par"); + ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par"); + # --files => only remove stderr + $self->set_fh(1,"unlink",""); + $self->set_fh(2,"unlink",$errname); + } else { + ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par"); + ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par"); + $self->set_fh(1,"unlink",$outname); + $self->set_fh(2,"unlink",$errname); + } + } else { + # --ungroup + open($outfhw,">&",$Global::fd{1}) || die; + open($errfhw,">&",$Global::fd{2}) || die; + # File name must be empty as it will otherwise be printed + $outname = ""; + $errname = ""; + $self->set_fh(1,"unlink",$outname); + $self->set_fh(2,"unlink",$errname); + } + # Set writing FD + $self->set_fh(1,'w',$outfhw); + $self->set_fh(2,'w',$errfhw); + $self->set_fh(1,'name',$outname); + $self->set_fh(2,'name',$errname); + if($opt::compress) { + $self->filter_through_compress(); + } elsif(not $opt::ungroup) { + $self->grouped(); + } + if($opt::linebuffer) { + # Make it possible to read non-blocking from + # the buffer files + for my $fdno (1,2) { + ::set_fh_non_blocking($self->fh($fdno,'r')); + } + } +} + +sub add_rm { + # Files to remove when job is done + my $self = shift; + push @{$self->{'unlink'}}, @_; +} + +sub get_rm { + # Files to remove when job is done + my $self = shift; + return @{$self->{'unlink'}}; +} + +sub cleanup { + # Remove files when job is done + my $self = shift; + unlink $self->get_rm(); + delete @Global::unlink{$self->get_rm()}; +} + +sub grouped { + my $self = shift; + # Set reading FD if using --group (--ungroup does not need it) + for my $fdno (1,2) { + # Re-open the file for reading + # so fdw can be closed separately + # and fdr can be seeked separately (for --line-buffer) + open(my $fdr,"<", $self->fh($fdno,'name')) || + ::die_bug("fdr: Cannot open ".$self->fh($fdno,'name')); + $self->set_fh($fdno,'r',$fdr); + # Unlink if required + $Global::debug or ::rm($self->fh($fdno,"unlink")); + } +} + +sub empty_input_wrapper { + # If no input: exit(0) + # If some input: Pass input as input to command on STDIN + # This avoids starting the command if there is no input. + # Input: + # $command = command to pipe data to + # Returns: + # $wrapped_command = the wrapped command + my $command = shift; + my $script = + ::spacefree(0,q{ + if(sysread(STDIN, $buf, 1)) { + open($fh, "|-", @ARGV) || die; + syswrite($fh, $buf); + # Align up to 128k block + if($read = sysread(STDIN, $buf, 131071)) { + syswrite($fh, $buf); + } + while($read = sysread(STDIN, $buf, 131072)) { + syswrite($fh, $buf); + } + close $fh; + exit ($?&127 ? 128+($?&127) : 1+$?>>8) + } + }); + ::debug("run",'Empty wrap: perl -e '.::shell_quote_scalar($script)."\n"); + return 'perl -e '.::shell_quote_scalar($script)." ". + $Global::shell." -c ".::shell_quote_scalar($command); +} + +sub filter_through_compress { + my $self = shift; + # Send stdout to stdin for $opt::compress_program(1) + # Send stderr to stdin for $opt::compress_program(2) + # cattail get pid: $pid = $self->fh($fdno,'rpid'); + my $cattail = cattail(); + + for my $fdno (1,2) { + # Make a communication file.
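+ # Sketch of the handshake implemented below, as this editor reads it: + # the compressor writes a newline into $comfile as soon as it has opened + # the output file; cattail polls "while(! -s $comfile)" and only unlinks + # the output file and $comfile after that byte appears, so nothing is + # removed before both ends hold the file open.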
+ my ($fh, $comfile) = ::tmpfile(SUFFIX => ".pac"); + close $fh; + # Compressor: (echo > $comfile; compress pipe) > output + # When the echo is written to $comfile, + # it is known that output file is opened, + # thus output file can then be removed by the decompressor. + my $wpid = open(my $fdw,"|-", "(echo > $comfile; ". + empty_input_wrapper($opt::compress_program).") >". + $self->fh($fdno,'name')) || die $?; + $self->set_fh($fdno,'w',$fdw); + $self->set_fh($fdno,'wpid',$wpid); + # Decompressor: open output; -s $comfile > 0: rm $comfile output; + # decompress output > stdout + my $rpid = open(my $fdr, "-|", "perl", "-e", $cattail, $comfile, + $opt::decompress_program, $wpid, + $self->fh($fdno,'name'),$self->fh($fdno,'unlink')) + || die $?; + $self->set_fh($fdno,'r',$fdr); + $self->set_fh($fdno,'rpid',$rpid); + } +} + +sub max_file_name_length { + # Figure out the max length of a subdir + # TODO and the max total length + # Ext4 = 255,130816 + # Uses: + # $Global::max_file_length is set + # Returns: + # $Global::max_file_length + my $testdir = shift; + + my $upper = 8_000_000; + # Dir length of 8 chars is supported everywhere + my $len = 8; + my $dir = "x"x$len; + do { + rmdir($testdir."/".$dir); + $len *= 16; + $dir = "x"x$len; + } while ($len < $upper and mkdir $testdir."/".$dir); + # Then search for the actual max length between $len/16 and $len + my $min = $len/16; + my $max = $len; + while($max-$min > 5) { + # If we are within 5 chars of the exact value: + # it is not worth the extra time to find the exact value + my $test = int(($min+$max)/2); + $dir = "x"x$test; + if(mkdir $testdir."/".$dir) { + rmdir($testdir."/".$dir); + $min = $test; + } else { + $max = $test; + } + } + $Global::max_file_length = $min; + return $min; +} + +sub set_fh { + # Set file handle + my ($self, $fd_no, $key, $fh) = @_; + $self->{'fd'}{$fd_no,$key} = $fh; +} + +sub fh { + # Get file handle + my ($self, $fd_no, $key) = @_; + return $self->{'fd'}{$fd_no,$key}; +} + +sub write { + my $self = shift; + my $remaining_ref = shift; + my $stdin_fh = $self->fh(0,"w"); + + my $len = length $$remaining_ref; + # syswrite may not write all in one go, + # so make sure everything is written. + my $written; + while($written = syswrite($stdin_fh,$$remaining_ref)){ + substr($$remaining_ref,0,$written) = ""; + } +} + +sub set_block { + # Copy stdin buffer from $block_ref up to $endpos + # Prepend with $header_ref if virgin (i.e. not --roundrobin) + # Remove $recstart and $recend if needed + # Input: + # $header_ref = ref to $header to prepend + # $buffer_ref = ref to $buffer containing the block + # $endpos = length of $block to pass on + # $recstart = --recstart regexp + # $recend = --recend regexp + # Returns: + # N/A + my $self = shift; + my ($header_ref,$buffer_ref,$endpos,$recstart,$recend) = @_; + $self->{'block'} = ($self->virgin() ? $$header_ref : ""). 
+ substr($$buffer_ref,0,$endpos); + if($opt::remove_rec_sep) { + remove_rec_sep(\$self->{'block'},$recstart,$recend); + } + $self->{'block_length'} = length $self->{'block'}; + $self->{'block_pos'} = 0; + $self->add_transfersize($self->{'block_length'}); +} + +sub block_ref { + my $self = shift; + return \$self->{'block'}; +} + + +sub block_length { + my $self = shift; + return $self->{'block_length'}; +} + +sub remove_rec_sep { + my ($block_ref,$recstart,$recend) = @_; + # Remove record separator + $$block_ref =~ s/$recend$recstart//gos; + $$block_ref =~ s/^$recstart//os; + $$block_ref =~ s/$recend$//os; +} + +sub non_blocking_write { + my $self = shift; + my $something_written = 0; + use POSIX qw(:errno_h); + + my $in = $self->fh(0,"w"); + my $rv = syswrite($in, + substr($self->{'block'},$self->{'block_pos'})); + if (!defined($rv) && $! == EAGAIN) { + # would block - but would have written + $something_written = 0; + # avoid triggering auto expanding block + $Global::no_autoexpand_block ||= 1; + } elsif ($self->{'block_pos'}+$rv != $self->{'block_length'}) { + # incomplete write + # Remove the written part + $self->{'block_pos'} += $rv; + $something_written = $rv; + } else { + # successfully wrote everything + # Empty block to free memory + my $a = ""; + $self->set_block(\$a,\$a,0,"",""); + $something_written = $rv; + } + ::debug("pipe", "Non-block: ", $something_written); + return $something_written; +} + + +sub virgin { + my $self = shift; + return $self->{'virgin'}; +} + +sub set_virgin { + my $self = shift; + $self->{'virgin'} = shift; +} + +sub pid { + my $self = shift; + return $self->{'pid'}; +} + +sub set_pid { + my $self = shift; + $self->{'pid'} = shift; +} + +sub starttime { + # Returns: + # UNIX-timestamp this job started + my $self = shift; + return sprintf("%.3f",$self->{'starttime'}); +} + +sub set_starttime { + my $self = shift; + my $starttime = shift || ::now(); + $self->{'starttime'} = $starttime; + $opt::sqlworker and + $Global::sql->update("SET Starttime = ? WHERE Seq = ".$self->seq(), + $starttime); +} + +sub runtime { + # Returns: + # Run time in seconds with 3 decimals + my $self = shift; + return sprintf("%.3f",int(($self->endtime() - $self->starttime())*1000)/1000); +} + +sub endtime { + # Returns: + # UNIX-timestamp this job ended + # 0 if not ended yet + my $self = shift; + return ($self->{'endtime'} || 0); +} + +sub set_endtime { + my $self = shift; + my $endtime = shift; + $self->{'endtime'} = $endtime; + $opt::sqlworker and + $Global::sql->update("SET JobRuntime = ? WHERE Seq = ".$self->seq(), + $self->runtime()); +} + +sub is_timedout { + # Is the job timedout? 
+ # Input: + # $delta_time = time that the job may run + # Returns: + # True or false + my $self = shift; + my $delta_time = shift; + return time > $self->{'starttime'} + $delta_time; +} + +sub kill { + my $self = shift; + $self->set_exitstatus(-1); + ::kill_sleep_seq($self->pid()); +} + +sub failed { + # return number of times failed for this $sshlogin + # Input: + # $sshlogin + # Returns: + # Number of times failed for $sshlogin + my $self = shift; + my $sshlogin = shift; + return $self->{'failed'}{$sshlogin}; +} + +sub failed_here { + # return number of times failed for the current $sshlogin + # Returns: + # Number of times failed for this sshlogin + my $self = shift; + return $self->{'failed'}{$self->sshlogin()}; +} + +sub add_failed { + # increase the number of times failed for this $sshlogin + my $self = shift; + my $sshlogin = shift; + $self->{'failed'}{$sshlogin}++; +} + +sub add_failed_here { + # increase the number of times failed for the current $sshlogin + my $self = shift; + $self->{'failed'}{$self->sshlogin()}++; +} + +sub reset_failed { + # reset the number of times failed for this $sshlogin + my $self = shift; + my $sshlogin = shift; + delete $self->{'failed'}{$sshlogin}; +} + +sub reset_failed_here { + # reset the number of times failed for the current $sshlogin + my $self = shift; + delete $self->{'failed'}{$self->sshlogin()}; +} + +sub min_failed { + # Returns: + # the number of sshlogins this command has failed on + # the minimal number of times this command has failed + my $self = shift; + my $min_failures = + ::min(map { $self->{'failed'}{$_} } keys %{$self->{'failed'}}); + my $number_of_sshlogins_failed_on = scalar keys %{$self->{'failed'}}; + return ($number_of_sshlogins_failed_on,$min_failures); +} + +sub total_failed { + # Returns: + # $total_failures = the number of times this command has failed + my $self = shift; + my $total_failures = 0; + for (values %{$self->{'failed'}}) { + $total_failures += $_; + } + return $total_failures; +} + +{ + my $script; + + sub postpone_exit_and_cleanup { + # Command to remove files and dirs (given as args) without + # affecting the exit value in $?/$status. + if(not $script) { + $script = "perl -e '". + ::spacefree(0,q{ + $bash=shift; + $csh=shift; + for(@ARGV){ + unlink; + rmdir; + } + if($bash=~s/h//) { + exit $bash; + } + exit $csh; + }). + "' ".'"$?h" "$status" '; + } + return $script + } +} + +{ + my $script; + + sub fifo_wrap { + # Script to create a fifo, run a command on the fifo + # while copying STDIN to the fifo, and finally + # remove the fifo and return the exit code of the command. + if(not $script) { + # {} == $PARALLEL_TMP for --fifo + # To make it csh compatible a wrapper needs to: + # * mkfifo + # * spawn $command & + # * cat > fifo + # * waitpid to get the exit code from $command + # * be less than 1000 chars long + $script = "perl -e '".
+ (::spacefree + (0, q{ + ($s,$c,$f) = @ARGV; + # mkfifo $PARALLEL_TMP + system "mkfifo", $f; + # spawn $shell -c $command & + $pid = fork || exec $s, "-c", $c; + open($o,">",$f) || die $!; + # cat > $PARALLEL_TMP + while(sysread(STDIN,$buf,131072)){ + syswrite $o, $buf; + } + close $o; + # waitpid to get the exit code from $command + waitpid $pid,0; + # Cleanup + unlink $f; + exit $?/256; + }))."'"; + } + return $script; + } +} + +sub wrapped { + # Wrap command with: + # * --shellquote + # * --nice + # * --cat + # * --fifo + # * --sshlogin + # * --pipepart (@Global::cat_partials) + # * --pipe + # * --tmux + # The ordering of the wrapping is important: + # * --nice/--cat/--fifo should be done on the remote machine + # * --pipepart/--pipe should be done on the local machine inside --tmux + # Uses: + # $opt::shellquote + # $opt::nice + # $Global::shell + # $opt::cat + # $opt::fifo + # @Global::cat_partials + # $opt::pipe + # $opt::tmux + # Returns: + # $self->{'wrapped'} = the command wrapped with the above + my $self = shift; + if(not defined $self->{'wrapped'}) { + my $command = $self->replaced(); + if($opt::shellquote) { + # Prepend /bin/echo (echo no-/bin is wrong in csh) + # and quote twice + $command = "/bin/echo " . + ::shell_quote_scalar(::shell_quote_scalar($command)); + } + if($opt::cat) { + # In '--cat' and '--fifo' {} == $PARALLEL_TMP. + # This is to make it possible to compute $PARALLEL_TMP on + # the fly when running remotely. + # $ENV{PARALLEL_TMP} is set in the remote wrapper before + # the command is run. + # + # Prepend 'cat > $PARALLEL_TMP;' + # Append 'unlink $PARALLEL_TMP without affecting $?' + $command = + 'cat > $PARALLEL_TMP;'. + $command.";". postpone_exit_and_cleanup(). + '$PARALLEL_TMP'; + } elsif($opt::fifo) { + # Prepend 'mkfifo {}; (' + # Append ') & cat > {}; wait; ' + # Append 'unlink {} without affecting $?' + $command = fifo_wrap(). " ". + $Global::shell. " ". + ::shell_quote_scalar($command). + ' $PARALLEL_TMP'. + ';'; + } + if($ENV{'PARALLEL_ENV'}) { + if(-e $ENV{'PARALLEL_ENV'}) { + # This is a file/fifo: Replace envvar with content of file + open(my $parallel_env, "<", $ENV{'PARALLEL_ENV'}) || + ::die_bug("Cannot read parallel_env from $ENV{'PARALLEL_ENV'}"); + local $/; + $ENV{'PARALLEL_ENV'} = <$parallel_env>; + close $parallel_env; + } + # If $PARALLEL_ENV set, put that in front of the command + # Used for env_parallel.* + # Map \001 to \n to make it easier to quote \n in $PARALLEL_ENV + $ENV{'PARALLEL_ENV'} =~ s/\001/\n/g; + if($Global::shell =~ /zsh/) { + # The extra 'eval' will make aliases work, too + $command = $ENV{'PARALLEL_ENV'}."\n". + "eval ".::shell_quote_scalar($command); + } else { + $command = $ENV{'PARALLEL_ENV'}."\n".$command; + } + } + # Wrap with ssh + transferring of files + $command = $self->sshlogin_wrap($command); + if(@Global::cat_partials) { + # Prepend: + # < /tmp/foo perl -e 'while(@ARGV) { + # sysseek(STDIN,shift,0) || die; $left = shift; + # while($read = sysread(STDIN,$buf, ($left > 131072 ? 131072 : $left))){ + # $left -= $read; syswrite(STDOUT,$buf); + # } + # }' 0 0 0 11 | + $command = (shift @Global::cat_partials). " | ($command)"; + } elsif($opt::pipe) { + # Wrap with EOF-detector to avoid starting $command if EOF. + $command = empty_input_wrapper($command); + } + if($opt::tmux) { + # Wrap command with 'tmux' + $command = $self->tmux_wrap($command); + } + if($Global::cshell + and + length $command > 499) { + # csh does not like words longer than 1000 (499 quoted) + # $command = "perl -e '".base64_zip_eval()."' ".
+ # join" ",string_zip_base64( + # 'exec "'.::perl_quote_scalar($command).'"'); + $command = base64_wrap("exec \"$Global::shell\",'-c',\"". + ::perl_quote_scalar($command).'"'); + } + $self->{'wrapped'} = $command; + } + return $self->{'wrapped'}; +} + +sub set_sshlogin { + my $self = shift; + my $sshlogin = shift; + $self->{'sshlogin'} = $sshlogin; + delete $self->{'sshlogin_wrap'}; # If sshlogin is changed the wrap is wrong + delete $self->{'wrapped'}; + $opt::sqlworker and + $Global::sql->update("SET Host = ? WHERE Seq = ".$self->seq(), + $sshlogin->string()); +} + +sub sshlogin { + my $self = shift; + return $self->{'sshlogin'}; +} + +sub string_base64 { + # Base64 encode strings into 1000 byte blocks. + # 1000 bytes is the largest word size csh supports + # Input: + # @strings = to be encoded + # Returns: + # @base64 = 1000 byte block + $Global::use{"MIME::Base64"} ||= eval "use MIME::Base64; 1;"; + my @base64 = unpack("(A1000)*",encode_base64((join"",@_),"")); + return @base64; +} + +sub string_zip_base64 { + # Pipe string through 'bzip2 -9' and base64 encode it into 1000 + # byte blocks. + # 1000 bytes is the largest word size csh supports + # Zipping will make exporting big environments work, too + # Input: + # @strings = to be encoded + # Returns: + # @base64 = 1000 byte block + my($zipin_fh, $zipout_fh,@base64); + ::open3($zipin_fh,$zipout_fh,">&STDERR","bzip2 -9"); + if(fork) { + close $zipin_fh; + $Global::use{"MIME::Base64"} ||= eval "use MIME::Base64; 1;"; + # Split base64 encoded into 1000 byte blocks + @base64 = unpack("(A1000)*",encode_base64((join"",<$zipout_fh>),"")); + close $zipout_fh; + } else { + close $zipout_fh; + print $zipin_fh @_; + close $zipin_fh; + exit; + } + ::debug("base64","Orig:@_\nAs bzip2 base64:@base64\n"); + return @base64; +} + +sub base64_zip_eval { + # Script that: + # * reads base64 strings from @ARGV + # * decodes them + # * pipes through 'bzip2 -dc' + # * evals the result + # Reverse of string_zip_base64 + eval + # Will be wrapped in ' so single quote is forbidden + # Returns: + # $script = 1-liner for perl -e + my $script = ::spacefree(0,q{ + @GNU_Parallel=split /_/,"use_IPC::Open3;_use_MIME::Base64"; + eval"@GNU_Parallel"; + + $SIG{CHLD}="IGNORE"; + # Search for bzip2. Not found => use default path + my $zip = (grep { -x $_ } "/usr/local/bin/bzip2")[0] || "bzip2"; + # $in = stdin on $zip, $out = stdout from $zip + # Forget my() to save chars for csh + # my($in, $out,$eval); + open3($in,$out,">&STDERR",$zip,"-dc"); + if(my $perlpid = fork) { + close $in; + $eval = join "", <$out>; + close $out; + } else { + close $out; + # Pipe decoded base64 into 'bzip2 -dc' + print $in (decode_base64(join"",@ARGV)); + close $in; + exit; + } + wait; + eval $eval; + }); + ::debug("base64",$script,"\n"); + return $script; +} + +sub base64_wrap { + # base64 encode Perl code + # Split it into chunks of < 1000 bytes + # Prepend it with a decoder that eval's it + # Input: + # $eval_string = Perl code to run + # Returns: + # $shell_command = shell command that runs $eval_string + my $eval_string = shift; + return + "perl -e ". + ::shell_quote_scalar(base64_zip_eval())." ". + join" ",::shell_quote(string_zip_base64($eval_string)); +} + +sub base64_eval { + # Script that: + # * reads base64 strings from @ARGV + # * decodes them + # * evals the result + # Reverse of string_base64 + eval + # Will be wrapped in ' so single quote is forbidden. + # Spaces are stripped so spaces cannot be significant. 
+ # The funny 'use IPC::Open3'-syntax is to avoid spaces and + # to make it clear that this is a GNU Parallel command + # when looking at the process table. + # Returns: + # $script = 1-liner for perl -e + my $script = ::spacefree(0,q{ + @GNU_Parallel=("use","IPC::Open3;","use","MIME::Base64"); + eval "@GNU_Parallel"; + my $eval = decode_base64(join"",@ARGV); + eval $eval; + }); + ::debug("base64",$script,"\n"); + return $script; +} + +sub sshlogin_wrap { + # Wrap the command with the commands needed to run remotely + # Input: + # $command = command to run + # Returns: + # $self->{'sshlogin_wrap'} = command wrapped with ssh+transfer commands + sub monitor_parent_sshd_script { + # This script is to solve the problem of + # * not mixing STDERR and STDOUT + # * terminating with ctrl-c + # If its parent is ssh: all good + # If its parent is init(1): ssh died, so kill children + my $monitor_parent_sshd_script; + + if(not $monitor_parent_sshd_script) { + $monitor_parent_sshd_script = + # This will be packed in ', so only use " + ::spacefree(0,'$shell = "'.($ENV{'PARALLEL_SHELL'} || '$ENV{SHELL}').'";'. + '$tmpdir = "'.::perl_quote_scalar($ENV{'TMPDIR'}).'";'. + '$nice = '.$opt::nice.';'. + q{ + # Set $PARALLEL_TMP to a non-existent file name in $TMPDIR + do { + $ENV{PARALLEL_TMP} = $tmpdir."/par". + join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5); + } while(-e $ENV{PARALLEL_TMP}); + $SIG{CHLD} = sub { $done = 1; }; + $pid = fork; + unless($pid) { + # Make own process group to be able to kill HUP it later + setpgrp; + eval { setpriority(0,0,$nice) }; + exec $shell, "-c", ($bashfunc."@ARGV"); + die "exec: $!\n"; + } + do { + # Parent is not init (ppid=1), so sshd is alive + # Exponential sleep up to 1 sec + $s = $s < 1 ? 0.001 + $s * 1.03 : $s; + select(undef, undef, undef, $s); + } until ($done || getppid == 1); + # Kill HUP the process group if job not done + kill(SIGHUP, -${pid}) unless $done; + wait; + exit ($?&127 ? 128+($?&127) : 1+$?>>8) + }); + } + return $monitor_parent_sshd_script; + } + + sub vars_to_export { + # Uses: + # @opt::env + my @vars = ("parallel_bash_environment"); + for my $varstring (@opt::env) { + # Split up --env VAR1,VAR2 + push @vars, split /,/, $varstring; + } + for (@vars) { + if(-r $_ and not -d) { + # Read as environment definition bug #44041 + # TODO parse this + my $fh = ::open_or_exit($_); + $Global::envdef = join("",<$fh>); + close $fh; + } + } + if(grep { /^_$/ } @vars) { + # --env _ + # Include all vars that are not in a clean environment + if(open(my $vars_fh, "<", $ENV{'HOME'} . "/.parallel/ignored_vars")) { + my @ignore = <$vars_fh>; + chomp @ignore; + my %ignore; + @ignore{@ignore} = @ignore; + close $vars_fh; + push @vars, grep { not defined $ignore{$_} } keys %ENV; + @vars = grep { not /^_$/ } @vars; + } else { + ::error("Run '$Global::progname --record-env' in a clean environment first."); + ::wait_and_exit(255); + } + } + # Duplicate vars as BASH functions to include post-shellshock functions (v1+v2) + # So --env myfunc should look for BASH_FUNC_myfunc() and BASH_FUNC_myfunc%% + push(@vars, "PARALLEL_PID", "PARALLEL_SEQ", + map { ("BASH_FUNC_$_()", "BASH_FUNC_$_%%") } @vars); + # Keep only defined variables + return grep { defined($ENV{$_}) } @vars; + } + + sub env_as_eval { + # Returns: + # $eval = '$ENV{"..."}=...; ...' 
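+ # A hypothetical illustration (not from upstream): with --env FOO and + # FOO=bar in the environment, the $envset built below would be: + # $ENV{"FOO"}="bar"; + # Bash functions are not set this way; they are collected into $bashfunc + # further down and re-exported on the command line with "export -f".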
+ my @vars = vars_to_export(); + my $csh_friendly = not grep { /\n/ } @ENV{@vars}; + my @bash_functions = grep { substr($ENV{$_},0,4) eq "() {" } @vars; + my @non_functions = (grep { !/PARALLEL_ENV/ } + grep { substr($ENV{$_},0,4) ne "() {" } @vars); + + # eval of @envset will set %ENV + my $envset = join"", map { + '$ENV{"'.::perl_quote_scalar($_).'"}="'. + ::perl_quote_scalar($ENV{$_}).'";'; } @non_functions; + + # running @bashfunc on the command line, will set the functions + my @bashfunc = map { + my $v=$_; + s/BASH_FUNC_(.*)(\(\)|%%)/$1/; + "$_$ENV{$v};export -f $_ >/dev/null;" } @bash_functions; + # eval $bashfuncset will set $bashfunc + my $bashfuncset; + if(@bashfunc) { + # Functions are not supported for all shells + if($Global::shell !~ m:(bash|rbash|zsh|rzsh|dash|ksh):) { + ::warning("Shell functions may not be supported in $Global::shell."); + } + $bashfuncset = + '@bash_functions=qw('."@bash_functions".");". + ::spacefree(1,'$shell="'.($ENV{'PARALLEL_SHELL'} || '$ENV{SHELL}').'";'.q{ + if($shell=~/csh/) { + print STDERR "CSH/TCSH DO NOT SUPPORT newlines IN VARIABLES/FUNCTIONS. Unset @bash_functions\n"; + exec "false"; + } + }). + "\n".'$bashfunc = "'.::perl_quote_scalar("@bashfunc").'";'; + } else { + $bashfuncset = '$bashfunc = "";' + } + if($ENV{"parallel_bash_environment"}) { + $bashfuncset .= '$bashfunc .= "eval\ \"\$parallel_bash_environment\"\;";'; + } + ::debug("base64",$envset,$bashfuncset,"\n"); + return $csh_friendly,$envset,$bashfuncset; + } + + my $self = shift; + my $command = shift; + # TODO test that *sh -c 'parallel --env' use *sh + if(not defined $self->{'sshlogin_wrap'}) { + my $sshlogin = $self->sshlogin(); + my $serverlogin = $sshlogin->serverlogin(); + my $quoted_remote_command; + $ENV{'PARALLEL_SEQ'} = $self->seq(); + $ENV{'PARALLEL_PID'} = $$; + if($serverlogin eq ":") { + if($opt::workdir) { + # Create workdir if needed. Then cd to it. + my $wd = $self->workdir(); + if($opt::workdir eq "." or $opt::workdir eq "...") { + # If $wd does not start with '/': Prepend $HOME + $wd =~ s:^([^/]):$ENV{'HOME'}/$1:; + } + ::mkdir_or_die($wd); + $command = "cd ".::shell_quote_scalar($wd)." || exit 255; ".$command; + } + if(@opt::env) { + # Prepend with environment setter, which sets functions in zsh + my ($csh_friendly,$envset,$bashfuncset) = env_as_eval(); + my $perl_code = $envset.$bashfuncset. + '@ARGV="'.::perl_quote_scalar($command).'";'. + "exec\"$Global::shell\",\"-c\",\(\$bashfunc.\"\@ARGV\"\)\;die\"exec:\$\!\\n\"\;"; + if(length $perl_code > 999 + or + not $csh_friendly + or + $command =~ /\n/) { + # csh does not deal well with > 1000 chars in one word + # csh does not deal well with $ENV with \n + $self->{'sshlogin_wrap'} = base64_wrap($perl_code); + } else { + $self->{'sshlogin_wrap'} = "perl -e ".::shell_quote_scalar($perl_code); + } + } else { + $self->{'sshlogin_wrap'} = $command; + } + } else { + my $pwd = ""; + if($opt::workdir) { + # Create remote workdir if needed. Then cd to it. + my $wd = $self->workdir(); + $pwd = qq{system("mkdir","-p","--","$wd"); chdir "$wd" ||}. + qq{print(STDERR "parallel: Cannot chdir to $wd\\n") && exit 255;}; + } + my ($csh_friendly,$envset,$bashfuncset) = env_as_eval(); + my $remote_command = $pwd.$envset.$bashfuncset. + '@ARGV="'.::perl_quote_scalar($command).'";'. + monitor_parent_sshd_script(); + $quoted_remote_command = "perl -e ". 
+ ::shell_quote_scalar($remote_command); + my $dq_remote_command = + ::shell_quote_scalar($quoted_remote_command); + if(length $dq_remote_command > 999 + or + not $csh_friendly + or + $command =~ /\n/) { + # csh does not deal well with > 1000 chars in one word + # csh does not deal well with $ENV with \n + $quoted_remote_command = + "perl -e ". + ::shell_quote_scalar(::shell_quote_scalar(base64_zip_eval()))." ". + join" ",::shell_quote(::shell_quote(string_zip_base64($remote_command))); + } else { + $quoted_remote_command = $dq_remote_command; + } + + my $sshcmd = $sshlogin->sshcommand(); + my ($pre,$post,$cleanup)=("","",""); + # --transfer + $pre .= $self->sshtransfer(); + # --return + $post .= $self->sshreturn(); + # --cleanup + $post .= $self->sshcleanup(); + if($post) { + # We need to save the exit status of the job + $post = '_EXIT_status=$?; ' . $post . ' exit $_EXIT_status;'; + } + $self->{'sshlogin_wrap'} = + ($pre + . "$sshcmd $serverlogin -- exec " + . $quoted_remote_command + . ";" + . $post); + } + } + return $self->{'sshlogin_wrap'}; +} + +sub transfer { + # Files to transfer + # Non-quoted and with {...} substituted + # Returns: + # @transfer - File names of files to transfer + my $self = shift; + + my $transfersize = 0; + my @transfer = $self->{'commandline'}-> + replace_placeholders($self->{'commandline'}{'transfer_files'},0,0); + for(@transfer) { + # filesize + if(-e $_) { + $transfersize += (stat($_))[7]; + } + } + $self->add_transfersize($transfersize); + return @transfer; +} + +sub transfersize { + my $self = shift; + return $self->{'transfersize'}; +} + +sub add_transfersize { + my $self = shift; + my $transfersize = shift; + $self->{'transfersize'} += $transfersize; + $opt::sqlworker and + $Global::sql->update("SET Send = ? WHERE Seq = ".$self->seq(), + $self->{'transfersize'}); +} + +sub sshtransfer { + # Returns for each transfer file: + # rsync $file remote:$workdir + my $self = shift; + my @pre; + my $sshlogin = $self->sshlogin(); + my $workdir = $self->workdir(); + for my $file ($self->transfer()) { + push @pre, $sshlogin->rsync_transfer_cmd($file,$workdir).";"; + } + return join("",@pre); +} + +sub return { + # Files to return + # Non-quoted and with {...} substituted + # Returns: + # @non_quoted_filenames + my $self = shift; + return $self->{'commandline'}-> + replace_placeholders($self->{'commandline'}{'return_files'},0,0); +} + +sub returnsize { + # This is called after the job has finished + # Returns: + # $number_of_bytes transferred in return + my $self = shift; + for my $file ($self->return()) { + if(-e $file) { + $self->{'returnsize'} += (stat($file))[7]; + } + } + return $self->{'returnsize'}; +} + +sub add_returnsize { + my $self = shift; + my $returnsize = shift; + $self->{'returnsize'} += $returnsize; + $opt::sqlworker and + $Global::sql->update("SET Receive = ? WHERE Seq = ".$self->seq(), + $self->{'returnsize'}); +} + +sub sshreturn { + # Returns for each return-file: + # rsync remote:$workdir/$file . + my $self = shift; + my $sshlogin = $self->sshlogin(); + my $sshcmd = $sshlogin->sshcommand(); + my $serverlogin = $sshlogin->serverlogin(); + my $rsync_opt = "-rlDzR -e".::shell_quote_scalar($sshcmd); + my $pre = ""; + for my $file ($self->return()) { + $file =~ s:^\./::g; # Remove ./ if any + my $relpath = ($file !~ m:^/:) || ($file =~ m:/\./:); # Is the path relative or /./? 
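+ # Behaviour relied on below: rsync --relative (-R) treats "/./" in a + # source path as the start of the implied path, so a hypothetical + # "rsync -rlDzR server:/a/./b/c ." recreates only b/c locally; $relpath + # decides whether that trick or an absolute destination is used.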
+ my $cd = ""; + my $wd = ""; + if($relpath) { + # rsync -avR /foo/./bar/baz.c remote:/tmp/ + # == (on old systems) + # rsync -avR --rsync-path="cd /foo; rsync" remote:bar/baz.c /tmp/ + $wd = ::shell_quote_file($self->workdir()."/"); + } + # Only load File::Basename if actually needed + $Global::use{"File::Basename"} ||= eval "use File::Basename; 1;"; + # dir/./file means relative to dir, so remove dir on remote + $file =~ m:(.*)/\./:; + my $basedir = $1 ? ::shell_quote_file($1."/") : ""; + my $nobasedir = $file; + $nobasedir =~ s:.*/\./::; + $cd = ::shell_quote_file(::dirname($nobasedir)); + my $rsync_cd = '--rsync-path='.::shell_quote_scalar("cd $wd$cd; rsync"); + my $basename = ::shell_quote_scalar(::shell_quote_file(::basename($file))); + # --return + # mkdir -p /home/tange/dir/subdir/; + # rsync (--protocol 30) -rlDzR --rsync-path="cd /home/tange/dir/subdir/; rsync" + # server:file.gz /home/tange/dir/subdir/ + $pre .= "mkdir -p $basedir$cd && ".$sshlogin->rsync()." $rsync_cd $rsync_opt $serverlogin:". + $basename . " ".$basedir.$cd.";"; + } + return $pre; +} + +sub sshcleanup { + # Return the sshcommand needed to remove the file + # Returns: + # ssh command needed to remove files from sshlogin + my $self = shift; + my $sshlogin = $self->sshlogin(); + my $sshcmd = $sshlogin->sshcommand(); + my $serverlogin = $sshlogin->serverlogin(); + my $workdir = $self->workdir(); + my $cleancmd = ""; + + for my $file ($self->remote_cleanup()) { + my @subworkdirs = parentdirs_of($file); + $cleancmd .= $sshlogin->cleanup_cmd($file,$workdir).";"; + } + if(defined $opt::workdir and $opt::workdir eq "...") { + $cleancmd .= "$sshcmd $serverlogin -- rm -rf " . ::shell_quote_scalar($workdir).';'; + } + return $cleancmd; +} + +sub remote_cleanup { + # Returns: + # Files to remove at cleanup + my $self = shift; + if($opt::cleanup) { + my @transfer = $self->transfer(); + my @return = $self->return(); + return (@transfer,@return); + } else { + return (); + } +} + +sub workdir { + # Returns: + # the workdir on a remote machine + my $self = shift; + if(not defined $self->{'workdir'}) { + my $workdir; + if(defined $opt::workdir) { + if($opt::workdir eq ".") { + # . means current dir + my $home = $ENV{'HOME'}; + eval 'use Cwd'; + my $cwd = cwd(); + $workdir = $cwd; + if($home) { + # If homedir exists: remove the homedir from + # workdir if cwd starts with homedir + # E.g. /home/foo/my/dir => my/dir + # E.g. /tmp/my/dir => /tmp/my/dir + my ($home_dev, $home_ino) = (stat($home))[0,1]; + my $parent = ""; + my @dir_parts = split(m:/:,$cwd); + my $part; + while(defined ($part = shift @dir_parts)) { + $part eq "" and next; + $parent .= "/".$part; + my ($parent_dev, $parent_ino) = (stat($parent))[0,1]; + if($parent_dev == $home_dev and $parent_ino == $home_ino) { + # dev and ino is the same: We found the homedir. + $workdir = join("/",@dir_parts); + last; + } + } + } + if($workdir eq "") { + $workdir = "."; + } + } elsif($opt::workdir eq "...") { + $workdir = ".parallel/tmp/" . ::hostname() . "-" . $$ + . "-" . $self->seq(); + } else { + $workdir = $self->{'commandline'}-> + replace_placeholders([$opt::workdir],0,0); + #$workdir = $opt::workdir; + # Rsync treats /./ special. 
We dont want that + $workdir =~ s:/\./:/:g; # Remove /./ + $workdir =~ s:(.)/+$:$1:; # Remove ending / if any + $workdir =~ s:^\./::g; # Remove starting ./ if any + } + } else { + $workdir = "."; + } + $self->{'workdir'} = ::shell_quote_scalar($workdir); + } + return $self->{'workdir'}; +} + +sub parentdirs_of { + # Return: + # all parentdirs except . of this dir or file - sorted desc by length + my $d = shift; + my @parents = (); + while($d =~ s:/[^/]+$::) { + if($d ne ".") { + push @parents, $d; + } + } + return @parents; +} + +sub start { + # Setup STDOUT and STDERR for a job and start it. + # Returns: + # job-object or undef if job not to run + + sub open3_setpgrp_internal { + # Run open3+setpgrp followed by the command + # Input: + # $stdin_fh = Filehandle to use as STDIN + # $stdout_fh = Filehandle to use as STDOUT + # $stderr_fh = Filehandle to use as STDERR + # $command = Command to run + # Returns: + # $pid = Process group of job started + my ($stdin_fh,$stdout_fh,$stderr_fh,$command) = @_; + my $pid; + local (*OUT,*ERR); + open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!"); + open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!"); + # The eval is needed to catch exception from open3 + eval { + if(not $pid = ::open3($stdin_fh, ">&OUT", ">&ERR", "-")) { + # Each child gets its own process group to make it safe to killall + setpgrp(0,0) || ::die_bug("setpgrp failed"); + exec($Global::shell,"-c",$command) + || ::die_bug("open3-$stdin_fh $command"); + } + }; + return $pid; + } + + sub open3_setpgrp_external { + # Run open3 on $command wrapped with a perl script doing setpgrp + # Works on systems that do not support open3(,,,"-") + # Input: + # $stdin_fh = Filehandle to use as STDIN + # $stdout_fh = Filehandle to use as STDOUT + # $stderr_fh = Filehandle to use as STDERR + # $command = Command to run + # Returns: + # $pid = Process group of job started + my ($stdin_fh,$stdout_fh,$stderr_fh,$command) = @_; + local (*OUT,*ERR); + open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!"); + open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!"); + + my $pid; + my @setpgrp_wrap = + ('perl','-e', + "setpgrp\;eval\{setpriority\(0,0,$opt::nice\)\}\;". + "exec '$Global::shell', '-c', \@ARGV"); + # The eval is needed to catch exception from open3 + eval { + $pid = ::open3($stdin_fh, ">&OUT", ">&ERR", @setpgrp_wrap, $command) + || ::die_bug("open3-$stdin_fh"); + 1; + }; + return $pid; + } + + sub open3_setpgrp { + # Select and run open3_setpgrp_internal/open3_setpgrp_external + no warnings 'redefine'; + my ($outfh,$name) = ::tmpfile(SUFFIX => ".tst"); + # Test to see if open3(x,x,x,"-") is fully supported + # Can an exported bash function be called via open3? + my $script = 'if($pid=::open3($i,$o,$e,"-")) { wait; } '. + 'else { exec("bash","-c","testfun && true"); }'; + my $bash = + ::shell_quote_scalar_default( + "testfun() { rm $name; }; export -f testfun; ". + "perl -MIPC::Open3 -e ". + ::shell_quote_scalar_default($script) + ); + # Redirect STDERR temporarily, + # so errors on MacOS X are ignored. 
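+ # Probe logic, as this editor reads it: the exported bash function + # testfun removes $name, so after the probe an absent $name means + # open3(...,"-") forked and ran it (select the faster internal variant), + # while a surviving $name selects the external wrapper.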
+ open my $saveerr, ">&STDERR"; + open STDERR, '>', "/dev/null"; + # Run the test + ::debug("init",qq{bash -c $bash 2>/dev/null}); + qx{ bash -c $bash 2>/dev/null }; + open STDERR, ">&", $saveerr; + + if(-e $name) { + # Does not support open3(x,x,x,"-") + # or does not have bash: + # Use (slow) external version + unlink($name); + *open3_setpgrp = \&open3_setpgrp_external; + ::debug("init","open3_setpgrp_external chosen\n"); + } else { + # Supports open3(x,x,x,"-") + # This is 0.5 ms faster to run + *open3_setpgrp = \&open3_setpgrp_internal; + ::debug("init","open3_setpgrp_internal chosen\n"); + } + # The sub is now redefined. Call it + return open3_setpgrp(@_); + } + + my $job = shift; + # Get the shell command to be executed (possibly with ssh infront). + my $command = $job->wrapped(); + my $pid; + + if($Global::interactive or $Global::stderr_verbose) { + $job->interactive_start(); + } + # Must be run after $job->interactive_start(): + # $job->interactive_start() may call $job->skip() + if($job->{'commandline'}{'skip'}) { + # $job->skip() was called + $command = "true"; + } + $job->openoutputfiles(); + my($stdout_fh,$stderr_fh) = ($job->fh(1,"w"),$job->fh(2,"w")); + if($opt::ungroup or $opt::sqlworker) { + print_dryrun_and_verbose($stdout_fh,$job,$command); + } + if($opt::dryrun or $opt::sqlmaster) { $command = "true"; } + $ENV{'PARALLEL_SEQ'} = $job->seq(); + $ENV{'PARALLEL_PID'} = $$; + $ENV{'PARALLEL_TMP'} = ::tmpname("par"); + $job->add_rm($ENV{'PARALLEL_TMP'}); + ::debug("run", $Global::total_running, " processes . Starting (", + $job->seq(), "): $command\n"); + if($opt::pipe) { + my ($stdin_fh) = ::gensym(); + $pid = open3_setpgrp($stdin_fh,$stdout_fh,$stderr_fh,$command); + if($opt::roundrobin) { + ::set_fh_non_blocking($stdin_fh); + } + $job->set_fh(0,"w",$stdin_fh); + } elsif ($opt::tty and not $Global::tty_taken and -c "/dev/tty" and + open(my $devtty_fh, "<", "/dev/tty")) { + # Give /dev/tty to the command if no one else is using it + # The eval is needed to catch exception from open3 + local (*IN,*OUT,*ERR); + open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!"); + open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!"); + *IN = $devtty_fh; + # The eval is needed to catch exception from open3 + my @wrap = ('perl','-e', + "eval\{setpriority\(0,0,$opt::nice\)\}\;". 
+ "exec '$Global::shell', '-c', \@ARGV"); + eval { + $pid = ::open3("<&IN", ">&OUT", ">&ERR", @wrap, $command) + || ::die_bug("open3-/dev/tty"); + 1; + }; + $Global::tty_taken = $pid; + close $devtty_fh; + } else { + $pid = open3_setpgrp(::gensym(),$stdout_fh,$stderr_fh,$command); + } + if($pid) { + # A job was started + $Global::total_running++; + $Global::total_started++; + $job->set_pid($pid); + $job->set_starttime(); + $Global::running{$job->pid()} = $job; + if($opt::timeout) { + $Global::timeoutq->insert($job); + } + $Global::newest_job = $job; + $Global::newest_starttime = ::now(); + return $job; + } else { + # No more processes + ::debug("run", "Cannot spawn more jobs.\n"); + return undef; + } +} + +sub interactive_start { + my $self = shift; + my $command = $self->wrapped(); + if($Global::interactive) { + ::status_no_nl("$command ?..."); + open(my $tty_fh, "<", "/dev/tty") || ::die_bug("interactive-tty"); + my $answer = <$tty_fh>; + close $tty_fh; + my $run_yes = ($answer =~ /^\s*y/i); + if (not $run_yes) { + $self->{'commandline'}->skip(); + } + } else { + print $Global::original_stderr "$command\n"; + } +} + +sub print_dryrun_and_verbose { + # For $opt::ungroup we print these ASAP + # For $opt::group they are part of print() + my $stdout_fh = shift; + my $job = shift; + my $command = shift; + if($opt::dryrun or $Global::verbose) { + if($Global::verbose <= 1) { + print $stdout_fh $job->replaced(),"\n"; + } else { + # Verbose level > 1: Print the rsync and stuff + print $stdout_fh $command,"\n"; + } + } + if($opt::sqlworker) { + $Global::sql->update("SET Command = ? WHERE Seq = ".$job->seq(), + $job->replaced()); + } +} + +{ + my $tmuxsocket; + + sub tmux_wrap { + # Wrap command with tmux for session pPID + # Input: + # $actual_command = the actual command being run (incl ssh wrap) + my $self = shift; + my $actual_command = shift; + # Temporary file name. Used for fifo to communicate exit val + my $tmpfifo=::tmpname("tmx"); + $self->add_rm($tmpfifo); + + if(length($tmpfifo) >=100) { + ::error("tmux does not support sockets with path > 100."); + ::wait_and_exit(255); + } + if($opt::tmuxpane) { + # Move the command into a pane in window 0 + $actual_command = 'tmux joinp -t :0 ; '. + 'tmux select-layout -t :0 tiled ; '. + $actual_command; + } + my $visual_command = $self->replaced(); + my $title = $visual_command; + if($visual_command =~ /\0/) { + ::error("Command line contains NUL. 
tmux is confused by NUL."); + ::wait_and_exit(255); + } + # ; causes problems + # ascii 194-245 annoys tmux + $title =~ tr/[\011-\016;\302-\365]//d; + $title = ::shell_quote_scalar($title); + + my $l_act = length($actual_command); + my $l_tit = length($title); + my $l_fifo = length($tmpfifo); + # The line to run contains a 118 chars extra code + the title 2x + my $l_tot = 2 * $l_tit + $l_act + $l_fifo; + + my $quoted_space75 = ::shell_quote_scalar(" ")x75; + while($l_tit < 1000 and + ( + (890 < $l_tot and $l_tot < 1350) + or + (9250 < $l_tot and $l_tot < 9800) + )) { + # tmux blocks for certain lengths: + # 900 < title + command < 1200 + # 9250 < title + command < 9800 + # but only if title < 1000, so expand the title with 75 spaces + # The measured lengths are: + # 996 < (title + whole command) < 1127 + # 9331 < (title + whole command) < 9636 + $title .= $quoted_space75; + $l_tit = length($title); + $l_tot = 2 * $l_tit + $l_act + $l_fifo; + } + + my $tmux; + $ENV{'PARALLEL_TMUX'} ||= "tmux"; + if(not $tmuxsocket) { + $tmuxsocket = ::tmpname("tms"); + ::status("See output with: $ENV{'PARALLEL_TMUX'} -S $tmuxsocket attach"); + } + $tmux = "sh -c '". + $ENV{'PARALLEL_TMUX'}." -S $tmuxsocket new-session -s p$$ -d \"sleep .2\" >/dev/null 2>&1';" . + $ENV{'PARALLEL_TMUX'}." -S $tmuxsocket new-window -t p$$ -n $title"; + + ::debug("tmux", "title len:", $l_tit, " act ", $l_act, " max ", + $Limits::Command::line_max_len, " tot ", + $l_tot, "\n"); + + return "mkfifo $tmpfifo && $tmux ". + # Run in tmux + ::shell_quote_scalar + ( + "(".$actual_command.');'. + # The triple print is needed - otherwise the testsuite fails + q[ perl -e 'while($t++<3){ print $ARGV[0],"\n" }' $?h/$status >> ].$tmpfifo."&". + "echo $title; echo \007Job finished at: `date`;sleep 10" + ). + # Run outside tmux + # Read a / separated line: 0h/2 for csh, 2/0 for bash. + # If csh the first will be 0h, so use the second as exit value. + # Otherwise just use the first value as exit value. + q{; exec perl -e '$/="/";$_=<>;$c=<>;unlink $ARGV; /(\d+)h/ and exit($1);exit$c' }.$tmpfifo; + } +} + +sub is_already_in_results { + # Do we already have results for this job? + # Returns: + # $job_already_run = bool whether there is output for this or not + my $job = $_[0]; + my $args_as_dirname = $job->{'commandline'}->args_as_dirname(); + # prefix/name1/val1/name2/val2/ + my $dir = $opt::results."/".$args_as_dirname; + ::debug("run", "Test $dir/stdout", -e "$dir/stdout", "\n"); + return -e "$dir/stdout"; +} + +sub is_already_in_joblog { + my $job = shift; + return vec($Global::job_already_run,$job->seq(),1); +} + +sub set_job_in_joblog { + my $job = shift; + vec($Global::job_already_run,$job->seq(),1) = 1; +} + +sub should_be_retried { + # Should this job be retried? + # Returns + # 0 - do not retry + # 1 - job queued for retry + my $self = shift; + if (not $opt::retries) { + return 0; + } + if(not $self->exitstatus() and not $self->exitsignal()) { + # Completed with success. If there is a recorded failure: forget it + $self->reset_failed_here(); + return 0; + } else { + # The job failed. Should it be retried? 
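+	# Retry bookkeeping, sketched with a hypothetical invocation:
+	#   parallel --retries 3 -S hostA,hostB ./flaky.sh {} ::: a b c
+	# Each failure is recorded per sshlogin (add_failed_here below), and
+	# the job is requeued until total_failed() reaches --retries.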
+	$self->add_failed_here();
+	if($self->total_failed() == $opt::retries) {
+	    # This has been retried enough
+	    return 0;
+	} else {
+	    # This command should be retried
+	    $self->set_endtime(undef);
+	    $self->reset_exitstatus();
+	    $Global::JobQueue->unget($self);
+	    ::debug("run", "Retry ", $self->seq(), "\n");
+	    return 1;
+	}
+    }
+}
+
+{
+    my (%print_later,$job_seq_to_print);
+
+    sub print_earlier_jobs {
+	# Print jobs whose output is postponed due to --keep-order
+	# Returns: N/A
+	my $job = shift;
+	$print_later{$job->seq()} = $job;
+	$job_seq_to_print ||= 1;
+	::debug("run", "Looking for: $job_seq_to_print ",
+		"This: ", $job->seq(), "\n");
+	for(;vec($Global::job_already_run,$job_seq_to_print,1);
+	    $job_seq_to_print++) {}
+	while(my $j = $print_later{$job_seq_to_print}) {
+	    $j->print();
+	    if($j->endtime()) {
+		# Job finished - look at the next
+		delete $print_later{$job_seq_to_print};
+		$job_seq_to_print++;
+		next;
+	    } else {
+		# Job not finished yet - look at it again next round
+		last;
+	    }
+	}
+    }
+}
+
+sub print {
+    # Print the output of the jobs
+    # Returns: N/A
+
+    my $self = shift;
+    ::debug("print", ">>joboutput ", $self->replaced(), "\n");
+    if($opt::dryrun) {
+	# Nothing was printed to this job:
+	# cleanup tmp files if --files was set
+	::rm($self->fh(1,"name"));
+    }
+    if($opt::pipe and $self->virgin()) {
+	# Skip --joblog, --dryrun, --verbose
+    } else {
+	if($opt::ungroup) {
+	    # NULL returnsize = 0 returnsize
+	    $self->returnsize() or $self->add_returnsize(0);
+	    if($Global::joblog and defined $self->{'exitstatus'}) {
+		# Add to joblog when finished
+		$self->print_joblog();
+		# Printing is only relevant for grouped/--line-buffer output.
+		$opt::ungroup and return;
+	    }
+	}
+
+	# Check for disk full
+	::exit_if_disk_full();
+
+	if(($opt::dryrun or $Global::verbose)
+	   and
+	   not $self->{'verbose_printed'}
+	   and
+	   not $opt::sqlmaster
+	   and
+	   not $opt::sqlworker) {
+	    $self->{'verbose_printed'}++;
+	    if($Global::verbose <= 1) {
+		print STDOUT $self->replaced(),"\n";
+	    } else {
+		# Verbose level > 1: Print the rsync and stuff
+		print STDOUT $self->wrapped(),"\n";
+	    }
+	    # If STDOUT and STDERR are merged,
+	    # we want the command to be printed first
+	    # so flush to avoid STDOUT being buffered
+	    flush STDOUT;
+	}
+    }
+    for my $fdno (sort { $a <=> $b } keys %Global::fd) {
+	# Sort by file descriptor numerically: 1,2,3,..,9,10,11
+	$fdno == 0 and next;
+	my $out_fd = $Global::fd{$fdno};
+	my $in_fh = $self->fh($fdno,"r");
+	if(not $in_fh) {
+	    if(not $Job::file_descriptor_warning_printed{$fdno}++) {
+		# ::warning("File descriptor $fdno not defined\n");
+	    }
+	    next;
+	}
+	::debug("print", "File descriptor $fdno (", $self->fh($fdno,"name"), "):\n");
+	if($opt::linebuffer) {
+	    # Line buffered print out
+	    $self->print_linebuffer($fdno,$in_fh,$out_fd);
+	} elsif($opt::files) {
+	    $self->print_files($fdno,$in_fh,$out_fd);
+	} elsif($opt::tag or defined $opt::tagstring) {
+	    $self->print_tag($fdno,$in_fh,$out_fd);
+	} else {
+	    $self->print_normal($fdno,$in_fh,$out_fd);
+	}
+	flush $out_fd;
+    }
+    ::debug("print", "<<joboutput\n");
+    if(defined $self->{'exitstatus'}
+       and not ($self->virgin() and $opt::pipe)) {
+	if($Global::joblog and not $opt::sqlworker) {
+	    # Add to joblog when finished
+	    $self->print_joblog();
+	}
+	if($opt::sqlworker and not $opt::results) {
+	    $Global::sql->output($self);
+	}
+	if($Global::csv) {
+	    # Add output to CSV when finished
+	    $self->print_csv();
+	}
+    }
+}
+
+{
+    my $header_printed;
+
+    sub print_csv {
+	my $self = shift;
+	my $cmd;
+	if($Global::verbose <= 1) {
+	    $cmd = $self->replaced();
+	} else {
+	    # Verbose
level > 1: Print the rsync and stuff + $cmd = "@command"; + } + my $record_ref = $self->{'commandline'}{'arg_list_flat_orig'}; + + if(not $header_printed) { + # Variable headers + # Normal => V1..Vn + # --header : => first value from column + my @V; + if($opt::header) { + my $i = 1; + @V = (map { $Global::input_source_header{$i++} } + @$record_ref[1..$#$record_ref]); + } else { + my $V = "V1"; + @V = (map { $V++ } @$record_ref[1..$#$record_ref]); + } + print $Global::csv_fh + (map { $$_ } + combine_ref("Seq", "Host", "Starttime", "JobRuntime", + "Send", "Receive", "Exitval", "Signal", "Command", + @V, + "Stdout","Stderr" + )),"\n"; + $header_printed++; + } + # Memory optimization: Overwrite with the joined output + $self->{'output'}{1} = join("", @{$self->{'output'}{1}}); + $self->{'output'}{2} = join("", @{$self->{'output'}{2}}); + print $Global::csv_fh + (map { $$_ } + combine_ref + ($self->seq(), + $self->sshlogin()->string(), + $self->starttime(), sprintf("%0.3f",$self->runtime()), + $self->transfersize(), $self->returnsize(), + $self->exitstatus(), $self->exitsignal(), \$cmd, + \@$record_ref[1..$#$record_ref], + \$self->{'output'}{1}, + \$self->{'output'}{2})),"\n"; + } +} + +sub combine_ref { + # Inspired by Text::CSV_PP::_combine (by Makamaka Hannyaharamitu) + my @part = @_; + my $sep = $Global::csv; + my $quot = '"'; + my @out = (); + + my $must_be_quoted; + for my $column (@part) { + # Memory optimization: Content transferred as reference + if(ref $column ne "SCALAR") { + # Convert all columns to scalar references + my $v = $column; + $column = \$v; + } + if(not defined $$column) { + $$column = ''; + next; + } + + $must_be_quoted = 0; + + if($$column =~ s/$quot/$quot$quot/go){ + # Replace " => "" + $must_be_quoted ||=1; + } + if($$column =~ /[\s\Q$sep\E]/o){ + # Put quotes around if the column contains , + $must_be_quoted ||=1; + } + + $Global::use{"bytes"} ||= eval "use bytes; 1;"; + if ($$column =~ /\0/) { + # Contains \0 => put quotes around + $must_be_quoted ||=1; + } + if($must_be_quoted){ + push @out, \$sep, \$quot, $column, \$quot; + } else { + push @out, \$sep, $column; + } + } + # Pop off a $sep + shift @out; + return @out; +} + +sub print_files { + # Print the name of the file containing stdout on stdout + # Uses: + # $opt::pipe + # $opt::group = Print when job is done + # $opt::linebuffer = Print ASAP + # Returns: N/A + my $self = shift; + my ($fdno,$in_fh,$out_fd) = @_; + + # If the job is dead: close printing fh. Needed for --compress + close $self->fh($fdno,"w"); + if($? and $opt::compress) { + ::error($opt::compress_program." 
failed."); + $self->set_exitstatus(255); + } + if($opt::compress) { + # Kill the decompressor which will not be needed + CORE::kill "TERM", $self->fh($fdno,"rpid"); + } + close $in_fh; + + if($opt::pipe and $self->virgin()) { + # Nothing was printed to this job: + # cleanup unused tmp files if --files was set + for my $fdno (1,2) { + ::rm($self->fh($fdno,"name")); + ::rm($self->fh($fdno,"unlink")); + } + } elsif($fdno == 1 and $self->fh($fdno,"name")) { + print $out_fd $self->tag(),$self->fh($fdno,"name"),"\n"; + if($Global::membuffer) { + push @{$self->{'output'}{$fdno}}, $self->tag(), $self->fh($fdno,"name"); + } + $self->add_returnsize(-s $self->fh($fdno,"name")); + # Mark as printed - do not print again + $self->set_fh($fdno,"name",undef); + } +} + +sub print_linebuffer { + my $self = shift; + my ($fdno,$in_fh,$out_fd) = @_; + my $partial = \$self->{'partial_line',$fdno}; + + if(defined $self->{'exitstatus'}) { + # If the job is dead: close printing fh. Needed for --compress + close $self->fh($fdno,"w"); + if($? and $opt::compress) { + ::error($opt::compress_program." failed."); + $self->set_exitstatus(255); + } + if($opt::compress) { + # Blocked reading in final round + for my $fdno (1,2) { + ::set_fh_blocking($self->fh($fdno,'r')); + } + } + } + if($opt::files or ($opt::results and not $Global::csv)) { + if($fdno == 1 and not $self->fh($fdno,"printed")) { + print $out_fd $self->tag(),$self->fh($fdno,"name"),"\n"; + if($Global::membuffer) { + push @{$self->{'output'}{$fdno}}, $self->tag(), $self->fh($fdno,"name"); + } + $self->set_fh($fdno,"printed",1); + } + # No need for reading $in_fh, as it it from "cat >/dev/null" + } else { + # This seek will clear EOF + seek $in_fh, tell($in_fh), 0; + my $outputlength = 0; + # The read is non-blocking: The $in_fh is set to non-blocking. 
+ # 32768 --tag = 5.1s + # 327680 --tag = 4.4s + # 1024000 --tag = 4.4s + # 3276800 --tag = 4.3s + # 10240000 --tag = 4.3s + # 32768000 --tag = 4.7s + while(read($in_fh,substr($$partial,length $$partial),3276800)) { + # Append to $$partial + # Find the last \n or \r + my $i = (::rindex64($partial,"\n")+1) || (::rindex64($partial,"\r")+1); + if($i) { + # One or more complete lines were found + $outputlength += $i; + if($opt::tag or defined $opt::tagstring) { + # Replace ^ with $tag within the full line + my $tag = $self->tag(); + substr($$partial,0,$i) =~ s/^/$tag/gm; + # Length of partial line has changed: Find the last \n/\r again + $i = (::rindex64($partial,"\n")+1) || (::rindex64($partial,"\r")+1); + } + # Print up to and including the last \n + print $out_fd substr($$partial,0,$i); + # Buffer in memory for SQL and CSV-output + if($Global::membuffer) { + push @{$self->{'output'}{$fdno}}, substr($$partial,0,$i); + } + # Remove the printed part + substr($$partial,0,$i) = ""; + } + } + $self->add_returnsize($outputlength); + } + if(defined $self->{'exitstatus'}) { + if($opt::files or ($opt::results and not $Global::csv)) { + $self->add_returnsize(-s $self->fh($fdno,"name")); + } else { + # If the job is dead: print the remaining partial line + # read remaining + $self->add_returnsize(length $$partial); + if(length $$partial and ($opt::tag or defined $opt::tagstring)) { + my $tag = $self->tag(); + $$partial =~ s/^/$tag/gm; + } + print $out_fd $$partial; + if($Global::membuffer) { + push @{$self->{'output'}{$fdno}}, $$partial; + } + } + # Release the memory + undef $$partial; + if($self->fh($fdno,"rpid") and CORE::kill 0, $self->fh($fdno,"rpid")) { + # decompress still running + } else { + # decompress done: close fh + close $in_fh; + if($? and $opt::compress) { + ::error($opt::decompress_program." failed."); + $self->set_exitstatus(255); + } + } + } +} + +sub print_tag { + my $self = shift; + my ($fdno,$in_fh,$out_fd) = @_; + my $buf; + local $/ = "\n"; + close $self->fh($fdno,"w"); + if($? and $opt::compress) { + ::error($opt::compress_program." failed."); + $self->set_exitstatus(255); + } + seek $in_fh, 0, 0; + # $in_fh is now ready for reading at position 0 + my $tag = $self->tag(); + my $outputlength = 0; + my @output; + while(<$in_fh>) { + print $out_fd $tag,$_; + $outputlength += length $_; + if($Global::membuffer) { + push @{$self->{'output'}{$fdno}}, $tag, $_; + } + } + if($fdno == 1) { + $self->add_returnsize($outputlength); + } + close $in_fh; + if($? and $opt::compress) { + ::error($opt::decompress_program." failed."); + $self->set_exitstatus(255); + } +} + +sub print_normal { + my $self = shift; + my ($fdno,$in_fh,$out_fd) = @_; + my $buf; + close $self->fh($fdno,"w"); + if($? and $opt::compress) { + ::error($opt::compress_program." failed."); + $self->set_exitstatus(255); + } + seek $in_fh, 0, 0; + # $in_fh is now ready for reading at position 0 + my $outputlength = 0; + my @output; + while(sysread($in_fh,$buf,131072)) { + print $out_fd $buf; + $outputlength += length $buf; + if($Global::membuffer) { + push @{$self->{'output'}{$fdno}}, $buf; + } + } + if($fdno == 1) { + $self->add_returnsize($outputlength); + } + close $in_fh; + if($? and $opt::compress) { + ::error($opt::decompress_program." 
failed."); + $self->set_exitstatus(255); + } +} + +sub print_joblog { + my $self = shift; + my $cmd; + if($Global::verbose <= 1) { + $cmd = $self->replaced(); + } else { + # Verbose level > 1: Print the rsync and stuff + $cmd = "@command"; + } + print $Global::joblog + join("\t", $self->seq(), $self->sshlogin()->string(), + $self->starttime(), sprintf("%10.3f",$self->runtime()), + $self->transfersize(), $self->returnsize(), + $self->exitstatus(), $self->exitsignal(), $cmd + ). "\n"; + flush $Global::joblog; + $self->set_job_in_joblog(); +} + +sub tag { + my $self = shift; + if(not defined $self->{'tag'}) { + if($opt::tag or defined $opt::tagstring) { + $self->{'tag'} = $self->{'commandline'}-> + replace_placeholders([$opt::tagstring],0,0)."\t"; + } else { + $self->{'tag'} = ""; + } + } + return $self->{'tag'}; +} + +sub hostgroups { + my $self = shift; + if(not defined $self->{'hostgroups'}) { + $self->{'hostgroups'} = $self->{'commandline'}->{'arg_list'}[0][0]->{'hostgroups'}; + } + return @{$self->{'hostgroups'}}; +} + +sub exitstatus { + my $self = shift; + return $self->{'exitstatus'}; +} + +sub set_exitstatus { + my $self = shift; + my $exitstatus = shift; + if($exitstatus) { + # Overwrite status if non-zero + $self->{'exitstatus'} = $exitstatus; + } else { + # Set status but do not overwrite + # Status may have been set by --timeout + $self->{'exitstatus'} ||= $exitstatus; + } + $opt::sqlworker and + $Global::sql->update("SET Exitval = ? WHERE Seq = ".$self->seq(), + $exitstatus); +} + +sub reset_exitstatus { + my $self = shift; + undef $self->{'exitstatus'}; +} + +sub exitsignal { + my $self = shift; + return $self->{'exitsignal'}; +} + +sub set_exitsignal { + my $self = shift; + my $exitsignal = shift; + $self->{'exitsignal'} = $exitsignal; + $opt::sqlworker and + $Global::sql->update("SET _Signal = ? WHERE Seq = ".$self->seq(), + $exitsignal); +} + +{ + my $status_printed; + my $total_jobs; + + sub should_we_halt { + # Should we halt? Immediately? Gracefully? + # Returns: N/A + my $job = shift; + # --halt # => 1..100 (number of jobs failed, 101 means > 100) + # --halt % => 1..100 (pct of jobs failed) + if($Global::halt_pct and not $Global::halt_count) { + $total_jobs ||= $Global::JobQueue->total_jobs(); + # From the pct compute the number of jobs that must fail/succeed + $Global::halt_count = $total_jobs * $Global::halt_pct; + } + if($job->exitstatus() or $job->exitsignal()) { + # Job failed + $Global::exitstatus++; + $Global::total_failed++; + if($Global::halt_fail) { + ::status("$Global::progname: This job failed:", + $job->replaced()); + if($Global::halt_count <= $Global::total_failed) { + # At least N jobs had failed + if(not defined $Global::halt_exitstatus) { + if($Global::halt_pct) { + # --halt now,fail=X% or soon,fail=X% + $Global::halt_exitstatus = + ::ceil($Global::total_failed / $total_jobs * 100); + } elsif($Global::halt_count) { + # --halt now,fail=X or soon,fail=X + $Global::halt_exitstatus = ::min($Global::total_failed,101); + } + if($Global::halt_count and $Global::halt_count == 1) { + # --halt now,fail=1 or soon,fail=1 + $Global::halt_exitstatus = $job->exitstatus(); + } + } + ::debug("halt","Pct: ",$Global::halt_pct, + " count: ",$Global::halt_count,"\n"); + if($Global::halt_when eq "soon" + and scalar(keys %Global::running) > 0) { + ::status + ("$Global::progname: Starting no more jobs. ". + "Waiting for ". (keys %Global::running). 
+ " jobs to finish."); + $Global::start_no_new_jobs ||= 1; + } + return($Global::halt_when); + } + } + } else { + if($Global::halt_success) { + ::debug("halt","Pct: ",$Global::halt_pct,"<=", + " count: ",$Global::halt_count,"\n"); + ::status("$Global::progname: This job succeeded:", + $job->replaced()); + if($Global::halt_count <= + $Global::total_completed-$Global::total_failed) { + # At least N jobs had success + # or at least N% had success + $Global::halt_exitstatus = 0; + if($Global::halt_when eq "soon" + and scalar(keys %Global::running) > 0) { + ::status + ("$Global::progname: Starting no more jobs. ". + "Waiting for ". (keys %Global::running). + " jobs to finish."); + $Global::start_no_new_jobs ||= 1; + } + return($Global::halt_when); + } + } + } + return ""; + } +} + + +package CommandLine; + +sub new { + my $class = shift; + my $seq = shift; + my $commandref = shift; + $commandref || die; + my $arg_queue = shift; + my $context_replace = shift; + my $max_number_of_args = shift; # for -N and normal (-n1) + my $transfer_files = shift; + my $return_files = shift; + my $replacecount_ref = shift; + my $len_ref = shift; + my %replacecount = %$replacecount_ref; + my %len = %$len_ref; + for (keys %$replacecount_ref) { + # Total length of this replacement string {} replaced with all args + $len{$_} = 0; + } + return bless { + 'command' => $commandref, + 'seq' => $seq, + 'len' => \%len, + 'arg_list' => [], + 'arg_list_flat' => [], + 'arg_list_flat_orig' => [undef], + 'arg_queue' => $arg_queue, + 'max_number_of_args' => $max_number_of_args, + 'replacecount' => \%replacecount, + 'context_replace' => $context_replace, + 'transfer_files' => $transfer_files, + 'return_files' => $return_files, + 'replaced' => undef, + }, ref($class) || $class; +} + +sub seq { + my $self = shift; + return $self->{'seq'}; +} + +sub set_seq { + my $self = shift; + $self->{'seq'} = shift; +} + +sub slot { + # Find the number of a free job slot and return it + # Uses: + # @Global::slots - list with free jobslots + # Returns: + # $jobslot = number of jobslot + my $self = shift; + if(not $self->{'slot'}) { + if(not @Global::slots) { + # $max_slot_number will typically be $Global::max_jobs_running + push @Global::slots, ++$Global::max_slot_number; + } + $self->{'slot'} = shift @Global::slots; + } + return $self->{'slot'}; +} + +sub populate { + # Add arguments from arg_queue until the number of arguments or + # max line length is reached + # Uses: + # $Global::minimal_command_line_length + # $opt::cat + # $opt::fifo + # $Global::JobQueue + # $opt::m + # $opt::X + # $CommandLine::already_spread + # $Global::max_jobs_running + # Returns: N/A + my $self = shift; + my $next_arg; + my $max_len = $Global::minimal_command_line_length + || Limits::Command::max_length(); + + if($opt::cat or $opt::fifo) { + # Get the empty arg added by --pipepart (if any) + $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get(); + # $PARALLEL_TMP will point to a tempfile that will be used as {} + $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}-> + unget([Arg->new('$PARALLEL_TMP')]); + } + while (not $self->{'arg_queue'}->empty()) { + $next_arg = $self->{'arg_queue'}->get(); + if(not defined $next_arg) { + next; + } + $self->push($next_arg); + if($self->len() >= $max_len) { + # Command length is now > max_length + # If there are arguments: remove the last + # If there are no arguments: Error + # TODO stuff about -x opt_x + if($self->number_of_args() > 1) { + # There is something to work on + $self->{'arg_queue'}->unget($self->pop()); 
+ last; + } else { + my $args = join(" ", map { $_->orig() } @$next_arg); + ::error("Command line too long (". + $self->len(). " >= ". + $max_len. + ") at input ". + $self->{'arg_queue'}->arg_number(). + ": ". + ((length $args > 50) ? + (substr($args,0,50))."..." : + $args)); + $self->{'arg_queue'}->unget($self->pop()); + ::wait_and_exit(255); + } + } + + if(defined $self->{'max_number_of_args'}) { + if($self->number_of_args() >= $self->{'max_number_of_args'}) { + last; + } + } + } + if(($opt::m or $opt::X) and not $CommandLine::already_spread + and $self->{'arg_queue'}->empty() and $Global::max_jobs_running) { + # -m or -X and EOF => Spread the arguments over all jobslots + # (unless they are already spread) + $CommandLine::already_spread ||= 1; + if($self->number_of_args() > 1) { + $self->{'max_number_of_args'} = + ::ceil($self->number_of_args()/$Global::max_jobs_running); + $Global::JobQueue->{'commandlinequeue'}->{'max_number_of_args'} = + $self->{'max_number_of_args'}; + $self->{'arg_queue'}->unget($self->pop_all()); + while($self->number_of_args() < $self->{'max_number_of_args'}) { + $self->push($self->{'arg_queue'}->get()); + } + } + } + + if($opt::sqlmaster) { + # Insert the V1..Vn for this $seq in SQL table instead of generating one + $Global::sql->insert_records($self->seq(),$self->{'arg_list_flat_orig'}); + } +} + +sub push { + # Add one or more records as arguments + # Returns: N/A + my $self = shift; + my $record = shift; + push @{$self->{'arg_list_flat_orig'}}, map { $_->orig() } @$record; + push @{$self->{'arg_list_flat'}}, @$record; + push @{$self->{'arg_list'}}, $record; + # Make @arg available for {= =} + *Arg::arg = $self->{'arg_list_flat_orig'}; + + my $quote_arg = $Global::noquote ? 0 : not $Global::quoting; + for my $perlexpr (keys %{$self->{'replacecount'}}) { + if($perlexpr =~ /^(\d+) /) { + # Positional + defined($record->[$1-1]) or next; + $self->{'len'}{$perlexpr} += + length $record->[$1-1]->replace($perlexpr,$quote_arg,$self); + } else { + for my $arg (@$record) { + if(defined $arg) { + $self->{'len'}{$perlexpr} += + length $arg->replace($perlexpr,$quote_arg,$self); + } + } + } + } +} + +sub pop { + # Remove last argument + # Returns: + # the last record + my $self = shift; + my $record = pop @{$self->{'arg_list'}}; + # pop off arguments from @$record + splice @{$self->{'arg_list_flat_orig'}}, -($#$record+1), $#$record+1; + splice @{$self->{'arg_list_flat'}}, -($#$record+1), $#$record+1; + my $quote_arg = $Global::noquote ? 
0 : not $Global::quoting; + for my $perlexpr (keys %{$self->{'replacecount'}}) { + if($perlexpr =~ /^(\d+) /) { + # Positional + defined($record->[$1-1]) or next; + $self->{'len'}{$perlexpr} -= length $record->[$1-1]->replace($perlexpr,$quote_arg,$self); + } else { + for my $arg (@$record) { + if(defined $arg) { + $self->{'len'}{$perlexpr} -= length $arg->replace($perlexpr,$quote_arg,$self); + } + } + } + } + return $record; +} + +sub pop_all { + # Remove all arguments and zeros the length of replacement perlexpr + # Returns: + # all records + my $self = shift; + my @popped = @{$self->{'arg_list'}}; + for my $perlexpr (keys %{$self->{'replacecount'}}) { + $self->{'len'}{$perlexpr} = 0; + } + $self->{'arg_list'} = []; + $self->{'arg_list_flat_orig'} = [undef]; + $self->{'arg_list_flat'} = []; + return @popped; +} + +sub number_of_args { + # The number of records + # Returns: + # number of records + my $self = shift; + # This is really the number of records + return $#{$self->{'arg_list'}}+1; +} + +sub number_of_recargs { + # The number of args in records + # Returns: + # number of args records + my $self = shift; + my $sum = 0; + my $nrec = scalar @{$self->{'arg_list'}}; + if($nrec) { + $sum = $nrec * (scalar @{$self->{'arg_list'}[0]}); + } + return $sum; +} + +sub args_as_string { + # Returns: + # all unmodified arguments joined with ' ' (similar to {}) + my $self = shift; + return (join " ", map { $_->orig() } + map { @$_ } @{$self->{'arg_list'}}); +} + +sub args_as_dirname { + # Returns: + # all unmodified arguments joined with '/' (similar to {}) + # \t \0 \\ and / are quoted as: \t \0 \\ \_ + # If $Global::max_file_length: Keep subdirs < $Global::max_file_length + my $self = shift; + my @res = (); + + for my $rec_ref (@{$self->{'arg_list'}}) { + # If headers are used, sort by them. + # Otherwise keep the order from the command line. + my @header_indexes_sorted = header_indexes_sorted($#$rec_ref+1); + for my $n (@header_indexes_sorted) { + CORE::push(@res, + $Global::input_source_header{$n}, + map { my $s = $_; + # \t \0 \\ and / are quoted as: \t \0 \\ \_ + $s =~ s/\\/\\\\/g; + $s =~ s/\t/\\t/g; + $s =~ s/\0/\\0/g; + $s =~ s:/:\\_:g; + if($Global::max_file_length) { + # Keep each subdir shorter than the longest + # allowed file name + $s = substr($s,0,$Global::max_file_length); + } + $s; } + $rec_ref->[$n-1]->orig()); + } + } + return join "/", @res; +} + +sub header_indexes_sorted { + # Sort headers first by number then by name. + # E.g.: 1a 1b 11a 11b + # Returns: + # Indexes of %Global::input_source_header sorted + my $max_col = shift; + + no warnings 'numeric'; + for my $col (1 .. $max_col) { + # Make sure the header is defined. If it is not: use column number + if(not defined $Global::input_source_header{$col}) { + $Global::input_source_header{$col} = $col; + } + } + my @header_indexes_sorted = sort { + # Sort headers numerically then asciibetically + $Global::input_source_header{$a} <=> $Global::input_source_header{$b} + or + $Global::input_source_header{$a} cmp $Global::input_source_header{$b} + } 1 .. 
$max_col; + return @header_indexes_sorted; +} + +sub len { + # Uses: + # $opt::shellquote + # The length of the command line with args substituted + my $self = shift; + my $len = 0; + # Add length of the original command with no args + # Length of command w/ all replacement args removed + $len += $self->{'len'}{'noncontext'} + @{$self->{'command'}} -1; + ::debug("length", "noncontext + command: $len\n"); + my $recargs = $self->number_of_recargs(); + if($self->{'context_replace'}) { + # Context is duplicated for each arg + $len += $recargs * $self->{'len'}{'context'}; + for my $replstring (keys %{$self->{'replacecount'}}) { + # If the replacements string is more than once: mulitply its length + $len += $self->{'len'}{$replstring} * + $self->{'replacecount'}{$replstring}; + ::debug("length", $replstring, " ", $self->{'len'}{$replstring}, "*", + $self->{'replacecount'}{$replstring}, "\n"); + } + # echo 11 22 33 44 55 66 77 88 99 1010 + # echo 1 2 3 4 5 6 7 8 9 10 1 2 3 4 5 6 7 8 9 10 + # 5 + ctxgrp*arg + ::debug("length", "Ctxgrp: ", $self->{'len'}{'contextgroups'}, + " Groups: ", $self->{'len'}{'noncontextgroups'}, "\n"); + # Add space between context groups + $len += ($recargs-1) * ($self->{'len'}{'contextgroups'}); + } else { + # Each replacement string may occur several times + # Add the length for each time + $len += 1*$self->{'len'}{'context'}; + ::debug("length", "context+noncontext + command: $len\n"); + for my $replstring (keys %{$self->{'replacecount'}}) { + # (space between regargs + length of replacement) + # * number this replacement is used + $len += ($recargs -1 + $self->{'len'}{$replstring}) * + $self->{'replacecount'}{$replstring}; + } + } + if($Global::quoting) { + # Pessimistic length if -q is set + # Worse than worst case: every char needs to be quoted with \ + $len *= 2; + } + if($opt::shellquote) { + # Pessimistic length if --shellquote is set + # Worse than worst case: every char needs to be quoted with \ twice + $len *= 4; + } + # If we are using --env, add the prefix for that, too. + $len += 0; + return $len; +} + +sub replaced { + # Uses: + # $Global::noquote + # $Global::quoting + # Returns: + # $replaced = command with place holders replaced and prepended + my $self = shift; + if(not defined $self->{'replaced'}) { + # Don't quote arguments if the input is the full command line + my $quote_arg = $Global::noquote ? 0 : not $Global::quoting; + # or if ($opt::cat or $opt::pipe) as they use $PARALLEL_TMP + $quote_arg = ($opt::cat || $opt::fifo) ? 
0 : $quote_arg; + $self->{'replaced'} = $self-> + replace_placeholders($self->{'command'},$Global::quoting, + $quote_arg); + my $len = length $self->{'replaced'}; + if ($len != $self->len()) { + ::debug("length", $len, " != ", $self->len(), + " ", $self->{'replaced'}, "\n"); + } else { + ::debug("length", $len, " == ", $self->len(), + " ", $self->{'replaced'}, "\n"); + } + } + return $self->{'replaced'}; +} + +{ + my @target; + my $context_replace; + my $perl_expressions_as_re; + my @arg; + my %words_containing_replacement_strings; + + sub fish_out_words_containing_replacement_strings { + if(not $words_containing_replacement_strings{$context_replace,@target}) { + my %word; + for (@target) { + my $tt = $_; + ::debug("replace", "Target: $tt"); + # Command line template: + # a{1}b{}c{}d + # becomes: + # a{=1 $_=$_ =}b{= $_=$_ =}c{= $_=$_ =}d + # becomes: + # a\257<1 $_=$_ \257>b\257< $_=$_ \257>c\257< $_=$_ \257>d + # Input A B C (no context) becomes: + # A B C => aAbA B CcA B Cd + # Input A B C (context -X) becomes: + # A B C => aAbAcAd aAbBcBd aAbCcCd + if($context_replace) { + while($tt =~ s/([^\s\257]* # before {= + (?: + \257< # {= + [^\257]*? # The perl expression + \257> # =} + [^\s\257]* # after =} + )+)/ /x) { + # $1 = pre \257 perlexpr \257 post + $word{"$1"} ||= 1; + } + } else { + while($tt =~ s/( (?: \257<([^\257]*?)\257>) )//x) { + # $f = \257 perlexpr \257 + $word{$1} ||= 1; + } + } + } + @{$words_containing_replacement_strings{$context_replace,@target}} = keys %word + } + return @{$words_containing_replacement_strings{$context_replace,@target}}; + } + + sub replace_placeholders { + # Replace foo{}bar with fooargbar + # Uses: + # @Arg::arg = arguments as strings to be use in {= =} + # Input: + # $targetref = command as shell words + # $quote = should everything be quoted? + # $quote_arg = should replaced arguments be quoted? 
+ # Returns: + # @target with placeholders replaced + my $self = shift; + my $targetref = shift; + my $quote = shift; + my $quote_arg = shift; + my %replace; + # -X = context replace (fish_out_words_containing_replacement_strings) + $context_replace = $self->{'context_replace'}; + @target = @$targetref; + ::debug("replace", "Replace @target\n"); + if(not @target) { + # @target is empty: Return empty array + return @target; + } + # Make it possible to use $arg[2] in {= =} + *Arg::arg = $self->{'arg_list_flat_orig'}; + # Flat list: + # $self->{'arg_list'} = [ [Arg11, Arg12], [Arg21, Arg22], [Arg31, Arg32] ] + # $self->{'arg_list_flat'} = [ Arg11, Arg12, Arg21, Arg22, Arg31, Arg32 ] + if(not @{$self->{'arg_list_flat'}}) { + @{$self->{'arg_list_flat'}} = Arg->new(""); + } + my $argref = $self->{'arg_list_flat'}; + # Number of arguments - used for positional arguments + my $n = $#$argref+1; + + # $self is actually a CommandLine-object, + # but it looks nice to be able to say {= $job->slot() =} + my $job = $self; + # Fish out the words that have replacement strings in them + for my $word ( + fish_out_words_containing_replacement_strings()) { + # word = AB \257< perlexpr \257> CD \257< perlexpr \257> EF + ::debug("replace", "Replacing in $word\n"); + my $normal_replace; + + # for each arg: + # replace replacement strings with replacement in the word value + # push to replace word value + $perl_expressions_as_re ||= + join("|", map {s/^-?\d+//; "\Q$_\E"} keys %{$self->{'replacecount'}}); + for my $arg (@$argref) { + my $val = $word; + # Replace {= perl expr =} with value for each arg + $val =~ s{\257<(-?\d+)?($perl_expressions_as_re)\257>} + { + if($1) { + # Positional replace + # Find the relevant arg and replace it + ($argref->[$1 > 0 ? $1-1 : $n+$1] ? # If defined: replace + $argref->[$1 > 0 ? $1-1 : $n+$1]-> + replace($2,$quote_arg,$self) + : ""); + } else { + # Normal replace + $normal_replace ||= 1; + ($arg ? $arg->replace($2,$quote_arg,$self) : ""); + } + }goxe; + if($quote) { + CORE::push(@{$replace{::shell_quote_scalar($word)}}, + ::shell_quote_scalar($val)); + } else { + CORE::push(@{$replace{$word}}, $val); + } + # No normal replacements => only run once + $normal_replace or last; + } + } + *Arg::arg = []; + if($quote) { + @target = ::shell_quote(@target); + } + if(%replace) { + # Substitute the replace strings with the replacement values + # Must be sorted by length if a short word is a substring of a long word + my $regexp = join('|', map { my $s = $_; $s =~ s/(\W)/\\$1/g; $s } + sort { length $b <=> length $a } keys %replace); + for(@target) { + s/($regexp)/join(" ",@{$replace{$1}})/ge; + } + } + ::debug("replace", "Return @target\n"); + return wantarray ? @target : "@target"; + } +} + +sub skip { + # Skip this job + my $self = shift; + $self->{'skip'} = 1; +} + + +package CommandLineQueue; + +sub new { + my $class = shift; + my $commandref = shift; + my $read_from = shift; + my $context_replace = shift || 0; + my $max_number_of_args = shift; + my $transfer_files = shift; + my $return_files = shift; + my @unget = (); + my ($count,$posrpl,$perlexpr); + my ($replacecount_ref, $len_ref); + my @command = @$commandref; + my $dummy = ''; + my $seq = 1; + # If the first command start with '-' it is probably an option + if($command[0] =~ /^\s*(-\S+)/) { + # Is this really a command in $PATH starting with '-'? + my $cmd = $1; + if(not ::which($cmd)) { + ::error("Command ($cmd) starts with '-'. 
+Is this a wrong option?");
+	    ::wait_and_exit(255);
+	}
+    }
+    # Replace replacement strings with {= perl expr =}
+    # '{=' 'perlexpr' '=}' => '{= perlexpr =}'
+    @command = merge_rpl_parts(@command);
+
+    # Protect matching inside {= perl expr =}
+    # by replacing {= and =} with \257< and \257>
+    # in options that can contain replacement strings:
+    # @command, --transferfile, --return,
+    # --tagstring, --workdir, --results
+    for(@command, @$transfer_files, @$return_files,
+	$opt::tagstring, $opt::workdir, $opt::results) {
+	# Skip if undefined
+	$_ or next;
+	# Disallow \257 to avoid nested {= {= =} =}
+	if(/\257/) {
+	    ::error("Command cannot contain the character \257. Use a function for that.");
+	    ::wait_and_exit(255);
+	}
+	# Needs to match rightmost left parens (Perl defaults to leftmost)
+	# to deal with: {={==}
+	while(s{([^\257]*) \Q$Global::parensleft\E ([^\257]*?) \Q$Global::parensright\E }
+	      {$1\257<$2\257>}gx) {}
+	for my $rpl (sort { length $b <=> length $a } keys %Global::rpl) {
+	    # Replace long --rpl's before short ones, as a short may be a
+	    # substring of a long:
+	    # --rpl '% s/a/b/' --rpl '%% s/b/a/'
+	    # Replace the short hand string (--rpl)
+	    # with the {= perl expr =}
+	    # Avoid replacing inside existing {= perl expr =}
+	    while(s{((^|\257>)[^\257]*?) # Don't replace after \257 unless \257>
+		    \Q$rpl\E}
+		  {$1\257<$Global::rpl{$rpl}\257>}xg) {
+	    }
+	    # Do the same for the positional replacement strings
+	    # A bit harder as we have to put in the position number
+	    $posrpl = $rpl;
+	    if($posrpl =~ s/^\{//) {
+		# Only do this if the shorthand starts with {
+		while(s{((^|\257>)[^\257]*?) # Don't replace after \257 unless \257>
+			\{(-?\d+)\Q$posrpl\E}
+		      {$1\257<$3 $Global::rpl{$rpl}\257>}xg) {
+		}
+	    }
+	}
+    }
+
+    # Add {} if no replacement strings in @command
+    ($replacecount_ref, $len_ref, @command) =
+	replacement_counts_and_lengths($transfer_files,$return_files,@command);
+    if("@command" =~ /^[^ \t\n=]*\257</) {
+	# The replacement string is (part of) the command itself
+	# (and not just an argument or a variable definition such as V1={}).
+	# Do not quote the replacements: quoting would fail
+	# if the input contains spaces.
+	$Global::noquote = 1;
+    }
+
+    if($opt::sqlmaster and $Global::sql->append()) {
+	$seq = $Global::sql->max_seq() + 1;
+    }
+
+    return bless {
+	'unget' => \@unget,
+	'command' => \@command,
+	'replacecount' => $replacecount_ref,
+	'arg_queue' => RecordQueue->new($read_from,$opt::colsep),
+	'context_replace' => $context_replace,
+	'len' => $len_ref,
+	'max_number_of_args' => $max_number_of_args,
+	'size' => undef,
+	'transfer_files' => $transfer_files,
+	'return_files' => $return_files,
+	'seq' => $seq,
+    }, ref($class) || $class;
+}
+
+sub merge_rpl_parts {
+    # '{=' 'perlexpr' '=}' => '{= perlexpr =}'
+    # Input:
+    # @in = the @command as given by the user
+    # Uses:
+    # $Global::parensleft
+    # $Global::parensright
+    # Returns:
+    # @command with parts merged to keep {= and =} as one
+    my @in = @_;
+    my @out;
+    my $l = quotemeta($Global::parensleft);
+    my $r = quotemeta($Global::parensright);
+
+    while(@in) {
+	my $s = shift @in;
+	$_ = $s;
+	# Remove matching (rightmost) parens
+	while(s/(.*)$l.*?$r/$1/o) {}
+	if(/$l/o) {
+	    # Missing right parens
+	    while(@in) {
+		$s .= " ".shift @in;
+		$_ = $s;
+		while(s/(.*)$l.*?$r/$1/o) {}
+		if(not /$l/o) {
+		    last;
+		}
+	    }
+	}
+	push @out, $s;
+    }
+    return @out;
+}
+
+sub replacement_counts_and_lengths {
+    # Count the number of different replacement strings.
+    # Find the lengths of context for context groups and non-context
+    # groups.
+ # If no {} found in @command: add it to @command + # + # Input: + # \@transfer_files = array of filenames to transfer + # \@return_files = array of filenames to return + # @command = command template + # Output: + # \%replacecount, \%len, @command + my $transfer_files = shift; + my $return_files = shift; + my @command = @_; + my (%replacecount,%len); + my $sum = 0; + while($sum == 0) { + # Count how many times each replacement string is used + my @cmd = @command; + my $contextlen = 0; + my $noncontextlen = 0; + my $contextgroups = 0; + for my $c (@cmd) { + while($c =~ s/ \257<([^\257]*?)\257> /\000/x) { + # %replacecount = { "perlexpr" => number of times seen } + # e.g { "s/a/b/" => 2 } + $replacecount{$1}++; + $sum++; + } + # Measure the length of the context around the {= perl expr =} + # Use that {=...=} has been replaced with \000 above + # So there is no need to deal with \257< + while($c =~ s/ (\S*\000\S*) //x) { + my $w = $1; + $w =~ tr/\000//d; # Remove all \000's + $contextlen += length($w); + $contextgroups++; + } + # All {= perl expr =} have been removed: The rest is non-context + $noncontextlen += length $c; + } + for(@$transfer_files, @$return_files, + $opt::tagstring, $opt::workdir, $opt::results) { + # Options that can contain replacement strings + $_ or next; + my $t = $_; + while($t =~ s/ \257<([^\257]*)\257> //x) { + # %replacecount = { "perlexpr" => number of times seen } + # e.g { "$_++" => 2 } + # But for tagstring we just need to mark it as seen + $replacecount{$1} ||= 1; + } + } + if($opt::bar) { + # If the command does not contain {} force it to be computed + # as it is being used by --bar + $replacecount{""} ||= 1; + } + + $len{'context'} = 0+$contextlen; + $len{'noncontext'} = $noncontextlen; + $len{'contextgroups'} = $contextgroups; + $len{'noncontextgroups'} = @cmd-$contextgroups; + ::debug("length", "@command Context: ", $len{'context'}, + " Non: ", $len{'noncontext'}, " Ctxgrp: ", $len{'contextgroups'}, + " NonCtxGrp: ", $len{'noncontextgroups'}, "\n"); + if($sum == 0) { + if(not @command) { + # Default command = {} + @command = ("\257<\257>"); + } elsif(($opt::pipe or $opt::pipepart) + and not $opt::fifo and not $opt::cat) { + # With --pipe / --pipe-part you can have no replacement + last; + } else { + # Append {} to the command if there are no {...}'s and no {=...=} + push @command, ("\257<\257>"); + } + } + } + return(\%replacecount,\%len,@command); +} + +sub get { + my $self = shift; + if(@{$self->{'unget'}}) { + my $cmd_line = shift @{$self->{'unget'}}; + return ($cmd_line); + } else { + my $cmd_line = CommandLine->new($self->seq(), + $self->{'command'}, + $self->{'arg_queue'}, + $self->{'context_replace'}, + $self->{'max_number_of_args'}, + $self->{'transfer_files'}, + $self->{'return_files'}, + $self->{'replacecount'}, + $self->{'len'}, + ); + if($opt::sqlworker) { + # Get the sequence number from the SQL table + $cmd_line->set_seq($SQL::next_seq); + } + $cmd_line->populate(); + ::debug("init","cmd_line->number_of_args ", + $cmd_line->number_of_args(), "\n"); + if(not $Global::no_more_input and ($opt::pipe or $opt::pipepart)) { + if($cmd_line->replaced() eq "") { + # Empty command - pipe requires a command + ::error("--pipe/--pipepart must have a command to pipe into ". + "(e.g. 'cat')."); + ::wait_and_exit(255); + } + } else { + if($cmd_line->number_of_args() == 0) { + # We did not get more args - maybe at EOF string? 
+ return undef; + } elsif($cmd_line->replaced() eq "") { + # Empty command - get the next instead + return $self->get(); + } + } + $self->set_seq($self->seq()+1); + return $cmd_line; + } +} + +sub unget { + my $self = shift; + unshift @{$self->{'unget'}}, @_; +} + +sub empty { + my $self = shift; + my $empty = (not @{$self->{'unget'}}) && $self->{'arg_queue'}->empty(); + ::debug("run", "CommandLineQueue->empty $empty"); + return $empty; +} + +sub seq { + my $self = shift; + return $self->{'seq'}; +} + +sub set_seq { + my $self = shift; + $self->{'seq'} = shift; +} + +sub quote_args { + my $self = shift; + # If there is not command emulate |bash + return $self->{'command'}; +} + + +package Limits::Command; + +# Maximal command line length (for -m and -X) +sub max_length { + # Find the max_length of a command line and cache it + # Returns: + # number of chars on the longest command line allowed + if(not $Limits::Command::line_max_len) { + # Disk cache of max command line length + my $len_cache = $ENV{'HOME'} . "/.parallel/tmp/linelen-" . ::hostname(); + my $cached_limit; + if(-e $len_cache) { + open(my $fh, "<", $len_cache) || ::die_bug("Cannot read $len_cache"); + $cached_limit = <$fh>; + close $fh; + } else { + $cached_limit = real_max_length(); + # If $HOME is write protected: Do not fail + mkdir($ENV{'HOME'} . "/.parallel"); + mkdir($ENV{'HOME'} . "/.parallel/tmp"); + open(my $fh, ">", $len_cache); + print $fh $cached_limit; + close $fh; + } + $Limits::Command::line_max_len = tmux_length($cached_limit); + if($opt::max_chars) { + if($opt::max_chars * 2 <= $cached_limit) { + # $opt::max_chars quoting causes the length to double + $Limits::Command::line_max_len = $opt::max_chars * 2; + } else { + ::warning("Value for -s option should be < $cached_limit."); + } + } + } + return int($Limits::Command::line_max_len/2); +} + +sub real_max_length { + # Find the max_length of a command line + # Returns: + # The maximal command line length + # Use an upper bound of 8 MB if the shell allows for for infinite long lengths + my $upper = 8_000_000; + my $len = 8; + do { + if($len > $upper) { return $len }; + $len *= 16; + } while (is_acceptable_command_line_length($len)); + # Then search for the actual max length between 0 and upper bound + return binary_find_max_length(int($len/16),$len); +} + +sub binary_find_max_length { + # Given a lower and upper bound find the max_length of a command line + # Returns: + # number of chars on the longest command line allowed + my ($lower, $upper) = (@_); + if($lower == $upper or $lower == $upper-1) { return $lower; } + my $middle = int (($upper-$lower)/2 + $lower); + ::debug("init", "Maxlen: $lower,$upper,$middle : "); + if (is_acceptable_command_line_length($middle)) { + return binary_find_max_length($middle,$upper); + } else { + return binary_find_max_length($lower,$middle); + } +} + +sub is_acceptable_command_line_length { + # Test if a command line of this length can run + # in the current environment + # Returns: + # 0 if the command line length is too long + # 1 otherwise + my $len = shift; + if($ENV{PARALLEL_ENV}) { + $len += length $ENV{PARALLEL_ENV} + (-s $ENV{PARALLEL_ENV})*2; + } + ::qqx("true "."x"x$len); + ::debug("init", "$len=$? 
"); + return not $?; +} + +sub tmux_length { + # If $opt::tmux set, find the limit for tmux + # tmux 1.8 has a 2kB limit + # tmux 1.9 has a 16kB limit + # tmux 2.0 has a 16kB limit + # tmux 2.1 has a 16kB limit + # tmux 2.2 has a 16kB limit + # Input: + # $len = maximal command line length + # Returns: + # $tmux_len = maximal length runable in tmux + my $len = shift; + if($opt::tmux) { + $ENV{'PARALLEL_TMUX'} ||= "tmux"; + if(not ::which($ENV{'PARALLEL_TMUX'})) { + ::error($ENV{'PARALLEL_TMUX'}." not found in \$PATH."); + ::wait_and_exit(255); + } + my @out; + for my $l (1, 2020, 16320, 100000, $len) { + my $tmpfile = ::tmpname("tms"); + my $tmuxcmd = $ENV{'PARALLEL_TMUX'}." -S $tmpfile new-session -d -n echo $l". + ("x"x$l). " && echo $l; rm -f $tmpfile"; + push @out, ::qqx($tmuxcmd); + ::rm($tmpfile); + } + ::debug("tmux","tmux-out ",@out); + chomp @out; + # The arguments is given 3 times on the command line + # and the wrapping is around 30 chars + # (29 for tmux1.9, 33 for tmux1.8) + my $tmux_len = (::max(@out)); + $len = ::min($len,int($tmux_len/4-33)); + ::debug("tmux","tmux-length ",$len); + } + return $len; +} + + +package RecordQueue; + +sub new { + my $class = shift; + my $fhs = shift; + my $colsep = shift; + my @unget = (); + my $arg_sub_queue; + if($opt::sqlworker) { + # Open SQL table + $arg_sub_queue = SQLRecordQueue->new(); + } elsif(defined $colsep) { + # Open one file with colsep + $arg_sub_queue = RecordColQueue->new($fhs); + } else { + # Open one or more files if multiple -a + $arg_sub_queue = MultifileQueue->new($fhs); + } + return bless { + 'unget' => \@unget, + 'arg_number' => 0, + 'arg_sub_queue' => $arg_sub_queue, + }, ref($class) || $class; +} + +sub get { + # Returns: + # reference to array of Arg-objects + my $self = shift; + if(@{$self->{'unget'}}) { + $self->{'arg_number'}++; + # Flush cached computed replacements in Arg-objects + # To fix: parallel --bar echo {%} ::: a b c ::: d e f + my $ret = shift @{$self->{'unget'}}; + if($ret) { + map { $_->flush_cache() } @$ret; + } + return $ret; + } + my $ret = $self->{'arg_sub_queue'}->get(); + if($ret and + grep { index($_->orig(),"\0") > 0 } @$ret) { + # Allow for \0 in position 0 because GNU Parallel uses "\0" + # to mean no-string + ::warning("a NUL character occurred in the input.", + "It cannot be passed through in the argument list.", + "Did you mean to use the --null option?"); + } + if(defined $Global::max_number_of_args + and $Global::max_number_of_args == 0) { + ::debug("run", "Read 1 but return 0 args\n"); + # \0 => nothing (not the empty string) + return [Arg->new("\0")]; + } else { + # Flush cached computed replacements in Arg-objects + # To fix: parallel --bar echo {%} ::: a b c ::: d e f + if($ret) { + map { $_->flush_cache() } @$ret; + } + return $ret; + } +} + +sub unget { + my $self = shift; + ::debug("run", "RecordQueue-unget '@_'\n"); + $self->{'arg_number'} -= @_; + unshift @{$self->{'unget'}}, @_; +} + +sub empty { + my $self = shift; + my $empty = not @{$self->{'unget'}}; + $empty &&= $self->{'arg_sub_queue'}->empty(); + ::debug("run", "RecordQueue->empty $empty"); + return $empty; +} + +sub arg_number { + my $self = shift; + return $self->{'arg_number'}; +} + + +package RecordColQueue; + +sub new { + my $class = shift; + my $fhs = shift; + my @unget = (); + my $arg_sub_queue = MultifileQueue->new($fhs); + return bless { + 'unget' => \@unget, + 'arg_sub_queue' => $arg_sub_queue, + }, ref($class) || $class; +} + +sub get { + # Returns: + # reference to array of Arg-objects + my $self = shift; + 
if(@{$self->{'unget'}}) { + return shift @{$self->{'unget'}}; + } + my $unget_ref = $self->{'unget'}; + if($self->{'arg_sub_queue'}->empty()) { + return undef; + } + my $in_record = $self->{'arg_sub_queue'}->get(); + if(defined $in_record) { + my @out_record = (); + for my $arg (@$in_record) { + ::debug("run", "RecordColQueue::arg $arg\n"); + my $line = $arg->orig(); + ::debug("run", "line='$line'\n"); + if($line ne "") { + for my $s (split /$opt::colsep/o, $line, -1) { + push @out_record, Arg->new($s); + } + } else { + push @out_record, Arg->new(""); + } + } + return \@out_record; + } else { + return undef; + } +} + +sub unget { + my $self = shift; + ::debug("run", "RecordColQueue-unget '@_'\n"); + unshift @{$self->{'unget'}}, @_; +} + +sub empty { + my $self = shift; + my $empty = (not @{$self->{'unget'}} and $self->{'arg_sub_queue'}->empty()); + ::debug("run", "RecordColQueue->empty $empty"); + return $empty; +} + + +package SQLRecordQueue; + +sub new { + my $class = shift; + my @unget = (); + return bless { + 'unget' => \@unget, + }, ref($class) || $class; +} + +sub get { + # Returns: + # reference to array of Arg-objects + my $self = shift; + if(@{$self->{'unget'}}) { + return shift @{$self->{'unget'}}; + } + return $Global::sql->get_record(); +} + +sub unget { + my $self = shift; + ::debug("run", "SQLRecordQueue-unget '@_'\n"); + unshift @{$self->{'unget'}}, @_; +} + +sub empty { + my $self = shift; + if(@{$self->{'unget'}}) { return 0; } + my $get = $self->get(); + if(defined $get) { + $self->unget($get); + } + my $empty = not $get; + ::debug("run", "SQLRecordQueue->empty $empty"); + return $empty; +} + + +package MultifileQueue; + +@Global::unget_argv=(); + +sub new { + my $class = shift; + my $fhs = shift; + for my $fh (@$fhs) { + if(-t $fh and -t ($Global::status_fd || *STDERR)) { + ::warning("Input is read from the terminal. You either know what you", + "are doing (in which case: YOU ARE AWESOME!) or you forgot", + "::: or :::: or to pipe data into parallel. 
If so", + "consider going through the tutorial: man parallel_tutorial", + "Press CTRL-D to exit."); + } + } + return bless { + 'unget' => \@Global::unget_argv, + 'fhs' => $fhs, + 'arg_matrix' => undef, + }, ref($class) || $class; +} + +sub get { + my $self = shift; + if($opt::link) { + return $self->link_get(); + } else { + return $self->nest_get(); + } +} + +sub unget { + my $self = shift; + ::debug("run", "MultifileQueue-unget '@_'\n"); + unshift @{$self->{'unget'}}, @_; +} + +sub empty { + my $self = shift; + my $empty = (not @Global::unget_argv + and not @{$self->{'unget'}}); + for my $fh (@{$self->{'fhs'}}) { + $empty &&= eof($fh); + } + ::debug("run", "MultifileQueue->empty $empty "); + return $empty; +} + +sub link_get { + my $self = shift; + if(@{$self->{'unget'}}) { + return shift @{$self->{'unget'}}; + } + my @record = (); + my $prepend; + my $empty = 1; + for my $fh (@{$self->{'fhs'}}) { + my $arg = read_arg_from_fh($fh); + if(defined $arg) { + # Record $arg for recycling at end of file + push @{$self->{'arg_matrix'}{$fh}}, $arg; + push @record, $arg; + $empty = 0; + } else { + ::debug("run", "EOA "); + # End of file: Recycle arguments + push @{$self->{'arg_matrix'}{$fh}}, shift @{$self->{'arg_matrix'}{$fh}}; + # return last @{$args->{'args'}{$fh}}; + push @record, @{$self->{'arg_matrix'}{$fh}}[-1]; + } + } + if($empty) { + return undef; + } else { + return \@record; + } +} + +sub nest_get { + my $self = shift; + if(@{$self->{'unget'}}) { + return shift @{$self->{'unget'}}; + } + my @record = (); + my $prepend; + my $empty = 1; + my $no_of_inputsources = $#{$self->{'fhs'}} + 1; + if(not $self->{'arg_matrix'}) { + # Initialize @arg_matrix with one arg from each file + # read one line from each file + my @first_arg_set; + my $all_empty = 1; + for (my $fhno = 0; $fhno < $no_of_inputsources ; $fhno++) { + my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]); + if(defined $arg) { + $all_empty = 0; + } + $self->{'arg_matrix'}[$fhno][0] = $arg || Arg->new(""); + push @first_arg_set, $self->{'arg_matrix'}[$fhno][0]; + } + if($all_empty) { + # All filehandles were at eof or eof-string + return undef; + } + return [@first_arg_set]; + } + + # Treat the case with one input source special. For multiple + # input sources we need to remember all previously read values to + # generate all combinations. But for one input source we can + # forget the value after first use. + if($no_of_inputsources == 1) { + my $arg = read_arg_from_fh($self->{'fhs'}[0]); + if(defined($arg)) { + return [$arg]; + } + return undef; + } + for (my $fhno = $no_of_inputsources - 1; $fhno >= 0; $fhno--) { + if(eof($self->{'fhs'}[$fhno])) { + next; + } else { + # read one + my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]); + defined($arg) || next; # If we just read an EOF string: Treat this as EOF + my $len = $#{$self->{'arg_matrix'}[$fhno]} + 1; + $self->{'arg_matrix'}[$fhno][$len] = $arg; + # make all new combinations + my @combarg = (); + for (my $fhn = 0; $fhn < $no_of_inputsources; $fhn++) { + push(@combarg, [0, $#{$self->{'arg_matrix'}[$fhn]}], + # Is input source --link'ed to the next? + $opt::linkinputsource[$fhn+1]); + } + $combarg[2*$fhno] = [$len,$len]; # Find only combinations with this new entry + # map combinations + # [ 1, 3, 7 ], [ 2, 4, 1 ] + # => + # [ m[0][1], m[1][3], m[2][7] ], [ m[0][2], m[1][4], m[2][1] ] + my @mapped; + for my $c (expand_combinations(@combarg)) { + my @a; + for my $n (0 .. 
$no_of_inputsources - 1 ) { + push @a, $self->{'arg_matrix'}[$n][$$c[$n]]; + } + push @mapped, \@a; + } + # append the mapped to the ungotten arguments + push @{$self->{'unget'}}, @mapped; + # get the first + if(@mapped) { + return shift @{$self->{'unget'}}; + } + } + } + # all are eof or at EOF string; return from the unget queue + return shift @{$self->{'unget'}}; +} + +sub read_arg_from_fh { + # Read one Arg from filehandle + # Returns: + # Arg-object with one read line + # undef if end of file + my $fh = shift; + my $prepend; + my $arg; + do {{ + # This makes 10% faster + if(not ($arg = <$fh>)) { + if(defined $prepend) { + return Arg->new($prepend); + } else { + return undef; + } + } + # Remove delimiter + $arg =~ s:$/$::; + if($Global::end_of_file_string and + $arg eq $Global::end_of_file_string) { + # Ignore the rest of input file + close $fh; + ::debug("run", "EOF-string ($arg) met\n"); + if(defined $prepend) { + return Arg->new($prepend); + } else { + return undef; + } + } + if(defined $prepend) { + $arg = $prepend.$arg; # For line continuation + undef $prepend; + } + if($Global::ignore_empty) { + if($arg =~ /^\s*$/) { + redo; # Try the next line + } + } + if($Global::max_lines) { + if($arg =~ /\s$/) { + # Trailing space => continued on next line + $prepend = $arg; + redo; + } + } + }} while (1 == 0); # Dummy loop {{}} for redo + if(defined $arg) { + return Arg->new($arg); + } else { + ::die_bug("multiread arg undefined"); + } +} + +sub expand_combinations { + # Input: + # ([xmin,xmax], [ymin,ymax], ...) + # Returns: ([x,y,...],[x,y,...]) + # where xmin <= x <= xmax and ymin <= y <= ymax + my $minmax_ref = shift; + my $link = shift; # This is linked to the next input source + my $xmin = $$minmax_ref[0]; + my $xmax = $$minmax_ref[1]; + my @p; + if(@_) { + my @rest = expand_combinations(@_); + if($link) { + # Linked to next col with --link/:::+/::::+ + # TODO BUG does not wrap values if not same number of vals + push(@p, map { [$$_[0], @$_] } + grep { $xmin <= $$_[0] and $$_[0] <= $xmax } @rest); + } else { + # If there are more columns: Compute those recursively + for(my $x = $xmin; $x <= $xmax; $x++) { + push @p, map { [$x, @$_] } @rest; + } + } + } else { + for(my $x = $xmin; $x <= $xmax; $x++) { + push @p, [$x]; + } + } + return @p; +} + + +package Arg; + +sub new { + my $class = shift; + my $orig = shift; + my @hostgroups; + if($opt::hostgroups) { + if($orig =~ s:@(.+)::) { + # We found hostgroups on the arg + @hostgroups = split(/\+/, $1); + if(not grep { defined $Global::hostgroups{$_} } @hostgroups) { + ::warning("No such hostgroup (@hostgroups)."); + @hostgroups = (keys %Global::hostgroups); + } + } else { + @hostgroups = (keys %Global::hostgroups); + } + } + return bless { + 'orig' => $orig, + 'hostgroups' => \@hostgroups, + }, ref($class) || $class; +} + +sub Q { + # Q alias for ::shell_quote_scalar + # Run shell_quote_scalar once to set the reference to the sub + my $a = ::shell_quote_scalar(@_); + *Q = \&::shell_quote_scalar; + return $a; +} + +sub pQ { + # pQ alias for ::perl_quote_scalar + *pQ = \&::perl_quote_scalar; + return pQ(@_); +} + +sub total_jobs { + return $Global::JobQueue->total_jobs() +} + +{ + my %perleval; + my $job; + sub skip { + # shorthand for $job->skip(); + $job->skip(); + } + sub slot { + # shorthand for $job->slot(); + $job->slot(); + } + + sub replace { + # Calculates the corresponding value for a given perl expression + # Returns: + # The calculated string (quoted if asked for) + my $self = shift; + my $perlexpr = shift; # E.g. 
$_=$_ or s/.gz// + my $quote = (shift) ? 1 : 0; # should the string be quoted? + # This is actually a CommandLine-object, + # but it looks nice to be able to say {= $job->slot() =} + $job = shift; + $perlexpr =~ s/^(-?\d+)? *//; # Positional replace treated as normal replace + if(not $self->{'cache'}{$perlexpr}) { + # Only compute the value once + # Use $_ as the variable to change + local $_; + if($Global::trim eq "n") { + $_ = $self->{'orig'}; + } else { + # Trim the input + $_ = trim_of($self->{'orig'}); + } + ::debug("replace", "eval ", $perlexpr, " ", $_, "\n"); + if(not $perleval{$perlexpr}) { + # Make an anonymous function of the $perlexpr + # And more importantly: Compile it only once + if($perleval{$perlexpr} = + eval('sub { no strict; no warnings; my $job = shift; '. + $perlexpr.' }')) { + # All is good + } else { + # The eval failed. Maybe $perlexpr is invalid perl? + ::error("Cannot use $perlexpr: $@"); + ::wait_and_exit(255); + } + } + # Execute the function + $perleval{$perlexpr}->($job); + $self->{'cache'}{$perlexpr} = $_; + } + # Return the value quoted if needed + return($quote ? ::shell_quote_scalar($self->{'cache'}{$perlexpr}) + : $self->{'cache'}{$perlexpr}); + } +} + +sub flush_cache { + # Flush cache of computed values + my $self = shift; + $self->{'cache'} = undef; +} + +sub orig { + my $self = shift; + return $self->{'orig'}; +} + +sub trim_of { + # Removes white space as specifed by --trim: + # n = nothing + # l = start + # r = end + # lr|rl = both + # Returns: + # string with white space removed as needed + my @strings = map { defined $_ ? $_ : "" } (@_); + my $arg; + if($Global::trim eq "n") { + # skip + } elsif($Global::trim eq "l") { + for my $arg (@strings) { $arg =~ s/^\s+//; } + } elsif($Global::trim eq "r") { + for my $arg (@strings) { $arg =~ s/\s+$//; } + } elsif($Global::trim eq "rl" or $Global::trim eq "lr") { + for my $arg (@strings) { $arg =~ s/^\s+//; $arg =~ s/\s+$//; } + } else { + ::error("--trim must be one of: r l rl lr."); + ::wait_and_exit(255); + } + return wantarray ? @strings : "@strings"; +} + + +package TimeoutQueue; + +sub new { + my $class = shift; + my $delta_time = shift; + my ($pct); + if($delta_time =~ /(\d+(\.\d+)?)%/) { + # Timeout in percent + $pct = $1/100; + $delta_time = 1_000_000; + } + return bless { + 'queue' => [], + 'delta_time' => $delta_time, + 'pct' => $pct, + 'remedian_idx' => 0, + 'remedian_arr' => [], + 'remedian' => undef, + }, ref($class) || $class; +} + +sub delta_time { + my $self = shift; + return $self->{'delta_time'}; +} + +sub set_delta_time { + my $self = shift; + $self->{'delta_time'} = shift; +} + +sub remedian { + my $self = shift; + return $self->{'remedian'}; +} + +sub set_remedian { + # Set median of the last 999^3 (=997002999) values using Remedian + # + # Rousseeuw, Peter J., and Gilbert W. Bassett Jr. "The remedian: A + # robust averaging method for large data sets." Journal of the + # American Statistical Association 85.409 (1990): 97-104. 
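+ # How the cascade below works: level 0 is a ring buffer of the last
+ # 999 raw values. On every insert, the median of level 0 is written
+ # into a slot of level 1, the median of level 1 into level 2, and
+ # the median of level 2 becomes the current remedian - a
+ # constant-memory approximation of the running median.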
+ my $self = shift; + my $val = shift; + my $i = $self->{'remedian_idx'}++; + my $rref = $self->{'remedian_arr'}; + $rref->[0][$i%999] = $val; + $rref->[1][$i/999%999] = (sort @{$rref->[0]})[$#{$rref->[0]}/2]; + $rref->[2][$i/999/999%999] = (sort @{$rref->[1]})[$#{$rref->[1]}/2]; + $self->{'remedian'} = (sort @{$rref->[2]})[$#{$rref->[2]}/2]; +} + +sub update_median_runtime { + # Update delta_time based on runtime of finished job if timeout is + # a percentage + my $self = shift; + my $runtime = shift; + if($self->{'pct'}) { + $self->set_remedian($runtime); + $self->{'delta_time'} = $self->{'pct'} * $self->remedian(); + ::debug("run", "Timeout: $self->{'delta_time'}s "); + } +} + +sub process_timeouts { + # Check if there was a timeout + my $self = shift; + # $self->{'queue'} is sorted by start time + while (@{$self->{'queue'}}) { + my $job = $self->{'queue'}[0]; + if($job->endtime()) { + # Job already finished. No need to timeout the job + # This could be because of --keep-order + shift @{$self->{'queue'}}; + } elsif($job->is_timedout($self->{'delta_time'})) { + # Need to shift off queue before kill + # because kill calls usleep that calls process_timeouts + shift @{$self->{'queue'}}; + $job->kill(); + } else { + # Because they are sorted by start time the rest are later + last; + } + } +} + +sub insert { + my $self = shift; + my $in = shift; + push @{$self->{'queue'}}, $in; +} + + +package SQL; + +sub new { + my $class = shift; + my $dburl = shift; + $Global::use{"DBI"} ||= eval "use DBI; 1;"; + # +DBURL = append to this DBURL + my $append = $dburl=~s/^\+//; + my %options = parse_dburl(get_alias($dburl)); + my %driveralias = ("sqlite" => "SQLite", + "sqlite3" => "SQLite", + "pg" => "Pg", + "postgres" => "Pg", + "postgresql" => "Pg", + "csv" => "CSV", + "oracle" => "Oracle", + "ora" => "Oracle"); + my $driver = $driveralias{$options{'databasedriver'}} || + $options{'databasedriver'}; + my $database = $options{'database'}; + my $host = $options{'host'} ? ";host=".$options{'host'} : ""; + my $port = $options{'port'} ? ";port=".$options{'port'} : ""; + my $dsn = "DBI:$driver:dbname=$database$host$port"; + my $userid = $options{'user'}; + my $password = $options{'password'};; + my $dbh = DBI->connect($dsn, $userid, $password, + { RaiseError => 1, AutoInactiveDestroy => 1 }) + or die $DBI::errstr; + $dbh->{'PrintWarn'} = $Global::debug || 0; + $dbh->{'PrintError'} = $Global::debug || 0; + $dbh->{'RaiseError'} = 1; + $dbh->{'ShowErrorStatement'} = 1; + $dbh->{'HandleError'} = sub {}; + + + return bless { + 'dbh' => $dbh, + 'driver' => $driver, + 'max_number_of_args' => undef, + 'table' => $options{'table'}, + 'append' => $append, + }, ref($class) || $class; +} + +sub get_alias { + my $alias = shift; + $alias =~ s/^(sql:)*//; # Accept aliases prepended with sql: + if ($alias !~ /^:/) { + return $alias; + } + + # Find the alias + my $path; + if (-l $0) { + ($path) = readlink($0) =~ m|^(.*)/|; + } else { + ($path) = $0 =~ m|^(.*)/|; + } + + my @deprecated = ("$ENV{HOME}/.dburl.aliases", + "$path/dburl.aliases", "$path/dburl.aliases.dist"); + for (@deprecated) { + if(-r $_) { + ::warning("$_ is deprecated. ". 
+ "Use .sql/aliases instead (read man sql)."); + } + } + my @urlalias=(); + check_permissions("$ENV{HOME}/.sql/aliases"); + check_permissions("$ENV{HOME}/.dburl.aliases"); + my @search = ("$ENV{HOME}/.sql/aliases", + "$ENV{HOME}/.dburl.aliases", "/etc/sql/aliases", + "$path/dburl.aliases", "$path/dburl.aliases.dist"); + for my $alias_file (@search) { + if(-r $alias_file) { + push @urlalias, `cat "$alias_file"`; + } + } + my ($alias_part,$rest) = $alias=~/(:\w*)(.*)/; + # If we saw this before: we have an alias loop + if(grep {$_ eq $alias_part } @Private::seen_aliases) { + ::error("$alias_part is a cyclic alias."); + exit -1; + } else { + push @Private::seen_aliases, $alias_part; + } + + my $dburl; + for (@urlalias) { + /^$alias_part\s+(\S+.*)/ and do { $dburl = $1; last; } + } + + if($dburl) { + return get_alias($dburl.$rest); + } else { + ::error("$alias is not defined in @search"); + exit(-1); + } +} + +sub check_permissions { + my $file = shift; + + if(-e $file) { + if(not -o $file) { + my $username = (getpwuid($<))[0]; + ::warning("$file should be owned by $username: ". + "chown $username $file"); + } + my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size, + $atime,$mtime,$ctime,$blksize,$blocks) = stat($file); + if($mode & 077) { + my $username = (getpwuid($<))[0]; + ::warning("$file should be only be readable by $username: ". + "chmod 600 $file"); + } + } +} + +sub parse_dburl { + my $url = shift; + my %options = (); + # sql:mysql://[[user][:password]@][host][:port]/[database[/table][?sql query]] + + if($url=~m!(?:sql:)? # You can prefix with 'sql:' + ((?:oracle|ora|mysql|pg|postgres|postgresql)(?:s|ssl|)| + (?:sqlite|sqlite2|sqlite3|csv)):// # Databasedriver ($1) + (?: + ([^:@/][^:@]*|) # Username ($2) + (?: + :([^@]*) # Password ($3) + )? + @)? + ([^:/]*)? # Hostname ($4) + (?: + : + ([^/]*)? # Port ($5) + )? + (?: + / + ([^/?]*)? # Database ($6) + )? + (?: + / + ([^?]*)? # Table ($7) + )? + (?: + \? + (.*)? # Query ($8) + )? + !ix) { + $options{databasedriver} = ::undef_if_empty(lc(uri_unescape($1))); + $options{user} = ::undef_if_empty(uri_unescape($2)); + $options{password} = ::undef_if_empty(uri_unescape($3)); + $options{host} = ::undef_if_empty(uri_unescape($4)); + $options{port} = ::undef_if_empty(uri_unescape($5)); + $options{database} = ::undef_if_empty(uri_unescape($6)); + $options{table} = ::undef_if_empty(uri_unescape($7)); + $options{query} = ::undef_if_empty(uri_unescape($8)); + ::debug("sql","dburl $url\n"); + ::debug("sql","databasedriver ",$options{databasedriver}, " user ", $options{user}, + " password ", $options{password}, " host ", $options{host}, + " port ", $options{port}, " database ", $options{database}, + " table ",$options{table}," query ",$options{query}, "\n"); + + } else { + ::error("$url is not a valid DBURL"); + exit 255; + } + return %options; +} + +sub uri_unescape { + # Copied from http://cpansearch.perl.org/src/GAAS/URI-1.55/URI/Escape.pm + # to avoid depending on URI::Escape + # This section is (C) Gisle Aas. 
+ # Note from RFC1630: "Sequences which start with a percent sign + # but are not followed by two hexadecimal characters are reserved + # for future extension" + my $str = shift; + if (@_ && wantarray) { + # not executed for the common case of a single argument + my @str = ($str, @_); # need to copy + foreach (@str) { + s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg; + } + return @str; + } + $str =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg if defined $str; + $str; +} + +sub run { + my $self = shift; + my $stmt = shift; + if($self->{'driver'} eq "CSV") { + $stmt=~ s/;$//; + if($stmt eq "BEGIN" or + $stmt eq "COMMIT") { + return undef; + } + } + my @retval; + my $dbh = $self->{'dbh'}; + ::debug("sql","$opt::sqlmaster$opt::sqlworker run $stmt\n"); + # Execute with the rest of the args - if any + my $rv; + my $sth; + my $lockretry = 0; + while($lockretry < 10) { + $sth = $dbh->prepare($stmt); + if($sth + and + eval { $rv = $sth->execute(@_) }) { + last; + } else { + if($@ =~ /no such table|Table .* doesn.t exist|relation ".*" does not exist/ + or + $DBI::errstr =~ /no such table|Table .* doesn.t exist|relation ".*" does not exist/) { + # This is fine: + # It is just a worker that reported back too late - + # another worker had finished the job first + # and the table was then dropped + $rv = $sth = 0; + last; + } + if($DBI::errstr =~ /locked/) { + ::debug("sql","Lock retry: $lockretry"); + $lockretry++; + ::usleep(rand()*300); + } elsif(not $sth) { + # Try again + $lockretry++; + } else { + ::error($DBI::errstr); + ::wait_and_exit(255); + } + } + } + if($lockretry >= 10) { + ::die_bug("retry > 10: $DBI::errstr"); + } + if($rv < 0 and $DBI::errstr){ + ::error($DBI::errstr); + ::wait_and_exit(255); + } + return $sth; +} + +sub get { + my $self = shift; + my $sth = $self->run(@_); + my @retval; + # If $sth = 0 it means the table was dropped by another process + while($sth) { + my @row = $sth->fetchrow_array(); + @row or last; + push @retval, \@row; + } + return \@retval; +} + +sub table { + my $self = shift; + return $self->{'table'}; +} + +sub append { + my $self = shift; + return $self->{'append'}; +} + +sub update { + my $self = shift; + my $stmt = shift; + my $table = $self->table(); + $self->run("UPDATE $table $stmt",@_); +} + +sub output { + my $self = shift; + my $commandline = shift; + + $self->update("SET Stdout = ?, Stderr = ? 
WHERE Seq = ".$commandline->seq(), + join("",@{$commandline->{'output'}{1}}), + join("",@{$commandline->{'output'}{2}})); +} + +sub max_number_of_args { + # Maximal number of args for this table + my $self = shift; + if(not $self->{'max_number_of_args'}) { + # Read the number of args from the SQL table + my $table = $self->table(); + my $v = $self->get("SELECT * FROM $table LIMIT 1;"); + my @reserved_columns = qw(Seq Host Starttime JobRuntime Send + Receive Exitval _Signal Command Stdout Stderr); + if(not $v) { + ::error("$table contains no records"); + } + # Count the number of Vx columns + $self->{'max_number_of_args'} = $#{$v->[0]} - $#reserved_columns; + } + return $self->{'max_number_of_args'}; +} + +sub set_max_number_of_args { + my $self = shift; + $self->{'max_number_of_args'} = shift; +} + +sub create_table { + my $self = shift; + if($self->append()) { return; } + my $max_number_of_args = shift; + $self->set_max_number_of_args($max_number_of_args); + my $table = $self->table(); + $self->run(qq(DROP TABLE IF EXISTS $table;)); + # BIGINT and TEXT are not supported in these databases or are too small + my %vartype = ( + "Oracle" => { "BIGINT" => "NUMBER(19,0)", + "TEXT" => "CLOB", }, + "mysql" => { "TEXT" => "LONGTEXT", }, + "CSV" => { "BIGINT" => "INT", + "FLOAT" => "REAL", }, + ); + my $BIGINT = $vartype{$self->{'driver'}}{"BIGINT"} || "BIGINT"; + my $TEXT = $vartype{$self->{'driver'}}{"TEXT"} || "TEXT"; + my $FLOAT = $vartype{$self->{'driver'}}{"FLOAT"} || "FLOAT(44)"; + my $v_def = join "", map { "V$_ $TEXT," } (1..$self->max_number_of_args()); + $self->run(qq{CREATE TABLE $table + (Seq $BIGINT, + Host $TEXT, + Starttime $FLOAT, + JobRuntime $FLOAT, + Send $BIGINT, + Receive $BIGINT, + Exitval $BIGINT, + _Signal $BIGINT, + Command $TEXT,}. + $v_def. + qq{Stdout $TEXT, + Stderr $TEXT);}); +} + +sub insert_records { + my $self = shift; + my $seq = shift; + my $record_ref = shift; + my $table = $self->table(); + my $v_cols = join ",", map { "V$_" } (1..$self->max_number_of_args()); + # Two extra value due to $seq, Exitval, Send + my $v_vals = join ",", map { "?" } (1..$self->max_number_of_args()+3); + $self->run("INSERT INTO $table (Seq,Exitval,Send,$v_cols) ". + "VALUES ($v_vals);", $seq, -1000, 0, @$record_ref[1..$#$record_ref]); +} + +sub get_record { + my $self = shift; + my @retval; + my $table = $self->table(); + my $v_cols = join ",", map { "V$_" } (1..$self->max_number_of_args()); + my $v = $self->get("SELECT Seq, $v_cols FROM $table ". + "WHERE Exitval = -1000 ORDER BY Seq LIMIT 1;"); + if($v->[0]) { + my $val_ref = $v->[0]; + # Mark record as taken + my $seq = shift @$val_ref; + # Save the sequence number to use when running the job + $SQL::next_seq = $seq; + $self->update("SET Exitval = ? 
WHERE Seq = ".$seq, -1220); + for (@$val_ref) { + push @retval, Arg->new($_); + } + } + if(@retval) { + return \@retval; + } else { + return undef; + } +} + +sub total_jobs { + my $self = shift; + my $table = $self->table(); + my $v = $self->get("SELECT count(*) FROM $table;"); + if($v->[0]) { + return $v->[0]->[0]; + } else { + ::die_bug("SQL::total_jobs"); + } +} + +sub max_seq { + my $self = shift; + my $table = $self->table(); + my $v = $self->get("SELECT max(Seq) FROM $table;"); + if($v->[0]) { + return $v->[0]->[0]; + } else { + ::die_bug("SQL::max_seq"); + } +} + +sub finished { + # Check if there are any jobs left in the SQL table that do not + # have a "real" exitval + my $self = shift; + if($opt::wait or $Global::start_sqlworker) { + my $table = $self->table(); + my $rv = $self->get("select Seq,Exitval from $table where Exitval <= -1000 limit 1"); + return not $rv->[0]; + } else { + return 1; + } +} + +package Semaphore; + +# This package provides a counting semaphore +# +# If a process dies without releasing the semaphore the next process +# that needs that entry will clean up dead semaphores +# +# The semaphores are stored in ~/.parallel/semaphores/id- Each +# file in ~/.parallel/semaphores/id-/ is the process ID of the +# process holding the entry. If the process dies, the entry can be +# taken by another process. + +sub new { + my $class = shift; + my $id = shift; + my $count = shift; + $id =~ s/([^-_a-z0-9])/unpack("H*",$1)/ige; # Convert non-word chars to hex + $id = "id-".$id; # To distinguish it from a process id + my $parallel_dir = $ENV{'HOME'}."/.parallel"; + -d $parallel_dir or ::mkdir_or_die($parallel_dir); + my $parallel_locks = $parallel_dir."/semaphores"; + -d $parallel_locks or ::mkdir_or_die($parallel_locks); + my $lockdir = "$parallel_locks/$id"; + my $lockfile = $lockdir.".lock"; + if($count < 1) { ::die_bug("semaphore-count: $count"); } + return bless { + 'lockfile' => $lockfile, + 'lockfh' => Symbol::gensym(), + 'lockdir' => $lockdir, + 'id' => $id, + 'idfile' => $lockdir."/".$id, + 'pid' => $$, + 'pidfile' => $lockdir."/".$$.'@'.::hostname(), + 'count' => $count + 1 # nlinks returns a link for the 'id-' as well + }, ref($class) || $class; +} + +sub remove_dead_locks { + my $self = shift; + my $lockdir = $self->{'lockdir'}; + + for my $d (glob "$lockdir/*") { + $d =~ m:$lockdir/([0-9]+)\@([-\._a-z0-9]+)$:o or next; + my ($pid, $host) = ($1, $2); + if($host eq ::hostname()) { + if(kill 0, $pid) { + ::debug("sem", "Alive: $pid $d\n"); + } else { + ::debug("sem", "Dead: $d\n"); + ::rm($d); + } + } + } +} + +sub acquire { + my $self = shift; + my $sleep = 1; # 1 ms + my $start_time = time; + while(1) { + # Can we get a lock? + $self->atomic_link_if_count_less_than() and last; + $self->remove_dead_locks(); + # Retry slower and slower up to 1 second + $sleep = ($sleep < 1000) ? ($sleep * 1.1) : ($sleep); + # Random to avoid every sleeping job waking up at the same time + ::usleep(rand()*$sleep); + if($opt::semaphoretimeout) { + if($opt::semaphoretimeout > 0 + and + time - $start_time > $opt::semaphoretimeout) { + # Timeout: Take the semaphore anyway + ::warning("Semaphore timed out. Stealing the semaphore."); + if(not -e $self->{'idfile'}) { + open (my $fh, ">", $self->{'idfile'}) or + ::die_bug("timeout_write_idfile: $self->{'idfile'}"); + close $fh; + } + link $self->{'idfile'}, $self->{'pidfile'}; + last; + } + if($opt::semaphoretimeout < 0 + and + time - $start_time > -$opt::semaphoretimeout) { + # Timeout: Exit + ::warning("Semaphore timed out. 
Exiting."); + exit(1); + last; + } + } + } + ::debug("sem", "acquired $self->{'pid'}\n"); +} + +sub release { + my $self = shift; + ::rm($self->{'pidfile'}); + if($self->nlinks() == 1) { + # This is the last link, so atomic cleanup + $self->lock(); + if($self->nlinks() == 1) { + ::rm($self->{'idfile'}); + rmdir $self->{'lockdir'}; + } + $self->unlock(); + } + ::debug("run", "released $self->{'pid'}\n"); +} + +sub pid_change { + # This should do what release()+acquire() would do without having + # to re-acquire the semaphore + my $self = shift; + + my $old_pidfile = $self->{'pidfile'}; + $self->{'pid'} = $$; + $self->{'pidfile'} = $self->{'lockdir'}."/".$$.'@'.::hostname(); + my $retval = link $self->{'idfile'}, $self->{'pidfile'}; + ::debug("sem","link($self->{'idfile'},$self->{'pidfile'})=$retval\n"); + ::rm($old_pidfile); +} + +sub atomic_link_if_count_less_than { + # Link $file1 to $file2 if nlinks to $file1 < $count + my $self = shift; + my $retval = 0; + $self->lock(); + my $nlinks = $self->nlinks(); + ::debug("sem","$nlinks<$self->{'count'} "); + if($nlinks < $self->{'count'}) { + -d $self->{'lockdir'} or ::mkdir_or_die($self->{'lockdir'}); + if(not -e $self->{'idfile'}) { + open (my $fh, ">", $self->{'idfile'}) or + ::die_bug("write_idfile: $self->{'idfile'}"); + close $fh; + } + $retval = link $self->{'idfile'}, $self->{'pidfile'}; + ::debug("sem","link($self->{'idfile'},$self->{'pidfile'})=$retval\n"); + } + $self->unlock(); + ::debug("sem", "atomic $retval"); + return $retval; +} + +sub nlinks { + my $self = shift; + if(-e $self->{'idfile'}) { + return (stat(_))[3]; + } else { + return 0; + } +} + +sub lock { + my $self = shift; + my $sleep = 100; # 100 ms + my $total_sleep = 0; + $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;"; + my $locked = 0; + while(not $locked) { + if(tell($self->{'lockfh'}) == -1) { + # File not open + open($self->{'lockfh'}, ">", $self->{'lockfile'}) + or ::debug("run", "Cannot open $self->{'lockfile'}"); + } + if($self->{'lockfh'}) { + # File is open + chmod 0666, $self->{'lockfile'}; # assuming you want it a+rw + if(flock($self->{'lockfh'}, LOCK_EX()|LOCK_NB())) { + # The file is locked: No need to retry + $locked = 1; + last; + } else { + if ($! =~ m/Function not implemented/) { + ::warning("flock: $!", + "Will wait for a random while."); + ::usleep(rand(5000)); + # File cannot be locked: No need to retry + $locked = 2; + last; + } + } + } + # Locking failed in first round + # Sleep and try again + $sleep = ($sleep < 1000) ? ($sleep * 1.1) : ($sleep); + # Random to avoid every sleeping job waking up at the same time + ::usleep(rand()*$sleep); + $total_sleep += $sleep; + if($opt::semaphoretimeout) { + if($opt::semaphoretimeout > 0 + and + $total_sleep/1000 > $opt::semaphoretimeout) { + # Timeout: Take the semaphore anyway + ::warning("Semaphore timed out. Taking the semaphore."); + $locked = 3; + last; + } + if($opt::semaphoretimeout < 0 + and + $total_sleep/1000 > -$opt::semaphoretimeout) { + # Timeout: Exit + ::warning("Semaphore timed out. Exiting."); + $locked = 4; + last; + } + } else { + if($total_sleep/1000 > 30) { + ::warning("Semaphore stuck for 30 seconds. 
Consider using --semaphoretimeout."); + } + } + } + ::debug("run", "locked $self->{'lockfile'}"); +} + +sub unlock { + my $self = shift; + ::rm($self->{'lockfile'}); + close $self->{'lockfh'}; + ::debug("run", "unlocked\n"); +} + +# Keep perl -w happy + +$opt::x = $Semaphore::timeout = $Semaphore::wait = +$Job::file_descriptor_warning_printed = $Global::envdef = @Arg::arg = +$Global::max_slot_number; diff --git a/src/components/left-nav.js b/src/components/left-nav.js index 6a2ed406a29..88c3da4887c 100644 --- a/src/components/left-nav.js +++ b/src/components/left-nav.js @@ -4,14 +4,13 @@ import VersionDropdown from "./version-dropdown"; import { products } from "../constants/products"; import { Link, PdfDownload, TreeNode } from "./"; -const productIcon = (path) => { - const product = path.split("/")[1]; +const productIcon = (product) => { return products[product] ? products[product].iconName : null; }; -const SectionHeading = ({ navTree, path, iconName }) => { +const SectionHeading = ({ navTree, path, iconName, product }) => { // if iconName starts with "edb_postgres_ai" then set the fill color to black - let myIconName = iconName || productIcon(path) || iconNames.DOTTED_BOX; + let myIconName = iconName || productIcon(product) || iconNames.DOTTED_BOX; let className = "fill-orange me-3"; if (myIconName && myIconName.startsWith("edb_postgres_ai")) { className = "fill-aquamarine me-3"; @@ -49,6 +48,7 @@ const SectionHeadingWithVersions = ({ path, versionArray, iconName, + product, hideVersion, }) => { return ( @@ -58,7 +58,7 @@ const SectionHeadingWithVersions = ({ className="d-block py-1 align-middle balance-text h5 m-0 text-dark" > ) : ( - + )} {navTree.items.map((node) => ( ))}
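+ {/* PdfDownload now takes product and version, which pdf-download.js below uses to compose the PDF path */}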
- +
  • ); diff --git a/src/components/pdf-download.js b/src/components/pdf-download.js index b02728f95ef..8f257f0e46a 100644 --- a/src/components/pdf-download.js +++ b/src/components/pdf-download.js @@ -3,50 +3,53 @@ import { useStaticQuery, graphql } from "gatsby"; import Icon, { iconNames } from "./icon"; import usePathPrefix from "../hooks/use-path-prefix"; -const PdfDownload = ({ pagePath, hidePDF }) => { +const PdfDownload = ({ pagePath, product, version, hidePDF }) => { const data = useStaticQuery(graphql` - { - allPublicFile(filter: { ext: { eq: ".pdf" } }) { + query { + allMdx(filter: { frontmatter: { pdf: { eq: true } } }) { nodes { - absolutePath - urlPath + fields { + path + } } } } `); - const productPath = pagePath.split("/").slice(0, 3).join("/"); - - const file = data.allPublicFile.nodes.find((pdf) => { - const productVersionPath = pdf.absolutePath - .split("/") - .slice(-3, -1) - .join("/"); - return `/${productVersionPath}` === productPath; - }); - const pathPrefix = usePathPrefix(); if (hidePDF) { return null; } - if (file) { - return ( - - ); - } - return null; + const pdfBase = data.allMdx.nodes.find((pdfIndex) => { + const basePath = pdfIndex.fields.path; + return pagePath.startsWith(basePath); + })?.fields?.path; + + if (!pdfBase || !product) return null; + + const pdfFile = + pathPrefix + + "/pdfs" + + pdfBase + + product + + (version ? "_v" + version : "") + + "_documentation.pdf"; + + return ( + + ); }; export default PdfDownload; diff --git a/src/templates/doc.js b/src/templates/doc.js index bec7b486a56..cace84c5244 100644 --- a/src/templates/doc.js +++ b/src/templates/doc.js @@ -238,6 +238,8 @@ const DocTemplate = ({ data, pageContext }) => { iconName={iconName} hideVersion={frontmatter.hideVersion} hidePDF={hidePDF} + product={product} + version={version} /> diff --git a/src/templates/learn-doc.js b/src/templates/learn-doc.js index 91132384f08..022babcb0dd 100644 --- a/src/templates/learn-doc.js +++ b/src/templates/learn-doc.js @@ -202,6 +202,8 @@ const LearnDocTemplate = ({ data, pageContext }) => { path={mdx.fields.path} pagePath={pagePath} iconName={iconName} + product={frontmatter.product} + version={frontmatter.version} /> From d7b974486398f297fe8510b69b264ee59579c8fb Mon Sep 17 00:00:00 2001 From: Josh Heyer Date: Mon, 13 Jan 2025 17:28:55 +0000 Subject: [PATCH 3/4] Switch to cache for transferring data --- .github/workflows/build-pages.yml | 20 +++++++----- .github/workflows/build-pdfs.yml | 28 ++++++++-------- .github/workflows/deploy-draft.yml | 9 +++++- .github/workflows/deploy-to-netlify.yml | 43 +++++++++++++++++++++---- 4 files changed, 70 insertions(+), 30 deletions(-) diff --git a/.github/workflows/build-pages.yml b/.github/workflows/build-pages.yml index 75e4e6e03d7..1cfb0406b3b 100644 --- a/.github/workflows/build-pages.yml +++ b/.github/workflows/build-pages.yml @@ -20,10 +20,16 @@ on: description: SHA, should correspond to ref required: true type: string + outputs: + cache-key: + description: key to use to pull built pages from cache + value: ${{ jobs.build.outputs.cache-key }} jobs: build: runs-on: docs-16c-64gb-600gb + outputs: + cache-key: ${{ steps.cache-key.outputs.key }} steps: - uses: actions/checkout@v4 with: @@ -54,6 +60,11 @@ jobs: - name: Run NPM install scripts run: | npm rebuild + + - name: Compose cache key + id: cache-key + run: | + echo "key=${{ runner.os }}-gatsby-build-${{ hashFiles('package.json', 'gatsby-config.js', 'gatsby-node.js') }}-${{ inputs.ref }}-${{ inputs.sha }}" >> "$GITHUB_OUTPUT" - name: Checking Gatsby cache id: 
gatsby-cache-build @@ -63,7 +74,7 @@ jobs: public/* !public/pdfs .cache - key: ${{ runner.os }}-gatsby-build-${{ hashFiles('package.json', 'gatsby-config.js', 'gatsby-node.js') }}-${{ inputs.ref }}-${{ inputs.sha }} + key: ${{ steps.cache-key.outputs.key }} restore-keys: | ${{ runner.os }}-gatsby-build-${{ hashFiles('package.json', 'gatsby-config.js', 'gatsby-node.js') }}-${{ inputs.ref }} ${{ runner.os }}-gatsby-build-${{ hashFiles('package.json', 'gatsby-config.js', 'gatsby-node.js') }} @@ -85,10 +96,3 @@ jobs: ALGOLIA_INDEX_NAME: edb-docs-staging INDEX_ON_BUILD: ${{ inputs.ref == 'develop' }} - - name: Store pages - uses: actions/upload-artifact@v4 - with: - name: pages-build - path: | - public/* - !public/pdfs diff --git a/.github/workflows/build-pdfs.yml b/.github/workflows/build-pdfs.yml index eeb26a6fb34..1577451b948 100644 --- a/.github/workflows/build-pdfs.yml +++ b/.github/workflows/build-pdfs.yml @@ -20,10 +20,16 @@ on: description: SHA, should correspond to ref required: true type: string - + outputs: + cache-key: + description: key to use to pull built pages from cache + value: ${{ jobs.build.outputs.cache-key }} + jobs: build: runs-on: docs-16c-64gb-600gb + outputs: + cache-key: ${{ steps.cache-key.outputs.key }} steps: - uses: actions/checkout@v4 with: @@ -61,6 +67,11 @@ jobs: run: | npm install + - name: Compose cache key + id: cache-key + run: | + echo "key=${{ runner.os }}-build-pdfs-${{ hashFiles('scripts/pdf/*', 'scripts/pdf/lib/*') }}-${{ inputs.sha }}" >> "$GITHUB_OUTPUT" + - name: Checking PDF cache id: pdf-cache-build uses: actions/cache@v4 @@ -70,22 +81,9 @@ jobs: product_docs/**/*.pdf-hash advocacy_docs/**/*.pdf advocacy_docs/**/*.pdf-hash - key: ${{ runner.os }}-build-pdfs-${{ hashFiles('scripts/pdf/*', 'scripts/pdf/lib/*') }}-${{ inputs.sha }} + key: ${{ steps.cache-key.outputs.key }} restore-keys: | ${{ runner.os }}-build-pdfs-${{ hashFiles('scripts/pdf/*', 'scripts/pdf/lib/*') }} - name: Build all pdfs run: npm run pdf:build-all-ci - - - name: Copy pdfs to build output - run: | - mkdir -p public/pdfs - shopt -s globstar - rsync -avm --filter="+ */" --filter="-! *.pdf" advocacy_docs/ public/pdfs/ - rsync -avm --filter="+ */" --filter="-! 
*.pdf" product_docs/docs/ public/pdfs/ - - - name: Store pdfs - uses: actions/upload-artifact@v4 - with: - name: pdf-build - path: public/* diff --git a/.github/workflows/deploy-draft.yml b/.github/workflows/deploy-draft.yml index 78c7c8354d7..30e3d6a4526 100644 --- a/.github/workflows/deploy-draft.yml +++ b/.github/workflows/deploy-draft.yml @@ -38,12 +38,17 @@ jobs: # this job will fail only if build-pdfs failed (not if it was skipped) # however, it will set an output - check-has-pdfs.steps.check.has-pdfs - true if PDFs were generated check-has-pdfs: + runs-on: ubuntu-22.04 needs: build-pdfs if: ${{ always() }} - runs-on: ubuntu-22.04 + outputs: + cache-key: ${{ steps.check.outputs.cache-key }} + has-pdfs: ${{ steps.check.outputs.has-pdfs }} steps: - name: check + id: check run: | + echo "cache-key=${{ needs.build-pdfs.outputs.cache-key }}" >> "$GITHUB_OUTPUT" echo "has-pdfs=${{ needs.build-pdfs.result == 'success' }}" >> "$GITHUB_OUTPUT" exit ${{ needs.build-pdfs.result == 'failure' && 1 || 0 }} @@ -53,6 +58,8 @@ jobs: if: ${{ !cancelled() && needs.build-pages.result == 'success' && needs.check-has-pdfs.result == 'success' }} uses: ./.github/workflows/deploy-to-netlify.yml with: + pages-cache-key: ${{ needs.build-pages.outputs.cache-key }} + pdf-cache-key: ${{ needs.check-has-pdfs.outputs.cache-key }} enable-commit-comment: false alias: deploy-preview-${{ github.event.number }} secrets: inherit diff --git a/.github/workflows/deploy-to-netlify.yml b/.github/workflows/deploy-to-netlify.yml index 94c922effd9..3e08396aee3 100644 --- a/.github/workflows/deploy-to-netlify.yml +++ b/.github/workflows/deploy-to-netlify.yml @@ -3,6 +3,14 @@ on: workflow_dispatch: workflow_call: inputs: + pages-cache-key: + description: key for pages cache + required: true + type: string + pdf-cache-key: + description: key for pdf cache + required: false + type: string enable-pull-request-comment: description: whether to comment on the triggering PR required: false @@ -20,14 +28,37 @@ on: jobs: deploy: - runs-on: ubuntu-22.04 + runs-on: docs-16c-64gb-600gb steps: - - name: Pull build artifacts - uses: actions/download-artifact@v4 - with: - path: ./public - merge-multiple: true + - name: Pull pages cache + uses: actions/cache/restore@v4 + with: + fail-on-cache-miss: true + path: | + public/* + !public/pdfs + .cache + key: ${{ inputs.pages-cache-key }} + + - name: Pull PDF cache + uses: actions/cache/restore@v4 + with: + path: | + product_docs/**/*.pdf + product_docs/**/*.pdf-hash + advocacy_docs/**/*.pdf + advocacy_docs/**/*.pdf-hash + key: ${{ inputs.pdf-cache-key }} + + - name: Copy pdfs to build output + run: | + sudo apt update + sudo apt install -y rsync + mkdir -p public/pdfs + rsync -avm --filter="+ */" --filter="-! *.pdf" advocacy_docs/ public/pdfs/ + rsync -avm --filter="+ */" --filter="-! 
*.pdf" product_docs/docs/ public/pdfs/ + - name: list run: ls -lfR ./public From de454166cd0d022e3b5be71bdabc1f4f11f92a0d Mon Sep 17 00:00:00 2001 From: Josh Heyer Date: Tue, 14 Jan 2025 04:51:49 +0000 Subject: [PATCH 4/4] expand to staging & production builds --- .github/workflows/build-pages.yml | 11 ++- .github/workflows/deploy-develop.yml | 123 ++++++------------------ .github/workflows/deploy-draft.yml | 5 +- .github/workflows/deploy-main.yml | 112 ++++++--------------- .github/workflows/deploy-to-netlify.yml | 17 +++- gatsby-config.js | 2 +- 6 files changed, 87 insertions(+), 183 deletions(-) diff --git a/.github/workflows/build-pages.yml b/.github/workflows/build-pages.yml index 1cfb0406b3b..4862b879ffa 100644 --- a/.github/workflows/build-pages.yml +++ b/.github/workflows/build-pages.yml @@ -20,6 +20,10 @@ on: description: SHA, should correspond to ref required: true type: string + gtm-id: + description: Google Tag Manager ID + required: false + type: string outputs: cache-key: description: key to use to pull built pages from cache @@ -86,13 +90,14 @@ jobs: - name: Gatsby build run: npm run build env: - APP_ENV: staging + APP_ENV: ${{ inputs.ref == 'main' && 'production' || 'staging' }} NODE_ENV: ${{ vars.NODE_ENV }} NODE_OPTIONS: --max-old-space-size=4096 FATHOM_SITE_ID: ${{ vars.FATHOM_SITE_ID }} ALGOLIA_API_KEY: ${{ secrets.ALGOLIA_API_KEY }} ALGOLIA_SEARCH_ONLY_KEY: ${{ vars.ALGOLIA_SEARCH_ONLY_KEY }} ALGOLIA_APP_ID: ${{ vars.ALGOLIA_APP_ID }} - ALGOLIA_INDEX_NAME: edb-docs-staging - INDEX_ON_BUILD: ${{ inputs.ref == 'develop' }} + ALGOLIA_INDEX_NAME: ${{ inputs.ref == 'refs/heads/main' && 'edb-docs' || 'edb-docs-staging' }} + INDEX_ON_BUILD: ${{ inputs.ref == 'refs/heads/develop' || inputs.ref == 'refs/heads/main' }} + GTM_ID: ${{ inputs.gtm-id }} diff --git a/.github/workflows/deploy-develop.yml b/.github/workflows/deploy-develop.yml index bebf7b7777a..538c8c3aff2 100644 --- a/.github/workflows/deploy-develop.yml +++ b/.github/workflows/deploy-develop.yml @@ -10,101 +10,38 @@ concurrency: cancel-in-progress: true jobs: - build-deploy: - runs-on: docs-16c-64gb-600gb + build-pages: + uses: ./.github/workflows/build-pages.yml + with: + ref: ${{ github.ref }} + sha: ${{ github.sha }} + secrets: inherit + + build-pdfs: + uses: ./.github/workflows/build-pdfs.yml + with: + ref: ${{ github.ref }} + sha: ${{ github.sha }} + + deploy-to-netlify: + needs: [build-pages, build-pdfs] + uses: ./.github/workflows/deploy-to-netlify.yml + with: + production: true + pages-cache-key: ${{ needs.build-pages.outputs.cache-key }} + pdf-cache-key: ${{ needs.build-pdfs.outputs.cache-key }} + enable-pull-request-comment: false + secrets: + NETLIFY_SITE_ID: ${{ secrets.NETLIFY_DEVELOP_SITE_ID }} + NETLIFY_AUTH_TOKEN: ${{ secrets.NETLIFY_AUTH_TOKEN }} + + report-failure: + needs: [deploy-to-netlify] + if: ${{ failure() }} + runs-on: ubuntu steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 # fetch whole repo so git-restore-mtime can work - lfs: true - - - name: Adjust file watchers limit - run: echo fs.inotify.max_user_watches=524288 | sudo tee -a /etc/sysctl.conf && sudo sysctl -p - - - uses: actions/setup-node@v4 - with: - node-version-file: ".nvmrc" - cache: "npm" - cache-dependency-path: "package-lock.json" - env: - NODE_ENV: ${{ vars.NODE_ENV }} - NPM_TOKEN: ${{ secrets.NPM_TOKEN }} - - - name: Install dependencies - run: | - npm run presetup - npm ci --ignore-scripts - env: - NODE_ENV: ${{ vars.NODE_ENV }} - NPM_TOKEN: ${{ secrets.NPM_TOKEN }} - - - name: Run NPM install scripts - run: | 
- npm rebuild - - - name: Checking Gatsby cache - id: gatsby-cache-build - uses: actions/cache@v4 - with: - path: | - public/* - !public/pdfs - .cache - key: ${{ runner.os }}-gatsby-build-develop-${{ hashFiles('package.json', 'gatsby-config.js', 'gatsby-node.js') }}-${{ github.sha }} - restore-keys: | - ${{ runner.os }}-gatsby-build-develop-${{ hashFiles('package.json', 'gatsby-config.js', 'gatsby-node.js') }} - ${{ runner.os }}-gatsby-build-develop - - - uses: actions/setup-python@v5 - with: - python-version: "3.11.6" - - uses: r-lib/actions/setup-pandoc@v2 - with: - pandoc-version: "2.14.1" - - name: Install wkhtmltopdf - run: | - curl -L https://github.com/wkhtmltopdf/packaging/releases/download/0.12.6-1/wkhtmltox_0.12.6-1.focal_amd64.deb > wkhtmltopdf.deb - sudo apt-get install ./wkhtmltopdf.deb - - name: Install Python dependencies - run: pip install -r requirements-ci.txt - - name: Install PDF Node dependencies - working-directory: ./scripts/pdf - run: | - npm install - - name: Build all pdfs - run: npm run pdf:build-all-ci - - - name: Fix mtimes - run: npm run fix-mtimes - - - name: Gatsby build - run: npm run build - env: - APP_ENV: staging - NODE_ENV: ${{ vars.NODE_ENV }} - NODE_OPTIONS: --max-old-space-size=4096 - FATHOM_SITE_ID: ${{ vars.FATHOM_SITE_ID }} - ALGOLIA_API_KEY: ${{ secrets.ALGOLIA_API_KEY }} - ALGOLIA_SEARCH_ONLY_KEY: ${{ vars.ALGOLIA_SEARCH_ONLY_KEY }} - ALGOLIA_APP_ID: ${{ vars.ALGOLIA_APP_ID }} - ALGOLIA_INDEX_NAME: edb-docs-staging - INDEX_ON_BUILD: true - - - name: Deploy to Netlify - id: netlify-deploy - uses: nwtgck/actions-netlify@v3 - with: - publish-dir: "./public" - production-branch: "develop" - github-token: ${{ secrets.GITHUB_TOKEN }} - enable-commit-comment: true - env: - NETLIFY_AUTH_TOKEN: ${{ secrets.NETLIFY_AUTH_TOKEN }} - NETLIFY_SITE_ID: ${{ secrets.NETLIFY_DEVELOP_SITE_ID }} - - uses: act10ns/slack@v2 with: - status: ${{ job.status }} - if: failure() + status: ${{ needs.deploy-to-netlify.result }} env: SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} diff --git a/.github/workflows/deploy-draft.yml b/.github/workflows/deploy-draft.yml index 30e3d6a4526..cde5298fcb6 100644 --- a/.github/workflows/deploy-draft.yml +++ b/.github/workflows/deploy-draft.yml @@ -62,4 +62,7 @@ jobs: pdf-cache-key: ${{ needs.check-has-pdfs.outputs.cache-key }} enable-commit-comment: false alias: deploy-preview-${{ github.event.number }} - secrets: inherit + secrets: + NETLIFY_SITE_ID: ${{ secrets.NETLIFY_DEVELOP_SITE_ID }} + NETLIFY_AUTH_TOKEN: ${{ secrets.NETLIFY_AUTH_TOKEN }} + diff --git a/.github/workflows/deploy-main.yml b/.github/workflows/deploy-main.yml index d19661dd647..644e6e8b7c9 100644 --- a/.github/workflows/deploy-main.yml +++ b/.github/workflows/deploy-main.yml @@ -10,90 +10,40 @@ concurrency: cancel-in-progress: true jobs: - build-deploy: - runs-on: docs-16c-64gb-600gb + build-pages: + uses: ./.github/workflows/build-pages.yml + with: + ref: ${{ github.ref }} + sha: ${{ github.sha }} + gtm-id: GTM-5W8M67 + secrets: inherit + + build-pdfs: + uses: ./.github/workflows/build-pdfs.yml + with: + ref: ${{ github.ref }} + sha: ${{ github.sha }} + + deploy-to-netlify: + needs: [build-pages, build-pdfs] + uses: ./.github/workflows/deploy-to-netlify.yml + with: + production: true + pages-cache-key: ${{ needs.build-pages.outputs.cache-key }} + pdf-cache-key: ${{ needs.build-pdfs.outputs.cache-key }} + enable-pull-request-comment: false + secrets: + NETLIFY_SITE_ID: ${{ secrets.NETLIFY_MAIN_SITE_ID }} + NETLIFY_AUTH_TOKEN: ${{ secrets.NETLIFY_AUTH_TOKEN }} + + 
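+ # Report the deploy outcome to Slack; runs whether deploy-to-netlify succeeded or failed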
report-results: + needs: [deploy-to-netlify] + if: ${{ !cancelled() }} + runs-on: ubuntu steps: - - uses: actions/checkout@v4 - with: - ref: main - fetch-depth: 0 # fetch whole repo so git-restore-mtime can work - lfs: true - - - name: Adjust file watchers limit - run: echo fs.inotify.max_user_watches=524288 | sudo tee -a /etc/sysctl.conf && sudo sysctl -p - - - uses: actions/setup-node@v4 - with: - node-version-file: ".nvmrc" - cache: "npm" - cache-dependency-path: "package-lock.json" - env: - NODE_ENV: ${{ vars.NODE_ENV }} - NPM_TOKEN: ${{ secrets.NPM_TOKEN }} - - - name: Install dependencies - run: | - npm run presetup - npm ci --ignore-scripts - env: - NODE_ENV: ${{ vars.NODE_ENV }} - NPM_TOKEN: ${{ secrets.NPM_TOKEN }} - - - name: Run NPM install scripts - run: | - npm rebuild - - - uses: actions/setup-python@v5 - with: - python-version: "3.11.6" - - uses: r-lib/actions/setup-pandoc@v2 - with: - pandoc-version: "2.14.1" - - name: Install wkhtmltopdf - run: | - curl -L https://github.com/wkhtmltopdf/packaging/releases/download/0.12.6-1/wkhtmltox_0.12.6-1.focal_amd64.deb > wkhtmltopdf.deb - sudo apt-get install ./wkhtmltopdf.deb - - name: Install Python dependencies - run: pip install -r requirements-ci.txt - - name: Install PDF Node dependencies - working-directory: ./scripts/pdf - run: | - npm install - - name: Build all pdfs - run: npm run pdf:build-all-ci - - - name: Fix mtimes - run: npm run fix-mtimes - - - name: Gatsby build - run: npm run build - env: - APP_ENV: production - NODE_ENV: ${{ vars.NODE_ENV }} - NODE_OPTIONS: --max-old-space-size=4096 - FATHOM_SITE_ID: ${{ vars.FATHOM_SITE_ID }} - ALGOLIA_API_KEY: ${{ secrets.ALGOLIA_API_KEY }} - ALGOLIA_SEARCH_ONLY_KEY: ${{ vars.ALGOLIA_SEARCH_ONLY_KEY }} - ALGOLIA_APP_ID: ${{ vars.ALGOLIA_APP_ID }} - ALGOLIA_INDEX_NAME: edb-docs - GTM_ID: GTM-5W8M67 - INDEX_ON_BUILD: true - - - name: Deploy to Netlify - id: netlify-deploy - uses: nwtgck/actions-netlify@v3 - with: - publish-dir: "./public" - production-branch: "main" - github-token: ${{ secrets.GITHUB_TOKEN }} - enable-commit-comment: true - env: - NETLIFY_AUTH_TOKEN: ${{ secrets.NETLIFY_AUTH_TOKEN }} - NETLIFY_SITE_ID: ${{ secrets.NETLIFY_MAIN_SITE_ID }} - - uses: act10ns/slack@v2 with: - status: ${{ job.status }} + status: ${{ needs.deploy-to-netlify.result }} if: always() env: SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} diff --git a/.github/workflows/deploy-to-netlify.yml b/.github/workflows/deploy-to-netlify.yml index 3e08396aee3..35b3ff137b1 100644 --- a/.github/workflows/deploy-to-netlify.yml +++ b/.github/workflows/deploy-to-netlify.yml @@ -25,6 +25,17 @@ on: description: name for the deployment (used to set custom / stable Netlify url) required: false type: string + production: + description: whether to mark this deployment as the current production deploy for the site + required: false + default: false + type: boolean + secrets: + NETLIFY_SITE_ID: + description: Netlify site ID to deploy to + required: true + NETLIFY_AUTH_TOKEN: + required: true jobs: deploy: @@ -59,9 +70,6 @@ jobs: rsync -avm --filter="+ */" --filter="-! *.pdf" advocacy_docs/ public/pdfs/ rsync -avm --filter="+ */" --filter="-! 
*.pdf" product_docs/docs/ public/pdfs/ - - name: list - run: ls -lfR ./public - - name: Deploy to Netlify id: netlify-deploy uses: nwtgck/actions-netlify@v3 @@ -71,6 +79,7 @@ jobs: enable-pull-request-comment: ${{ inputs.enable-pull-request-comment }} enable-commit-comment: ${{ inputs.enable-commit-comment }} alias: ${{ inputs.alias }} + production-deploy: ${{ inputs.production }} env: NETLIFY_AUTH_TOKEN: ${{ secrets.NETLIFY_AUTH_TOKEN }} - NETLIFY_SITE_ID: ${{ secrets.NETLIFY_DEVELOP_SITE_ID }} + NETLIFY_SITE_ID: ${{ secrets.NETLIFY_SITE_ID }} \ No newline at end of file diff --git a/gatsby-config.js b/gatsby-config.js index d2ce185befe..26535bd3fe3 100644 --- a/gatsby-config.js +++ b/gatsby-config.js @@ -374,7 +374,7 @@ module.exports = { { resolve: "gatsby-plugin-google-tagmanager", options: { - id: process.env.GTM_ID, + id: process.env.GTM_ID || void 0, }, }, process.env.FATHOM_SITE_ID && {