|
|
|
|
@@ -1,14 +1,15 @@
|
|
|
|
|
# Template file for 'tesseract-ocr'
|
|
|
|
|
pkgname=tesseract-ocr
|
|
|
|
|
version=4.1.1
|
|
|
|
|
revision=9
|
|
|
|
|
_tessdataver=4.0.0
|
|
|
|
|
version=5.3.3
|
|
|
|
|
revision=1
|
|
|
|
|
_tessdataver=4.1.0
|
|
|
|
|
create_wrksrc=yes
|
|
|
|
|
build_style=gnu-configure
|
|
|
|
|
configure_args="LIBLEPT_HEADERSDIR=${XBPS_CROSS_BASE}/usr/include $(vopt_enable openmp)"
|
|
|
|
|
make_build_args="all training"
|
|
|
|
|
hostmakedepends="automake libtool pkg-config leptonica libxslt asciidoc"
|
|
|
|
|
makedepends="cairo-devel pango-devel leptonica-devel $(vopt_if openmp libgomp-devel) icu-devel"
|
|
|
|
|
makedepends="cairo-devel pango-devel leptonica-devel $(vopt_if openmp libgomp-devel) icu-devel
|
|
|
|
|
libarchive-devel libcurl-devel"
|
|
|
|
|
short_desc="Tesseract Open Source OCR engine"
|
|
|
|
|
maintainer="Orphaned <orphan@voidlinux.org>"
|
|
|
|
|
license="Apache-2.0"
|
|
|
|
|
@@ -16,13 +17,15 @@ homepage="https://github.com/tesseract-ocr/tesseract"
|
|
|
|
|
distfiles="
|
|
|
|
|
https://github.com/tesseract-ocr/tesseract/archive/${version}.tar.gz>${pkgname}-${version}.tar.gz
|
|
|
|
|
https://github.com/tesseract-ocr/tessdata/archive/${_tessdataver}.tar.gz>tessdata-${_tessdataver}.tar.gz"
|
|
|
|
|
checksum="2a66ff0d8595bff8f04032165e6c936389b1e5727c3ce5a27b3e059d218db1cb
|
|
|
|
|
38c637d3a1763f6c3d32e8f1d979f045668676ec5feb8ee1869ee77cedd31b08"
|
|
|
|
|
checksum="dc4329f85f41191b2d813b71b528ba6047745813474e583ccce8795ff2ff5681
|
|
|
|
|
990fffb9b7a9b52dc9a2d053a9ef6852ca2b72bd8dfb22988b0b990a700fd3c7"
|
|
|
|
|
|
|
|
|
|
# Build options: "openmp" is the only option declared for this template.
build_options="openmp"
|
|
|
|
|
# "openmp" is switched on unless the builder turns it off.
build_options_default="openmp"
|
|
|
|
|
# Description text attached to the "openmp" option.
desc_option_openmp="Enable Open MP (gomp)"
|
|
|
|
|
|
|
|
|
|
disable_parallel_build=yes # fails to build otherwise
|
|
|
|
|
|
|
|
|
|
# Create a package for one specific language $1
|
|
|
|
|
pkg_lang() {
|
|
|
|
|
local f script lang=$1
|
|
|
|
|
@@ -46,8 +49,8 @@ pkg_lang() {
|
|
|
|
|
|
|
|
|
|
# post_extract: merge the two extracted source trees into a single build tree.
# The contents of tesseract-${version} are moved into the current directory and
# the contents of tessdata-${_tessdataver} are moved into ${wrksrc}/tessdata.
# NOTE(review): assumes the current directory is ${wrksrc} (create_wrksrc=yes
# is set in this template) - confirm against the xbps-src manual.
post_extract() {
|
|
|
|
|
# Hoist the engine sources out of their versioned directory.
mv tesseract-${version}/* .
|
|
|
|
|
# Drop tessconfigs, configs and pdf.ttf from the tessdata tree before merging.
# Presumably ${wrksrc}/tessdata already holds entries with these names - confirm.
rm -rf tessdata-${_tessdataver}/{tessconfigs,configs,pdf.ttf}
|
|
|
|
|
# Move the remaining tessdata entries into the engine's tessdata directory.
mv tessdata-${_tessdataver}/* ${wrksrc}/tessdata
|
|
|
|
|
# rmdir refuses to remove a non-empty directory, so leftovers fail here.
rmdir tessdata-${_tessdataver}
|
|
|
|
|
}
|
|
|
|
|
pre_configure() {
|
|
|
|
|
NOCONFIGURE=1 ./autogen.sh
|
|
|
|
|
@@ -62,7 +65,6 @@ post_install() {
|
|
|
|
|
mv ${DESTDIR}/usr/share/man/man1/tesseract{,-ocr}.1
|
|
|
|
|
vdoc ChangeLog
|
|
|
|
|
vdoc README.md
|
|
|
|
|
vlicense ${FILESDIR}/COPYING LICENSE-tessdata
|
|
|
|
|
# Move the pseudo languages "equ" (math / equation detection) and
|
|
|
|
|
# "osd" (orientation and script detection) to the main package
|
|
|
|
|
for lang in equ osd; do
|
|
|
|
|
@@ -79,13 +81,6 @@ tesseract-ocr-tools_package() {
|
|
|
|
|
vmkdir usr/share/tesseract
|
|
|
|
|
vmkdir usr/share/man/man1
|
|
|
|
|
vmkdir usr/share/man/man5
|
|
|
|
|
# Copy shell scripts
|
|
|
|
|
for f in language-specific.sh tesstrain.sh tesstrain_utils.sh; do
|
|
|
|
|
if [ -e ${wrksrc}/training/${f} ]; then
|
|
|
|
|
cp -a ${wrksrc}/training/${f} \
|
|
|
|
|
${PKGDESTDIR}/usr/share/tesseract
|
|
|
|
|
fi
|
|
|
|
|
done
|
|
|
|
|
# Move tool manual pages
|
|
|
|
|
for f in ambiguous_words cntraining combine_tessdata \
|
|
|
|
|
dawg2wordlist mftraining shapeclustering unicharambigs \
|
|
|
|
|
@@ -99,7 +94,8 @@ tesseract-ocr-tools_package() {
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
tesseract-ocr-devel_package() {
|
|
|
|
|
depends="${sourcepkg}>=${version}_${revision}"
|
|
|
|
|
depends="${sourcepkg}>=${version}_${revision} leptonica-devel
|
|
|
|
|
libarchive-devel libcurl-devel"
|
|
|
|
|
short_desc+=" - development files"
|
|
|
|
|
pkg_install() {
|
|
|
|
|
vmove usr/include/tesseract
|
|
|
|
|
@@ -129,7 +125,7 @@ tesseract-ocr-all_package() {
|
|
|
|
|
for lang in afr amh ara asm aze aze_cyrl bel ben bod bos bre bul cat ceb \
|
|
|
|
|
ces chi_sim chi_tra chr cos cym dan deu div dzo ell eng enm epo est eus fao \
|
|
|
|
|
fas fil fin fra frk frm fry gla gle glg grc guj hat heb hin hrv hun hye iku ind isl ita \
|
|
|
|
|
ita_old jav jpn kan kat kat_old kaz khm kir kor kur kur_ara lao lat lav lit ltz mal mar \
|
|
|
|
|
ita_old jav jpn kan kat kat_old kaz khm kir kmr kor lao lat lav lit ltz mal mar \
|
|
|
|
|
mkd mlt mon mri msa mya nep nld nor oci ori pan pol por que pus ron rus san sin slk slv \
|
|
|
|
|
snd spa spa_old sqi srp srp_latn sun swa swe syr tam tat tel tgk tgl tha tir ton tur \
|
|
|
|
|
uig ukr urd uzb uzb_cyrl vie yid yor \
|
|
|
|
|
@@ -576,6 +572,13 @@ tesseract-ocr-kir_package() {
|
|
|
|
|
$(pkg_lang ${pkgname#tesseract-ocr-})
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
# Subpackage definition for the "kmr" language data
# (Kurmanji, Kurdish in Latin script, per short_desc below).
tesseract-ocr-kmr_package() {
|
|
|
|
|
# Depend on the main package at this version_revision or newer.
depends="${sourcepkg}>=${version}_${revision}"
|
|
|
|
|
# Appended to the short_desc value already set when this function runs.
short_desc+=" - Kurmanji (Kurdish - Latin Script) language data"
|
|
|
|
|
pkg_install() {
|
|
|
|
|
# ${pkgname#tesseract-ocr-} strips the "tesseract-ocr-" prefix from pkgname;
# the result is passed to pkg_lang as the language code. The $(...) wrapper
# then runs whatever pkg_lang prints as a command.
# NOTE(review): pkg_lang's body is not visible here - confirm what it prints.
$(pkg_lang ${pkgname#tesseract-ocr-})
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
tesseract-ocr-kor_package() {
|
|
|
|
|
depends="${sourcepkg}>=${version}_${revision}"
|
|
|
|
|
short_desc+=" - Korean language data"
|
|
|
|
|
@@ -583,20 +586,6 @@ tesseract-ocr-kor_package() {
|
|
|
|
|
$(pkg_lang ${pkgname#tesseract-ocr-})
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
# Subpackage definition for the "kur" (Kurdish) language data.
# NOTE(review): the language list of tesseract-ocr-all appears in two variants
# in this file, one with "kur kur_ara" and one with "kmr" instead - confirm
# that this subpackage is still meant to exist.
tesseract-ocr-kur_package() {
|
|
|
|
|
# Depend on the main package at this version_revision or newer.
depends="${sourcepkg}>=${version}_${revision}"
|
|
|
|
|
# Appended to the short_desc value already set when this function runs.
short_desc+=" - Kurdish language data"
|
|
|
|
|
pkg_install() {
|
|
|
|
|
# ${pkgname#tesseract-ocr-} strips the "tesseract-ocr-" prefix from pkgname;
# the result is passed to pkg_lang as the language code. The $(...) wrapper
# then runs whatever pkg_lang prints as a command.
# NOTE(review): pkg_lang's body is not visible here - confirm what it prints.
$(pkg_lang ${pkgname#tesseract-ocr-})
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
# Subpackage definition for the "kur_ara" (Kurdish, Arabic) language data.
# NOTE(review): the language list of tesseract-ocr-all appears in two variants
# in this file, one with "kur kur_ara" and one with "kmr" instead - confirm
# that this subpackage is still meant to exist.
tesseract-ocr-kur_ara_package() {
|
|
|
|
|
# Depend on the main package at this version_revision or newer.
depends="${sourcepkg}>=${version}_${revision}"
|
|
|
|
|
# Appended to the short_desc value already set when this function runs.
short_desc+=" - Kurdish (Arabic) language data"
|
|
|
|
|
pkg_install() {
|
|
|
|
|
# ${pkgname#tesseract-ocr-} strips the "tesseract-ocr-" prefix from pkgname;
# the result is passed to pkg_lang as the language code. The $(...) wrapper
# then runs whatever pkg_lang prints as a command.
# NOTE(review): pkg_lang's body is not visible here - confirm what it prints.
$(pkg_lang ${pkgname#tesseract-ocr-})
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
tesseract-ocr-lao_package() {
|
|
|
|
|
depends="${sourcepkg}>=${version}_${revision}"
|
|
|
|
|
short_desc+=" - Lao language data"
|
|
|
|
|
|