From b825401cb0fa00acafe48c9e9895ace94c5a7983 Mon Sep 17 00:00:00 2001 From: Jim White Date: Thu, 20 Mar 2014 15:07:21 -0700 Subject: [PATCH 01/17] Compile on Mavericks with gcc47. --- Makefile | 31 ++++++++++++++++++++++++----- second-stage/programs/wlle/Makefile | 10 +++++----- 2 files changed, 31 insertions(+), 10 deletions(-) diff --git a/Makefile b/Makefile index bc8325d..680a4e2 100644 --- a/Makefile +++ b/Makefile @@ -31,7 +31,7 @@ # # The following high-level goals may also be useful: # -# make nbestrain-clean # removes temporary files used in nbesttrain +# make nbesttrain-clean # removes temporary files used in nbesttrain # make nbest-oracle # oracle evaluation of n-best results # make features # extracts features from 20-fold parses # make train-reranker # trains reranker model @@ -70,10 +70,25 @@ # # GCCFLAGS = -march=native -mfpmath=sse -msse2 -mmmx -m32 +# GCCFLAGS = -march=x86_64 -mfpmath=sse -msse2 -mssse3 -mmmx -m64 + +# Must use export because otherwise second-stage/programs/wlle/Makefile doesn't get the message. 
+ +GCCFLAGS = -m64 -march=core2 -mfpmath=sse +export GCCFLAGS + +# CC = condor_compile gcc +CC = gcc +export CC + +# CXX = condor_compile g++ +CXX = g++ +export CXX + # CFLAGS is used for all C and C++ compilation # CFLAGS = -MMD -O3 -Wall -ffast-math -finline-functions -fomit-frame-pointer -fstrict-aliasing $(GCCFLAGS) -LDFLAGS = $(GCCLDFLAGS) +LDFLAGS = -L/opt/local/lib $(GCCLDFLAGS) EXEC = time # for SWIG wrappers, use these flags instead @@ -93,6 +108,11 @@ export CFLAGS export CXXFLAGS export LDFLAGS +CC=gcc +CXX=g++ +export CC +export CXX + # Building the 20-fold training data with nbesttrain # -------------------------------------------------- @@ -101,7 +121,8 @@ export LDFLAGS # # PENNWSJTREEBANK must be set to the base directory of the Penn WSJ Treebank # -PENNWSJTREEBANK=/usr/local/data/Penn3/parsed/mrg/wsj/ +# PENNWSJTREEBANK=/usr/local/data/Penn3/parsed/mrg/wsj/ +PENNWSJTREEBANK=/corpora/LDC/LDC99T42/RAW/parsed/mrg/wsj # NPARSES is the number of alternative parses to consider for each sentence # @@ -193,11 +214,11 @@ FEATURESNICKNAME=sp ESTIMATOR=second-stage/programs/wlle/cvlm-lbfgs # ESTIMATORFLAGS are flags given to the estimator -# + ESTIMATORFLAGS=-l 1 -c 10 -F 1 -n -1 -p 2 # ESTIMATORNICKNAME is used to name the feature weights file -# + ESTIMATORNICKNAME=lbfgs-l1c10F1n1p2 # ESTIMATORSTACKSIZE is the size (in KB) of the per-thread stacks diff --git a/second-stage/programs/wlle/Makefile b/second-stage/programs/wlle/Makefile index 75a803c..26cbd20 100644 --- a/second-stage/programs/wlle/Makefile +++ b/second-stage/programs/wlle/Makefile @@ -46,15 +46,15 @@ libdata.a: data.o liblmdata.a: lmdata.o ar rcv liblmdata.a lmdata.o; ranlib liblmdata.a -CC=gcc +# CC=gcc # fast options # Compilation help: you may need to remove -march=native on older compilers. 
-GCCFLAGS=-march=native -mfpmath=sse -msse2 -mmmx +# GCCFLAGS=-march=native -mfpmath=sse -msse2 -mmmx FOPENMP=-fopenmp -CFLAGS=-MMD -O3 -ffast-math -fstrict-aliasing -Wall -finline-functions $(GCCFLAGS) $(FOPENMP) -LDFLAGS=$(FOPENMP) -CXXFLAGS=${CFLAGS} -Wno-deprecated +# CFLAGS=-MMD -O3 -ffast-math -fstrict-aliasing -Wall -finline-functions $(GCCFLAGS) $(FOPENMP) +LDFLAGS+=$(FOPENMP) +# CXXFLAGS=${CFLAGS} -Wno-deprecated # debugging options # GCCFLAGS= From 7c55d76201436fe9f8b539220ffa8c0a4d531752 Mon Sep 17 00:00:00 2001 From: Jim White Date: Thu, 20 Mar 2014 15:11:51 -0700 Subject: [PATCH 02/17] Add my MacPorts setup steps. --- Makefile | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Makefile b/Makefile index 680a4e2..8e3370f 100644 --- a/Makefile +++ b/Makefile @@ -72,6 +72,12 @@ # GCCFLAGS = -march=x86_64 -mfpmath=sse -msse2 -mssse3 -mmmx -m64 +# For Mavericks (and Mountain Lion) I set up gcc using macports: +# sudo port install gcc47 +# sudo port select --set gcc mp-gcc47 +# sudo port install boost +# sudo port install liblbfgs + # Must use export because otherwise second-stage/programs/wlle/Makefile doesn't get the message. GCCFLAGS = -m64 -march=core2 -mfpmath=sse From f37a1b50edf5ae6df30902ddc0eb80b2b32c3fd2 Mon Sep 17 00:00:00 2001 From: Jim White Date: Thu, 20 Mar 2014 21:07:52 -0700 Subject: [PATCH 03/17] Chase down why -march=native isn't working for gcc after 4.2. Add some comments about that. --- Makefile | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/Makefile b/Makefile index 8e3370f..2c8c7d9 100644 --- a/Makefile +++ b/Makefile @@ -69,8 +69,23 @@ # versions will need -march=pentium4 or -march=opteron # # GCCFLAGS = -march=native -mfpmath=sse -msse2 -mmmx -m32 +# +# On Mac OS X using -march=native doesn't seem to work, so we need to set it. 
+# This should be safe for any 64bit machine: +# GCCFLAGS = -m64 -march=x86-64 +# You can find out what switches gcc would use for your machine this way: +# gcc -Q --help=target -march=native +# So for a 2013 MacBook Air we might expect to use something like this: +# GCCFLAGS = -m64 -march=ivybridge -mfpmath=sse -msse -msse2 -msse3 -msse4 -msse4.1 -msse4.2 -mavx -mssse3 +# But we can't use the -mavx switch because of a problem with the assembler setup. +# Which is presumably why -march=native fails, as indicated by other folks' experience as well: +# http://stackoverflow.com/questions/12016281/g-no-such-instruction-with-avx +# http://mac-os-forge.2317878.n4.nabble.com/gcc-as-AVX-binutils-and-MacOS-X-10-7-td144472.html +# So I wind up with this: +GCCFLAGS = -m64 -march=x86-64 -mfpmath=sse -msse -msse2 -msse3 -msse4 -msse4.1 -msse4.2 -mssse3 -# GCCFLAGS = -march=x86_64 -mfpmath=sse -msse2 -mssse3 -mmmx -m64 +# Must use export because otherwise second-stage/programs/wlle/Makefile doesn't get the message. +export GCCFLAGS # For Mavericks (and Mountain Lion) I set up gcc using macports: # sudo port install gcc47 @@ -78,11 +93,6 @@ # sudo port install boost # sudo port install liblbfgs -# Must use export because otherwise second-stage/programs/wlle/Makefile doesn't get the message. - -GCCFLAGS = -m64 -march=core2 -mfpmath=sse -export GCCFLAGS - # CC = condor_compile gcc CC = gcc export CC @@ -94,6 +104,7 @@ export CXX # CFLAGS is used for all C and C++ compilation # CFLAGS = -MMD -O3 -Wall -ffast-math -finline-functions -fomit-frame-pointer -fstrict-aliasing $(GCCFLAGS) +# For some reason macports does put liblbfgs' files on the right path, so I add it on here. 
LDFLAGS = -L/opt/local/lib $(GCCLDFLAGS) EXEC = time @@ -127,8 +138,7 @@ export CXX # # PENNWSJTREEBANK must be set to the base directory of the Penn WSJ Treebank # -# PENNWSJTREEBANK=/usr/local/data/Penn3/parsed/mrg/wsj/ -PENNWSJTREEBANK=/corpora/LDC/LDC99T42/RAW/parsed/mrg/wsj +PENNWSJTREEBANK=/usr/local/data/Penn3/parsed/mrg/wsj/ # NPARSES is the number of alternative parses to consider for each sentence # @@ -220,11 +230,11 @@ FEATURESNICKNAME=sp ESTIMATOR=second-stage/programs/wlle/cvlm-lbfgs # ESTIMATORFLAGS are flags given to the estimator - +# ESTIMATORFLAGS=-l 1 -c 10 -F 1 -n -1 -p 2 # ESTIMATORNICKNAME is used to name the feature weights file - +# ESTIMATORNICKNAME=lbfgs-l1c10F1n1p2 # ESTIMATORSTACKSIZE is the size (in KB) of the per-thread stacks From 00b2ea8fadec73d613153930196840b4291f427d Mon Sep 17 00:00:00 2001 From: Jim White Date: Thu, 20 Mar 2014 21:22:10 -0700 Subject: [PATCH 04/17] The simple switch is just disabling AVX. --- Makefile | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/Makefile b/Makefile index 2c8c7d9..a1fefe3 100644 --- a/Makefile +++ b/Makefile @@ -70,19 +70,12 @@ # # GCCFLAGS = -march=native -mfpmath=sse -msse2 -mmmx -m32 # -# On Mac OS X using -march=native doesn't seem to work, so we need to set it. -# This should be safe for any 64bit machine: -# GCCFLAGS = -m64 -march=x86-64 -# You can find out what switches gcc would use for your machine this way: -# gcc -Q --help=target -march=native -# So for a 2013 MacBook Air we might expect to use something like this: -# GCCFLAGS = -m64 -march=ivybridge -mfpmath=sse -msse -msse2 -msse3 -msse4 -msse4.1 -msse4.2 -mavx -mssse3 -# But we can't use the -mavx switch because of a problem with the assembler setup. -# Which is presumably why -march=native fails, as indicated by other folks' experience as well: +# On Mac OS X using -march=native doesn't seem to work (a compilation error will occur). 
+# Turns out there is a problem with AVX instructions on OSX for gcc after 4.2. # http://stackoverflow.com/questions/12016281/g-no-such-instruction-with-avx # http://mac-os-forge.2317878.n4.nabble.com/gcc-as-AVX-binutils-and-MacOS-X-10-7-td144472.html -# So I wind up with this: -GCCFLAGS = -m64 -march=x86-64 -mfpmath=sse -msse -msse2 -msse3 -msse4 -msse4.1 -msse4.2 -mssse3 +# So here's what works for me (with or without the -mfpmath=sse - the default is 387): +GCCFLAGS = -m64 -march=native -mno-avx -mfpmath=sse # Must use export because otherwise second-stage/programs/wlle/Makefile doesn't get the message. export GCCFLAGS From 95680e92399f7f061477976009594cb6110ed2fb Mon Sep 17 00:00:00 2001 From: Jim White Date: Thu, 20 Mar 2014 21:31:39 -0700 Subject: [PATCH 05/17] Missing a negation does bad things to meaning. --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a1fefe3..b1b32ea 100644 --- a/Makefile +++ b/Makefile @@ -97,7 +97,7 @@ export CXX # CFLAGS is used for all C and C++ compilation # CFLAGS = -MMD -O3 -Wall -ffast-math -finline-functions -fomit-frame-pointer -fstrict-aliasing $(GCCFLAGS) -# For some reason macports does put liblbfgs' files on the right path, so I add it on here. +# For some reason MacPorts does not put liblbfgs' files on the right path, so I add it on here. LDFLAGS = -L/opt/local/lib $(GCCLDFLAGS) EXEC = time From 39a51eae3e4536480e99fba515f08c444c240f43 Mon Sep 17 00:00:00 2001 From: Jim White Date: Thu, 20 Mar 2014 21:36:32 -0700 Subject: [PATCH 06/17] MacPorts will gladly install more than one library at a time. 
--- Makefile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Makefile b/Makefile index b1b32ea..8a1dde1 100644 --- a/Makefile +++ b/Makefile @@ -83,8 +83,7 @@ export GCCFLAGS # For Mavericks (and Mountain Lion) I set up gcc using macports: # sudo port install gcc47 # sudo port select --set gcc mp-gcc47 -# sudo port install boost -# sudo port install liblbfgs +# sudo port install boost liblbfgs # CC = condor_compile gcc CC = gcc From bc3500b8c53210d7f44e4e737ca1a26591902945 Mon Sep 17 00:00:00 2001 From: Jim White Date: Thu, 10 Jul 2014 00:06:14 -0700 Subject: [PATCH 07/17] Move Mac-specific settings into Makefile.mac from Makefile. --- Makefile | 30 +++++++++++++++++++--------- Makefile.mac | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 9 deletions(-) create mode 100644 Makefile.mac diff --git a/Makefile b/Makefile index bc8325d..20af839 100644 --- a/Makefile +++ b/Makefile @@ -31,7 +31,7 @@ # # The following high-level goals may also be useful: # -# make nbestrain-clean # removes temporary files used in nbesttrain +# make nbesttrain-clean # removes temporary files used in nbesttrain # make nbest-oracle # oracle evaluation of n-best results # make features # extracts features from 20-fold parses # make train-reranker # trains reranker model @@ -70,11 +70,19 @@ # # GCCFLAGS = -march=native -mfpmath=sse -msse2 -mmmx -m32 +# CC = condor_compile gcc +CC ?= gcc +export CC + +# CXX = condor_compile g++ +CXX ?= g++ +export CXX + # CFLAGS is used for all C and C++ compilation # -CFLAGS = -MMD -O3 -Wall -ffast-math -finline-functions -fomit-frame-pointer -fstrict-aliasing $(GCCFLAGS) -LDFLAGS = $(GCCLDFLAGS) -EXEC = time +CFLAGS ?= -MMD -O3 -Wall -ffast-math -finline-functions -fomit-frame-pointer -fstrict-aliasing $(GCCFLAGS) +LDFLAGS ?= $(GCCLDFLAGS) +EXEC ?= time # for SWIG wrappers, use these flags instead # @@ -88,7 +96,7 @@ EXEC = time # LDFLAGS = -g -Wall $(GCCLDFLAGS) # EXEC = valgrind -CXXFLAGS = 
$(CFLAGS) -Wno-deprecated +CXXFLAGS ?= $(CFLAGS) -Wno-deprecated export CFLAGS export CXXFLAGS export LDFLAGS @@ -101,11 +109,11 @@ export LDFLAGS # # PENNWSJTREEBANK must be set to the base directory of the Penn WSJ Treebank # -PENNWSJTREEBANK=/usr/local/data/Penn3/parsed/mrg/wsj/ +PENNWSJTREEBANK ?= /usr/local/data/Penn3/parsed/mrg/wsj/ # NPARSES is the number of alternative parses to consider for each sentence # -NPARSES=50 +NPARSES ?= 50 # NFOLDS is the number of folds to use, and FOLDS is a list of the numbers # from 00 to NFOLDS-1 (I couldn't see how to program this in make). @@ -520,8 +528,12 @@ train-reranker: $(WEIGHTSFILEGZ) # $(WEIGHTSFILEGZ): $(ESTIMATOR) $(WEIGHTSFILEGZ): $(ESTIMATOR) $(MODELDIR)/features.gz $(FEATDIR)/train.gz $(FEATDIR)/dev.gz $(FEATDIR)/test1.gz $(ESTIMATORENV) $(ZCAT) $(FEATDIR)/train.gz | $(EXEC) $(ESTIMATOR) $(ESTIMATORFLAGS) -e $(FEATDIR)/dev.gz -f $(MODELDIR)/features.gz -o $(WEIGHTSFILE) -x $(FEATDIR)/test1.gz - rm -f $(WEIGHTSFILEGZ) - gzip $(WEIGHTSFILE) + # If you use gzip's automagic renaming, be sure to use -f in case some backup program + # throws in some extra hardlinks (I'm looking at you Time Machine /.MobileBackups). + # gzip -f $(WEIGHTSFILE) + # But let's avoid that business entirely and use gzip as a filter like elsewhere in this Makefile. + gzip -c $(WEIGHTSFILE) >$(WEIGHTSFILEGZ) + rm -f $(WEIGHTSFILE) ######################################################################## # # diff --git a/Makefile.mac b/Makefile.mac new file mode 100644 index 0000000..4da6a53 --- /dev/null +++ b/Makefile.mac @@ -0,0 +1,55 @@ +# To use these defaults set the MAKEFILES environment variable when calling make. 
+# export MAKEFILES=`pwd`/Makefile.mac + +uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not') + +# For Mavericks (and Mountain Lion) I set up gcc using macports: +# sudo port install gcc47 +# sudo port select --set gcc mp-gcc47 +# sudo port install boost liblbfgs + +# On Mac OS X using -march=native doesn't seem to work (a compilation error will occur). +# Turns out there is a problem with AVX instructions on OSX for gcc after 4.2. +# http://stackoverflow.com/questions/12016281/g-no-such-instruction-with-avx +# http://mac-os-forge.2317878.n4.nabble.com/gcc-as-AVX-binutils-and-MacOS-X-10-7-td144472.html +# So here's what works for me (with or without the -mfpmath=sse - the default is 387): + +GCCFLAGS = -m64 -march=x86-64 -mfpmath=sse -msse -msse2 -msse3 -msse4 -msse4.1 -msse4.2 -mssse3 -I/opt/local/include + +# Must use export because otherwise second-stage/programs/wlle/Makefile doesn't get the message. +export GCCFLAGS + +# CC = condor_compile gcc +CC = gcc +export CC + +# CXX = condor_compile g++ +CXX = g++ +export CXX + +# fast options +# Compilation help: you may need to remove -march=native on older compilers. 
+# GCCFLAGS=-march=native -mfpmath=sse -msse2 -mmmx +FOPENMP=-fopenmp +# CFLAGS=-MMD -O3 -ffast-math -fstrict-aliasing -Wall -finline-functions $(GCCFLAGS) $(FOPENMP) +# LDFLAGS=$(FOPENMP) -L/opt/local/lib + +# debugging options +# GCCFLAGS= +# FOPENMP= +# CFLAGS=-MMD -O0 -g $(GCCFLAGS) $(FOPENMP) +# LDFLAGS=-g $(FOPENMP) +# CXXFLAGS=${CFLAGS} -Wno-deprecated + +# CFLAGS is used for all C and C++ compilation +# +CFLAGS = -MMD -O3 -Wall -ffast-math -finline-functions -fomit-frame-pointer -fstrict-aliasing $(GCCFLAGS) +export CFLAGS + +LDFLAGS = -L/opt/local/lib $(GCCLDFLAGS) +export LDFLAGS + +CXXFLAGS=${CFLAGS} -Wno-deprecated +export CXXFLAGS + +PENNWSJTREEBANK = /usr/local/data/Penn3/parsed/mrg/wsj From bd9be5c1d45abfd342e67f5ab02e0a3db67f689a Mon Sep 17 00:00:00 2001 From: Jim White Date: Thu, 10 Jul 2014 00:06:51 -0700 Subject: [PATCH 08/17] Use ESTIMATORNICKNAME=lbfgs-l1c10F1n1p2 to match the other current settings. --- parse.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/parse.sh b/parse.sh index 4c7a76c..0bf5668 100755 --- a/parse.sh +++ b/parse.sh @@ -14,5 +14,6 @@ # RERANKDATA=ec50-connll-ic-s5 # RERANKDATA=ec50-f050902-lics5 MODELDIR=second-stage/models/ec50spfinal -ESTIMATORNICKNAME=cvlm-l1c10P1 +# ESTIMATORNICKNAME=cvlm-l1c10P1 +ESTIMATORNICKNAME=lbfgs-l1c10F1n1p2 first-stage/PARSE/parseIt -l399 -N50 first-stage/DATA/EN/ $* | second-stage/programs/features/best-parses -l $MODELDIR/features.gz $MODELDIR/$ESTIMATORNICKNAME-weights.gz From 77837e591694693575749076dc7268a26e710307 Mon Sep 17 00:00:00 2001 From: Jim White Date: Thu, 10 Jul 2014 13:43:23 -0700 Subject: [PATCH 09/17] A Git .gitignore copied from .hgignore. 
--- .gitignore | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..69e8f09 --- /dev/null +++ b/.gitignore @@ -0,0 +1,57 @@ +glob:*.dep +glob:*.o +glob:*.a +glob:*.d +glob:second-stage/programs/*/read-tree.cc +glob:evalb/evalb +glob:first-stage/PARSE/parseIt +glob:first-stage/TRAIN/iScale +glob:first-stage/TRAIN/kn3Counts +glob:first-stage/TRAIN/pSfgT +glob:first-stage/TRAIN/pSgT +glob:first-stage/TRAIN/pTgNt +glob:first-stage/TRAIN/pUgT +glob:first-stage/TRAIN/rCounts +glob:first-stage/TRAIN/selFeats +glob:first-stage/TRAIN/trainRs +glob:second-stage/programs/eval-beam/main +glob:second-stage/programs/eval-weights/eval-weights +glob:second-stage/programs/features/best-*parses +glob:second-stage/programs/features/extract-*features +glob:second-stage/programs/features/count-*features +glob:second-stage/programs/features/oracle-score +glob:second-stage/programs/features/parallel-extract-nfeatures +glob:second-stage/programs/features/parallel-extract-spfeatures +glob:second-stage/programs/prepare-data/copy-trees-ss +glob:second-stage/programs/prepare-data/prepare-ec-data +glob:second-stage/programs/prepare-data/prepare-ec-data100 +glob:second-stage/programs/prepare-data/prepare-new-data +glob:second-stage/programs/prepare-data/ptb +glob:second-stage/programs/wlle/avper +glob:second-stage/programs/wlle/gavper +glob:second-stage/programs/wlle/cvlm +glob:second-stage/programs/wlle/cvlm-lbfgs +glob:second-stage/programs/wlle/oracle +glob:*.swp +glob:*.orig +glob:tags +glob:TAGS +glob:first-stage/PARSE/evalTree +glob:first-stage/PARSE/parseAndEval +glob:*.py[co] +glob:*.class +glob:*.so +glob:*_wrapper.cxx +glob:first-stage/PARSE/swig/*/lib/* +glob:second-stage/programs/features/swig/*/lib/* +glob:first-stage/PARSE/swig/*/build/* +glob:second-stage/programs/features/swig/*/build/* +glob:SParseval/* +glob:regression-test-* 
+glob:build* +glob:dist* +glob:first-stage/PARSE/parser_wrapper.C +glob:second-stage/programs/features/reranker_wrapper.C +glob:python/bllipparser/CharniakParser.py +glob:python/bllipparser/JohnsonReranker.py +glob:MANIFEST From cfaa9746dfae80b110d978bfce1bb1a1ebedc6b6 Mon Sep 17 00:00:00 2001 From: Jim White Date: Thu, 10 Jul 2014 14:05:22 -0700 Subject: [PATCH 10/17] Tidied up .gitignore and add second-stage/nbest and tmp dirs. --- .gitignore | 124 +++++++++++++++++++++++++++++------------------------ 1 file changed, 67 insertions(+), 57 deletions(-) diff --git a/.gitignore b/.gitignore index 69e8f09..d9be4e5 100644 --- a/.gitignore +++ b/.gitignore @@ -1,57 +1,67 @@ -glob:*.dep -glob:*.o -glob:*.a -glob:*.d -glob:second-stage/programs/*/read-tree.cc -glob:evalb/evalb -glob:first-stage/PARSE/parseIt -glob:first-stage/TRAIN/iScale -glob:first-stage/TRAIN/kn3Counts -glob:first-stage/TRAIN/pSfgT -glob:first-stage/TRAIN/pSgT -glob:first-stage/TRAIN/pTgNt -glob:first-stage/TRAIN/pUgT -glob:first-stage/TRAIN/rCounts -glob:first-stage/TRAIN/selFeats -glob:first-stage/TRAIN/trainRs -glob:second-stage/programs/eval-beam/main -glob:second-stage/programs/eval-weights/eval-weights -glob:second-stage/programs/features/best-*parses -glob:second-stage/programs/features/extract-*features -glob:second-stage/programs/features/count-*features -glob:second-stage/programs/features/oracle-score -glob:second-stage/programs/features/parallel-extract-nfeatures -glob:second-stage/programs/features/parallel-extract-spfeatures -glob:second-stage/programs/prepare-data/copy-trees-ss -glob:second-stage/programs/prepare-data/prepare-ec-data -glob:second-stage/programs/prepare-data/prepare-ec-data100 -glob:second-stage/programs/prepare-data/prepare-new-data -glob:second-stage/programs/prepare-data/ptb -glob:second-stage/programs/wlle/avper -glob:second-stage/programs/wlle/gavper -glob:second-stage/programs/wlle/cvlm -glob:second-stage/programs/wlle/cvlm-lbfgs 
-glob:second-stage/programs/wlle/oracle -glob:*.swp -glob:*.orig -glob:tags -glob:TAGS -glob:first-stage/PARSE/evalTree -glob:first-stage/PARSE/parseAndEval -glob:*.py[co] -glob:*.class -glob:*.so -glob:*_wrapper.cxx -glob:first-stage/PARSE/swig/*/lib/* -glob:second-stage/programs/features/swig/*/lib/* -glob:first-stage/PARSE/swig/*/build/* -glob:second-stage/programs/features/swig/*/build/* -glob:SParseval/* -glob:regression-test-* -glob:build* -glob:dist* -glob:first-stage/PARSE/parser_wrapper.C -glob:second-stage/programs/features/reranker_wrapper.C -glob:python/bllipparser/CharniakParser.py -glob:python/bllipparser/JohnsonReranker.py -glob:MANIFEST +*_wrapper.cxx +*.a +*.class +*.d +*.dep +*.o +*.orig +*.py[co] +*.so +*.swp + +/SParseval +/tmp + +evalb/evalb + +build* +dist* + +MANIFEST +regression-test-* +tags +TAGS + +python/bllipparser/CharniakParser.py +python/bllipparser/JohnsonReranker.py + +first-stage/PARSE/evalTree +first-stage/PARSE/parseAndEval +first-stage/PARSE/parseIt +first-stage/PARSE/parser_wrapper.C +first-stage/PARSE/swig/*/build/* +first-stage/PARSE/swig/*/lib/* +first-stage/TRAIN/iScale +first-stage/TRAIN/kn3Counts +first-stage/TRAIN/pSfgT +first-stage/TRAIN/pSgT +first-stage/TRAIN/pTgNt +first-stage/TRAIN/pUgT +first-stage/TRAIN/rCounts +first-stage/TRAIN/selFeats +first-stage/TRAIN/trainRs + +/second-stage/nbest + +second-stage/programs/*/read-tree.cc +second-stage/programs/eval-beam/main +second-stage/programs/eval-weights/eval-weights +second-stage/programs/features/best-*parses +second-stage/programs/features/count-*features +second-stage/programs/features/extract-*features +second-stage/programs/features/oracle-score +second-stage/programs/features/parallel-extract-nfeatures +second-stage/programs/features/parallel-extract-spfeatures +second-stage/programs/features/reranker_wrapper.C +second-stage/programs/features/swig/*/build/* +second-stage/programs/features/swig/*/lib/* +second-stage/programs/prepare-data/copy-trees-ss 
+second-stage/programs/prepare-data/prepare-ec-data +second-stage/programs/prepare-data/prepare-ec-data100 +second-stage/programs/prepare-data/prepare-new-data +second-stage/programs/prepare-data/ptb +second-stage/programs/wlle/avper +second-stage/programs/wlle/cvlm +second-stage/programs/wlle/cvlm-lbfgs +second-stage/programs/wlle/gavper +second-stage/programs/wlle/oracle From 13fb007ed00d98ef0f11cb9c15c964e8f3d44325 Mon Sep 17 00:00:00 2001 From: Jim White Date: Thu, 10 Jul 2014 14:13:35 -0700 Subject: [PATCH 11/17] Set LD_LIBRARY_PATH too. --- Makefile.mac | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/Makefile.mac b/Makefile.mac index 4da6a53..5c455bd 100644 --- a/Makefile.mac +++ b/Makefile.mac @@ -8,13 +8,25 @@ uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not') # sudo port select --set gcc mp-gcc47 # sudo port install boost liblbfgs +# Using MacPorts means that we have to override the default include and library locations. +LD_INCLUDE_PATH=/opt/local/include +LD_LIBRARY_PATH=/opt/local/lib + +export LD_INCLUDE_PATH +export LD_LIBRARY_PATH + +# The SParseval makefile uses a -lm dependency (a bad idea imho) which fails because there +# is no libm.a to be used. This trick works by mapping that to the system's libm.dylib. +# .LIBPATTERNS+=lib%.dylib +# export .LIBPATTERNS + # On Mac OS X using -march=native doesn't seem to work (a compilation error will occur). # Turns out there is a problem with AVX instructions on OSX for gcc after 4.2. 
# http://stackoverflow.com/questions/12016281/g-no-such-instruction-with-avx # http://mac-os-forge.2317878.n4.nabble.com/gcc-as-AVX-binutils-and-MacOS-X-10-7-td144472.html # So here's what works for me (with or without the -mfpmath=sse - the default is 387): -GCCFLAGS = -m64 -march=x86-64 -mfpmath=sse -msse -msse2 -msse3 -msse4 -msse4.1 -msse4.2 -mssse3 -I/opt/local/include +GCCFLAGS = -m64 -march=x86-64 -mfpmath=sse -msse -msse2 -msse3 -msse4 -msse4.1 -msse4.2 -mssse3 -I${LD_INCLUDE_PATH} # Must use export because otherwise second-stage/programs/wlle/Makefile doesn't get the message. export GCCFLAGS @@ -46,10 +58,11 @@ FOPENMP=-fopenmp CFLAGS = -MMD -O3 -Wall -ffast-math -finline-functions -fomit-frame-pointer -fstrict-aliasing $(GCCFLAGS) export CFLAGS -LDFLAGS = -L/opt/local/lib $(GCCLDFLAGS) -export LDFLAGS - CXXFLAGS=${CFLAGS} -Wno-deprecated export CXXFLAGS -PENNWSJTREEBANK = /usr/local/data/Penn3/parsed/mrg/wsj +LDFLAGS = -L${LD_LIBRARY_PATH} $(GCCLDFLAGS) +export LDFLAGS + +# This is a handy place to put a local setting without changing Makefile. +# PENNWSJTREEBANK = /usr/local/data/Penn3/parsed/mrg/wsj From 9343b8c13104777ac21f28f38f051c2b4a5f1989 Mon Sep 17 00:00:00 2001 From: Jim White Date: Thu, 10 Jul 2014 14:14:14 -0700 Subject: [PATCH 12/17] Add a rule to .LIBPATTERNS so that SParseval will make. --- Makefile | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 20af839..55dccfc 100644 --- a/Makefile +++ b/Makefile @@ -279,9 +279,17 @@ SParseval: tar xvzf SParseval.tgz rm SParseval.tgz +# The SParseval makefile uses a -lm dependency (a bad idea imho) which fails because there +# is no libm.a to be used. This trick works by mapping that to the system's libm.dylib. +# .LIBPATTERNS+=lib%.dylib +# export .LIBPATTERNS +# But we can't put that in Makefile.mac because it would mess up some of the other programs. 
+# So I put it here in the Make arguments and it shouldn't hurt Linuxen where the default +# rules were already working. + SParseval/src/sparseval: SParseval rm -f SParseval/src/*.o - $(MAKE) -C SParseval/src sparseval + $(MAKE) -C SParseval/src .LIBPATTERNS+=lib%.dylib sparseval # clean removes object files. # From 7a4d1e00eb4ef471cf21ce4715f43ce4e2afac02 Mon Sep 17 00:00:00 2001 From: Jim White Date: Thu, 10 Jul 2014 14:46:14 -0700 Subject: [PATCH 13/17] Don't set standard make variables if they are already set. --- second-stage/programs/eval-weights/Makefile | 2 +- second-stage/programs/wlle/Makefile | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/second-stage/programs/eval-weights/Makefile b/second-stage/programs/eval-weights/Makefile index b6dbd65..a5612f6 100644 --- a/second-stage/programs/eval-weights/Makefile +++ b/second-stage/programs/eval-weights/Makefile @@ -14,7 +14,7 @@ SOURCES = best-indices.cc best-parse.cc best-parses.cc compare-models.cc data.c TARGETS = eval-weights # best-indices best-parse best-parses compare-models pretty-print OBJECTS = $(patsubst %.l,%.o,$(patsubst %.c,%.o,$(SOURCES:%.cc=%.o))) -CC = gcc +CC ?= gcc all: $(TARGETS) diff --git a/second-stage/programs/wlle/Makefile b/second-stage/programs/wlle/Makefile index 75a803c..2efa68f 100644 --- a/second-stage/programs/wlle/Makefile +++ b/second-stage/programs/wlle/Makefile @@ -46,15 +46,15 @@ libdata.a: data.o liblmdata.a: lmdata.o ar rcv liblmdata.a lmdata.o; ranlib liblmdata.a -CC=gcc +CC?=gcc # fast options # Compilation help: you may need to remove -march=native on older compilers. 
-GCCFLAGS=-march=native -mfpmath=sse -msse2 -mmmx -FOPENMP=-fopenmp -CFLAGS=-MMD -O3 -ffast-math -fstrict-aliasing -Wall -finline-functions $(GCCFLAGS) $(FOPENMP) -LDFLAGS=$(FOPENMP) -CXXFLAGS=${CFLAGS} -Wno-deprecated +GCCFLAGS?=-march=native -mfpmath=sse -msse2 -mmmx +FOPENMP?=-fopenmp +CFLAGS?=-MMD -O3 -ffast-math -fstrict-aliasing -Wall -finline-functions $(GCCFLAGS) $(FOPENMP) +LDFLAGS?=$(FOPENMP) +CXXFLAGS?=${CFLAGS} -Wno-deprecated # debugging options # GCCFLAGS= From d4d375bcdf86f95132ee7713b34b25ed24dde0a3 Mon Sep 17 00:00:00 2001 From: Jim White Date: Thu, 10 Jul 2014 14:48:07 -0700 Subject: [PATCH 14/17] index on BLLIP_FOR_MACOS: bd9be5c Use ESTIMATORNICKNAME=lbfgs-l1c10F1n1p2 to match the other current settings. From 4bc24de26480f9452d42b48b7b3f0db0617546b1 Mon Sep 17 00:00:00 2001 From: Jim White Date: Thu, 10 Jul 2014 15:29:22 -0700 Subject: [PATCH 15/17] Remove Mac-specific stuff from Makefile. --- Makefile | 34 +++++----------------------------- 1 file changed, 5 insertions(+), 29 deletions(-) diff --git a/Makefile b/Makefile index 8a1dde1..fed52f5 100644 --- a/Makefile +++ b/Makefile @@ -68,36 +68,12 @@ # Version 4.1 and later gcc permit -march=native, but older # versions will need -march=pentium4 or -march=opteron # -# GCCFLAGS = -march=native -mfpmath=sse -msse2 -mmmx -m32 -# -# On Mac OS X using -march=native doesn't seem to work (a compilation error will occur). -# Turns out there is a problem with AVX instructions on OSX for gcc after 4.2. -# http://stackoverflow.com/questions/12016281/g-no-such-instruction-with-avx -# http://mac-os-forge.2317878.n4.nabble.com/gcc-as-AVX-binutils-and-MacOS-X-10-7-td144472.html -# So here's what works for me (with or without the -mfpmath=sse - the default is 387): -GCCFLAGS = -m64 -march=native -mno-avx -mfpmath=sse - -# Must use export because otherwise second-stage/programs/wlle/Makefile doesn't get the message. 
-export GCCFLAGS - -# For Mavericks (and Mountain Lion) I set up gcc using macports: -# sudo port install gcc47 -# sudo port select --set gcc mp-gcc47 -# sudo port install boost liblbfgs - -# CC = condor_compile gcc -CC = gcc -export CC - -# CXX = condor_compile g++ -CXX = g++ -export CXX +# GCCFLAGS ?= -march=native -mfpmath=sse -msse2 -mmmx -m32 # CFLAGS is used for all C and C++ compilation # CFLAGS = -MMD -O3 -Wall -ffast-math -finline-functions -fomit-frame-pointer -fstrict-aliasing $(GCCFLAGS) -# For some reason MacPorts does not put liblbfgs' files on the right path, so I add it on here. -LDFLAGS = -L/opt/local/lib $(GCCLDFLAGS) + EXEC = time # for SWIG wrappers, use these flags instead @@ -112,13 +88,13 @@ EXEC = time # LDFLAGS = -g -Wall $(GCCLDFLAGS) # EXEC = valgrind -CXXFLAGS = $(CFLAGS) -Wno-deprecated +CXXFLAGS ?= $(CFLAGS) -Wno-deprecated export CFLAGS export CXXFLAGS export LDFLAGS -CC=gcc -CXX=g++ +CC ?= gcc +CXX ?= g++ export CC export CXX From 6b6ccde05fbf2b74c4ef502f3719f762c2274ab2 Mon Sep 17 00:00:00 2001 From: Jim White Date: Fri, 11 Jul 2014 09:47:09 -0700 Subject: [PATCH 16/17] Always use 'gzip -c' to avoid failures caused by extra hardlinks. --- Makefile | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index fed52f5..243b762 100644 --- a/Makefile +++ b/Makefile @@ -522,11 +522,14 @@ train-reranker: $(WEIGHTSFILEGZ) # This goal estimates the reranker feature weights (i.e., trains the # reranker). # +# Don't use auto-renaming as in "gzip foo" because it fails if there is +# more than one hardlink on the file (I'm looking at you Time Machine!). 
+# # $(WEIGHTSFILEGZ): $(ESTIMATOR) $(WEIGHTSFILEGZ): $(ESTIMATOR) $(MODELDIR)/features.gz $(FEATDIR)/train.gz $(FEATDIR)/dev.gz $(FEATDIR)/test1.gz $(ESTIMATORENV) $(ZCAT) $(FEATDIR)/train.gz | $(EXEC) $(ESTIMATOR) $(ESTIMATORFLAGS) -e $(FEATDIR)/dev.gz -f $(MODELDIR)/features.gz -o $(WEIGHTSFILE) -x $(FEATDIR)/test1.gz - rm -f $(WEIGHTSFILEGZ) - gzip $(WEIGHTSFILE) + gzip -c $(WEIGHTSFILE) >$(WEIGHTSFILEGZ) + rm -f $(WEIGHTSFILE) ######################################################################## # # From 04ba357a835b14148b4051839d51bf962e87a66b Mon Sep 17 00:00:00 2001 From: Jim White Date: Sat, 25 Oct 2014 21:51:12 -0700 Subject: [PATCH 17/17] nltk.tree.Tree.parse has apparently been removed/replaced by fromstring and/or bracket_parse. --- python/bllipparser/ParsingShell.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/python/bllipparser/ParsingShell.py b/python/bllipparser/ParsingShell.py index 1b12737..61246d5 100644 --- a/python/bllipparser/ParsingShell.py +++ b/python/bllipparser/ParsingShell.py @@ -15,10 +15,13 @@ import nltk.tree try: import nltk.draw.tree + have_tree_drawing = False + read_nltk_tree = nltk.tree.Tree.fromstring have_tree_drawing = True - read_nltk_tree = nltk.tree.Tree.parse except ImportError: have_tree_drawing = False +except AttributeError: + have_tree_drawing = False from bllipparser.RerankingParser import RerankingParser