% ILO publication on GPT-4 vs. human occupational evaluations (UK), 2024.
% Cleaned catalogue export: typed as a report because no journal is recorded;
% catalogue punctuation removed from institution/address; the "54 p." extent
% is stored as a page total rather than a page range.
% NOTE(review): confirm the report series (type/number) and add a DOI or
% urldate if available; author given names are only known as initials here.
@techreport{Gmyrek:128346,
      recid       = {128346},
      author      = {Gmyrek, P. and Lutz, C. and Newlands, G.},
      title       = {A Technological Construction of Society: Comparing {GPT-4} and Human Respondents for Occupational Evaluation in the {UK}},
      institution = {International Labour Organization (ILO)},
      address     = {Geneva},
      year        = {2024},
      pagetotal   = {54},
      note        = {Electronic ed.},
      abstract    = {Despite initial research about the biases and perceptions of Large Language Models (LLMs), we lack evidence on how LLMs evaluate occupations, especially in comparison to human evaluators. In this paper, we present a systematic comparison of occupational evaluations by GPT-4 with those from an in-depth, high-quality and recent human respondents survey in the United Kingdom. Covering the full ISCO-08 occupational landscape, with 580 occupations and two distinct metrics (prestige and social value), our findings indicate that GPT-4 and human scores are highly correlated across all ISCO-08 major groups. In absolute terms, GPT-4 scores are more generous than those of the human respondents. At the same time, GPT-4 substantially under- or overestimates the occupational prestige and social value of many occupations, particularly for emerging digital and stigmatized occupations. Our analyses show both the potentials and risks of using LLM-generated data for sociological and occupational research. Potentials include LLMs' efficiency, cost effectiveness, speed, and accuracy in capturing general tendencies. By contrast, there are risks of bias, contextual misalignment, and downstream issues, for example when problematic and opaque occupational evaluations of LLMs may feed back into working life, thus leading to potentially problematic technological constructions of society. We also discuss the policy implications of our findings for the integration of LLM tools into the world of work.},
      url         = {http://wfp.tind.io/record/128346},
}