|
| 1 | +{%- capture title -%} |
| 2 | +PromptAssembler |
| 3 | +{%- endcapture -%} |
| 4 | + |
| 5 | +{%- capture description -%} |
| 6 | +Assembles a sequence of messages into a single string using a template. These strings can then |
| 7 | +be used as prompts for large language models. |
| 8 | + |
| 9 | +This annotator expects the input column to contain an array of two-tuples (one array of |
| 10 | +tuples per row). The first element of each tuple is the role and the second element is |
| 11 | +the text of the message. Possible roles are "system", "user" and "assistant". |
| 12 | + |
| 13 | +An assistant header can be added to the end of the generated string by using |
| 14 | +`setAddAssistant(true)`. |
| 15 | + |
| 16 | +At the moment, this annotator uses llama.cpp as a backend to parse and apply the templates. |
| 17 | +llama.cpp uses basic pattern matching to determine the type of the template, then applies a |
| 18 | +basic version of the template to the messages. This means that more advanced templates are not |
| 19 | +supported. |
| 20 | + |
| 21 | +For an extended example, see the |
| 22 | +[example notebook](https://github.com/JohnSnowLabs/spark-nlp/blob/master/examples/python/llama.cpp/PromptAssember_with_AutoGGUFModel.ipynb). |
| 23 | +{%- endcapture -%} |
| 24 | + |
| 25 | +{%- capture input_anno -%} |
| 26 | +NONE |
| 27 | +{%- endcapture -%} |
| 28 | + |
| 29 | +{%- capture output_anno -%} |
| 30 | +DOCUMENT |
| 31 | +{%- endcapture -%} |
| 32 | + |
| 33 | +{%- capture python_example -%} |
| 34 | +from sparknlp.base import * |
| 35 | + |
| 36 | +messages = [ |
| 37 | + [ |
| 38 | + ("system", "You are a helpful assistant."), |
| 39 | + ("assistant", "Hello there, how can I help you?"), |
| 40 | + ("user", "I need help with organizing my room."), |
| 41 | + ] |
| 42 | +] |
| 43 | +df = spark.createDataFrame([messages]).toDF("messages") |
| 44 | + |
| 45 | +{% raw %} |
| 46 | +# llama3.1 |
| 47 | +template = ( |
| 48 | + "{{- bos_token }} {%- if custom_tools is defined %} {%- set tools = custom_tools %} {%- " |
| 49 | + "endif %} {%- if not tools_in_user_message is defined %} {%- set tools_in_user_message = true %} {%- " |
| 50 | + 'endif %} {%- if not date_string is defined %} {%- set date_string = "26 Jul 2024" %} {%- endif %} ' |
| 51 | + "{%- if not tools is defined %} {%- set tools = none %} {%- endif %} {#- This block extracts the " |
| 52 | + "system message, so we can slot it into the right place. #} {%- if messages[0]['role'] == 'system' %}" |
| 53 | + " {%- set system_message = messages[0]['content']|trim %} {%- set messages = messages[1:] %} {%- else" |
| 54 | + ' %} {%- set system_message = "" %} {%- endif %} {#- System message + builtin tools #} {{- ' |
| 55 | + '"<|start_header_id|>system<|end_header_id|>\\n\\n" }} {%- if builtin_tools is defined or tools is ' |
| 56 | + 'not none %} {{- "Environment: ipython\\n" }} {%- endif %} {%- if builtin_tools is defined %} {{- ' |
| 57 | + '"Tools: " + builtin_tools | reject(\'equalto\', \'code_interpreter\') | join(", ") + "\\n\\n"}} ' |
| 58 | + '{%- endif %} {{- "Cutting Knowledge Date: December 2023\\n" }} {{- "Today Date: " + date_string ' |
| 59 | + '+ "\\n\\n" }} {%- if tools is not none and not tools_in_user_message %} {{- "You have access to ' |
| 60 | + 'the following functions. To call a function, please respond with JSON for a function call." }} {{- ' |
| 61 | + '\'Respond in the format {"name": function name, "parameters": dictionary of argument name and its' |
| 62 | + ' value}.\' }} {{- "Do not use variables.\\n\\n" }} {%- for t in tools %} {{- t | tojson(indent=4) ' |
| 63 | + '}} {{- "\\n\\n" }} {%- endfor %} {%- endif %} {{- system_message }} {{- "<|eot_id|>" }} {#- ' |
| 64 | + "Custom tools are passed in a user message with some extra guidance #} {%- if tools_in_user_message " |
| 65 | + "and not tools is none %} {#- Extract the first user message so we can plug it in here #} {%- if " |
| 66 | + "messages | length != 0 %} {%- set first_user_message = messages[0]['content']|trim %} {%- set " |
| 67 | + 'messages = messages[1:] %} {%- else %} {{- raise_exception("Cannot put tools in the first user ' |
| 68 | + "message when there's no first user message!\") }} {%- endif %} {{- " |
| 69 | + "'<|start_header_id|>user<|end_header_id|>\\n\\n' -}} {{- \"Given the following functions, please " |
| 70 | + 'respond with a JSON for a function call " }} {{- "with its proper arguments that best answers the ' |
| 71 | + 'given prompt.\\n\\n" }} {{- \'Respond in the format {"name": function name, "parameters": ' |
| 72 | + 'dictionary of argument name and its value}.\' }} {{- "Do not use variables.\\n\\n" }} {%- for t in ' |
| 73 | + 'tools %} {{- t | tojson(indent=4) }} {{- "\\n\\n" }} {%- endfor %} {{- first_user_message + ' |
| 74 | + "\"<|eot_id|>\"}} {%- endif %} {%- for message in messages %} {%- if not (message.role == 'ipython' " |
| 75 | + "or message.role == 'tool' or 'tool_calls' in message) %} {{- '<|start_header_id|>' + message['role']" |
| 76 | + " + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }} {%- elif 'tool_calls' in " |
| 77 | + 'message %} {%- if not message.tool_calls|length == 1 %} {{- raise_exception("This model only ' |
| 78 | + 'supports single tool-calls at once!") }} {%- endif %} {%- set tool_call = message.tool_calls[0]' |
| 79 | + ".function %} {%- if builtin_tools is defined and tool_call.name in builtin_tools %} {{- " |
| 80 | + "'<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}} {{- \"<|python_tag|>\" + tool_call.name + " |
| 81 | + '".call(" }} {%- for arg_name, arg_val in tool_call.arguments | items %} {{- arg_name + \'="\' + ' |
| 82 | + 'arg_val + \'"\' }} {%- if not loop.last %} {{- ", " }} {%- endif %} {%- endfor %} {{- ")" }} {%- ' |
| 83 | + "else %} {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}} {{- '{\"name\": \"' + " |
| 84 | + 'tool_call.name + \'", \' }} {{- \'"parameters": \' }} {{- tool_call.arguments | tojson }} {{- "}" ' |
| 85 | + "}} {%- endif %} {%- if builtin_tools is defined %} {#- This means we're in ipython mode #} {{- " |
| 86 | + '"<|eom_id|>" }} {%- else %} {{- "<|eot_id|>" }} {%- endif %} {%- elif message.role == "tool" ' |
| 87 | + 'or message.role == "ipython" %} {{- "<|start_header_id|>ipython<|end_header_id|>\\n\\n" }} {%- ' |
| 88 | + "if message.content is mapping or message.content is iterable %} {{- message.content | tojson }} {%- " |
| 89 | + 'else %} {{- message.content }} {%- endif %} {{- "<|eot_id|>" }} {%- endif %} {%- endfor %} {%- if ' |
| 90 | + "add_generation_prompt %} {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }} {%- endif %} " |
| 91 | +) |
| 92 | +{% endraw %} |
| 93 | + |
| 94 | +prompt_assembler = ( |
| 95 | + PromptAssembler() |
| 96 | + .setInputCol("messages") |
| 97 | + .setOutputCol("prompt") |
| 98 | + .setChatTemplate(template) |
| 99 | +) |
| 100 | + |
| 101 | +prompt_assembler.transform(df).select("prompt.result").show(truncate=False) |
| 102 | ++----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ |
| 103 | +|result | |
| 104 | ++----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ |
| 105 | +|[<|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant.<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nHello there, how can I help you?<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nI need help with organizing my room.<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n]| |
| 106 | ++----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ |
| 107 | +{%- endcapture -%} |
| 108 | + |
| 109 | +{%- capture scala_example -%} |
| 110 | +// Batches (whole conversations) of arrays of messages |
| 111 | +val data: Seq[Seq[(String, String)]] = Seq( |
| 112 | + Seq( |
| 113 | + ("system", "You are a helpful assistant."), |
| 114 | + ("assistant", "Hello there, how can I help you?"), |
| 115 | + ("user", "I need help with organizing my room."))) |
| 116 | + |
| 117 | +val dataDF = data.toDF("messages") |
| 118 | + |
| 119 | +{% raw %} |
| 120 | +// llama3.1 |
| 121 | +val template = |
| 122 | + "{{- bos_token }} {%- if custom_tools is defined %} {%- set tools = custom_tools %} {%- " + |
| 123 | + "endif %} {%- if not tools_in_user_message is defined %} {%- set tools_in_user_message = true %} {%- " + |
| 124 | + "endif %} {%- if not date_string is defined %} {%- set date_string = \"26 Jul 2024\" %} {%- endif %} " + |
| 125 | + "{%- if not tools is defined %} {%- set tools = none %} {%- endif %} {#- This block extracts the " + |
| 126 | + "system message, so we can slot it into the right place. #} {%- if messages[0]['role'] == 'system' %}" + |
| 127 | + " {%- set system_message = messages[0]['content']|trim %} {%- set messages = messages[1:] %} {%- else" + |
| 128 | + " %} {%- set system_message = \"\" %} {%- endif %} {#- System message + builtin tools #} {{- " + |
| 129 | + "\"<|start_header_id|>system<|end_header_id|>\\n\\n\" }} {%- if builtin_tools is defined or tools is " + |
| 130 | + "not none %} {{- \"Environment: ipython\\n\" }} {%- endif %} {%- if builtin_tools is defined %} {{- " + |
| 131 | + "\"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}} " + |
| 132 | + "{%- endif %} {{- \"Cutting Knowledge Date: December 2023\\n\" }} {{- \"Today Date: \" + date_string " + |
| 133 | + "+ \"\\n\\n\" }} {%- if tools is not none and not tools_in_user_message %} {{- \"You have access to " + |
| 134 | + "the following functions. To call a function, please respond with JSON for a function call.\" }} {{- " + |
| 135 | + "'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its" + |
| 136 | + " value}.' }} {{- \"Do not use variables.\\n\\n\" }} {%- for t in tools %} {{- t | tojson(indent=4) " + |
| 137 | + "}} {{- \"\\n\\n\" }} {%- endfor %} {%- endif %} {{- system_message }} {{- \"<|eot_id|>\" }} {#- " + |
| 138 | + "Custom tools are passed in a user message with some extra guidance #} {%- if tools_in_user_message " + |
| 139 | + "and not tools is none %} {#- Extract the first user message so we can plug it in here #} {%- if " + |
| 140 | + "messages | length != 0 %} {%- set first_user_message = messages[0]['content']|trim %} {%- set " + |
| 141 | + "messages = messages[1:] %} {%- else %} {{- raise_exception(\"Cannot put tools in the first user " + |
| 142 | + "message when there's no first user message!\") }} {%- endif %} {{- " + |
| 143 | + "'<|start_header_id|>user<|end_header_id|>\\n\\n' -}} {{- \"Given the following functions, please " + |
| 144 | + "respond with a JSON for a function call \" }} {{- \"with its proper arguments that best answers the " + |
| 145 | + "given prompt.\\n\\n\" }} {{- 'Respond in the format {\"name\": function name, \"parameters\": " + |
| 146 | + "dictionary of argument name and its value}.' }} {{- \"Do not use variables.\\n\\n\" }} {%- for t in " + |
| 147 | + "tools %} {{- t | tojson(indent=4) }} {{- \"\\n\\n\" }} {%- endfor %} {{- first_user_message + " + |
| 148 | + "\"<|eot_id|>\"}} {%- endif %} {%- for message in messages %} {%- if not (message.role == 'ipython' " + |
| 149 | + "or message.role == 'tool' or 'tool_calls' in message) %} {{- '<|start_header_id|>' + message['role']" + |
| 150 | + " + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }} {%- elif 'tool_calls' in " + |
| 151 | + "message %} {%- if not message.tool_calls|length == 1 %} {{- raise_exception(\"This model only " + |
| 152 | + "supports single tool-calls at once!\") }} {%- endif %} {%- set tool_call = message.tool_calls[0]" + |
| 153 | + ".function %} {%- if builtin_tools is defined and tool_call.name in builtin_tools %} {{- " + |
| 154 | + "'<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}} {{- \"<|python_tag|>\" + tool_call.name + " + |
| 155 | + "\".call(\" }} {%- for arg_name, arg_val in tool_call.arguments | items %} {{- arg_name + '=\"' + " + |
| 156 | + "arg_val + '\"' }} {%- if not loop.last %} {{- \", \" }} {%- endif %} {%- endfor %} {{- \")\" }} {%- " + |
| 157 | + "else %} {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}} {{- '{\"name\": \"' + " + |
| 158 | + "tool_call.name + '\", ' }} {{- '\"parameters\": ' }} {{- tool_call.arguments | tojson }} {{- \"}\" " + |
| 159 | + "}} {%- endif %} {%- if builtin_tools is defined %} {#- This means we're in ipython mode #} {{- " + |
| 160 | + "\"<|eom_id|>\" }} {%- else %} {{- \"<|eot_id|>\" }} {%- endif %} {%- elif message.role == \"tool\" " + |
| 161 | + "or message.role == \"ipython\" %} {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }} {%- " + |
| 162 | + "if message.content is mapping or message.content is iterable %} {{- message.content | tojson }} {%- " + |
| 163 | + "else %} {{- message.content }} {%- endif %} {{- \"<|eot_id|>\" }} {%- endif %} {%- endfor %} {%- if " + |
| 164 | + "add_generation_prompt %} {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }} {%- endif %} " |
| 165 | +{% endraw %} |
| 166 | + |
| 167 | +val promptAssembler = new PromptAssembler() |
| 168 | + .setInputCol("messages") |
| 169 | + .setOutputCol("prompt") |
| 170 | + .setChatTemplate(template) |
| 171 | + |
| 172 | +promptAssembler.transform(dataDF).select("prompt.result").show(truncate = false) |
| 173 | ++----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ |
| 174 | +|result | |
| 175 | ++----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ |
| 176 | +|[<|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant.<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nHello there, how can I help you?<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nI need help with organizing my room.<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n]| |
| 177 | ++----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ |
| 178 | + |
| 179 | +{%- endcapture -%} |
| 180 | + |
| 181 | +{%- capture api_link -%} |
| 182 | +[PromptAssembler](/api/com/johnsnowlabs/nlp/PromptAssembler) |
| 183 | +{%- endcapture -%} |
| 184 | + |
| 185 | +{%- capture python_api_link -%} |
| 186 | +[PromptAssembler](/api/python/reference/autosummary/sparknlp/base/prompt_assembler/index.html#sparknlp.base.prompt_assembler.PromptAssembler) |
| 187 | +{%- endcapture -%} |
| 188 | + |
| 189 | +{%- capture source_link -%} |
| 190 | +[PromptAssembler](https://github.com/JohnSnowLabs/spark-nlp/tree/master/src/main/scala/com/johnsnowlabs/nlp/PromptAssembler.scala) |
| 191 | +{%- endcapture -%} |
| 192 | + |
| 193 | +{% include templates/anno_template.md |
| 194 | +title=title |
| 195 | +description=description |
| 196 | +input_anno=input_anno |
| 197 | +output_anno=output_anno |
| 198 | +python_example=python_example |
| 199 | +scala_example=scala_example |
| 200 | +api_link=api_link |
| 201 | +python_api_link=python_api_link |
| 202 | +source_link=source_link |
| 203 | +%} |
0 commit comments