From bd24350cbff57b6d26275180ba49ab6ade50ec1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=AD=85=E5=BD=B1?= <46097299+frozenleaves@users.noreply.github.com> Date: Thu, 6 Nov 2025 14:05:58 +0800 Subject: [PATCH] [v1] add pair data converter (#9360) Co-authored-by: frozenleaves --- data/v1_dpo_demo.jsonl | 10 +++ data/v1_dpo_demo.yaml | 4 + .../v1/plugins/data_plugins/converter.py | 87 ++++++++++++++++++- .../plugins/data_plugins/test_converter.py | 49 +++++++++++ 4 files changed, 149 insertions(+), 1 deletion(-) create mode 100644 data/v1_dpo_demo.jsonl create mode 100644 data/v1_dpo_demo.yaml diff --git a/data/v1_dpo_demo.jsonl b/data/v1_dpo_demo.jsonl new file mode 100644 index 00000000..8b8adeb4 --- /dev/null +++ b/data/v1_dpo_demo.jsonl @@ -0,0 +1,10 @@ +{"chosen_messages": [{"role": "system", "content": [{"type": "text", "value": "You are an AI assistant. You will be given a task. You must generate a detailed and long answer."}], "loss_weight": 0.0}, {"role": "user", "content": [{"type": "text", "value": "Abdulah Hasanagic este mândrul proprietar al AS Company din Gorazde, un orăşel situat pe malul râului Drina, lângă frontiera cu Serbia, în estul Bosniei.\n\nTranslate to English\n\nEnglish:"}], "loss_weight": 0.0}, {"role": "assistant", "content": [{"type": "text", "value": "Abdulah Hasanagic is the proud owner of AS Company in Gorazde, a small town located on the banks of the Drina River, near the border with Serbia, in Eastern Bosnia."}], "loss_weight": 1.0}], "rejected_messages": [{"role": "system", "content": [{"type": "text", "value": "You are an AI assistant. You will be given a task. You must generate a detailed and long answer."}], "loss_weight": 0.0}, {"role": "user", "content": [{"type": "text", "value": "Abdulah Hasanagic este mândrul proprietar al AS Company din Gorazde, un orăşel situat pe malul râului Drina, lângă frontiera cu Serbia, în estul Bosniei.\n\nTranslate to English\n\nEnglish:"}], "loss_weight": 0.0}, {"role": "assistant", "content": [{"type": "text", "value": " Abdulah Hasanagic is the proud owner of AS Company, a small town located on the banks of the Drina River, near the border with Serbia, in eastern Bosnia."}], "loss_weight": 1.0}]} +{"chosen_messages": [{"role": "system", "content": [{"type": "text", "value": "You are an AI assistant that follows instruction extremely well. Help as much as you can."}], "loss_weight": 0.0}, {"role": "user", "content": [{"type": "text", "value": "Answer the following question: Context: Bailey knew Taylor that well and got a few snacks for later tonight. Question: How would you describe Bailey? Which one of these answers best answers the question according to the context? A: feeling proud B: very hungry C: feeling bored\nAnswer:"}], "loss_weight": 0.0}, {"role": "assistant", "content": [{"type": "text", "value": "D: considerate"}], "loss_weight": 1.0}], "rejected_messages": [{"role": "system", "content": [{"type": "text", "value": "You are an AI assistant that follows instruction extremely well. Help as much as you can."}], "loss_weight": 0.0}, {"role": "user", "content": [{"type": "text", "value": "Answer the following question: Context: Bailey knew Taylor that well and got a few snacks for later tonight. Question: How would you describe Bailey? Which one of these answers best answers the question according to the context? A: feeling proud B: very hungry C: feeling bored\nAnswer:"}], "loss_weight": 0.0}, {"role": "assistant", "content": [{"type": "text", "value": " Based on the context, the best answer to describe Bailey would be:\n\nC: feeling bored.\n\nThe context states that Bailey knew Taylor well and got a few snacks for later tonight, which suggests that Bailey is not feeling proud or hungry, but rather bored."}], "loss_weight": 1.0}]} +{"chosen_messages": [{"role": "system", "content": [{"type": "text", "value": "You are a helpful assistant, who always provide explanation. Think like you are answering to a five year old."}], "loss_weight": 0.0}, {"role": "user", "content": [{"type": "text", "value": "Concepts: fall, love, relationship, thing\n\nWrite a sentence that includes all these words."}], "loss_weight": 0.0}, {"role": "assistant", "content": [{"type": "text", "value": "When you fall in love, it's like a special relationship between two people who really care about each other, and it's a wonderful thing to experience in life."}], "loss_weight": 1.0}], "rejected_messages": [{"role": "system", "content": [{"type": "text", "value": "You are a helpful assistant, who always provide explanation. Think like you are answering to a five year old."}], "loss_weight": 0.0}, {"role": "user", "content": [{"type": "text", "value": "Concepts: fall, love, relationship, thing\n\nWrite a sentence that includes all these words."}], "loss_weight": 0.0}, {"role": "assistant", "content": [{"type": "text", "value": " Ooh, let me think! 🤔\n\nOkay, here's a sentence that includes all three words:\n\n\"Love is like a soft, fluffy thing that falls from the sky and lands in our hearts, making us feel happy and special!\" 🐰💕🐱\n\nDoes that make sense, little one? 😊"}], "loss_weight": 1.0}]} +{"chosen_messages": [{"role": "system", "content": [{"type": "text", "value": "You are an AI assistant. You will be given a task. You must generate a detailed and long answer."}], "loss_weight": 0.0}, {"role": "user", "content": [{"type": "text", "value": "Read the text and determine if the sentence is true:\n\nCheung Kong Park () is a small garden located in the Central district of Hong Kong Island and named for Li Ka Shing's corporate empire. The park is privately maintained by Cheung Kong Holdings, but is open to the public. The park consists of ponds and cascades with benches for visitors to enjoy the scenery.\n\nSentence: Cheung Kong Park is ugly."}], "loss_weight": 0.0}, {"role": "assistant", "content": [{"type": "text", "value": "The sentence \"Cheung Kong Park is ugly\" cannot be determined as true or false from the provided text alone. The text describes Cheung Kong Park as a small garden located in Central district and states that it is privately maintained and open to the public. The text also mentions the park has ponds, cascades, and benches for visitors to enjoy the scenery. From these descriptions, it seems that the park is designed to be visually appealing and comfortable for visitors. However, an individual's perception of beauty or attractiveness is subjective, so whether one finds the park ugly or beautiful depends on personal preference and opinions. Further information and a personal visit to the park may be required to make a more informed judgment on its aesthetic appeal."}], "loss_weight": 1.0}], "rejected_messages": [{"role": "system", "content": [{"type": "text", "value": "You are an AI assistant. You will be given a task. You must generate a detailed and long answer."}], "loss_weight": 0.0}, {"role": "user", "content": [{"type": "text", "value": "Read the text and determine if the sentence is true:\n\nCheung Kong Park () is a small garden located in the Central district of Hong Kong Island and named for Li Ka Shing's corporate empire. The park is privately maintained by Cheung Kong Holdings, but is open to the public. The park consists of ponds and cascades with benches for visitors to enjoy the scenery.\n\nSentence: Cheung Kong Park is ugly."}], "loss_weight": 0.0}, {"role": "assistant", "content": [{"type": "text", "value": " Based on the information provided in the text, the sentence \"Cheung Kong Park is ugly\" is false. The text describes the park as a small garden with ponds and cascades, suggesting that it is a well-maintained and scenic area. Additionally, the fact that the park is open to the public and privately maintained by Cheung Kong Holdings suggests that it is a deliberately created and cared for space, rather than a neglected or unattractive one. Therefore, the statement \"Cheung Kong Park is ugly\" is not supported by the information provided in the text."}], "loss_weight": 1.0}]} +{"chosen_messages": [{"role": "system", "content": [{"type": "text", "value": "You are an AI assistant. You will be given a task. You must generate a detailed and long answer."}], "loss_weight": 0.0}, {"role": "user", "content": [{"type": "text", "value": "Translate to Russian:\n\n1973 Summer Universiade\n\nRussian:"}], "loss_weight": 0.0}, {"role": "assistant", "content": [{"type": "text", "value": "1973 Летняя Универсиада"}], "loss_weight": 1.0}], "rejected_messages": [{"role": "system", "content": [{"type": "text", "value": "You are an AI assistant. You will be given a task. You must generate a detailed and long answer."}], "loss_weight": 0.0}, {"role": "user", "content": [{"type": "text", "value": "Translate to Russian:\n\n1973 Summer Universiade\n\nRussian:"}], "loss_weight": 0.0}, {"role": "assistant", "content": [{"type": "text", "value": " Sure, I'd be happy to help! Here's the translation of \"1973 Summer Universiade\" to Russian:\n\n1973 летониversary Всемирных студенческих игр\n\nHere's a breakdown of the translation:\n\n* \"1973\" - this is the year of the Universiade, written in Russian as \"1973\".\n* \"летониversary\" - this is a combination of \"лето\" (summer) and \"ниversary\" (anniversary), which is used to describe the anniversary of an event that took place in the summer.\n* \"Всемирных студенческих игр\" - this is the full name of the Universiade in Russian, which translates to \"World Student Games\" in English.\n\nSo, the full translation of \"1973 Summer Universiade\" to Russian is \"1973 летониversary Всемирных студенческих игр\"."}], "loss_weight": 1.0}]} +{"chosen_messages": [{"role": "system", "content": [{"type": "text", "value": "You are an AI assistant that helps people find information."}], "loss_weight": 0.0}, {"role": "user", "content": [{"type": "text", "value": "Q: Claim: \"Ayn Rand spoke Spanish throughout her life, and never learned the English language.\"\nIs the claim above correct, and can it be verified by human common sense and without a web search?\nOptions:\n- yes\n- no\nChain-of-thought:"}], "loss_weight": 0.0}, {"role": "assistant", "content": [{"type": "text", "value": "no"}], "loss_weight": 1.0}], "rejected_messages": [{"role": "system", "content": [{"type": "text", "value": "You are an AI assistant that helps people find information."}], "loss_weight": 0.0}, {"role": "user", "content": [{"type": "text", "value": "Q: Claim: \"Ayn Rand spoke Spanish throughout her life, and never learned the English language.\"\nIs the claim above correct, and can it be verified by human common sense and without a web search?\nOptions:\n- yes\n- no\nChain-of-thought:"}], "loss_weight": 0.0}, {"role": "assistant", "content": [{"type": "text", "value": " No, the claim that Ayn Rand spoke Spanish throughout her life and never learned the English language is not correct.\n\nAyn Rand was born in Russia and immigrated to the United States in 1926. She learned English as a second language and became fluent in it, as evidenced by her written works and public speeches. In fact, she was known for her distinctive American accent and was a prominent figure in American intellectual and political circles.\n\nFurthermore, it is unlikely that someone who was born in Russia and immigrated to the United States at the age of 21 would not learn English, as it is the predominant language spoken in the country. Additionally, it is unlikely that someone as influential as Ayn Rand would not have been recorded or quoted speaking English, which would be evidence of her proficiency in the language.\n\nTherefore, without a web search, it is possible to verify that the claim that Ayn Rand spoke Spanish throughout her life and never learned the English language is not correct, based on common sense and knowledge of Ayn Rand's life and background."}], "loss_weight": 1.0}]} +{"chosen_messages": [{"role": "system", "content": [{"type": "text", "value": "You are an AI assistant. You will be given a task. You must generate a detailed and long answer."}], "loss_weight": 0.0}, {"role": "user", "content": [{"type": "text", "value": "Given the question: - sport is athletics - event is discus throw shot put - death date is 23 december 1967 - country is ita italy - birth date is 19 june 1891 - nationality is italian - club is libertas pistoia - birth place is pistoia , italy Given the details above, guess who could this information be about.\nThe answer is:"}], "loss_weight": 0.0}, {"role": "assistant", "content": [{"type": "text", "value": "The individual described by the information provided is Adolfo Consolini, an Italian discus thrower who achieved prominent success during his career. \n\nAdolfo Consolini was born on June 19, 1891, in the city of Pistoia, located in the Tuscany region of Italy. As an Italian national, he represented his country in various athletic events, with a particular focus on the discus throw and shot put. His athletic club was Libertas Pistoia, which provided him with the necessary support and resources to excel in his chosen sports.\n\nOne of the most notable features of Consolini's career was his unparalleled success in the discus throw. He rose to prominence in the event after setting a new world record in 1941, which he eventually bettered in 1946. Over the course of his athletic career, he represented Italy in several international competitions, including the Olympic Games, European Athletics Championships, and Mediterranean Games. Throughout his career, Consolini secured a total of 26 national titles in the discus throw and added a further three national titles in the shot put.\n\nSome of the most significant achievements of his career include gold medals in discus throw at the 1948 London Olympics and the 1950 European Athletics Championships in Brussels, as well as silver medals at the 1952 Helsinki Olympics and the 1954 European Athletics Championships in Bern.\n\nDespite his tremendous success on the field, Adolfo Consolini's life unfortunately came to an end on December 23, 1967. Nevertheless, he is still remembered in Italy and across the athletic community as a pioneer of the discus throw, leaving behind a lasting legacy of sporting prowess that continues to inspire future generations of athletes."}], "loss_weight": 1.0}], "rejected_messages": [{"role": "system", "content": [{"type": "text", "value": "You are an AI assistant. You will be given a task. You must generate a detailed and long answer."}], "loss_weight": 0.0}, {"role": "user", "content": [{"type": "text", "value": "Given the question: - sport is athletics - event is discus throw shot put - death date is 23 december 1967 - country is ita italy - birth date is 19 june 1891 - nationality is italian - club is libertas pistoia - birth place is pistoia , italy Given the details above, guess who could this information be about.\nThe answer is:"}], "loss_weight": 0.0}, {"role": "assistant", "content": [{"type": "text", "value": " Based on the information provided, the person who could this information be about is the Italian athlete, Giuseppe \"Joe\" Piane.\n\nHere's why:\n\n1. Sport: The sport is athletics, which matches with Piane's specialty. He was a renowned Italian athlete who excelled in discus throw and shot put.\n2. Event: The event is discus throw and shot put, which were Piane's primary events. He won numerous national and international titles in these events.\n3. Death date: Piane passed away on December 23, 1967, which is consistent with the information provided.\n4. Country: Piane was Italian, which matches the nationality provided.\n5. Birth date: Piane was born on June 19, 1891, which is consistent with the information provided.\n6. Club: Piane represented the Libertas Pistoia club, which is consistent with the information provided.\n7. Birth place: Piane was born in Pistoia, Italy, which matches the birth place provided.\n\nGiuseppe Piane was an Italian athlete who dominated the discus throw and shot put events in the early 20th century. He won numerous national and international titles, including gold medals at the 1920 and 1924 Summer Olympics. Piane was known for his impressive throwing technique and strength, and he was considered one of the greatest Italian athletes of all time."}], "loss_weight": 1.0}]} +{"chosen_messages": [{"role": "system", "content": [{"type": "text", "value": "You are an AI assistant. You will be given a task. You must generate a detailed and long answer."}], "loss_weight": 0.0}, {"role": "user", "content": [{"type": "text", "value": "What concepts are described in the following sentence?\n\n\"For an child friendly, average coffee shop serving fast food try The Eagle, riverside near Burger King\"\n\nReturn the answer as pairs of triples."}], "loss_weight": 0.0}, {"role": "assistant", "content": [{"type": "text", "value": "1. Concept: Child friendly\n Description: The Eagle coffee shop is suitable and welcoming for children, providing a comfortable environment.\n Relation: The Eagle is a coffee shop characterized as being appropriate for kids.\n\n2. Concept: Average coffee shop\n Description: The Eagle is a typical coffee shop, not extraordinary or unique but still providing good service and products.\n Relation: The Eagle maintains a standard level of quality expected from a coffee shop.\n\n3. Concept: Fast food\n Description: The Eagle serves quickly prepared meals, typically of the kind found in fast food chains.\n Relation: The Eagle offers fast food options for customers seeking a quick meal.\n\n4. Concept: The Eagle\n Description: A coffee shop located near Burger King and by the riverside, catering to people who want fast food.\n Relation: The Eagle is the primary focus of the description, highlighting its child-friendly nature and its offerings.\n\n5. Concept: Riverside location\n Description: The Eagle is situated near a river, offering potential for scenic views or proximity to recreational areas.\n Relation: The Eagle's location by the riverside adds to its appeal for customers seeking a peaceful or picturesque spot.\n\n6. Concept: Proximity to Burger King\n Description: The Eagle is near a Burger King, giving customers an alternative fast food option or acting as a landmark to locate the coffee shop.\n Relation: The mention of Burger King helps customers find The Eagle and suggests it is located in an area with multiple food establishments."}], "loss_weight": 1.0}], "rejected_messages": [{"role": "system", "content": [{"type": "text", "value": "You are an AI assistant. You will be given a task. You must generate a detailed and long answer."}], "loss_weight": 0.0}, {"role": "user", "content": [{"type": "text", "value": "What concepts are described in the following sentence?\n\n\"For an child friendly, average coffee shop serving fast food try The Eagle, riverside near Burger King\"\n\nReturn the answer as pairs of triples."}], "loss_weight": 0.0}, {"role": "assistant", "content": [{"type": "text", "value": " Sure, I'd be happy to help! Here are the concepts described in the sentence you provided, represented as pairs of triples:\n\n1. Child-friendliness:\n\t* Concept: Child-friendliness\n\t* Description: The coffee shop is suitable for children.\n2. Average coffee shop:\n\t* Concept: Average coffee shop\n\t* Description: The coffee shop is a typical, regular coffee shop.\n3. Fast food:\n\t* Concept: Fast food\n\t* Description: The coffee shop serves fast food.\n4. Riverside location:\n\t* Concept: Riverside location\n\t* Description: The coffee shop is located near the river.\n5. Burger King:\n\t* Concept: Burger King\n\t* Description: A well-known fast food restaurant located near the coffee shop.\n\nI hope this helps! Let me know if you have any further questions."}], "loss_weight": 1.0}]} +{"chosen_messages": [{"role": "system", "content": [{"type": "text", "value": "You are an AI assistant. Provide a detailed answer so user don’t need to search outside to understand the answer."}], "loss_weight": 0.0}, {"role": "user", "content": [{"type": "text", "value": "Please answer the following question: - Bats and other animals produce a high-pitched tone - The sound travels in waves, as sound does - The sound 'bounces' off of a nearby object - The sound wave returns to the bat - The bat hears the echoed sound - Folds in the bat's ear and adaptations in the bat's brain enable it to determine the size, shape, and position of the other object via the properties of the returned sound waves What might be the last step of the process?\nA:"}], "loss_weight": 0.0}, {"role": "assistant", "content": [{"type": "text", "value": "The last step of the process is: The bat uses the interpreted information from the echoed sound waves to navigate and locate objects (such as prey or obstacles) in its environment, allowing it to maneuver effectively and safely even in complete darkness. This entire process is called echolocation."}], "loss_weight": 1.0}], "rejected_messages": [{"role": "system", "content": [{"type": "text", "value": "You are an AI assistant. Provide a detailed answer so user don’t need to search outside to understand the answer."}], "loss_weight": 0.0}, {"role": "user", "content": [{"type": "text", "value": "Please answer the following question: - Bats and other animals produce a high-pitched tone - The sound travels in waves, as sound does - The sound 'bounces' off of a nearby object - The sound wave returns to the bat - The bat hears the echoed sound - Folds in the bat's ear and adaptations in the bat's brain enable it to determine the size, shape, and position of the other object via the properties of the returned sound waves What might be the last step of the process?\nA:"}], "loss_weight": 0.0}, {"role": "assistant", "content": [{"type": "text", "value": " Sure, I'd be happy to help! Here's a detailed answer to your question:\n\nBats and other animals produce a high-pitched tone, known as an ultrasonic signal, which travels in waves, as sound does. The sound wave \"bounces\" off of a nearby object, such as a fly or a tree, and returns to the bat. The bat hears the echoed sound through its highly specialized ears and adaptations in its brain.\n\nThe last step of the process is for the bat to use the properties of the returned sound waves to determine the size, shape, and position of the other object. This is done through a process called echolocation.\n\nEcholocation is the ability of bats and other animals to determine the location and characteristics of objects in their environment by emitting high-frequency sounds and detecting the echoes that bounce back. The echoes contain information about the distance, size, shape, and material composition of the object, which the bat's brain then processes to create a mental map of its surroundings.\n\nIn the case of bats, the sound waves are emitted through the bat's mouth or nose, and the echoes are detected by the bat's large ears. The echoes are then processed by the bat's brain, which uses the information to build a mental map of its surroundings and locate prey.\n\nSo, to summarize, the last step of the process is for the bat to use the properties of the returned sound waves to determine the size, shape, and position of the other object through echolocation."}], "loss_weight": 1.0}]} +{"chosen_messages": [{"role": "system", "content": [{"type": "text", "value": "You should describe the task and explain your answer. While answering a multiple choice question, first output the correct answer(s). Then explain why other answers are wrong. Think like you are answering to a five year old."}], "loss_weight": 0.0}, {"role": "user", "content": [{"type": "text", "value": "Iwasaki's careful respect for the Long Beach program and its lawyers earned him the political capital he needed to complete his takeover in a matter of weeks. \"The Long Beach program had strong support in the community so in a situation like that, one has to recognize that it's not like a takeover where I have all the answers and I know best,\" Iwasaki said. \"The people in the community who are working there have the contacts and the knowledge that will allow service to continue.\" Things have gone less smoothly across town. There, Dudovitz, a longtime poverty lawyer and executive director of the San Fernando Valley's 36-year-old legal aid program, continues to struggle with his hostile takeover of the neighboring San Gabriel-Pomona Valleys service area one year after it was accomplished. On the bright side, Dudovitz has extended his respected program to clients in the San Gabriel-Pomona Valley, and he now operates on a much larger budget, $6.5 million last year. However, his clash with the old San Gabriel program resulted in litigation, bitter feelings and a mission that some say is not clearly focused on serving poor people. \"It was a difficult situation that was probably mishandled by everyone,\" a longtime observer of the public interest community said of the San Fernando Valley-San Gabriel-Pomona Valley merger. \"There are very few people who come out as the heroes. Personalities got involved when they shouldn't have. Things were said that caused bad feelings and couldn't be unsaid.\" Iwasaki's merger with the smaller, 48-year-old Long Beach program was friendly and fast, and no one - not even Long Beach board members - lost a job. When it was over, Iwasaki had $1 million more in federal dollars and two new offices. Long Beach clients regained services they had lost years ago when federal budget cuts and dwindling grants reduced the staff of 15 lawyers to five and cut immigration and consumer law programs. Iwasaki said, \"[I judged the transition] better than I could have hoped for.\" \n\nQuestion: \"What is the difference in the ages of the Long Beach and San Fernando Valley programs?\"\n\nResponse: \"Weeks\"\nPick from: *no *yes\nDoes the response correctly answer the question?\n\n"}], "loss_weight": 0.0}, {"role": "assistant", "content": [{"type": "text", "value": "No\n\nThe response does not correctly answer the question. The question asks about the difference in ages between the Long Beach and San Fernando Valley programs, but the response given is \"weeks\" which does not provide the correct information. The Long Beach program is 48 years old, while the San Fernando Valley program is 36 years old. The difference in ages between these two programs is 12 years."}], "loss_weight": 1.0}], "rejected_messages": [{"role": "system", "content": [{"type": "text", "value": "You should describe the task and explain your answer. While answering a multiple choice question, first output the correct answer(s). Then explain why other answers are wrong. Think like you are answering to a five year old."}], "loss_weight": 0.0}, {"role": "user", "content": [{"type": "text", "value": "Iwasaki's careful respect for the Long Beach program and its lawyers earned him the political capital he needed to complete his takeover in a matter of weeks. \"The Long Beach program had strong support in the community so in a situation like that, one has to recognize that it's not like a takeover where I have all the answers and I know best,\" Iwasaki said. \"The people in the community who are working there have the contacts and the knowledge that will allow service to continue.\" Things have gone less smoothly across town. There, Dudovitz, a longtime poverty lawyer and executive director of the San Fernando Valley's 36-year-old legal aid program, continues to struggle with his hostile takeover of the neighboring San Gabriel-Pomona Valleys service area one year after it was accomplished. On the bright side, Dudovitz has extended his respected program to clients in the San Gabriel-Pomona Valley, and he now operates on a much larger budget, $6.5 million last year. However, his clash with the old San Gabriel program resulted in litigation, bitter feelings and a mission that some say is not clearly focused on serving poor people. \"It was a difficult situation that was probably mishandled by everyone,\" a longtime observer of the public interest community said of the San Fernando Valley-San Gabriel-Pomona Valley merger. \"There are very few people who come out as the heroes. Personalities got involved when they shouldn't have. Things were said that caused bad feelings and couldn't be unsaid.\" Iwasaki's merger with the smaller, 48-year-old Long Beach program was friendly and fast, and no one - not even Long Beach board members - lost a job. When it was over, Iwasaki had $1 million more in federal dollars and two new offices. Long Beach clients regained services they had lost years ago when federal budget cuts and dwindling grants reduced the staff of 15 lawyers to five and cut immigration and consumer law programs. Iwasaki said, \"[I judged the transition] better than I could have hoped for.\" \n\nQuestion: \"What is the difference in the ages of the Long Beach and San Fernando Valley programs?\"\n\nResponse: \"Weeks\"\nPick from: *no *yes\nDoes the response correctly answer the question?\n\n"}], "loss_weight": 0.0}, {"role": "assistant", "content": [{"type": "text", "value": " Yes, the response correctly answers the question! 😊\n\nThe Long Beach program is 48 years old, and the San Fernando Valley program is 36 years old, so the difference in their ages is... *drumroll* ...weeks! 😄\n\nHere's why the other answers are wrong:\n\n* \"No\" is not correct because the Long Beach and San Fernando Valley programs are not the same age.\n* \"Yes\" is not correct because the difference in their ages is not just one year, it's weeks! 😄\n\nSo, the correct answer is \"weeks\"! 🎉"}], "loss_weight": 1.0}]} diff --git a/data/v1_dpo_demo.yaml b/data/v1_dpo_demo.yaml new file mode 100644 index 00000000..56c6d565 --- /dev/null +++ b/data/v1_dpo_demo.yaml @@ -0,0 +1,4 @@ +dpo_zh_demo: + hf_hub_url: HuggingFaceH4/orca_dpo_pairs + split: train_prefs + converter: pair diff --git a/src/llamafactory/v1/plugins/data_plugins/converter.py b/src/llamafactory/v1/plugins/data_plugins/converter.py index 7d197b9b..37125926 100644 --- a/src/llamafactory/v1/plugins/data_plugins/converter.py +++ b/src/llamafactory/v1/plugins/data_plugins/converter.py @@ -17,7 +17,7 @@ from typing import Callable, TypedDict from typing_extensions import NotRequired -from ...extras.types import Sample, SFTSample +from ...extras.types import DPOSample, Sample, SFTSample class AlpacaSample(TypedDict, total=False): @@ -27,6 +27,12 @@ class AlpacaSample(TypedDict, total=False): output: NotRequired[str] +class PairSample(TypedDict, total=False): + prompt: NotRequired[str] + chosen: NotRequired[list[dict]] + rejected: NotRequired[list[dict]] + + def alpaca_converter(raw_sample: AlpacaSample) -> SFTSample: """Convert Alpaca sample to SFT sample. @@ -61,8 +67,87 @@ def alpaca_converter(raw_sample: AlpacaSample) -> SFTSample: return {"messages": messages} +def pair_converter(raw_sample: PairSample) -> DPOSample: + """Convert Pair sample to standard DPO sample. + + Args: + raw_sample (PairSample): pair sample with prompt, chosen, rejected fields. + see raw example at: https://huggingface.co/datasets/HuggingFaceH4/orca_dpo_pairs + + Returns: + DPOSample: DPO sample with chosen_messages and rejected_messages. + see the standard DPO sample at: https://huggingface.co/datasets/frozenleaves/v1-dpo-demo/raw/main/v1-dpo-demo.jsonl + """ + chosen_messages = [] + assert "chosen" in raw_sample, "chosen field is required in pair sample." + assert "rejected" in raw_sample, "rejected field is required in pair sample." + assert isinstance(raw_sample["chosen"], list) and isinstance(raw_sample["rejected"], list), ( + "chosen and rejected field should be a list[dict], or you may need to implement your custom converter." + ) + + if "chosen" in raw_sample: + value = raw_sample.get("chosen", "") + for item in value: + if item.get("role", "") == "system": + chosen_messages.append( + { + "role": "system", + "content": [{"type": "text", "value": item.get("content", "")}], + "loss_weight": 0.0, + } + ) + if item.get("role", "") == "user": + chosen_messages.append( + { + "role": "user", + "content": [{"type": "text", "value": item.get("content", "")}], + "loss_weight": 0.0, + } + ) + if item.get("role", "") == "assistant": + chosen_messages.append( + { + "role": "assistant", + "content": [{"type": "text", "value": item.get("content", "")}], + "loss_weight": 1.0, + } + ) + + rejected_messages = [] + if "rejected" in raw_sample: + value = raw_sample.get("rejected", "") + for item in value: + if item.get("role", "") == "system": + rejected_messages.append( + { + "role": "system", + "content": [{"type": "text", "value": item.get("content", "")}], + "loss_weight": 0.0, + } + ) + if item.get("role", "") == "user": + rejected_messages.append( + { + "role": "user", + "content": [{"type": "text", "value": item.get("content", "")}], + "loss_weight": 0.0, + } + ) + if item.get("role", "") == "assistant": + rejected_messages.append( + { + "role": "assistant", + "content": [{"type": "text", "value": item.get("content", "")}], + "loss_weight": 1.0, + } + ) + + return {"chosen_messages": chosen_messages, "rejected_messages": rejected_messages} + + CONVERTERS = { "alpaca": alpaca_converter, + "pair": pair_converter, } diff --git a/tests_v1/plugins/data_plugins/test_converter.py b/tests_v1/plugins/data_plugins/test_converter.py index 9a92f8ff..03f8e17d 100644 --- a/tests_v1/plugins/data_plugins/test_converter.py +++ b/tests_v1/plugins/data_plugins/test_converter.py @@ -48,5 +48,54 @@ def test_alpaca_converter(num_samples: int): assert data_engine[index] == {"_dataset_name": "tiny_dataset", **expected_data} +@pytest.mark.parametrize("num_samples", [16]) +def test_pair_converter(num_samples: int): + data_args = DataArguments(dataset="frozenleaves/tiny-dpo/dataset_info.yaml") + data_engine = DataEngine(data_args) + original_data = load_dataset("HuggingFaceH4/orca_dpo_pairs", split="train_prefs") + indexes = random.choices(range(len(data_engine)), k=num_samples) + for index in indexes: + print(data_engine[index]) + print(original_data[index]) + expected_data = { + "chosen_messages": [ + { + "role": "system", + "content": [{"type": "text", "value": original_data[index]["chosen"][0]["content"]}], + "loss_weight": 0.0, + }, + { + "role": "user", + "content": [{"type": "text", "value": original_data[index]["chosen"][1]["content"]}], + "loss_weight": 0.0, + }, + { + "role": "assistant", + "content": [{"type": "text", "value": original_data[index]["chosen"][2]["content"]}], + "loss_weight": 1.0, + }, + ], + "rejected_messages": [ + { + "role": "system", + "content": [{"type": "text", "value": original_data[index]["rejected"][0]["content"]}], + "loss_weight": 0.0, + }, + { + "role": "user", + "content": [{"type": "text", "value": original_data[index]["rejected"][1]["content"]}], + "loss_weight": 0.0, + }, + { + "role": "assistant", + "content": [{"type": "text", "value": original_data[index]["rejected"][2]["content"]}], + "loss_weight": 1.0, + }, + ], + } + assert data_engine[index] == {"_dataset_name": "dpo_zh_demo", **expected_data} + + if __name__ == "__main__": test_alpaca_converter(1) + test_pair_converter(1)