Offsite-tuning model generation #676
Open

HarliWu wants to merge 21 commits into alibaba:dev/llm from HarliWu:dev/llm
Commits (21)
All commits by HarliWu:

1fdfb5d  Support flops calculation on LLM
aeb4ac5  Merge branch 'alibaba:dev/llm' into dev/llm
a52f4d0  Merge branch 'alibaba:dev/llm' into dev/llm
10729c4  Merge branch 'alibaba:dev/llm' into dev/llm
782da16  Merge branch 'alibaba:dev/llm' into dev/llm
7fdaf3f  Merge branch 'alibaba:dev/llm' into dev/llm
78557d0  Merge branch 'alibaba:dev/llm' into dev/llm
3470d07  Merge branch 'alibaba:dev/llm' into dev/llm
595e633  Merge branch 'alibaba:dev/llm' into dev/llm
bc125fb  Fix bugs for human_eval
8d29321  Merge branch 'alibaba:dev/llm' into dev/llm
d728b26  Fix bugs on HumanEval
ed96262  Remove \n\n in HumanEval
1dd3c38  Merge branch 'alibaba:dev/llm' into dev/llm
ca41591  Merge branch 'alibaba:dev/llm' into dev/llm
acab21c  Merge branch 'alibaba:dev/llm' into dev/llm
2639504  Merge branch 'alibaba:dev/llm' into dev/llm
e94c34a  Merge branch 'alibaba:dev/llm' into dev/llm
f41614b  method-oriented offsite-tuning model generation
f756262  Merge branch 'alibaba:dev/llm' into dev/llm
3f5c58d  Merge branch 'alibaba:dev/llm' into dev/llm
Files changed

Offsite-tuning server worker (defines OffsiteTuningServer):
@@ -9,7 +9,7 @@
 from federatedscope.core.workers.server import Server
 
 from federatedscope.llm.offsite_tuning.utils import \
-    generate_emulator_and_adapter, align_student_with_teacher
+    generate_adap_model, align_student_with_teacher
 
 logger = logging.getLogger(__name__)
 
@@ -30,17 +30,8 @@ def __init__(self,
                  device='cpu',
                  strategy=None,
                  **kwargs):
-        compress_strategy = config.llm.offsite_tuning.strategy
-        emulator_l = config.llm.offsite_tuning.emu_l
-        emulator_r = config.llm.offsite_tuning.emu_r
-        offsite_tuning_kwargs = config.llm.offsite_tuning.kwargs[0]
         logger.info('Server: Generating emulator and adapter...')
-        adap_model = \
-            generate_emulator_and_adapter(model,
-                                          strategy=compress_strategy,
-                                          emulator_l=emulator_l,
-                                          emulator_r=emulator_r,
-                                          **offsite_tuning_kwargs)
+        adap_model = generate_adap_model(model, config.llm.offsite_tuning)
         # Emulator alignment
         if config.llm.offsite_tuning.emu_align.use:
             adap_model = align_student_with_teacher(raw_model=model,
@@ -54,7 +45,11 @@ def __init__(self,
                 os._exit(0)
         # No need for this attr
         if hasattr(adap_model, 'teacher'):
+            import gc
+            import torch
             del adap_model.teacher
+            gc.collect()
+            torch.cuda.empty_cache()
 
         self.raw_model = model
         super(OffsiteTuningServer,

Review comment (on the added import gc / import torch lines): How about moving lines 48-49 to the top of the file?
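For reference, a minimal, self-contained sketch of the reviewer's suggestion: hoist the gc and torch imports to module level instead of importing inside __init__. The release_teacher helper below is a hypothetical wrapper for illustration, not part of the PR.

```python
# Sketch only: module-level imports, as the reviewer suggests.
import gc

import torch


def release_teacher(adap_model):
    """Drop the cached teacher sub-module and reclaim GPU memory."""
    if hasattr(adap_model, 'teacher'):
        del adap_model.teacher
        gc.collect()
        torch.cuda.empty_cache()
```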
federatedscope/llm/offsite_tuning/utils.py:
@@ -95,7 +95,7 @@ def get_layers(adapter_model):
     return layers
 
 
-def set_layers(adapter_model, layers, emu_l=0, emu_r=-1):
+def set_layers(adapter_model, layers):
     if isinstance(adapter_model.model, OPTForCausalLM):
         adapter_model.model.model.decoder.layers = layers
     elif isinstance(adapter_model.model, GPT2LMHeadModel):
@@ -109,12 +109,6 @@ def set_layers(adapter_model, layers, emu_l=0, emu_r=-1):
         logger.warning(f'Model {type(adapter_model.model)} not support, '
                        f'use default setting.')
         adapter_model.model.transformer.h = layers
-    adapter_model.student = layers[emu_l:emu_r]
-    adapter_model.adapter = layers[:emu_l] + layers[emu_r:]
-    add_prologue(adapter_model.student[0], None)
-    add_epilogue(adapter_model.student[-1], None)
-    adapter_model.student_l = adapter_model.student[0]
-    adapter_model.student_r = adapter_model.student[-1]
     return adapter_model
 
 
@@ -152,13 +146,31 @@ def model_distillation(model, **kwargs):
 }
 
 
+def generate_adap_model(model: AdapterModel, offsite_tuning_cfg):
+    if offsite_tuning_cfg.strategy in COMP_FUNC_MAPPING.keys():
+        compress_strategy = offsite_tuning_cfg.strategy
+        emulator_l = offsite_tuning_cfg.emu_l
+        emulator_r = offsite_tuning_cfg.emu_r
+        emu_align = offsite_tuning_cfg.emu_align.use
+        offsite_tuning_kwargs = offsite_tuning_cfg.kwargs[0]
+        return generate_emulator_and_adapter(model,
+                                             strategy=compress_strategy,
+                                             emulator_l=emulator_l,
+                                             emulator_r=emulator_r,
+                                             emulator_alignment=emu_align,
+                                             **offsite_tuning_kwargs)
+    else:
+        raise NotImplementedError
+
+
 def generate_emulator_and_adapter(model: AdapterModel,
                                   strategy='drop_layer',
-                                  emulator_l=1,
+                                  emulator_l=0,
                                   emulator_r=1000,
+                                  emulator_alignment=False,
                                   **kwargs):
     layers = get_layers(model)
-    l, r = max(emulator_l, 1), min(emulator_r, len(layers) - 1)
+    l, r = max(emulator_l, 0), min(emulator_r, len(layers) - 1)
 
     # Set the to-compress part untrainable
     for layer in layers[l:r]:
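For context, a minimal sketch of the config node that drives the new generate_adap_model entry point. The field names (strategy, emu_l, emu_r, emu_align.use, kwargs[0]) come from this diff; the concrete values, the SimpleNamespace stand-in for the real config object, and the assumption that 'drop_layer' is a key of COMP_FUNC_MAPPING are illustrative only.

```python
# Illustrative stand-in for config.llm.offsite_tuning with the fields that
# generate_adap_model reads; the values are made-up examples.
from types import SimpleNamespace

offsite_tuning_cfg = SimpleNamespace(
    strategy='drop_layer',                 # assumed to be a key of COMP_FUNC_MAPPING
    emu_l=2,                               # left boundary of the emulator span
    emu_r=22,                              # right boundary of the emulator span
    emu_align=SimpleNamespace(use=False),  # attach student layers for alignment?
    kwargs=[{}],                           # per-strategy extras; only element 0 is used
)

# Equivalent to what OffsiteTuningServer now does:
# adap_model = generate_adap_model(model, offsite_tuning_cfg)
# An unrecognised strategy raises NotImplementedError.
```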
@@ -186,7 +198,14 @@ def generate_emulator_and_adapter(model: AdapterModel,
 
     new_model = copy.deepcopy(model)
     # Set student model
-    new_model = set_layers(new_model, emulator_and_adapter, l, r)
+    new_model = set_layers(new_model, emulator_and_adapter)
+
+    if emulator_alignment:
+        new_model.student = layers
+        add_prologue(new_model.student[0], None)
+        add_epilogue(new_model.student[-1], None)
+        new_model.student_l = new_model.student[0]
+        new_model.student_r = new_model.student[-1]
 
     gc.collect()
     torch.cuda.empty_cache()

Review comment (on new_model.student = layers): Please merge the latest commits in which bugs are fixed. (layers should be detached from …
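To make the index handling concrete, a toy sketch of how the emulator span is selected. The slicing mirrors l, r = max(emulator_l, 0), min(emulator_r, len(layers) - 1) above and the layers[:emu_l] + layers[emu_r:] split that the old set_layers used; the 12-block layer count is a hypothetical example. With the clamp lowered from 1 to 0, emu_l=0 now lets the emulator span start at the very first transformer block.

```python
# Toy illustration: integers stand in for transformer blocks.
layers = list(range(12))

emulator_l, emulator_r = 0, 1000      # new defaults introduced in this PR
l = max(emulator_l, 0)                # was max(emulator_l, 1) before
r = min(emulator_r, len(layers) - 1)  # clamp to the last block

to_compress = layers[l:r]             # frozen and compressed into the emulator
adapter = layers[:l] + layers[r:]     # kept as the trainable adapter

print(l, r)           # 0 11
print(to_compress)    # blocks 0..10 feed the emulator
print(adapter)        # block 11 stays trainable
```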
@@ -303,20 +322,11 @@ def build_cfg_for_alignment(config):
     return adap_model
 
 
-def wrap_offsite_tuning_for_eval(model, config):
+def wrap_offsite_tuning_for_eval(model, config, ckpt_path=None):
     logger.info('===============use offsite tuning===============')
     # We use offsite-tuning in this experiment
     # Use adapter model instead
-    compress_strategy = config.llm.offsite_tuning.strategy
-    emulator_l = config.llm.offsite_tuning.emu_l
-    emulator_r = config.llm.offsite_tuning.emu_r
-    offsite_tuning_kwargs = config.llm.offsite_tuning.kwargs[0]
-    adap_model = \
-        generate_emulator_and_adapter(model,
-                                      strategy=compress_strategy,
-                                      emulator_l=emulator_l,
-                                      emulator_r=emulator_r,
-                                      **offsite_tuning_kwargs)
+    adap_model = generate_adap_model(model, config.llm.offsite_tuning)
     # Load kd model if ckpt exits
     if config.llm.offsite_tuning.emu_align.use and \
             config.llm.offsite_tuning.eval_type == 'emu':
@@ -333,17 +343,21 @@ def wrap_offsite_tuning_for_eval(model, config):
 
     # Load ckpt for eval
     try:
-        ckpt = torch.load(config.federate.save_to, map_location='cpu')
+        if ckpt_path is None:
+            ckpt_path = config.federate.save_to
+        ckpt = torch.load(ckpt_path, map_location='cpu')
         if 'model' and 'cur_round' in ckpt:
             adap_model.load_state_dict(ckpt['model'])
             logger.info(f"Load with the model of Round {ckpt['cur_round']}")
         else:
             adap_model.load_state_dict(ckpt)
     except Exception as error:
         logger.warning(f"{error}, will use raw model.")
 
     if config.llm.offsite_tuning.eval_type == 'emu':
         model = adap_model
-        del model.teacher
+        if hasattr(model, 'teacher'):
+            del model.teacher
     elif config.llm.offsite_tuning.eval_type == 'full':
         # Raw model load adapter from adapter_and_emulator
         new_model_state_dict = model.state_dict()
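A sketch of how an evaluation script can now point the wrapper at a specific checkpoint through the new ckpt_path argument. Only the wrap_offsite_tuning_for_eval(model, config, ckpt_path=...) signature comes from this diff; the checkpoint filename is a made-up example, and the assumption that the wrapper returns the model prepared for the configured eval_type is not shown in the visible part of the diff.

```python
# Hypothetical evaluation snippet: `model` and `config` are assumed to be the
# AdapterModel and FederatedScope config an eval script already holds.
from federatedscope.llm.offsite_tuning.utils import wrap_offsite_tuning_for_eval

# Evaluate a specific checkpoint instead of config.federate.save_to
# (path is illustrative; return value assumed to be the wrapped model):
model = wrap_offsite_tuning_for_eval(
    model, config, ckpt_path='exp/offsite_tuning/ckpt_round_50.pt')

# Omitting ckpt_path keeps the old behaviour and loads config.federate.save_to:
model = wrap_offsite_tuning_for_eval(model, config)
```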
Review comment: Should prefix be passed by the config?